[Bio] / FigKernelScripts / get_journals_for_org.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/get_journals_for_org.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : hwang 1.1 #This script searches pubmed central to get journals for organism.
2 :     #NOTE that this gets the PMCID not PMID
3 :     #PMCID is for PubMed Central
4 :     #PMID is for PubMed
5 :    
6 :     #!/usr/local/bin/perl
7 :    
8 :     use strict;
9 :     use LWP;
10 :     use XML::LibXML;
11 :     use FigWebServices::SeedComponents::PubMed;
12 :    
# Shared HTTP state: the subs defined further down read and assign these
# file-level variables.  The user agent is created here so that no helper
# relies on another helper having been called first.
my $request = LWP::UserAgent->new();
my $response;

my $numArgs = scalar @ARGV;

# Exactly one argument is consumed: the search term.  Diagnostics go to
# STDERR so they are never mistaken for a list of ids on STDOUT.
if ($numArgs == 0) {
    print STDERR "Provide a taxonomy id or organism name\n";
    print STDERR "usage: get_journals_for_org <taxonomy_id|organism_name>\n";
    exit 1;
}

my $query = $ARGV[0];

# The following are urls to search PubMed Central/Eutils.
# The term is appended verbatim, so characters such as '&' or '#' in the
# argument would alter the request.
# NOTE(review): NCBI documents the E-utilities base URL with https; confirm
# whether this endpoint should be switched (needs LWP::Protocol::https).
my $eutils_base = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pmc&term=";
my $url = $eutils_base . $query;

# Probe the search URL once and stop early when nothing usable comes back.
unless (defined test_url_results()) {
    die "No response from $url\n";
}

# esearch returns only 20 ids by default.  So ask once for the total number
# of hits, then ask again with retmax set to that total to get every pmcid.
my $retmax = get_return_max_num();
die "Could not determine the number of results for '$query'\n"
    unless defined $retmax;

my $url_with_retmax = "$url&retmax=$retmax";
my $pmcids = get_pmcids();

# Print the space-separated ids; never print an undefined value.
if (defined $pmcids) {
    print $pmcids;
}
else {
    warn "No PMCIDs were returned for '$query'\n";
}
# Probes the esearch URL and hands back the raw response body.
#
# Side effects: (re)creates the shared $request user agent and stores the
# reply in the shared $response.
# Returns: the response content when the request succeeded and the body is
# non-empty; otherwise a bare return (undef in scalar context).
sub test_url_results {
    $request  = LWP::UserAgent->new();
    $response = $request->get($url);

    # A failed request can still carry a non-empty (error) body, so success
    # is checked explicitly instead of being inferred from the body.
    return unless $response->is_success;

    my $results = $response->content;
    return unless defined $results && $results ne "";

    return $results;
}
55 :    
# Fetches $url and extracts the total hit count from the first
# <Count>...</Count> element of the response.
#
# Side effects: stores the reply in the shared $response; creates the shared
# $request user agent if it does not exist yet.
# Returns: the count as a string of digits, or a bare return (undef in
# scalar context) when the request fails or no count is found.
sub get_return_max_num {
    $request ||= LWP::UserAgent->new();

    $response = $request->get($url);
    return unless $response->is_success;

    my $results = $response->content;

    # Match digits only and test the match: a greedy ".*" can run past the
    # first closing tag, and $1 is not reset by a failed match.
    if ($results =~ m{<Count>(\d+)</Count>}) {
        return $1;
    }

    return;
}
70 :    
71 :    
# Fetches $url_with_retmax and collects the text of every <Id>...</Id>
# element in the response.
#
# Side effects: stores the reply in the shared $response; creates the shared
# $request user agent if it does not exist yet.
# Returns: a string in which each id is followed by a single space (empty
# string when the response holds no ids), or a bare return (undef in scalar
# context) when the request fails.
sub get_pmcids {
    $request ||= LWP::UserAgent->new();

    $response = $request->get($url_with_retmax);
    return unless $response->is_success;

    my $results = $response->content;

    # "[^<]*" stops at the closing tag, so several <Id> elements on one line
    # are captured individually rather than as one greedy blob.
    my $pmcids = '';
    while ($results =~ m{<Id>([^<]*)</Id>}g) {
        $pmcids .= "$1 ";
    }

    return $pmcids;
}
87 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3