[Bio] / FigKernelScripts / get_taxid_journals.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/get_taxid_journals.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : hwang 1.1 #!/usr/bin/perl
2 :     use strict;
3 :     use FIG;
4 :    
5 :     #Usage: get_taxid_journals.pl file_path
6 :     #Results are written to standard output,
7 :     #so redirect them: get_taxid_journals.pl file_path > fileout.txt
8 :    
9 :     my $file = $ARGV[0];
10 :     open(ORG, $file) || die "can't find file\n";
11 :     my @org = <ORG>;
12 :    
13 :     foreach (@org) {
14 :    
15 :     #File should be a tab deliminated format. Taxid<tab>Name
16 :     my ($taxid, $orgname) = split(/\t/,$_);
17 :     $orgname =~ s/\n//;
18 :     if ($taxid !~ m/NO TAXID/) {
19 :     my $output = &FIG::run_gathering_output("$FIG_Config::bin/get_journals_for_org", "txid$taxid");
20 :     my @array = split(/\s/,$output);
21 :     my $num_elements = @array;
22 :    
23 :     print ">TAXID:$taxid $num_elements publication[s]\n";
24 :     my $out = &get_author_date_title(\@array);
25 :     print $out;
26 :     }
27 :    
28 :     if ($orgname) {
29 :     my $output = &FIG::run_gathering_output("$FIG_Config::bin/get_journals_for_org", "$orgname");
30 :     my @array = split(/\s/,$output);
31 :     my $num_elements = @array;
32 :    
33 :     print ">Org:$orgname $num_elements publication[s]\n";
34 :     my $out = &get_author_date_title(\@array);
35 :     print $out;
36 :     }
37 :     #Keeping with eutils etiquette. Jobs have a sleep of 3
38 :     sleep 3;
39 :     }
40 :    
41 :    
42 :     #The following routines are similar to those in the other journals scripts, but these query the
43 :     #PMC database. This is a quick and easy version that may be cleaned up when there's time.
44 :    
45 :    
46 :     sub test_url_results {
47 :    
48 :     my $url = $_[0];
49 :    
50 :     # Searches Pubmed and Returns the number of results
51 :     my $request=LWP::UserAgent->new();
52 :     my $response=$request->get($url);
53 :     my $results= $response->content;
54 :    
55 :     if ($results ne "") {
56 :     return $results;
57 :     }
58 :     else {
59 :     return;
60 :     }
61 :     }
62 :    
63 :     sub get_author_date_title {
64 :     #This subroutine gets the author, date, title for a list of pubmed identifiers.
65 :     #May take in one or an array
66 :    
67 :     my $journal_in = $_[0];
68 :     my $publication_list;
69 :     my $out_info;
70 :    
71 :     if (ref $journal_in eq 'ARRAY') {
72 :     my @journals = @{$journal_in};
73 :     foreach (@journals) {
74 :     $publication_list .= "$_,";
75 :     }
76 :     }
77 :     else {
78 :     $publication_list = $journal_in;
79 :     }
80 :    
81 :     my $journal_url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pmc&id=";
82 :     my $url = "$journal_url"."$publication_list"."&retmod=xml";
83 :    
84 :     if (my $esearch_results = &test_url_results($url)) {
85 :    
86 :     my @tmp = split(/<DocSum>/, $esearch_results);
87 :     foreach(@tmp) {
88 :    
89 :     next if ($_ !~ m/<Id>/);
90 :    
91 :     $_ =~ m/<Id>(.*)<\/Id>/;
92 :     $out_info .= "$1\t";
93 :     $_ =~ m/<*Author.*>(.*)<\/Item>/;
94 :     $out_info .= "$1\t";
95 :     $_ =~ m/<*PubDate.*>(.*)<\/Item>/;
96 :     $out_info .= "$1\t";
97 :     $_ =~ m/<*Title.*>(.*)<\/Item>/;
98 :     $out_info .= "$1\n";
99 :     }
100 :    
101 :     }
102 :     return $out_info;
103 :     }
104 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3