[Bio] / FigKernelScripts / get_taxid_journals.pl Repository:
ViewVC logotype

View of /FigKernelScripts/get_taxid_journals.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Tue Mar 20 19:15:24 2007 UTC (12 years, 8 months ago) by hwang
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
get journals from PMC using taxide

#!/usr/bin/perl
use strict;
use FIG;

#Usage: get_taxid_journals.pl file_path
#Results are printed to standard output,
#so run: get_taxid_journals.pl file_path > fileout.txt
 
#Read the organism list named on the command line and, for every entry,
#print the publications found by taxonomy id and then by organism name.
my $file = $ARGV[0];
defined $file or die "usage: $0 file_path > fileout.txt\n";
open(my $org_fh, '<', $file) or die "can't open file '$file': $!\n";

while (my $line = <$org_fh>) {

    #File should be a tab delimited format. Taxid<tab>Name
    #Strip the line ending before splitting so that neither field keeps it
    #(a line without a tab used to leave the newline inside the taxid).
    $line =~ s/[\r\n]+\z//;

    #Blank lines carry nothing to look up; skip them without sleeping.
    next if $line !~ /\S/;

    my ($taxid, $orgname) = split(/\t/, $line);

    if (defined $taxid && $taxid ne '' && $taxid !~ m/NO TAXID/) {
        print_journals_for_query("txid$taxid", ">TAXID:$taxid");
    }

    if (defined $orgname && $orgname ne '') {
        print_journals_for_query("$orgname", ">Org:$orgname");
    }

    #Keeping with eutils etiquette. Jobs have a sleep of 3
    sleep 3;
}

close($org_fh);

#Run get_journals_for_org for one search term, print a header line holding
#the number of identifiers it returned, then print the summary text that
#get_author_date_title produces for those identifiers.
#  $query  - search term handed to get_journals_for_org
#  $header - text printed in front of the publication count
sub print_journals_for_query {
    my ($query, $header) = @_;

    #NOTE(review): presumably returns the command's output as one string in
    #scalar context - confirm against FIG.pm.
    my $output = FIG::run_gathering_output("$FIG_Config::bin/get_journals_for_org", $query);

    #split ' ' ignores leading whitespace and treats a run of whitespace as a
    #single separator, so empty fields no longer inflate the count or end up
    #as empty ids in the request.
    my @ids = defined $output ? split(' ', $output) : ();
    my $num_elements = @ids;

    print "$header $num_elements publication[s]\n";

    #Nothing to summarise when no identifiers came back.
    return unless @ids;

    my $out = get_author_date_title(\@ids);
    print $out if defined $out;
    return;
}


#The following routines are similar to those in the other journals scripts, but these
#query the PMC database. This is a quick and easy approach; it may be cleaned up when there's time.


sub test_url_results {
    #Fetches a URL and returns the body of the response.
    #  $_[0] - the URL to GET
    #Returns the response content when the request succeeded and the body is
    #non-empty; otherwise returns nothing (undef in scalar context).

    my $url = $_[0];

    #The file never loads LWP::UserAgent itself; require is a no-op when some
    #other module has already loaded it.
    require LWP::UserAgent;

    my $request  = LWP::UserAgent->new();
    my $response = $request->get($url);

    #A failed request still carries content (an error page); do not hand that
    #to the caller as if it were search results.
    unless ($response->is_success) {
        warn "request for $url failed: " . $response->status_line . "\n";
        return;
    }

    my $results = $response->content;

    if (defined $results && $results ne "") {
        return $results;
    }
    return;
}

sub get_author_date_title {
    #This subroutine gets the author, date, title for a list of identifiers
    #by querying esummary with db=pmc.
    #  $_[0] - either a single identifier string (used as given) or a
    #          reference to an array of identifiers
    #Returns one line per <DocSum> that contains an <Id>:
    #  id<tab>author<tab>date<tab>title<newline>
    #A field that cannot be found is left empty. Returns an empty string when
    #there are no identifiers or no usable response.

    my $journal_in = $_[0];

    my @ids = (ref $journal_in eq 'ARRAY') ? @{$journal_in} : ($journal_in);

    #Drop undefined/blank entries; they would only add empty slots to the id list.
    @ids = grep { defined $_ && $_ =~ /\S/ } @ids;

    #Without identifiers there is nothing to ask for, so skip the request.
    return '' unless @ids;

    #join avoids the trailing comma the old concatenation loop left behind.
    my $publication_list = join(',', @ids);

    my $journal_url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pmc&id=";
    #The parameter name is "retmode"; it was previously misspelled "retmod".
    my $url = $journal_url . $publication_list . "&retmode=xml";

    my $out_info = '';

    my $esearch_results = test_url_results($url);
    return $out_info unless $esearch_results;

    foreach my $docsum (split(/<DocSum>/, $esearch_results)) {

        next if ($docsum !~ m/<Id>/);

        #Capture in list context: a failed match yields undef instead of
        #silently reusing $1 from the previous successful match.
        my ($id)     = $docsum =~ m/<Id>(.*)<\/Id>/;
        my ($author) = $docsum =~ m/Author.*>(.*)<\/Item>/;
        my ($date)   = $docsum =~ m/PubDate.*>(.*)<\/Item>/;
        my ($title)  = $docsum =~ m/Title.*>(.*)<\/Item>/;

        $out_info .= join("\t", map { defined $_ ? $_ : '' } ($id, $author, $date, $title)) . "\n";
    }

    return $out_info;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3