[Bio] / FigKernelScripts / get_journals_for_org.pl Repository:
ViewVC logotype

View of /FigKernelScripts/get_journals_for_org.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Tue Mar 20 19:06:10 2007 UTC (12 years, 11 months ago) by hwang
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
this script is the backend to get journals using taxonomy id or name. This calls the PubMed Central database

# This script searches PubMed Central to get journals for an organism.
# NOTE: the ids it prints are PMCIDs, not PMIDs.
#   PMCID is the identifier used by PubMed Central.
#   PMID  is the identifier used by PubMed.

#!/usr/local/bin/perl

use strict;
use LWP;
use XML::LibXML; 
use FigWebServices::SeedComponents::PubMed;

# Shared HTTP state. The subs defined below read and write these file-level
# lexicals directly instead of taking arguments.
my $request;     # LWP::UserAgent instance; created by test_url_results()
my $response;    # most recent response object returned by $request->get

# A search term (taxonomy id or organism name) is required.
if (@ARGV == 0) {
    print "Provide a taxonomy id or organism name\n";
    print "usage: get_journals_for_org <taxonomy_id|organism_name>\n";
    exit;
}

my $query = $ARGV[0];

# E-utilities esearch endpoint for the PubMed Central (pmc) database.
# NCBI expects E-utilities requests to be made over https.
# NOTE(review): $query is appended verbatim, so characters such as '&' or '#'
# would change the meaning of the URL; confirm whether callers pre-encode it.
my $eutils_base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pmc&term=";
my $url = $eutils_base . $query;

# Probe the URL first. This call also creates the user agent in $request
# that the later subs reuse, so it has to run before them.
my $probe = test_url_results();
die "No usable response from $url\n" unless defined $probe;

# esearch returns at most 20 ids unless retmax is given (eutils presumably
# does this on purpose to keep users from hitting the database too hard).
# So ask once for the total hit count, then ask again with retmax set to
# that count to get all the pmcids.
my $retmax = get_return_max_num();
die "Could not read the result count from $url\n" unless defined $retmax;

my $url_with_retmax = "$url&retmax=$retmax";
my $pmcids = get_pmcids();

# get_pmcids() returns nothing when the request fails or no ids are found.
print $pmcids if defined $pmcids;
# Fetch $url once and return the raw response body.
#
# Side effects: creates the LWP::UserAgent stored in the file-level $request
# (get_return_max_num and get_pmcids reuse it) and stores the response in
# the file-level $response.
#
# Returns the response content when the request succeeded and the body is
# non-empty; otherwise returns nothing (undef in scalar context).
sub test_url_results {

    $request  = LWP::UserAgent->new();
    $response = $request->get($url);

    # A failed request can still carry a non-empty body (an error page),
    # so check the status rather than only the content.
    return unless $response->is_success;

    my $results = $response->content;
    return unless defined $results && $results ne "";

    return $results;
}

# Request $url and return the number found in the first <Count> element of
# the response. The caller uses this number as the retmax value for the
# follow-up request.
#
# Reads the file-level $url and $request; stores the response in $response.
#
# Returns the count as a string of digits, or nothing (undef in scalar
# context) when the request fails or no <Count> element is present.
sub get_return_max_num {

    # $request is normally created by test_url_results(); create it here if
    # that call has not happened so this sub does not depend on call order.
    $request ||= LWP::UserAgent->new();

    $response = $request->get($url);
    return unless $response->is_success;

    my $results = $response->content;

    # Match digits only, so a response delivered on a single line with
    # several <Count> elements cannot be over-captured, and only read $1
    # after a successful match so a stale capture is never returned.
    if ($results =~ m{<Count>\s*(\d+)\s*</Count>}) {
        return $1;
    }

    return;
}


# Request $url_with_retmax and collect the text of every <Id> element in the
# response.
#
# Reads the file-level $url_with_retmax and $request; stores the response in
# $response.
#
# Returns one string with each id followed by a single space (so the string
# ends with a trailing space), or nothing (undef in scalar context) when the
# request fails or the response contains no ids.
sub get_pmcids {

    # $request is normally created by test_url_results(); create it here if
    # that call has not happened so this sub does not depend on call order.
    $request ||= LWP::UserAgent->new();

    $response = $request->get($url_with_retmax);
    return unless $response->is_success;

    my $results = $response->content;

    # [^<]+ stops at the closing tag, so several <Id> elements on one line
    # are captured separately instead of as one greedy match.
    my @ids = $results =~ m{<Id>([^<]+)</Id>}g;
    return unless @ids;

    return join '', map { "$_ " } @ids;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3