Revision 1.2 - (download) (as text) (annotate)
Mon Sep 4 20:19:26 2006 UTC (13 years, 6 months ago) by redwards
Branch: MAIN
Changes since 1.1: +3 -1 lines
making sure gene id is right

# convert the list of GeneID/pubmed IDs to a list of the pegs that match.
# Download the file 
#       ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2pubmed.gz
# this has three columns: taxid, geneid, and pubmed id
# We will convert this to peg, geneid, pubmed id

use strict;
use FIG;
# Create the FIG object that provides access to the feature database.
# Direct method-call syntax is used instead of the ambiguous indirect-object
# form ("new FIG"), which Perl can misparse.
my $fig = FIG->new;

# The single required command-line argument is the gene2pubmed file
# (plain text or gzip-compressed).
my $file = shift(@ARGV) || die "gene2pubmed file?";

#my %tax;
#foreach my $gen ($fig->genomes) 
#    $gen =~ /^fig\|(\d+)\.\d+/;
#    $tax{$1}=$gen;

# Open the input on the filehandle IN. Files ending in .gz are streamed
# through gunzip; anything else is read directly.
if ($file =~ /\.gz$/) {
    # List-form pipe open: the filename is handed to gunzip as a single
    # argument and never passes through a shell, so spaces or shell
    # metacharacters in the name cannot break or hijack the command.
    # The "--" stops gunzip from treating a leading "-" as an option.
    open(IN, '-|', 'gunzip', '-c', '--', $file) || die "Can't open a pipe to gunzip $file: $!";
}
else {
    # Three-argument open: the filename can never be mistaken for a mode.
    open(IN, '<', $file) || die "CAn't open $file: $!";
}

# Database handle used to look up features by alias.
my $dbh=$fig->db_handle;

# Pegs found for the most recently queried GeneID. A gene with several PubMed
# IDs appears on several input lines, so remembering the last result avoids
# repeating the slow leading-wildcard LIKE query for each of them.
my $cached_gene_id = '';
my @cached_pegs;

# Read the input (taxid, geneid, pubmed id) and print one
# "peg <TAB> geneid <TAB> pubmed id" line for every matching feature.
while (my $line = <IN>) {
    # Without this the newline stays attached to the last column and every
    # output record is followed by an empty line.
    chomp $line;

    # Skip comment lines (the file may begin with a "#..." header line).
    next if $line =~ /^#/;

    my (undef, $gene_id, $pubmed_id) = split /\t/, $line;

    # Ignore short lines, and only accept purely numeric GeneIDs: the value
    # is interpolated into the SQL statement below, so nothing that could
    # alter the query is allowed through.
    next unless defined $pubmed_id && $gene_id =~ /\A\d+\z/;

    if ($gene_id ne $cached_gene_id) {
        $cached_gene_id = $gene_id;
        @cached_pegs    = ();

        my $rows = $dbh->SQL("SELECT id,aliases FROM features WHERE aliases LIKE '\%GeneID:${gene_id}\%'");
        if ($rows && (@$rows > 0)) {
            foreach my $row (@$rows) {
                my ($peg, $aliases) = @$row;

                # LIKE '%GeneID:123%' also returns GeneID:1234 and friends,
                # so keep only features carrying exactly this GeneID. The
                # test looks at every GeneID alias, not just the first one.
                next unless defined $aliases && $aliases =~ /GeneID:${gene_id}\b/;
                push @cached_pegs, $peg;
            }
        }
    }

    foreach my $peg (@cached_pegs) {
        print join("\t", $peg, $gene_id, $pubmed_id), "\n";
    }
}

# Closing a pipe reports the exit status of the child process, so a failed
# decompression does not go unnoticed.
close(IN) || warn "Problem closing the input stream (status $?): $!\n";

