[Bio] / FigKernelPackages / raelib.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/raelib.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.13, Wed May 4 03:07:23 2005 UTC revision 1.17, Thu May 26 18:15:59 2005 UTC
# Line 11  Line 11 
11    
12  package raelib;  package raelib;
13  use strict;  use strict;
14    use Bio::SeqIO;
15    use Bio::Seq;
16    use Bio::SeqFeature::Generic;
17  use FIG;  use FIG;
18  my $fig=new FIG;  my $fig=new FIG;
19    
# Line 95  Line 98 
98   my $linecount;   my $linecount;
99   while (<IN>) {   while (<IN>) {
100    $linecount++;    $linecount++;
101    unless ($linecount % 10000) {print STDERR "Correspondence of $linecount lines calculated\n"}    if ($verbose && !($linecount % 10000))  {print STDERR "Parsed $linecount lines\n"}
102    if (/^>/) {print OUT; next}    if (/^>/) {print OUT; next}
103    chomp;    chomp;
104    my $done;    foreach my $peg ($self->swiss_pir_ids($_)) {
   foreach my $peg ($fig->by_alias("uni|$_")) {  
105     print OUT $_, "\t", $peg, "\n";     print OUT $_, "\t", $peg, "\n";
    $done=1;  
106    }    }
   unless ($done) {  
    foreach my $peg ($fig->by_alias("tr|$_")) {  
     print OUT $_, "\t", $peg, "\n";  
     $done=1;  
107     }     }
108     close IN;
109     close OUT;
110     return $linecount;
111    }    }
112    unless ($done) {  
113     foreach my $peg ($fig->by_alias("sp|$_")) {  =head2 uniprotcorrespondence
114      print OUT $_, "\t", $peg, "\n";  
115      $done=1;  Generate a correspondence table between uniprot knowledge base IDs and FIG ID's.
116    
117    The uniprot KB file is in the form:  UniProtKB_Primary_Accession | UniProtKB_ID | Section | Protein Name
118    
119     This method takes three arguments:
120       from    : uniprotKB file
121       to      : file to write information to
122       verbose : report on progress
123    
124     Returns the number of lines in the pirsinfo file that were read.
125    
126    =cut
127    
128    sub uniprotcorrespondence {
129     my ($self, $from, $to, $verbose)=@_;
130     unless (-e $from) {
131      print STDERR "File $from does not exist as called in $0\n";
132      return 0;
133     }     }
134     open (IN, $from) || die "Can't open $from";
135     open (OUT, ">$to") || die "Can't write to $to";
136     my $linecount;
137     while (<IN>) {
138      chomp;
139      $linecount++;
140      if ($verbose && !($linecount % 10000))  {print STDERR "Parsed $linecount lines\n"}
141      my @line=split /\s+\|\s+/;
142      my $added;
143      foreach my $peg ($self->swiss_pir_ids($line[0])) {
144       print OUT "$_ | $peg\n";
145       $added=1;
146    }    }
147    unless ($done) {print OUT $_, "\t\n"}    unless ($added) {print OUT "$_\n"}
148   }   }
149   close IN;   close IN;
150   close OUT;   close OUT;
# Line 123  Line 152 
152  }  }
153    
154    
155    
156    =head2 swiss_pir_ids()
157    
158    SwissProt/PIR have lots of ID's that we want to get, usually in this order - uni --> tr --> sp. This routine will map swissprot/pir ids to fig id's.
159    
160    =cut
161    
162    sub swiss_pir_ids {
163     my ($self, $id)=@_;
164     return () unless ($id);
165    
166     my @return=($fig->by_alias("uni|$id"));
167     return @return if ($return[0]);
168    
169     @return=($fig->by_alias("tr|$id"));
170     return @return if ($return[0]);
171    
172     @return=($fig->by_alias("sp|$id"));
173     return @return if ($return[0]);
174    
175     return ();
176    }
177    
178  =head2 ss_by_id  =head2 ss_by_id
179    
180   Generate a list of subsystems that a peg occurs in. This is a ; separated list.   Generate a list of subsystems that a peg occurs in. This is a ; separated list.
# Line 343  Line 395 
395          );          );
396  }  }
397    
398    =head2 GenBank
399    
400     This object will take a genome number and return a Bio::Seq::RichSeq object that has the whole genome
401     in GenBank format. This should be a nice way of getting some data out, but will probably be quite slow
402     at building the object.
403    
404     Note that you need to call this with the genome name and the contig. This will then go through that contig.
405    
406     Something like this should work
407    
408     foreach my $contig ($fig->all_contigs($genome)) {
409      my $seqobj=FIGRob->GenBank($genome, $contig);
410      # process the contig
411     }
412    
413    =cut
414    
415    sub GenBank {
416     my ($self, $genome, $contig)=@_;
417     my $gs=$fig->genus_species($genome);
418     return unless ($gs);
419     unless ($contig) {
420      print STDERR "You didn't provide a contig for $gs. I think that was a mistake. Sorry\n";
421      return;
422     }
423     my $len=$fig->contig_ln($genome, $contig);
424     unless ($len) {
425      print STDERR "$contig from $gs doesn't appear to have a length. Is it right?\n";
426      return;
427     }
428    
429    
430     # first find all the pegs ...
431     my $features; # all the features in the genome
432     my $allpegs; # all the pegs
433     my $translation; # all the protein sequences
434     foreach my $peg ($fig->pegs_of($genome)) {
435      my @location=$fig->feature_location($peg);
436      my $func=$fig->function_of($peg);
437      foreach my $loc (@location) {
438       $loc =~ /^(.*)\_(\d+)\_(\d+)$/;
439       my ($cg, $start, $stop)=($1, $2, $3);
440       next unless ($cg eq $contig);
441       # save this information for later
442       $features->{'peg'}->{$loc}=$func;
443       $allpegs->{'peg'}->{$loc}=$peg;
444       $translation->{'peg'}->{$loc}=$fig->get_translation($peg);
445      }
446     }
447     # ... and all the RNAs
448     foreach my $peg ($fig->rnas_of($genome)) {
449      my @location=$fig->feature_location($peg);
450      my $func=$fig->function_of($peg);
451      foreach my $loc (@location) {
452       $loc =~ /^(.*)\_(\d+)\_(\d+)$/;
453       my ($cg, $start, $stop)=($1, $2, $3);
454       next unless ($cg eq $contig);
455       # save this information for later
456       $features->{'rna'}->{$loc}=$func;
457       $allpegs->{'rna'}->{$loc}=$peg;
458      }
459     }
460    
461    
462     # now get all the contigs out
463     my $seq=$fig->dna_seq($genome, $contig."_1_".$len);
464     my $description = "Contig $contig from " . $fig->genus_species($genome);
465     my $sobj=Bio::Seq->new(
466              -seq              =>  $seq,
467              -id               =>  $contig,
468              -desc             =>  $description,
469              -accession_number =>  $genome
470              );
471     foreach my $prot (keys %{$features->{'peg'}}) {
472       $prot =~ /^(.*)\_(\d+)\_(\d+)$/;
473       my ($cg, $start, $stop)=($1, $2, $3);
474       my $strand=1;
475       if ($stop < $start) {
476        ($stop, $start)=($start, $stop);
477        $strand=-1;
478     }
479    
480     my $feat=Bio::SeqFeature::Generic->new(
481            -start         =>  $start,
482            -end           =>  $stop,
483            -strand        =>  $strand,
484            -primary       =>  'CDS',
485            -display_name  =>  $allpegs->{'peg'}->{$prot},
486            -source_tag    =>  'the SEED',
487            -tag           =>
488                           {
489                           db_xref     =>   $allpegs->{'peg'}->{$prot},
490                           note        =>  'Generated by the Fellowship for the Interpretation of Genomes',
491                           function    =>  $features->{'peg'}->{$prot},
492                           translation =>  $translation->{'peg'}->{$prot}
493                          }
494           );
495    
496       $sobj->add_SeqFeature($feat);
497     }
498    
499     foreach my $prot (keys %{$features->{'rna'}}) {
500       $prot =~ /^(.*)\_(\d+)\_(\d+)$/;
501       my ($cg, $start, $stop)=($1, $2, $3);
502       my $strand=1;
503       if ($stop < $start) {
504        ($stop, $start)=($start, $stop);
505        $strand=-1;
506       }
507    
508       my $feat=Bio::SeqFeature::Generic->new(
509            -start         =>  $start,
510            -end           =>  $stop,
511            -strand        =>  $strand,
512            -primary       =>  'RNA',
513            -source_tag    =>  'the SEED',
514            -display_name  =>  $allpegs->{'rna'}->{$prot},
515            -tag           =>
516                          {
517                           db_xref     =>   $allpegs->{'rna'}->{$prot},
518                           note        =>  'Generated by the Fellowship for the Interpretation of Genomes',
519                           function    =>  $features->{'rna'}->{$prot},
520                          }
521           );
522    
523      $sobj->add_SeqFeature($feat);
524     }
525     return $sobj;
526    }
527    
528    =head2 best_hit
529    
530     Returns the FIG id of the single best hit to a peg
531    
532     eg
533    
534     my $bh=$fr->best_hit($peg);
535     print 'function is ', scalar $fig->function_of($bh);
536    
537    =cut
538    
539    sub best_hit {
540     my ($self, $peg)=@_;
541     return unless ($peg);
542    
543     my ($maxN, $maxP)=(1, 1e-5);
544     my @sims=$fig->sims($peg, $maxN, $maxP, 'fig');
545     return ${$sims[0]}[1];
546    }
547    
548  1;  1;
549    

Legend:
Removed from v.1.13  
changed lines
  Added in v.1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3