[Bio] / FigKernelPackages / FIGO.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIGO.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.30, Fri Nov 30 21:35:51 2007 UTC revision 1.32, Thu Dec 6 13:59:34 2007 UTC
# Line 105  Line 105 
105    
106  =item USAGE:  =item USAGE:
107    
108  C<< my $figo = FIGO->new();           #...Subclass defaults to FIG >>      my $figo = FIGO->new();           #...Subclass defaults to FIG
109    
110  C<< my $figo = FIGO->new('SPROUT');   #...Subclass is a SPROUT object >>      my $figo = FIGO->new('SPROUT');   #...Subclass is a SPROUT object
111    
112  =back  =back
113    
# Line 150  Line 150 
150    
151  =item USAGE:  =item USAGE:
152    
153  C<< my @tax_ids = $figo->genomes(); >>      my @tax_ids = $figo->genomes();
154    
155  C<< my @tax_ids = $figo->genomes( @constraints ); >>      my @tax_ids = $figo->genomes( @constraints );
156    
157  =item @constraints  =item @constraints
158    
# Line 271  Line 271 
271    
272  =item USAGE:  =item USAGE:
273    
274  C<< foreach $fam ($figO->all_figfams) { #...Do something } >>      foreach $fam ($figO->all_figfams) { #...Do something }
275    
276  =item RETURNS:  =item RETURNS:
277    
# Line 300  Line 300 
300    
301  =item USAGE:  =item USAGE:
302    
303  C<< my ($fam, $sims) = $figO->family_containing($seq); >>      my ($fam, $sims) = $figO->family_containing($seq);
304    
305  =item $seq:  =item $seq:
306    
# Line 340  Line 340 
340    
341  =item USAGE:  =item USAGE:
342    
343  C<< my $fam = $figO->figfam($family_id); >>      my $fam = $figO->figfam($family_id);
344    
345  =item $family_id;  =item $family_id;
346    
# Line 379  Line 379 
379    
380  =item USAGE:  =item USAGE:
381    
382  C<< my $orgO = GenomeO->new($figO, $tax_id); >>      my $orgO = GenomeO->new($figO, $tax_id);
383    
384  =item RETURNS:  =item RETURNS:
385    
# Line 406  Line 406 
406    
407  =item USAGE:  =item USAGE:
408    
409  C<< my $tax_id = $orgO->id(); >>      my $tax_id = $orgO->id();
410    
411  =item RETURNS:  =item RETURNS:
412    
# Line 430  Line 430 
430    
431  =item USAGE:  =item USAGE:
432    
433  C<< $gs = $orgO->genus_species(); >>      $gs = $orgO->genus_species();
434    
435  =item RETURNS:  =item RETURNS:
436    
# Line 460  Line 460 
460    
461  =item USAGE:  =item USAGE:
462    
463  C<< my $taxonomy = $orgO->taxonomy_of(); >>      my $taxonomy = $orgO->taxonomy_of();
464    
465  =item RETURNS:  =item RETURNS:
466    
# Line 516  Line 516 
516    
517  =item USAGE:  =item USAGE:
518    
519  C<< my @featureOs = $orgO->features_of();        #...Fetch all features >>      my @featureOs = $orgO->features_of();        #...Fetch all features
520    
521  or  or
522    
523  C<< my @featureOs = $orgO->features_of('peg');   #...Fetch only PEGs >>      my @featureOs = $orgO->features_of('peg');   #...Fetch only PEGs
524    
525  =item RETURNS:  =item RETURNS:
526    
# Line 548  Line 548 
548    
549  =item USAGE:  =item USAGE:
550    
551  C<< $genome->display(); >>      $genome->display();
552    
553  =item RETURNS:  =item RETURNS:
554    
# Line 585  Line 585 
585    
586  =item USAGE:  =item USAGE:
587    
588  C<< $contig = ContigO->new( $figO, $genomeId, $contigId); >>      $contig = ContigO->new( $figO, $genomeId, $contigId);
589    
590  =item $figO:  =item $figO:
591    
# Line 644  Line 644 
644    
645  =item USAGE:  =item USAGE:
646    
647  C<< my $tax_id = $contig->genome->id(); >>      my $tax_id = $contig->genome->id();
648    
649  =item RETURNS:  =item RETURNS:
650    
# Line 669  Line 669 
669    
670  =item USAGE:  =item USAGE:
671    
672  C<< my $len = $contig->contig_length(); >>      my $len = $contig->contig_length();
673    
674  =item RETURNS:  =item RETURNS:
675    
# Line 694  Line 694 
694    
695  =item USAGE:  =item USAGE:
696    
697  C<< my $seq = $contig->dna_seq(beg, $end); >>      my $seq = $contig->dna_seq(beg, $end);
698    
699  =item $beg:  =item $beg:
700    
# Line 788  Line 788 
788    
789  =item USAGE:  =item USAGE:
790    
791  C<< my $feature = FeatureO->new( $figO, $fid ); >>      my $feature = FeatureO->new( $figO, $fid );
792    
793  =item C<$figO>:  =item C<$figO>:
794    
# Line 824  Line 824 
824    
825  =item USAGE:  =item USAGE:
826    
827  C<< my $fid = $feature->id(); >>      my $fid = $feature->id();
828    
829  =item RETURNS:  =item RETURNS:
830    
# Line 848  Line 848 
848    
849  =item USAGE:  =item USAGE:
850    
851  C<< my $taxid = $feature->genome(); >>      my $taxid = $feature->genome();
852    
853  =item RETURNS:  =item RETURNS:
854    
# Line 873  Line 873 
873    
874  =item USAGE:  =item USAGE:
875    
876  C<< my $feature_type = $feature->type(); >>      my $feature_type = $feature->type();
877    
878  =item RETURNS:  =item RETURNS:
879    
# Line 898  Line 898 
898    
899  =item USAGE:  =item USAGE:
900    
901  C<< my $loc = $feature->location(); >>      my $loc = $feature->location();
902    
903  =item RETURNS:  =item RETURNS:
904    
# Line 923  Line 923 
923    
924  =item USAGE:  =item USAGE:
925    
926  C<< my $contig = $feature->contig(); >>      my $contig = $feature->contig();
927    
928  =item RETURNS:  =item RETURNS:
929    
# Line 951  Line 951 
951    
952  =item USAGE:  =item USAGE:
953    
954  C<< my $beg = $feature->begin(); >>      my $beg = $feature->begin();
955    
956  =item RETURNS:  =item RETURNS:
957    
# Line 976  Line 976 
976    
977  =item USAGE:  =item USAGE:
978    
979  C<< my $end = $feature->end(); >>      my $end = $feature->end();
980    
981  =item RETURNS:  =item RETURNS:
982    
# Line 1001  Line 1001 
1001    
1002  =item USAGE:  =item USAGE:
1003    
1004  C<< my $dna_seq = $feature->dna_seq(); >>      my $dna_seq = $feature->dna_seq();
1005    
1006  =item RETURNS:  =item RETURNS:
1007    
# Line 1031  Line 1031 
1031    
1032  =item USAGE:  =item USAGE:
1033    
1034  C<< my $dna_seq = $feature->prot_seq(); >>      my $dna_seq = $feature->prot_seq();
1035    
1036  =item RETURNS:  =item RETURNS:
1037    
# Line 1059  Line 1059 
1059    
1060  =item USAGE:  =item USAGE:
1061    
1062  C<< my $func = $feature->function_of(); >>      my $func = $feature->function_of();
1063    
1064  =item RETURNS:  =item RETURNS:
1065    
# Line 1086  Line 1086 
1086    
1087  =item USAGE:  =item USAGE:
1088    
1089  C<< my @coupled_features = $feature->coupled_to(); >>      my @coupled_features = $feature->coupled_to();
1090    
1091  =item RETURNS:  =item RETURNS:
1092    
# Line 1121  Line 1121 
1121    
1122  =item USAGE:  =item USAGE:
1123    
1124  C<< my @annot_list = $feature->annotations(); >>      my @annot_list = $feature->annotations();
1125    
1126  =item RETURNS:  =item RETURNS:
1127    
# Line 1147  Line 1147 
1147    
1148  =item USAGE:  =item USAGE:
1149    
1150  C<< my @subsys_list = $feature->in_subsystems(); >>      my @subsys_list = $feature->in_subsystems();
1151    
1152  =item RETURNS:  =item RETURNS:
1153    
# Line 1173  Line 1173 
1173    
1174  =item USAGE:  =item USAGE:
1175    
1176  C<< my $trunc = $feature->possibly_truncated(); >>      my $trunc = $feature->possibly_truncated();
1177    
1178  =item RETURNS:  =item RETURNS:
1179    
# Line 1200  Line 1200 
1200    
1201  =item USAGE:  =item USAGE:
1202    
1203  C<< my $fs = $feature->possible_frameshift(); >>      my $fs = $feature->possible_frameshift();
1204    
1205  =item RETURNS:  =item RETURNS:
1206    
# Line 1218  Line 1218 
1218      my($self) = @_;      my($self) = @_;
1219      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1220      my $fig = $figO->{_fig};      my $fig = $figO->{_fig};
     my($tmp_dir) = $figO->{_tmp_dir};  
   
     my $tmp_dna  = "$tmp_dir/tmp_dna.$$.fasta";  
     my $tmp_prot = "$tmp_dir/tmp_prot.$$.fasta";  
   
     #...Skip tests and return '0' if truncated...  
     if (! $self->possibly_truncated)  
     {  
         #...Get best precomputed BLAST hit if E-value < 1.0e-20:  
         my @sims = $self->sims( -max => 5, -cutoff => 1.0e-20);  
         while ((@sims > 0) && $fig->possibly_truncated($sims[0]->id2)) { shift @sims }  
   
         #...If a sim was returned:  
         if (my $sim = shift @sims)  
         {  
             #...Get best hit FID and boundaries:  
             my $peg2 = $sim->id2;  
             my $ln1  = $sim->ln1;  
             my $ln2  = $sim->ln2;  
             my $b2   = $sim->b2;  
             my $e2   = $sim->e2;  
   
             #...Convert from AA to BP, and pad out w/ 100 bp guard region:  
             my $adjL = 100 + (($b2-1) * 3);  
             my $adjR = 100 + (($ln2 - $e2) * 3);  
   
             if ($ENV{DEBUG}) { print STDERR "adjL = $adjL adjR = $adjR ln1 = $ln1 peg2 = $peg2 ln2 = $ln2\n" }  
             #...If hit is more than 20% longer than query:  
             if ($ln2 > (1.2 * $ln1))  
             {  
                 #...Get and parse query location:  
                 my $loc = $self->location;  
                 if ($loc =~ /^(\S+)_(\d+)_(\d+)/)  
                 {  
                     my $contig = $1;  
                     my $beg    = $2;  
                     my $end    = $3;  
   
                     #...Create new ContigO object:  
                     my $contigO = new ContigO($figO, $self->genome->id, $contig);  
   
                     #...Extract DNA subsequence, including guard regions:  
                     my($begA,$endA,$dna);  
                     if ($beg < $end)  
                     {  
                         $begA = &max(1, $beg - $adjL);  
                         $endA = &min($end+$adjR, $contigO->contig_length);  
                         $dna  = $contigO->dna_seq($begA,$endA);  
                     }  
                     else  
                     {  
                         $endA = &max(1, $beg - $adjL);  
                         $begA = &min($end+$adjR, $contigO->contig_length);  
                         $dna  = $contigO->dna_seq($begA,$endA);  
                     }  
1221    
1222                      if (defined($dna) && (length($dna) > 90))      return $fig->possible_frameshift($self->id);
                     {  
                         #...Open tmp-file and write FASTA containing DNA subregion to be BLASTed:  
                         open( TMP, ">$tmp_dna") || die "could not open $tmp_dna";  
                         print TMP  ">dna\n$dna\n";  
                         close(TMP);  
   
                         #...Create new FeatureO object corresponding tp $peg2:  
                         my $pegO2 = new FeatureO($figO,$peg2);  
   
                         #...Fetch its translation, and print to tmp FASTA file for BLASTing:  
                         my $prot  = $pegO2->prot_seq;  
                         if (defined($prot) && (length($prot) > 30))  
                         {  
                             open( TMP, ">$tmp_prot") || die "could not open $tmp_prot";  
                             print TMP  ">tmp_prot\n$prot\n";  
                             close(TMP);  
   
                             #...Build BLAST nucleotide database for extracted DNA region,  
                             #   and TBLASTN $peg2 against the DNA:  
                             &run("formatdb -i $tmp_dna -pF");  
                             open(BLAST,"blastall -i $tmp_prot -d $tmp_dna -p tblastn -FF -e 1.0e-20 |")  
                                 || die "could not blast";  
   
                             #...Parse the TBLASTN output; find and sort HSPs by left boundary:  
                             my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);  
                             if ($ENV{DEBUG}) { print STDERR &Dumper(['blast output',$db_seq_out]) }  
                             my @hsps       = sort { $a->[0] <=> $b->[0] }  
                                               map { [$_->[9], $_->[10], $_->[12], $_->[13]] }  
                                               grep { $_->[1] < 1.0e-20 }  
                                               @ { $db_seq_out->[6] };  
   
                             #...Extract HSP boundary pairs:  
                             my @prot = map { [$_->[0], $_->[1]] } @hsps;  
                             my @dna  = map { [$_->[2], $_->[3]] } @hsps;  
                             if ($ENV{DEBUG}) { print STDERR &Dumper(\@prot,\@dna) }  
   
                             #...If the "cover" of the HSPs covers more than 90% of $peg2 w gaps < 3 AA,  
                             #   and the "cover" of the HPSs cover more than 90% of the extracted DNA  
                             #   w/ gaps < 9 bp (but not a multiple of 3), suspect a possible frameshift:  
                             if (&covers(\@prot,length($prot),3,0) && &covers(\@dna,3*length($prot),9,1))  
                             {  
                                 unlink($tmp_dna,$tmp_prot);  
                                 return [$contig,$begA,$endA,$dna,$peg2];  
                             }  
                         }  
                     }  
                 }  
             }  
         }  
     }  
     unlink($tmp_dna,$tmp_prot);  
     return 0;  
1223  }  }
1224    
1225    
# Line 1343  Line 1236 
1236    
1237  =item USAGE:  =item USAGE:
1238    
1239  C<< $feature->run($cmd); >>      $feature->run($cmd);
1240    
1241  =item RETURNS:  =item RETURNS:
1242    
# Line 1369  Line 1262 
1262    
1263  =item USAGE:  =item USAGE:
1264    
1265  C<< my $max = $feature->max($x, $y); >>      my $max = $feature->max($x, $y);
1266    
1267  =item C<$x> and  C<$y>  =item C<$x> and  C<$y>
1268    
# Line 1398  Line 1291 
1291    
1292  =item USAGE:  =item USAGE:
1293    
1294  C<< my $min = $feature->min($x, $y); >>      my $min = $feature->min($x, $y);
1295    
1296  =item C<$x> and C<$y>  =item C<$x> and C<$y>
1297    
# Line 1417  Line 1310 
1310      return ($x < $y) ? $x : $y;      return ($x < $y) ? $x : $y;
1311  }  }
1312    
   
   
 =head3 covers  
   
 (Question: Should this function be considered "PRIVATE" ???)  
   
 USAGE:  
     C<< if (&covers(\@hits, $len, $diff, $must_shift)) { #...Do stuff } >>  
   
 Returns boolean C<TRUE> if a set of BLAST HSPs "cover" more than 90%  
 of the database sequence(?).  
   
 =cut  
   
 sub covers {  
     my($hsps,$ln,$diff,$must_shift) = @_;  
   
     if ($ENV{DEBUG}) { print STDERR &Dumper(['hsps',$hsps,'ln',$ln,'diff',$diff,'must_shift',$must_shift]) }  
     my $hsp1 = shift @$hsps;  
     my $hsp2;  
     my $merged = 0;  
     while ($hsp1 && ($hsp2 = shift @$hsps) &&  
            ($must_shift ? &diff_frames($hsp1,$hsp2) : 1) &&  
            ($hsp1 = &merge($hsp1,$hsp2,$diff)))  
     {  
         $merged = 1;  
         if ($ENV{DEBUG}) { print STDERR &Dumper(['merged',$hsp1]) }  
     }  
     return ($merged && $hsp1 && (($hsp1->[1] - $hsp1->[0]) > (0.9 * $ln)));  
 }  
   
 sub diff_frames {  
     my($hsp1,$hsp2) = @_;  
     return ((($hsp1->[1]+1) % 3) != ($hsp2->[0] % 3));  
 }  
   
   
   
 =head3 merge  
   
 Merge two HSPs unless their overlap or separation is too large.  
   
 RETURNS: Merged boundaries if merger succeeds, and C<undef> if merger fails.  
   
 =cut  
   
 sub merge {  
     my($hsp1,$hsp2,$diff) = @_;  
   
     my($b1,$e1) = @$hsp1;  
     my($b2,$e2) = @$hsp2;  
     return (($e2 > $e1) && (($b2-$e1) <= $diff)) ? [$b1,$e2] : undef;  
 }  
   
   
   
1313  =head3 sims  =head3 sims
1314    
1315  =over 4  =over 4
# Line 1483  Line 1320 
1320    
1321  =item USAGE:  =item USAGE:
1322    
1323  C<< my @sims = $pegO->sims( -all, -cutoff => 1.0e-10); >>      my @sims = $pegO->sims( -all, -cutoff => 1.0e-10);
1324    
1325  C<< my @sims = $pegO->sims( -max => 50, -cutoff => 1.0e-10); >>      my @sims = $pegO->sims( -max => 50, -cutoff => 1.0e-10);
1326    
1327  =item RETURNS: List of sim objects.  =item RETURNS: List of sim objects.
1328    
# Line 1531  Line 1368 
1368    
1369  =item USAGE:  =item USAGE:
1370    
1371  C<< my @bbhs = $pegO->bbhs(); >>      my @bbhs = $pegO->bbhs();
1372    
1373  =item RETURNS:  =item RETURNS:
1374    
# Line 1567  Line 1404 
1404    
1405  =item USAGE:  =item USAGE:
1406    
1407  C<< $pegO->display(); >>      $pegO->display();
1408    
1409  =item RETURNS;  =item RETURNS;
1410    
# Line 1626  Line 1463 
1463    
1464  =item USAGE:  =item USAGE:
1465    
1466  C<< my $peg1 = $bbh->peg1(); >>      my $peg1 = $bbh->peg1();
1467    
1468  =item RETURNS:  =item RETURNS:
1469    
# Line 1650  Line 1487 
1487    
1488  =item USAGE:  =item USAGE:
1489    
1490  C<< my $peg2 = $bbh->peg2(); >>      my $peg2 = $bbh->peg2();
1491    
1492  =item RETURNS:  =item RETURNS:
1493    
# Line 1676  Line 1513 
1513    
1514  =item USAGE:  =item USAGE:
1515    
1516  C<< my $psc = $bbh->psc(); >>      my $psc = $bbh->psc();
1517    
1518  =item RETURNS:  =item RETURNS:
1519    
# Line 1701  Line 1538 
1538    
1539  =item USAGE:  =item USAGE:
1540    
1541  C<< my $bsc = $bbh->norm_bitscore(); >>      my $bsc = $bbh->norm_bitscore();
1542    
1543  =item RETURNS:  =item RETURNS:
1544    
# Line 1741  Line 1578 
1578    
1579  =item USAGE:  =item USAGE:
1580    
1581  C<< my $annotO = AnnotationO->new( $fid, $timestamp, $who, $text); >>      my $annotO = AnnotationO->new( $fid, $timestamp, $who, $text);
1582    
1583  =item C<$fid>  =item C<$fid>
1584    
# Line 1790  Line 1627 
1627    
1628  =item USAGE:  =item USAGE:
1629    
1630  C<< my $fid = $annotO->fid(); >>      my $fid = $annotO->fid();
1631    
1632  =item RETURNS;  =item RETURNS;
1633    
# Line 1818  Line 1655 
1655    
1656  =item USAGE:  =item USAGE:
1657    
1658  C<< my $fid = $annotO->timestamp(); >>      my $fid = $annotO->timestamp();
1659    
1660  =item RETURNS;  =item RETURNS;
1661    
# Line 1853  Line 1690 
1690    
1691  =item USAGE:  =item USAGE:
1692    
1693  C<< my $fid = $annotO->made_by(); >>      my $fid = $annotO->made_by();
1694    
1695  =item RETURNS;  =item RETURNS;
1696    
# Line 1883  Line 1720 
1720    
1721  =item USGAE:  =item USGAE:
1722    
1723  C<< my $text = $annotO->text(); >>      my $text = $annotO->text();
1724    
1725  =item RETURNS:  =item RETURNS:
1726    
# Line 1912  Line 1749 
1749    
1750  =item USAGE:  =item USAGE:
1751    
1752  C<< my $annotO->display(); >>      my $annotO->display();
1753    
1754  =item RETURNS:  =item RETURNS:
1755    
# Line 1956  Line 1793 
1793    
1794  =item USAGE:  =item USAGE:
1795    
1796  C<< $couplingO = CouplingO->new($figO, $fid1, $fid2, $sc); >>      $couplingO = CouplingO->new($figO, $fid1, $fid2, $sc);
1797    
1798  =item C<$figO>  =item C<$figO>
1799    
# Line 2003  Line 1840 
1840    
1841  =item USAGE:  =item USAGE:
1842    
1843  C<< my $peg1 = $couplingO->peg1(); >>      my $peg1 = $couplingO->peg1();
1844    
1845  =item RETURNS:  =item RETURNS:
1846    
# Line 2032  Line 1869 
1869    
1870  =item USAGE:  =item USAGE:
1871    
1872  C<< my $peg2 = $couplingO->peg2(); >>      my $peg2 = $couplingO->peg2();
1873    
1874  =item RETURNS:  =item RETURNS:
1875    
# Line 2061  Line 1898 
1898    
1899  =item USAGE:  =item USAGE:
1900    
1901  C<< my $sc = $couplingO->sc(); >>      my $sc = $couplingO->sc();
1902    
1903  =item RETURNS:  =item RETURNS:
1904    
# Line 2091  Line 1928 
1928    
1929  =item USAGE:  =item USAGE:
1930    
1931  C<< my $evidence = $couplingO->evidence(); >>      my $evidence = $couplingO->evidence();
1932    
1933  =item RETURNS  =item RETURNS
1934    
# Line 2129  Line 1966 
1966    
1967  =item USAGE:  =item USAGE:
1968    
1969  C<< $couplingO->display(); >>      $couplingO->display();
1970    
1971  =item RETURNS:  =item RETURNS:
1972    

Legend:
Removed from v.1.30  
changed lines
  Added in v.1.32

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3