[Bio] / FigKernelPackages / FIGO.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIGO.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.21, Tue Mar 27 17:57:49 2007 UTC revision 1.31, Sat Dec 1 22:17:03 2007 UTC
# Line 17  Line 17 
17  #  #
18  ########################################################################  ########################################################################
19    
20    =head1 TODO
21    
22    =over 4
23    
24    =item Null arg to ContigO::dna_seq() should return entire contig seq.
25    
26    =item Add method to access "FIG::crude_estimate_of_distance()"
27    
28    =back
29    
30    =cut
31    
32  =head1 Overview  =head1 Overview
33    
34  This module is a set of packages encapsulating the SEED's core methods  This module is a set of packages encapsulating the SEED's core methods
35  using an "OOP-like" style.  using an "OOP-like" style.
36    
37  There are several modules clearly related to "individual genomes:"  There are several modules clearly related to "individual genomes:"
38  FIGO, GenomeO, ContigO, FeatureO (and I<maybe> AnnotationO).  GenomeO, ContigO, FeatureO (and I<maybe> AnnotationO).
39    
40  There are also modules that deal with complex relationships between  There are also modules that deal with complex relationships between
41  pairs or sets of features in one, two, or more genomes,  pairs or sets of features in one, two, or more genomes,
# Line 32  Line 44 
44    
45  Finally, the methods in "Attribute" might in principle attach  Finally, the methods in "Attribute" might in principle attach
46  "atributes" to any type of object.  "atributes" to any type of object.
47  (Likewise, in principle one might like to attach an "annotation"  (Likewise, in principle one might also want to attach an "annotation"
48  to any type of object  to any type of object,
49    although currently we only support annotations of "features.")
50    
51  Four of the modules dealing with "genomes" have a reasonable clear  The three modules that act on "individual genomes" have a reasonable clear
52  "implied heirarchy:"  "implied heirarchy" relative to FIGO:
53    
54  =over 4  =over 4
55    
# Line 52  Line 65 
65  We have chosen to in many cases sidestep the entire issue of inheritance  We have chosen to in many cases sidestep the entire issue of inheritance
66  via an I<ad hoc> mechanism:  via an I<ad hoc> mechanism:
67  If a "child" object needs access to its "ancestors'" methods,  If a "child" object needs access to its "ancestors'" methods,
68  we pass it references to its "ancestors" using subroutine arguments.  we will explicitly pass it references to its "ancestors,"
69    as subroutine arguments.
70  This is admittedly ugly, clumsy, and potentially error-prone ---  This is admittedly ugly, clumsy, and potentially error-prone ---
71  but it has the advantage that, unlike multiple inheritance,  but it has the advantage that, unlike multiple inheritance,
72  we understand how to do it...  we understand how to do it...
# Line 72  Line 86 
86  use SproutFIG;  use SproutFIG;
87  use Tracer;  use Tracer;
88  use Data::Dumper;  use Data::Dumper;
89    use Carp;
90  use FigFams;  use FigFams;
91  use gjoparseblast;  use gjoparseblast;
92    
# Line 364  Line 379 
379    
380  =item USAGE:  =item USAGE:
381    
382  C<< my $org = GenomeO->new($figo, $tax_id); >>  C<< my $orgO = GenomeO->new($figO, $tax_id); >>
383    
384  =item RETURNS:  =item RETURNS:
385    
386      A new GenomeO object.  A new "GenomeO" object.
387    
388  =back  =back
389    
# Line 391  Line 406 
406    
407  =item USAGE:  =item USAGE:
408    
409  C<< my $tax_id = $org->id(); >>  C<< my $tax_id = $orgO->id(); >>
410    
411  =item RETURNS:  =item RETURNS:
412    
413      Taxonomy-ID of GenomeO object.  Taxonomy-ID of "GenomeO" object.
414    
415  =back  =back
416    
# Line 415  Line 430 
430    
431  =item USAGE:  =item USAGE:
432    
433  C<< $gs = $genome->genus_species(); >>  C<< $gs = $orgO->genus_species(); >>
434    
435  =item RETURNS:  =item RETURNS:
436    
# Line 433  Line 448 
448  }  }
449    
450    
451    
452    
453    =head3 taxonomy_of
454    
455    =over 4
456    
457    =item FUNCTION:
458    
459    Return the TAXONOMY string of a "GenomeO" object.
460    
461    =item USAGE:
462    
463    C<< my $taxonomy = $orgO->taxonomy_of(); >>
464    
465    =item RETURNS:
466    
467    TAXONOMY string.
468    
469    =back
470    
471    =cut
472    
473    sub taxonomy_of {
474        my ($self) = @_;
475    
476        my $figO = $self->{_figO};
477        my $fig  = $figO->{_fig};
478    
479        return $fig->taxonomy_of($self->{_id});
480    }
481    
482    
483  =head3 contigs_of  =head3 contigs_of
484    
485  =over 4  =over 4
# Line 461  Line 508 
508    
509  =head3 features_of  =head3 features_of
510    
511    =over 4
512    
513    =item FUNCTION:
514    
515    Returns a list of "FeatureO" objects contained in a "GenomeO" object.
516    
517    =item USAGE:
518    
519    C<< my @featureOs = $orgO->features_of();        #...Fetch all features >>
520    
521    or
522    
523    C<< my @featureOs = $orgO->features_of('peg');   #...Fetch only PEGs >>
524    
525    =item RETURNS:
526    
527    List of "FeatureO" objects.
528    
529    =back
530    
531  =cut  =cut
532    
533  sub features_of {  sub features_of {
# Line 639  Line 706 
706    
707  =item RETURNS:  =item RETURNS:
708    
709      string of DNA sequence running from $beg to $end  String containing DNA subsequence running from $beg to $end
710      (NOTE: if $beg > $end, returns reverse complement of DNA subsequence.)      (NOTE: if $beg > $end, returns reverse complement of DNA subsequence.)
711    
712  =back  =back
# Line 704  Line 771 
771  package FeatureO;  package FeatureO;
772  ########################################################################  ########################################################################
773  use Data::Dumper;  use Data::Dumper;
774    use Carp;
775    
776  =head1 FeatureO  =head1 FeatureO
777    
# Line 714  Line 782 
782    
783  =head3 new  =head3 new
784    
785  Constructor of "FeatureO" objects  Constructor of new "FeatureO" objects
786    
787  =over 4  =over 4
788    
# Line 784  Line 852 
852    
853  =item RETURNS:  =item RETURNS:
854    
855  The TAxon-ID for the "GenomeO" object containg the feature.  The Taxon-ID for the "GenomeO" object containing the feature.
856    
857  =back  =back
858    
# Line 1022  Line 1090 
1090    
1091  =item RETURNS:  =item RETURNS:
1092    
1093  A list of L<CouplingO> objects describing the evidence for functional coupling  A list of "CouplingO" objects describing the evidence for functional coupling
1094  between this feature and other nearby features.  between this feature and other nearby features.
1095    
1096  =back  =back
# Line 1057  Line 1125 
1125    
1126  =item RETURNS:  =item RETURNS:
1127    
1128  A list of L<AnnotationO> objects allowing access to the annotations for this feature.  A list of "AnnotationO" objects allowing access to the annotations for this feature.
1129    
1130  =back  =back
1131    
# Line 1083  Line 1151 
1151    
1152  =item RETURNS:  =item RETURNS:
1153    
1154  A list of L<SubsystemO> objects allowing access to the subsystems  A list of "SubsystemO" objects allowing access to the subsystems
1155  that this feature particupates in.  that this feature particupates in.
1156    
1157  =back  =back
# Line 1149  Line 1217 
1217  sub possible_frameshift {  sub possible_frameshift {
1218      my($self) = @_;      my($self) = @_;
1219      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1220      my($tmp_dir) = $figO->{_tmp_dir};      my $fig = $figO->{_fig};
1221    
1222      if (! $self->possibly_truncated)      return $fig->possible_frameshift($self->id);
     {  
         my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);  
         if (my $sim = shift @sims)  
         {  
             my $peg2 = $sim->id2;  
             my $ln1  = $sim->ln1;  
             my $ln2  = $sim->ln2;  
             my $b2   = $sim->b2;  
             my $e2   = $sim->e2;  
             my $adjL = 100 + (($b2-1) * 3);  
             my $adjR = 100 + (($ln2 - $e2) * 3);  
             if ($ln2 > (1.2 * $ln1))  
             {  
                 my $loc = $self->location;  
                 if ($loc =~ /^(\S+)_(\d+)_(\d+)/)  
                 {  
                     my $contig = $1;  
                     my $beg    = $2;  
                     my $end = $3;  
                     my $contigO = new ContigO($figO,$self->genome->id,$contig);  
                     my $begA = &max(1,$beg - $adjL);  
                     my $endA = &min($end+$adjR,$contigO->contig_length);  
                     my $dna  = $contigO->dna_seq($begA,$endA);  
                     open(TMP,">$tmp_dir/tmp_dna") || die "couild not open tmp_dna";  
                     print TMP ">dna\n$dna\n";  
                     close(TMP);  
   
                     my $peg2O = new FeatureO($figO,$peg2);  
                     my $prot  = $peg2O->prot_seq;  
                     open(TMP,">$tmp_dir/tmp_prot") || die "could not open tmp_prot";  
                     print TMP ">tmp_prot\n$prot\n";  
                     close(TMP);  
                     &run("formatdb -i $tmp_dir/tmp_dna -pF");  
                     open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")  
                         || die "could not blast";  
   
                     my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);  
                     my @hsps       = sort { $a->[0] <=> $b->[0] }  
                                      map { [$_->[9],$_->[10],$_->[12],$_->[13]] }  
                                      grep { $_->[1] < 1.0e-50 }  
                                      @{$db_seq_out->[6]};  
                     my @prot = map { [$_->[0],$_->[1]] } @hsps;  
                     my @dna  = map { [$_->[2],$_->[3]] } @hsps;  
                     if (&covers(\@prot,length($prot),3,0) && &covers(\@dna,3*length($prot),9,1))  
                     {  
                         return 1;  
                     }  
                 }  
             }  
         }  
     }  
     return 0;  
1223  }  }
1224    
1225    
# Line 1233  Line 1249 
1249    
1250  sub run {  sub run {
1251      my($cmd) = @_;      my($cmd) = @_;
1252      (system($cmd) == 0) || Confess("FAILED: $cmd");      (system($cmd) == 0) || confess("FAILED: $cmd");
1253  }  }
1254    
1255    
# Line 1248  Line 1264 
1264    
1265  C<< my $max = $feature->max($x, $y); >>  C<< my $max = $feature->max($x, $y); >>
1266    
1267  =item C<$x>  =item C<$x> and  C<$y>
   
 Numerical value.  
   
 =item C<$y>  
1268    
1269  Numerical value.  Numerical values.
1270    
1271  =items RETURNS:  =item RETURNS:
1272    
1273  The larger of the two numerical values C<$x> and C<$y>.  The larger of the two numerical values C<$x> and C<$y>.
1274    
# Line 1281  Line 1293 
1293    
1294  C<< my $min = $feature->min($x, $y); >>  C<< my $min = $feature->min($x, $y); >>
1295    
1296  =item C<$x>  =item C<$x> and C<$y>
   
 Numerical value.  
1297    
1298  =item C<$y>  Numerical values.
   
 Numerical value.  
1299    
1300  =item RETURNS:  =item RETURNS:
1301    
# Line 1302  Line 1310 
1310      return ($x < $y) ? $x : $y;      return ($x < $y) ? $x : $y;
1311  }  }
1312    
   
   
 =head3 covers  
   
 (Question: Should this function be considered "PRIVATE" ???)  
   
 USAGE:  
     C<< if (&covers(\@hits, $len, $diff, $must_shift)) { #...Do stuff } >>  
   
 Returns boolean C<TRUE> if a set of BLAST HSPs "cover" more than 90%  
 of the database sequence(?).  
   
 =cut  
   
 sub covers {  
     my($hsps,$ln,$diff,$must_shift) = @_;  
   
     my $hsp1 = shift @$hsps;  
     my $hsp2;  
     my $merged = 0;  
     while ($hsp1 && ($hsp2 = shift @$hsps) &&  
            ($must_shift ? &diff_frames($hsp1,$hsp2) : 1) &&  
            ($hsp1 = &merge($hsp1,$hsp2,$diff))) { $merged = 1 }  
     return ($merged && $hsp1 && (($hsp1->[1] - $hsp1->[0]) > (0.9 * $ln)));  
 }  
   
 sub diff_frames {  
     my($hsp1,$hsp2) = @_;  
     return (($hsp1->[0] % 3) != ($hsp2->[0] % 3));  
 }  
   
   
   
 =head3 merge  
   
 Merge two HSPs unless their overlap or separation is too large.  
   
 RETURNS: Merged boundaries if merger succeeds, and C<undef> if merger fails.  
   
 =cut  
   
 sub merge {  
     my($hsp1,$hsp2,$diff) = @_;  
   
     my($b1,$e1) = @$hsp1;  
     my($b2,$e2) = @$hsp2;  
     return (($e2 > $e1) && (abs($b2-$e1) <= $diff)) ? [$b1,$e2] : undef;  
 }  
   
   
   
1313  =head3 sims  =head3 sims
1314    
1315  =over 4  =over 4
# Line 1413  Line 1370 
1370    
1371  C<< my @bbhs = $pegO->bbhs(); >>  C<< my @bbhs = $pegO->bbhs(); >>
1372    
1373  =item List of BBHO objects.  =item RETURNS:
1374    
1375    List of BBHO objects.
1376    
1377  =back  =back
1378    
# Line 1434  Line 1393 
1393                                                  },'BBHO') } @bbhs;                                                  },'BBHO') } @bbhs;
1394  }  }
1395    
1396    
1397  =head3 display  =head3 display
1398    
1399    =over 4
1400    
1401    =item FUNCTION:
1402    
1403  Prints info about a "FeatureO" object to STDOUT.  Prints info about a "FeatureO" object to STDOUT.
1404    
1405  USAGE:  =item USAGE:
1406    
1407  C<< $pegO->display(); >>  C<< $pegO->display(); >>
1408    
1409    =item RETURNS;
1410    
1411    (void)
1412    
1413    =back
1414    
1415  =cut  =cut
1416    
1417  sub display {  sub display {
# Line 1600  Line 1570 
1570    
1571  =head3 new  =head3 new
1572    
1573    =over 4
1574    
1575    =item FUNCTION:
1576    
1577    Cronstruct a new "AnnotationO" object
1578    
1579    =item USAGE:
1580    
1581    C<< my $annotO = AnnotationO->new( $fid, $timestamp, $who, $text); >>
1582    
1583    =item C<$fid>
1584    
1585    A feature identifier.
1586    
1587    =item C<$timestamp>
1588    
1589    The C<UN*X> timestamp one wishes to associate with the annotation.
1590    
1591    =item C<$who>
1592    
1593    The annotator's user-name.
1594    
1595    =item C<$text>
1596    
1597    The textual content of the annotation.
1598    
1599    =item RETURNS:
1600    
1601    An "AnnotationO" object.
1602    
1603    =back
1604    
1605  =cut  =cut
1606    
1607  sub new {  sub new {
# Line 1617  Line 1619 
1619    
1620  =head3 fid  =head3 fid
1621    
1622    =over 4
1623    
1624    =item FUNCTION:
1625    
1626    Extract the feature-ID that was annotated.
1627    
1628    =item USAGE:
1629    
1630    C<< my $fid = $annotO->fid(); >>
1631    
1632    =item RETURNS;
1633    
1634    The feature-ID as a string.
1635    
1636    =back
1637    
1638  =cut  =cut
1639    
1640  sub fid {  sub fid {
# Line 1629  Line 1647 
1647    
1648  =head3 timestamp  =head3 timestamp
1649    
1650    =over 4
1651    
1652    =item FUNCTION:
1653    
1654    Extract the C<UN*X> timestamp of the annotation.
1655    
1656    =item USAGE:
1657    
1658    C<< my $fid = $annotO->timestamp(); >>
1659    
1660    =item RETURNS;
1661    
1662    The timestamp as a string.
1663    
1664    =back
1665    
1666  =cut  =cut
1667    
1668  sub timestamp {  sub timestamp {
# Line 1648  Line 1682 
1682    
1683  =head3 made_by  =head3 made_by
1684    
1685    =over 4
1686    
1687    =item FUNCTION:
1688    
1689    Extract the annotator's user-name.
1690    
1691    =item USAGE:
1692    
1693    C<< my $fid = $annotO->made_by(); >>
1694    
1695    =item RETURNS;
1696    
1697    The username of the annotator, as a string.
1698    
1699    =back
1700    
1701  =cut  =cut
1702    
1703  sub made_by {  sub made_by {
# Line 1662  Line 1712 
1712    
1713  =head3 text  =head3 text
1714    
1715    =over 4
1716    
1717    =item FUNCTION:
1718    
1719    Extract the text of the annotation.
1720    
1721    =item USGAE:
1722    
1723    C<< my $text = $annotO->text(); >>
1724    
1725    =item RETURNS:
1726    
1727    The text of the annotation, as a string.
1728    
1729    =back
1730    
1731  =cut  =cut
1732    
1733  sub text {  sub text {
# Line 1674  Line 1740 
1740    
1741  =head3 display  =head3 display
1742    
1743    =over 4
1744    
1745    =item FUNCTION:
1746    
1747    Print the contents of an "AnnotationO" object to B<STDOUT>
1748    in human-readable form.
1749    
1750    =item USAGE:
1751    
1752    C<< my $annotO->display(); >>
1753    
1754    =item RETURNS:
1755    
1756    (void)
1757    
1758    =back
1759    
1760  =cut  =cut
1761    
1762  sub display {  sub display {
# Line 1700  Line 1783 
1783    
1784  =head3 new  =head3 new
1785    
1786    =over 4
1787    
1788    =item FUNCTION:
1789    
1790    Construct a new "CouplingO" object
1791    encapsulating the "functional coupling" score
1792    between a pair of features in some genome.
1793    
1794    =item USAGE:
1795    
1796    C<< $couplingO = CouplingO->new($figO, $fid1, $fid2, $sc); >>
1797    
1798    =item C<$figO>
1799    
1800    Parent "FIGO" object.
1801    
1802    =item C<$fid1> and C<$fid2>
1803    
1804    A pair of feature-IDs.
1805    
1806    =item C<$sc>
1807    
1808    A functional-coupling score
1809    
1810    =item RETURNS:
1811    
1812    A "CouplingO" object.
1813    
1814    =back
1815    
1816  =cut  =cut
1817    
1818  sub new {  sub new {
# Line 1719  Line 1832 
1832    
1833  =head3 peg1  =head3 peg1
1834    
1835    =over 4
1836    
1837    =item FUNCTION:
1838    
1839    Returns a "FeatureO" object corresponding to the first FID in a coupled pair.
1840    
1841    =item USAGE:
1842    
1843    C<< my $peg1 = $couplingO->peg1(); >>
1844    
1845    =item RETURNS:
1846    
1847    A "FeatureO" object.
1848    
1849    =back
1850    
1851  =cut  =cut
1852    
1853  sub peg1 {  sub peg1 {
# Line 1730  Line 1859 
1859    
1860    
1861    
1862  =head3 peg1  =head3 peg2
1863    
1864    =over 4
1865    
1866    =item FUNCTION:
1867    
1868    Returns a "FeatureO" object corresponding to the second FID in a coupled pair.
1869    
1870    =item USAGE:
1871    
1872    C<< my $peg2 = $couplingO->peg2(); >>
1873    
1874    =item RETURNS:
1875    
1876    A "FeatureO" object.
1877    
1878    =back
1879    
1880  =cut  =cut
1881    
# Line 1745  Line 1890 
1890    
1891  =head3 sc  =head3 sc
1892    
1893    =over 4
1894    
1895    =item FUNCTION:
1896    
1897    Extracts the "functional coupling" score from a "CouplingO" object.
1898    
1899    =item USAGE:
1900    
1901    C<< my $sc = $couplingO->sc(); >>
1902    
1903    =item RETURNS:
1904    
1905    A scalar score.
1906    
1907    =back
1908    
1909  =cut  =cut
1910    
1911  sub sc {  sub sc {
# Line 1757  Line 1918 
1918    
1919  =head3 evidence  =head3 evidence
1920    
1921    =over 4
1922    
1923    =item FUNCTION:
1924    
1925    Fetch the evidence for a "functional coupling" between two close PEGs,
1926    in the form of a list of objects describing the "Pairs of Close Homologs" (PCHs)
1927    supporting the existence of a functional coupling between the two close PEGs.
1928    
1929    =item USAGE:
1930    
1931    C<< my $evidence = $couplingO->evidence(); >>
1932    
1933    =item RETURNS
1934    
1935    List of pairs of "FeatureO" objects.
1936    
1937    =back
1938    
1939  =cut  =cut
1940    
1941  sub evidence {  sub evidence {
# Line 1779  Line 1958 
1958    
1959  =head3 display  =head3 display
1960    
1961    =over 4
1962    
1963    =item FUNCTION:
1964    
1965    Print the contents of a "CouplingO" object to B<STDOUT> in human-readable form.
1966    
1967    =item USAGE:
1968    
1969    C<< $couplingO->display(); >>
1970    
1971    =item RETURNS:
1972    
1973    (Void)
1974    
1975    =back
1976    
1977  =cut  =cut
1978    
1979  sub display {  sub display {

Legend:
Removed from v.1.21  
changed lines
  Added in v.1.31

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3