[Bio] / FigKernelPackages / FIGO.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIGO.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.19, Mon Mar 19 19:49:25 2007 UTC revision 1.24, Tue Sep 25 08:47:33 2007 UTC
# Line 35  Line 35 
35  (Likewise, in principle one might like to attach an "annotation"  (Likewise, in principle one might like to attach an "annotation"
36  to any type of object  to any type of object
37    
38  Four of the modules dealing with "genomes" have a reasonable clear  The four modules dealing with "genomes" have a reasonable clear
39  "implied heirarchy:"  "implied heirarchy:"
40    
41  =over 4  =over 4
# Line 52  Line 52 
52  We have chosen to in many cases sidestep the entire issue of inheritance  We have chosen to in many cases sidestep the entire issue of inheritance
53  via an I<ad hoc> mechanism:  via an I<ad hoc> mechanism:
54  If a "child" object needs access to its "ancestors'" methods,  If a "child" object needs access to its "ancestors'" methods,
55  we pass it references to its "ancestors" using subroutine arguments.  we will explicitly pass it references to its "ancestors,"
56    as subroutine arguments.
57  This is admittedly ugly, clumsy, and potentially error-prone ---  This is admittedly ugly, clumsy, and potentially error-prone ---
58  but it has the advantage that, unlike multiple inheritance,  but it has the advantage that, unlike multiple inheritance,
59  we understand how to do it...  we understand how to do it...
# Line 72  Line 73 
73  use SproutFIG;  use SproutFIG;
74  use Tracer;  use Tracer;
75  use Data::Dumper;  use Data::Dumper;
76    use Carp;
77  use FigFams;  use FigFams;
78  use gjoparseblast;  use gjoparseblast;
79    
# Line 117  Line 119 
119      return bless $self, $class;      return bless $self, $class;
120  }  }
121    
122    sub function_of {
123        my($self,$id) = @_;
124    
125        my $fig  = $self->{_fig};
126        my $func = $fig->function_of($id);
127    
128        return ($func ? $func : "");
129    }
130    
131  =head3 genomes  =head3 genomes
132    
# Line 697  Line 706 
706  package FeatureO;  package FeatureO;
707  ########################################################################  ########################################################################
708  use Data::Dumper;  use Data::Dumper;
709    use Carp;
710    
711  =head1 FeatureO  =head1 FeatureO
712    
# Line 707  Line 717 
717    
718  =head3 new  =head3 new
719    
720  Constructor of "FeatureO" objects  Constructor of new "FeatureO" objects
721    
722  =over 4  =over 4
723    
# Line 777  Line 787 
787    
788  =item RETURNS:  =item RETURNS:
789    
790  The TAxon-ID for the "GenomeO" object containg the feature.  The Taxon-ID for the "GenomeO" object containing the feature.
791    
792  =back  =back
793    
# Line 1015  Line 1025 
1025    
1026  =item RETURNS:  =item RETURNS:
1027    
1028  A list of L<CouplingO> objects describing the evidence for functional coupling  A list of "CouplingO" objects describing the evidence for functional coupling
1029  between this feature and other nearby features.  between this feature and other nearby features.
1030    
1031  =back  =back
# Line 1050  Line 1060 
1060    
1061  =item RETURNS:  =item RETURNS:
1062    
1063  A list of L<AnnotationO> objects allowing access to the annotations for this feature.  A list of "AnnotationO" objects allowing access to the annotations for this feature.
1064    
1065  =back  =back
1066    
# Line 1076  Line 1086 
1086    
1087  =item RETURNS:  =item RETURNS:
1088    
1089  A list of L<SubsystemO> objects allowing access to the subsystems  A list of "SubsystemO" objects allowing access to the subsystems
1090  that this feature particupates in.  that this feature particupates in.
1091    
1092  =back  =back
# Line 1144  Line 1154 
1154      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1155      my($tmp_dir) = $figO->{_tmp_dir};      my($tmp_dir) = $figO->{_tmp_dir};
1156    
1157        #...Skip tests and return '0' if truncated...
1158      if (! $self->possibly_truncated)      if (! $self->possibly_truncated)
1159      {      {
1160            #...Get best precomputed BLAST hit if E-value < 1.0e-50:
1161          my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);          my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);
1162    
1163            #...If a sim was returned:
1164          if (my $sim = shift @sims)          if (my $sim = shift @sims)
1165          {          {
1166                #...Get best hit FID and boundaries:
1167              my $peg2 = $sim->id2;              my $peg2 = $sim->id2;
1168              my $ln1  = $sim->ln1;              my $ln1  = $sim->ln1;
1169              my $ln2  = $sim->ln2;              my $ln2  = $sim->ln2;
1170              my $b2   = $sim->b2;              my $b2   = $sim->b2;
1171              my $e2   = $sim->e2;              my $e2   = $sim->e2;
1172    
1173                #...Convert from AA to BP, and pad out w/ 100 bp guard region:
1174              my $adjL = 100 + (($b2-1) * 3);              my $adjL = 100 + (($b2-1) * 3);
1175              my $adjR = 100 + (($ln2 - $e2) * 3);              my $adjR = 100 + (($ln2 - $e2) * 3);
1176    
1177                #...If hit is more than 20% longer than query:
1178              if ($ln2 > (1.2 * $ln1))              if ($ln2 > (1.2 * $ln1))
1179              {              {
1180                    #...Get and parse query location:
1181                  my $loc = $self->location;                  my $loc = $self->location;
1182                  if ($loc =~ /^(\S+)_(\d+)_(\d+)/)                  if ($loc =~ /^(\S+)_(\d+)_(\d+)/)
1183                  {                  {
1184                      my $contig = $1;                      my $contig = $1;
1185                      my $beg    = $2;                      my $beg    = $2;
1186                      my $end = $3;                      my $end = $3;
1187    
1188                        #...Create new ContigO object:
1189                      my $contigO = new ContigO($figO,$self->genome->id,$contig);                      my $contigO = new ContigO($figO,$self->genome->id,$contig);
1190    
1191                        #...Extract DNA subsequence, including guard regions:
1192                      my $begA = &max(1,$beg - $adjL);                      my $begA = &max(1,$beg - $adjL);
1193                      my $endA = &min($end+$adjR,$contigO->contig_length);                      my $endA = &min($end+$adjR,$contigO->contig_length);
1194                      my $dna  = $contigO->dna_seq($begA,$endA);                      my $dna  = $contigO->dna_seq($begA,$endA);
1195                      open(TMP,">$tmp_dir/tmp_dna") || die "couild not open tmp_dna";                      if (defined($dna) && (length($dna) > 90))
1196                        {
1197                            #...Open tmp-file and write FASTA containing DNA subregion to be BLASTed:
1198                            open( TMP, ">$tmp_dir/tmp_dna") || die "could not open tmp_dna";
1199                      print TMP ">dna\n$dna\n";                      print TMP ">dna\n$dna\n";
1200                      close(TMP);                      close(TMP);
1201    
1202                      my $peg2O = new FeatureO($figO,$peg2);                          #...Create new FeatureO object corresponding tp $peg2:
1203                      my $prot  = $peg2O->prot_seq;                          my $pegO2 = new FeatureO($figO,$peg2);
1204    
1205                            #...Fetch its translation, and print to tmp FASTA file for BLASTing:
1206                            my $prot  = $pegO2->prot_seq;
1207                            if (defined($prot) && (length($prot) > 30))
1208                            {
1209                      open(TMP,">$tmp_dir/tmp_prot") || die "could not open tmp_prot";                      open(TMP,">$tmp_dir/tmp_prot") || die "could not open tmp_prot";
1210                      print TMP ">tmp_prot\n$prot\n";                      print TMP ">tmp_prot\n$prot\n";
1211                      close(TMP);                      close(TMP);
1212    
1213                                #...Build BLAST nucleotide database for extracted DNA region,
1214                                #   and TBLASTN $peg2 against the DNA:
1215                      &run("formatdb -i $tmp_dir/tmp_dna -pF");                      &run("formatdb -i $tmp_dir/tmp_dna -pF");
1216                      open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")                      open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")
1217                          || die "could not blast";                          || die "could not blast";
1218    
1219                                #...Parse the TBLASTN output; find and sort HSPs by left boundary:
1220                      my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);                      my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);
1221                      my @hsps       = sort { $a->[0] <=> $b->[0] }                      my @hsps       = sort { $a->[0] <=> $b->[0] }
1222                                       map { [$_->[9],$_->[10],$_->[12],$_->[13]] }                                       map { [$_->[9],$_->[10],$_->[12],$_->[13]] }
1223                                       grep { $_->[1] < 1.0e-50 }                                       grep { $_->[1] < 1.0e-50 }
1224                                       @{$db_seq_out->[6]};                                       @{$db_seq_out->[6]};
1225    
1226                                #...Extract HSP boundary pairs:
1227                      my @prot = map { [$_->[0],$_->[1]] } @hsps;                      my @prot = map { [$_->[0],$_->[1]] } @hsps;
1228                      my @dna  = map { [$_->[2],$_->[3]] } @hsps;                      my @dna  = map { [$_->[2],$_->[3]] } @hsps;
1229    
1230                                #...If the "cover" of the HSPs covers more than 90% of $peg2 w gaps < 3 AA,
1231                                #   and the "cover" of the HPSs cover more than 90% of the extracted DNA
1232                                #   w/ gaps < 9 bp (but not a multiple of 3), suspect a possible frameshift:
1233                      if (&covers(\@prot,length($prot),3,0) && &covers(\@dna,3*length($prot),9,1))                      if (&covers(\@prot,length($prot),3,0) && &covers(\@dna,3*length($prot),9,1))
1234                      {                      {
1235                          return 1;                          return 1;
# Line 1196  Line 1238 
1238              }              }
1239          }          }
1240      }      }
1241            }
1242        }
1243      return 0;      return 0;
1244  }  }
1245    
# Line 1226  Line 1270 
1270    
1271  sub run {  sub run {
1272      my($cmd) = @_;      my($cmd) = @_;
1273      (system($cmd) == 0) || Confess("FAILED: $cmd");      (system($cmd) == 0) || confess("FAILED: $cmd");
1274  }  }
1275    
1276    
# Line 1241  Line 1285 
1285    
1286  C<< my $max = $feature->max($x, $y); >>  C<< my $max = $feature->max($x, $y); >>
1287    
1288  =item C<$x>  =item C<$x> and  C<$y>
1289    
1290  Numerical value.  Numerical values.
1291    
1292  =item C<$y>  =item RETURNS:
   
 Numerical value.  
   
 =items RETURNS:  
1293    
1294  The larger of the two numerical values C<$x> and C<$y>.  The larger of the two numerical values C<$x> and C<$y>.
1295    
# Line 1274  Line 1314 
1314    
1315  C<< my $min = $feature->min($x, $y); >>  C<< my $min = $feature->min($x, $y); >>
1316    
1317  =item C<$x>  =item C<$x> and C<$y>
1318    
1319  Numerical value.  Numerical values.
   
 =item C<$y>  
   
 Numerical value.  
1320    
1321  =item RETURNS:  =item RETURNS:
1322    
# Line 1374  Line 1410 
1410      my $fig  = $figO->{_fig};      my $fig  = $figO->{_fig};
1411    
1412      my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;      my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;
1413      my $all    = $args{-all}    ? $args{-all}    : "fig";      my $all    = $args{-all}    ? 'all'          : "fig";
1414      my $max    = $args{-max}    ? $args{-max}    : 10000;      my $max    = $args{-max}    ? $args{-max}    : 10000;
1415    
1416      return $fig->sims($self->id,$max,$cutoff,$all);      my @sims = $fig->sims($self->id,$max,$cutoff,$all);
1417    
1418        if (@sims) {
1419            my $peg1 = FeatureO->new($figO, $sims[0]->[0]);
1420    
1421            foreach my $sim (@sims) {
1422    #           $sim->[0] = $peg1;
1423    #           $sim->[1] = FeatureO->new($figO, $sim->[1]);
1424            }
1425        }
1426    
1427        return @sims;
1428  }  }
1429    
1430    
# Line 1395  Line 1442 
1442    
1443  C<< my @bbhs = $pegO->bbhs(); >>  C<< my @bbhs = $pegO->bbhs(); >>
1444    
1445  =item List of BBHO objects.  =item RETURNS:
1446    
1447    List of BBHO objects.
1448    
1449  =back  =back
1450    
# Line 1416  Line 1465 
1465                                                  },'BBHO') } @bbhs;                                                  },'BBHO') } @bbhs;
1466  }  }
1467    
1468    
1469  =head3 display  =head3 display
1470    
1471    =over 4
1472    
1473    =item FUNCTION:
1474    
1475  Prints info about a "FeatureO" object to STDOUT.  Prints info about a "FeatureO" object to STDOUT.
1476    
1477  USAGE:  =item USAGE:
1478    
1479  C<< $pegO->display(); >>  C<< $pegO->display(); >>
1480    
1481    =item RETURNS;
1482    
1483    (void)
1484    
1485    =back
1486    
1487  =cut  =cut
1488    
1489  sub display {  sub display {
# Line 1582  Line 1642 
1642    
1643  =head3 new  =head3 new
1644    
1645    =over 4
1646    
1647    =item FUNCTION:
1648    
1649    Cronstruct a new "AnnotationO" object
1650    
1651    =item USAGE:
1652    
1653    C<< my $annotO = AnnotationO->new( $fid, $timestamp, $who, $text); >>
1654    
1655    =item C<$fid>
1656    
1657    A feature identifier.
1658    
1659    =item C<$timestamp>
1660    
1661    The C<UN*X> timestamp one wishes to associate with the annotation.
1662    
1663    =item C<$who>
1664    
1665    The annotator's user-name.
1666    
1667    =item C<$text>
1668    
1669    The textual content of the annotation.
1670    
1671    =item RETURNS:
1672    
1673    An "AnnotationO" object.
1674    
1675    =back
1676    
1677  =cut  =cut
1678    
1679  sub new {  sub new {
# Line 1599  Line 1691 
1691    
1692  =head3 fid  =head3 fid
1693    
1694    =over 4
1695    
1696    =item FUNCTION:
1697    
1698    Extract the feature-ID that was annotated.
1699    
1700    =item USAGE:
1701    
1702    C<< my $fid = $annotO->fid(); >>
1703    
1704    =item RETURNS;
1705    
1706    The feature-ID as a string.
1707    
1708    =back
1709    
1710  =cut  =cut
1711    
1712  sub fid {  sub fid {
# Line 1611  Line 1719 
1719    
1720  =head3 timestamp  =head3 timestamp
1721    
1722    =over 4
1723    
1724    =item FUNCTION:
1725    
1726    Extract the C<UN*X> timestamp of the annotation.
1727    
1728    =item USAGE:
1729    
1730    C<< my $fid = $annotO->timestamp(); >>
1731    
1732    =item RETURNS;
1733    
1734    The timestamp as a string.
1735    
1736    =back
1737    
1738  =cut  =cut
1739    
1740  sub timestamp {  sub timestamp {
# Line 1630  Line 1754 
1754    
1755  =head3 made_by  =head3 made_by
1756    
1757    =over 4
1758    
1759    =item FUNCTION:
1760    
1761    Extract the annotator's user-name.
1762    
1763    =item USAGE:
1764    
1765    C<< my $fid = $annotO->made_by(); >>
1766    
1767    =item RETURNS;
1768    
1769    The username of the annotator, as a string.
1770    
1771    =back
1772    
1773  =cut  =cut
1774    
1775  sub made_by {  sub made_by {
# Line 1644  Line 1784 
1784    
1785  =head3 text  =head3 text
1786    
1787    =over 4
1788    
1789    =item FUNCTION:
1790    
1791    Extract the text of the annotation.
1792    
1793    =item USGAE:
1794    
1795    C<< my $text = $annotO->text(); >>
1796    
1797    =item RETURNS:
1798    
1799    The text of the annotation, as a string.
1800    
1801    =back
1802    
1803  =cut  =cut
1804    
1805  sub text {  sub text {
# Line 1656  Line 1812 
1812    
1813  =head3 display  =head3 display
1814    
1815    =over 4
1816    
1817    =item FUNCTION:
1818    
1819    Print the contents of an "AnnotationO" object to B<STDOUT>
1820    in human-readable form.
1821    
1822    =item USAGE:
1823    
1824    C<< my $annotO->display(); >>
1825    
1826    =item RETURNS:
1827    
1828    (void)
1829    
1830    =back
1831    
1832  =cut  =cut
1833    
1834  sub display {  sub display {
# Line 1682  Line 1855 
1855    
1856  =head3 new  =head3 new
1857    
1858    =over 4
1859    
1860    =item FUNCTION:
1861    
1862    Construct a new "CouplingO" object
1863    encapsulating the "functional coupling" score
1864    between a pair of features in some genome.
1865    
1866    =item USAGE:
1867    
1868    C<< $couplingO = CouplingO->new($figO, $fid1, $fid2, $sc); >>
1869    
1870    =item C<$figO>
1871    
1872    Parent "FIGO" object.
1873    
1874    =item C<$fid1> and C<$fid2>
1875    
1876    A pair of feature-IDs.
1877    
1878    =item C<$sc>
1879    
1880    A functional-coupling score
1881    
1882    =item RETURNS:
1883    
1884    A "CouplingO" object.
1885    
1886    =back
1887    
1888  =cut  =cut
1889    
1890  sub new {  sub new {
# Line 1701  Line 1904 
1904    
1905  =head3 peg1  =head3 peg1
1906    
1907    =over 4
1908    
1909    =item FUNCTION:
1910    
1911    Returns a "FeatureO" object corresponding to the first FID in a coupled pair.
1912    
1913    =item USAGE:
1914    
1915    C<< my $peg1 = $couplingO->peg1(); >>
1916    
1917    =item RETURNS:
1918    
1919    A "FeatureO" object.
1920    
1921    =back
1922    
1923  =cut  =cut
1924    
1925  sub peg1 {  sub peg1 {
# Line 1712  Line 1931 
1931    
1932    
1933    
1934  =head3 peg1  =head3 peg2
1935    
1936    =over 4
1937    
1938    =item FUNCTION:
1939    
1940    Returns a "FeatureO" object corresponding to the second FID in a coupled pair.
1941    
1942    =item USAGE:
1943    
1944    C<< my $peg2 = $couplingO->peg2(); >>
1945    
1946    =item RETURNS:
1947    
1948    A "FeatureO" object.
1949    
1950    =back
1951    
1952  =cut  =cut
1953    
# Line 1727  Line 1962 
1962    
1963  =head3 sc  =head3 sc
1964    
1965    =over 4
1966    
1967    =item FUNCTION:
1968    
1969    Extracts the "functional coupling" score from a "CouplingO" object.
1970    
1971    =item USAGE:
1972    
1973    C<< my $sc = $couplingO->sc(); >>
1974    
1975    =item RETURNS:
1976    
1977    A scalar score.
1978    
1979    =back
1980    
1981  =cut  =cut
1982    
1983  sub sc {  sub sc {
# Line 1739  Line 1990 
1990    
1991  =head3 evidence  =head3 evidence
1992    
1993    =over 4
1994    
1995    =item FUNCTION:
1996    
1997    Fetch the evidence for a "functional coupling" between two close PEGs,
1998    in the form of a list of objects describing the "Pairs of Close Homologs" (PCHs)
1999    supporting the existence of a functional coupling between the two close PEGs.
2000    
2001    =item USAGE:
2002    
2003    C<< my $evidence = $couplingO->evidence(); >>
2004    
2005    =item RETURNS
2006    
2007    List of pairs of "FeatureO" objects.
2008    
2009    =back
2010    
2011  =cut  =cut
2012    
2013  sub evidence {  sub evidence {
# Line 1761  Line 2030 
2030    
2031  =head3 display  =head3 display
2032    
2033    =over 4
2034    
2035    =item FUNCTION:
2036    
2037    Print the contents of a "CouplingO" object to B<STDOUT> in human-readable form.
2038    
2039    =item USAGE:
2040    
2041    C<< $couplingO->display(); >>
2042    
2043    =item RETURNS:
2044    
2045    (Void)
2046    
2047    =back
2048    
2049  =cut  =cut
2050    
2051  sub display {  sub display {

Legend:
Removed from v.1.19  
changed lines
  Added in v.1.24

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3