[Bio] / FigKernelPackages / FIGO.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIGO.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.20, Sun Mar 25 14:21:41 2007 UTC revision 1.25, Wed Sep 26 03:40:10 2007 UTC
# Line 35  Line 35 
35  (Likewise, in principle one might like to attach an "annotation"  (Likewise, in principle one might like to attach an "annotation"
36  to any type of object  to any type of object
37    
38  Four of the modules dealing with "genomes" have a reasonable clear  The four modules dealing with "genomes" have a reasonable clear
39  "implied heirarchy:"  "implied heirarchy:"
40    
41  =over 4  =over 4
# Line 52  Line 52 
52  We have chosen to in many cases sidestep the entire issue of inheritance  We have chosen to in many cases sidestep the entire issue of inheritance
53  via an I<ad hoc> mechanism:  via an I<ad hoc> mechanism:
54  If a "child" object needs access to its "ancestors'" methods,  If a "child" object needs access to its "ancestors'" methods,
55  we pass it references to its "ancestors" using subroutine arguments.  we will explicitly pass it references to its "ancestors,"
56    as subroutine arguments.
57  This is admittedly ugly, clumsy, and potentially error-prone ---  This is admittedly ugly, clumsy, and potentially error-prone ---
58  but it has the advantage that, unlike multiple inheritance,  but it has the advantage that, unlike multiple inheritance,
59  we understand how to do it...  we understand how to do it...
# Line 72  Line 73 
73  use SproutFIG;  use SproutFIG;
74  use Tracer;  use Tracer;
75  use Data::Dumper;  use Data::Dumper;
76    use Carp;
77  use FigFams;  use FigFams;
78  use gjoparseblast;  use gjoparseblast;
79    
# Line 117  Line 119 
119      return bless $self, $class;      return bless $self, $class;
120  }  }
121    
122    sub function_of {
123        my($self,$id) = @_;
124    
125        my $fig  = $self->{_fig};
126        my $func = $fig->function_of($id);
127    
128        return ($func ? $func : "");
129    }
130    
131  =head3 genomes  =head3 genomes
132    
# Line 357  Line 366 
366    
367  =item USAGE:  =item USAGE:
368    
369  C<< my $org = GenomeO->new($figo, $tax_id); >>  C<< my $org = GenomeO->new($figO, $tax_id); >>
370    
371  =item RETURNS:  =item RETURNS:
372    
# Line 426  Line 435 
435  }  }
436    
437    
438    
439    
440    =head3 taxonomy_of
441    
442    =cut
443    
444    sub taxonomy_of {
445        my ($self) = @_;
446    
447        my $figO = $self->{_figO};
448        my $fig  = $figO->{_fig};
449    
450        return $fig->taxonomy_of($self->id());
451    }
452    
453    
454  =head3 contigs_of  =head3 contigs_of
455    
456  =over 4  =over 4
# Line 697  Line 722 
722  package FeatureO;  package FeatureO;
723  ########################################################################  ########################################################################
724  use Data::Dumper;  use Data::Dumper;
725    use Carp;
726    
727  =head1 FeatureO  =head1 FeatureO
728    
# Line 707  Line 733 
733    
734  =head3 new  =head3 new
735    
736  Constructor of "FeatureO" objects  Constructor of new "FeatureO" objects
737    
738  =over 4  =over 4
739    
# Line 777  Line 803 
803    
804  =item RETURNS:  =item RETURNS:
805    
806  The TAxon-ID for the "GenomeO" object containg the feature.  The Taxon-ID for the "GenomeO" object containing the feature.
807    
808  =back  =back
809    
# Line 1015  Line 1041 
1041    
1042  =item RETURNS:  =item RETURNS:
1043    
1044  A list of L<CouplingO> objects describing the evidence for functional coupling  A list of "CouplingO" objects describing the evidence for functional coupling
1045  between this feature and other nearby features.  between this feature and other nearby features.
1046    
1047  =back  =back
# Line 1050  Line 1076 
1076    
1077  =item RETURNS:  =item RETURNS:
1078    
1079  A list of L<AnnotationO> objects allowing access to the annotations for this feature.  A list of "AnnotationO" objects allowing access to the annotations for this feature.
1080    
1081  =back  =back
1082    
# Line 1076  Line 1102 
1102    
1103  =item RETURNS:  =item RETURNS:
1104    
1105  A list of L<SubsystemO> objects allowing access to the subsystems  A list of "SubsystemO" objects allowing access to the subsystems
1106  that this feature particupates in.  that this feature particupates in.
1107    
1108  =back  =back
# Line 1144  Line 1170 
1170      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1171      my($tmp_dir) = $figO->{_tmp_dir};      my($tmp_dir) = $figO->{_tmp_dir};
1172    
1173        #...Skip tests and return '0' if truncated...
1174      if (! $self->possibly_truncated)      if (! $self->possibly_truncated)
1175      {      {
1176            #...Get best precomputed BLAST hit if E-value < 1.0e-50:
1177          my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);          my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);
1178    
1179            #...If a sim was returned:
1180          if (my $sim = shift @sims)          if (my $sim = shift @sims)
1181          {          {
1182                #...Get best hit FID and boundaries:
1183              my $peg2 = $sim->id2;              my $peg2 = $sim->id2;
1184              my $ln1  = $sim->ln1;              my $ln1  = $sim->ln1;
1185              my $ln2  = $sim->ln2;              my $ln2  = $sim->ln2;
1186              my $b2   = $sim->b2;              my $b2   = $sim->b2;
1187              my $e2   = $sim->e2;              my $e2   = $sim->e2;
1188    
1189                #...Convert from AA to BP, and pad out w/ 100 bp guard region:
1190              my $adjL = 100 + (($b2-1) * 3);              my $adjL = 100 + (($b2-1) * 3);
1191              my $adjR = 100 + (($ln2 - $e2) * 3);              my $adjR = 100 + (($ln2 - $e2) * 3);
1192    
1193                #...If hit is more than 20% longer than query:
1194              if ($ln2 > (1.2 * $ln1))              if ($ln2 > (1.2 * $ln1))
1195              {              {
1196                    #...Get and parse query location:
1197                  my $loc = $self->location;                  my $loc = $self->location;
1198                  if ($loc =~ /^(\S+)_(\d+)_(\d+)/)                  if ($loc =~ /^(\S+)_(\d+)_(\d+)/)
1199                  {                  {
1200                      my $contig = $1;                      my $contig = $1;
1201                      my $beg    = $2;                      my $beg    = $2;
1202                      my $end = $3;                      my $end = $3;
1203    
1204                        #...Create new ContigO object:
1205                      my $contigO = new ContigO($figO,$self->genome->id,$contig);                      my $contigO = new ContigO($figO,$self->genome->id,$contig);
1206    
1207                        #...Extract DNA subsequence, including guard regions:
1208                      my $begA = &max(1,$beg - $adjL);                      my $begA = &max(1,$beg - $adjL);
1209                      my $endA = &min($end+$adjR,$contigO->contig_length);                      my $endA = &min($end+$adjR,$contigO->contig_length);
1210                      my $dna  = $contigO->dna_seq($begA,$endA);                      my $dna  = $contigO->dna_seq($begA,$endA);
1211                      open(TMP,">$tmp_dir/tmp_dna") || die "couild not open tmp_dna";                      if (defined($dna) && (length($dna) > 90))
1212                        {
1213                            #...Open tmp-file and write FASTA containing DNA subregion to be BLASTed:
1214                            open( TMP, ">$tmp_dir/tmp_dna") || die "could not open tmp_dna";
1215                      print TMP ">dna\n$dna\n";                      print TMP ">dna\n$dna\n";
1216                      close(TMP);                      close(TMP);
1217    
1218                      my $peg2O = new FeatureO($figO,$peg2);                          #...Create new FeatureO object corresponding tp $peg2:
1219                      my $prot  = $peg2O->prot_seq;                          my $pegO2 = new FeatureO($figO,$peg2);
1220    
1221                            #...Fetch its translation, and print to tmp FASTA file for BLASTing:
1222                            my $prot  = $pegO2->prot_seq;
1223                            if (defined($prot) && (length($prot) > 30))
1224                            {
1225                      open(TMP,">$tmp_dir/tmp_prot") || die "could not open tmp_prot";                      open(TMP,">$tmp_dir/tmp_prot") || die "could not open tmp_prot";
1226                      print TMP ">tmp_prot\n$prot\n";                      print TMP ">tmp_prot\n$prot\n";
1227                      close(TMP);                      close(TMP);
1228    
1229                                #...Build BLAST nucleotide database for extracted DNA region,
1230                                #   and TBLASTN $peg2 against the DNA:
1231                      &run("formatdb -i $tmp_dir/tmp_dna -pF");                      &run("formatdb -i $tmp_dir/tmp_dna -pF");
1232                      open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")                      open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")
1233                          || die "could not blast";                          || die "could not blast";
1234    
1235                                #...Parse the TBLASTN output; find and sort HSPs by left boundary:
1236                      my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);                      my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);
1237                      my @hsps       = sort { $a->[0] <=> $b->[0] }                      my @hsps       = sort { $a->[0] <=> $b->[0] }
1238                                       map { [$_->[9],$_->[10],$_->[12],$_->[13]] }                                       map { [$_->[9],$_->[10],$_->[12],$_->[13]] }
1239                                       grep { $_->[1] < 1.0e-50 }                                       grep { $_->[1] < 1.0e-50 }
1240                                       @{$db_seq_out->[6]};                                       @{$db_seq_out->[6]};
1241    
1242                                #...Extract HSP boundary pairs:
1243                      my @prot = map { [$_->[0],$_->[1]] } @hsps;                      my @prot = map { [$_->[0],$_->[1]] } @hsps;
1244                      my @dna  = map { [$_->[2],$_->[3]] } @hsps;                      my @dna  = map { [$_->[2],$_->[3]] } @hsps;
1245    
1246                                #...If the "cover" of the HSPs covers more than 90% of $peg2 w gaps < 3 AA,
1247                                #   and the "cover" of the HPSs cover more than 90% of the extracted DNA
1248                                #   w/ gaps < 9 bp (but not a multiple of 3), suspect a possible frameshift:
1249                      if (&covers(\@prot,length($prot),3,0) && &covers(\@dna,3*length($prot),9,1))                      if (&covers(\@prot,length($prot),3,0) && &covers(\@dna,3*length($prot),9,1))
1250                      {                      {
1251                          return 1;                          return 1;
# Line 1196  Line 1254 
1254              }              }
1255          }          }
1256      }      }
1257            }
1258        }
1259      return 0;      return 0;
1260  }  }
1261    
# Line 1226  Line 1286 
1286    
1287  sub run {  sub run {
1288      my($cmd) = @_;      my($cmd) = @_;
1289      (system($cmd) == 0) || Confess("FAILED: $cmd");      (system($cmd) == 0) || confess("FAILED: $cmd");
1290  }  }
1291    
1292    
# Line 1241  Line 1301 
1301    
1302  C<< my $max = $feature->max($x, $y); >>  C<< my $max = $feature->max($x, $y); >>
1303    
1304  =item C<$x>  =item C<$x> and  C<$y>
   
 Numerical value.  
1305    
1306  =item C<$y>  Numerical values.
1307    
1308  Numerical value.  =item RETURNS:
   
 =items RETURNS:  
1309    
1310  The larger of the two numerical values C<$x> and C<$y>.  The larger of the two numerical values C<$x> and C<$y>.
1311    
# Line 1274  Line 1330 
1330    
1331  C<< my $min = $feature->min($x, $y); >>  C<< my $min = $feature->min($x, $y); >>
1332    
1333  =item C<$x>  =item C<$x> and C<$y>
   
 Numerical value.  
1334    
1335  =item C<$y>  Numerical values.
   
 Numerical value.  
1336    
1337  =item RETURNS:  =item RETURNS:
1338    
# Line 1374  Line 1426 
1426      my $fig  = $figO->{_fig};      my $fig  = $figO->{_fig};
1427    
1428      my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;      my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;
1429      my $all    = $args{-all}    ? $args{-all}    : "fig";      my $all    = $args{-all}    ? 'all'          : "fig";
1430      my $max    = $args{-max}    ? $args{-max}    : 10000;      my $max    = $args{-max}    ? $args{-max}    : 10000;
1431    
1432      my @sims = $fig->sims($self->id,$max,$cutoff,$all);      my @sims = $fig->sims($self->id,$max,$cutoff,$all);
# Line 1383  Line 1435 
1435          my $peg1 = FeatureO->new($figO, $sims[0]->[0]);          my $peg1 = FeatureO->new($figO, $sims[0]->[0]);
1436    
1437          foreach my $sim (@sims) {          foreach my $sim (@sims) {
1438              $sim->[0] = $peg1;  #           $sim->[0] = $peg1;
1439              $sim->[1] = FeatureO->new($figO, $sim->[1]);  #           $sim->[1] = FeatureO->new($figO, $sim->[1]);
1440          }          }
1441      }      }
1442    
# Line 1406  Line 1458 
1458    
1459  C<< my @bbhs = $pegO->bbhs(); >>  C<< my @bbhs = $pegO->bbhs(); >>
1460    
1461  =item List of BBHO objects.  =item RETURNS:
1462    
1463    List of BBHO objects.
1464    
1465  =back  =back
1466    
# Line 1427  Line 1481 
1481                                                  },'BBHO') } @bbhs;                                                  },'BBHO') } @bbhs;
1482  }  }
1483    
1484    
1485  =head3 display  =head3 display
1486    
1487    =over 4
1488    
1489    =item FUNCTION:
1490    
1491  Prints info about a "FeatureO" object to STDOUT.  Prints info about a "FeatureO" object to STDOUT.
1492    
1493  USAGE:  =item USAGE:
1494    
1495  C<< $pegO->display(); >>  C<< $pegO->display(); >>
1496    
1497    =item RETURNS;
1498    
1499    (void)
1500    
1501    =back
1502    
1503  =cut  =cut
1504    
1505  sub display {  sub display {
# Line 1593  Line 1658 
1658    
1659  =head3 new  =head3 new
1660    
1661    =over 4
1662    
1663    =item FUNCTION:
1664    
1665    Cronstruct a new "AnnotationO" object
1666    
1667    =item USAGE:
1668    
1669    C<< my $annotO = AnnotationO->new( $fid, $timestamp, $who, $text); >>
1670    
1671    =item C<$fid>
1672    
1673    A feature identifier.
1674    
1675    =item C<$timestamp>
1676    
1677    The C<UN*X> timestamp one wishes to associate with the annotation.
1678    
1679    =item C<$who>
1680    
1681    The annotator's user-name.
1682    
1683    =item C<$text>
1684    
1685    The textual content of the annotation.
1686    
1687    =item RETURNS:
1688    
1689    An "AnnotationO" object.
1690    
1691    =back
1692    
1693  =cut  =cut
1694    
1695  sub new {  sub new {
# Line 1610  Line 1707 
1707    
1708  =head3 fid  =head3 fid
1709    
1710    =over 4
1711    
1712    =item FUNCTION:
1713    
1714    Extract the feature-ID that was annotated.
1715    
1716    =item USAGE:
1717    
1718    C<< my $fid = $annotO->fid(); >>
1719    
1720    =item RETURNS;
1721    
1722    The feature-ID as a string.
1723    
1724    =back
1725    
1726  =cut  =cut
1727    
1728  sub fid {  sub fid {
# Line 1622  Line 1735 
1735    
1736  =head3 timestamp  =head3 timestamp
1737    
1738    =over 4
1739    
1740    =item FUNCTION:
1741    
1742    Extract the C<UN*X> timestamp of the annotation.
1743    
1744    =item USAGE:
1745    
1746    C<< my $fid = $annotO->timestamp(); >>
1747    
1748    =item RETURNS;
1749    
1750    The timestamp as a string.
1751    
1752    =back
1753    
1754  =cut  =cut
1755    
1756  sub timestamp {  sub timestamp {
# Line 1641  Line 1770 
1770    
1771  =head3 made_by  =head3 made_by
1772    
1773    =over 4
1774    
1775    =item FUNCTION:
1776    
1777    Extract the annotator's user-name.
1778    
1779    =item USAGE:
1780    
1781    C<< my $fid = $annotO->made_by(); >>
1782    
1783    =item RETURNS;
1784    
1785    The username of the annotator, as a string.
1786    
1787    =back
1788    
1789  =cut  =cut
1790    
1791  sub made_by {  sub made_by {
# Line 1655  Line 1800 
1800    
1801  =head3 text  =head3 text
1802    
1803    =over 4
1804    
1805    =item FUNCTION:
1806    
1807    Extract the text of the annotation.
1808    
1809    =item USGAE:
1810    
1811    C<< my $text = $annotO->text(); >>
1812    
1813    =item RETURNS:
1814    
1815    The text of the annotation, as a string.
1816    
1817    =back
1818    
1819  =cut  =cut
1820    
1821  sub text {  sub text {
# Line 1667  Line 1828 
1828    
1829  =head3 display  =head3 display
1830    
1831    =over 4
1832    
1833    =item FUNCTION:
1834    
1835    Print the contents of an "AnnotationO" object to B<STDOUT>
1836    in human-readable form.
1837    
1838    =item USAGE:
1839    
1840    C<< my $annotO->display(); >>
1841    
1842    =item RETURNS:
1843    
1844    (void)
1845    
1846    =back
1847    
1848  =cut  =cut
1849    
1850  sub display {  sub display {
# Line 1693  Line 1871 
1871    
1872  =head3 new  =head3 new
1873    
1874    =over 4
1875    
1876    =item FUNCTION:
1877    
1878    Construct a new "CouplingO" object
1879    encapsulating the "functional coupling" score
1880    between a pair of features in some genome.
1881    
1882    =item USAGE:
1883    
1884    C<< $couplingO = CouplingO->new($figO, $fid1, $fid2, $sc); >>
1885    
1886    =item C<$figO>
1887    
1888    Parent "FIGO" object.
1889    
1890    =item C<$fid1> and C<$fid2>
1891    
1892    A pair of feature-IDs.
1893    
1894    =item C<$sc>
1895    
1896    A functional-coupling score
1897    
1898    =item RETURNS:
1899    
1900    A "CouplingO" object.
1901    
1902    =back
1903    
1904  =cut  =cut
1905    
1906  sub new {  sub new {
# Line 1712  Line 1920 
1920    
1921  =head3 peg1  =head3 peg1
1922    
1923    =over 4
1924    
1925    =item FUNCTION:
1926    
1927    Returns a "FeatureO" object corresponding to the first FID in a coupled pair.
1928    
1929    =item USAGE:
1930    
1931    C<< my $peg1 = $couplingO->peg1(); >>
1932    
1933    =item RETURNS:
1934    
1935    A "FeatureO" object.
1936    
1937    =back
1938    
1939  =cut  =cut
1940    
1941  sub peg1 {  sub peg1 {
# Line 1723  Line 1947 
1947    
1948    
1949    
1950  =head3 peg1  =head3 peg2
1951    
1952    =over 4
1953    
1954    =item FUNCTION:
1955    
1956    Returns a "FeatureO" object corresponding to the second FID in a coupled pair.
1957    
1958    =item USAGE:
1959    
1960    C<< my $peg2 = $couplingO->peg2(); >>
1961    
1962    =item RETURNS:
1963    
1964    A "FeatureO" object.
1965    
1966    =back
1967    
1968  =cut  =cut
1969    
# Line 1738  Line 1978 
1978    
1979  =head3 sc  =head3 sc
1980    
1981    =over 4
1982    
1983    =item FUNCTION:
1984    
1985    Extracts the "functional coupling" score from a "CouplingO" object.
1986    
1987    =item USAGE:
1988    
1989    C<< my $sc = $couplingO->sc(); >>
1990    
1991    =item RETURNS:
1992    
1993    A scalar score.
1994    
1995    =back
1996    
1997  =cut  =cut
1998    
1999  sub sc {  sub sc {
# Line 1750  Line 2006 
2006    
2007  =head3 evidence  =head3 evidence
2008    
2009    =over 4
2010    
2011    =item FUNCTION:
2012    
2013    Fetch the evidence for a "functional coupling" between two close PEGs,
2014    in the form of a list of objects describing the "Pairs of Close Homologs" (PCHs)
2015    supporting the existence of a functional coupling between the two close PEGs.
2016    
2017    =item USAGE:
2018    
2019    C<< my $evidence = $couplingO->evidence(); >>
2020    
2021    =item RETURNS
2022    
2023    List of pairs of "FeatureO" objects.
2024    
2025    =back
2026    
2027  =cut  =cut
2028    
2029  sub evidence {  sub evidence {
# Line 1772  Line 2046 
2046    
2047  =head3 display  =head3 display
2048    
2049    =over 4
2050    
2051    =item FUNCTION:
2052    
2053    Print the contents of a "CouplingO" object to B<STDOUT> in human-readable form.
2054    
2055    =item USAGE:
2056    
2057    C<< $couplingO->display(); >>
2058    
2059    =item RETURNS:
2060    
2061    (Void)
2062    
2063    =back
2064    
2065  =cut  =cut
2066    
2067  sub display {  sub display {

Legend:
Removed from v.1.20  
changed lines
  Added in v.1.25

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3