[Bio] / FigKernelPackages / FIGO.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIGO.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.7, Thu Feb 22 14:05:27 2007 UTC revision 1.31, Sat Dec 1 22:17:03 2007 UTC
# Line 1  Line 1 
1    ########################################################################
2  #  #
3  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2006 University of Chicago and Fellowship
4  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
# Line 14  Line 15 
15  # Genomes at veronika@thefig.info or download a copy from  # Genomes at veronika@thefig.info or download a copy from
16  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
17  #  #
18    ########################################################################
19    
20  package FIGO;  =head1 TODO
21    
22    =over 4
23    
24    =item Null arg to ContigO::dna_seq() should return entire contig seq.
25    
26    =item Add method to access "FIG::crude_estimate_of_distance()"
27    
28    =back
29    
30    =cut
31    
32    =head1 Overview
33    
34    This module is a set of packages encapsulating the SEED's core methods
35    using an "OOP-like" style.
36    
37    There are several modules clearly related to "individual genomes:"
38    GenomeO, ContigO, FeatureO (and I<maybe> AnnotationO).
39    
40    There are also modules that deal with complex relationships between
41    pairs or sets of features in one, two, or more genomes,
42    rather than any particular single genome:
43    BBHO, CouplingO, SubsystemO, FunctionalRoleO, FigFamO.
44    
45    Finally, the methods in "Attribute" might in principle attach
46    "atributes" to any type of object.
47    (Likewise, in principle one might also want to attach an "annotation"
48    to any type of object,
49    although currently we only support annotations of "features.")
50    
51    The three modules that act on "individual genomes" have a reasonable clear
52    "implied heirarchy" relative to FIGO:
53    
54    =over 4
55    
56        FIGO > GenomeO > ContigO > FeatureO
57    
58    =back
59    
60    However, inheritance is B<NOT> implemented using the C<@ISA> mechanism,
61    because some methods deal with "pairwise" or "setwise" relations between objects
62    or other more complex relationships that do not naturally fit into any heirarchy ---
63    which would get us into the whole quagmire of "multiple inheritance."
64    
65    We have chosen to in many cases sidestep the entire issue of inheritance
66    via an I<ad hoc> mechanism:
67    If a "child" object needs access to its "ancestors'" methods,
68    we will explicitly pass it references to its "ancestors,"
69    as subroutine arguments.
70    This is admittedly ugly, clumsy, and potentially error-prone ---
71    but it has the advantage that, unlike multiple inheritance,
72    we understand how to do it...
73    
74    MODULE DEPENDENCIES: FIG, FIG_Config, FigFams, SFXlate, SproutFIG, Tracer,
75        gjoparseblast, Data::Dumper.
76    
77    =cut
78    
79    ########################################################################
80    package FIGO;
81    ########################################################################
82  use strict;  use strict;
83  use FIG;  use FIG;
84  use FIG_Config;  use FIG_Config;
# Line 24  Line 86 
86  use SproutFIG;  use SproutFIG;
87  use Tracer;  use Tracer;
88  use Data::Dumper;  use Data::Dumper;
89    use Carp;
90  use FigFams;  use FigFams;
91  use gjoparseblast;  use gjoparseblast;
92    
93  =head1 FIGO Methods  =head1 FIGO
94    
95    The effective "base class" containing a few "top-level" methods.
96    
97    =cut
98    
99    
100  =head3 new  =head3 new
101    
# Line 64  Line 132 
132      return bless $self, $class;      return bless $self, $class;
133  }  }
134    
135    sub function_of {
136        my($self,$id) = @_;
137    
138        my $fig  = $self->{_fig};
139        my $func = $fig->function_of($id);
140    
141        return ($func ? $func : "");
142    }
143    
144  =head3 genomes  =head3 genomes
145    
# Line 80  Line 155 
155  C<< my @tax_ids = $figo->genomes( @constraints ); >>  C<< my @tax_ids = $figo->genomes( @constraints ); >>
156    
157  =item @constraints  =item @constraints
158    
159  One or more element of: complete, prokaryotic, eukaryotic, bacterial, archaeal, nmpdr.  One or more element of: complete, prokaryotic, eukaryotic, bacterial, archaeal, nmpdr.
160    
161  =item RETURNS: List of Tax-IDs.  =item RETURNS: List of Tax-IDs.
162    
163  =item EXAMPLE: L<Display all complete, prokaryotic genomes>  =item EXAMPLE:
164    
165    L<Display all complete, prokaryotic genomes>
166    
167  =back  =back
168    
# Line 141  Line 219 
219  =over4  =over4
220    
221  =item RETURNS:  =item RETURNS:
222    
223  List of all subsystems.  List of all subsystems.
224    
225  =item EXAMPLE: L<Accessing Subsystem data>  =item EXAMPLE:
226    
227    L<Accessing Subsystem data>
228    
229  =back  =back
230    
# Line 188  Line 269 
269    
270  =over4  =over4
271    
272  =item USAGE:  C<< foreach $fam ($figO->all_figfams) { #...Do something } >>  =item USAGE:
273    
274  =item RETURNS: List of FIGfam Objects  C<< foreach $fam ($figO->all_figfams) { #...Do something } >>
275    
276  =item EXAMPLE: L<Accessing FIGfams>  =item RETURNS:
277    
278    List of FIGfam Objects
279    
280    =item EXAMPLE:
281    
282    L<Accessing FIGfams>
283    
284  =back  =back
285    
# Line 211  Line 298 
298    
299  =over4  =over4
300    
301  =item USAGE:   C<< my ($fam, $sims) = $figO->family_containing($seq); >>  =item USAGE:
302    
303    C<< my ($fam, $sims) = $figO->family_containing($seq); >>
304    
305    =item $seq:
306    
307  =item $seq:    A protein translation string.  A protein translation string.
308    
309  =item RETURNS:  =item RETURNS:
310    
311        $fam:  A FIGfam Object.        $fam:  A FIGfam Object.
312    
313        $sims: A set of similarity objects.        $sims: A set of similarity objects.
314    
315  =item EXAMPLE: L<Placing a sequence into a FIGfam>  =item EXAMPLE: L<Placing a sequence into a FIGfam>
# Line 241  Line 334 
334      }      }
335  }  }
336    
337    =head3 figfam
338    
339    =over 4
340    
341    =item USAGE:
342    
343    C<< my $fam = $figO->figfam($family_id); >>
344    
345    =item $family_id;
346    
347    A FigFam ID
348    
349    =item RETURNS:
350    
351    $fam:  A FIGfam Object.
352    
353    =back
354    
355    =cut
356    
357    sub figfam {
358        my($self,$fam_id) = @_;
359    
360        return &FigFamO::new('FigFamO',$self,$fam_id);
361    }
362    
363    
364  ########################################################################  ########################################################################
365  package GenomeO;  package GenomeO;
# Line 251  Line 370 
370    
371  =cut  =cut
372    
373    
374  =head3 new  =head3 new
375    
376  Constructor of GenomeO objects.  Constructor of GenomeO objects.
# Line 258  Line 378 
378  =over4  =over4
379    
380  =item USAGE:  =item USAGE:
 C<< my $org = GenomeO->new($figo, $tax_id); >>  
381    
382  =item RETURNS: A new GenomeO object.  C<< my $orgO = GenomeO->new($figO, $tax_id); >>
383    
384    =item RETURNS:
385    
386    A new "GenomeO" object.
387    
388  =back  =back
389    
# Line 281  Line 404 
404    
405  =over4  =over4
406    
407  =item USAGE:   C<< my $tax_id = $org->id(); >>  =item USAGE:
408    
409    C<< my $tax_id = $orgO->id(); >>
410    
411  =item RETURNS: Taxonomy-ID of GenomeO object.  =item RETURNS:
412    
413    Taxonomy-ID of "GenomeO" object.
414    
415  =back  =back
416    
# Line 301  Line 428 
428    
429  =over4  =over4
430    
431  =item USAGE:   C<< $gs = $genome->genus_species(); >>  =item USAGE:
432    
433  =item RETURNS: Genus-species-strain string  C<< $gs = $orgO->genus_species(); >>
434    
435    =item RETURNS:
436    
437    Genus-species-strain string
438    
439  =back  =back
440    
# Line 317  Line 448 
448  }  }
449    
450    
451    
452    
453    =head3 taxonomy_of
454    
455    =over 4
456    
457    =item FUNCTION:
458    
459    Return the TAXONOMY string of a "GenomeO" object.
460    
461    =item USAGE:
462    
463    C<< my $taxonomy = $orgO->taxonomy_of(); >>
464    
465    =item RETURNS:
466    
467    TAXONOMY string.
468    
469    =back
470    
471    =cut
472    
473    sub taxonomy_of {
474        my ($self) = @_;
475    
476        my $figO = $self->{_figO};
477        my $fig  = $figO->{_fig};
478    
479        return $fig->taxonomy_of($self->{_id});
480    }
481    
482    
483  =head3 contigs_of  =head3 contigs_of
484    
485  =over4  =over4
486    
487  =item RETURNS: List of C<contig> objects contained in a C<GenomeO> object.  =item RETURNS:
488    
489  =item EXAMPLE: L<Show how to access contigs and extract sequence>  List of C<contig> objects contained in a C<GenomeO> object.
490    
491    =item EXAMPLE:
492    
493    L<Show how to access contigs and extract sequence>
494    
495  =back  =back
496    
# Line 341  Line 508 
508    
509  =head3 features_of  =head3 features_of
510    
511    =over 4
512    
513    =item FUNCTION:
514    
515    Returns a list of "FeatureO" objects contained in a "GenomeO" object.
516    
517    =item USAGE:
518    
519    C<< my @featureOs = $orgO->features_of();        #...Fetch all features >>
520    
521    or
522    
523    C<< my @featureOs = $orgO->features_of('peg');   #...Fetch only PEGs >>
524    
525    =item RETURNS:
526    
527    List of "FeatureO" objects.
528    
529    =back
530    
531  =cut  =cut
532    
533  sub features_of {  sub features_of {
# Line 359  Line 546 
546    
547  =over4  =over4
548    
549  =item USAGE:   C<< $genome->display(); >>  =item USAGE:
550    
551    C<< $genome->display(); >>
552    
553    =item RETURNS:
554    
555  =item RETURNS: Null  (Void)
556    
557  =back  =back
558    
# Line 393  Line 584 
584  =over4  =over4
585    
586  =item USAGE:  =item USAGE:
587    
588  C<< $contig = ContigO->new( $figO, $genomeId, $contigId); >>  C<< $contig = ContigO->new( $figO, $genomeId, $contigId); >>
589    
590  =item $figO: A FIGO object.  =item $figO:
591    
592    Parent FIGO object.
593    
594    =item $genomeId:
595    
596  =item $genomeId: Taxon-ID for the genome the contig is from.  Taxon-ID for the genome the contig is from.
597    
598  =item $contigId: Identifier for the contig  =item $contigId:
599    
600    Identifier for the contig
601    
602    =item RETURNS:
603    
604  =item RETURNS: A "ContigO" object.  A "ContigO" object.
605    
606  =back  =back
607    
# Line 423  Line 623 
623    
624  =over4  =over4
625    
626  =item RETURNS: Sequence ID string of "ContigO" object  =item RETURNS:
627    
628    Sequence ID string of "ContigO" object
629    
630  =back  =back
631    
# Line 441  Line 643 
643  =over4  =over4
644    
645  =item USAGE:  =item USAGE:
     C<< my $tax_id = $contig->genome(); >>  
646    
647  =item RETURNS: GenomeO object containing the contig object.  C<< my $tax_id = $contig->genome->id(); >>
648    
649    =item RETURNS:
650    
651    Tax-ID of the GenomeO object containing the contig object.
652    
653  =back  =back
654    
# Line 452  Line 657 
657  sub genome {  sub genome {
658      my($self) = @_;      my($self) = @_;
659    
660      return $self->{_genome};      my $figO = $self->{_figO};
661        return new GenomeO($figO,$self->{_genome});
662  }  }
663    
664    
# Line 462  Line 668 
668  =over4  =over4
669    
670  =item USAGE:  =item USAGE:
671    
672      C<< my $len = $contig->contig_length(); >>      C<< my $len = $contig->contig_length(); >>
673    
674  =item RETURNS: Length of contig's DNA sequence.  =item RETURNS:
675    
676    Length of contig's DNA sequence.
677    
678  =back  =back
679    
# Line 474  Line 683 
683      my($self) = @_;      my($self) = @_;
684    
685      my $fig = $self->{_figO}->{_fig};      my $fig = $self->{_figO}->{_fig};
686      my $contig_lengths = $fig->contig_lengths($self->genome);      my $contig_lengths = $fig->contig_lengths($self->genome->id);
687      return $contig_lengths->{$self->id};      return $contig_lengths->{$self->id};
688  }  }
689    
# Line 484  Line 693 
693  =over4  =over4
694    
695  =item USAGE:  =item USAGE:
696    
697      C<< my $seq = $contig->dna_seq(beg, $end); >>      C<< my $seq = $contig->dna_seq(beg, $end); >>
698    
699  =item $beg: Begining point of DNA subsequence  =item $beg:
700    
701  =item $end: End point of DNA subsequence  Begining point of DNA subsequence
702    
703  =item RETURNS: string of DNA sequence from $beg to $end  =item $end:
704    
705    End point of DNA subsequence
706    
707    =item RETURNS:
708    
709    String containing DNA subsequence running from $beg to $end
710  (NOTE: if $beg > $end, returns reverse complement of DNA subsequence.)  (NOTE: if $beg > $end, returns reverse complement of DNA subsequence.)
711    
712  =back  =back
# Line 506  Line 721 
721      if (($beg && (&FIG::between(1,$beg,$max))) &&      if (($beg && (&FIG::between(1,$beg,$max))) &&
722          ($end && (&FIG::between(1,$end,$max))))          ($end && (&FIG::between(1,$end,$max))))
723      {      {
724          return $fig->dna_seq($self->genome,join("_",($self->id,$beg,$end)));          return $fig->dna_seq($self->genome->id,join("_",($self->id,$beg,$end)));
725      }      }
726      else      else
727      {      {
# Line 527  Line 742 
742    
743  =over4  =over4
744    
745  =item RETURNS: Nil  =item RETURNS:
746    
747    (Void)
748    
749  =back  =back
750    
# Line 536  Line 753 
753  sub display {  sub display {
754      my($self) = @_;      my($self) = @_;
755    
756      print join("ContigO",$self->genome,$self->id,$self->contig_length),"\n";      print join("ContigO",$self->genome->id,$self->id,$self->contig_length),"\n";
757    }
758    
759    sub features_in_region {
760        my($self,$beg,$end) = @_;
761        my $figO = $self->{_figO};
762        my $fig = $figO->{_fig};
763    
764        my($features) = $fig->genes_in_region($self->genome->id,$self->id,$beg,$end);
765        return map { new FeatureO($figO,$_) } @$features;
766  }  }
767    
768    
# Line 545  Line 771 
771  package FeatureO;  package FeatureO;
772  ########################################################################  ########################################################################
773  use Data::Dumper;  use Data::Dumper;
774    use Carp;
775    
776  =head1 FeatureO  =head1 FeatureO
777    
778    Methods for working with features on "ContigO" objects.
779    
780  =cut  =cut
781    
782    
783    =head3 new
784    
785    Constructor of new "FeatureO" objects
786    
787    =over 4
788    
789    =item USAGE:
790    
791    C<< my $feature = FeatureO->new( $figO, $fid ); >>
792    
793    =item C<$figO>:
794    
795    "Base" FIGO object.
796    
797    =item C<$fid>:
798    
799  =head1 new  Feature-ID for new feature
800    
801  Constructor of "FeatureO" objects  =item RETURNS:
802    
803    A newly created "FeatureO" object.
804    
805    =back
806    
807  =cut  =cut
808    
# Line 569  Line 817 
817  }  }
818    
819    
820    
821  =head3 id  =head3 id
822    
823    =over 4
824    
825    =item USAGE:
826    
827    C<< my $fid = $feature->id(); >>
828    
829    =item RETURNS:
830    
831    The FID (Feature ID) of a "FeatureO" object.
832    
833    =back
834    
835  =cut  =cut
836    
837  sub id {  sub id {
# Line 583  Line 844 
844    
845  =head3 genome  =head3 genome
846    
847    =over 4
848    
849    =item USAGE:
850    
851    C<< my $taxid = $feature->genome(); >>
852    
853    =item RETURNS:
854    
855    The Taxon-ID for the "GenomeO" object containing the feature.
856    
857    =back
858    
859  =cut  =cut
860    
861  sub genome {  sub genome {
862      my($self) = @_;      my($self) = @_;
863        my $figO = $self->{_figO};
864      $self->id =~ /^fig\|(\d+\.\d+)/;      $self->id =~ /^fig\|(\d+\.\d+)/;
865      return $1;      return new GenomeO($figO,$1);
866  }  }
867    
868    
869    
870  =head3 type  =head3 type
871    
872    =over 4
873    
874    =item USAGE:
875    
876    C<< my $feature_type = $feature->type(); >>
877    
878    =item RETURNS:
879    
880    The feature object's "type" (e.g., "peg," "rna," etc.)
881    
882    =back
883    
884  =cut  =cut
885    
886  sub type {  sub type {
# Line 607  Line 892 
892    
893    
894    
   
895  =head3 location  =head3 location
896    
897    =over 4
898    
899    =item USAGE:
900    
901    C<< my $loc = $feature->location(); >>
902    
903    =item RETURNS:
904    
905    A string representing the feature object's location on the genome's DNA,
906    in SEED "tbl format" (i.e., "contig_beging_end").
907    
908    =back
909    
910  =cut  =cut
911    
912  sub location {  sub location {
# Line 620  Line 917 
917  }  }
918    
919    
920    =head3 contig
921    
922    =over 4
923    
924    =item USAGE:
925    
926    C<< my $contig = $feature->contig(); >>
927    
928    =item RETURNS:
929    
930    A "ContigO" object to access the contig data
931    for the contig the feature is on.
932    
933    =back
934    
935    =cut
936    
937    sub contig {
938        my($self) = @_;
939    
940        my $figO = $self->{_figO};
941        my $loc      = $self->location;
942        my $genomeID = $self->genome->id;
943        return ($loc =~ /^(\S+)_\d+_\d+$/) ? new ContigO($figO,$genomeID,$1) : undef;
944    }
945    
946    
947    
948    =head3 begin
949    
950    =over 4
951    
952    =item USAGE:
953    
954    C<< my $beg = $feature->begin(); >>
955    
956    =item RETURNS:
957    
958    The numerical coordinate of the first base of the feature.
959    
960    =back
961    
962    =cut
963    
964    sub begin {
965        my($self) = @_;
966    
967        my $loc = $self->location;
968        return ($loc =~ /^\S+_(\d+)_\d+$/) ? $1 : undef;
969    }
970    
971    
972    
973    =head3 end
974    
975    =over 4
976    
977    =item USAGE:
978    
979    C<< my $end = $feature->end(); >>
980    
981    =item RETURNS:
982    
983    The numerical coordinate of the last base of the feature.
984    
985    =back
986    
987    =cut
988    
989    sub end {
990        my($self) = @_;
991    
992        my $loc = $self->location;
993        return ($loc =~ /^\S+_\d+_(\d+)$/) ? $1 : undef;
994    }
995    
996    
997    
998  =head3 dna_seq  =head3 dna_seq
999    
1000    =over 4
1001    
1002    =item USAGE:
1003    
1004    C<< my $dna_seq = $feature->dna_seq(); >>
1005    
1006    =item RETURNS:
1007    
1008    A string contining the DNA subsequence of the contig
1009    running from the first to the last base of the feature.
1010    
1011    If ($beg > $end), the reverse complement subsequence is returned.
1012    
1013    =back
1014    
1015  =cut  =cut
1016    
1017  sub dna_seq {  sub dna_seq {
# Line 638  Line 1027 
1027    
1028  =head3 prot_seq  =head3 prot_seq
1029    
1030    =over 4
1031    
1032    =item USAGE:
1033    
1034    C<< my $dna_seq = $feature->prot_seq(); >>
1035    
1036    =item RETURNS:
1037    
1038    A string contining the protein translation of the feature (if it exists),
1039    or the "undef" value if the feature does not exist or is not a PEG.
1040    
1041    =back
1042    
1043  =cut  =cut
1044    
1045  sub prot_seq {  sub prot_seq {
# Line 653  Line 1055 
1055    
1056  =head3 function_of  =head3 function_of
1057    
1058    =over 4
1059    
1060    =item USAGE:
1061    
1062    C<< my $func = $feature->function_of(); >>
1063    
1064    =item RETURNS:
1065    
1066    A string containing the function assigned to the feature,
1067    or the "undef" value if no function has been assigned.
1068    
1069    =back
1070    
1071  =cut  =cut
1072    
1073  sub function_of {  sub function_of {
# Line 667  Line 1082 
1082    
1083  =head3 coupled_to  =head3 coupled_to
1084    
1085    =over 4
1086    
1087    =item USAGE:
1088    
1089    C<< my @coupled_features = $feature->coupled_to(); >>
1090    
1091    =item RETURNS:
1092    
1093    A list of "CouplingO" objects describing the evidence for functional coupling
1094    between this feature and other nearby features.
1095    
1096    =back
1097    
1098  =cut  =cut
1099    
1100  sub coupled_to {  sub coupled_to {
1101      my($self) = @_;      my($self) = @_;
1102    
1103      ($self->type eq "peg") || return undef;      ($self->type eq "peg") || return ();
1104      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1105      my $fig  = $figO->{_fig};      my $fig  = $figO->{_fig};
1106      my $peg1 = $self->id;      my $peg1 = $self->id;
# Line 689  Line 1117 
1117    
1118  =head3 annotations  =head3 annotations
1119    
1120    =over 4
1121    
1122    =item USAGE:
1123    
1124    C<< my @annot_list = $feature->annotations(); >>
1125    
1126    =item RETURNS:
1127    
1128    A list of "AnnotationO" objects allowing access to the annotations for this feature.
1129    
1130    =back
1131    
1132  =cut  =cut
1133    
1134  sub annotations {  sub annotations {
# Line 700  Line 1140 
1140      return map { &AnnotationO::new('AnnotationO',@$_) } $fig->feature_annotations($self->id,1);      return map { &AnnotationO::new('AnnotationO',@$_) } $fig->feature_annotations($self->id,1);
1141  }  }
1142    
1143    
1144    =head3 in_subsystems
1145    
1146    =over 4
1147    
1148    =item USAGE:
1149    
1150    C<< my @subsys_list = $feature->in_subsystems(); >>
1151    
1152    =item RETURNS:
1153    
1154    A list of "SubsystemO" objects allowing access to the subsystems
1155    that this feature particupates in.
1156    
1157    =back
1158    
1159    =cut
1160    
1161  sub in_subsystems {  sub in_subsystems {
1162      my($self) = @_;      my($self) = @_;
1163      my $figO = $self->{_figO};      my $figO = $self->{_figO};
# Line 711  Line 1169 
1169    
1170  =head3 possibly_truncated  =head3 possibly_truncated
1171    
1172    =over 4
1173    
1174    =item USAGE:
1175    
1176    C<< my $trunc = $feature->possibly_truncated(); >>
1177    
1178    =item RETURNS:
1179    
1180    Boolean C<TRUE> if the feature may be truncated;
1181    boolean C<FALSE> otherwise.
1182    
1183    =back
1184    
1185  =cut  =cut
1186    
1187  sub possibly_truncated {  sub possibly_truncated {
# Line 725  Line 1196 
1196    
1197  =head3 possible_frameshift  =head3 possible_frameshift
1198    
1199    =over 4
1200    
1201    =item USAGE:
1202    
1203    C<< my $fs = $feature->possible_frameshift(); >>
1204    
1205    =item RETURNS:
1206    
1207    Boolean C<TRUE> if the feature may be a frameshifted fragment;
1208    boolean C<FALSE> otherwise.
1209    
1210    (NOTE: This is a crude prototype implementation,
1211    and is mostly as an example of how to code using FIGO.)
1212    
1213    =back
1214    
1215  =cut  =cut
1216    
1217  sub possible_frameshift {  sub possible_frameshift {
1218      my($self) = @_;      my($self) = @_;
1219      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1220      my($tmp_dir) = $figO->{_tmp_dir};      my $fig = $figO->{_fig};
1221    
1222      if (! $self->possibly_truncated)      return $fig->possible_frameshift($self->id);
     {  
         my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);  
         if (my $sim = shift @sims)  
         {  
             my $peg2 = $sim->id2;  
             my $ln1  = $sim->ln1;  
             my $ln2  = $sim->ln2;  
             my $b2   = $sim->b2;  
             my $e2   = $sim->e2;  
             my $adjL = 100 + (($b2-1) * 3);  
             my $adjR = 100 + (($ln2 - $e2) * 3);  
             if ($ln2 > (1.2 * $ln1))  
             {  
                 my $loc = $self->location;  
                 if ($loc =~ /^(\S+)_(\d+)_(\d+)/)  
                 {  
                     my $contig = $1;  
                     my $beg    = $2;  
                     my $end = $3;  
                     my $contigO = new ContigO($figO,$self->genome,$contig);  
                     my $begA = &max(1,$beg - $adjL);  
                     my $endA = &min($end+$adjR,$contigO->contig_length);  
                     my $dna  = $contigO->dna_seq($begA,$endA);  
                     open(TMP,">$tmp_dir/tmp_dna") || die "couild not open tmp_dna";  
                     print TMP ">dna\n$dna\n";  
                     close(TMP);  
   
                     my $peg2O = new FeatureO($figO,$peg2);  
                     my $prot  = $peg2O->prot_seq;  
                     open(TMP,">$tmp_dir/tmp_prot") || die "could not open tmp_prot";  
                     print TMP ">tmp_prot\n$prot\n";  
                     close(TMP);  
                     &run("formatdb -i $tmp_dir/tmp_dna -pF");  
                     open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")  
                         || die "could not blast";  
   
                     my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);  
                     my @hsps       = sort { $a->[0] <=> $b->[0] }  
                                      map { [$_->[9],$_->[10],$_->[12],$_->[13]] }  
                                      grep { $_->[1] < 1.0e-50 }  
                                      @{$db_seq_out->[6]};  
                     my @prot = map { [$_->[0],$_->[1]] } @hsps;  
                     my @dna  = map { [$_->[2],$_->[3]] } @hsps;  
                     if (&covers(\@prot,length($prot),3) && &covers(\@dna,3*length($prot),9))  
                     {  
                         return 1;  
                     }  
                 }  
             }  
         }  
     }  
     return 0;  
1223  }  }
1224    
1225    
1226    
1227  =head3 run  =head3 run
1228    
1229    (Note: This function should be considered "PRIVATE")
1230    
1231    =over 4
1232    
1233    =item FUNCTION:
1234    
1235    Passes a string containing a command to be execture by the "system" shell command.
1236    
1237    =item USAGE:
1238    
1239    C<< $feature->run($cmd); >>
1240    
1241    =item RETURNS:
1242    
1243    Nil if the execution of C<$cmd> was successful;
1244    aborts with traceback if C<$cmd> fails.
1245    
1246    =back
1247    
1248  =cut  =cut
1249    
1250  sub run {  sub run {
1251      my($cmd) = @_;      my($cmd) = @_;
1252      (system($cmd) == 0) || Confess("FAILED: $cmd");      (system($cmd) == 0) || confess("FAILED: $cmd");
1253  }  }
1254    
1255    
1256    
1257  =head3 max  =head3 max
1258    
1259    (Note: This function should be considered "PRIVATE")
1260    
1261    =over 4
1262    
1263    =item USAGE:
1264    
1265    C<< my $max = $feature->max($x, $y); >>
1266    
1267    =item C<$x> and  C<$y>
1268    
1269    Numerical values.
1270    
1271    =item RETURNS:
1272    
1273    The larger of the two numerical values C<$x> and C<$y>.
1274    
1275    =back
1276    
1277  =cut  =cut
1278    
1279  sub max {  sub max {
# Line 813  Line 1285 
1285    
1286  =head3 min  =head3 min
1287    
1288  =cut  (Note: This function should be considered "PRIVATE")
1289    
1290  sub min {  =over 4
     my($x,$y) = @_;  
     return ($x < $y) ? $x : $y;  
 }  
1291    
1292    =item USAGE:
1293    
1294    C<< my $min = $feature->min($x, $y); >>
1295    
1296    =item C<$x> and C<$y>
1297    
1298    Numerical values.
1299    
1300    =item RETURNS:
1301    
1302    The smaller of the two numerical values C<$x> and C<$y>.
1303    
1304  =head3 covers  =back
1305    
1306  =cut  =cut
1307    
1308  sub covers {  sub min {
1309      my($hsps,$ln,$diff) = @_;      my($x,$y) = @_;
1310        return ($x < $y) ? $x : $y;
     my $hsp1 = shift @$hsps;  
     my $hsp2;  
     while ($hsp1 && ($hsp2 = shift @$hsps) && ($hsp1 = &merge($hsp1,$hsp2,$diff))) {}  
     return ($hsp1 && (($hsp1->[1] - $hsp1->[0]) > (0.9 * $ln)));  
1311  }  }
1312    
1313    =head3 sims
1314    
1315    =over 4
1316    
1317  =head3 merge  =item FUNCTION:
1318    
1319  =cut  Returns precomputed "Sim.pm" objects from the SEED.
1320    
1321  sub merge {  =item USAGE:
     my($hsp1,$hsp2,$diff) = @_;  
1322    
1323      my($b1,$e1) = @$hsp1;  C<< my @sims = $pegO->sims( -all, -cutoff => 1.0e-10); >>
     my($b2,$e2) = @$hsp2;  
     return (($e2 > $e1) && (abs($b2-$e1) <= $diff)) ? [$b1,$e2] : undef;  
 }  
1324    
1325    C<< my @sims = $pegO->sims( -max => 50, -cutoff => 1.0e-10); >>
1326    
1327    =item RETURNS: List of sim objects.
1328    
1329  =head3 sims  =back
1330    
1331  =cut  =cut
1332    
# Line 863  Line 1338 
1338      my $fig  = $figO->{_fig};      my $fig  = $figO->{_fig};
1339    
1340      my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;      my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;
1341      my $all    = $args{-all}    ? $args{-all}    : "fig";      my $all    = $args{-all}    ? 'all'          : "fig";
1342      my $max    = $args{-max}    ? $args{-max}    : 10000;      my $max    = $args{-max}    ? $args{-max}    : 10000;
1343    
1344      return $fig->sims($self->id,$max,$cutoff,$all);      my @sims = $fig->sims($self->id,$max,$cutoff,$all);
1345    
1346        if (@sims) {
1347            my $peg1 = FeatureO->new($figO, $sims[0]->[0]);
1348    
1349            foreach my $sim (@sims) {
1350    #           $sim->[0] = $peg1;
1351    #           $sim->[1] = FeatureO->new($figO, $sim->[1]);
1352            }
1353        }
1354    
1355        return @sims;
1356  }  }
1357    
1358    
1359    
1360  =head3 bbhs  =head3 bbhs
1361    
1362    =over 4
1363    
1364    =item FUNCTION:
1365    
1366    Given a PEG-type "FeatureO" object, returns the list of BBHO objects
1367    corresponding to the pre-computed BBHs for that PEG.
1368    
1369    =item USAGE:
1370    
1371    C<< my @bbhs = $pegO->bbhs(); >>
1372    
1373    =item RETURNS:
1374    
1375    List of BBHO objects.
1376    
1377    =back
1378    
1379  =cut  =cut
1380    
1381  sub bbhs {  sub bbhs {
# Line 890  Line 1393 
1393                                                  },'BBHO') } @bbhs;                                                  },'BBHO') } @bbhs;
1394  }  }
1395    
1396    
1397  =head3 display  =head3 display
1398    
1399    =over 4
1400    
1401    =item FUNCTION:
1402    
1403    Prints info about a "FeatureO" object to STDOUT.
1404    
1405    =item USAGE:
1406    
1407    C<< $pegO->display(); >>
1408    
1409    =item RETURNS;
1410    
1411    (void)
1412    
1413    =back
1414    
1415  =cut  =cut
1416    
1417  sub display {  sub display {
# Line 910  Line 1430 
1430    
1431  =head1 BBHO  =head1 BBHO
1432    
1433    Methods for accessing "Bidirectiona Best Hits" (BBHs).
1434    
1435  =cut  =cut
1436    
1437    
1438  =head3 new  =head3 new
1439    
1440    Constructor of BBHO objects.
1441    
1442    (NOTE: The "average user" should never need to invoke this method.)
1443    
1444  =cut  =cut
1445    
1446  sub new {  sub new {
# Line 930  Line 1456 
1456  }  }
1457    
1458    
1459    
1460  =head3 peg1  =head3 peg1
1461    
1462    =over 4
1463    
1464    =item USAGE:
1465    
1466    C<< my $peg1 = $bbh->peg1(); >>
1467    
1468    =item RETURNS:
1469    
1470    A "FeatureO" object corresponding to the "query" sequence
1471    in a BBH pair.
1472    
1473    =back
1474    
1475  =cut  =cut
1476    
1477  sub peg1 {  sub peg1 {
# Line 943  Line 1483 
1483    
1484  =head3 peg2  =head3 peg2
1485    
1486    =over 4
1487    
1488    =item USAGE:
1489    
1490    C<< my $peg2 = $bbh->peg2(); >>
1491    
1492    =item RETURNS:
1493    
1494    A "FeatureO" object corresponding to the "database" sequence
1495    in a BBH pair.
1496    
1497    =back
1498    
1499  =cut  =cut
1500    
1501  sub peg2 {  sub peg2 {
# Line 956  Line 1509 
1509    
1510  =head3 psc  =head3 psc
1511    
1512    =over 4
1513    
1514    =item USAGE:
1515    
1516    C<< my $psc = $bbh->psc(); >>
1517    
1518    =item RETURNS:
1519    
1520    The numerical value of the BLAST E-value for the pair.
1521    
1522    =back
1523    
1524  =cut  =cut
1525    
1526  sub psc {  sub psc {
# Line 968  Line 1533 
1533    
1534  =head3 norm_bitscore  =head3 norm_bitscore
1535    
1536    
1537    =over 4
1538    
1539    =item USAGE:
1540    
1541    C<< my $bsc = $bbh->norm_bitscore(); >>
1542    
1543    =item RETURNS:
1544    
1545    The "BLAST bit-score per aligned character" for the pair.
1546    
1547    =back
1548    
1549  =cut  =cut
1550    
1551  sub norm_bitscore {  sub norm_bitscore {
# Line 984  Line 1562 
1562    
1563  =head1 AnnotationO  =head1 AnnotationO
1564    
1565    Methods for accessing SEED annotations.
1566    
1567  =cut  =cut
1568    
1569    
1570    
1571  =head3 new  =head3 new
1572    
1573    =over 4
1574    
1575    =item FUNCTION:
1576    
1577    Cronstruct a new "AnnotationO" object
1578    
1579    =item USAGE:
1580    
1581    C<< my $annotO = AnnotationO->new( $fid, $timestamp, $who, $text); >>
1582    
1583    =item C<$fid>
1584    
1585    A feature identifier.
1586    
1587    =item C<$timestamp>
1588    
1589    The C<UN*X> timestamp one wishes to associate with the annotation.
1590    
1591    =item C<$who>
1592    
1593    The annotator's user-name.
1594    
1595    =item C<$text>
1596    
1597    The textual content of the annotation.
1598    
1599    =item RETURNS:
1600    
1601    An "AnnotationO" object.
1602    
1603    =back
1604    
1605  =cut  =cut
1606    
1607  sub new {  sub new {
# Line 1007  Line 1619 
1619    
1620  =head3 fid  =head3 fid
1621    
1622    =over 4
1623    
1624    =item FUNCTION:
1625    
1626    Extract the feature-ID that was annotated.
1627    
1628    =item USAGE:
1629    
1630    C<< my $fid = $annotO->fid(); >>
1631    
1632    =item RETURNS;
1633    
1634    The feature-ID as a string.
1635    
1636    =back
1637    
1638  =cut  =cut
1639    
1640  sub fid {  sub fid {
# Line 1019  Line 1647 
1647    
1648  =head3 timestamp  =head3 timestamp
1649    
1650    =over 4
1651    
1652    =item FUNCTION:
1653    
1654    Extract the C<UN*X> timestamp of the annotation.
1655    
1656    =item USAGE:
1657    
1658    C<< my $fid = $annotO->timestamp(); >>
1659    
1660    =item RETURNS;
1661    
1662    The timestamp as a string.
1663    
1664    =back
1665    
1666  =cut  =cut
1667    
1668  sub timestamp {  sub timestamp {
# Line 1038  Line 1682 
1682    
1683  =head3 made_by  =head3 made_by
1684    
1685    =over 4
1686    
1687    =item FUNCTION:
1688    
1689    Extract the annotator's user-name.
1690    
1691    =item USAGE:
1692    
1693    C<< my $fid = $annotO->made_by(); >>
1694    
1695    =item RETURNS;
1696    
1697    The username of the annotator, as a string.
1698    
1699    =back
1700    
1701  =cut  =cut
1702    
1703  sub made_by {  sub made_by {
# Line 1052  Line 1712 
1712    
1713  =head3 text  =head3 text
1714    
1715    =over 4
1716    
1717    =item FUNCTION:
1718    
1719    Extract the text of the annotation.
1720    
1721    =item USGAE:
1722    
1723    C<< my $text = $annotO->text(); >>
1724    
1725    =item RETURNS:
1726    
1727    The text of the annotation, as a string.
1728    
1729    =back
1730    
1731  =cut  =cut
1732    
1733  sub text {  sub text {
# Line 1064  Line 1740 
1740    
1741  =head3 display  =head3 display
1742    
1743    =over 4
1744    
1745    =item FUNCTION:
1746    
1747    Print the contents of an "AnnotationO" object to B<STDOUT>
1748    in human-readable form.
1749    
1750    =item USAGE:
1751    
1752    C<< my $annotO->display(); >>
1753    
1754    =item RETURNS:
1755    
1756    (void)
1757    
1758    =back
1759    
1760  =cut  =cut
1761    
1762  sub display {  sub display {
# Line 1079  Line 1772 
1772  ########################################################################  ########################################################################
1773  use Data::Dumper;  use Data::Dumper;
1774    
1775    =head1 CouplingO
1776    
1777    Methods for accessing the "Functional coupling scores"
1778    of PEGs in close physical proximity to each other.
1779    
1780    =cut
1781    
1782    
1783    
1784  =head3 new  =head3 new
1785    
1786    =over 4
1787    
1788    =item FUNCTION:
1789    
1790    Construct a new "CouplingO" object
1791    encapsulating the "functional coupling" score
1792    between a pair of features in some genome.
1793    
1794    =item USAGE:
1795    
1796    C<< $couplingO = CouplingO->new($figO, $fid1, $fid2, $sc); >>
1797    
1798    =item C<$figO>
1799    
1800    Parent "FIGO" object.
1801    
1802    =item C<$fid1> and C<$fid2>
1803    
1804    A pair of feature-IDs.
1805    
1806    =item C<$sc>
1807    
1808    A functional-coupling score
1809    
1810    =item RETURNS:
1811    
1812    A "CouplingO" object.
1813    
1814    =back
1815    
1816  =cut  =cut
1817    
1818  sub new {  sub new {
# Line 1100  Line 1832 
1832    
1833  =head3 peg1  =head3 peg1
1834    
1835    =over 4
1836    
1837    =item FUNCTION:
1838    
1839    Returns a "FeatureO" object corresponding to the first FID in a coupled pair.
1840    
1841    =item USAGE:
1842    
1843    C<< my $peg1 = $couplingO->peg1(); >>
1844    
1845    =item RETURNS:
1846    
1847    A "FeatureO" object.
1848    
1849    =back
1850    
1851  =cut  =cut
1852    
1853  sub peg1 {  sub peg1 {
# Line 1111  Line 1859 
1859    
1860    
1861    
1862  =head3 peg1  =head3 peg2
1863    
1864    =over 4
1865    
1866    =item FUNCTION:
1867    
1868    Returns a "FeatureO" object corresponding to the second FID in a coupled pair.
1869    
1870    =item USAGE:
1871    
1872    C<< my $peg2 = $couplingO->peg2(); >>
1873    
1874    =item RETURNS:
1875    
1876    A "FeatureO" object.
1877    
1878    =back
1879    
1880  =cut  =cut
1881    
# Line 1126  Line 1890 
1890    
1891  =head3 sc  =head3 sc
1892    
1893    =over 4
1894    
1895    =item FUNCTION:
1896    
1897    Extracts the "functional coupling" score from a "CouplingO" object.
1898    
1899    =item USAGE:
1900    
1901    C<< my $sc = $couplingO->sc(); >>
1902    
1903    =item RETURNS:
1904    
1905    A scalar score.
1906    
1907    =back
1908    
1909  =cut  =cut
1910    
1911  sub sc {  sub sc {
# Line 1138  Line 1918 
1918    
1919  =head3 evidence  =head3 evidence
1920    
1921    =over 4
1922    
1923    =item FUNCTION:
1924    
1925    Fetch the evidence for a "functional coupling" between two close PEGs,
1926    in the form of a list of objects describing the "Pairs of Close Homologs" (PCHs)
1927    supporting the existence of a functional coupling between the two close PEGs.
1928    
1929    =item USAGE:
1930    
1931    C<< my $evidence = $couplingO->evidence(); >>
1932    
1933    =item RETURNS
1934    
1935    List of pairs of "FeatureO" objects.
1936    
1937    =back
1938    
1939  =cut  =cut
1940    
1941  sub evidence {  sub evidence {
# Line 1146  Line 1944 
1944      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1945      my $fig  = $figO->{_fig};      my $fig  = $figO->{_fig};
1946      my @ev = ();      my @ev = ();
1947      foreach my $tuple ($fig->coupling_evidence($self->peg1,$self->peg2))      foreach my $tuple ($fig->coupling_evidence($self->peg1->id,$self->peg2->id))
1948      {      {
1949          my($peg3,$peg4,$rep) = @$tuple;          my($peg3,$peg4,$rep) = @$tuple;
1950          push(@ev,[&FeatureO::new('FeatureO',$figO,$peg3),          push(@ev,[&FeatureO::new('FeatureO',$figO,$peg3),
# Line 1160  Line 1958 
1958    
1959  =head3 display  =head3 display
1960    
1961    =over 4
1962    
1963    =item FUNCTION:
1964    
1965    Print the contents of a "CouplingO" object to B<STDOUT> in human-readable form.
1966    
1967    =item USAGE:
1968    
1969    C<< $couplingO->display(); >>
1970    
1971    =item RETURNS:
1972    
1973    (Void)
1974    
1975    =back
1976    
1977  =cut  =cut
1978    
1979  sub display {  sub display {
# Line 1212  Line 2026 
2026    
2027  =head3 usable  =head3 usable
2028    
2029    
2030  =cut  =cut
2031    
2032  sub usable {  sub usable {
# Line 1234  Line 2049 
2049      my $figO = $self->{_figO};      my $figO = $self->{_figO};
2050      my $subO = $self->{_subO};      my $subO = $self->{_subO};
2051      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }
     if (! defined($subO) { return undef }  
2052    
2053      return map { &GenomeO::new('GenomeO',$figO,$_) } $subO->get_genomes;      return map { &GenomeO::new('GenomeO',$figO,$_) } $subO->get_genomes;
2054  }  }
# Line 1251  Line 2065 
2065      my $figO = $self->{_figO};      my $figO = $self->{_figO};
2066      my $subO = $self->{_subO};      my $subO = $self->{_subO};
2067      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }
2068      if (! defined($subO) { return undef }  
2069      return map { &FunctionalRoleO::new('FunctionalRoleO',$figO,$_) }  $subO->get_roles($self->id);      return map { &FunctionalRoleO::new('FunctionalRoleO',$figO,$_) }  $subO->get_roles($self->id);
2070  }  }
2071    
2072    
2073    
2074  =head3 curator  =head3 curator
2075    
2076  =cut  =cut
# Line 1266  Line 2082 
2082      my $subO = $self->{_subO};      my $subO = $self->{_subO};
2083      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }
2084    
2085      return defined($subO) ? $subO->get_curator : undef;      return $subO->get_curator;
2086  }  }
2087    
2088    
# Line 1282  Line 2098 
2098      my $figO = $self->{_figO};      my $figO = $self->{_figO};
2099      my $subO = $self->{_subO};      my $subO = $self->{_subO};
2100      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }
     if (! defined($subO) { return undef }  
2101    
2102      return $subO->get_variant_code_for_genome($genome->id);      return $subO->get_variant_code_for_genome($genome->id);
2103  }  }
# Line 1299  Line 2114 
2114      my $figO = $self->{_figO};      my $figO = $self->{_figO};
2115      my $subO = $self->{_subO};      my $subO = $self->{_subO};
2116      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }
     if (! defined($subO) { return undef }  
2117    
2118      return $subO->get_pegs_from_cell($genome->id,$role->id);      return $subO->get_pegs_from_cell($genome->id,$role->id);
2119  }  }
# Line 1313  Line 2127 
2127    
2128  =head1 FunctionalRoleO  =head1 FunctionalRoleO
2129    
2130    Methods for accessing the functional roles of features.
2131    
2132  =cut  =cut
2133    
2134    
# Line 1409  Line 2225 
2225      my $famO = $self->{_famO};      my $famO = $self->{_famO};
2226      if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }      if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
2227    
2228      return map { &FigFamO::new('FigFamO',$figO,$_) } $famO->list_members;      return map { &FeatureO::new('FeatureO',$figO,$_) } $famO->list_members;
2229  }  }
2230    
   
   
2231  =head3 rep_seqs  =head3 rep_seqs
2232    
2233  =cut  =cut
# Line 1465  Line 2279 
2279  ########################################################################  ########################################################################
2280  =head1 Attribute  =head1 Attribute
2281    
2282    (Note yet implemented.)
2283    
2284  =cut  =cut
2285    
2286  1;  1;

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.31

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3