[Bio] / FigKernelPackages / FIGO.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIGO.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6, Thu Feb 22 13:43:18 2007 UTC revision 1.23, Mon May 7 00:43:26 2007 UTC
# Line 1  Line 1 
1    ########################################################################
2  #  #
3  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2006 University of Chicago and Fellowship
4  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
# Line 14  Line 15 
15  # Genomes at veronika@thefig.info or download a copy from  # Genomes at veronika@thefig.info or download a copy from
16  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
17  #  #
18    ########################################################################
19    
20  package FIGO;  =head1 Overview
21    
22    This module is a set of packages encapsulating the SEED's core methods
23    using an "OOP-like" style.
24    
25    There are several modules clearly related to "individual genomes:"
26    FIGO, GenomeO, ContigO, FeatureO (and I<maybe> AnnotationO).
27    
28    There are also modules that deal with complex relationships between
29    pairs or sets of features in one, two, or more genomes,
30    rather than any particular single genome:
31    BBHO, CouplingO, SubsystemO, FunctionalRoleO, FigFamO.
32    
33    Finally, the methods in "Attribute" might in principle attach
34    "atributes" to any type of object.
35    (Likewise, in principle one might like to attach an "annotation"
36    to any type of object
37    
38    Four of the modules dealing with "genomes" have a reasonable clear
39    "implied heirarchy:"
40    
41    =over 4
42    
43        FIGO > GenomeO > ContigO > FeatureO
44    
45    =back
46    
47    However, inheritance is B<NOT> implemented using the C<@ISA> mechanism,
48    because some methods deal with "pairwise" or "setwise" relations between objects
49    or other more complex relationships that do not naturally fit into any heirarchy ---
50    which would get us into the whole quagmire of "multiple inheritance."
51    
52    We have chosen to in many cases sidestep the entire issue of inheritance
53    via an I<ad hoc> mechanism:
54    If a "child" object needs access to its "ancestors'" methods,
55    we pass it references to its "ancestors" using subroutine arguments.
56    This is admittedly ugly, clumsy, and potentially error-prone ---
57    but it has the advantage that, unlike multiple inheritance,
58    we understand how to do it...
59    
60    MODULE DEPENDENCIES: FIG, FIG_Config, FigFams, SFXlate, SproutFIG, Tracer,
61        gjoparseblast, Data::Dumper.
62    
63    =cut
64    
65    ########################################################################
66    package FIGO;
67    ########################################################################
68  use strict;  use strict;
69  use FIG;  use FIG;
70  use FIG_Config;  use FIG_Config;
# Line 24  Line 72 
72  use SproutFIG;  use SproutFIG;
73  use Tracer;  use Tracer;
74  use Data::Dumper;  use Data::Dumper;
75    use Carp;
76  use FigFams;  use FigFams;
77  use gjoparseblast;  use gjoparseblast;
78    
79  =head1 FIGO Methods  =head1 FIGO
80    
81    The effective "base class" containing a few "top-level" methods.
82    
83    =cut
84    
85    
86  =head3 new  =head3 new
87    
# Line 64  Line 118 
118      return bless $self, $class;      return bless $self, $class;
119  }  }
120    
121    sub function_of {
122        my($self,$id) = @_;
123    
124        my $fig  = $self->{_fig};
125        my $func = $fig->function_of($id);
126    
127        return ($func ? $func : "");
128    }
129    
130  =head3 genomes  =head3 genomes
131    
# Line 80  Line 141 
141  C<< my @tax_ids = $figo->genomes( @constraints ); >>  C<< my @tax_ids = $figo->genomes( @constraints ); >>
142    
143  =item @constraints  =item @constraints
144    
145  One or more element of: complete, prokaryotic, eukaryotic, bacterial, archaeal, nmpdr.  One or more element of: complete, prokaryotic, eukaryotic, bacterial, archaeal, nmpdr.
146    
147  =item RETURNS: List of Tax-IDs.  =item RETURNS: List of Tax-IDs.
148    
149  =item EXAMPLE: L<Display all complete, prokaryotic genomes>  =item EXAMPLE:
150    
151    L<Display all complete, prokaryotic genomes>
152    
153  =back  =back
154    
# Line 141  Line 205 
205  =over4  =over4
206    
207  =item RETURNS:  =item RETURNS:
208    
209  List of all subsystems.  List of all subsystems.
210    
211  =item EXAMPLE: L<Accessing Subsystem data>  =item EXAMPLE:
212    
213    L<Accessing Subsystem data>
214    
215  =back  =back
216    
# Line 188  Line 255 
255    
256  =over4  =over4
257    
258  =item USAGE:  C<< foreach $fam ($figO->all_figfams) { #...Do something } >>  =item USAGE:
259    
260    C<< foreach $fam ($figO->all_figfams) { #...Do something } >>
261    
262    =item RETURNS:
263    
264  =item RETURNS: List of FIGfam Objects      List of FIGfam Objects
265    
266  =item EXAMPLE: L<Accessing FIGfams>  =item EXAMPLE:
267    
268    L<Accessing FIGfams>
269    
270  =back  =back
271    
# Line 211  Line 284 
284    
285  =over4  =over4
286    
287  =item USAGE:   C<< my ($fam, $sims) = $figO->family_containing($seq); >>  =item USAGE:
288    
289    C<< my ($fam, $sims) = $figO->family_containing($seq); >>
290    
291    =item $seq:
292    
293  =item $seq:    A protein translation string.      A protein translation string.
294    
295  =item RETURNS:  =item RETURNS:
296    
297        $fam:  A FIGfam Object.        $fam:  A FIGfam Object.
298    
299        $sims: A set of similarity objects.        $sims: A set of similarity objects.
300    
301  =item EXAMPLE: L<Placing a sequence into a FIGfam>  =item EXAMPLE: L<Placing a sequence into a FIGfam>
# Line 241  Line 320 
320      }      }
321  }  }
322    
323    =head3 figfam
324    
325    =over 4
326    
327    =item USAGE:
328    
329    C<< my $fam = $figO->figfam($family_id); >>
330    
331    =item $family_id;
332    
333        A FigFam ID
334    
335    =item RETURNS:
336    
337          $fam:  A FIGfam Object.
338    
339    =back
340    
341    =cut
342    
343    sub figfam {
344        my($self,$fam_id) = @_;
345    
346        return &FigFamO::new('FigFamO',$self,$fam_id);
347    }
348    
349    
350  ########################################################################  ########################################################################
351  package GenomeO;  package GenomeO;
# Line 251  Line 356 
356    
357  =cut  =cut
358    
359    
360  =head3 new  =head3 new
361    
362  Constructor of GenomeO objects.  Constructor of GenomeO objects.
# Line 258  Line 364 
364  =over4  =over4
365    
366  =item USAGE:  =item USAGE:
367    
368  C<< my $org = GenomeO->new($figo, $tax_id); >>  C<< my $org = GenomeO->new($figo, $tax_id); >>
369    
370  =item RETURNS: A new GenomeO object.  =item RETURNS:
371    
372        A new GenomeO object.
373    
374  =back  =back
375    
# Line 281  Line 390 
390    
391  =over4  =over4
392    
393  =item USAGE:   C<< my $tax_id = $org->id(); >>  =item USAGE:
394    
395    C<< my $tax_id = $org->id(); >>
396    
397    =item RETURNS:
398    
399  =item RETURNS: Taxonomy-ID of GenomeO object.      Taxonomy-ID of GenomeO object.
400    
401  =back  =back
402    
# Line 301  Line 414 
414    
415  =over4  =over4
416    
417  =item USAGE:   C<< $gs = $genome->genus_species(); >>  =item USAGE:
418    
419    C<< $gs = $genome->genus_species(); >>
420    
421  =item RETURNS: Genus-species-strain string  =item RETURNS:
422    
423        Genus-species-strain string
424    
425  =back  =back
426    
# Line 321  Line 438 
438    
439  =over4  =over4
440    
441  =item RETURNS: List of C<contig> objects contained in a C<GenomeO> object.  =item RETURNS:
442    
443        List of C<contig> objects contained in a C<GenomeO> object.
444    
445  =item EXAMPLE: L<Show how to access contigs and extract sequence>  =item EXAMPLE:
446    
447    L<Show how to access contigs and extract sequence>
448    
449  =back  =back
450    
# Line 359  Line 480 
480    
481  =over4  =over4
482    
483  =item USAGE:   C<< $genome->display(); >>  =item USAGE:
484    
485    C<< $genome->display(); >>
486    
487  =item RETURNS: Null  =item RETURNS:
488    
489        (Void)
490    
491  =back  =back
492    
# Line 393  Line 518 
518  =over4  =over4
519    
520  =item USAGE:  =item USAGE:
521    
522  C<< $contig = ContigO->new( $figO, $genomeId, $contigId); >>  C<< $contig = ContigO->new( $figO, $genomeId, $contigId); >>
523    
524  =item $figO: A FIGO object.  =item $figO:
525    
526        Parent FIGO object.
527    
528    =item $genomeId:
529    
530        Taxon-ID for the genome the contig is from.
531    
532    =item $contigId:
533    
534  =item $genomeId: Taxon-ID for the genome the contig is from.      Identifier for the contig
535    
536  =item $contigId: Identifier for the contig  =item RETURNS:
537    
538  =item RETURNS: A "ContigO" object.      A "ContigO" object.
539    
540  =back  =back
541    
# Line 423  Line 557 
557    
558  =over4  =over4
559    
560  =item RETURNS: Sequence ID string of "ContigO" object  =item RETURNS:
561    
562        Sequence ID string of "ContigO" object
563    
564  =back  =back
565    
# Line 441  Line 577 
577  =over4  =over4
578    
579  =item USAGE:  =item USAGE:
     C<< my $tax_id = $contig->genome(); >>  
580    
581  =item RETURNS: GenomeO object containing the contig object.  C<< my $tax_id = $contig->genome->id(); >>
582    
583    =item RETURNS:
584    
585        Tax-ID of the GenomeO object containing the contig object.
586    
587  =back  =back
588    
# Line 452  Line 591 
591  sub genome {  sub genome {
592      my($self) = @_;      my($self) = @_;
593    
594      return $self->{_genome};      my $figO = $self->{_figO};
595        return new GenomeO($figO,$self->{_genome});
596  }  }
597    
598    
# Line 462  Line 602 
602  =over4  =over4
603    
604  =item USAGE:  =item USAGE:
605    
606      C<< my $len = $contig->contig_length(); >>      C<< my $len = $contig->contig_length(); >>
607    
608  =item RETURNS: Length of contig's DNA sequence.  =item RETURNS:
609    
610        Length of contig's DNA sequence.
611    
612  =back  =back
613    
# Line 474  Line 617 
617      my($self) = @_;      my($self) = @_;
618    
619      my $fig = $self->{_figO}->{_fig};      my $fig = $self->{_figO}->{_fig};
620      my $contig_lengths = $fig->contig_lengths($self->genome);      my $contig_lengths = $fig->contig_lengths($self->genome->id);
621      return $contig_lengths->{$self->id};      return $contig_lengths->{$self->id};
622  }  }
623    
# Line 484  Line 627 
627  =over4  =over4
628    
629  =item USAGE:  =item USAGE:
630    
631      C<< my $seq = $contig->dna_seq(beg, $end); >>      C<< my $seq = $contig->dna_seq(beg, $end); >>
632    
633  =item $beg: Begining point of DNA subsequence  =item $beg:
634    
635        Begining point of DNA subsequence
636    
637  =item $end: End point of DNA subsequence  =item $end:
638    
639  =item RETURNS: string of DNA sequence from $beg to $end      End point of DNA subsequence
640    
641    =item RETURNS:
642    
643        string of DNA sequence running from $beg to $end
644  (NOTE: if $beg > $end, returns reverse complement of DNA subsequence.)  (NOTE: if $beg > $end, returns reverse complement of DNA subsequence.)
645    
646  =back  =back
# Line 506  Line 655 
655      if (($beg && (&FIG::between(1,$beg,$max))) &&      if (($beg && (&FIG::between(1,$beg,$max))) &&
656          ($end && (&FIG::between(1,$end,$max))))          ($end && (&FIG::between(1,$end,$max))))
657      {      {
658          return $fig->dna_seq($self->genome,join("_",($self->id,$beg,$end)));          return $fig->dna_seq($self->genome->id,join("_",($self->id,$beg,$end)));
659      }      }
660      else      else
661      {      {
# Line 527  Line 676 
676    
677  =over4  =over4
678    
679  =item RETURNS: Nil  =item RETURNS:
680    
681        (Void)
682    
683  =back  =back
684    
# Line 536  Line 687 
687  sub display {  sub display {
688      my($self) = @_;      my($self) = @_;
689    
690      print join("ContigO",$self->genome,$self->id,$self->contig_length),"\n";      print join("ContigO",$self->genome->id,$self->id,$self->contig_length),"\n";
691    }
692    
693    sub features_in_region {
694        my($self,$beg,$end) = @_;
695        my $figO = $self->{_figO};
696        my $fig = $figO->{_fig};
697    
698        my($features) = $fig->genes_in_region($self->genome->id,$self->id,$beg,$end);
699        return map { new FeatureO($figO,$_) } @$features;
700  }  }
701    
702    
# Line 545  Line 705 
705  package FeatureO;  package FeatureO;
706  ########################################################################  ########################################################################
707  use Data::Dumper;  use Data::Dumper;
708    use Carp;
709    
710  =head1 FeatureO  =head1 FeatureO
711    
712  =cut  Methods for working with features on "ContigO" objects.
713    
714    =cut
715    
716    
717  =head1 new  =head3 new
718    
719  Constructor of "FeatureO" objects  Constructor of "FeatureO" objects
720    
721    =over 4
722    
723    =item USAGE:
724    
725    C<< my $feature = FeatureO->new( $figO, $fid ); >>
726    
727    =item C<$figO>:
728    
729    "Base" FIGO object.
730    
731    =item C<$fid>:
732    
733    Feature-ID for new feature
734    
735    =item RETURNS:
736    
737    A newly created "FeatureO" object.
738    
739    =back
740    
741  =cut  =cut
742    
743  sub new {  sub new {
# Line 569  Line 751 
751  }  }
752    
753    
754    
755  =head3 id  =head3 id
756    
757    =over 4
758    
759    =item USAGE:
760    
761    C<< my $fid = $feature->id(); >>
762    
763    =item RETURNS:
764    
765    The FID (Feature ID) of a "FeatureO" object.
766    
767    =back
768    
769  =cut  =cut
770    
771  sub id {  sub id {
# Line 583  Line 778 
778    
779  =head3 genome  =head3 genome
780    
781    =over 4
782    
783    =item USAGE:
784    
785    C<< my $taxid = $feature->genome(); >>
786    
787    =item RETURNS:
788    
789    The Taxon-ID for the "GenomeO" object containing the feature.
790    
791    =back
792    
793  =cut  =cut
794    
795  sub genome {  sub genome {
796      my($self) = @_;      my($self) = @_;
797        my $figO = $self->{_figO};
798      $self->id =~ /^fig\|(\d+\.\d+)/;      $self->id =~ /^fig\|(\d+\.\d+)/;
799      return $1;      return new GenomeO($figO,$1);
800  }  }
801    
802    
803    
804  =head3 type  =head3 type
805    
806    =over 4
807    
808    =item USAGE:
809    
810    C<< my $feature_type = $feature->type(); >>
811    
812    =item RETURNS:
813    
814    The feature object's "type" (e.g., "peg," "rna," etc.)
815    
816    =back
817    
818  =cut  =cut
819    
820  sub type {  sub type {
# Line 607  Line 826 
826    
827    
828    
   
829  =head3 location  =head3 location
830    
831    =over 4
832    
833    =item USAGE:
834    
835    C<< my $loc = $feature->location(); >>
836    
837    =item RETURNS:
838    
839    A string representing the feature object's location on the genome's DNA,
840    in SEED "tbl format" (i.e., "contig_beging_end").
841    
842    =back
843    
844  =cut  =cut
845    
846  sub location {  sub location {
# Line 620  Line 851 
851  }  }
852    
853    
854    =head3 contig
855    
856    =over 4
857    
858    =item USAGE:
859    
860    C<< my $contig = $feature->contig(); >>
861    
862    =item RETURNS:
863    
864    A "ContigO" object to access the contig data
865    for the contig the feature is on.
866    
867    =back
868    
869    =cut
870    
871    sub contig {
872        my($self) = @_;
873    
874        my $figO = $self->{_figO};
875        my $loc      = $self->location;
876        my $genomeID = $self->genome->id;
877        return ($loc =~ /^(\S+)_\d+_\d+$/) ? new ContigO($figO,$genomeID,$1) : undef;
878    }
879    
880    
881    
882    =head3 begin
883    
884    =over 4
885    
886    =item USAGE:
887    
888    C<< my $beg = $feature->begin(); >>
889    
890    =item RETURNS:
891    
892    The numerical coordinate of the first base of the feature.
893    
894    =back
895    
896    =cut
897    
898    sub begin {
899        my($self) = @_;
900    
901        my $loc = $self->location;
902        return ($loc =~ /^\S+_(\d+)_\d+$/) ? $1 : undef;
903    }
904    
905    
906    
907    =head3 end
908    
909    =over 4
910    
911    =item USAGE:
912    
913    C<< my $end = $feature->end(); >>
914    
915    =item RETURNS:
916    
917    The numerical coordinate of the last base of the feature.
918    
919    =back
920    
921    =cut
922    
923    sub end {
924        my($self) = @_;
925    
926        my $loc = $self->location;
927        return ($loc =~ /^\S+_\d+_(\d+)$/) ? $1 : undef;
928    }
929    
930    
931    
932  =head3 dna_seq  =head3 dna_seq
933    
934    =over 4
935    
936    =item USAGE:
937    
938    C<< my $dna_seq = $feature->dna_seq(); >>
939    
940    =item RETURNS:
941    
942    A string contining the DNA subsequence of the contig
943    running from the first to the last base of the feature.
944    
945    If ($beg > $end), the reverse complement subsequence is returned.
946    
947    =back
948    
949  =cut  =cut
950    
951  sub dna_seq {  sub dna_seq {
# Line 638  Line 961 
961    
962  =head3 prot_seq  =head3 prot_seq
963    
964    =over 4
965    
966    =item USAGE:
967    
968    C<< my $dna_seq = $feature->prot_seq(); >>
969    
970    =item RETURNS:
971    
972    A string contining the protein translation of the feature (if it exists),
973    or the "undef" value if the feature does not exist or is not a PEG.
974    
975    =back
976    
977  =cut  =cut
978    
979  sub prot_seq {  sub prot_seq {
# Line 653  Line 989 
989    
990  =head3 function_of  =head3 function_of
991    
992    =over 4
993    
994    =item USAGE:
995    
996    C<< my $func = $feature->function_of(); >>
997    
998    =item RETURNS:
999    
1000    A string containing the function assigned to the feature,
1001    or the "undef" value if no function has been assigned.
1002    
1003    =back
1004    
1005  =cut  =cut
1006    
1007  sub function_of {  sub function_of {
# Line 667  Line 1016 
1016    
1017  =head3 coupled_to  =head3 coupled_to
1018    
1019    =over 4
1020    
1021    =item USAGE:
1022    
1023    C<< my @coupled_features = $feature->coupled_to(); >>
1024    
1025    =item RETURNS:
1026    
1027    A list of L<CouplingO> objects describing the evidence for functional coupling
1028    between this feature and other nearby features.
1029    
1030    =back
1031    
1032  =cut  =cut
1033    
1034  sub coupled_to {  sub coupled_to {
1035      my($self) = @_;      my($self) = @_;
1036    
1037      ($self->type eq "peg") || return undef;      ($self->type eq "peg") || return ();
1038      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1039      my $fig  = $figO->{_fig};      my $fig  = $figO->{_fig};
1040      my $peg1 = $self->id;      my $peg1 = $self->id;
# Line 689  Line 1051 
1051    
1052  =head3 annotations  =head3 annotations
1053    
1054    =over 4
1055    
1056    =item USAGE:
1057    
1058    C<< my @annot_list = $feature->annotations(); >>
1059    
1060    =item RETURNS:
1061    
1062    A list of L<AnnotationO> objects allowing access to the annotations for this feature.
1063    
1064    =back
1065    
1066  =cut  =cut
1067    
1068  sub annotations {  sub annotations {
# Line 700  Line 1074 
1074      return map { &AnnotationO::new('AnnotationO',@$_) } $fig->feature_annotations($self->id,1);      return map { &AnnotationO::new('AnnotationO',@$_) } $fig->feature_annotations($self->id,1);
1075  }  }
1076    
1077    
1078    =head3 in_subsystems
1079    
1080    =over 4
1081    
1082    =item USAGE:
1083    
1084    C<< my @subsys_list = $feature->in_subsystems(); >>
1085    
1086    =item RETURNS:
1087    
1088    A list of L<SubsystemO> objects allowing access to the subsystems
1089    that this feature particupates in.
1090    
1091    =back
1092    
1093    =cut
1094    
1095  sub in_subsystems {  sub in_subsystems {
1096      my($self) = @_;      my($self) = @_;
1097      my $figO = $self->{_figO};      my $figO = $self->{_figO};
# Line 711  Line 1103 
1103    
1104  =head3 possibly_truncated  =head3 possibly_truncated
1105    
1106    =over 4
1107    
1108    =item USAGE:
1109    
1110    C<< my $trunc = $feature->possibly_truncated(); >>
1111    
1112    =item RETURNS:
1113    
1114    Boolean C<TRUE> if the feature may be truncated;
1115    boolean C<FALSE> otherwise.
1116    
1117    =back
1118    
1119  =cut  =cut
1120    
1121  sub possibly_truncated {  sub possibly_truncated {
# Line 725  Line 1130 
1130    
1131  =head3 possible_frameshift  =head3 possible_frameshift
1132    
1133    =over 4
1134    
1135    =item USAGE:
1136    
1137    C<< my $fs = $feature->possible_frameshift(); >>
1138    
1139    =item RETURNS:
1140    
1141    Boolean C<TRUE> if the feature may be a frameshifted fragment;
1142    boolean C<FALSE> otherwise.
1143    
1144    (NOTE: This is a crude prototype implementation,
1145    and is mostly as an example of how to code using FIGO.)
1146    
1147    =back
1148    
1149  =cut  =cut
1150    
1151  sub possible_frameshift {  sub possible_frameshift {
# Line 732  Line 1153 
1153      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1154      my($tmp_dir) = $figO->{_tmp_dir};      my($tmp_dir) = $figO->{_tmp_dir};
1155    
1156        #...Skip tests and return '0' if truncated...
1157      if (! $self->possibly_truncated)      if (! $self->possibly_truncated)
1158      {      {
1159            #...Get best precomputed BLAST hit if E-value < 1.0e-50:
1160          my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);          my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);
1161    
1162            #...If a sim was returned:
1163          if (my $sim = shift @sims)          if (my $sim = shift @sims)
1164          {          {
1165                #...Get best hit FID and boundaries:
1166              my $peg2 = $sim->id2;              my $peg2 = $sim->id2;
1167              my $ln1  = $sim->ln1;              my $ln1  = $sim->ln1;
1168              my $ln2  = $sim->ln2;              my $ln2  = $sim->ln2;
1169              my $b2   = $sim->b2;              my $b2   = $sim->b2;
1170              my $e2   = $sim->e2;              my $e2   = $sim->e2;
1171    
1172                #...Convert from AA to BP, and pad out w/ 100 bp guard region:
1173              my $adjL = 100 + (($b2-1) * 3);              my $adjL = 100 + (($b2-1) * 3);
1174              my $adjR = 100 + (($ln2 - $e2) * 3);              my $adjR = 100 + (($ln2 - $e2) * 3);
1175    
1176                #...If hit is more than 20% longer than query:
1177              if ($ln2 > (1.2 * $ln1))              if ($ln2 > (1.2 * $ln1))
1178              {              {
1179                    #...Get and parse query location:
1180                  my $loc = $self->location;                  my $loc = $self->location;
1181                  if ($loc =~ /^(\S+)_(\d+)_(\d+)/)                  if ($loc =~ /^(\S+)_(\d+)_(\d+)/)
1182                  {                  {
1183                      my $contig = $1;                      my $contig = $1;
1184                      my $beg    = $2;                      my $beg    = $2;
1185                      my $end = $3;                      my $end = $3;
1186                      my $contigO = new ContigO($figO,$self->genome,$contig);  
1187                        #...Create new ContigO object:
1188                        my $contigO = new ContigO($figO, $self->genome->id, $contig);
1189    
1190                        #...Extract DNA subsequence, including guard regions:
1191                      my $begA = &max(1,$beg - $adjL);                      my $begA = &max(1,$beg - $adjL);
1192                      my $endA = &min($end+$adjR,$contigO->contig_length);                      my $endA = &min($end+$adjR,$contigO->contig_length);
1193                      my $dna  = $contigO->dna_seq($begA,$endA);                      my $dna  = $contigO->dna_seq($begA,$endA);
1194                      open(TMP,">$tmp_dir/tmp_dna") || die "couild not open tmp_dna";                      if (defined($dna) && (length($dna) > 90))
1195                        {
1196                            #...Open tmp-file and write FASTA containing DNA subregion to be BLASTed:
1197                            open( TMP, ">$tmp_dir/tmp_dna") || die "could not open tmp_dna";
1198                      print TMP ">dna\n$dna\n";                      print TMP ">dna\n$dna\n";
1199                      close(TMP);                      close(TMP);
1200    
1201                      my $peg2O = new FeatureO($figO,$peg2);                          #...Create new FeatureO object corresponding tp $peg2:
1202                      my $prot  = $peg2O->prot_seq;                          my $pegO2 = new FeatureO($figO,$peg2);
1203    
1204                            #...Fetch its translation, and print to tmp FASTA file for BLASTing:
1205                            my $prot  = $pegO2->prot_seq;
1206                            if (defined($prot) && (length($prot) > 30))
1207                            {
1208                      open(TMP,">$tmp_dir/tmp_prot") || die "could not open tmp_prot";                      open(TMP,">$tmp_dir/tmp_prot") || die "could not open tmp_prot";
1209                      print TMP ">tmp_prot\n$prot\n";                      print TMP ">tmp_prot\n$prot\n";
1210                      close(TMP);                      close(TMP);
1211    
1212                                #...Build BLAST nucleotide database for extracted DNA region,
1213                                #   and TBLASTN $peg2 against the DNA:
1214                      &run("formatdb -i $tmp_dir/tmp_dna -pF");                      &run("formatdb -i $tmp_dir/tmp_dna -pF");
1215                      open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")                      open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")
1216                          || die "could not blast";                          || die "could not blast";
1217    
1218                                #...Parse the TBLASTN output; find and sort HSPs by left boundary:
1219                      my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);                      my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);
1220                      my @hsps       = sort { $a->[0] <=> $b->[0] }                      my @hsps       = sort { $a->[0] <=> $b->[0] }
1221                                       map { [$_->[9],$_->[10],$_->[12],$_->[13]] }                                       map { [$_->[9],$_->[10],$_->[12],$_->[13]] }
1222                                       grep { $_->[1] < 1.0e-50 }                                       grep { $_->[1] < 1.0e-50 }
1223                                       @{$db_seq_out->[6]};                                       @{$db_seq_out->[6]};
1224    
1225                                #...Extract HSP boundary pairs:
1226                      my @prot = map { [$_->[0],$_->[1]] } @hsps;                      my @prot = map { [$_->[0],$_->[1]] } @hsps;
1227                      my @dna  = map { [$_->[2],$_->[3]] } @hsps;                      my @dna  = map { [$_->[2],$_->[3]] } @hsps;
1228                      if (&covers(\@prot,length($prot),3) && &covers(\@dna,3*length($prot),9))  
1229                                #...If the "cover" of the HSPs covers more than 90% of $peg2 w gaps < 3 AA,
1230                                #   and the "cover" of the HPSs cover more than 90% of the extracted DNA
1231                                #   w/ gaps < 9 bp (but not a multiple of 3), suspect a possible frameshift:
1232                                if (&covers(\@prot,length($prot),3,0) && &covers(\@dna,3*length($prot),9,1))
1233                      {                      {
1234                          return 1;                          return 1;
1235                      }                      }
# Line 784  Line 1237 
1237              }              }
1238          }          }
1239      }      }
1240            }
1241        }
1242      return 0;      return 0;
1243  }  }
1244    
# Line 791  Line 1246 
1246    
1247  =head3 run  =head3 run
1248    
1249    (Note: This function should be considered "PRIVATE")
1250    
1251    =over 4
1252    
1253    =item FUNCTION:
1254    
1255    Passes a string containing a command to be execture by the "system" shell command.
1256    
1257    =item USAGE:
1258    
1259    C<< $feature->run($cmd); >>
1260    
1261    =item RETURNS:
1262    
1263    Nil if the execution of C<$cmd> was successful;
1264    aborts with traceback if C<$cmd> fails.
1265    
1266    =back
1267    
1268  =cut  =cut
1269    
1270  sub run {  sub run {
1271      my($cmd) = @_;      my($cmd) = @_;
1272      (system($cmd) == 0) || Confess("FAILED: $cmd");      (system($cmd) == 0) || confess("FAILED: $cmd");
1273  }  }
1274    
1275    
1276    
1277  =head3 max  =head3 max
1278    
1279    (Note: This function should be considered "PRIVATE")
1280    
1281    =over 4
1282    
1283    =item USAGE:
1284    
1285    C<< my $max = $feature->max($x, $y); >>
1286    
1287    =item C<$x>
1288    
1289    Numerical value.
1290    
1291    =item C<$y>
1292    
1293    Numerical value.
1294    
1295    =items RETURNS:
1296    
1297    The larger of the two numerical values C<$x> and C<$y>.
1298    
1299    =back
1300    
1301  =cut  =cut
1302    
1303  sub max {  sub max {
# Line 813  Line 1309 
1309    
1310  =head3 min  =head3 min
1311    
1312    (Note: This function should be considered "PRIVATE")
1313    
1314    =over 4
1315    
1316    =item USAGE:
1317    
1318    C<< my $min = $feature->min($x, $y); >>
1319    
1320    =item C<$x>
1321    
1322    Numerical value.
1323    
1324    =item C<$y>
1325    
1326    Numerical value.
1327    
1328    =item RETURNS:
1329    
1330    The smaller of the two numerical values C<$x> and C<$y>.
1331    
1332    =back
1333    
1334  =cut  =cut
1335    
1336  sub min {  sub min {
# Line 824  Line 1342 
1342    
1343  =head3 covers  =head3 covers
1344    
1345    (Question: Should this function be considered "PRIVATE" ???)
1346    
1347    USAGE:
1348        C<< if (&covers(\@hits, $len, $diff, $must_shift)) { #...Do stuff } >>
1349    
1350    Returns boolean C<TRUE> if a set of BLAST HSPs "cover" more than 90%
1351    of the database sequence(?).
1352    
1353  =cut  =cut
1354    
1355  sub covers {  sub covers {
1356      my($hsps,$ln,$diff) = @_;      my($hsps,$ln,$diff,$must_shift) = @_;
1357    
1358      my $hsp1 = shift @$hsps;      my $hsp1 = shift @$hsps;
1359      my $hsp2;      my $hsp2;
1360      while ($hsp1 && ($hsp2 = shift @$hsps) && ($hsp1 = &merge($hsp1,$hsp2,$diff))) {}      my $merged = 0;
1361      return ($hsp1 && (($hsp1->[1] - $hsp1->[0]) > (0.9 * $ln)));      while ($hsp1 && ($hsp2 = shift @$hsps) &&
1362               ($must_shift ? &diff_frames($hsp1,$hsp2) : 1) &&
1363               ($hsp1 = &merge($hsp1,$hsp2,$diff))) { $merged = 1 }
1364        return ($merged && $hsp1 && (($hsp1->[1] - $hsp1->[0]) > (0.9 * $ln)));
1365    }
1366    
1367    sub diff_frames {
1368        my($hsp1,$hsp2) = @_;
1369        return (($hsp1->[0] % 3) != ($hsp2->[0] % 3));
1370  }  }
1371    
1372    
1373    
1374  =head3 merge  =head3 merge
1375    
1376    Merge two HSPs unless their overlap or separation is too large.
1377    
1378    RETURNS: Merged boundaries if merger succeeds, and C<undef> if merger fails.
1379    
1380  =cut  =cut
1381    
1382  sub merge {  sub merge {
# Line 853  Line 1391 
1391    
1392  =head3 sims  =head3 sims
1393    
1394    =over 4
1395    
1396    =item FUNCTION:
1397    
1398    Returns precomputed "Sim.pm" objects from the SEED.
1399    
1400    =item USAGE:
1401    
1402    C<< my @sims = $pegO->sims( -all, -cutoff => 1.0e-10); >>
1403    
1404    C<< my @sims = $pegO->sims( -max => 50, -cutoff => 1.0e-10); >>
1405    
1406    =item RETURNS: List of sim objects.
1407    
1408    =back
1409    
1410  =cut  =cut
1411    
1412  use Sim;  use Sim;
# Line 863  Line 1417 
1417      my $fig  = $figO->{_fig};      my $fig  = $figO->{_fig};
1418    
1419      my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;      my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;
1420      my $all    = $args{-all}    ? $args{-all}    : "fig";      my $all    = $args{-all}    ? 'all'          : "fig";
1421      my $max    = $args{-max}    ? $args{-max}    : 10000;      my $max    = $args{-max}    ? $args{-max}    : 10000;
1422    
1423      return $fig->sims($self->id,$max,$cutoff,$all);      my @sims = $fig->sims($self->id,$max,$cutoff,$all);
1424    
1425        if (@sims) {
1426            my $peg1 = FeatureO->new($figO, $sims[0]->[0]);
1427    
1428            foreach my $sim (@sims) {
1429    #           $sim->[0] = $peg1;
1430    #           $sim->[1] = FeatureO->new($figO, $sim->[1]);
1431            }
1432        }
1433    
1434        return @sims;
1435  }  }
1436    
1437    
1438    
1439  =head3 bbhs  =head3 bbhs
1440    
1441    =over 4
1442    
1443    =item FUNCTION:
1444    
1445    Given a PEG-type "FeatureO" object, returns the list of BBHO objects
1446    corresponding to the pre-computed BBHs for that PEG.
1447    
1448    =item USAGE:
1449    
1450    C<< my @bbhs = $pegO->bbhs(); >>
1451    
1452    =item List of BBHO objects.
1453    
1454    =back
1455    
1456  =cut  =cut
1457    
1458  sub bbhs {  sub bbhs {
# Line 892  Line 1472 
1472    
1473  =head3 display  =head3 display
1474    
1475    Prints info about a "FeatureO" object to STDOUT.
1476    
1477    USAGE:
1478    
1479    C<< $pegO->display(); >>
1480    
1481  =cut  =cut
1482    
1483  sub display {  sub display {
# Line 910  Line 1496 
1496    
1497  =head1 BBHO  =head1 BBHO
1498    
1499    Methods for accessing "Bidirectiona Best Hits" (BBHs).
1500    
1501  =cut  =cut
1502    
1503    
1504  =head3 new  =head3 new
1505    
1506    Constructor of BBHO objects.
1507    
1508    (NOTE: The "average user" should never need to invoke this method.)
1509    
1510  =cut  =cut
1511    
1512  sub new {  sub new {
# Line 930  Line 1522 
1522  }  }
1523    
1524    
1525    
1526  =head3 peg1  =head3 peg1
1527    
1528    =over 4
1529    
1530    =item USAGE:
1531    
1532    C<< my $peg1 = $bbh->peg1(); >>
1533    
1534    =item RETURNS:
1535    
1536    A "FeatureO" object corresponding to the "query" sequence
1537    in a BBH pair.
1538    
1539    =back
1540    
1541  =cut  =cut
1542    
1543  sub peg1 {  sub peg1 {
# Line 943  Line 1549 
1549    
1550  =head3 peg2  =head3 peg2
1551    
1552    =over 4
1553    
1554    =item USAGE:
1555    
1556    C<< my $peg2 = $bbh->peg2(); >>
1557    
1558    =item RETURNS:
1559    
1560    A "FeatureO" object corresponding to the "database" sequence
1561    in a BBH pair.
1562    
1563    =back
1564    
1565  =cut  =cut
1566    
1567  sub peg2 {  sub peg2 {
# Line 956  Line 1575 
1575    
1576  =head3 psc  =head3 psc
1577    
1578    =over 4
1579    
1580    =item USAGE:
1581    
1582    C<< my $psc = $bbh->psc(); >>
1583    
1584    =item RETURNS:
1585    
1586    The numerical value of the BLAST E-value for the pair.
1587    
1588    =back
1589    
1590  =cut  =cut
1591    
1592  sub psc {  sub psc {
# Line 968  Line 1599 
1599    
1600  =head3 norm_bitscore  =head3 norm_bitscore
1601    
1602    
1603    =over 4
1604    
1605    =item USAGE:
1606    
1607    C<< my $bsc = $bbh->norm_bitscore(); >>
1608    
1609    =item RETURNS:
1610    
1611    The "BLAST bit-score per aligned character" for the pair.
1612    
1613    =back
1614    
1615  =cut  =cut
1616    
1617  sub norm_bitscore {  sub norm_bitscore {
# Line 984  Line 1628 
1628    
1629  =head1 AnnotationO  =head1 AnnotationO
1630    
1631    Methods for accessing SEED annotations.
1632    
1633  =cut  =cut
1634    
1635    
# Line 1079  Line 1725 
1725  ########################################################################  ########################################################################
1726  use Data::Dumper;  use Data::Dumper;
1727    
1728    =head1 CouplingO
1729    
1730    Methods for accessing the "Functional coupling scores"
1731    of PEGs in close physical proximity to each other.
1732    
1733    =cut
1734    
1735    
1736    
1737  =head3 new  =head3 new
1738    
1739  =cut  =cut
# Line 1146  Line 1801 
1801      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1802      my $fig  = $figO->{_fig};      my $fig  = $figO->{_fig};
1803      my @ev = ();      my @ev = ();
1804      foreach my $tuple ($fig->coupling_evidence($self->peg1,$self->peg2))      foreach my $tuple ($fig->coupling_evidence($self->peg1->id,$self->peg2->id))
1805      {      {
1806          my($peg3,$peg4,$rep) = @$tuple;          my($peg3,$peg4,$rep) = @$tuple;
1807          push(@ev,[&FeatureO::new('FeatureO',$figO,$peg3),          push(@ev,[&FeatureO::new('FeatureO',$figO,$peg3),
# Line 1212  Line 1867 
1867    
1868  =head3 usable  =head3 usable
1869    
1870    
1871  =cut  =cut
1872    
1873  sub usable {  sub usable {
# Line 1312  Line 1968 
1968    
1969  =head1 FunctionalRoleO  =head1 FunctionalRoleO
1970    
1971    Methods for accessing the functional roles of features.
1972    
1973  =cut  =cut
1974    
1975    
# Line 1408  Line 2066 
2066      my $famO = $self->{_famO};      my $famO = $self->{_famO};
2067      if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }      if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
2068    
2069      return map { &FigFamO::new('FigFamO',$figO,$_) } $famO->list_members;      return map { &FeatureO::new('FeatureO',$figO,$_) } $famO->list_members;
2070  }  }
2071    
   
   
2072  =head3 rep_seqs  =head3 rep_seqs
2073    
2074  =cut  =cut
# Line 1464  Line 2120 
2120  ########################################################################  ########################################################################
2121  =head1 Attribute  =head1 Attribute
2122    
2123    (Note yet implemented.)
2124    
2125  =cut  =cut
2126    
2127  1;  1;

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.23

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3