[Bio] / FigKernelPackages / FIGO.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIGO.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Thu Nov 23 16:15:30 2006 UTC revision 1.30, Fri Nov 30 21:35:51 2007 UTC
# Line 1  Line 1 
1    ########################################################################
2  #  #
3  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2006 University of Chicago and Fellowship
4  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
# Line 14  Line 15 
15  # Genomes at veronika@thefig.info or download a copy from  # Genomes at veronika@thefig.info or download a copy from
16  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
17  #  #
18    ########################################################################
19    
20  package FIGO;  =head1 TODO
21    
22    =over 4
23    
24    =item Null arg to ContigO::dna_seq() should return entire contig seq.
25    
26    =item Add method to access "FIG::crude_estimate_of_distance()"
27    
28    =back
29    
30    =cut
31    
32    =head1 Overview
33    
34    This module is a set of packages encapsulating the SEED's core methods
35    using an "OOP-like" style.
36    
37    There are several modules clearly related to "individual genomes:"
38    GenomeO, ContigO, FeatureO (and I<maybe> AnnotationO).
39    
40    There are also modules that deal with complex relationships between
41    pairs or sets of features in one, two, or more genomes,
42    rather than any particular single genome:
43    BBHO, CouplingO, SubsystemO, FunctionalRoleO, FigFamO.
44    
45    Finally, the methods in "Attribute" might in principle attach
46    "atributes" to any type of object.
47    (Likewise, in principle one might also want to attach an "annotation"
48    to any type of object,
49    although currently we only support annotations of "features.")
50    
51    The three modules that act on "individual genomes" have a reasonable clear
52    "implied heirarchy" relative to FIGO:
53    
54    =over 4
55    
56        FIGO > GenomeO > ContigO > FeatureO
57    
58    =back
59    
60    However, inheritance is B<NOT> implemented using the C<@ISA> mechanism,
61    because some methods deal with "pairwise" or "setwise" relations between objects
62    or other more complex relationships that do not naturally fit into any heirarchy ---
63    which would get us into the whole quagmire of "multiple inheritance."
64    
65    We have chosen to in many cases sidestep the entire issue of inheritance
66    via an I<ad hoc> mechanism:
67    If a "child" object needs access to its "ancestors'" methods,
68    we will explicitly pass it references to its "ancestors,"
69    as subroutine arguments.
70    This is admittedly ugly, clumsy, and potentially error-prone ---
71    but it has the advantage that, unlike multiple inheritance,
72    we understand how to do it...
73    
74    MODULE DEPENDENCIES: FIG, FIG_Config, FigFams, SFXlate, SproutFIG, Tracer,
75        gjoparseblast, Data::Dumper.
76    
77    =cut
78    
79    ########################################################################
80    package FIGO;
81    ########################################################################
82  use strict;  use strict;
83  use FIG;  use FIG;
84  use FIG_Config;  use FIG_Config;
# Line 24  Line 86 
86  use SproutFIG;  use SproutFIG;
87  use Tracer;  use Tracer;
88  use Data::Dumper;  use Data::Dumper;
89    use Carp;
90  use FigFams;  use FigFams;
91    use gjoparseblast;
92    
93    =head1 FIGO
94    
95    The effective "base class" containing a few "top-level" methods.
96    
97    =cut
98    
99    
100    =head3 new
101    
102    Constructs a new FIGO object.
103    
104    =over 4
105    
106    =item USAGE:
107    
108    C<< my $figo = FIGO->new();           #...Subclass defaults to FIG >>
109    
110    C<< my $figo = FIGO->new('SPROUT');   #...Subclass is a SPROUT object >>
111    
112    =back
113    
114    =cut
115    
116  sub new {  sub new {
117      my($class,$low_level) = @_;      my($class,$low_level) = @_;
# Line 41  Line 128 
128    
129      my $self = {};      my $self = {};
130      $self->{_fig} = $fig;      $self->{_fig} = $fig;
131        $self->{_tmp_dir} = $FIG_Config::temp;
132      return bless $self, $class;      return bless $self, $class;
133  }  }
134    
135    sub function_of {
136        my($self,$id) = @_;
137    
138        my $fig  = $self->{_fig};
139        my $func = $fig->function_of($id);
140    
141        return ($func ? $func : "");
142    }
143    
144    =head3 genomes
145    
146    Returns a list of Taxonomy-IDs, possibly constrained by selection criteria.
147    (Default: Empty constraint returns all Tax-IDs in the SEED or SPROUT.)
148    
149    =over 4
150    
151    =item USAGE:
152    
153    C<< my @tax_ids = $figo->genomes(); >>
154    
155    C<< my @tax_ids = $figo->genomes( @constraints ); >>
156    
157    =item @constraints
158    
159    One or more element of: complete, prokaryotic, eukaryotic, bacterial, archaeal, nmpdr.
160    
161    =item RETURNS: List of Tax-IDs.
162    
163    =item EXAMPLE:
164    
165    L<Display all complete, prokaryotic genomes>
166    
167    =back
168    
169    =cut
170    
171  sub genomes {  sub genomes {
172      my($self,@constraints) = @_;      my($self,@constraints) = @_;
173      my $fig = $self->{_fig};      my $fig = $self->{_fig};
# Line 88  Line 212 
212      return map { &GenomeO::new('GenomeO',$self,$_) } @genomes;      return map { &GenomeO::new('GenomeO',$self,$_) } @genomes;
213  }  }
214    
215    
216    
217    =head3 subsystems
218    
219    =over 4
220    
221    =item RETURNS:
222    
223    List of all subsystems.
224    
225    =item EXAMPLE:
226    
227    L<Accessing Subsystem data>
228    
229    =back
230    
231    =cut
232    
233  sub subsystems {  sub subsystems {
234      my($self) = @_;      my($self) = @_;
235      my $fig = $self->{_fig};      my $fig = $self->{_fig};
# Line 95  Line 237 
237      return map { &SubsystemO::new('SubsystemO',$self,$_) } $fig->all_subsystems;      return map { &SubsystemO::new('SubsystemO',$self,$_) } $fig->all_subsystems;
238  }  }
239    
240    
241    =head3 functional_roles
242    
243    (Not yet implemented)
244    
245    =over
246    
247    =item RETURNS:
248    
249    =item EXAMPLE:
250    
251    =back
252    
253    =cut
254    
255  sub functional_roles {  sub functional_roles {
256      my($self) = @_;      my($self) = @_;
257      my $fig = $self->{_fig};      my $fig = $self->{_fig};
# Line 104  Line 261 
261      return @functional_roles;      return @functional_roles;
262  }  }
263    
264    
265    
266    =head3 all_figfams
267    
268    Returns a list of all FIGfam objects.
269    
270    =over 4
271    
272    =item USAGE:
273    
274    C<< foreach $fam ($figO->all_figfams) { #...Do something } >>
275    
276    =item RETURNS:
277    
278    List of FIGfam Objects
279    
280    =item EXAMPLE:
281    
282    L<Accessing FIGfams>
283    
284    =back
285    
286    =cut
287    
288  sub all_figfams {  sub all_figfams {
289      my($self) = @_;      my($self) = @_;
290      my $fig = $self->{_fig};      my $fig = $self->{_fig};
# Line 111  Line 292 
292      return map { &FigFamO::new('FigFamO',$self,$_) } $fams->all_families;      return map { &FigFamO::new('FigFamO',$self,$_) } $fams->all_families;
293  }  }
294    
295    
296    
297    =head3 family_containing
298    
299    =over 4
300    
301    =item USAGE:
302    
303    C<< my ($fam, $sims) = $figO->family_containing($seq); >>
304    
305    =item $seq:
306    
307    A protein translation string.
308    
309    =item RETURNS:
310    
311    $fam:  A FIGfam Object.
312    
313    $sims: A set of similarity objects.
314    
315    =item EXAMPLE: L<Placing a sequence into a FIGfam>
316    
317    =back
318    
319    =cut
320    
321  sub family_containing {  sub family_containing {
322      my($self,$seq) = @_;      my($self,$seq) = @_;
323    
# Line 127  Line 334 
334      }      }
335  }  }
336    
337  package GenomeO;  =head3 figfam
338    
339    =over 4
340    
341    =item USAGE:
342    
343    C<< my $fam = $figO->figfam($family_id); >>
344    
345    =item $family_id;
346    
347    A FigFam ID
348    
349    =item RETURNS:
350    
351    $fam:  A FIGfam Object.
352    
353    =back
354    
355    =cut
356    
357    sub figfam {
358        my($self,$fam_id) = @_;
359    
360        return &FigFamO::new('FigFamO',$self,$fam_id);
361    }
362    
363    
364    ########################################################################
365    package GenomeO;
366    ########################################################################
367  use Data::Dumper;  use Data::Dumper;
368    
369    =head1 GenomeO
370    
371    =cut
372    
373    
374    =head3 new
375    
376    Constructor of GenomeO objects.
377    
378    =over 4
379    
380    =item USAGE:
381    
382    C<< my $orgO = GenomeO->new($figO, $tax_id); >>
383    
384    =item RETURNS:
385    
386    A new "GenomeO" object.
387    
388    =back
389    
390    =cut
391    
392  sub new {  sub new {
393      my($class,$figO,$genomeId) = @_;      my($class,$figO,$genomeId) = @_;
394    
# Line 140  Line 398 
398      return bless $self, $class;      return bless $self, $class;
399  }  }
400    
401    
402    
403    =head3 id
404    
405    =over 4
406    
407    =item USAGE:
408    
409    C<< my $tax_id = $orgO->id(); >>
410    
411    =item RETURNS:
412    
413    Taxonomy-ID of "GenomeO" object.
414    
415    =back
416    
417    =cut
418    
419  sub id {  sub id {
420      my($self) = @_;      my($self) = @_;
421    
422      return $self->{_id};      return $self->{_id};
423  }  }
424    
425    
426    
427    =head3 genus_species
428    
429    =over 4
430    
431    =item USAGE:
432    
433    C<< $gs = $orgO->genus_species(); >>
434    
435    =item RETURNS:
436    
437    Genus-species-strain string
438    
439    =back
440    
441    =cut
442    
443  sub genus_species {  sub genus_species {
444      my($self) = @_;      my($self) = @_;
445    
# Line 153  Line 447 
447      return $fig->genus_species($self->{_id});      return $fig->genus_species($self->{_id});
448  }  }
449    
450    
451    
452    
453    =head3 taxonomy_of
454    
455    =over 4
456    
457    =item FUNCTION:
458    
459    Return the TAXONOMY string of a "GenomeO" object.
460    
461    =item USAGE:
462    
463    C<< my $taxonomy = $orgO->taxonomy_of(); >>
464    
465    =item RETURNS:
466    
467    TAXONOMY string.
468    
469    =back
470    
471    =cut
472    
473    sub taxonomy_of {
474        my ($self) = @_;
475    
476        my $figO = $self->{_figO};
477        my $fig  = $figO->{_fig};
478    
479        return $fig->taxonomy_of($self->{_id});
480    }
481    
482    
483    =head3 contigs_of
484    
485    =over 4
486    
487    =item RETURNS:
488    
489    List of C<contig> objects contained in a C<GenomeO> object.
490    
491    =item EXAMPLE:
492    
493    L<Show how to access contigs and extract sequence>
494    
495    =back
496    
497    =cut
498    
499  sub contigs_of {  sub contigs_of {
500      my($self) = @_;      my($self) = @_;
501    
# Line 161  Line 504 
504      return map { &ContigO::new('ContigO',$figO,$self->id,$_) } $fig->contigs_of($self->id);      return map { &ContigO::new('ContigO',$figO,$self->id,$_) } $fig->contigs_of($self->id);
505  }  }
506    
507    
508    
509    =head3 features_of
510    
511    =over 4
512    
513    =item FUNCTION:
514    
515    Returns a list of "FeatureO" objects contained in a "GenomeO" object.
516    
517    =item USAGE:
518    
519    C<< my @featureOs = $orgO->features_of();        #...Fetch all features >>
520    
521    or
522    
523    C<< my @featureOs = $orgO->features_of('peg');   #...Fetch only PEGs >>
524    
525    =item RETURNS:
526    
527    List of "FeatureO" objects.
528    
529    =back
530    
531    =cut
532    
533  sub features_of {  sub features_of {
534      my($self,$type) = @_;      my($self,$type) = @_;
535    
# Line 170  Line 539 
539      return map { &FeatureO::new('FeatureO',$figO,$_) } $fig->all_features($self->id,$type);      return map { &FeatureO::new('FeatureO',$figO,$_) } $fig->all_features($self->id,$type);
540  }  }
541    
542    
543    =head3 display
544    
545    Prints the genus, species, and strain information about a genome to STDOUT.
546    
547    =over 4
548    
549    =item USAGE:
550    
551    C<< $genome->display(); >>
552    
553    =item RETURNS:
554    
555    (Void)
556    
557    =back
558    
559    =cut
560    
561  sub display {  sub display {
562      my($self) = @_;      my($self) = @_;
563    
564      print join("\t",("Genome",$self->id,$self->genus_species)),"\n";      print join("\t",("Genome",$self->id,$self->genus_species)),"\n";
565  }  }
566    
 package ContigO;  
567    
568    
569    ########################################################################
570    package ContigO;
571    ########################################################################
572  use Data::Dumper;  use Data::Dumper;
573    
574    =head1 ContigO
575    
576    Methods for working with DNA sequence objects.
577    
578    =cut
579    
580    =head3 new
581    
582    Contig constructor.
583    
584    =over 4
585    
586    =item USAGE:
587    
588    C<< $contig = ContigO->new( $figO, $genomeId, $contigId); >>
589    
590    =item $figO:
591    
592    Parent FIGO object.
593    
594    =item $genomeId:
595    
596    Taxon-ID for the genome the contig is from.
597    
598    =item $contigId:
599    
600    Identifier for the contig
601    
602    =item RETURNS:
603    
604    A "ContigO" object.
605    
606    =back
607    
608    =cut
609    
610  sub new {  sub new {
611      my($class,$figO,$genomeId,$contigId) = @_;      my($class,$figO,$genomeId,$contigId) = @_;
612    
# Line 190  Line 617 
617      return bless $self, $class;      return bless $self, $class;
618  }  }
619    
620    
621    
622    =head3 id
623    
624    =over 4
625    
626    =item RETURNS:
627    
628    Sequence ID string of "ContigO" object
629    
630    =back
631    
632    =cut
633    
634  sub id {  sub id {
635      my($self) = @_;      my($self) = @_;
636    
637      return $self->{_id};      return $self->{_id};
638  }  }
639    
 sub genome {  
     my($self) = @_;  
640    
641      return $self->{_genome};  =head3 genome
 }  
642    
643  sub contig_length {  =over 4
     my($self) = @_;  
644    
645      my $fig = $self->{_figO}->{_fig};  =item USAGE:
     my $contig_lengths = $fig->contig_lengths($self->genome);  
     return $contig_lengths->{$self->id};  
 }  
646    
647  sub dna_seq {  C<< my $tax_id = $contig->genome->id(); >>
     my($self,$beg,$end) = @_;  
648    
649      my $fig = $self->{_figO}->{_fig};  =item RETURNS:
     my $max = $self->contig_length;  
     if (($beg && (&FIG::between(1,$beg,$max))) &&  
         ($end && (&FIG::between(1,$end,$max))))  
     {  
         return $fig->dna_seq($self->genome,join("_",($self->id,$beg,$end)));  
     }  
     else  
     {  
         return undef;  
     }  
 }  
650    
651  sub display {  Tax-ID of the GenomeO object containing the contig object.
652    
653    =back
654    
655    =cut
656    
657    sub genome {
658      my($self) = @_;      my($self) = @_;
659    
660      print join("ContigO",$self->genome,$self->id,$self->contig_length),"\n";      my $figO = $self->{_figO};
661        return new GenomeO($figO,$self->{_genome});
662  }  }
663    
 package FeatureO;  
664    
 use Data::Dumper;  
665    
666  sub new {  =head3 contig_length
     my($class,$figO,$fid) = @_;  
667    
668      ($fid =~ /^fig\|\d+\.\d+\.[^\.]+\.\d+$/) || return undef;  =over 4
     my $self = {};  
     $self->{_figO} = $figO;  
     $self->{_id} = $fid;  
     return bless $self, $class;  
 }  
669    
670  sub id {  =item USAGE:
     my($self) = @_;  
671    
672      return $self->{_id};  C<< my $len = $contig->contig_length(); >>
 }  
673    
674  sub genome {  =item RETURNS:
     my($self) = @_;  
675    
676      $self->id =~ /^fig\|(\d+\.\d+)/;  Length of contig's DNA sequence.
     return $1;  
 }  
677    
678  sub type {  =back
     my($self) = @_;  
679    
680      $self->id =~ /^fig\|\d+\.\d+\.([^\.]+)/;  =cut
     return $1;  
 }  
681    
682  sub location {  sub contig_length {
683      my($self) = @_;      my($self) = @_;
684    
685      my $fig = $self->{_figO}->{_fig};      my $fig = $self->{_figO}->{_fig};
686      return scalar $fig->feature_location($self->id);      my $contig_lengths = $fig->contig_lengths($self->genome->id);
687        return $contig_lengths->{$self->id};
688  }  }
689    
 sub dna_seq {  
     my($self) = @_;  
690    
691      my $fig = $self->{_figO}->{_fig};  =head3 dna_seq
     my $fid = $self->id;  
     my @loc = $fig->feature_location($fid);  
     return $fig->dna_seq(&FIG::genome_of($fid),@loc);  
 }  
692    
693  sub prot_seq {  =over 4
     my($self) = @_;  
694    
695      ($self->type eq "peg") || return undef;  =item USAGE:
     my $fig = $self->{_figO}->{_fig};  
     my $fid = $self->id;  
     return $fig->get_translation($fid);  
 }  
696    
697  sub function_of {  C<< my $seq = $contig->dna_seq(beg, $end); >>
     my($self) = @_;  
698    
699      my $fig = $self->{_figO}->{_fig};  =item $beg:
     my $fid = $self->id;  
     return scalar $fig->function_of($fid);  
 }  
700    
701  sub coupled_to {  Begining point of DNA subsequence
     my($self) = @_;  
702    
703      ($self->type eq "peg") || return undef;  =item $end:
704      my $figO = $self->{_figO};  
705      my $fig  = $figO->{_fig};  End point of DNA subsequence
706      my $peg1 = $self->id;  
707      my @coupled = ();  =item RETURNS:
708      foreach my $tuple ($fig->coupled_to($peg1))  
709    String containing DNA subsequence running from $beg to $end
710    (NOTE: if $beg > $end, returns reverse complement of DNA subsequence.)
711    
712    =back
713    
714    =cut
715    
716    sub dna_seq {
717        my($self,$beg,$end) = @_;
718    
719        my $fig = $self->{_figO}->{_fig};
720        my $max = $self->contig_length;
721        if (($beg && (&FIG::between(1,$beg,$max))) &&
722            ($end && (&FIG::between(1,$end,$max))))
723      {      {
724          my($peg2,$sc) = @$tuple;          return $fig->dna_seq($self->genome->id,join("_",($self->id,$beg,$end)));
725          push(@coupled, &CouplingO::new('CouplingO',$figO,$peg1,$peg2,$sc));      }
726        else
727        {
728            return undef;
729      }      }
     return @coupled;  
730  }  }
731    
 sub annotations {  
     my($self) = @_;  
732    
733      my $figO = $self->{_figO};  =head3 display
     my $fig  = $figO->{_fig};  
734    
735      return map { &AnnotationO::new('AnnotationO',@$_) } $fig->feature_annotations($self->id,1);  Prints summary information about a "ContigO" object to STDOUT:
 }  
736    
737  use Sim;  Genus, species, strain
 sub sims {  
     my($self,%args) = @_;  
738    
739      my $figO = $self->{_figO};  Contig ID
     my $fig  = $figO->{_fig};  
740    
741      my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;  Contig length
     my $all    = $args{-all}    ? $args{-all}    : "fig";  
     my $max    = $args{-max}    ? $args{-max}    : 10000;  
742    
743      return $fig->sims($self->id,$max,$cutoff,$all);  =over 4
 }  
744    
745  sub bbhs {  =item RETURNS:
746    
747    (Void)
748    
749    =back
750    
751    =cut
752    
753    sub display {
754      my($self) = @_;      my($self) = @_;
755    
756        print join("ContigO",$self->genome->id,$self->id,$self->contig_length),"\n";
757    }
758    
759    sub features_in_region {
760        my($self,$beg,$end) = @_;
761      my $figO = $self->{_figO};      my $figO = $self->{_figO};
762      my $fig  = $figO->{_fig};      my $fig  = $figO->{_fig};
763    
764      my @bbhs  = $fig->bbhs($self->id);      my($features) = $fig->genes_in_region($self->genome->id,$self->id,$beg,$end);
765      return map { my($peg2,$sc,$bs) = @$_; bless({ _peg1 => $self->id,      return map { new FeatureO($figO,$_) } @$features;
                                                   _peg2 => $peg2,  
                                                   _psc => $sc,  
                                                   _bit_score => $bs  
                                                 },'BBHO') } @bbhs;  
766  }  }
767    
 sub display {  
     my($self) = @_;  
768    
     print join("\t",$self->id,$self->location,$self->function_of),"\n",  
           $self->dna_seq,"\n",  
           $self->prot_seq,"\n";  
 }  
769    
770  package BBHO;  ########################################################################
771    package FeatureO;
772    ########################################################################
773    use Data::Dumper;
774    use Carp;
775    
776  sub new {  =head1 FeatureO
     my($class,$peg1,$peg2,$sc,$normalized_bitscore) = @_;  
777    
778      my $self = {};  Methods for working with features on "ContigO" objects.
     $self->{_peg1}      = $peg1;  
     $self->{_peg2}      = $peg2;  
     $self->{_psc}       = $sc;  
     $self->{_bit_score} = $normalized_bitscore  
779    
780  }  =cut
781    
 sub peg1 {  
     my($self) = @_;  
782    
783      return $self->{_peg1};  =head3 new
 }  
784    
785  sub peg2 {  Constructor of new "FeatureO" objects
     my($self) = @_;  
786    
787      return $self->{_peg2};  =over 4
 }  
788    
789  sub psc {  =item USAGE:
     my($self) = @_;  
790    
791      return $self->{_psc};  C<< my $feature = FeatureO->new( $figO, $fid ); >>
 }  
792    
793  sub norm_bitscore {  =item C<$figO>:
     my($self) = @_;  
794    
795      return $self->{_bit_score};  "Base" FIGO object.
 }  
796    
797  package AnnotationO;  =item C<$fid>:
798    
799    Feature-ID for new feature
800    
801    =item RETURNS:
802    
803    A newly created "FeatureO" object.
804    
805    =back
806    
807    =cut
808    
809  sub new {  sub new {
810      my($class,$fid,$timestamp,$who,$text) = @_;      my($class,$figO,$fid) = @_;
811    
812        ($fid =~ /^fig\|\d+\.\d+\.[^\.]+\.\d+$/) || return undef;
813      my $self = {};      my $self = {};
814      $self->{_fid} = $fid;      $self->{_figO} = $figO;
815      $self->{_timestamp} = $timestamp;      $self->{_id} = $fid;
     $self->{_who} = $who;  
     $self->{_text} = $text;  
816      return bless $self, $class;      return bless $self, $class;
817  }  }
818    
 sub fid {  
     my($self) = @_;  
819    
     return $self->{_fid};  
 }  
820    
821  sub timestamp {  =head3 id
     my($self,$convert) = @_;  
822    
823      if ($convert)  =over 4
     {  
         return scalar localtime($self->{_timestamp});  
     }  
     else  
     {  
         return $self->{_timestamp};  
     }  
 }  
824    
825  sub made_by {  =item USAGE:
     my($self) = @_;  
826    
827      my $who = $self->{_who};  C<< my $fid = $feature->id(); >>
     $who =~ s/^master://i;  
     return $who;  
 }  
828    
829  sub text {  =item RETURNS:
     my($self) = @_;  
830    
831      my $text = $self->{_text};  The FID (Feature ID) of a "FeatureO" object.
     return $text;  
 }  
832    
833  sub display {  =back
834    
835    =cut
836    
837    sub id {
838      my($self) = @_;      my($self) = @_;
839    
840      print join("\t",($self->fid,$self->timestamp(1),$self->made_by)),"\n",$self->text,"\n";      return $self->{_id};
841  }  }
842    
 package CouplingO;  
843    
 use Data::Dumper;  
844    
845  sub new {  =head3 genome
     my($class,$figO,$peg1,$peg2,$sc) = @_;  
846    
847      ($peg1 =~ /^fig\|\d+\.\d+\.peg\.\d+$/) || return undef;  =over 4
     ($peg2 =~ /^fig\|\d+\.\d+\.peg\.\d+$/) || return undef;  
     my $self = {};  
     $self->{_figO} = $figO;  
     $self->{_peg1} = $peg1;  
     $self->{_peg2} = $peg2;  
     $self->{_sc}   = $sc;  
     return bless $self, $class;  
 }  
848    
849  sub peg1 {  =item USAGE:
     my($self) = @_;  
850    
851      return $self->{_peg1};  C<< my $taxid = $feature->genome(); >>
 }  
852    
853  sub peg2 {  =item RETURNS:
     my($self) = @_;  
854    
855      return $self->{_peg2};  The Taxon-ID for the "GenomeO" object containing the feature.
 }  
856    
857  sub sc {  =back
     my($self) = @_;  
858    
859      return $self->{_sc};  =cut
 }  
860    
861  sub evidence {  sub genome {
862      my($self) = @_;      my($self) = @_;
   
863      my $figO = $self->{_figO};      my $figO = $self->{_figO};
864      my $fig  = $figO->{_fig};      $self->id =~ /^fig\|(\d+\.\d+)/;
865      my @ev = ();      return new GenomeO($figO,$1);
     foreach my $tuple ($fig->coupling_evidence($self->peg1,$self->peg2))  
     {  
         my($peg3,$peg4,$rep) = @$tuple;  
         push(@ev,[&FeatureO::new('FeatureO',$figO,$peg3),  
                   &FeatureO::new('FeatureO',$figO,$peg4),  
                   $rep]);  
     }  
     return @ev;  
866  }  }
867    
 sub display {  
     my($self) = @_;  
868    
     print join("\t",($self->peg1,$self->peg2,$self->sc)),"\n";  
 }  
869    
870  package SubsystemO;  =head3 type
871    
872  use Data::Dumper;  =over 4
 use Subsystem;  
873    
874  sub new {  =item USAGE:
     my($class,$figO,$name) = @_;  
875    
876      my $self = {};  C<< my $feature_type = $feature->type(); >>
     $self->{_figO} = $figO;  
     $self->{_id} = $name;  
877    
878      return bless $self, $class;  =item RETURNS:
 }  
879    
880  sub id {  The feature object's "type" (e.g., "peg," "rna," etc.)
     my($self) = @_;  
881    
882      return $self->{_id};  =back
 }  
883    
884  sub usable {  =cut
885    
886    sub type {
887      my($self) = @_;      my($self) = @_;
888    
889      my $figO = $self->{_figO};      $self->id =~ /^fig\|\d+\.\d+\.([^\.]+)/;
890      my $fig  = $figO->{_fig};      return $1;
     return $fig->usable_subsystem($self->id);  
891  }  }
892    
 sub genomes {  
     my($self) = @_;  
893    
     my $figO = $self->{_figO};  
     my $subO = $self->{_subO};  
     if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }  
894    
895      return map { &GenomeO::new('GenomeO',$figO,$_) } $subO->get_genomes;  =head3 location
 }  
896    
897  sub roles {  =over 4
     my($self) = @_;  
898    
899      my $figO = $self->{_figO};  =item USAGE:
     my $subO = $self->{_subO};  
     if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }  
900    
901      return map { &FunctionalRoleO::new('FunctionalRoleO',$figO,$_) }  $subO->get_roles($self->id);  C<< my $loc = $feature->location(); >>
 }  
902    
903  sub curator {  =item RETURNS:
     my($self) = @_;  
904    
905      my $figO = $self->{_figO};  A string representing the feature object's location on the genome's DNA,
906    in SEED "tbl format" (i.e., "contig_beging_end").
907    
908    =back
909    
910    =cut
911    
912    sub location {
913        my($self) = @_;
914    
915        my $fig = $self->{_figO}->{_fig};
916        return scalar $fig->feature_location($self->id);
917    }
918    
919    
920    =head3 contig
921    
922    =over 4
923    
924    =item USAGE:
925    
926    C<< my $contig = $feature->contig(); >>
927    
928    =item RETURNS:
929    
930    A "ContigO" object to access the contig data
931    for the contig the feature is on.
932    
933    =back
934    
935    =cut
936    
937    sub contig {
938        my($self) = @_;
939    
940        my $figO = $self->{_figO};
941        my $loc      = $self->location;
942        my $genomeID = $self->genome->id;
943        return ($loc =~ /^(\S+)_\d+_\d+$/) ? new ContigO($figO,$genomeID,$1) : undef;
944    }
945    
946    
947    
948    =head3 begin
949    
950    =over 4
951    
952    =item USAGE:
953    
954    C<< my $beg = $feature->begin(); >>
955    
956    =item RETURNS:
957    
958    The numerical coordinate of the first base of the feature.
959    
960    =back
961    
962    =cut
963    
964    sub begin {
965        my($self) = @_;
966    
967        my $loc = $self->location;
968        return ($loc =~ /^\S+_(\d+)_\d+$/) ? $1 : undef;
969    }
970    
971    
972    
973    =head3 end
974    
975    =over 4
976    
977    =item USAGE:
978    
979    C<< my $end = $feature->end(); >>
980    
981    =item RETURNS:
982    
983    The numerical coordinate of the last base of the feature.
984    
985    =back
986    
987    =cut
988    
989    sub end {
990        my($self) = @_;
991    
992        my $loc = $self->location;
993        return ($loc =~ /^\S+_\d+_(\d+)$/) ? $1 : undef;
994    }
995    
996    
997    
998    =head3 dna_seq
999    
1000    =over 4
1001    
1002    =item USAGE:
1003    
1004    C<< my $dna_seq = $feature->dna_seq(); >>
1005    
1006    =item RETURNS:
1007    
1008    A string contining the DNA subsequence of the contig
1009    running from the first to the last base of the feature.
1010    
1011    If ($beg > $end), the reverse complement subsequence is returned.
1012    
1013    =back
1014    
1015    =cut
1016    
1017    sub dna_seq {
1018        my($self) = @_;
1019    
1020        my $fig = $self->{_figO}->{_fig};
1021        my $fid = $self->id;
1022        my @loc = $fig->feature_location($fid);
1023        return $fig->dna_seq(&FIG::genome_of($fid),@loc);
1024    }
1025    
1026    
1027    
1028    =head3 prot_seq
1029    
1030    =over 4
1031    
1032    =item USAGE:
1033    
1034    C<< my $dna_seq = $feature->prot_seq(); >>
1035    
1036    =item RETURNS:
1037    
1038    A string contining the protein translation of the feature (if it exists),
1039    or the "undef" value if the feature does not exist or is not a PEG.
1040    
1041    =back
1042    
1043    =cut
1044    
1045    sub prot_seq {
1046        my($self) = @_;
1047    
1048        ($self->type eq "peg") || return undef;
1049        my $fig = $self->{_figO}->{_fig};
1050        my $fid = $self->id;
1051        return $fig->get_translation($fid);
1052    }
1053    
1054    
1055    
1056    =head3 function_of
1057    
1058    =over 4
1059    
1060    =item USAGE:
1061    
1062    C<< my $func = $feature->function_of(); >>
1063    
1064    =item RETURNS:
1065    
1066    A string containing the function assigned to the feature,
1067    or the "undef" value if no function has been assigned.
1068    
1069    =back
1070    
1071    =cut
1072    
1073    sub function_of {
1074        my($self) = @_;
1075    
1076        my $fig = $self->{_figO}->{_fig};
1077        my $fid = $self->id;
1078        return scalar $fig->function_of($fid);
1079    }
1080    
1081    
1082    
1083    =head3 coupled_to
1084    
1085    =over 4
1086    
1087    =item USAGE:
1088    
1089    C<< my @coupled_features = $feature->coupled_to(); >>
1090    
1091    =item RETURNS:
1092    
1093    A list of "CouplingO" objects describing the evidence for functional coupling
1094    between this feature and other nearby features.
1095    
1096    =back
1097    
1098    =cut
1099    
1100    sub coupled_to {
1101        my($self) = @_;
1102    
1103        ($self->type eq "peg") || return ();
1104        my $figO = $self->{_figO};
1105        my $fig  = $figO->{_fig};
1106        my $peg1 = $self->id;
1107        my @coupled = ();
1108        foreach my $tuple ($fig->coupled_to($peg1))
1109        {
1110            my($peg2,$sc) = @$tuple;
1111            push(@coupled, &CouplingO::new('CouplingO',$figO,$peg1,$peg2,$sc));
1112        }
1113        return @coupled;
1114    }
1115    
1116    
1117    
1118    =head3 annotations
1119    
1120    =over 4
1121    
1122    =item USAGE:
1123    
1124    C<< my @annot_list = $feature->annotations(); >>
1125    
1126    =item RETURNS:
1127    
1128    A list of "AnnotationO" objects allowing access to the annotations for this feature.
1129    
1130    =back
1131    
1132    =cut
1133    
1134    sub annotations {
1135        my($self) = @_;
1136    
1137        my $figO = $self->{_figO};
1138        my $fig  = $figO->{_fig};
1139    
1140        return map { &AnnotationO::new('AnnotationO',@$_) } $fig->feature_annotations($self->id,1);
1141    }
1142    
1143    
1144    =head3 in_subsystems
1145    
1146    =over 4
1147    
1148    =item USAGE:
1149    
1150    C<< my @subsys_list = $feature->in_subsystems(); >>
1151    
1152    =item RETURNS:
1153    
1154    A list of "SubsystemO" objects allowing access to the subsystems
1155    that this feature particupates in.
1156    
1157    =back
1158    
1159    =cut
1160    
1161    sub in_subsystems {
1162        my($self) = @_;
1163        my $figO = $self->{_figO};
1164        my $fig  = $figO->{_fig};
1165    
1166        return map { new SubsystemO($figO,$_) } $fig->peg_to_subsystems($self->id);
1167    }
1168    
1169    
1170    =head3 possibly_truncated
1171    
1172    =over 4
1173    
1174    =item USAGE:
1175    
1176    C<< my $trunc = $feature->possibly_truncated(); >>
1177    
1178    =item RETURNS:
1179    
1180    Boolean C<TRUE> if the feature may be truncated;
1181    boolean C<FALSE> otherwise.
1182    
1183    =back
1184    
1185    =cut
1186    
1187    sub possibly_truncated {
1188        my($self) = @_;
1189        my $figO = $self->{_figO};
1190        my $fig  = $figO->{_fig};
1191    
1192        return $fig->possibly_truncated($self->id);
1193    }
1194    
1195    
1196    
1197    =head3 possible_frameshift
1198    
1199    =over 4
1200    
1201    =item USAGE:
1202    
1203    C<< my $fs = $feature->possible_frameshift(); >>
1204    
1205    =item RETURNS:
1206    
1207    Boolean C<TRUE> if the feature may be a frameshifted fragment;
1208    boolean C<FALSE> otherwise.
1209    
1210    (NOTE: This is a crude prototype implementation,
1211    and is mostly as an example of how to code using FIGO.)
1212    
1213    =back
1214    
1215    =cut
1216    
1217    sub possible_frameshift {
1218        my($self) = @_;
1219        my $figO = $self->{_figO};
1220        my $fig = $figO->{_fig};
1221        my($tmp_dir) = $figO->{_tmp_dir};
1222    
1223        my $tmp_dna  = "$tmp_dir/tmp_dna.$$.fasta";
1224        my $tmp_prot = "$tmp_dir/tmp_prot.$$.fasta";
1225    
1226        #...Skip tests and return '0' if truncated...
1227        if (! $self->possibly_truncated)
1228        {
1229            #...Get best precomputed BLAST hit if E-value < 1.0e-20:
1230            my @sims = $self->sims( -max => 5, -cutoff => 1.0e-20);
1231            while ((@sims > 0) && $fig->possibly_truncated($sims[0]->id2)) { shift @sims }
1232    
1233            #...If a sim was returned:
1234            if (my $sim = shift @sims)
1235            {
1236                #...Get best hit FID and boundaries:
1237                my $peg2 = $sim->id2;
1238                my $ln1  = $sim->ln1;
1239                my $ln2  = $sim->ln2;
1240                my $b2   = $sim->b2;
1241                my $e2   = $sim->e2;
1242    
1243                #...Convert from AA to BP, and pad out w/ 100 bp guard region:
1244                my $adjL = 100 + (($b2-1) * 3);
1245                my $adjR = 100 + (($ln2 - $e2) * 3);
1246    
1247                if ($ENV{DEBUG}) { print STDERR "adjL = $adjL adjR = $adjR ln1 = $ln1 peg2 = $peg2 ln2 = $ln2\n" }
1248                #...If hit is more than 20% longer than query:
1249                if ($ln2 > (1.2 * $ln1))
1250                {
1251                    #...Get and parse query location:
1252                    my $loc = $self->location;
1253                    if ($loc =~ /^(\S+)_(\d+)_(\d+)/)
1254                    {
1255                        my $contig = $1;
1256                        my $beg    = $2;
1257                        my $end    = $3;
1258    
1259                        #...Create new ContigO object:
1260                        my $contigO = new ContigO($figO, $self->genome->id, $contig);
1261    
1262                        #...Extract DNA subsequence, including guard regions:
1263                        my($begA,$endA,$dna);
1264                        if ($beg < $end)
1265                        {
1266                            $begA = &max(1, $beg - $adjL);
1267                            $endA = &min($end+$adjR, $contigO->contig_length);
1268                            $dna  = $contigO->dna_seq($begA,$endA);
1269                        }
1270                        else
1271                        {
1272                            $endA = &max(1, $beg - $adjL);
1273                            $begA = &min($end+$adjR, $contigO->contig_length);
1274                            $dna  = $contigO->dna_seq($begA,$endA);
1275                        }
1276    
1277                        if (defined($dna) && (length($dna) > 90))
1278                        {
1279                            #...Open tmp-file and write FASTA containing DNA subregion to be BLASTed:
1280                            open( TMP, ">$tmp_dna") || die "could not open $tmp_dna";
1281                            print TMP  ">dna\n$dna\n";
1282                            close(TMP);
1283    
1284                            #...Create new FeatureO object corresponding tp $peg2:
1285                            my $pegO2 = new FeatureO($figO,$peg2);
1286    
1287                            #...Fetch its translation, and print to tmp FASTA file for BLASTing:
1288                            my $prot  = $pegO2->prot_seq;
1289                            if (defined($prot) && (length($prot) > 30))
1290                            {
1291                                open( TMP, ">$tmp_prot") || die "could not open $tmp_prot";
1292                                print TMP  ">tmp_prot\n$prot\n";
1293                                close(TMP);
1294    
1295                                #...Build BLAST nucleotide database for extracted DNA region,
1296                                #   and TBLASTN $peg2 against the DNA:
1297                                &run("formatdb -i $tmp_dna -pF");
1298                                open(BLAST,"blastall -i $tmp_prot -d $tmp_dna -p tblastn -FF -e 1.0e-20 |")
1299                                    || die "could not blast";
1300    
1301                                #...Parse the TBLASTN output; find and sort HSPs by left boundary:
1302                                my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);
1303                                if ($ENV{DEBUG}) { print STDERR &Dumper(['blast output',$db_seq_out]) }
1304                                my @hsps       = sort { $a->[0] <=> $b->[0] }
1305                                                  map { [$_->[9], $_->[10], $_->[12], $_->[13]] }
1306                                                  grep { $_->[1] < 1.0e-20 }
1307                                                  @ { $db_seq_out->[6] };
1308    
1309                                #...Extract HSP boundary pairs:
1310                                my @prot = map { [$_->[0], $_->[1]] } @hsps;
1311                                my @dna  = map { [$_->[2], $_->[3]] } @hsps;
1312                                if ($ENV{DEBUG}) { print STDERR &Dumper(\@prot,\@dna) }
1313    
1314                                #...If the "cover" of the HSPs covers more than 90% of $peg2 w gaps < 3 AA,
1315                                #   and the "cover" of the HPSs cover more than 90% of the extracted DNA
1316                                #   w/ gaps < 9 bp (but not a multiple of 3), suspect a possible frameshift:
1317                                if (&covers(\@prot,length($prot),3,0) && &covers(\@dna,3*length($prot),9,1))
1318                                {
1319                                    unlink($tmp_dna,$tmp_prot);
1320                                    return [$contig,$begA,$endA,$dna,$peg2];
1321                                }
1322                            }
1323                        }
1324                    }
1325                }
1326            }
1327        }
1328        unlink($tmp_dna,$tmp_prot);
1329        return 0;
1330    }
1331    
1332    
1333    
1334    =head3 run
1335    
1336    (Note: This function should be considered "PRIVATE")
1337    
1338    =over 4
1339    
1340    =item FUNCTION:
1341    
1342    Passes a string containing a command to be execture by the "system" shell command.
1343    
1344    =item USAGE:
1345    
1346    C<< $feature->run($cmd); >>
1347    
1348    =item RETURNS:
1349    
1350    Nil if the execution of C<$cmd> was successful;
1351    aborts with traceback if C<$cmd> fails.
1352    
1353    =back
1354    
1355    =cut
1356    
1357    sub run {
1358        my($cmd) = @_;
1359        (system($cmd) == 0) || confess("FAILED: $cmd");
1360    }
1361    
1362    
1363    
1364    =head3 max
1365    
1366    (Note: This function should be considered "PRIVATE")
1367    
1368    =over 4
1369    
1370    =item USAGE:
1371    
1372    C<< my $max = $feature->max($x, $y); >>
1373    
1374    =item C<$x> and  C<$y>
1375    
1376    Numerical values.
1377    
1378    =item RETURNS:
1379    
1380    The larger of the two numerical values C<$x> and C<$y>.
1381    
1382    =back
1383    
1384    =cut
1385    
1386    sub max {
1387        my($x,$y) = @_;
1388        return ($x < $y) ? $y : $x;
1389    }
1390    
1391    
1392    
1393    =head3 min
1394    
1395    (Note: This function should be considered "PRIVATE")
1396    
1397    =over 4
1398    
1399    =item USAGE:
1400    
1401    C<< my $min = $feature->min($x, $y); >>
1402    
1403    =item C<$x> and C<$y>
1404    
1405    Numerical values.
1406    
1407    =item RETURNS:
1408    
1409    The smaller of the two numerical values C<$x> and C<$y>.
1410    
1411    =back
1412    
1413    =cut
1414    
1415    sub min {
1416        my($x,$y) = @_;
1417        return ($x < $y) ? $x : $y;
1418    }
1419    
1420    
1421    
1422    =head3 covers
1423    
1424    (Question: Should this function be considered "PRIVATE" ???)
1425    
1426    USAGE:
1427        C<< if (&covers(\@hits, $len, $diff, $must_shift)) { #...Do stuff } >>
1428    
1429    Returns boolean C<TRUE> if a set of BLAST HSPs "cover" more than 90%
1430    of the database sequence(?).
1431    
1432    =cut
1433    
1434    sub covers {
1435        my($hsps,$ln,$diff,$must_shift) = @_;
1436    
1437        if ($ENV{DEBUG}) { print STDERR &Dumper(['hsps',$hsps,'ln',$ln,'diff',$diff,'must_shift',$must_shift]) }
1438        my $hsp1 = shift @$hsps;
1439        my $hsp2;
1440        my $merged = 0;
1441        while ($hsp1 && ($hsp2 = shift @$hsps) &&
1442               ($must_shift ? &diff_frames($hsp1,$hsp2) : 1) &&
1443               ($hsp1 = &merge($hsp1,$hsp2,$diff)))
1444        {
1445            $merged = 1;
1446            if ($ENV{DEBUG}) { print STDERR &Dumper(['merged',$hsp1]) }
1447        }
1448        return ($merged && $hsp1 && (($hsp1->[1] - $hsp1->[0]) > (0.9 * $ln)));
1449    }
1450    
1451    sub diff_frames {
1452        my($hsp1,$hsp2) = @_;
1453        return ((($hsp1->[1]+1) % 3) != ($hsp2->[0] % 3));
1454    }
1455    
1456    
1457    
1458    =head3 merge
1459    
1460    Merge two HSPs unless their overlap or separation is too large.
1461    
1462    RETURNS: Merged boundaries if merger succeeds, and C<undef> if merger fails.
1463    
1464    =cut
1465    
1466    sub merge {
1467        my($hsp1,$hsp2,$diff) = @_;
1468    
1469        my($b1,$e1) = @$hsp1;
1470        my($b2,$e2) = @$hsp2;
1471        return (($e2 > $e1) && (($b2-$e1) <= $diff)) ? [$b1,$e2] : undef;
1472    }
1473    
1474    
1475    
1476    =head3 sims
1477    
1478    =over 4
1479    
1480    =item FUNCTION:
1481    
1482    Returns precomputed "Sim.pm" objects from the SEED.
1483    
1484    =item USAGE:
1485    
1486    C<< my @sims = $pegO->sims( -all, -cutoff => 1.0e-10); >>
1487    
1488    C<< my @sims = $pegO->sims( -max => 50, -cutoff => 1.0e-10); >>
1489    
1490    =item RETURNS: List of sim objects.
1491    
1492    =back
1493    
1494    =cut
1495    
1496    use Sim;
1497    sub sims {
1498        my($self,%args) = @_;
1499    
1500        my $figO = $self->{_figO};
1501        my $fig  = $figO->{_fig};
1502    
1503        my $cutoff = $args{-cutoff} ? $args{-cutoff} : 1.0e-5;
1504        my $all    = $args{-all}    ? 'all'          : "fig";
1505        my $max    = $args{-max}    ? $args{-max}    : 10000;
1506    
1507        my @sims = $fig->sims($self->id,$max,$cutoff,$all);
1508    
1509        if (@sims) {
1510            my $peg1 = FeatureO->new($figO, $sims[0]->[0]);
1511    
1512            foreach my $sim (@sims) {
1513    #           $sim->[0] = $peg1;
1514    #           $sim->[1] = FeatureO->new($figO, $sim->[1]);
1515            }
1516        }
1517    
1518        return @sims;
1519    }
1520    
1521    
1522    
1523    =head3 bbhs
1524    
1525    =over 4
1526    
1527    =item FUNCTION:
1528    
1529    Given a PEG-type "FeatureO" object, returns the list of BBHO objects
1530    corresponding to the pre-computed BBHs for that PEG.
1531    
1532    =item USAGE:
1533    
1534    C<< my @bbhs = $pegO->bbhs(); >>
1535    
1536    =item RETURNS:
1537    
1538    List of BBHO objects.
1539    
1540    =back
1541    
1542    =cut
1543    
1544    sub bbhs {
1545        my($self) = @_;
1546    
1547        my $figO = $self->{_figO};
1548        my $fig  = $figO->{_fig};
1549    
1550        my @bbhs  = $fig->bbhs($self->id);
1551        return map { my($peg2,$sc,$bs) = @$_; bless({ _figO => $figO,
1552                                                      _peg1 => $self->id,
1553                                                      _peg2 => $peg2,
1554                                                      _psc => $sc,
1555                                                      _bit_score => $bs
1556                                                    },'BBHO') } @bbhs;
1557    }
1558    
1559    
1560    =head3 display
1561    
1562    =over 4
1563    
1564    =item FUNCTION:
1565    
1566    Prints info about a "FeatureO" object to STDOUT.
1567    
1568    =item USAGE:
1569    
1570    C<< $pegO->display(); >>
1571    
1572    =item RETURNS;
1573    
1574    (void)
1575    
1576    =back
1577    
1578    =cut
1579    
1580    sub display {
1581        my($self) = @_;
1582    
1583        print join("\t",$self->id,$self->location,$self->function_of),"\n",
1584              $self->dna_seq,"\n",
1585              $self->prot_seq,"\n";
1586    }
1587    
1588    
1589    
1590    ########################################################################
1591    package BBHO;
1592    ########################################################################
1593    
1594    =head1 BBHO
1595    
1596    Methods for accessing "Bidirectiona Best Hits" (BBHs).
1597    
1598    =cut
1599    
1600    
1601    =head3 new
1602    
1603    Constructor of BBHO objects.
1604    
1605    (NOTE: The "average user" should never need to invoke this method.)
1606    
1607    =cut
1608    
1609    sub new {
1610        my($class,$figO,$peg1,$peg2,$sc,$normalized_bitscore) = @_;
1611    
1612        my $self = {};
1613        $self->{_figO}      = $figO;
1614        $self->{_peg1}      = $peg1;
1615        $self->{_peg2}      = $peg2;
1616        $self->{_psc}       = $sc;
1617        $self->{_bit_score} = $normalized_bitscore
1618    
1619    }
1620    
1621    
1622    
1623    =head3 peg1
1624    
1625    =over 4
1626    
1627    =item USAGE:
1628    
1629    C<< my $peg1 = $bbh->peg1(); >>
1630    
1631    =item RETURNS:
1632    
1633    A "FeatureO" object corresponding to the "query" sequence
1634    in a BBH pair.
1635    
1636    =back
1637    
1638    =cut
1639    
1640    sub peg1 {
1641        my($self) = @_;
1642    
1643        my $figO = $self->{_figO};
1644        return new FeatureO($figO,$self->{_peg1});
1645    }
1646    
1647    =head3 peg2
1648    
1649    =over 4
1650    
1651    =item USAGE:
1652    
1653    C<< my $peg2 = $bbh->peg2(); >>
1654    
1655    =item RETURNS:
1656    
1657    A "FeatureO" object corresponding to the "database" sequence
1658    in a BBH pair.
1659    
1660    =back
1661    
1662    =cut
1663    
1664    sub peg2 {
1665        my($self) = @_;
1666    
1667        my $figO = $self->{_figO};
1668        return new FeatureO($figO,$self->{_peg2});
1669    }
1670    
1671    
1672    
1673    =head3 psc
1674    
1675    =over 4
1676    
1677    =item USAGE:
1678    
1679    C<< my $psc = $bbh->psc(); >>
1680    
1681    =item RETURNS:
1682    
1683    The numerical value of the BLAST E-value for the pair.
1684    
1685    =back
1686    
1687    =cut
1688    
1689    sub psc {
1690        my($self) = @_;
1691    
1692        return $self->{_psc};
1693    }
1694    
1695    
1696    
1697    =head3 norm_bitscore
1698    
1699    
1700    =over 4
1701    
1702    =item USAGE:
1703    
1704    C<< my $bsc = $bbh->norm_bitscore(); >>
1705    
1706    =item RETURNS:
1707    
1708    The "BLAST bit-score per aligned character" for the pair.
1709    
1710    =back
1711    
1712    =cut
1713    
1714    sub norm_bitscore {
1715        my($self) = @_;
1716    
1717        return $self->{_bit_score};
1718    }
1719    
1720    
1721    
1722    ########################################################################
1723    package AnnotationO;
1724    ########################################################################
1725    
1726    =head1 AnnotationO
1727    
1728    Methods for accessing SEED annotations.
1729    
1730    =cut
1731    
1732    
1733    
1734    =head3 new
1735    
1736    =over 4
1737    
1738    =item FUNCTION:
1739    
1740    Cronstruct a new "AnnotationO" object
1741    
1742    =item USAGE:
1743    
1744    C<< my $annotO = AnnotationO->new( $fid, $timestamp, $who, $text); >>
1745    
1746    =item C<$fid>
1747    
1748    A feature identifier.
1749    
1750    =item C<$timestamp>
1751    
1752    The C<UN*X> timestamp one wishes to associate with the annotation.
1753    
1754    =item C<$who>
1755    
1756    The annotator's user-name.
1757    
1758    =item C<$text>
1759    
1760    The textual content of the annotation.
1761    
1762    =item RETURNS:
1763    
1764    An "AnnotationO" object.
1765    
1766    =back
1767    
1768    =cut
1769    
1770    sub new {
1771        my($class,$fid,$timestamp,$who,$text) = @_;
1772    
1773        my $self = {};
1774        $self->{_fid} = $fid;
1775        $self->{_timestamp} = $timestamp;
1776        $self->{_who} = $who;
1777        $self->{_text} = $text;
1778        return bless $self, $class;
1779    }
1780    
1781    
1782    
1783    =head3 fid
1784    
1785    =over 4
1786    
1787    =item FUNCTION:
1788    
1789    Extract the feature-ID that was annotated.
1790    
1791    =item USAGE:
1792    
1793    C<< my $fid = $annotO->fid(); >>
1794    
1795    =item RETURNS;
1796    
1797    The feature-ID as a string.
1798    
1799    =back
1800    
1801    =cut
1802    
1803    sub fid {
1804        my($self) = @_;
1805    
1806        return $self->{_fid};
1807    }
1808    
1809    
1810    
1811    =head3 timestamp
1812    
1813    =over 4
1814    
1815    =item FUNCTION:
1816    
1817    Extract the C<UN*X> timestamp of the annotation.
1818    
1819    =item USAGE:
1820    
1821    C<< my $fid = $annotO->timestamp(); >>
1822    
1823    =item RETURNS;
1824    
1825    The timestamp as a string.
1826    
1827    =back
1828    
1829    =cut
1830    
1831    sub timestamp {
1832        my($self,$convert) = @_;
1833    
1834        if ($convert)
1835        {
1836            return scalar localtime($self->{_timestamp});
1837        }
1838        else
1839        {
1840            return $self->{_timestamp};
1841        }
1842    }
1843    
1844    
1845    
1846    =head3 made_by
1847    
1848    =over 4
1849    
1850    =item FUNCTION:
1851    
1852    Extract the annotator's user-name.
1853    
1854    =item USAGE:
1855    
1856    C<< my $fid = $annotO->made_by(); >>
1857    
1858    =item RETURNS;
1859    
1860    The username of the annotator, as a string.
1861    
1862    =back
1863    
1864    =cut
1865    
1866    sub made_by {
1867        my($self) = @_;
1868    
1869        my $who = $self->{_who};
1870        $who =~ s/^master://i;
1871        return $who;
1872    }
1873    
1874    
1875    
1876    =head3 text
1877    
1878    =over 4
1879    
1880    =item FUNCTION:
1881    
1882    Extract the text of the annotation.
1883    
1884    =item USGAE:
1885    
1886    C<< my $text = $annotO->text(); >>
1887    
1888    =item RETURNS:
1889    
1890    The text of the annotation, as a string.
1891    
1892    =back
1893    
1894    =cut
1895    
1896    sub text {
1897        my($self) = @_;
1898    
1899        my $text = $self->{_text};
1900        return $text;
1901    }
1902    
1903    
1904    =head3 display
1905    
1906    =over 4
1907    
1908    =item FUNCTION:
1909    
1910    Print the contents of an "AnnotationO" object to B<STDOUT>
1911    in human-readable form.
1912    
1913    =item USAGE:
1914    
1915    C<< my $annotO->display(); >>
1916    
1917    =item RETURNS:
1918    
1919    (void)
1920    
1921    =back
1922    
1923    =cut
1924    
1925    sub display {
1926        my($self) = @_;
1927    
1928        print join("\t",($self->fid,$self->timestamp(1),$self->made_by)),"\n",$self->text,"\n";
1929    }
1930    
1931    
1932    
1933    ########################################################################
1934    package CouplingO;
1935    ########################################################################
1936    use Data::Dumper;
1937    
1938    =head1 CouplingO
1939    
1940    Methods for accessing the "Functional coupling scores"
1941    of PEGs in close physical proximity to each other.
1942    
1943    =cut
1944    
1945    
1946    
1947    =head3 new
1948    
1949    =over 4
1950    
1951    =item FUNCTION:
1952    
1953    Construct a new "CouplingO" object
1954    encapsulating the "functional coupling" score
1955    between a pair of features in some genome.
1956    
1957    =item USAGE:
1958    
1959    C<< $couplingO = CouplingO->new($figO, $fid1, $fid2, $sc); >>
1960    
1961    =item C<$figO>
1962    
1963    Parent "FIGO" object.
1964    
1965    =item C<$fid1> and C<$fid2>
1966    
1967    A pair of feature-IDs.
1968    
1969    =item C<$sc>
1970    
1971    A functional-coupling score
1972    
1973    =item RETURNS:
1974    
1975    A "CouplingO" object.
1976    
1977    =back
1978    
1979    =cut
1980    
1981    sub new {
1982        my($class,$figO,$peg1,$peg2,$sc) = @_;
1983    
1984        ($peg1 =~ /^fig\|\d+\.\d+\.peg\.\d+$/) || return undef;
1985        ($peg2 =~ /^fig\|\d+\.\d+\.peg\.\d+$/) || return undef;
1986        my $self = {};
1987        $self->{_figO} = $figO;
1988        $self->{_peg1} = $peg1;
1989        $self->{_peg2} = $peg2;
1990        $self->{_sc}   = $sc;
1991        return bless $self, $class;
1992    }
1993    
1994    
1995    
1996    =head3 peg1
1997    
1998    =over 4
1999    
2000    =item FUNCTION:
2001    
2002    Returns a "FeatureO" object corresponding to the first FID in a coupled pair.
2003    
2004    =item USAGE:
2005    
2006    C<< my $peg1 = $couplingO->peg1(); >>
2007    
2008    =item RETURNS:
2009    
2010    A "FeatureO" object.
2011    
2012    =back
2013    
2014    =cut
2015    
2016    sub peg1 {
2017        my($self) = @_;
2018    
2019        my $figO = $self->{_figO};
2020        return new FeatureO($figO,$self->{_peg1});
2021    }
2022    
2023    
2024    
2025    =head3 peg2
2026    
2027    =over 4
2028    
2029    =item FUNCTION:
2030    
2031    Returns a "FeatureO" object corresponding to the second FID in a coupled pair.
2032    
2033    =item USAGE:
2034    
2035    C<< my $peg2 = $couplingO->peg2(); >>
2036    
2037    =item RETURNS:
2038    
2039    A "FeatureO" object.
2040    
2041    =back
2042    
2043    =cut
2044    
2045    sub peg2 {
2046        my($self) = @_;
2047    
2048        my $figO = $self->{_figO};
2049        return new FeatureO($figO,$self->{_peg2});
2050    }
2051    
2052    
2053    
2054    =head3 sc
2055    
2056    =over 4
2057    
2058    =item FUNCTION:
2059    
2060    Extracts the "functional coupling" score from a "CouplingO" object.
2061    
2062    =item USAGE:
2063    
2064    C<< my $sc = $couplingO->sc(); >>
2065    
2066    =item RETURNS:
2067    
2068    A scalar score.
2069    
2070    =back
2071    
2072    =cut
2073    
2074    sub sc {
2075        my($self) = @_;
2076    
2077        return $self->{_sc};
2078    }
2079    
2080    
2081    
2082    =head3 evidence
2083    
2084    =over 4
2085    
2086    =item FUNCTION:
2087    
2088    Fetch the evidence for a "functional coupling" between two close PEGs,
2089    in the form of a list of objects describing the "Pairs of Close Homologs" (PCHs)
2090    supporting the existence of a functional coupling between the two close PEGs.
2091    
2092    =item USAGE:
2093    
2094    C<< my $evidence = $couplingO->evidence(); >>
2095    
2096    =item RETURNS
2097    
2098    List of pairs of "FeatureO" objects.
2099    
2100    =back
2101    
2102    =cut
2103    
2104    sub evidence {
2105        my($self) = @_;
2106    
2107        my $figO = $self->{_figO};
2108        my $fig  = $figO->{_fig};
2109        my @ev = ();
2110        foreach my $tuple ($fig->coupling_evidence($self->peg1->id,$self->peg2->id))
2111        {
2112            my($peg3,$peg4,$rep) = @$tuple;
2113            push(@ev,[&FeatureO::new('FeatureO',$figO,$peg3),
2114                      &FeatureO::new('FeatureO',$figO,$peg4),
2115                      $rep]);
2116        }
2117        return @ev;
2118    }
2119    
2120    
2121    
2122    =head3 display
2123    
2124    =over 4
2125    
2126    =item FUNCTION:
2127    
2128    Print the contents of a "CouplingO" object to B<STDOUT> in human-readable form.
2129    
2130    =item USAGE:
2131    
2132    C<< $couplingO->display(); >>
2133    
2134    =item RETURNS:
2135    
2136    (Void)
2137    
2138    =back
2139    
2140    =cut
2141    
2142    sub display {
2143        my($self) = @_;
2144    
2145        print join("\t",($self->peg1,$self->peg2,$self->sc)),"\n";
2146    }
2147    
2148    
2149    
2150    ########################################################################
2151    package SubsystemO;
2152    ########################################################################
2153    use Data::Dumper;
2154    use Subsystem;
2155    
2156    =head1 SubsystemO
2157    
2158    =cut
2159    
2160    
2161    
2162    =head3 new
2163    
2164    =cut
2165    
2166    sub new {
2167        my($class,$figO,$name) = @_;
2168    
2169        my $self = {};
2170        $self->{_figO} = $figO;
2171        $self->{_id} = $name;
2172    
2173        return bless $self, $class;
2174    }
2175    
2176    
2177    
2178    =head3 id
2179    
2180    =cut
2181    
2182    sub id {
2183        my($self) = @_;
2184    
2185        return $self->{_id};
2186    }
2187    
2188    
2189    
2190    =head3 usable
2191    
2192    
2193    =cut
2194    
2195    sub usable {
2196        my($self) = @_;
2197    
2198        my $figO = $self->{_figO};
2199        my $fig  = $figO->{_fig};
2200        return $fig->usable_subsystem($self->id);
2201    }
2202    
2203    
2204    
2205    =head3 genomes
2206    
2207    =cut
2208    
2209    sub genomes {
2210        my($self) = @_;
2211    
2212        my $figO = $self->{_figO};
2213        my $subO = $self->{_subO};
2214        if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }
2215    
2216        return map { &GenomeO::new('GenomeO',$figO,$_) } $subO->get_genomes;
2217    }
2218    
2219    
2220    
2221    =head3 roles
2222    
2223    =cut
2224    
2225    sub roles {
2226        my($self) = @_;
2227    
2228        my $figO = $self->{_figO};
2229        my $subO = $self->{_subO};
2230        if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }
2231    
2232        return map { &FunctionalRoleO::new('FunctionalRoleO',$figO,$_) }  $subO->get_roles($self->id);
2233    }
2234    
2235    
2236    
2237    =head3 curator
2238    
2239    =cut
2240    
2241    sub curator {
2242        my($self) = @_;
2243    
2244        my $figO = $self->{_figO};
2245      my $subO = $self->{_subO};      my $subO = $self->{_subO};
2246      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }      if (! $subO) { $subO = $self->{_subO} = new Subsystem($self->{_id},$figO->{_fig}); }
2247    
2248      return $subO->get_curator;      return $subO->get_curator;
2249  }  }
2250    
2251    
2252    
2253    
2254    =head3 variant
2255    
2256    =cut
2257    
2258  sub variant {  sub variant {
2259      my($self,$genome) = @_;      my($self,$genome) = @_;
2260    
# Line 576  Line 2265 
2265      return $subO->get_variant_code_for_genome($genome->id);      return $subO->get_variant_code_for_genome($genome->id);
2266  }  }
2267    
2268    
2269    
2270    =head3 pegs_in_cell
2271    
2272    =cut
2273    
2274  sub pegs_in_cell {  sub pegs_in_cell {
2275      my($self,$genome,$role) = @_;      my($self,$genome,$role) = @_;
2276    
# Line 586  Line 2281 
2281      return $subO->get_pegs_from_cell($genome->id,$role->id);      return $subO->get_pegs_from_cell($genome->id,$role->id);
2282  }  }
2283    
 package FunctionalRoleO;  
2284    
2285    
2286    ########################################################################
2287    package FunctionalRoleO;
2288    ########################################################################
2289  use Data::Dumper;  use Data::Dumper;
2290    
2291    =head1 FunctionalRoleO
2292    
2293    Methods for accessing the functional roles of features.
2294    
2295    =cut
2296    
2297    
2298    =head3 new
2299    
2300    =cut
2301    
2302  sub new {  sub new {
2303      my($class,$figO,$fr) = @_;      my($class,$figO,$fr) = @_;
2304    
# Line 599  Line 2308 
2308      return bless $self, $class;      return bless $self, $class;
2309  }  }
2310    
2311    
2312    
2313    =head3 id
2314    
2315    =cut
2316    
2317  sub id {  sub id {
2318      my($self) = @_;      my($self) = @_;
2319    
2320      return $self->{_id};      return $self->{_id};
2321  }  }
2322    
 package FigFamO;  
2323    
2324    
2325    ########################################################################
2326    package FigFamO;
2327    ########################################################################
2328  use FigFams;  use FigFams;
2329  use FigFam;  use FigFam;
2330    
2331    
2332    =head1 FigFamO
2333    
2334    =cut
2335    
2336    
2337    =head3 new
2338    
2339    =cut
2340    
2341  sub new {  sub new {
2342      my($class,$figO,$id) = @_;      my($class,$figO,$id) = @_;
2343    
# Line 619  Line 2347 
2347      return bless $self, $class;      return bless $self, $class;
2348  }  }
2349    
2350    
2351    
2352    =head3 id
2353    
2354    =cut
2355    
2356  sub id {  sub id {
2357      my($self) = @_;      my($self) = @_;
2358    
2359      return $self->{_id};      return $self->{_id};
2360  }  }
2361    
2362    
2363    =head3 function
2364    
2365    =cut
2366    
2367  sub function {  sub function {
2368      my($self) = @_;      my($self) = @_;
2369    
# Line 635  Line 2374 
2374      return $famO->family_function;      return $famO->family_function;
2375  }  }
2376    
2377    
2378    
2379    =head3 members
2380    
2381    =cut
2382    
2383  sub members {  sub members {
2384      my($self) = @_;      my($self) = @_;
2385    
# Line 643  Line 2388 
2388      my $famO = $self->{_famO};      my $famO = $self->{_famO};
2389      if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }      if (! $famO) { $famO = $self->{_famO} = &FigFam::new('FigFam',$fig,$self->id) }
2390    
2391      return map { &FigFamO::new('FigFamO',$figO,$_) } $famO->list_members;      return map { &FeatureO::new('FeatureO',$figO,$_) } $famO->list_members;
2392  }  }
2393    
2394    =head3 rep_seqs
2395    
2396    =cut
2397    
2398  sub rep_seqs {  sub rep_seqs {
2399      my($self) = @_;      my($self) = @_;
2400    
# Line 657  Line 2406 
2406      return $famO->representatives;      return $famO->representatives;
2407  }  }
2408    
2409    
2410    
2411    =head3 should_be_member
2412    
2413    =cut
2414    
2415  sub should_be_member {  sub should_be_member {
2416      my($self,$seq) = @_;      my($self,$seq) = @_;
2417    
# Line 670  Line 2425 
2425    
2426    
2427    
2428    =head3 display
2429    
2430    =cut
2431    
2432  sub display {  sub display {
2433      my($self) = @_;      my($self) = @_;
2434    
# Line 678  Line 2437 
2437    
2438    
2439    
2440    ########################################################################
2441  package Attribute;  package Attribute;
2442    ########################################################################
2443    =head1 Attribute
2444    
2445    (Note yet implemented.)
2446    
2447    =cut
2448    
2449  1;  1;
2450    __END__
2451    
2452    =head1 Examples
2453    
2454    =head3 Display all complete, prokaryotic genomes
2455    
2456    use FIGO;
2457    my $figO = new FIGO;
2458    
2459    foreach $genome ($figO->genomes('complete','prokaryotic'))
2460    {
2461        $genome->display;
2462    }
2463    
2464    #---------------------------------------------
2465    
2466    use FIG;
2467    my $fig = new FIG;
2468    
2469    foreach $genome (grep { $fig->is_prokaryotic($_) } $fig->genomes('complete'))
2470    {
2471        print join("\t",("Genome",$genome,$fig->genus_species($genome))),"\n";
2472    }
2473    
2474    ###############################################
2475    
2476    =head3 Show how to access contigs and extract sequence
2477    
2478    use FIGO;
2479    my $figO = new FIGO;
2480    
2481    $genomeId = '83333.1';
2482    my $genome = new GenomeO($figO,$genomeId);
2483    
2484    foreach $contig ($genome->contigs_of)
2485    {
2486        $tag1 = $contig->dna_seq(1,10);
2487        $tag2 = $contig->dna_seq(10,1);
2488        print join("\t",($tag1,$tag2,$contig->id,$contig->contig_length)),"\n";
2489    }
2490    
2491    #---------------------------------------------
2492    
2493    use FIG;
2494    my $fig = new FIG;
2495    
2496    $genomeId = '83333.1';
2497    
2498    $contig_lengths = $fig->contig_lengths($genomeId);
2499    
2500    foreach $contig ($fig->contigs_of($genomeId))
2501    {
2502        $tag1 = $fig->dna_seq($genomeId,join("_",($contig,1,10)));
2503        $tag2 = $fig->dna_seq($genomeId,join("_",($contig,10,1)));
2504        print join("\t",($tag1,$tag2,$contig,$contig_lengths->{$contig})),"\n";
2505    }
2506    
2507    ###############################################
2508    
2509    ### accessing data related to features
2510    
2511    use FIGO;
2512    my $figO = new FIGO;
2513    
2514    my $genome = new GenomeO($figO,"83333.1");
2515    my $peg  = "fig|83333.1.peg.4";
2516    my $pegO = new FeatureO($figO,$peg);
2517    
2518    print join("\t",$pegO->id,$pegO->location,$pegO->function_of),"\n",
2519          $pegO->dna_seq,"\n",
2520          $pegO->prot_seq,"\n";
2521    
2522    foreach $fidO ($genome->features_of('rna'))
2523    {
2524        print join("\t",$fidO->id,$fidO->location,$fidO->function_of),"\n";
2525    }
2526    
2527    #---------------------------------------------
2528    
2529    
2530    use FIG;
2531    my $fig = new FIG;
2532    
2533    my $genome = "83333.1";
2534    my $peg  = "fig|83333.1.peg.4";
2535    
2536    print join("\t",$peg,scalar $fig->feature_location($peg),scalar $fig->function_of($peg)),"\n",
2537          $fig->dna_seq($genome,$fig->feature_location($peg)),"\n",
2538          $fig->get_translation($peg),"\n";
2539    
2540    foreach $fid ($fig->all_features($genome,'rna'))
2541    {
2542        print join("\t",$fid,scalar $fig->feature_location($fid),scalar $fig->function_of($fid)),"\n";
2543    }
2544    
2545    ###############################################
2546    
2547    ### accessing similarities
2548    
2549    use FIGO;
2550    my $figO = new FIGO;
2551    
2552    $peg  = "fig|83333.1.peg.4";
2553    $pegO = new FeatureO($figO,$peg);
2554    
2555    @sims = $pegO->sims;  # use sims( -all => 1, -max => 10000, -cutoff => 1.0e-20) to all
2556                          # sims (including non-FIG sequences
2557    foreach $sim (@sims)
2558    {
2559        $peg2  = $sim->id2;
2560        $pegO2 = new FeatureO($figO,$peg2);
2561        $func  = $pegO2->function_of;
2562        $sc    = $sim->psc;
2563        print join("\t",($peg2,$sc,$func)),"\n";
2564    }
2565    
2566    #---------------------------------------------
2567    
2568    
2569    use FIG;
2570    my $fig = new FIG;
2571    
2572    $peg  = "fig|83333.1.peg.4";
2573    
2574    @sims = $fig->sims($peg,1000,1.0e-5,"fig");
2575    foreach $sim (@sims)
2576    {
2577        $peg2  = $sim->id2;
2578        $func  = $fig->function_of($peg2);
2579        $sc    = $sim->psc;
2580        print join("\t",($peg2,$sc,$func)),"\n";
2581    }
2582    
2583    ###############################################
2584    
2585    ### accessing BBHs
2586    
2587    use FIGO;
2588    my $figO = new FIGO;
2589    
2590    $peg  = "fig|83333.1.peg.4";
2591    $pegO = new FeatureO($figO,$peg);
2592    
2593    @bbhs = $pegO->bbhs;
2594    foreach $bbh (@bbhs)
2595    {
2596        $peg2  = $bbh->peg2;
2597        $pegO2 = new FeatureO($figO,$peg2);
2598        $func  = $pegO2->function_of;
2599        $sc    = $bbh->psc;
2600        print join("\t",($peg2,$sc,$func)),"\n";
2601    }
2602    
2603    #---------------------------------------------
2604    
2605    use FIG;
2606    my $fig = new FIG;
2607    
2608    $peg  = "fig|83333.1.peg.4";
2609    
2610    @bbhs = $fig->bbhs($peg);
2611    foreach $bbh (@bbhs)
2612    {
2613        ($peg2,$sc,$bit_score) = @$bbh;
2614        $func  = $fig->function_of($peg2);
2615        print join("\t",($peg2,$sc,$func)),"\n";
2616    }
2617    
2618    ###############################################
2619    
2620    ### accessing annotations
2621    
2622    use FIGO;
2623    my $figO = new FIGO;
2624    
2625    $peg  = "fig|83333.1.peg.4";
2626    $pegO = new FeatureO($figO,$peg);
2627    
2628    @annotations = $pegO->annotations;
2629    
2630    foreach $ann (@annotations)
2631    {
2632        print join("\n",$ann->fid,$ann->timestamp(1),$ann->made_by,$ann->text),"\n\n";
2633    }
2634    
2635    #---------------------------------------------
2636    
2637    use FIG;
2638    my $fig = new FIG;
2639    
2640    $peg = "fig|83333.1.peg.4";
2641    @annotations = $fig->feature_annotations($peg);
2642    foreach $_ (@annotations)
2643    {
2644        (undef,$ts,$who,$text) = @$_;
2645        $who =~ s/master://i;
2646        print "$ts\n$who\n$text\n\n";
2647    }
2648    
2649    ###############################################
2650    
2651    ### accessing coupling data
2652    
2653    
2654    use FIGO;
2655    my $figO = new FIGO;
2656    
2657    my $peg  = "fig|83333.1.peg.4";
2658    my $pegO = new FeatureO($figO,$peg);
2659    foreach $coupled ($pegO->coupled_to)
2660    {
2661        print join("\t",($coupled->peg1,$coupled->peg2,$coupled->sc)),"\n";
2662        foreach $tuple ($coupled->evidence)
2663        {
2664            my($peg3O,$peg4O,$rep) = @$tuple;
2665            print "\t",join("\t",($peg3O->id,$peg4O->id,$rep)),"\n";
2666        }
2667        print "\n";
2668    }
2669    
2670    #---------------------------------------------
2671    
2672    
2673    use FIG;
2674    my $fig = new FIG;
2675    
2676    my $peg1  = "fig|83333.1.peg.4";
2677    foreach $coupled ($fig->coupled_to($peg1))
2678    {
2679        ($peg2,$sc) = @$coupled;
2680        print join("\t",($peg1,$peg2,$sc)),"\n";
2681        foreach $tuple ($fig->coupling_evidence($peg1,$peg2))
2682        {
2683            my($peg3,$peg4,$rep) = @$tuple;
2684            print "\t",join("\t",($peg3,$peg4,$rep)),"\n";
2685        }
2686        print "\n";
2687    }
2688    
2689    ###############################################
2690    
2691    =head3 Accessing Subsystem data
2692    
2693    use FIGO;
2694    my $figO = new FIGO;
2695    
2696    foreach $sub ($figO->subsystems)
2697    {
2698        if ($sub->usable)
2699        {
2700            print join("\t",($sub->id,$sub->curator)),"\n";
2701    
2702            print "\tRoles\n";
2703            @roles = $sub->roles;
2704            foreach $role (@roles)
2705            {
2706                print "\t\t",join("\t",($role->id)),"\n";
2707            }
2708    
2709            print "\tGenomes\n";
2710            foreach $genome ($sub->genomes)
2711            {
2712                print "\t\t",join("\t",($sub->variant($genome),
2713                                        $genome->id,
2714                                        $genome->genus_species)),"\n";
2715                @pegs = ();
2716                foreach $role (@roles)
2717                {
2718                    push(@pegs,$sub->pegs_in_cell($genome,$role));
2719                }
2720                print "\t\t\t",join(",",@pegs),"\n";
2721            }
2722        }
2723    }
2724    
2725    #---------------------------------------------
2726    
2727    use FIG;
2728    my $fig = new FIG;
2729    
2730    foreach $sub (grep { $fig->usable_subsystem($_) } $fig->all_subsystems)
2731    {
2732        $subO = new Subsystem($sub,$fig);
2733        $curator = $subO->get_curator;
2734        print join("\t",($sub,$curator)),"\n";
2735    
2736        print "\tRoles\n";
2737        @roles = $subO->get_roles;
2738        foreach $role (@roles)
2739        {
2740            print "\t\t",join("\t",($role)),"\n";
2741        }
2742    
2743        print "\tGenomes\n";
2744        foreach $genome ($subO->get_genomes)
2745        {
2746            print "\t\t",join("\t",($subO->get_variant_code_for_genome($genome),
2747                                    $genome,
2748                                    $fig->genus_species($genome))),"\n";
2749            foreach $role (@roles)
2750            {
2751                push(@pegs,$subO->get_pegs_from_cell($genome,$role));
2752            }
2753            print "\t\t\t",join(",",@pegs),"\n";
2754        }
2755        print "\n";
2756    }
2757    
2758    ###############################################
2759    
2760    =head3 Accessing FIGfams
2761    
2762    use FIGO;
2763    my $figO = new FIGO;
2764    
2765    foreach $fam ($figO->all_figfams)
2766    {
2767        print join("\t",($fam->id,$fam->function)),"\n";
2768        foreach $pegO ($fam->members)
2769        {
2770            $peg = $pegO->id;
2771            print "\t$peg\n";
2772        }
2773    }
2774    
2775    #---------------------------------------------
2776    
2777    use FIG;
2778    use FigFam;
2779    use FigFams;
2780    
2781    my $fig = new FIG;
2782    my $figfams = new FigFams($fig);
2783    
2784    foreach $fam ($figfams->all_families)
2785    {
2786        my $figfam = new FigFam($fig,$fam);
2787        print join("\t",($fam,$figfam->family_function)),"\n";
2788        foreach $peg ($figfam->list_members)
2789        {
2790            print "\t$peg\n";
2791        }
2792    }
2793    
2794    ###############################################
2795    
2796    =head3 Placing a sequence into a FIGfam
2797    
2798    use FIGO;
2799    my $figO = new FIGO;
2800    
2801    $seq = "MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILS
2802    AFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTH
2803    IAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETV
2804    AIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQ
2805    LVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNA
2806    MDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRA
2807    LRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
2808    RKLMMNHQ";
2809    $seq =~ s/\n//gs;
2810    
2811    my($fam,$sims) = $figO->family_containing($seq);
2812    
2813    if ($fam)
2814    {
2815        print join("\t",($fam->id,$fam->function)),"\n";
2816        print &Dumper($sims);
2817    }
2818    else
2819    {
2820        print "Could not place it in a family\n";
2821    }
2822    
2823    #---------------------------------------------
2824    
2825    use FIG;
2826    use FigFam;
2827    use FigFams;
2828    
2829    my $fig = new FIG;
2830    my $figfams = new FigFams($fig);
2831    
2832    $seq = "MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILS
2833    AFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTH
2834    IAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETV
2835    AIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQ
2836    LVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNA
2837    MDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRA
2838    LRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
2839    RKLMMNHQ";
2840    $seq =~ s/\n//gs;
2841    
2842    my($fam,$sims) = $figfams->place_in_family($seq);
2843    
2844    if ($fam)
2845    {
2846        print join("\t",($fam->family_id,$fam->family_function)),"\n";
2847        print &Dumper($sims);
2848    }
2849    else
2850    {
2851        print "Could not place it in a family\n";
2852    }
2853    
2854    ###############################################
2855    
2856    =head3 Getting representative sequences for a FIGfam
2857    
2858    use FIGO;
2859    my $figO = new FIGO;
2860    
2861    $fam         = "FIG102446";
2862    my $famO     = &FigFamO::new('FigFamO',$figO,$fam);
2863    my @rep_seqs = $famO->rep_seqs;
2864    
2865    foreach $seq (@rep_seqs)
2866    {
2867        print ">query\n$seq\n";
2868    }
2869    
2870    #---------------------------------------------
2871    
2872    use FIG;
2873    use FigFam;
2874    use FigFams;
2875    
2876    my $fig = new FIG;
2877    
2878    $fam         = "FIG102446";
2879    my $famO     = new FigFam($fig,$fam);
2880    my @rep_seqs = $famO->representatives;
2881    
2882    foreach $seq (@rep_seqs)
2883    {
2884        print ">query\n$seq\n";
2885    }
2886    
2887    
2888    ###############################################
2889    
2890    
2891    =head3 Testing for membership in FIGfam
2892    
2893    use FIGO;
2894    my $figO = new FIGO;
2895    
2896    $seq = "MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILS
2897    AFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTH
2898    IAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETV
2899    AIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQ
2900    LVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNA
2901    MDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRA
2902    LRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
2903    RKLMMNHQ";
2904    $seq =~ s/\n//gs;
2905    
2906    $fam                  = "FIG102446";
2907    my $famO              = &FigFamO::new('FigFamO',$figO,$fam);
2908    my($should_be, $sims) = $famO->should_be_member($seq);
2909    
2910    if ($should_be)
2911    {
2912        print join("\t",($famO->id,$famO->function)),"\n";
2913        print &Dumper($sims);
2914    }
2915    else
2916    {
2917        print "Sequence should not be added to family\n";
2918    }
2919    
2920    #---------------------------------------------
2921    
2922    use FIG;
2923    use FigFam;
2924    use FigFams;
2925    
2926    my $fig = new FIG;
2927    
2928    $seq = "MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILS
2929    AFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTH
2930    IAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETV
2931    AIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQ
2932    LVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNA
2933    MDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRA
2934    LRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
2935    RKLMMNHQ";
2936    $seq =~ s/\n//gs;
2937    
2938    $fam                  = "FIG102446";
2939    my $famO              = new FigFam($fig,$fam);
2940    my($should_be, $sims) = $famO->should_be_member($seq);
2941    
2942    if ($should_be)
2943    {
2944        print join("\t",($famO->family_id,$famO->family_function)),"\n";
2945        print &Dumper($sims);
2946    }
2947    else
2948    {
2949        print "Sequence should not be added to family\n";
2950    }
2951    
2952    =cut
2953    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.30

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3