[Bio] / FigKernelPackages / FIGO.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/FIGO.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.26, Wed Sep 26 04:03:48 2007 UTC revision 1.31, Sat Dec 1 22:17:03 2007 UTC
# Line 17  Line 17 
17  #  #
18  ########################################################################  ########################################################################
19    
20    =head1 TODO
21    
22    =over 4
23    
24    =item Null arg to ContigO::dna_seq() should return entire contig seq.
25    
26    =item Add method to access "FIG::crude_estimate_of_distance()"
27    
28    =back
29    
30    =cut
31    
32  =head1 Overview  =head1 Overview
33    
34  This module is a set of packages encapsulating the SEED's core methods  This module is a set of packages encapsulating the SEED's core methods
35  using an "OOP-like" style.  using an "OOP-like" style.
36    
37  There are several modules clearly related to "individual genomes:"  There are several modules clearly related to "individual genomes:"
38  FIGO, GenomeO, ContigO, FeatureO (and I<maybe> AnnotationO).  GenomeO, ContigO, FeatureO (and I<maybe> AnnotationO).
39    
40  There are also modules that deal with complex relationships between  There are also modules that deal with complex relationships between
41  pairs or sets of features in one, two, or more genomes,  pairs or sets of features in one, two, or more genomes,
# Line 32  Line 44 
44    
45  Finally, the methods in "Attribute" might in principle attach  Finally, the methods in "Attribute" might in principle attach
46  "atributes" to any type of object.  "atributes" to any type of object.
47  (Likewise, in principle one might like to attach an "annotation"  (Likewise, in principle one might also want to attach an "annotation"
48  to any type of object  to any type of object,
49    although currently we only support annotations of "features.")
50    
51  The four modules dealing with "genomes" have a reasonable clear  The three modules that act on "individual genomes" have a reasonable clear
52  "implied heirarchy:"  "implied heirarchy" relative to FIGO:
53    
54  =over 4  =over 4
55    
# Line 693  Line 706 
706    
707  =item RETURNS:  =item RETURNS:
708    
709      string of DNA sequence running from $beg to $end  String containing DNA subsequence running from $beg to $end
710      (NOTE: if $beg > $end, returns reverse complement of DNA subsequence.)      (NOTE: if $beg > $end, returns reverse complement of DNA subsequence.)
711    
712  =back  =back
# Line 1204  Line 1217 
1217  sub possible_frameshift {  sub possible_frameshift {
1218      my($self) = @_;      my($self) = @_;
1219      my $figO = $self->{_figO};      my $figO = $self->{_figO};
1220      my($tmp_dir) = $figO->{_tmp_dir};      my $fig = $figO->{_fig};
   
     #...Skip tests and return '0' if truncated...  
     if (! $self->possibly_truncated)  
     {  
         #...Get best precomputed BLAST hit if E-value < 1.0e-50:  
         my @sims = $self->sims( -max => 1, -cutoff => 1.0e-50);  
   
         #...If a sim was returned:  
         if (my $sim = shift @sims)  
         {  
             #...Get best hit FID and boundaries:  
             my $peg2 = $sim->id2;  
             my $ln1  = $sim->ln1;  
             my $ln2  = $sim->ln2;  
             my $b2   = $sim->b2;  
             my $e2   = $sim->e2;  
   
             #...Convert from AA to BP, and pad out w/ 100 bp guard region:  
             my $adjL = 100 + (($b2-1) * 3);  
             my $adjR = 100 + (($ln2 - $e2) * 3);  
   
             #...If hit is more than 20% longer than query:  
             if ($ln2 > (1.2 * $ln1))  
             {  
                 #...Get and parse query location:  
                 my $loc = $self->location;  
                 if ($loc =~ /^(\S+)_(\d+)_(\d+)/)  
                 {  
                     my $contig = $1;  
                     my $beg    = $2;  
                     my $end    = $3;  
   
                     #...Create new ContigO object:  
                     my $contigO = new ContigO($figO, $self->genome->id, $contig);  
   
                     #...Extract DNA subsequence, including guard regions:  
                     my $begA = &max(1, $beg - $adjL);  
                     my $endA = &min($end+$adjR, $contigO->contig_length);  
                     my $dna  = $contigO->dna_seq($begA,$endA);  
                     if (defined($dna) && (length($dna) > 90))  
                     {  
                         #...Open tmp-file and write FASTA containing DNA subregion to be BLASTed:  
                         open( TMP, ">$tmp_dir/tmp_dna") || die "could not open tmp_dna";  
                         print TMP  ">dna\n$dna\n";  
                         close(TMP);  
   
                         #...Create new FeatureO object corresponding tp $peg2:  
                         my $pegO2 = new FeatureO($figO,$peg2);  
   
                         #...Fetch its translation, and print to tmp FASTA file for BLASTing:  
                         my $prot  = $pegO2->prot_seq;  
                         if (defined($prot) && (length($prot) > 30))  
                         {  
                             open( TMP, ">$tmp_dir/tmp_prot") || die "could not open tmp_prot";  
                             print TMP  ">tmp_prot\n$prot\n";  
                             close(TMP);  
   
                             #...Build BLAST nucleotide database for extracted DNA region,  
                             #   and TBLASTN $peg2 against the DNA:  
                             &run("formatdb -i $tmp_dir/tmp_dna -pF");  
                             open(BLAST,"blastall -i $tmp_dir/tmp_prot -d $tmp_dir/tmp_dna -p tblastn -FF -e 1.0e-50 |")  
                                 || die "could not blast";  
   
                             #...Parse the TBLASTN output; find and sort HSPs by left boundary:  
                             my $db_seq_out = &gjoparseblast::next_blast_subject(\*BLAST,1);  
                             my @hsps       = sort { $a->[0] <=> $b->[0] }  
                                               map { [$_->[9], $_->[10], $_->[12], $_->[13]] }  
                                               grep { $_->[1] < 1.0e-50 }  
                                               @ { $db_seq_out->[6] };  
   
                             #...Extract HSP boundary pairs:  
                             my @prot = map { [$_->[0], $_->[1]] } @hsps;  
                             my @dna  = map { [$_->[2], $_->[3]] } @hsps;  
1221    
1222                              #...If the "cover" of the HSPs covers more than 90% of $peg2 w gaps < 3 AA,      return $fig->possible_frameshift($self->id);
                             #   and the "cover" of the HPSs cover more than 90% of the extracted DNA  
                             #   w/ gaps < 9 bp (but not a multiple of 3), suspect a possible frameshift:  
                             if (&covers(\@prot,length($prot),3,0) && &covers(\@dna,3*length($prot),9,1))  
                             {  
                                 return 1;  
                             }  
                         }  
                     }  
                 }  
             }  
         }  
     }  
     return 0;  
1223  }  }
1224    
1225    
# Line 1383  Line 1310 
1310      return ($x < $y) ? $x : $y;      return ($x < $y) ? $x : $y;
1311  }  }
1312    
   
   
 =head3 covers  
   
 (Question: Should this function be considered "PRIVATE" ???)  
   
 USAGE:  
     C<< if (&covers(\@hits, $len, $diff, $must_shift)) { #...Do stuff } >>  
   
 Returns boolean C<TRUE> if a set of BLAST HSPs "cover" more than 90%  
 of the database sequence(?).  
   
 =cut  
   
 sub covers {  
     my($hsps,$ln,$diff,$must_shift) = @_;  
   
     my $hsp1 = shift @$hsps;  
     my $hsp2;  
     my $merged = 0;  
     while ($hsp1 && ($hsp2 = shift @$hsps) &&  
            ($must_shift ? &diff_frames($hsp1,$hsp2) : 1) &&  
            ($hsp1 = &merge($hsp1,$hsp2,$diff))) { $merged = 1 }  
     return ($merged && $hsp1 && (($hsp1->[1] - $hsp1->[0]) > (0.9 * $ln)));  
 }  
   
 sub diff_frames {  
     my($hsp1,$hsp2) = @_;  
     return (($hsp1->[0] % 3) != ($hsp2->[0] % 3));  
 }  
   
   
   
 =head3 merge  
   
 Merge two HSPs unless their overlap or separation is too large.  
   
 RETURNS: Merged boundaries if merger succeeds, and C<undef> if merger fails.  
   
 =cut  
   
 sub merge {  
     my($hsp1,$hsp2,$diff) = @_;  
   
     my($b1,$e1) = @$hsp1;  
     my($b2,$e2) = @$hsp2;  
     return (($e2 > $e1) && (abs($b2-$e1) <= $diff)) ? [$b1,$e2] : undef;  
 }  
   
   
   
1313  =head3 sims  =head3 sims
1314    
1315  =over 4  =over 4

Legend:
Removed from v.1.26  
changed lines
  Added in v.1.31

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3