[Bio] / FigKernelPackages / SeedUtils.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/SeedUtils.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.14, Sat Nov 7 17:42:08 2009 UTC revision 1.15, Tue Nov 24 19:23:36 2009 UTC
# Line 24  Line 24 
24      use strict;      use strict;
25      no warnings qw(once);      no warnings qw(once);
26      use base qw(Exporter);      use base qw(Exporter);
27      our @EXPORT = qw(hypo boundaries_of parse_fasta_record create_fasta_record rev_comp genome_of min max sims verify_dir);      our @EXPORT = qw(hypo boundaries_of parse_fasta_record create_fasta_record
28                         rev_comp genome_of min max sims verify_dir between translate
29                         standard_genetic_code);
30    
31  =head1 SEED Utility Methods  =head1 SEED Utility Methods
32    
# Line 286  Line 288 
288      }      }
289  }  }
290    
291    
292    =head3 by_fig_id
293    
294        my @sorted_by_fig_id = sort { SeedUtils::by_fig_id($a,$b) } @fig_ids;
295    
296    Compare two feature IDs.
297    
298    This function is designed to assist in sorting features by ID. The sort is by
299    genome ID followed by feature type and then feature number.
300    
301    =over 4
302    
303    =item a
304    
305    First feature ID.
306    
307    =item b
308    
309    Second feature ID.
310    
311    =item RETURN
312    
313    Returns a negative number if the first parameter is smaller, zero if both parameters
314    are equal, and a positive number if the first parameter is greater.
315    
316    =back
317    
318    =cut
319    
# Comparison function for sorting FIG feature IDs of the form
# "fig|<genome>.<version>.<type>.<number>" (for example "fig|83333.1.peg.4").
#
# Parameters:
#   $fid_a, $fid_b - the two feature IDs to compare.
#
# Returns a negative number, zero, or a positive number when the first ID
# sorts before, equal to, or after the second. Well-formed IDs are ordered by
# genome ID, then feature type (string order), then feature number (numeric).
# If either ID is not a well-formed FIG ID, the two are compared as strings.
sub by_fig_id {
    # Do not name these $a/$b: lexicals with those names shadow the package
    # variables that sort() uses.
    my ($fid_a, $fid_b) = @_;

    # The separators are literal dots; they must be escaped so that strings
    # such as "fig|83333.1xpeg.4" are not accepted as feature IDs.
    my $fid_re = qr/^fig\|(\d+\.\d+)\.([^\.]+)\.(\d+)$/;

    my ($g1, $t1, $n1) = $fid_a =~ $fid_re;
    my ($g2, $t2, $n2) = $fid_b =~ $fid_re;

    my $order;
    if (defined $g1 && defined $g2) {
        # Genome IDs are compared numerically first (the historical order).
        # Distinct IDs such as 83333.1 and 83333.10 are numerically equal, so
        # a string comparison breaks the tie and keeps the features of
        # different genomes from being interleaved.
        $order = ($g1 <=> $g2)
              || ($g1 cmp $g2)
              || ($t1 cmp $t2)
              || ($n1 <=> $n2);
    }
    else {
        $order = $fid_a cmp $fid_b;
    }
    return $order;
}
330    
331    
332  =head3 genome_of  =head3 genome_of
333    
334      my $genomeID = genome_of($fid);      my $genomeID = genome_of($fid);
# Line 653  Line 696 
696    
697  =head3 between  =head3 between
698    
699      my $flag = FIG::between($x, $y, $z);      my $flag = between($x, $y, $z);
   
 or  
   
     my $flag = $fig->between($x, $y, $z);  
700    
701  Determine whether or not $y is between $x and $z.  Determine whether or not $y is between $x and $z.
702    
# Line 698  Line 737 
737      }      }
738  }  }
739    
740    =head3 standard_genetic_code
741    
742        my $code = standard_genetic_code();
743    
744    Return a reference to a hash containing the standard translation of nucleotide triples to amino acids.
745    Methods such as L</translate> can take a translation scheme as a parameter. This method
746    returns the default translation scheme. The scheme is implemented as a reference to a
747    hash that contains nucleotide triplets as keys and has protein letters as values.
748    
749    =cut
750    
# Build and return the standard genetic code.
#
# Takes no parameters. Returns a reference to a newly created hash that maps
# each of the 64 upper-case DNA triplets to a one-letter amino acid code, with
# "*" marking the three stop codons. A fresh hash is built on every call, so
# callers may modify the result freely.
sub standard_genetic_code {
    # Rows are grouped by the first two bases; the third base runs A, C, G, T.
    return {
        AAA => "K", AAC => "N", AAG => "K", AAT => "N",
        ACA => "T", ACC => "T", ACG => "T", ACT => "T",
        AGA => "R", AGC => "S", AGG => "R", AGT => "S",
        ATA => "I", ATC => "I", ATG => "M", ATT => "I",

        CAA => "Q", CAC => "H", CAG => "Q", CAT => "H",
        CCA => "P", CCC => "P", CCG => "P", CCT => "P",
        CGA => "R", CGC => "R", CGG => "R", CGT => "R",
        CTA => "L", CTC => "L", CTG => "L", CTT => "L",

        GAA => "E", GAC => "D", GAG => "E", GAT => "D",
        GCA => "A", GCC => "A", GCG => "A", GCT => "A",
        GGA => "G", GGC => "G", GGG => "G", GGT => "G",
        GTA => "V", GTC => "V", GTG => "V", GTT => "V",

        TAA => "*", TAC => "Y", TAG => "*", TAT => "Y",
        TCA => "S", TCC => "S", TCG => "S", TCT => "S",
        TGA => "*", TGC => "C", TGG => "W", TGT => "C",
        TTA => "L", TTC => "F", TTG => "L", TTT => "F",
    };
}
822    
823    =head3 translate
824    
825        my $aa_seq = translate($dna_seq, $code, $fix_start);
826    
827    Translate a DNA sequence to a protein sequence using the specified genetic code.
828    If I<$fix_start> is TRUE, will translate an initial C<TTG> or C<GTG> code to
829    C<M>. (In the standard genetic code, these two combinations normally translate
830    to C<L> and C<V>, respectively.)
831    
832    =over 4
833    
834    =item dna_seq
835    
836    DNA sequence to translate. Note that the DNA sequence can only contain
837    known nucleotides.
838    
839    =item code
840    
841    Reference to a hash specifying the translation code. The hash is keyed by
842    nucleotide triples, and the value for each key is the corresponding protein
843    letter. If this parameter is omitted, the L</standard_genetic_code> will be
844    used.
845    
846    =item fix_start
847    
848    TRUE if the first triple is to get special treatment, else FALSE. If TRUE,
849    then a value of C<TTG> or C<GTG> in the first position will be translated to
850    C<M> instead of the value specified in the translation code.
851    
852    =item RETURN
853    
854    Returns a string resulting from translating each nucleotide triple into a
855    protein letter.
856    
857    =back
858    
859    =cut
860    #: Return Type $;
# Translate a DNA sequence into a protein sequence.
#
# May be called as a plain function or as a method; an invocant belonging to
# this package is discarded.
#
# Parameters:
#   $dna   - DNA string; case is ignored. An undefined value is treated as
#            the empty string.
#   $code  - optional reference to a hash mapping upper-case nucleotide
#            triplets to protein letters. Defaults to the table returned by
#            standard_genetic_code() in this module.
#   $start - if true, an initial TTG or GTG triplet is translated to "M"
#            regardless of what the translation code says.
#
# Returns one letter per complete triplet. Triplets with no entry in the
# translation code become "X"; trailing bases that do not form a complete
# triplet are ignored.
sub translate {
    shift if UNIVERSAL::isa($_[0],__PACKAGE__);

    my ($dna, $code, $start) = @_;

    $dna = '' unless defined $dna;

    # Use this module's own table. The former "&FIG::standard_genetic_code"
    # required the FIG package to be loaded and, being an "&name;" call,
    # silently forwarded the current @_.
    $code = standard_genetic_code() unless defined $code;

    $dna =~ tr/a-z/A-Z/;
    my $ln = length($dna);

    my $prot = '';
    for (my $i = 0; $i < $ln - 2; $i += 3) {
        my $aa = $code->{ substr($dna, $i, 3) };
        $prot .= (defined $aa && $aa ne '') ? $aa : 'X';
    }

    # Alternative start codons are read as methionine when requested.
    if ($start && $ln >= 3 && substr($dna, 0, 3) =~ /^[GT]TG$/) {
        substr($prot, 0, 1) = 'M';
    }
    return $prot;
}
888    
889    
890  1;  1;

Legend:
Removed from v.1.14  
changed lines
  Added in v.1.15

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3