[Bio] / FigKernelPackages / gjogenbank.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjogenbank.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.16, Wed Jul 27 17:46:37 2016 UTC revision 1.17, Wed Jul 19 23:39:21 2017 UTC
# Line 141  Line 141 
141  #  #
142  #     $ftr_location = location( $ftr )      #  Returns empty string on failure.  #     $ftr_location = location( $ftr )      #  Returns empty string on failure.
143  #  #
144    #  Feature location as cbdl = [ [ contig, beg, dir, len ], ... ]
145    #
146    #     $loc                           = location_as_cbdl( $ftr, $entry )
147    #   ( $loc, $partial_5, $partial_3 ) = location_as_cbdl( $ftr, $entry )
148    #
149    #  Feature location as a SEED or Sapling location string
150    #
151    #     $loc                           = location_as_seed( $ftr, $entry )
152    #   ( $loc, $partial_5, $partial_3 ) = location_as_seed( $ftr, $entry )
153    #
154    #     $loc                           = location_as_sapling( $ftr, $entry )
155    #   ( $loc, $partial_5, $partial_3 ) = location_as_sapling( $ftr, $entry )
156    #
157  #  Identify features with partial 5' or 3' ends.  #  Identify features with partial 5' or 3' ends.
158  #  #
159  #     $partial_5_prime = partial_5_prime( $ftr )  #     $partial_5_prime = partial_5_prime( $ftr )
# Line 156  Line 169 
169  #     @EC_number = EC_number( $ftr )  #     @EC_number = EC_number( $ftr )
170  #    \@EC_number = EC_number( $ftr )  #    \@EC_number = EC_number( $ftr )
171  #  #
172  #     $translation = CDS_translation( $ftr )          # Uses in situ if found  #     $pseudo    = is_pseudo( $ftr )
173  #     $translation = CDS_translation( $ftr,  $dna )   # If not in feature, translate  #
174  #     $translation = CDS_translation( $ftr, \$dna )  #  CDS translation table number
175  #     $translation = CDS_translation( $ftr,  $entry )  #
176    #     $trans_table = CDS_trans_table( $ftr )
177    #
178    #  CDS translation (uses the supplied translation if provided)
179    #
180    #     $translation = CDS_translation( $ftr,  $entry ) # This is the preferred form
181    #     $translation = CDS_translation( $ftr,  $dna )   # Assumes code table 1
182    #     $translation = CDS_translation( $ftr, \$dna )   # Assumes code table 1
183    #     $translation = CDS_translation( $ftr )          # Cannot de novo translate
184  #  #
185    #-------------------------------------------------------------------------------
186  #  Convert GenBank location to [ [ $contig, $begin, $dir, $length ], ... ]  #  Convert GenBank location to [ [ $contig, $begin, $dir, $length ], ... ]
187  #  #
188  #    \@cbdl = genbank_loc_2_cbdl( $loc, $contig_id )  #    \@cbdl = genbank_loc_2_cbdl( $loc, $contig_id )
# Line 199  Line 221 
221                    features_of_type                    features_of_type
222                    feature_list                    feature_list
223                    ftr_dna                    ftr_dna
224                      ftr_seq
225                      CDS_translation
226                      CDS_trans_table
227    
228                    genbank_loc_2_seed                    genbank_loc_2_seed
229                    genbank_loc_2_sapling                    genbank_loc_2_sapling
# Line 208  Line 233 
233                    write_genbank                    write_genbank
234                  );                  );
235    
236    our @EXPORT_OK = qw ( next_entry
237                          location
238                          location_as_cbdl
239                          location_as_seed
240                          location_as_sapling
241                          partial_5_prime
242                          partial_3_prime
243                          qualifiers
244                          ftr_id
245                          ftr_locus_tag
246                          ftr_old_tag
247                          ftr_gene_or_id
248                          ftr_gi_or_id
249                          ftr_gi
250                          ftr_xref
251                          gene
252                          product
253                          is_pseudo
254                          EC_number
255                        );
256    
257  #  An approximate ordering of the common qualifiers in GenBank feature table  #  An approximate ordering of the common qualifiers in GenBank feature table
258  #  entries:  #  entries:
# Line 354  Line 399 
399  #  same parameter will return successive entries.  Calls to different files  #  same parameter will return successive entries.  Calls to different files
400  #  can be interlaced.  #  can be interlaced.
401  #  #
402    #      $entry = next_entry( )         #  STDIN
403    #      $entry = next_entry( \*FH )
404    #      $entry = next_entry(  $file )
405    #
406  #      $entry = parse_next_genbank( )         #  STDIN  #      $entry = parse_next_genbank( )         #  STDIN
407  #      $entry = parse_next_genbank( \*FH )  #      $entry = parse_next_genbank( \*FH )
408  #      $entry = parse_next_genbank( $file )  #      $entry = parse_next_genbank( $file )
409  #  #
410  #  Error or end-of-file returns undef.  #  Error or end-of-file returns undef.
411  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
412  sub parse_next_genbank  
413    sub parse_next_genbank { next_entry( @_ ) }
414    
415    sub next_entry
416  {  {
417      my $file = shift;      my $file = shift;
418    
# Line 1084  Line 1136 
1136    
1137    
1138  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1139    #  Feature location as cbdl = [ [ contig, beg, dir, len ], ... ]
1140    #
1141    #     $loc                           = location_as_cbdl( $ftr, $entry )
1142    #   ( $loc, $partial_5, $partial_3 ) = location_as_cbdl( $ftr, $entry )
1143    #
1144    #  Feature location as a SEED or Sapling location string
1145    #
1146    #     $loc                           = location_as_seed( $ftr, $entry )
1147    #   ( $loc, $partial_5, $partial_3 ) = location_as_seed( $ftr, $entry )
1148    #
1149    #     $loc                           = location_as_sapling( $ftr, $entry )
1150    #   ( $loc, $partial_5, $partial_3 ) = location_as_sapling( $ftr, $entry )
1151    #
1152    #-------------------------------------------------------------------------------
1153    
1154    sub location_as_cbdl
1155    {
1156        genbank_loc_2_cbdl( location( $_[0] ),
1157                            ( $_[1]->{ ACCESSION } || [] )->[0] || $_[1]->{ LOCUS },
1158                          );
1159    }
1160    
1161    sub location_as_seed
1162    {
1163        genbank_loc_2_seed( ( $_[1]->{ ACCESSION } || [] )->[0] || $_[1]->{ LOCUS },
1164                            location( $_[0] )
1165                          );
1166    }
1167    
1168    sub location_as_sapling
1169    {
1170        genbank_loc_2_sapling( ( $_[1]->{ ACCESSION } || [] )->[0] || $_[1]->{ LOCUS },
1171                               location( $_[0] )
1172                             );
1173    }
1174    
1175    
1176    #-------------------------------------------------------------------------------
1177  #  Identify features with partial 5' or 3' ends.  #  Identify features with partial 5' or 3' ends.
1178  #  #
1179  #     $partial_5_prime = partial_5_prime( $ftr )  #     $partial_5_prime = partial_5_prime( $ftr )
# Line 1312  Line 1402 
1402    
1403    
1404  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1405    #  Feature is pseudo gene?
1406    #
1407    #   $pseudo = is_pseudo( $ftr )
1408    #
1409    #-------------------------------------------------------------------------------
1410    sub is_pseudo { qualifiers( $_[0] )->{ pseudo } ? 1 : 0 }
1411    
1412    
1413    #-------------------------------------------------------------------------------
1414  #  #
1415  #   @EC_number = EC_number( $ftr )  #   @EC_number = EC_number( $ftr )
1416  #  \@EC_number = EC_number( $ftr )  #  \@EC_number = EC_number( $ftr )
# Line 1327  Line 1426 
1426    
1427    
1428  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1429    #  Find a CDS translation table number:
1430    #
1431    #   $trans_table = CDS_trans_table( $ftr );
1432    #
1433    #-------------------------------------------------------------------------------
1434    sub CDS_trans_table
1435    {
1436        ( qualifiers( $_[0] )->{ transl_table } || [] )->[0] || 1;
1437    }
1438    
1439    
1440    #-------------------------------------------------------------------------------
1441  #   This is the in situ translation.  Will extract from the DNA sequence if  #   This is the in situ translation.  Will extract from the DNA sequence if
1442  #   supplied.  #  necessary.
1443  #  #
1444  #   $translation = CDS_translation( $ftr )  #   $translation = CDS_translation( $ftr )
1445  #   $translation = CDS_translation( $ftr,  $dna )  #   $translation = CDS_translation( $ftr,  $dna )
1446  #   $translation = CDS_translation( $ftr, \$dna )  #   $translation = CDS_translation( $ftr, \$dna )
1447  #   $translation = CDS_translation( $ftr,  $entry )  #   $translation = CDS_translation( $ftr,  $entry )  #  <--- preferred form
1448  #  #
1449    #  We should look for translation exceptions, but ....
1450  #  #
1451  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1452  sub CDS_translation  sub CDS_translation
# Line 1346  Line 1458 
1458    
1459      return undef if ! $dna;      return undef if ! $dna;
1460    
1461        #  Beware that ftr_seq() removes a leading partial codon based on /codon_start
1462    
1463        my $CDS_dna = ftr_seq( $ftr, $dna ) or return undef;
1464    
1465        my $transl_table = $qual->{ transl_table }->[0] || 1;
1466    
1467        my $start_with_met = ! partial_5_prime( $ftr );
1468    
1469        translate_seq_with_NCBI_code( $CDS_dna, $transl_table, $start_with_met );
1470    }
1471    
1472    
1473    sub translate_seq_with_NCBI_code
1474    {
1475        my ( $seq, $transl_table, $start_with_met ) = @_;
1476    
1477      eval { require gjoseqlib; }      eval { require gjoseqlib; }
1478          or return undef;          or return undef;
1479    
1480      my $CDS_dna = ftr_dna( $dna, $ftr ) or return undef;      eval { require NCBI_genetic_code; }
1481      my $pep = gjoseqlib::translate_seq( $CDS_dna, ! partial_5_prime( $ftr ) );          or return undef;
1482    
1483        $seq =~ tr/-//d;     #  remove gaps (should never happen)
1484        $seq =~ tr/Uu/Tt/;   #  make it DNA
1485    
1486        my $gc = NCBI_genetic_code::genetic_code( $transl_table );
1487    
1488        my $ambigs = \%gjoseqlib::DNA_letter_can_be;
1489    
1490        #  We can now do the codon-by-codon translation:
1491    
1492        my @codons = map { /[a-z]/ ? lc( $_ ) : $_ }
1493                     $seq =~ m/(...?)/g;  #  will try to translate last 2 nt
1494    
1495        my @met;
1496        if ( $start_with_met && ( my $codon1 = shift @codons ) )
1497        {
1498            push @met, ( $codon1 =~ /^[a-z]/ ? 'm' : 'M' );
1499        }
1500    
1501        my $pep = join( '', @met, map { gjoseqlib::translate_codon_with_user_code( $seq, $gc, $ambigs ) } @codons );
1502    
1503        #  If it ends with stop, and it usually will, remove it
1504    
1505      $pep =~ s/\*$// if $pep;      $pep =~ s/\*$// if $pep;
1506    
1507      $pep;      $pep;

Legend:
Removed from v.1.16  
changed lines
  Added in v.1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3