[Bio] / FigKernelPackages / gjogenbank.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/gjogenbank.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.12, Mon Aug 31 20:29:13 2015 UTC revision 1.13, Tue Sep 22 22:32:54 2015 UTC
# Line 65  Line 65 
65  #                   }  #                   }
66  #  #
67  #  #
68    #-------------------------------------------------------------------------------
69  #  Access functions to parts of structure:  #  Access functions to parts of structure:
70  #  #
71  #     @types = feature_types( $entry );  #     @types = feature_types( $entry );
72  #    \@types = feature_types( $entry );  #    \@types = feature_types( $entry );
73  #  #
74  #  Note that the returned features DO NOT include the type!  #  Features of a type:
75  #  #
76  #     @ftrs = features_of_type( $entry,  @types );  #     @ftrs = features_of_type( $entry,  @types );
77  #    \@ftrs = features_of_type( $entry,  @types );  #    \@ftrs = features_of_type( $entry,  @types );
78  #     @ftrs = features_of_type( $entry, \@types );  #     @ftrs = features_of_type( $entry, \@types );
79  #    \@ftrs = features_of_type( $entry, \@types );  #    \@ftrs = features_of_type( $entry, \@types );
80  #  #
81    #     WARNING: The returned features DO NOT include their respective types, so
82    #              this function is only useful for features of a single type.
83    #
84    #-------------------------------------------------------------------------------
85  #  Sequence of a feature, optionally including information on partial ends.  #  Sequence of a feature, optionally including information on partial ends.
86    #  Use this form for the reasons noted below.
87    #
88    #     $seq                           = ftr_seq( $ftr,  $dna   )
89    #     $seq                           = ftr_seq( $ftr, \$dna   )
90    #     $seq                           = ftr_seq( $ftr,  $entry )
91    #   ( $seq, $partial_5, $partial_3 ) = ftr_seq( $ftr,  $dna   )  # boolean of > or < in location
92    #   ( $seq, $partial_5, $partial_3 ) = ftr_seq( $ftr, \$dna   )
93    #   ( $seq, $partial_5, $partial_3 ) = ftr_seq( $ftr,  $entry )
94    #
95    #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
96    #  Deprecated method to get the sequence of a feature, optionally including
97    #  information on partial ends.  This interface reverses the feature and the
98    #  sequence args relative to all other feature access functions.  Yuk.
99  #  #
100  #     $seq                           = ftr_dna(  $dna, $ftr )  #     $seq                           = ftr_dna(  $dna, $ftr )
101  #     $seq                           = ftr_dna( \$dna, $ftr )  #     $seq                           = ftr_dna( \$dna, $ftr )
102  #   ( $seq, $partial_5, $partial_3 ) = ftr_dna(  $dna, $ftr )  # boolean of > or < in location  #   ( $seq, $partial_5, $partial_3 ) = ftr_dna(  $dna, $ftr )
103  #   ( $seq, $partial_5, $partial_3 ) = ftr_dna( \$dna, $ftr )  #   ( $seq, $partial_5, $partial_3 ) = ftr_dna( \$dna, $ftr )
104  #  #
105    #-------------------------------------------------------------------------------
106    #  Feature ids in order of preference.  For non-CDS features on many genomes,
107    #  locus_tag is the most consistently present.  For proteins, protein_id is
108    #  usually the accession number for the corresponding protein database entry.
109    #
110    #   $id  = ftr_id( $ftr,  @types )  #  The first id drawn from an ordered list
111    #   $id  = ftr_id( $ftr, \@types )  #      of type preferences, each defined by
112    #                                   #    1. a feature qualifier name,
113    #                                   #    2. "gi", or
114    #                                   #    3. "xref:$db", where $db is a database id
115    #                                   #  It is up to the user to handle multiword ids.
116    #
117    #  Predefined lists:
118    #
119    #   $id  = ftr_id( $ftr )           #  protein_id, locus_tag, gene or gi
120    #   $id  = ftr_locus_tag( $ftr )    #  locus_tag, protein_id, gene or gi
121    #   $id  = ftr_old_tag( $ftr )      #  old_locus_tag, locus_tag, protein_id, gene or gi
122    #   $id  = ftr_gene_or_id( $ftr )   #  gene, locus_tag, protein_id or gi
123    #   $id  = ftr_gi_or_id( $ftr )     #  gi, protein_id, locus_tag or gene
124    #
125    #  Get the feature id of a specific type, or return undef:
126    #
127    #   $gi  = ftr_gi( $ftr )           #  gi number or undef
128    #   $id  = ftr_xref( $ftr, $type )  #  db cross reference of $type
129    #   @ids = ftr_xref( $ftr, $type )  #  db cross references of $type
130    #
131    #  Feature ids in old interface:
132    #
133    #   $id  = CDS_id( $ftr )           #  protein_id, locus_tag, gene or gi
134    #   $id  = CDS_locus_tag( $ftr )    #  locus_tag, protein_id, gene or gi
135    #   $id  = CDS_gi_or_id( $ftr )     #  gi, protein_id, locus_tag or gene
136    #   $gi  = CDS_gi( $ftr )           #  gi number or undef
137    #
138    #-------------------------------------------------------------------------------
139    #  Feature location (as GenBank format location string; see conversion
140    #  functions below).
141    #
142  #    $ftr_location = location( $ftr )      #  Returns empty string on failure.  #    $ftr_location = location( $ftr )      #  Returns empty string on failure.
143  #  #
144  #  Identify features with partial 5' or 3' ends.  #  Identify features with partial 5' or 3' ends.
# Line 95  Line 150 
150  #  #
151  #     $gene              = gene( $ftr )  #     $gene              = gene( $ftr )
152  #     @gene_and_synonyms = gene( $ftr )  #     @gene_and_synonyms = gene( $ftr )
153  #  
 #     $id = CDS_id( $ftr )         #  Prefer protein_id as id:  
 #     $id = CDS_gi_or_id( $ftr )   #  Prefer gi number as id:  
 #     $gi = CDS_gi( $ftr )         #  gi number or nothing:  
 #  
154  #     $product = product( $ftr )  #     $product = product( $ftr )
155  #  #
156  #     @EC_number = EC_number( $ftr )  #     @EC_number = EC_number( $ftr )
# Line 915  Line 966 
966  #  specify contigs.  Also, this adjusts CDS features to the first nucleotide  #  specify contigs.  Also, this adjusts CDS features to the first nucleotide
967  #  of the first complete codon, which is not really what we should be doing.  #  of the first complete codon, which is not really what we should be doing.
968  #  #
969    #     $seq                           = ftr_seq( $ftr,  $dna   )
970    #     $seq                           = ftr_seq( $ftr, \$dna   )
971    #     $seq                           = ftr_seq( $ftr,  $entry )
972    #   ( $seq, $partial_5, $partial_3 ) = ftr_seq( $ftr,  $dna   )  # boolean of > or < in location
973    #   ( $seq, $partial_5, $partial_3 ) = ftr_seq( $ftr, \$dna   )
974    #   ( $seq, $partial_5, $partial_3 ) = ftr_seq( $ftr,  $entry )
975    #
976    #  Handles both [ $location, \%quals ] and [ $type, $location, \%quals ]
977    #
978    #  Deprecated interface because args are reversed relative to other methods:
979    #
980  #     $seq                           = ftr_dna(  $dna,   $ftr )  #     $seq                           = ftr_dna(  $dna,   $ftr )
981  #     $seq                           = ftr_dna( \$dna,   $ftr )  #     $seq                           = ftr_dna( \$dna,   $ftr )
982  #     $seq                           = ftr_dna(  $entry, $ftr )  #     $seq                           = ftr_dna(  $entry, $ftr )
# Line 922  Line 984 
984  #   ( $seq, $partial_5, $partial_3 ) = ftr_dna( \$dna,   $ftr )  #   ( $seq, $partial_5, $partial_3 ) = ftr_dna( \$dna,   $ftr )
985  #   ( $seq, $partial_5, $partial_3 ) = ftr_dna(  $entry, $ftr )  #   ( $seq, $partial_5, $partial_3 ) = ftr_dna(  $entry, $ftr )
986  #  #
987  #  Handles both [ $location, \%quals ] and [ $type, $location, \%quals ]  
#  Deprecated form of ftr_seq(): takes the sequence (or entry) argument first
#  and the feature second, and forwards them to ftr_seq() in swapped order.
#  The @_ elements are passed directly, so the sequence is not copied here.
sub ftr_dna
{
    return ftr_seq( $_[1], $_[0] );
}
989    
990  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
991  sub ftr_dna  
992    sub ftr_seq
993  {  {
994      my ( $dna, $ftr ) = @_;      my ( $ftr, $dna ) = @_;
995      return undef if ! ( $dna && $ftr );      return undef if ! ( $ftr && $dna );
996    
997      my $dnaR =   ref $dna eq 'SCALAR'                     ?  $dna      my $dnaR =   ref $dna eq 'SCALAR'                     ?  $dna
998               :   ref $dna eq 'HASH' && $dna->{ SEQUENCE } ? \$dna->{ SEQUENCE }               :   ref $dna eq 'HASH' && $dna->{ SEQUENCE } ? \$dna->{ SEQUENCE }
# Line 935  Line 1000 
1000               :                                               undef;               :                                               undef;
1001      return undef if ! $dnaR;      return undef if ! $dnaR;
1002    
1003      my $have_lib = 0;      eval { require gjoseqlib; }
1004      eval { require gjoseqlib; $have_lib = 1; };          or return undef;
     return undef if ! $have_lib;  
1005    
1006      my $loc = &location( $ftr );      my $loc = &location( $ftr );
1007      $loc or return undef;      $loc or return undef;
# Line 1067  Line 1131 
1131    
1132    
1133  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1134    #  Feature gene:
1135  #  #
1136  #   $gene              = gene( $ftr )  #   $gene              = gene( $ftr )
1137  #   @gene_and_synonyms = gene( $ftr )  #   @gene_and_syns = gene( $ftr )
1138  #  #
1139  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1140  sub gene  sub gene
# Line 1086  Line 1151 
1151    
1152    
1153  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1154  #  Prefer protein_id as id:  #  Feature ids in order of preference.  For non-CDS features on many genomes,
1155    #  locus_tag is the most consistently present.  For proteins, protein_id is
1156    #  usually the accession number for the corresponding protein database entry.
1157    #
1158    #   $id  = ftr_id( $ftr,  @types )  #  the first id drawn from an ordered list
1159    #   $id  = ftr_id( $ftr, \@types )  #      of type preferences, each defined by
1160    #                                   #    1. a feature qualifier name,
1161    #                                   #    2. "gi", or
1162    #                                   #    3. "xref:$db", where $db is a database id
1163    #                                   #  It is up to the user to handle multiword ids.
1164  #  #
1165  #   $id = CDS_id( $ftr )  #  Predefined lists:
1166    #
1167    #   $id  = ftr_id( $ftr )           #  protein_id, locus_tag, gene or gi
1168    #   $id  = ftr_locus_tag( $ftr )    #  locus_tag, protein_id, gene or gi
1169    #   $id  = ftr_old_tag( $ftr )      #  old_locus_tag, locus_tag, protein_id, gene or gi
1170    #   $id  = ftr_gene_or_id( $ftr )   #  gene, locus_tag, protein_id or gi
1171    #   $id  = ftr_gi_or_id( $ftr )     #  gi, protein_id, locus_tag or gene
1172    #
1173    #  Specific feature ids or undef:
1174    #
1175    #   $gi  = ftr_gi( $ftr )           #  gi number or undef
1176    #   $id  = ftr_xref( $ftr, $type )  #  db cross reference of $type
1177    #   @ids = ftr_xref( $ftr, $type )  #  db cross references of $type
1178    #
1179    #  Feature ids in old interface:
1180    #
1181    #   $id  = CDS_id( $ftr )           #  protein_id, locus_tag, gene or gi
1182    #   $id  = CDS_locus_tag( $ftr )    #  locus_tag, protein_id, gene or gi
1183    #   $id  = CDS_gi_or_id( $ftr )     #  gi, protein_id, locus_tag or gene
1184    #   $gi  = CDS_gi( $ftr )           #  gi number or undef
1185  #  #
1186  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
#  First available id of a feature, drawn from an ordered list of id types:
#
#   $id = ftr_id( $ftr )            #  protein_id, locus_tag, gene or gi
#   $id = ftr_id( $ftr,  @types )
#   $id = ftr_id( $ftr, \@types )
#
#  Each type is one of:
#
#     "gi"         gi number, via ftr_gi() (the word is matched in any case)
#     "xref:$db"   first db_xref for database $db, via ftr_xref()
#     other        first value of the feature qualifier with that name
#
#  False entries in the type list are ignored.  The types are tried in order
#  and the first true id is returned.  If no type gives a true value, the
#  value found for the last type tried (undef or another false value) is
#  returned.
#-------------------------------------------------------------------------------
sub ftr_id
{
    my $ftr  = shift;
    my $qual = &qualifiers( $ftr );

    #  The preferences may be a list or a reference to a list.  The reference
    #  is in $_[0]; it must be dereferenced from there, not from $_.
    my @types = grep { $_ } ( ref( $_[0] ) eq 'ARRAY' ? @{ $_[0] } : @_ );
    @types = qw( protein_id locus_tag gene gi )  if ! @types;

    my $id;
    foreach my $type ( @types )
    {
        if    ( $type =~ /^gi$/i       ) { $id = ftr_gi( $ftr ) }
        elsif ( $type =~ /^xref:(.+)$/ ) { $id = ftr_xref( $ftr, $1 ) }
        else                             { $id = ( $qual->{$type} || [] )->[0] }
        last if $id;
    }

    $id;
}
1206    
1207    
#  Feature id, preferring locus_tag, then protein_id, gene and gi.
sub ftr_locus_tag
{
    return ftr_id( $_[0], 'locus_tag', 'protein_id', 'gene', 'gi' );
}
#  Feature id, preferring old_locus_tag, then locus_tag, protein_id, gene
#  and gi.
sub ftr_old_tag
{
    return ftr_id( $_[0], 'old_locus_tag', 'locus_tag', 'protein_id', 'gene', 'gi' );
}
#  Feature id, preferring gene, then locus_tag, protein_id and gi.
sub ftr_gene_or_id
{
    return ftr_id( $_[0], 'gene', 'locus_tag', 'protein_id', 'gi' );
}
#  Feature id, preferring gi, then protein_id, locus_tag and gene.  This is
#  the order stated in the interface documentation and the order used by
#  CDS_gi_or_id().
sub ftr_gi_or_id   { ftr_id( $_[0], qw( gi protein_id locus_tag gene ) ) }
1212    
1213    
#  gi number of a feature, or undef:
#
#   $gi = ftr_gi( $ftr )
#
#  Scans the db_xref qualifier values for the first one of the form
#  "GI:value" (prefix matched in any case) and returns the value part.
sub ftr_gi
{
    my $qual = &qualifiers( @_ );

    my $gi;
    foreach my $xref ( @{ $qual->{db_xref} || [] } )
    {
        next if $xref !~ m/^GI:(.+)$/i;
        $gi = $1;
        last;
    }

    return $gi;
}
1222    
1223    
#  Database cross references of a given type:
#
#   $id  = ftr_xref( $ftr, $type )  #  first db_xref of $type, or undef
#   @ids = ftr_xref( $ftr, $type )  #  all db_xrefs of $type
#
#  A db_xref value matches when it has the form "$type:id"; $type is taken
#  literally and compared without regard to case.  Only the id part is kept.
sub ftr_xref
{
    my ( $ftr, $type ) = @_;
    my $qual = &qualifiers( $ftr );

    my @ids;
    foreach my $xref ( @{ $qual->{db_xref} || [] } )
    {
        push @ids, $1  if $xref =~ m/^\Q$type\E:(.+)$/i;
    }

    return @ids  if wantarray;
    return $ids[0];
}
1233    
1234    
1235  sub CDS_id  sub CDS_id
1236  {  {
1237      my $qual = &qualifiers( @_ );      my $qual = &qualifiers( @_ );
1238      my $id;      my $id;
1239    
1240      ( $id ) =                                 @{ $qual->{ protein_id } } if          $qual->{ protein_id };      ( $id ) =                                 @{ $qual->{ protein_id } } if          $qual->{ protein_id };
     ( $id ) = map { m/^GI:(.+)$/i ? $1 : () } @{ $qual->{ db_xref } }    if ! $id && $qual->{ db_xref };  
1241      ( $id ) =                                 @{ $qual->{ locus_tag } }  if ! $id && $qual->{ locus_tag };      ( $id ) =                                 @{ $qual->{ locus_tag } }  if ! $id && $qual->{ locus_tag };
1242        ( $id ) =                                 @{ $qual->{ gene } }       if ! $id && $qual->{ gene };
1243        ( $id ) = map { m/^GI:(.+)$/i ? $1 : () } @{ $qual->{ db_xref } }    if ! $id && $qual->{ db_xref };
1244    
1245      $id;      $id;
1246  }  }
1247    
1248    
 #-------------------------------------------------------------------------------  
 #  Prefer gi number as id:  
 #  
 #   $id = CDS_gi_or_id( $ftr )  
 #  
 #-------------------------------------------------------------------------------  
1249  sub CDS_gi_or_id  sub CDS_gi_or_id
1250  {  {
1251      my $qual = &qualifiers( @_ );      my $qual = &qualifiers( @_ );
# Line 1118  Line 1254 
1254      ( $id ) = map { m/^GI:(.+)$/i ? $1 : () } @{ $qual->{ db_xref } }    if          $qual->{ db_xref };      ( $id ) = map { m/^GI:(.+)$/i ? $1 : () } @{ $qual->{ db_xref } }    if          $qual->{ db_xref };
1255      ( $id ) =                                 @{ $qual->{ protein_id } } if ! $id && $qual->{ protein_id };      ( $id ) =                                 @{ $qual->{ protein_id } } if ! $id && $qual->{ protein_id };
1256      ( $id ) =                                 @{ $qual->{ locus_tag } }  if ! $id && $qual->{ locus_tag };      ( $id ) =                                 @{ $qual->{ locus_tag } }  if ! $id && $qual->{ locus_tag };
1257        ( $id ) =                                 @{ $qual->{ gene } }       if ! $id && $qual->{ gene };
1258    
1259        $id;
1260    }
1261    
1262    
1263    sub CDS_locus_tag
1264    {
1265        my $qual = &qualifiers( @_ );
1266        my $id;
1267    
1268        ( $id ) =                                 @{ $qual->{ locus_tag } }  if          $qual->{ locus_tag };
1269        ( $id ) =                                 @{ $qual->{ protein_id } } if ! $id && $qual->{ protein_id };
1270        ( $id ) =                                 @{ $qual->{ gene } }       if ! $id && $qual->{ gene };
1271        ( $id ) = map { m/^GI:(.+)$/i ? $1 : () } @{ $qual->{ db_xref } }    if ! $id && $qual->{ db_xref };
1272    
1273      $id;      $id;
1274  }  }
1275    
1276    
 #-------------------------------------------------------------------------------  
 #  gi number or nothing:  
 #  
 #   $gi = CDS_gi( $ftr )  
 #  
 #-------------------------------------------------------------------------------  
1277  sub CDS_gi  sub CDS_gi
1278  {  {
1279      my $qual = &qualifiers( @_ );      my $qual = &qualifiers( @_ );
# Line 1140  Line 1285 
1285    
1286    
1287  #-------------------------------------------------------------------------------  #-------------------------------------------------------------------------------
1288    #  Feature product:
1289  #  #
1290  #   $product = product( $ftr )  #   $product = product( $ftr )
1291  #  #
# Line 1192  Line 1338 
1338    
1339      return undef if ! $dna;      return undef if ! $dna;
1340    
1341      my $have_lib = 0;      eval { require gjoseqlib; }
1342      eval { require gjoseqlib; $have_lib = 1; };          or return undef;
     return undef if ! $have_lib;  
1343    
1344      my $CDS_dna = ftr_dna( $dna, $ftr ) or return undef;      my $CDS_dna = ftr_dna( $dna, $ftr ) or return undef;
1345      my $pep = gjoseqlib::translate_seq( $CDS_dna, ! partial_5_prime( $ftr ) );      my $pep = gjoseqlib::translate_seq( $CDS_dna, ! partial_5_prime( $ftr ) );

Legend:
Removed from v.1.12  
changed lines
  Added in v.1.13

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3