[Bio] / FigKernelScripts / svr_genbank_to_table.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_genbank_to_table.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : gdpusch 1.1 # -*- perl -*-
2 :    
3 :     #
4 :     # This is a SAS Component.
5 :     #
6 :    
7 :     #
8 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
9 :     # for Interpretations of Genomes. All Rights Reserved.
10 :     #
11 :     # This file is part of the SEED Toolkit.
12 :     #
13 :     # The SEED Toolkit is free software. You can redistribute
14 :     # it and/or modify it under the terms of the SEED Toolkit
15 :     # Public License.
16 :     #
17 :     # You should have received a copy of the SEED Toolkit Public License
18 :     # along with this program; if not write to the University of Chicago
19 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
20 :     # Genomes at veronika@thefig.info or download a copy from
21 :     # http://www.theseed.org/LICENSE.TXT.
22 :     #
23 :    
24 :     # usage: svr_genbank_to_table < genbank.file > features.tab
25 :    
26 :     use strict;
27 :     use warnings;
28 :    
29 :     use SeedUtils;
30 :     use gjogenbank;
31 :    
32 :     use Data::Dumper;
33 :    
# Convert the CDS features of a GenBank file into a tab-separated table,
# one row per CDS, preceded by a single header row.  Per the usage line,
# input is expected on STDIN and the table is written to STDOUT.
#
# Columns: main_id, Gene_Name, Locus_Tag, Protein_ID, DB_Xrefs (comma
# separated), SEED_loc, Assigned_Function.

# Running counter used to build the fallback ID ("CDS_00001", ...).
# Starting from a zero-padded string makes Perl's string auto-increment
# preserve the padding.
my $cds_num = '00000';

# Return the list of values stored under qualifier $key of a feature, or an
# empty list when the qualifier is absent.  The feature is only inspected,
# never modified, so probing for a missing qualifier does not autovivify
# empty entries that would later show up in the diagnostic dump.
sub _cds_qualifier_values {
    my ($feature, $key) = @_;

    my $qualifiers = $feature->[1];
    return () unless ref($qualifiers) eq 'HASH';

    my $values = $qualifiers->{$key};
    return () unless ref($values) eq 'ARRAY';

    return @$values;
}

# Header row.  The newline is appended after the join, not passed to it, so
# the header has exactly as many columns as the data rows (no trailing tab).
print STDOUT join(qq(\t), q(#main_id), qw(Gene_Name Locus_Tag Protein_ID DB_Xrefs SEED_loc Assigned_Function)), qq(\n);

foreach my $accession (gjogenbank::parse_genbank()) {
    my $contig = $accession->{LOCUS};

    foreach my $cds (@{ $accession->{FEATURES}->{CDS} }) {
        ++$cds_num;
        my $cds_id = q(CDS_) . $cds_num;

        my $gb_loc = gjogenbank::location($cds, $accession);
        my $locus  = gjogenbank::genbank_loc_2_seed($contig, $gb_loc);
        my $func   = gjogenbank::product($cds) || q();

        # Single-valued qualifiers: take the first value, defaulting to ''.
        my ($gene_name)  = _cds_qualifier_values($cds, 'gene');
        my ($locus_tag)  = _cds_qualifier_values($cds, 'locus_tag');
        my ($protein_id) = _cds_qualifier_values($cds, 'protein_id');
        $gene_name  = q() unless defined $gene_name;
        $locus_tag  = q() unless defined $locus_tag;
        $protein_id = q() unless defined $protein_id;

        # All db_xref values are reported, but only when the first one is
        # defined; otherwise the column is left empty.
        my @db_xrefs = _cds_qualifier_values($cds, 'db_xref');
        @db_xrefs = () unless defined $db_xrefs[0];
        my $db_xrefs = join(q(,), @db_xrefs);

        # Cross-references that can serve as a primary identifier.
        my @gi_nums   = map { m/GI\:(\d+)/     ? (q(gi|)     . $1) : () } @db_xrefs;
        my @gene_nums = map { m/GeneID\:(\d+)/ ? (q(GeneID|) . $1) : () } @db_xrefs;

        # Pick the first non-empty identifier, falling back to the
        # synthetic CDS_nnnnn ID.
        my $main_id = $locus_tag || $protein_id || $gi_nums[0] || $gene_nums[0] || $cds_id;

        # $cds_id is always non-empty and $func is always defined (it
        # defaults to ''), so the location is the only thing that can be
        # missing here.
        if ($locus) {
            print STDOUT join(qq(\t), $main_id, $gene_name, $locus_tag, $protein_id, $db_xrefs, $locus, $func), qq(\n);
        }
        else {
            die (qq(Could not parse CDS feature in accession '$contig':\n), Dumper($cds));
        }
    }
}

exit(0);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3