[Bio] / FigKernelScripts / svr_taxonomically_related_genomes.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/svr_taxonomically_related_genomes.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (view) (download) (as text)

1 : fangfang 1.1
2 :     #
3 :     # This is a SAS Component
4 :     #
5 :    
6 :     #
7 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
8 :     # for Interpretations of Genomes. All Rights Reserved.
9 :     #
10 :     # This file is part of the SEED Toolkit.
11 :     #
12 :     # The SEED Toolkit is free software. You can redistribute
13 :     # it and/or modify it under the terms of the SEED Toolkit
14 :     # Public License.
15 :     #
16 :     # You should have received a copy of the SEED Toolkit Public License
17 :     # along with this program; if not write to the University of Chicago
18 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
19 :     # Genomes at veronika@thefig.info or download a copy from
20 :     # http://www.theseed.org/LICENSE.TXT.
21 :     #
22 :    
23 :     use strict;
24 :     use Data::Dumper;
25 :     use Carp;
26 :     use Getopt::Long;
27 :    
28 :     =head1 svr_taxonomically_related_genomes
29 :    
30 : fangfang 1.3 svr_taxonomically_related_genomes [-c] [-d depth] < genome_ids > expanded_genome_ids
31 : fangfang 1.1
32 : fangfang 1.10 Get a list of genomes that are taxonomically related to the input genomes.
33 : fangfang 1.1
34 :     =head1 Introduction
35 :    
36 : fangfang 1.3 Usage: svr_taxonomically_related_genomes [-c] [-d depth] < genome_ids > expanded_genome_ids
37 : fangfang 1.1
38 :     -c turn on caching; fetch taxonomy data over sapling only once
39 : fangfang 1.8 -d taxonomy level (D = 7, species)
40 : fangfang 1.1 -t show taxonomy hierarchy of the input genomes
41 :    
42 :     If genome IDs are found in the command line, STDIN is not read.
43 :    
44 :     Examples:
45 :    
46 : fangfang 1.8 1. Get all Bacillus genomes
47 : fangfang 1.1
48 : fangfang 1.8 svr_taxonomically_related_genomes -d 6 224308.1
49 : fangfang 1.1
50 : fangfang 1.8 2. Get all Escherichia coli genomes using cached taxonomy info
51 :    
52 :     svr_taxonomically_related_genomes -d -1 -c -t 83333.1
53 : fangfang 1.1
54 :    
55 :     =head2 Command-Line options
56 :    
57 :     =over 4
58 :    
59 :     =item -c
60 :    
61 :     Turn on caching; fetch taxonomy data over sapling only once.
62 :    
63 : fangfang 1.8 =item -d depth (D = 7)
64 :    
65 :     Taxonomy level. (1: Kindom, 2: Phylum, 3: Class, 4: Order, 5: Family,
66 :     6: Genus, 7: Species, 8: Strain)
67 : fangfang 1.1
68 : fangfang 1.8 Use negative numbers to go up from the highest taxonomy level.
69 : fangfang 1.1
70 :     =item -t
71 :    
72 :     Show taxonomy hierarchy of the query genomes.
73 :    
74 :     =back
75 :    
76 :     =head2 Input
77 :    
78 :     The input is a list of genome IDs supplied in the command line or read from STDIN.
79 :    
80 :     =head2 Output
81 :    
82 :     The output is a list of related genome IDs written to STDOUT.
83 :    
84 :     =cut
85 :    
86 :     use SeedAware;
87 :     use SeedUtils;
88 :     use SAPserver;
89 :     use Storable;
90 :    
91 :     my $usage = <<"End_of_Usage";
92 :    
93 : fangfang 1.3 Usage: svr_taxonomically_related_genomes [-c] [-d depth] < genome_ids > expanded_genome_ids
94 : fangfang 1.1
95 :     -c turn on caching; fetch taxonomy data over sapling only once
96 : fangfang 1.8 -d depth of taxonomy subtree (D = 7, species)
97 : fangfang 1.1 -t show taxonomy hierarchy of the input genomes
98 :    
99 :     If genome IDs are found in the command line, STDIN is not read.
100 :    
101 :     Examples:
102 :    
103 : fangfang 1.8 1. Get all Bacillus genomes
104 : fangfang 1.1
105 : fangfang 1.8 svr_taxonomically_related_genomes -d 6 224308.1
106 : fangfang 1.1
107 : fangfang 1.8 2. Get all Escherichia coli genomes using cached taxonomy info
108 : fangfang 1.1
109 : fangfang 1.8 svr_taxonomically_related_genomes -d -1 -c -t 83333.1
110 : fangfang 1.1
111 :     End_of_Usage
112 :    
113 :     my $help;
114 :     my $cache;
115 : fangfang 1.9 my $depth = 7;
116 : fangfang 1.1 my $showtax = 0;
117 :    
118 :     GetOptions("h|help" => \$help,
119 :     "c|cache" => \$cache,
120 :     "d|depth=i" => \$depth,
121 :     "t|showtax" => \$showtax);
122 :    
123 :     $help and die $usage;
124 :    
125 :     my @gids = map { /(\d+\.\d+)/ ? $1 : () } @ARGV;
126 : fangfang 1.4 @gids > 0 or @gids = ( join(" ", <STDIN>) =~ m/(\d+\.\d+)/g );
127 : fangfang 1.1
128 :     die $usage unless @gids > 0;
129 :    
130 :    
131 :     my $pseed = 1;
132 :     my $envParm = $ENV{SAS_SERVER};
133 : fangfang 1.2
134 : fangfang 1.1 $ENV{SAS_SERVER} = 'PSEED' if $pseed;
135 :    
136 :     my $sap = SAPserver->new();
137 :    
138 :     my ($gnmH, $taxH);
139 :    
140 :     retrieve_if_exists('gnmH', '$sap->all_genomes()', { cache => $cache});
141 :     retrieve_if_exists('taxH', '$sap->taxonomy_of(-ids => [ keys %$gnmH ])', { cache => $cache});
142 :    
143 :     my @orgs = keys %$gnmH;
144 :    
145 :     my $chosen;
146 :    
147 :     foreach my $gid (@gids) {
148 :    
149 :     if (!$taxH->{$gid}) {
150 :     print STDERR "Taxnomy info not found for genome ID: $gid\n";
151 :     next;
152 : fangfang 1.8 }
153 :    
154 :     my @levels = @{$taxH->{$gid}};
155 :    
156 :     my $l = ($depth > 0) ? ($depth - 1) : ($#levels + $depth);
157 :     $l = 0 if $l < 0;
158 :    
159 :     if ($showtax) {
160 :     print STDERR $gid . "\n";
161 :     for my $i (0 .. $#levels) {
162 :     my $padding = ($i == $l) ? " => " : " ";
163 :     print STDERR $padding . $levels[$i]. "\n";
164 : fangfang 1.1 }
165 : fangfang 1.8 print STDERR "\n";
166 : fangfang 1.1 }
167 : fangfang 1.8
168 :     my $t1 = $taxH->{$gid}->[$l];
169 : fangfang 1.5 next unless $t1;
170 :    
171 :     for my $org (@orgs) {
172 : fangfang 1.8 # $chosen->{$_} = 1 if $taxH->{$_} && $t1 eq $taxH->{$_}->[-$depth-1]; # only match the corresponding taxonomy level
173 : fangfang 1.5 if ($taxH->{$org}) {
174 :     for my $t2 (@{$taxH->{$org}}) {
175 :     if ($t1 eq $t2) {
176 :     $chosen->{$org} = 1;
177 :     last;
178 :     }
179 :     }
180 :     }
181 : fangfang 1.1 }
182 :     }
183 :    
184 :     for (keys %$chosen) {
185 :     print join("\t", $_, $gnmH->{$_}) . "\n";
186 :     }
187 :    
188 :     $ENV{SAS_SERVER} = $envParm if $pseed;
189 :    
190 :     sub retrieve_if_exists {
191 :     my ($var, $func, $opts) = @_;
192 :     my $tmpdir = SeedAware::location_of_tmp($opts);
193 :     my $store = "$tmpdir/$var.store";
194 :     unlink $store if -e $store && !$opts->{cache};
195 :     my $str = '
196 :     if (-s $store) {
197 :     $'.$var.' = retrieve($store);
198 :     } else {
199 :     $'.$var.' = '.$func.';
200 :     store($'.$var.', $store);
201 :     }';
202 :     eval $str;
203 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3