Parent Directory
|
Revision Log
Revision 1.4 - (view) (download) (as text)
#!/usr/bin/env perl
#
# Copyright (c) 2003-2015 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use strict;
use warnings;
use RASTlib;
use SeedUtils;
use gjoseqlib;
use P3Utils;


=head1 Annotate a Genome Using RAST

    p3-rast.pl [ options ] taxonID name

This script invokes the RAST service over the web to annotate a genome. It will submit a FASTA
file to RAST, wait for the job to finish, and then format the results into a JSON-form L<GenomeTypeObject>.
The result is written to the standard output.

=head2 Parameters

The input can be a contig-only GenomeTypeObject in JSON format or a contig FASTA file. The
two positional parameters are the proposed taxonomic ID and the genome name. When the input is
a GenomeTypeObject, a positional parameter that is omitted is taken from the C<id> or C<name>
member of the object. The command-line options in
L<P3Utils/ih_options> are used to specify the standard input. The additional command-line
options are as follows.

=over 4

=item gto

If specified, then the input file is presumed to be a contig object or a workspace contig object
encoded in JSON format. The contigs must be in the form of a list attached to the C<contigs>
member or the C<contigs> member of the C<data> member (the latter indicating a workspace object).

=item domain

The domain of the new genome-- C<B> for bacteria, C<A> for archaea, and so forth. Only the first
letter is used, and it is converted to upper case. The default is C<B>.

=item geneticCode

The genetic code of the new genome. The default is C<11>.

=item sleep

Sleep interval in seconds while waiting for the job to complete. The default is C<60>.

=back

There are no C<user> or C<password> options: this script passes undefined values for both to
C<RASTlib::Annotate>.

=cut

# URL for RAST requests. Nothing in this script refers to it; it is retained unchanged.
use constant RAST_URL => 'http://redwood.mcs.anl.gov:5000/quick';

# Get the command-line parameters.
# NOTE(review): the POD calls the first positional parameter a taxonomic ID, while the usage
# string and the variable names here call it a genome ID -- confirm which one RASTlib::Annotate
# expects and make the two agree.
my $opt = P3Utils::script_opts('genomeID name', P3Utils::ih_options(),
        ["gto|j", "input file is in JSON format"],
        ["domain|d=s", "domain (A or B) of the new genome", { default => 'B' }],
        ["geneticCode=i", "genetic code for the new genome", { default => 11 }],
        ["sleep=i", "sleep interval for status polling", { default => 60 }],
        );
# Open the input file.
my $ih = P3Utils::ih($opt);
# We will put the genome information in here. If the input is a GTO, it can be overridden.
my $domain = $opt->domain;
my $geneticCode = $opt->geneticcode;
my ($genomeID, $name) = @ARGV;
# Get the contigs from the file, in whatever form the reader returns them. They are passed
# to RASTlib::Annotate untouched.
my $contigs;
if (! $opt->gto) {
    # Here we have FASTA input. The call is in scalar context.
    $contigs = gjoseqlib::read_fasta($ih);
} else {
    # Here we have JSON input. ServicesUtils is only needed on this path, so it is loaded on
    # demand: FASTA runs do not depend on it, and a missing module is reported by name rather
    # than as an undefined subroutine.
    require ServicesUtils;
    my $genomeJson = SeedUtils::read_encoded_object($ih);
    # Get as much other information as we can directly from the GTO. Positional parameters
    # take precedence over the name and ID in the object; the genetic code and domain in the
    # object take precedence over the command-line options.
    $name //= ServicesUtils::json_field($genomeJson, 'name');
    $geneticCode = ServicesUtils::json_field($genomeJson, 'genetic_code', optional => 1) // $geneticCode;
    $domain = ServicesUtils::json_field($genomeJson, 'domain', optional => 1) // $domain;
    $genomeID //= ServicesUtils::json_field($genomeJson, 'id');
    # Correct the genome ID if this is a contigs object.
    $genomeID =~ s/\.contigs$//;
    # Get the contigs from the object.
    $contigs = ServicesUtils::contig_tuples($genomeJson);
}
# Normalize the domain to a single upper-case letter. This is done for both input types so
# that a command-line value such as "bacteria" or "b" is treated the same as one read from a GTO.
$domain = uc substr($domain, 0, 1);
# Complain if we still do not have a name and ID.
if (! $genomeID || ! $name) {
    die "You must specify a genome ID and name somewhere.";
}
# Invoke the RAST service. The user and password are deliberately passed as undefined.
my $annotation = RASTlib::Annotate($contigs, $genomeID, $name, user => undef, password => undef,
        domain => $domain, geneticCode => $geneticCode, sleep => $opt->sleep);
# Write the result.
SeedUtils::write_encoded_object($annotation, \*STDOUT);
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |