[Bio] / FigKernelScripts / p3-rast.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/p3-rast.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/env perl
2 :     #
3 :     # Copyright (c) 2003-2015 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :    
19 :    
20 :     use strict;
21 :     use warnings;
22 :     use RASTlib;
23 :     use SeedUtils;
24 :     use gjoseqlib;
25 :     use P3Utils;
26 :    
27 :    
28 : parrello 1.4
29 : parrello 1.1 =head1 Annotate a Genome Using RAST
30 :    
31 : parrello 1.3 p3-rast.pl [ options ] taxonID name
32 : parrello 1.1
33 :     This script invokes the RAST service over the web to annotate a genome. It will submit a FASTA
34 :     file to RAST, wait for the job to finish, and then format the results into a JSON-form L<GenomeTypeObject>.
35 :    
36 :     =head2 Parameters
37 :    
38 :     The input can be a contig-only GenomeTypeObject in JSON format or a contig FASTA file. The
39 : parrello 1.3 two positional parameters are the proposed taxonomic ID and the genome name. The command-line options in
40 : parrello 1.1 L<P3Utils/ih_options> are used to specify the standard input. The additional command-line
41 :     options are as follows.
42 :    
43 :     =over 4
44 :    
45 :     =item gto
46 :    
47 :     If specified, then the input file is presumed to be a contig object or a workspace contig object
48 :     encoded in JSON format. The contigs must be in the form of a list attached to the C<contigs>
49 :     member or the C<contigs> member of the C<data> member (the latter indicating a workspace object).
50 :    
51 :     =item domain
52 :    
53 :     The domain of the new genome-- C<B> for bacteria, C<A> for archaea, and so forth. The default is
54 :     C<B>.
55 :    
56 :     =item geneticCode
57 :    
58 :     The genetic code of the new genome. The default is C<11>.
59 :    
60 :     =item user
61 :    
62 :     User name for RAST access.
63 :    
64 :     =item password
65 :    
66 :     Password for RAST access.
67 :    
68 :     =item sleep
69 :    
70 :     Sleep interval in seconds while waiting for the job to complete. The default is C<60>.
71 :    
72 :     =back
73 :    
74 :     =cut
75 :    
# URL for RAST requests.
# NOTE(review): nothing in the visible part of this script references this
# constant -- confirm whether it is still needed before removing it.
use constant RAST_URL => 'http://redwood.mcs.anl.gov:5000/quick';

# Get the command-line parameters. The "user" and "password" options are
# described in the POD above, so they are declared here; without a declaration
# the option parser would reject them. Neither has a default, so each is
# expected to be undefined unless given on the command line.
my $opt = P3Utils::script_opts('genomeID name', P3Utils::ih_options(),
        ["gto|j", "input file is in JSON format"],
        ["domain|d=s", "domain (A or B) of the new genome", { default => 'B' }],
        ["geneticCode=i", "genetic code for the new genome", { default => 11 }],
        ["user=s", "user name for RAST access"],
        ["password=s", "password for RAST access"],
        ["sleep=i", "sleep interval for status polling", { default => 60 }],
        );
# Open the input file.
my $ih = P3Utils::ih($opt);
# We will put the genome information in here. If the input is a GTO, it can be overridden.
my $domain = $opt->domain;
# The accessor is spelled in lower case even though the option is "geneticCode".
my $geneticCode = $opt->geneticcode;
# The positional parameters supply the ID and the name; either may be omitted
# when the input is a GTO that carries the value itself.
my ($genomeID, $name) = @ARGV;
# Get the contigs from the file, in whatever form the reader for the chosen
# input format returns them. They are handed to RASTlib::Annotate unchanged.
my $contigs;
if (! $opt->gto) {
    # Here we have FASTA input.
    $contigs = gjoseqlib::read_fasta($ih);
} else {
    # Here we have JSON input. ServicesUtils is only needed on this path and is
    # not loaded by the "use" statements at the top of the file, so load it now.
    # This is a no-op if some other module has already loaded it.
    require ServicesUtils;
    my $genomeJson = SeedUtils::read_encoded_object($ih);
    # Get as much other information as we can directly from the GTO. The
    # command line wins for the name and ID; the GTO wins for the genetic code
    # and domain when it supplies them.
    $name //= ServicesUtils::json_field($genomeJson, 'name');
    $geneticCode = ServicesUtils::json_field($genomeJson, 'genetic_code', optional => 1) // $geneticCode;
    $domain = ServicesUtils::json_field($genomeJson, 'domain', optional => 1) // $domain;
    $genomeID //= ServicesUtils::json_field($genomeJson, 'id');
    # Correct the genome ID if this is a contigs object.
    $genomeID =~ s/\.contigs$//;
    # Normalize the domain to its upper-cased first letter.
    $domain = uc substr($domain, 0, 1);
    # Extract the contigs from the GTO.
    $contigs = ServicesUtils::contig_tuples($genomeJson);
}
# Complain if we still do not have a name and ID.
if (! $genomeID || ! $name) {
    die "You must specify a genome ID and name somewhere.";
}
# Invoke the RAST service, passing along any credentials from the command line.
my $annotation = RASTlib::Annotate($contigs, $genomeID, $name,
        user => $opt->user, password => $opt->password,
        domain => $domain, geneticCode => $geneticCode, sleep => $opt->sleep);
# Write the result.
SeedUtils::write_encoded_object($annotation, \*STDOUT);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3