[Bio] / Sprout / SproutGFF.pl Repository:
ViewVC logotype

Annotation of /Sprout/SproutGFF.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     # -*- perl -*-
4 :     #
5 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
6 :     # for Interpretations of Genomes. All Rights Reserved.
7 :     #
8 :     # This file is part of the SEED Toolkit.
9 : parrello 1.2 #
10 : parrello 1.1 # The SEED Toolkit is free software. You can redistribute
11 :     # it and/or modify it under the terms of the SEED Toolkit
12 : parrello 1.2 # Public License.
13 : parrello 1.1 #
14 :     # You should have received a copy of the SEED Toolkit Public License
15 :     # along with this program; if not write to the University of Chicago
16 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
17 :     # Genomes at veronika@thefig.info or download a copy from
18 :     # http://www.theseed.org/LICENSE.TXT.
19 :     #
20 :    
21 :     =head1 SproutGFF
22 :    
23 :     This is a fancy wrapper around B<seed2gff> that can be used to generate the
24 :     GFF3 files for the NMPDR. The single parameter is the output directory name.
25 :     The files will be organized by NMPDR group.
26 :    
27 :     The currently-supported command-line options are as follows.
28 :    
29 :     =over 4
30 :    
31 :     =item user
32 :    
33 :     Name suffix to be used for log files. If omitted, the PID is used.
34 :    
35 :     =item trace
36 :    
37 :     Numeric trace level. A higher trace level causes more messages to appear. The
38 :     default trace level is 2. Tracing will be directed to the standard output
39 :     as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
40 :     where I<User> is the value of the B<user> option above.
41 :    
42 :     =item sql
43 :    
44 :     If specified, turns on tracing of SQL activity.
45 :    
46 :     =item background
47 :    
48 :     Save the standard and error output to files. The files will be created
49 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
50 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
51 :     B<user> option above.
52 :    
53 :     =item h
54 :    
55 :     Display this command's parameters and options.
56 :    
57 :     =item scan
58 :    
59 :     If specified, the genomes will be collected and the directories created, but no GFF
60 :     files will be output. This is mostly useful for testing.
61 :    
62 :     =item phone
63 :    
64 :     Phone number to message when the script is complete.
65 :    
66 :     =back
67 :    
68 :     =cut
69 :    
70 :     use strict;
71 :     use Tracer;
72 :     use Cwd;
73 :     use File::Copy;
74 :     use File::Path;
75 :     use SFXlate;
76 :    
# Main script: build a map of genome ID => output GFF file name (either for the
# single genome named by the "genome" option or for every genome in every NMPDR
# super-group), then run seed2gff once per genome unless "scan" was specified.
# Any error raised inside the eval is traced, and the outcome is optionally
# sent as a phone message.

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(Sprout ERDB) ],
                                           {
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                              genome => ["", "genome to process; the default is to process all NMPDR core genomes"],
                                              scan => ["", "if specified, the output directories will be created but no files will be written"],
                                           },
                                           "<output directory>",
                                           @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Ensure we catch errors.
eval {
    # Create a Sprout object.
    my $sprout = SFXlate->new_sprout_only();
    # Ensure the output directory exists.
    my $outDir = $parameters[0];
    if (! $outDir) {
        Confess("No output directory specified.");
    } else {
        Insure($outDir, 0777);
        # Create the genome map. This lists all the genomes we want along with the corresponding
        # output file name.
        my %genomes;
        # Check for a single-genome situation.
        if ($options->{genome}) {
            # Get the genome name.
            my $genomeID = $options->{genome};
            my $genomeName = $sprout->GenusSpecies($genomeID);
            if (! $genomeName) {
                # NOTE(review): this assumes GenusSpecies returns an empty or undefined
                # value for an unknown genome ID -- confirm against the Sprout module.
                # Without a name the output file would be called ".gff".
                Confess("No genome name found for genome ID $genomeID.");
            } else {
                # Compute the file name.
                my $fileName = CleanGenomeName($genomeName);
                $genomes{$genomeID} = "$outDir/$fileName.gff";
            }
        } else {
            # Here we want all the core organisms, split into super-groups. First, we get the
            # genomes for each group in a hash.
            my %baseGroups = $sprout->GetGroups();
            # Fix it into a hash by super-group.
            my %coreGroups = $sprout->Fix(%baseGroups);
            for my $coreGroup (keys %coreGroups) {
                # Compute the directory and ensure it exists.
                my $superDirectory = "$outDir/$coreGroup";
                Insure($superDirectory, 0777);
                # Put all of this group's genomes in the output hash.
                for my $coreGenome (@{$coreGroups{$coreGroup}}) {
                    my $fileName = CleanGenomeName($sprout->GenusSpecies($coreGenome));
                    $genomes{$coreGenome} = "$superDirectory/$fileName.gff";
                }
            }
        }
        # Now we loop through %genomes, creating GFF files.
        for my $genome (sort keys %genomes) {
            my $fileName = $genomes{$genome};
            if ($options->{scan}) {
                Trace("$genome would be written to $fileName") if T(2);
            } else {
                Trace("Writing $genome to $fileName.") if T(3);
                # Do the conversion. The list form of the pipe open hands the arguments
                # straight to seed2gff without going through a shell, so quotes, dollar
                # signs, or backticks in a file name cannot alter the command.
                my @output;
                my $pipe;
                if (! open($pipe, '-|', 'seed2gff', '-g', $genome, '-o', $fileName,
                           '-s', '-t', 'all', '-nmpdr')) {
                    Trace("Could not run seed2gff for $genome: $!") if T(1);
                } else {
                    @output = <$pipe>;
                    # Closing a pipe waits for the child and fails if it exited with a
                    # non-zero status. We report the problem and go on to the next genome.
                    if (! close($pipe)) {
                        Trace("seed2gff ended abnormally for $genome (wait status $?).") if T(1);
                    }
                }
                # At trace level 3, we show the output. The captured lines still carry
                # their own line terminators, so they are joined without a separator.
                Trace("Output from seed2gff:\n" . join("", @output)) if T(3) && scalar(@output);
            }
        }
    }

};
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Send the completion message if a phone number was supplied.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "SproutGFF terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}
157 :    
158 :     =head3 CleanGenomeName
159 :    
160 : parrello 1.2 my $cleaned = CleanGenomeName($name);
161 : parrello 1.1
162 :     Clean up a genome name so it can be used as a file name.
163 :    
164 :     =over 4
165 :    
166 :     =item name
167 :    
168 :     Name of the genome, for cleaning purposes.
169 :    
170 :     =item RETURN
171 :    
172 :     Returns the incoming name with its whitespace converted to dots and its other evil characters (such as parentheses and colons) converted to underscores.
173 :    
174 :     =back
175 :    
176 :     =cut
177 :    
# Clean up a genome name so it can be used as a file name.
#
# Parameter: $name -- the genome name to clean.
# Returns: a copy of the name in which each run of whitespace and/or dots has
# been reduced to a single dot, and parentheses, colons, and slashes have been
# replaced by underscores. An undefined name yields an empty string.
sub CleanGenomeName {
    # Get the parameters.
    my ($name) = @_;
    # Declare the return variable. We work on a copy, and treat a missing name
    # as empty so the substitutions below do not raise warnings.
    my $retVal = (defined $name ? $name : "");
    # Convert each run of whitespace to a single dot.
    $retVal =~ s/\s+/./g;
    # Collapse every run of two or more dots to one dot. Matching the whole run
    # is what makes "a...b" come out as "a.b" rather than "a..b".
    $retVal =~ s/\.{2,}/./g;
    # Convert other bad guys to underscores. The slash is included because the
    # caller splices the result into a path, where it would otherwise be read
    # as a directory separator.
    $retVal =~ tr{():/}{_};
    # Return the result.
    return $retVal;
}
192 :    
193 :    
194 : parrello 1.2 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3