[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 New Stuff Checker
4 :    
5 :     This script compares the genomes, features, and annotations in
6 :     the old and new sprouts and lists the differences.
7 :    
8 :     The currently-supported command-line options are as follows.
9 :    
10 :     =over 4
11 :    
12 : parrello 1.2 =item summary
13 :    
14 :     Do not display details, only difference summaries.
15 :    
16 : parrello 1.1 =item user
17 :    
18 :     Name suffix to be used for log files. If omitted, the PID is used.
19 :    
20 :     =item trace
21 :    
22 :     Numeric trace level. A higher trace level causes more messages to appear. The
23 :     default trace level is 2. Tracing will be directed to the standard output
24 :     as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
25 :     where I<User> is the value of the B<user> option above.
26 :    
27 :     =item sql
28 :    
29 :     If specified, turns on tracing of SQL activity.
30 :    
31 :     =item background
32 :    
33 :     Save the standard and error output to files. The files will be created
34 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
35 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
36 :     B<user> option above.
37 :    
38 :     =item h
39 :    
40 :     Display this command's parameters and options.
41 :    
42 :     =item phone
43 :    
44 :     Phone number to message when the script is complete.
45 :    
46 :     =back
47 :    
48 :     =cut
49 :    
50 :     use strict;
51 :     use Tracer;
52 :     use DocUtils;
53 :     use TestUtils;
54 :     use Cwd;
55 :     use File::Copy;
56 :     use File::Path;
57 :     use FIG;
58 :     use SFXlate;
59 :     use Sprout;
60 :    
# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(Sprout) ],
                                           {
                                              trace => ["2-", "tracing level; use a minus to prevent tracing to standard output"],
                                              summary => ["", "if specified, detailed lists of the different items will not be displayed"],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                           },
                                           "",
                                           @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Ensure we catch errors.
eval {
    Trace("Processing genomes.") if T(2);
    # Get the old Sprout.
    my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);
    # Get its genomes in alphabetical order.
    my @oldGenomes = GetGenes($oldSprout);
    # Get the new Sprout.
    my $newSprout = SFXlate->new_sprout_only();
    # Get its genomes in alphabetical order.
    my @newGenomes = GetGenes($newSprout);
    # Compare the two genome lists.
    my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);
    # Add feature counts to the new genomes.
    for my $insertedGenome (@{$insertedGenomes}) {
        my $genomeID = $insertedGenome->[0];
        # For a new genome, display the feature count.
        my $count = $newSprout->GetCount(['HasFeature'], "HasFeature(from-link) = ?",
                                         [$genomeID]);
        # Build the suffix without stray spacing so that both the singular and
        # plural forms render the same way: "Name (one feature)" or
        # "Name (12 features)".
        my $suffix = ($count == 1 ? "one feature" : "$count features");
        $insertedGenome->[1] .= " ($suffix)";
    }
    # Display the lists.
    ShowLists(! $options->{summary},
              'New Genomes'     => $insertedGenomes,
              'Deleted Genomes' => $deletedGenomes);
    # Next, we get the subsystems.
    Trace("Processing subsystems.") if T(2);
    my @oldSubsystems = GetSubsystems($oldSprout);
    my @newSubsystems = GetSubsystems($newSprout);
    # Compare and display the subsystem lists.
    my ($insertedSubs, $deletedSubs) = Tracer::CompareLists(\@newSubsystems, \@oldSubsystems);
    ShowLists(! $options->{summary},
              'New Subsystems'     => $insertedSubs,
              'Deleted Subsystems' => $deletedSubs);
    # Now we process the features of the common genomes. First we need a hash
    # of the inserted genomes so we know to skip them: a brand-new genome has
    # no old features to compare against.
    my %skipGenes = map { $_->[0] => 1 } @{$insertedGenomes};
    # Loop through the genomes.
    for my $genome (@newGenomes) {
        # Get the ID and name.
        my ($genomeID, $genomeName) = @{$genome};
        Trace("Processing $genomeID.") if T(3);
        # Only process the common genomes.
        if (! $skipGenes{$genomeID}) {
            # Get the new and old features. This will be very stressful to the machine,
            # because there are lots of features.
            my @oldFeatures = GetFeatures($oldSprout, $genomeID);
            my @newFeatures = GetFeatures($newSprout, $genomeID);
            Trace("Comparing features for $genomeID.") if T(3);
            # Compare the lists.
            my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
            # If either list has data, we want to display it.
            if (scalar @{$insertedFeatures} + scalar @{$deletedFeatures} > 0) {
                Trace("Displaying feature differences.") if T(3);
                ShowLists(! $options->{summary},
                          "New Features for $genomeID"      => $insertedFeatures,
                          "Features Deleted from $genomeID" => $deletedFeatures);
            }
        }
    }
};
# Record the outcome so it can be reported in the completion message.
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# If a phone number was supplied, send a text message announcing completion.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "New Stuff Checker terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}
149 :    
150 :     =head3 GetGenes
151 :    
152 :     C<< my @geneList = GetGenes($sprout); >>
153 :    
154 :     Return a list of the genomes in the specified Sprout instance. The genomes
155 :     are returned in alphabetical order by genome ID.
156 :    
157 :     =over 4
158 :    
159 :     =item sprout
160 :    
161 :     Sprout instance whose genome list is desired.
162 :    
163 :     =item RETURN
164 :    
165 :     Returns a list of two-tuples. The first element in each tuple is the genome ID,
166 :     and the second is the genome name (genus, species, strain).
167 :    
168 :     =back
169 :    
170 :     =cut
171 :    
sub GetGenes {
    # The only argument is the Sprout instance to query.
    my ($sprout) = @_;
    # We want the genome ID followed by the three pieces of the genome name.
    my @fields = ('Genome(id)', 'Genome(genus)', 'Genome(species)',
                  'Genome(unique-characterization)');
    # Ask for every genome, sorted by ID.
    my @rows = $sprout->GetAll(['Genome'], "ORDER BY Genome(id)", [], \@fields);
    # Convert each four-column row into an [id, name] pair, where the name
    # is the genus, species, and strain joined by single spaces.
    my @pairs;
    for my $row (@rows) {
        my ($id, @nameParts) = @{$row}[0..3];
        push @pairs, [$id, join(" ", @nameParts)];
    }
    # Hand back the list of pairs.
    return @pairs;
}
185 :    
186 :     =head3 GetSubsystems
187 :    
188 :     C<< my @subsystems = GetSubsystems($sprout); >>
189 :    
190 :     Get a list of the subsystems in the specified Sprout instance.
191 :    
192 :     =over 4
193 :    
194 :     =item sprout
195 :    
196 :     Sprout instance whose subsystems are desired.
197 :    
198 :     =item RETURN
199 :    
200 :     Returns a list of 2-tuples, each consisting of the subsystem name followed by
201 :     the name of the curator.
202 :    
203 :     =back
204 :    
205 :     =cut
206 :    
sub GetSubsystems {
    # The only argument is the Sprout instance to query.
    my ($sprout) = @_;
    # Each result row holds the subsystem ID followed by its curator.
    my @fields = ('Subsystem(id)', 'Subsystem(curator)');
    # Read all the subsystems, sorted by ID.
    my @subsystems = $sprout->GetAll(['Subsystem'], "ORDER BY Subsystem(id)",
                                     [], \@fields);
    # Hand the rows back to the caller.
    return @subsystems;
}
216 :    
217 :     =head3 GetFeatures
218 :    
219 :     C<< my @features = GetFeatures($sprout, $genomeID); >>
220 :    
221 :     Return the features of the specified genome in the specified Sprout instance.
222 :    
223 :     =over 4
224 :    
225 :     =item sprout
226 :    
227 :     Sprout instance to use to get the features.
228 :    
229 :     =item genomeID
230 :    
231 :     ID of the genome in question.
232 :    
233 :     =item RETURN
234 :    
235 :     Returns a list of 2-tuples, the first element being the feature ID and the second its
236 :     functional assignment (if any).
237 :    
238 :     =back
239 :    
240 :     =cut
241 :    
sub GetFeatures {
    # We are given the Sprout instance and the ID of the genome of interest.
    my ($sprout, $genomeID) = @_;
    # Read the IDs of the genome's features, sorted by feature ID.
    my @featureIDs = $sprout->GetFlat(['HasFeature'],
                                      "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
                                      [$genomeID], 'HasFeature(to-link)');
    # Pair each feature ID with whatever FunctionOf returns for it.
    my @pairs;
    for my $featureID (@featureIDs) {
        push @pairs, [$featureID, $sprout->FunctionOf($featureID)];
    }
    # Hand back the list of pairs.
    return @pairs;
}
252 :    
=head3 ShowLists

C<< ShowLists($all, %lists); >>

Display a set of lists. Each list should consist of 2-tuples. Lists are
displayed in alphabetical order by list name, and empty lists are skipped
entirely. For each nonempty list a header line is printed; the individual
entries are printed only when details are requested.

=over 4

=item all

TRUE if details should be displayed; FALSE if only summaries should be displayed.

=item lists

A hash mapping list names to list references.

=back

=cut

sub ShowLists {
    # Get the parameters.
    my $all = shift @_;
    my %lists = @_;
    # Loop through the lists in alphabetical order by list name.
    for my $listName (sort keys %lists) {
        # Get the list itself.
        my $list = $lists{$listName};
        # Get the number of list items.
        my $listSize = scalar @{$list};
        # Only proceed if the list is nonempty.
        if ($listSize > 0) {
            my $header = ShowHeader($listName, $listSize);
            print "$header\n";
            Trace($header) if T(3);
            # If the caller wants details, display the list entries.
            if ($all) {
                # Put a spacer under the title.
                print "\n";
                # Get the width of the name column.
                my $width = 0;
                for my $entryLen (map { length $_->[0] } @{$list}) {
                    $width = $entryLen if $entryLen > $width;
                }
                # Now display the list, padding each name to the column width.
                for my $entry (@{$list}) {
                    my ($name, $data) = @{$entry};
                    # The second element may be missing (for example, a feature
                    # with no functional assignment); show it as an empty string
                    # instead of triggering an uninitialized-value warning.
                    $data = "" unless defined $data;
                    print " $name" . (" " x ($width - length $name)) . " $data\n";
                }
                print "\n\n";
            }
        }
    }
}
305 :    
306 : parrello 1.7 =head3 ShowHeader
307 :    
308 :     C<< my $header = ShowHeader($name, $count); >>
309 :    
310 :     Return a list header for a list of the specified length.
311 :    
312 :     =over 4
313 :    
314 :     =item name
315 :    
316 :     Name of the list.
317 :    
318 :     =item count
319 :    
320 :     Number of entries in the list.
321 :    
322 :     =item RETURN
323 :    
324 :     Returns a list header that shows the name of the list and the number of entries.
325 :    
326 :     =back
327 :    
328 :     =cut
329 :    
sub ShowHeader {
    # We are given the list name and the number of entries in the list.
    my ($name, $count) = @_;
    # Spell out the two smallest counts; any other count is shown as given.
    my $amount = ($count == 0 ? "none"
               :  $count == 1 ? "one"
               :                $count);
    # Assemble and return the header line.
    return "*** $name: $amount";
}
345 :    
346 : parrello 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3