[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 New Stuff Checker
4 :    
5 :     This script compares the genomes, features, and annotations in
6 :     the old and new sprouts and lists the differences.
7 :    
8 :     The currently-supported command-line options are as follows.
9 :    
10 :     =over 4
11 :    
12 :     =item user
13 :    
14 :     Name suffix to be used for log files. If omitted, the PID is used.
15 :    
16 :     =item trace
17 :    
18 :     Numeric trace level. A higher trace level causes more messages to appear. The
19 :     default trace level is 2. Tracing will be sent directly to the standard output
20 :     as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
21 :     where I<User> is the value of the B<user> option above.
22 :    
23 :     =item sql
24 :    
25 :     If specified, turns on tracing of SQL activity.
26 :    
27 :     =item background
28 :    
29 :     Save the standard and error output to files. The files will be created
30 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
31 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
32 :     B<user> option above.
33 :    
34 :     =item h
35 :    
36 :     Display this command's parameters and options.
37 :    
38 :     =item phone
39 :    
40 :     Phone number to message when the script is complete.
41 :    
42 :     =back
43 :    
44 :     =cut
45 :    
46 :     use strict;
47 :     use Tracer;
48 :     use DocUtils;
49 :     use TestUtils;
50 :     use Cwd;
51 :     use File::Copy;
52 :     use File::Path;
53 :     use FIG;
54 :     use SFXlate;
55 :     use Sprout;
56 :    
# Main script body: compare the old and new Sprout databases and report
# the genomes, subsystems, and per-genome features that were added or
# removed. Results are written through the Tracer facility (see ShowLists).

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(Sprout) ],
                                           {
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                           },
                                           "",
                                           @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Ensure we catch errors.
eval {
    # Get the old Sprout.
    my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);
    # Get its genomes in alphabetical order.
    my @oldGenomes = GetGenes($oldSprout);
    # Get the new Sprout.
    my $newSprout = SFXlate->new_sprout_only();
    # Get its genomes in alphabetical order.
    my @newGenomes = GetGenes($newSprout);
    # Compare the two genome lists. The new list is passed first; the result
    # is a pair of list references (inserted, deleted).
    my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);
    # Display the lists.
    ShowLists('New Genomes' => $insertedGenomes, 'Deleted Genomes' => $deletedGenomes);
    # Next, we get the subsystems.
    my @oldSubsystems = GetSubsystems($oldSprout);
    my @newSubsystems = GetSubsystems($newSprout);
    # Compare and display the subsystem lists.
    my ($insertedSubs, $deletedSubs) = Tracer::CompareLists(\@newSubsystems, \@oldSubsystems);
    ShowLists('New Subsystems' => $insertedSubs, 'Deleted Subsystems' => $deletedSubs);
    # Now we process the features of the common genomes. First we need a hash
    # of the inserted genome IDs so we know to skip them: a genome that only
    # exists in the new Sprout has no old feature list to compare against.
    my %skipGenes = map { $_->[0] => 1 } @{$insertedGenomes};
    # Loop through the genomes.
    for my $genome (@newGenomes) {
        # Only process this genome if it's common to both Sprouts.
        my ($genomeID, $genomeName) = @{$genome};
        if (! exists $skipGenes{$genomeID}) {
            # Get the new and old features. This will be very stressful to the machine,
            # because there are lots of features.
            my @oldFeatures = GetFeatures($oldSprout, $genomeID);
            my @newFeatures = GetFeatures($newSprout, $genomeID);
            # Compare the lists.
            my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
            # If either list has data, we want to display it.
            if (scalar @{$insertedFeatures} + scalar @{$deletedFeatures} > 0) {
                ShowLists("New Features for $genomeID" => $insertedFeatures,
                          "Features Deleted from $genomeID" => $deletedFeatures);
            }
        }
    }
};
# Save the error text immediately. $@ is a global, and the T() call below runs
# before the trace message is interpolated, so anything T() does internally
# could otherwise reset it before we get a chance to report it.
my $error = $@;
if ($error) {
    Trace("Script failed with error: $error") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Optionally notify the operator by SMS that the run is over.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "New Stuff Checker terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}
123 :    
=head3 GetGenes

C<< my @geneList = GetGenes($sprout); >>

List the genomes found in a Sprout instance. The query asks for the
genomes ordered by genome ID.

=over 4

=item sprout

Sprout instance to be queried for its genomes.

=item RETURN

Returns a list of two-tuples. Each tuple holds the genome ID followed by the
genome name, which is the genus, species, and unique characterization joined
by single spaces.

=back

=cut

sub GetGenes {
    # Get the parameters.
    my ($sprout) = @_;
    # These are the fields we need: the ID plus the three pieces of the name.
    my @fieldNames = ('Genome(id)',
                      'Genome(genus)',
                      'Genome(species)',
                      'Genome(unique-characterization)');
    # Read one row per genome, in genome ID order.
    my @rows = $sprout->GetAll(['Genome'], "ORDER BY Genome(id)", [], \@fieldNames);
    # Convert each row into an [id, name] pair.
    my @pairs;
    for my $row (@rows) {
        my ($genomeID, @nameParts) = @{$row}[0..3];
        push @pairs, [$genomeID, join(" ", @nameParts)];
    }
    # Return the pairs.
    return @pairs;
}
159 :    
=head3 GetSubsystems

C<< my @subsystems = GetSubsystems($sprout); >>

List the subsystems found in a Sprout instance. The query asks for the
subsystems ordered by subsystem ID.

=over 4

=item sprout

Sprout instance to be queried for its subsystems.

=item RETURN

Returns a list of 2-tuples. Each tuple holds the subsystem ID (its name)
followed by the subsystem's curator.

=back

=cut

sub GetSubsystems {
    # Get the parameters.
    my ($sprout) = @_;
    # We want the subsystem ID and its curator.
    my @fieldNames = ('Subsystem(id)', 'Subsystem(curator)');
    # Read one row per subsystem, in subsystem ID order.
    my @subsystems = $sprout->GetAll(['Subsystem'], "ORDER BY Subsystem(id)",
                                     [], \@fieldNames);
    # Hand the rows back unchanged.
    return @subsystems;
}
190 :    
=head3 GetFeatures

C<< my @features = GetFeatures($sprout, $genomeID); >>

Return the features of the specified genome in the specified Sprout instance.
The feature IDs are requested in order by feature ID.

=over 4

=item sprout

Sprout instance to use to get the features.

=item genomeID

ID of the genome in question.

=item RETURN

Returns a list of 2-tuples, the first element being the feature ID and the second its
functional assignment. Every tuple has exactly two elements; the second is undefined
if the Sprout instance reports no assignment for the feature.

=back

=cut

sub GetFeatures {
    # Get the parameters.
    my ($sprout, $genomeID) = @_;
    # Get the IDs of the genome's features, in feature ID order.
    my @featureIDs = $sprout->GetFlat(['HasFeature'],
                                      "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
                                      [$genomeID], 'HasFeature(to-link)');
    # Map each feature ID to its functional assignment. The call is forced into
    # scalar context: inside an anonymous-array constructor it would otherwise be
    # in list context, and an empty-list return would silently produce a
    # one-element tuple instead of the documented pair.
    my @retVal = map { [$_, scalar($sprout->FunctionOf($_))] } @featureIDs;
    # Return the result.
    return @retVal;
}
226 :    
=head3 ShowLists

C<< ShowLists(%lists); >>

Display a set of lists. The name and count are displayed at level 2 and
the entries at level 3. Each list should consist of 2-tuples. The lists
are displayed in alphabetical order by list name, so the output order does
not depend on hash ordering.

=over 4

=item lists

A hash mapping list names to list references. An undefined list reference
is treated as an empty list.

=back

=cut

sub ShowLists {
    # Get the parameters.
    my %lists = @_;
    # Loop through the lists in alphabetical order by list name. The explicit
    # sort is what guarantees the order; a bare "keys" is unordered.
    for my $listName (sort keys %lists) {
        # Get the list itself. A missing list is displayed as an empty one.
        my $list = $lists{$listName} || [];
        # Get the number of list items.
        my $listSize = scalar @{$list};
        # Display the header.
        if ($listSize == 0) {
            Trace("*** $listName: no entries") if T(2);
        } elsif ($listSize == 1) {
            Trace("*** $listName: one entry") if T(2);
        } else {
            Trace("*** $listName: $listSize entries") if T(2);
        }
        # If we're at trace level 3, display the list.
        if (T(3)) {
            # Put a spacer under the title.
            Trace("");
            # Get the width of the name column: the length of the longest name.
            my $width = 0;
            for my $entry (@{$list}) {
                my $entryLen = length $entry->[0];
                $width = $entryLen if $entryLen > $width;
            }
            # Now display the list, padding the names so the data column lines up.
            for my $entry (@{$list}) {
                my ($name, $data) = @{$entry};
                # The data column is optional (for example, a feature with no
                # functional assignment). Show it as blank rather than letting
                # an undefined value raise a warning.
                $data = "" if ! defined $data;
                Trace(" $name" . (" " x ($width - length $name)) . " $data");
            }
            Trace("");
            Trace("");
        }
    }
}
278 :    
279 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3