[Bio] / FigKernelPackages / ServerThing.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/ServerThing.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.51 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package ServerThing;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use YAML;
8 : parrello 1.50 use JSON::Any;
9 : parrello 1.1 use ERDB;
10 :     use TestUtils;
11 :     use Time::HiRes;
12 : parrello 1.9 use File::Temp;
13 : parrello 1.10 use ErrorMessage;
14 : parrello 1.1 use CGI;
15 : parrello 1.9 no warnings qw(once);
16 : parrello 1.1
17 : parrello 1.11 # Maximum number of requests to run per invocation.
18 : olson 1.48 use constant MAX_REQUESTS => 50;
19 : parrello 1.11
20 : parrello 1.1 =head1 General Server Helper
21 :    
22 :     This package provides a method-- I<RunServer>-- that can be called from a CGI
23 : parrello 1.9 script to perform the duties of a FIG server. RunServer is called with two
24 :     parameters: the name of the server package (e.g. C<SAP> for B<SAP.pm>) and
25 :     the first command-line parameter. The command-line parameter (if defined) will
26 :     be used as the tracing key, and also indicates that the script is being invoked
27 :     from the command line rather than over the web.
28 : parrello 1.1
29 :     =cut
30 :    
sub RunServer {
    # Get the parameters: the server package name and the optional tracing key.
    # A defined key also means we were invoked from the command line.
    my ($serverName, $key) = @_;
    # Set up tracing. We never do CGI tracing here; the only question is whether
    # or not the caller passed in a tracing key. If he didn't, we use the server
    # name.
    ETracing($key || $serverName, destType => 'APPEND', level => '0 ServerThing');
    # Turn off YAML compression, which causes problems with some of our hash keys.
    $YAML::CompressSeries = 0;
    # Load the server module.
    Trace("Requiring $serverName for task $$.") if T(3);
    eval {
        require "$serverName.pm";
    };
    # Save the error immediately: $@ is a global that any later call may reset.
    my $loadError = $@;
    if ($loadError) {
        SendError($loadError, "Could not load server module.");
        return;
    }
    # Having successfully loaded the server code, we create the object. A block
    # eval with a class-method call replaces the string eval, so the server name
    # is never compiled as Perl source.
    my $serverThing = eval { $serverName->new() };
    my $startError = $@;
    if ($startError) {
        SendError($startError, "Could not start server.");
        return;
    }
    # Only claim success once we know the constructor did not throw.
    Trace("$serverName object created for task $$.") if T(2);
    # No error, so now we can process the request.
    if (defined $key) {
        # We're being invoked from the command line. Use the tracing
        # key to find the parm file and create the CGI object from that.
        my $ih = Open(undef, "<$FIG_Config::temp/$key.parms");
        my $cgi = CGI->new($ih);
        # Run this request.
        RunRequest($cgi, $serverThing);
    } elsif (! defined $ENV{REQUEST_METHOD} || $ENV{REQUEST_METHOD} eq '') {
        # No tracing key and no request method, so presume Fast CGI. The
        # definedness test avoids an "uninitialized value" warning, since the
        # variable is normally absent in this case.
        my $requests = 0;
        Trace("Starting Fast CGI loop.") if T(3);
        # Loop through the fast CGI requests. If we have request throttling,
        # we exit after a maximum number of requests has been exceeded.
        require CGI::Fast;
        my $cgi;
        while ((MAX_REQUESTS == 0 || ++$requests < MAX_REQUESTS) &&
               ($cgi = CGI::Fast->new())) {
            RunRequest($cgi, $serverThing);
            Trace("Request $requests complete in task $$.") if T(3);
        }
        Trace("Terminating FastCGI task $$ after $requests requests.") if T(2);
    } else {
        # Here we have a normal web service (non-Fast).
        my $cgi = CGI->new();
        # Check for a source parameter, intended as the tracing key.
        # NOTE(review): $key is not read again inside this method after this
        # assignment; confirm whether tracing was meant to be re-keyed here.
        $key = $cgi->param('source');
        # Run this request.
        RunRequest($cgi, $serverThing);
    }
    return;
}
92 :    
93 :    
94 : parrello 1.9 =head2 Server Utility Methods
95 :    
96 :     The methods in this section are utilities of general use to the various
97 :     server modules.
98 :    
99 : parrello 1.21 =head3 AddSubsystemFilter
100 :    
101 :     ServerThing::AddSubsystemFilter(\$filter, $args);
102 :    
103 :     Add subsystem filtering information to the specified query filter clause
104 :     based on data in the argument hash. The argument hash will be checked for
105 : parrello 1.22 the C<-usable> parameter, which includes or excludes unusable subsystems, and
106 :     the C<-exclude> parameter, which lists types of subsystems that should be
107 : parrello 1.21 excluded.
108 :    
109 :     =over 4
110 :    
111 :     =item filter
112 :    
113 :     Reference to the current filter string. If additional filtering is required,
114 :     this string will be updated.
115 :    
116 :     =item args
117 :    
118 :     Reference to the parameter hash for the current server call. This hash will
119 : parrello 1.22 be examined for the C<-usable> and C<-exclude> parameters.
120 : parrello 1.21
121 :     =back
122 :    
123 :     =cut
124 :    
# Subsystem types that may legally appear in the "-exclude" parameter.
use constant SS_TYPE_EXCLUDE_ITEMS => {
    'cluster-based' => 1,
    experimental    => 1,
    private         => 1,
};

# Extend the filter string referenced by $filter with subsystem restrictions
# taken from the "-usable" and "-exclude" members of the argument hash $args.
# The new clauses are placed ahead of any existing filter text and everything
# is joined with " AND ". The filter is left untouched if no clause is needed.
sub AddSubsystemFilter {
    my ($filter, $args) = @_;
    # Clauses generated by this call accumulate here.
    my @clauses;
    # Only usable subsystems are included unless "-usable" says otherwise.
    my $usableOnly = (exists $args->{-usable} ? $args->{-usable} : 1);
    push @clauses, "Subsystem(usable) = 1" if $usableOnly;
    # Each requested exclusion must be a known subsystem type.
    my $excludedTypes = ServerThing::GetIdList(-exclude => $args, 1);
    for my $type (@$excludedTypes) {
        if (SS_TYPE_EXCLUDE_ITEMS->{$type}) {
            # Valid type: subsystems flagged with it are filtered out.
            push @clauses, "Subsystem($type) = 0";
        } else {
            Confess("Invalid exclusion type \"$type\".");
        }
    }
    # Rebuild the caller's filter only if we actually generated something.
    if (@clauses) {
        # A nonempty incoming filter goes last so it is preserved in the result.
        push @clauses, $$filter if $$filter;
        $$filter = join(" AND ", @clauses);
        Trace("Subsystem filter is $$filter.") if T(ServerUtilities => 3);
    }
}
167 :    
168 :    
169 :    
170 : parrello 1.9 =head3 GetIdList
171 :    
172 : parrello 1.19 my $ids = ServerThing::GetIdList($name => $args, $optional);
173 : parrello 1.9
174 :     Get a named list of IDs from an argument structure. If the IDs are
175 :     missing, or are not a list, an error will occur.
176 :    
177 :     =over 4
178 :    
179 :     =item name
180 :    
181 :     Name of the argument structure member that should contain the ID list.
182 :    
183 :     =item args
184 :    
185 :     Argument structure from which the ID list is to be extracted.
186 :    
187 : parrello 1.19 =item optional (optional)
188 :    
189 :     If TRUE, then a missing value will not generate an error. Instead, an empty list
190 :     will be returned. The default is FALSE.
191 :    
192 : parrello 1.9 =item RETURN
193 :    
194 :     Returns a reference to a list of IDs taken from the argument structure.
195 :    
196 :     =back
197 :    
198 :     =cut
199 :    
# Extract the ID list stored under $name in the argument hash $args and return
# it as a list reference. A scalar value is wrapped in a singleton list. If
# $optional is TRUE, a missing argument structure or a missing member yields an
# empty list; otherwise those cases, and any value that is a reference other
# than a list, are reported through Confess.
sub GetIdList {
    my ($name, $args, $optional) = @_;
    # This will hold the list reference we hand back.
    my $idList;
    if ($optional && ! defined $args) {
        # No argument structure at all, but that is acceptable for an
        # optional parameter.
        $idList = [];
    } elsif (ref($args) eq 'HASH') {
        # We have a real parameter hash, so look for the requested member.
        $idList = $args->{$name};
        if (defined $idList) {
            # Something was found. A plain scalar becomes a one-element list;
            # any reference other than a list is an error.
            my $kind = ref $idList;
            if (not $kind) {
                $idList = [$idList];
            } elsif ($kind ne 'ARRAY') {
                Confess("The '$name' parameter must be a list.");
            }
        } elsif ($optional) {
            # Missing but optional: return an empty list.
            $idList = [];
        } else {
            Confess("No '$name' parameter found.");
        }
    } else {
        # The argument structure is not a hash, so nothing can be extracted.
        Confess("No '$name' parameter present.");
    }
    return $idList;
}
240 :    
241 :    
242 :     =head3 RunTool
243 :    
244 :     ServerThing::RunTool($name => $cmd);
245 :    
246 :     Run a command-line tool. A non-zero return value from the tool will cause
247 :     a fatal error, and the tool's error log will be traced.
248 :    
249 :     =over 4
250 :    
251 :     =item name
252 :    
253 :     Name to give to the tool in the error output.
254 :    
255 :     =item cmd
256 :    
257 :     Command to use for running the tool. This should be the complete command line.
258 :     The command should not contain any fancy piping, though it may redirect the
259 :     standard input and output. The command will be modified by this method to
260 :     redirect the error output to a temporary file.
261 :    
262 :     =back
263 :    
264 :     =cut
265 :    
# Run the command line $cmd through the shell with its standard error sent to
# a per-process temporary log file. The log is traced when the tool fails (at
# trace level 1) or when it succeeds but wrote something (at trace level 3),
# and is always deleted afterward. A non-zero status from system() is reported
# through Confess, using $name to identify the tool.
sub RunTool {
    my ($name, $cmd) = @_;
    # The error output is captured in a file named for this process.
    my $stderrFile = "$FIG_Config::temp/errors$$.log";
    Trace("Executing command: $cmd") if T(ServerUtilities => 3);
    my $status = system("$cmd 2> $stderrFile");
    Trace("Return from $name tool is $status.") if T(ServerUtilities => 3);
    my $succeeded = ($status == 0);
    # Failures are logged at level 1; successes only at level 3, and then only
    # if the tool actually wrote something to the error log.
    my $wantLog = T(ServerUtilities => ($succeeded ? 3 : 1));
    if ($succeeded) {
        $wantLog &&= -s $stderrFile;
    }
    TraceErrorLog($name, $stderrFile) if $wantLog;
    # The log file is removed whether or not the tool worked.
    my $removed = unlink $stderrFile;
    return $removed if $succeeded;
    # Here the tool failed, so confess the error.
    return Confess("$name command failed with error code $status.");
}
294 :    
295 : parrello 1.36
296 : parrello 1.37 =head2 Gene Correspondence File Methods
297 : parrello 1.36
298 : parrello 1.37 These methods relate to gene correspondence files, which are generated by the
299 :     L<svr_corresponding_genes.pl> script. Correspondence files are cached in the
300 :     organism cache (I<$FIG_Config::orgCache>) directory. Eventually they will be
301 :     copied into the organism directories themselves. At that point, the code below
302 :     will be modified to check the organism directories first and use the cache
303 :     directory if no file is found there.
304 :    
305 :     A gene correspondence file contains correspondences from a source genome to a
306 :     target genome. Most such correspondences are bidirectional best hits. A unidirectional
307 :     best hit may exist from the source genome to the target genome or in the reverse
308 :     direction from the target genome to the source genome. The cache directory itself
309 :     is divided into subdirectories by organism. The subdirectory has the source genome
310 :     name and the files themselves are named by the target genome.
311 :    
312 :     Some of the files are invalid and will be erased when they are found. A file is
313 :     considered invalid if it has a non-numeric value in a numeric column or if it
314 :     does not have any unidirectional hits from the target genome to the source
315 :     genome.
316 :    
317 :     The process of managing the correspondence files is tricky and dangerous because
318 :     of the possibility of race conditions. It can take several minutes to generate a
319 :     file, and if two processes try to generate the same file at the same time we need
320 :     to make sure they don't step on each other.
321 :    
322 :     In stored files, the source genome ID is always lexically lower than the target
323 :     genome ID. If a correspondence in the reverse direction is desired, the converse
324 :     file is found and the contents flipped automatically as they are read. So, the
325 :     correspondence from B<360108.3> to B<100226.1> would be found in a file with the
326 :     name B<360108.3> in the directory for B<100226.1>. Since this file actually has
327 :     B<100226.1> as the source and B<360108.3> as the target, the columns are
328 :     re-ordered and the arrows reversed before the file contents are passed to the
329 :     caller.
330 :    
331 :     =head4 Gene Correspondence List
332 :    
333 :     A gene correspondence file contains 18 columns. These are usually packaged as
334 :     a reference to list of lists. Each sub-list has the following format.
335 :    
336 :     =over 4
337 :    
338 :     =item 0
339 :    
340 :     The ID of a PEG in genome 1.
341 :    
342 :     =item 1
343 :    
344 :     The ID of a PEG in genome 2 that is our best estimate of a "corresponding gene".
345 :    
346 :     =item 2
347 :    
348 :     Count of the number of pairs of matching genes were found in the context.
349 :    
350 :     =item 3
351 :    
352 :     Pairs of corresponding genes from the contexts.
353 :    
354 :     =item 4
355 :    
356 :     The function of the gene in genome 1.
357 :    
358 :     =item 5
359 :    
360 :     The function of the gene in genome 2.
361 :    
362 :     =item 6
363 :    
364 :     Comma-separated list of aliases for the gene in genome 1 (any protein with an
365 :     identical sequence is considered an alias, whether or not it is actually the
366 :     name of the same gene in the same genome).
367 :    
368 :     =item 7
369 :    
370 :     Comma-separated list of aliases for the gene in genome 2 (any protein with an
371 :     identical sequence is considered an alias, whether or not it is actually the
372 :     name of the same gene in the same genome).
373 :    
374 :     =item 8
375 :    
376 :     Bi-directional best hits will contain "<=>" in this column; otherwise, "->" will appear.
377 :    
378 :     =item 9
379 :    
380 :     Percent identity over the region of the detected match.
381 :    
382 :     =item 10
383 :    
384 :     The P-score for the detected match.
385 :    
386 :     =item 11
387 :    
388 :     Beginning match coordinate in the protein encoded by the gene in genome 1.
389 :    
390 :     =item 12
391 :    
392 :     Ending match coordinate in the protein encoded by the gene in genome 1.
393 :    
394 :     =item 13
395 :    
396 :     Length of the protein encoded by the gene in genome 1.
397 :    
398 :     =item 14
399 :    
400 :     Beginning match coordinate in the protein encoded by the gene in genome 2.
401 :    
402 :     =item 15
403 :    
404 :     Ending match coordinate in the protein encoded by the gene in genome 2.
405 :    
406 :     =item 16
407 :    
408 :     Length of the protein encoded by the gene in genome 2.
409 :    
410 :     =item 17
411 :    
412 :     Bit score for the match. Divide by the length of the longer PEG to get
413 :     what we often refer to as a "normalized bit score".
414 :    
415 :     =back
416 :    
417 :     In the actual files, there will also be reverse correspondences indicated by a
418 :     back-arrow ("<-") in item (8). The output returned by the servers, however,
419 :     is filtered so that only forward correspondences occur. If a converse file
420 :     is used, the columns are re-ordered and the arrows reversed so that it looks
421 :     correct.
422 :    
423 :     =cut
424 :    
# Maps each correspondence arrow to the arrow that represents the same
# relationship when the source and target genomes are swapped.
use constant ARROW_FLIP => {
    '<-'  => '->',
    '->'  => '<-',
    '<=>' => '<=>',
};
# Indexes of the gene correspondence columns that must hold numeric values.
use constant NUM_COLS => [2, 9 .. 17];
429 :    
430 :     =head3 CheckForGeneCorrespondenceFile
431 :    
432 :     my ($fileName, $converse) = ServerThing::CheckForGeneCorrespondenceFile($genome1, $genome2);
433 :    
434 :     Try to find a gene correspondence file for the specified genome pairing. If the
435 :     file exists, its name and an indication of whether or not it is in the correct
436 :     direction will be returned.
437 : parrello 1.36
438 :     =over 4
439 :    
440 :     =item genome1
441 :    
442 : parrello 1.37 Source genome for the desired correspondence.
443 : parrello 1.36
444 :     =item genome2
445 :    
446 : parrello 1.37 Target genome for the desired correspondence.
447 : parrello 1.36
448 :     =item RETURN
449 :    
450 : parrello 1.37 Returns a two-element list. The first element is the name of the file containing the
451 :     correspondence, or C<undef> if the file does not exist. The second element is TRUE
452 :     if the file holds the converse correspondence and must be flipped, else FALSE.
453 : parrello 1.36
454 :     =back
455 :    
456 :     =cut
457 :    
# Look for an existing gene correspondence file for the given genome pair.
# Returns a two-element list: the file name (undefined if no file exists) and
# a flag that is TRUE when the file stores the pair in the opposite order from
# the one requested, meaning its contents must be flipped when read.
sub CheckForGeneCorrespondenceFile {
    # Get the parameters.
    my ($genome1, $genome2) = @_;
    # Declare the return variables.
    my ($fileName, $converse);
    # Determine the ordering of the genome IDs. If the file's source genome is
    # not the requested source, the file is the converse of what was asked for.
    my ($corrFileName, $genomeA, $genomeB) = ComputeCorrespondenceFileName($genome1, $genome2);
    $converse = ($genomeA ne $genome1);
    # Look for a file containing the desired correspondence. (The code to check for a
    # pre-computed file in the organism directories is currently turned off, because
    # these files are all currently invalid.)
    my $testFileName = "$FIG_Config::organisms/$genomeA/CorrToReferenceGenomes/$genomeB";
    if (0 && -f $testFileName) {
        # Use the pre-computed file. The name is stored before tracing so the
        # message does not interpolate an undefined value once this branch is
        # turned back on.
        $fileName = $testFileName;
        Trace("Using pre-computed file $fileName for genome correspondence.") if T(Corr => 3);
    } elsif (-f $corrFileName) {
        # Use the file from the organism cache.
        $fileName = $corrFileName;
        Trace("Using cached file $fileName for genome correspondence.") if T(Corr => 3);
    }
    # Return the result.
    return ($fileName, $converse);
}
481 :    
482 :    
483 :     =head3 ComputeCorrespondenceFileName
484 :    
485 :     my ($fileName, $genomeA, $genomeB) = ServerThing::ComputeCorrespondenceFileName($genome1, $genome2);
486 :    
487 :     Compute the name to be given to a genome correspondence file in the organism cache
488 :     and return the source and target genomes that would be in it.
489 :    
490 :     =over 4
491 :    
492 :     =item genome1
493 :    
494 :     Source genome for the desired correspondence.
495 :    
496 :     =item genome2
497 :    
498 :     Target genome for the desired correspondence.
499 :    
500 :     =item RETURN
501 :    
502 :     Returns a three-element list. The first element is the name of the file to contain the
503 :     correspondence, the second element is the name of the genome that would act as the
504 :     source genome in the file, and the third element is the name of the genome that would
505 :     act as the target genome in the file.
506 :    
507 :     =back
508 :    
509 :     =cut
510 :    
# Work out where the correspondence file for a genome pair lives in the
# organism cache. Returns the file name followed by the genome IDs that act as
# the file's source and target, which are swapped relative to the request when
# MustFlipGenomeIDs says the requested order is not the stored order.
sub ComputeCorrespondenceFileName {
    my ($genome1, $genome2) = @_;
    # Put the genome IDs into the order used for stored files.
    my ($sourceID, $targetID) = (MustFlipGenomeIDs($genome1, $genome2)
                                 ? ($genome2, $genome1)
                                 : ($genome1, $genome2));
    # The file is named for the target and lives in the source's directory.
    my $cacheDir = ComputeCorrespondenceDirectory($sourceID);
    return ("$cacheDir/$targetID", $sourceID, $targetID);
}
529 :    
530 :    
531 : parrello 1.47 =head3 ComputeCorrespondenceDirectory
532 :    
533 :     my $dirName = ServerThing::ComputeCorrespondenceDirectory($genome);
534 :    
535 :     Return the name of the directory that would contain the correspondence files
536 :     for the specified genome.
537 :    
538 :     =over 4
539 :    
540 :     =item genome
541 :    
542 :     ID of the genome whose correspondence file directory is desired.
543 :    
544 :     =item RETURN
545 :    
546 :     Returns the name of the directory of interest.
547 :    
548 :     =back
549 :    
550 :     =cut
551 :    
# Return the name of the organism-cache subdirectory for the specified genome.
# The directory name is passed to Tracer::Insure with mode 0777 before being
# returned.
sub ComputeCorrespondenceDirectory {
    my ($genome) = @_;
    # The subdirectory is named for the genome itself.
    my $cacheDir = join("/", $FIG_Config::orgCache, $genome);
    Tracer::Insure($cacheDir, 0777);
    return $cacheDir;
}
561 :    
562 :    
563 : parrello 1.37 =head3 CreateGeneCorrespondenceFile
564 :    
565 :     my ($fileName, $converse) = ServerThing::CreateGeneCorrespondenceFile($genome1, $genome2);
566 :    
567 :     Create a new gene correspondence file in the organism cache for the specified
568 :     genome correspondence. The name of the new file will be returned along with
569 :     an indicator of whether or not it is in the correct direction.
570 :    
571 :     =over 4
572 :    
573 :     =item genome1
574 :    
575 :     Source genome for the desired correspondence.
576 :    
577 :     =item genome2
578 :    
579 :     Target genome for the desired correspondence.
580 :    
581 :     =item RETURN
582 :    
583 :     Returns a two-element list. The first element is the name of the file containing the
584 :     correspondence, or C<undef> if an error occurred. The second element is TRUE
585 :     if the file holds the converse correspondence and must be flipped, else FALSE.
586 :    
587 :     =back
588 :    
589 :     =cut
590 :    
# Build the gene correspondence file for a genome pair in the organism cache.
# The output of the svr_corresponding_genes script is copied into a temporary
# file beside the final location, and the temporary file is renamed into place
# only if the copy and both closes succeeded. Returns a two-element list: the
# file name (undefined if anything went wrong) and a flag that is TRUE when the
# stored file is the converse of the requested correspondence.
sub CreateGeneCorrespondenceFile {
    # Get the parameters.
    my ($genome1, $genome2) = @_;
    # Declare the return variables.
    my ($fileName, $converse);
    # Compute the ultimate name for the correspondence file.
    my ($corrFileName, $genomeA, $genomeB) = ComputeCorrespondenceFileName($genome1, $genome2);
    $converse = ($genome1 ne $genomeA);
    # Generate a temporary file name in the same directory. We'll build the temporary
    # file and then rename it when we're done.
    my $tempFileName = "$corrFileName.$$.tmp";
    # This will be set to FALSE if we detect an error.
    my $fileOK = 1;
    # The file handles will be put in here.
    my ($ih, $oh);
    # Protect from errors.
    eval {
        # Open the temporary file for output.
        $oh = Open(undef, ">$tempFileName");
        # Open a pipe to get the correspondence data.
        $ih = Open(undef, "$FIG_Config::bin/svr_corresponding_genes -u localhost $genomeA $genomeB |");
        Trace("Creating correspondence file for $genomeA to $genomeB in temporary file $tempFileName.") if T(3);
        # Copy the pipe data into the temporary file. A failed write makes the
        # file unusable, so we stop copying as soon as one is detected.
        while (defined(my $line = <$ih>)) {
            unless (print {$oh} $line) {
                $fileOK = 0;
                last;
            }
        }
        # Close both files. If the close fails we need to know: it means there was a pipe
        # error. Both closes are always attempted so that a pipe failure does not
        # leave the output handle open.
        my $pipeClosed = close $ih;
        my $outputClosed = close $oh;
        $fileOK &&= ($pipeClosed && $outputClosed);
    };
    # Save the error immediately: $@ is a global that any later call may reset.
    my $evalError = $@;
    if ($evalError) {
        # Here a fatal error of some sort occurred. Record it and force the
        # files closed.
        Trace("Error creating correspondence file $tempFileName: $evalError") if T(Corr => 1);
        close $ih if $ih;
        close $oh if $oh;
    } elsif ($fileOK) {
        # Here everything worked. Try to rename the temporary file to the real
        # file name.
        if (rename $tempFileName, $corrFileName) {
            # Everything is ok, fix the permissions and return the file name.
            chmod 0664, $corrFileName;
            $fileName = $corrFileName;
            Trace("Created correspondence file $fileName.") if T(Corr => 3);
        }
    }
    # If the temporary file still exists, something failed, so delete it.
    if (-f $tempFileName) {
        unlink $tempFileName;
    }
    # Return the results.
    return ($fileName, $converse);
}
644 :    
645 :    
646 : parrello 1.41 =head3 MustFlipGenomeIDs
647 :    
648 :     my $converse = ServerThing::MustFlipGenomeIDs($genome1, $genome2);
649 :    
650 :     Return TRUE if the specified genome IDs are out of order. When genome IDs are out of
651 :     order, they are stored in the converse order in correspondence files on the server.
652 :     This is a simple method that allows the caller to check for the need to flip.
653 :    
654 :     =over 4
655 :    
656 :     =item genome1
657 :    
658 :     ID of the proposed source genome.
659 :    
660 :     =item genome2
661 :    
662 :     ID of the proposed target genome.
663 :    
664 :     =item RETURN
665 :    
666 :     Returns TRUE if the first genome would be stored on the server as a target, FALSE if
667 :     it would be stored as a source.
668 :    
    =back

669 :     =cut
670 :    
# Return TRUE if the first genome ID sorts after the second in string order,
# which means a stored correspondence file would have the two genomes swapped.
sub MustFlipGenomeIDs {
    my ($genome1, $genome2) = @_;
    # The comparison is a string comparison, not a numeric one.
    my $outOfOrder = ($genome2 lt $genome1);
    return $outOfOrder;
}
677 :    
678 :    
679 : parrello 1.37 =head3 ReadGeneCorrespondenceFile
680 :    
681 : parrello 1.40 my $list = ServerThing::ReadGeneCorrespondenceFile($fileName, $converse, $all);
682 : parrello 1.37
683 :     Return the contents of the specified gene correspondence file in the form of
684 :     a list of lists, with backward correspondences filtered out. If the file is
685 :     for the converse of the desired correspondence, the columns will be reordered
686 :     automatically so that it looks as if the file were designed for the proper
687 :     direction.
688 :    
689 :     =over 4
690 :    
691 :     =item fileName
692 :    
693 :     The name of the gene correspondence file to read.
694 :    
695 :     =item converse (optional)
696 :    
697 :     TRUE if the file is for the converse of the desired correspondence, else FALSE.
698 :     If TRUE, the file columns will be reordered automatically. The default is FALSE,
699 :     meaning we want to use the file as it appears on disk.
700 :    
701 : parrello 1.40 =item all (optional)
702 :    
703 :     TRUE if backward unidirectional correspondences should be included in the output.
704 :     The default is FALSE, in which case only forward and bidirectional correspondences
705 :     are included.
706 :    
707 : parrello 1.37 =item RETURN
708 :    
709 :     Returns a L</Gene Correspondence List> in the form of a reference to a list of lists.
710 :     If the file's contents are invalid or an error occurs, an undefined value will be
711 :     returned.
712 :    
713 :     =back
714 :    
715 :     =cut
716 :    
# Read a gene correspondence file into a list of rows. If $converse is TRUE,
# each row is flipped with ReverseGeneCorrespondenceRow as it is read. Rows
# whose arrow column is "<-" are dropped unless $all is TRUE. Returns a
# reference to the list of rows, or an undefined value if the file cannot be
# opened, a row fails validation, or the file contains no "<-" row at all.
sub ReadGeneCorrespondenceFile {
    # Get the parameters.
    my ($fileName, $converse, $all) = @_;
    # Declare the return variable. We will only put something in here if we are
    # completely successful.
    my $retVal;
    # Try to open the file. The three-argument form keeps characters in the
    # file name from being interpreted as part of the open mode.
    Trace("Reading correspondence file $fileName.") if T(3);
    my $ih;
    if (! open $ih, '<', $fileName) {
        # Here the open failed, so there is nothing to read or close.
        Trace("Failed to open gene correspondence file $fileName: $!") if T(Corr => 1);
        return $retVal;
    }
    # This value will be set to 1 if an error is detected.
    my $error = 0;
    # The gene correspondence list will be built in here.
    my @corrList;
    # This variable will be set to TRUE if we find a reverse correspondence somewhere
    # in the file. Not finding one is an error.
    my $reverseFound = 0;
    # Loop until we hit the end of the file or an error occurs.
    while (! $error && ! eof $ih) {
        # Get the current line.
        my @row = Tracer::GetLine($ih);
        # Check the file's own arrow for a reverse correspondence. This is done
        # before any flipping, and a short row is treated as having no arrow.
        $reverseFound = 1 if (defined $row[8] && $row[8] eq '<-');
        # If we're in converse mode, reformat the line.
        if ($converse) {
            ReverseGeneCorrespondenceRow(\@row);
        }
        # Validate the row. A bad row invalidates the whole file, so it is not
        # added to the list.
        if (ValidateGeneCorrespondenceRow(\@row)) {
            Trace("Invalid row $. found in correspondence file $fileName.") if T(Corr => 1);
            $error = 1;
        } elsif ($all || ! defined $row[8] || $row[8] ne '<-') {
            # This row is in the correct direction, so keep it.
            push @corrList, \@row;
        }
    }
    # Close the input file.
    close $ih;
    # If we have no errors and we found a reverse arrow, keep the result.
    if (! $error) {
        if ($reverseFound) {
            $retVal = \@corrList;
        } else {
            Trace("No reverse arrow found in correspondence file $fileName.") if T(Corr => 1);
        }
    }
    # Return the result (if any).
    return $retVal;
}
772 :    
773 : parrello 1.39 =head3 ReverseGeneCorrespondenceRow
774 :    
775 :     ServerThing::ReverseGeneCorrespondenceRow($row)
776 :    
777 :     Convert a gene correspondence row to represent the converse correspondence. The
778 :     elements in the row will be reordered to represent a correspondence from the
779 :     target genome to the source genome.
780 :    
781 :     =over 4
782 :    
783 :     =item row
784 :    
785 :     Reference to a list containing a single row from a L</Gene Correspondence List>.
786 :    
787 :     =back
788 :    
789 :     =cut
790 :    
# Flip a gene correspondence row in place so that it describes the converse
# correspondence. The columns for genome 1 and genome 2 trade places (gene
# IDs, functions, aliases, and the match begin/end/length triples), the arrow
# in column 8 is replaced by its ARROW_FLIP counterpart, and each
# colon-separated pair in the comma-separated column 3 has its parts reversed.
sub ReverseGeneCorrespondenceRow {
    my ($row) = @_;
    # Work from a snapshot so that no column is overwritten before it is read.
    my @original = @$row;
    # The Nth original value is stored at the Nth index listed here.
    @{$row}[1, 0, 2, 3, 5, 4, 7, 6, 8, 9, 10, 14, 15, 16, 11, 12, 13, 17] = @original;
    # Point the arrow the other way.
    $row->[8] = ARROW_FLIP->{$row->[8]};
    # Swap the two sides of every context pair.
    my @flippedPairs;
    for my $pair (split /,/, $row->[3]) {
        push @flippedPairs, join(":", reverse split(/:/, $pair));
    }
    $row->[3] = join(",", @flippedPairs);
}
804 : parrello 1.37
805 :     =head3 ValidateGeneCorrespondenceRow
806 :    
807 :     my $errorCount = ServerThing::ValidateGeneCorrespondenceRow($row);
808 :    
809 :     Validate a gene correspondence row. The numeric fields are checked to ensure they
810 :     are numeric, the source and target gene IDs are validated, and the arrow column is
811 :     checked against the known arrow values. The return value will indicate the number of errors found.
812 :    
813 :     =over 4
814 :    
815 :     =item row
816 :    
817 :     Reference to a list containing a single row from a L</Gene Correspondence List>.
818 :    
819 :     =item RETURN
820 :    
821 :     Returns the number of errors found in the row. A return of C<0> indicates the row
822 :     is valid.
823 :    
824 :     =back
825 :    
826 :     =cut
827 :    
sub ValidateGeneCorrespondenceRow {
    # Get the parameters. Only the row is examined; any additional arguments
    # a caller passes are ignored.
    my ($row) = @_;
    # Denote no errors have been found so far.
    my $retVal = 0;
    # Check for non-numeric values in the number columns. A column that is
    # missing from a short row is treated as an empty string, so it is counted
    # as an error without triggering "uninitialized value" warnings.
    for my $col (@{NUM_COLS()}) {
        my $value = defined $row->[$col] ? $row->[$col] : '';
        unless ($value =~ /^-?\d+\.?\d*(?:e[+-]?\d+)?$/) {
            Trace("Gene correspondence error. \"$value\" not numeric.") if T(Corr => 2);
            $retVal++;
        }
    }
    # Check the gene IDs in the first two columns.
    for my $col (0, 1) {
        my $geneID = defined $row->[$col] ? $row->[$col] : '';
        unless ($geneID =~ /^fig\|\d+\.\d+\.\w+\.\d+$/) {
            Trace("Gene correspondence error. \"$geneID\" not a gene ID.") if T(Corr => 2);
            $retVal++;
        }
    }
    # Verify the arrow: it must be one of the keys of ARROW_FLIP.
    my $arrow = defined $row->[8] ? $row->[8] : '';
    unless (exists ARROW_FLIP->{$arrow}) {
        Trace("Gene correspondence error. \"$arrow\" not an arrow.") if T(Corr => 2);
        $retVal++;
    }
    # Return the error count (0 means the row is valid).
    return $retVal;
}
855 :    
856 : parrello 1.9
857 :     =head2 Internal Utility Methods
858 :    
859 :     The methods in this section are used internally by this package.
860 :    
861 : parrello 1.6 =head3 RunRequest
862 :    
863 : parrello 1.9 ServerThing::RunRequest($cgi, $serverThing);
864 : parrello 1.6
865 :     Run a request from the specified server using the incoming CGI parameter
866 :     object for the parameters.
867 :    
868 :     =over 4
869 :    
870 :     =item cgi
871 :    
872 : parrello 1.49 CGI query object containing the parameters from the web service request. The
873 :     significant parameters are as follows.
874 :    
875 :     =over 8
876 :    
877 :     =item function
878 :    
879 :     Name of the function to run.
880 :    
881 :     =item args
882 :    
883 :     Parameters for the function.
884 :    
885 :     =item encoding
886 :    
887 :     Encoding scheme for the function parameters, either C<yaml> (the default) or C<json> (used
888 :     by the Java interface).
889 :    
890 :     =back
891 :    
892 :     Certain unusual requests can come in outside of the standard function interface.
893 :     These are indicated by special parameters that override all the others.
894 :    
895 :     =over 8
896 :    
897 :     =item pod
898 :    
899 :     Display a POD documentation module.
900 :    
901 :     =item code
902 :    
903 :     Display an example code file.
904 :    
905 :     =item file
906 :    
907 :     Transfer a file (not implemented).
908 :    
909 :     =back
910 : parrello 1.6
911 : parrello 1.13 =item serverThing
912 : parrello 1.6
913 : parrello 1.13 Server object against which to run the request.
914 : parrello 1.6
915 :     =back
916 :    
917 :     =cut
918 :    
sub RunRequest {
    # Get the parameters. $docURL is accepted for compatibility with existing
    # callers but is not used by this method.
    my ($cgi, $serverThing, $docURL) = @_;
    # Determine the request type. Every CGI parameter passed on to a helper is
    # fetched in scalar context: in list context param() returns ALL values of
    # a repeated parameter (and nothing for a missing one), which would shift
    # the positions of the helper's arguments.
    if ($cgi->param('pod')) {
        # Here we have a documentation request. In this case, we produce POD HTML.
        ProducePod(scalar $cgi->param('pod'));
    } elsif ($cgi->param('code')) {
        # Here the user wants to see the code for one of our scripts.
        LineNumberize(scalar $cgi->param('code'));
    } elsif ($cgi->param('file')) {
        # Here we have a file request. Process according to the type.
        my $type = $cgi->param('file');
        if ($type eq 'open') {
            OpenFile(scalar $cgi->param('name'));
        } elsif ($type eq 'create') {
            CreateFile();
        } elsif ($type eq 'read') {
            ReadChunk(scalar $cgi->param('name'), scalar $cgi->param('location'),
                      scalar $cgi->param('size'));
        } elsif ($type eq 'write') {
            WriteChunk(scalar $cgi->param('name'), scalar $cgi->param('data'));
        } else {
            Die("Invalid file function \"$type\".");
        }
    } else {
        # The default is a function request. Get the function name.
        my $function = $cgi->param('function') || "";
        Trace("Server function for task $$ is $function.") if T(3);
        # Insure the function name is valid (word characters only).
        # NOTE(review): any method of the server object whose name passes this
        # check can be invoked by a client -- confirm that is intended.
        Die("Invalid function name.")
            if $function =~ /\W/;
        # Determine the encoding scheme. The default is YAML.
        my $encoding = $cgi->param('encoding') || 'yaml';
        # The parameter structure will go in here.
        my $args;
        # Start the timer.
        my $start = time();
        # Parse the arguments, protecting from errors. The parser used depends
        # on the encoding parameter.
        eval {
            # Get the argument string.
            my $argString = $cgi->param('args');
            if ($encoding eq 'yaml') {
                # Parse the arguments using YAML.
                # NOTE(review): this deserializes client-supplied YAML; confirm
                # the installed YAML module does not bless loaded objects.
                $args = YAML::Load($argString);
            } elsif ($encoding eq 'json') {
                # Parse the arguments using JSON.
                Trace("Incoming string is:\n$argString") if T(3);
                $args = JSON::Any->jsonToObj($argString);
            } else {
                Die("Invalid encoding type $encoding.");
            }
        };
        # Save the error immediately: $@ is a global that any later call may reset.
        my $parseError = $@;
        # Check to make sure we got everything.
        if ($parseError) {
            SendError($parseError, "Error formatting parameters.");
        } elsif (! $function) {
            SendError("No function specified.", "No function specified.");
        } else {
            # Run the requested method against the server object.
            my $document = eval { $serverThing->$function($args) };
            # Again, capture the error before calling anything else, so the
            # trace below reports the same text that was sent to the client.
            my $serviceError = $@;
            if ($serviceError) {
                # We have an error, so send an error response.
                SendError($serviceError, "Error detected by service.");
                Trace("Error encountered by service: $serviceError") if T(0);
            } else {
                # No error, so we output the result. Start with the HTTP header.
                print $cgi->header(-type => 'text/plain');
                # The nature of the output depends on the encoding type. Only
                # 'yaml' and 'json' can reach this point, because any other
                # encoding was rejected while parsing the arguments.
                my $string;
                if ($encoding eq 'yaml') {
                    $string = YAML::Dump($document);
                } else {
                    $string = JSON::Any->objToJson($document);
                }
                print $string;
                MemTrace(length($string) . " bytes returned from $function by task $$.") if T(Memory => 3);
            }
        }
        # Stop the timer and report the elapsed time, rounded to whole seconds.
        my $duration = int(time() - $start + 0.5);
        Trace("Function $function executed in $duration seconds by task $$.") if T(2);
    }
}
1006 :    
1007 : parrello 1.9 =head3 CreateFile
1008 :    
1009 :     ServerThing::CreateFile();
1010 :    
1011 :     Create a new, empty temporary file and send its name back to the client.
1012 :    
1013 :     =cut
1014 :    
sub CreateFile {
    # Placeholder: nothing is created and nothing is sent to the client yet.
    ##TODO: Code
    return;
}
1018 :    
1019 :     =head3 OpenFile
1020 : parrello 1.6
1021 : parrello 1.9 ServerThing::OpenFile($name);
1022 : parrello 1.1
1023 : parrello 1.9 Send the length of the named file back to the client.
1024 :    
1025 :     =over 4
1026 :    
1027 :     =item name
1028 :    
1029 :     ##TODO: name description
1030 :    
1031 :     =back
1032 :    
1033 :     =cut
1034 :    
sub OpenFile {
    # Placeholder: the file name is unpacked, but no length is computed or
    # sent to the client yet.
    my ($name) = @_;
    ##TODO: Code
}
1040 : parrello 1.1
1041 : parrello 1.9 =head3 ReadChunk
1042 : parrello 1.1
1043 : parrello 1.9 ServerThing::ReadChunk($name, $location, $size);
1044 : parrello 1.1
1045 : parrello 1.9 Read the indicated number of bytes from the specified location of the
1046 :     named file and send them back to the client.
1047 : parrello 1.1
1048 :     =over 4
1049 :    
1050 :     =item name
1051 :    
1052 : parrello 1.9 ##TODO: name description
1053 : parrello 1.1
1054 : parrello 1.9 =item location
1055 : parrello 1.1
1056 : parrello 1.9 ##TODO: location description
1057 : parrello 1.1
1058 : parrello 1.9 =item size
1059 : parrello 1.1
1060 : parrello 1.9 ##TODO: size description
1061 : parrello 1.1
1062 :     =back
1063 :    
1064 :     =cut
1065 :    
sub ReadChunk {
    # Placeholder: the file name, starting location, and byte count are
    # unpacked, but nothing is read or sent to the client yet.
    my ($name, $location, $size) = @_;
    ##TODO: Code
}
1071 :    
1072 : parrello 1.9 =head3 WriteChunk
1073 : parrello 1.1
1074 : parrello 1.9 ServerThing::WriteChunk($name, $data);
1075 : parrello 1.8
1076 : parrello 1.9 Write the specified data to the named file.
1077 : parrello 1.8
1078 :     =over 4
1079 :    
1080 :     =item name
1081 :    
1082 : parrello 1.9 ##TODO: name description
1083 :    
1084 :     =item data
1085 :    
1086 :     ##TODO: data description
1087 :    
1088 :     =back
1089 :    
1090 :     =cut
1091 :    
sub WriteChunk {
    # Placeholder: the file name and data are unpacked, but nothing is
    # written yet.
    my ($name, $data) = @_;
    ##TODO: Code
}
1097 :    
1098 :    
1099 : disz 1.31 =head3 LineNumberize
1100 :    
1101 :     ServerThing::LineNumberize($module);
1102 :    
1103 :     Output the module line by line with line numbers
1104 :    
1105 :     =over 4
1106 :    
1107 :     =item module
1108 :    
1109 :     Name of the module to be line-numberized.
1110 :    
1111 :     =back
1112 :    
1113 :     =cut
1114 :    
sub LineNumberize {
    # Get the parameters. The module name comes from the web request, so it
    # is treated as untrusted input throughout.
    my ($module) = @_;
    # Start the output page.
    print CGI::header();
    print CGI::start_html(-title => 'Documentation Page',
                          -style => { src => "http://servers.nmpdr.org/sapling/Html/css/ERDB.css" });
    # Protect from errors.
    eval {
        # Accept only plain relative names built from word characters, dots,
        # and hyphens (optionally in subdirectories), and refuse "." or ".."
        # path components, so the request cannot escape the script directory.
        unless (defined $module
                && $module =~ m{\A[\w.\-]+(?:/[\w.\-]+)*\z}
                && $module !~ m{(?:\A|/)\.\.?(?:/|\z)}) {
            die "Invalid script name.\n";
        }
        my $fks_path = "$FIG_Config::fig_disk/dist/releases/current/FigKernelScripts/$module";
        if (-f $fks_path) {
            # Read the file directly instead of shelling out to "cat", so the
            # name is never interpreted by a shell.
            open(my $ih, '<', $fks_path) or die "Could not open $fks_path: $!\n";
            print "<pre>\n";
            my $i = 1;
            while (defined(my $line = <$ih>)) {
                # Escape the line so that <, >, and & in the source code are
                # displayed literally rather than interpreted as HTML.
                print "$i.\t" . CGI::escapeHTML($line);
                $i++;
            }
            close $ih;
            print "</pre>\n";
        } else {
            print CGI::escapeHTML("File $fks_path not found");
        }
    };
    # Process any error. Copy $@ first, since later calls may reset it, and
    # escape it before it goes into the page.
    my $error = $@;
    if ($error) {
        print CGI::blockquote({ class => 'error' }, CGI::escapeHTML($error));
    }
    # Close off the page.
    print CGI::end_html();
}
1145 :    
1146 : parrello 1.9 =head3 ProducePod
1147 :    
1148 :     ServerThing::ProducePod($module);
1149 :    
1150 :     Output the POD documentation for the specified module.
1151 :    
1152 :     =over 4
1153 : parrello 1.8
1154 : parrello 1.9 =item module
1155 : parrello 1.8
1156 : parrello 1.9 Name of the module whose POD document is to be displayed.
1157 : parrello 1.8
1158 :     =back
1159 :    
1160 :     =cut
1161 :    
sub ProducePod {
    # Get the parameters. The module name comes from the web request.
    my ($module) = @_;
    # Start the output page.
    print CGI::header();
    print CGI::start_html(-title => "$module Documentation Page",
                          -style => { src => "http://servers.nmpdr.org/sapling/Html/css/ERDB.css" });
    # Protect from errors.
    eval {
        # DocUtils is loaded on demand; it formats the POD as HTML. The second
        # argument is the URL prefix handed to ShowPod.
        require DocUtils;
        my $html = DocUtils::ShowPod($module, "http://servers.nmpdr.org/sapling/server.cgi?pod=");
        # Output the POD HTML.
        print $html;
    };
    # Process any error. Copy $@ before calling anything else, and escape it:
    # the message may echo the request-supplied module name, so it must not
    # be written into the page as raw HTML.
    my $error = $@;
    if ($error) {
        print CGI::blockquote({ class => 'error' }, CGI::escapeHTML($error));
    }
    # Close off the page.
    print CGI::end_html();
}
1185 :    
1186 :     =head3 TraceErrorLog
1187 :    
1188 :     ServerThing::TraceErrorLog($name, $errorLog);
1189 :    
1190 :     Trace the specified error log file. This is a very dinky routine that
1191 :     performs a task required by L</RunTool> in multiple places.
1192 :    
1193 :     =over 4
1194 :    
1195 :     =item name
1196 :    
1197 :     Name of the tool relevant to the log file.
1198 :    
1199 :     =item errorLog
1200 :    
1201 :     Name of the log file.
1202 :    
1203 :     =back
1204 :    
1205 :     =cut
1206 :    
sub TraceErrorLog {
    # Tool name for the heading, and the name of the log file to display.
    my ($name, $errorLog) = @_;
    # Fetch the log through Tracer::GetFile in scalar context and write it
    # to the trace output under a heading naming the tool. Note that the
    # trace is unconditional (no trace-level check).
    my $logText = Tracer::GetFile($errorLog);
    my $heading = "$name error log:";
    Trace("$heading\n$logText");
}
1212 :    
1213 : parrello 1.10 =head3 SendError
1214 :    
1215 :     ServerThing::SendError($message, $status);
1216 :    
1217 :     Fail an HTTP request with the specified error message and the specified
1218 :     status message.
1219 :    
1220 :     =over 4
1221 :    
1222 :     =item message
1223 :    
1224 :     Detailed error message. This is sent as the page content.
1225 :    
1226 :     =item status
1227 :    
1228 :     Status message. This is sent as part of the status code.
1229 :    
1230 :     =back
1231 :    
1232 :     =cut
1233 :    
sub SendError {
    # Detailed message (becomes the page content) and short status text
    # (becomes part of the HTTP status line).
    my ($message, $status) = @_;
    Trace("Error \"$status\" $message") if T(2);
    # A DBServer error gets status 503 so the client can tell it apart and
    # retry; everything else is reported as a 500.
    my $code = ($message =~ /DBServer Error:\s+/) ? 503 : 500;
    # Emit the plain-text header carrying the status line, then the detailed
    # message as the body.
    print CGI::header(-type   => 'text/plain',
                      -status => "$code $status");
    print $message;
}
1252 :    
1253 :    
1254 : disz 1.31 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3