Parent Directory
|
Revision Log
Changed POD format for better compatibility with Wiki.
#!/usr/bin/perl -w
# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
#
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License.
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

=head1 SproutGFF

This is a fancy wrapper around B<seed2gff> that can be used to generate the
GFF3 files for the NMPDR. The single parameter is the output directory name.
The files will be organized by NMPDR group.

The currently-supported command-line options are as follows.

=over 4

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will be directly to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item genome

ID of a single genome to process. If omitted, all the NMPDR core genomes are
processed and the output files are grouped into one directory per group.

=item scan

If specified, the genomes will be collected and the directories created, but
no GFF files will be output. This is mostly useful for testing.

=item phone

Phone number to message when the script is complete.

=back

=cut

use strict;
use warnings;
use Tracer;
use DocUtils;
use TestUtils;
use Cwd;
use File::Copy;
use File::Path;
use SFXlate;

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(Sprout ERDB) ],
                                           {
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                              genome => ["", "genome to process; the default is to process all NMPDR core genomes"],
                                              scan => ["", "if specified, the output directories will be created but no files will be written"],
                                           },
                                           "<output directory>",
                                           @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Insure we catch errors.
eval {
    # Create a Sprout object.
    my $sprout = SFXlate->new_sprout_only();
    # Insure the output directory exists. We test for "undefined or empty"
    # rather than plain falsehood so that a directory named "0" is accepted.
    my $outDir = $parameters[0];
    if (! defined $outDir || $outDir eq '') {
        Confess("No output directory specified.");
    } else {
        Insure($outDir, 0777);
        # Create the genome map. This lists all the genomes we want along with
        # the corresponding output file name.
        my %genomes;
        # Check for a single-genome situation.
        if ($options->{genome}) {
            # Get the genome name.
            my $genomeID = $options->{genome};
            my $genomeName = $sprout->GenusSpecies($genomeID);
            # Compute the file name. A single genome goes directly into the
            # output directory.
            my $fileName = CleanGenomeName($genomeName);
            $genomes{$genomeID} = "$outDir/$fileName.gff";
        } else {
            # Here we want all the core organisms, split into super-groups.
            # First, we get the genomes for each group in a hash.
            my %baseGroups = $sprout->GetGroups();
            # Fix it into a hash by super-group.
            my %coreGroups = $sprout->Fix(%baseGroups);
            for my $coreGroup (keys %coreGroups) {
                # Compute the directory and insure it exists.
                my $superDirectory = "$outDir/$coreGroup";
                Insure($superDirectory, 0777);
                # Put all of this group's genomes in the output hash.
                for my $coreGenome (@{$coreGroups{$coreGroup}}) {
                    my $fileName = CleanGenomeName($sprout->GenusSpecies($coreGenome));
                    $genomes{$coreGenome} = "$superDirectory/$fileName.gff";
                }
            }
        }
        # Now we loop through %genomes, creating GFF files.
        for my $genome (sort keys %genomes) {
            my $fileName = $genomes{$genome};
            if ($options->{scan}) {
                Trace("$genome would be written to $fileName") if T(2);
            } else {
                Trace("Writing $genome to $fileName.") if T(3);
                # Do the conversion. A failure is traced by the helper; we keep
                # going so that one bad genome does not stop the others.
                RunSeed2GFF($genome, $fileName);
            }
        }
    }
};
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "SproutGFF terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

=head3 RunSeed2GFF

    my $okFlag = RunSeed2GFF($genome, $fileName);

Run B<seed2gff> for a single genome and trace the results.

The command is started with the list form of a piped C<open>, so the genome
ID and the file name are passed to B<seed2gff> as-is and are never
interpreted by a shell.

=over 4

=item genome

ID of the genome to convert.

=item fileName

Name of the GFF file to create.

=item RETURN

Returns C<1> if B<seed2gff> ran and exited with a zero status, else C<0>.

=back

=cut

sub RunSeed2GFF {
    # Get the parameters.
    my ($genome, $fileName) = @_;
    # Build the command as a list so that no shell quoting is required.
    my @command = ('seed2gff', '-g', $genome, '-o', $fileName, '-s', '-t', 'all', '-nmpdr');
    # Start the command with its standard output piped back to us.
    my $pipe;
    if (! open($pipe, '-|', @command)) {
        Trace("Could not run seed2gff for $genome: $!") if T(0);
        return 0;
    }
    # Collect the output. The line terminators are removed so that the
    # join below does not produce blank lines between the output lines.
    my @output = <$pipe>;
    chomp @output;
    # Closing a piped handle waits for the command and puts its status in $?.
    my $closed = close($pipe);
    # At trace level 3, we show the output.
    Trace("Output from seed2gff:\n" . join("\n", @output)) if T(3) && scalar(@output);
    if (! $closed) {
        Trace("seed2gff failed for $genome (exit status " . ($? >> 8) . ").") if T(0);
        return 0;
    }
    return 1;
}

=head3 CleanGenomeName

    my $cleaned = CleanGenomeName($name);

Clean up a genome name so it can be used as a file name.

=over 4

=item name

Name of the genome, for cleaning purposes.

=item RETURN

Returns the incoming name with each run of white space converted to a single
dot, each run of dots collapsed to a single dot, and parentheses, colons, and
slashes converted to underscores. An undefined name yields an empty string.

=back

=cut

sub CleanGenomeName {
    # Get the parameters.
    my ($name) = @_;
    # Declare the return variable. An undefined name is treated as empty.
    my $retVal = (defined $name ? $name : "");
    # Convert spaces to dots.
    $retVal =~ s/\s+/./g;
    # Collapse every run of dots to a single dot. Matching the whole run
    # (rather than just pairs) insures that "..." does not come out as "..".
    $retVal =~ s/\.{2,}/./g;
    # Convert other bad guys to underscores. The slash is included because it
    # would otherwise be taken as a directory separator in the output path.
    $retVal =~ tr{():/}{____};
    # Return the result.
    return $retVal;
}

1;
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |