[Bio] / Sprout / CreateLoaderFile.pl Repository:
ViewVC logotype

View of /Sprout/CreateLoaderFile.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Mon Mar 4 19:04:31 2013 UTC (6 years, 5 months ago) by parrello
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Support for PSEED load and updates.

#!/usr/bin/perl -w

=head1 Create Sapling Loader File Script

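This script creates a Sapling Loader input file from a list of
genomes in a tab-delimited file. Each genome whose ID names a subdirectory
of the genome directory produces one output line containing the word
C<Genome>, the genome ID, and the path to that subdirectory; genomes with
no such subdirectory are skipped.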

There are three positional parameters-- the name of the input file,
the name of the output file, and the name of the directory containing
the genome directories.

The following command-line options are supported.

=over 4

=item col

Index (1-based) of the column containing the genome ID. The default
is C<1>.

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Trace messages are written to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=back

=cut

use strict;
use Tracer;
use Stats;

# Main script body. Parses the command line, validates the parameters, and
# then writes one "Genome" loader line to the output file for every input line
# whose genome ID names an existing subdirectory of the genome directory.
# Input lines with no usable genome ID, or with no matching subdirectory, are
# skipped and counted in the statistics.

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw() ],
                                           { col => ["1", "input column index"] },
                                           "<inputFile> <outputfile> <genomeDirectory>",
                                           @ARGV);
# Create the statistics object.
my $stats = Stats->new();
# Get the positional parameters and the (1-based) column option. The option is
# copied to a defined scalar so it can be safely displayed in error messages.
my ($inFile, $outFile, $genomeDir) = @parameters;
my $colOption = (defined $options->{col} ? $options->{col} : '');
# Validate everything before touching any files.
if (! $inFile) {
    Confess("No input file specified.");
} elsif (! -f $inFile) {
    Confess("Input file $inFile not found.");
} elsif (! $outFile) {
    Confess("No output file specified.");
} elsif (! $genomeDir) {
    Confess("No genome directory specified.");
} elsif (! -d $genomeDir) {
    Confess("Genome directory $genomeDir not found.");
} elsif ($colOption !~ /^\d+$/ || $colOption < 1) {
    # A zero or negative value would become a negative array index and
    # silently select a column counted from the END of the line.
    Confess("Invalid column index \"$colOption\": must be a positive integer.");
} else {
    # Convert the 1-based column number to a 0-based array index.
    my $col = $colOption - 1;
    # Open the files.
    Trace("Opening files.") if T(2);
    my $ih = Open(undef, "<$inFile");
    my $oh = Open(undef, ">$outFile");
    Trace("Reading input.") if T(2);
    # Count the input lines ourselves so skipped lines can be identified in
    # the trace messages.
    my $lineNumber = 0;
    # Loop through the input file.
    while (! eof $ih) {
        my @cols = Tracer::GetLine($ih);
        $lineNumber++;
        $stats->Add(lineIn => 1);
        # Get the genome ID from this line.
        my $genomeID = $cols[$col];
        if (! defined $genomeID || $genomeID eq '') {
            # A blank or short line has no genome ID. Without this check the
            # target directory below would collapse to the genome directory
            # itself, which exists, and a bogus line would be written.
            Trace("No genome ID in column $colOption of input line $lineNumber: skipped.") if T(1);
            $stats->Add(missingID => 1);
        } else {
            # Verify the genome directory and act accordingly.
            my $targetDir = "$genomeDir/$genomeID";
            if (! -d $targetDir) {
                Trace("Missing directory for $genomeID: skipped.") if T(1);
                $stats->Add(missingDirectory => 1);
            } else {
                Tracer::PutLine($oh, [Genome => $genomeID, $targetDir]);
                $stats->Add(lineOut => 1);
            }
        }
    }
    # Close the files. Buffered write errors only surface when the output
    # handle is closed, so that close is checked.
    close $ih;
    if (! close $oh) {
        Confess("Error closing output file $outFile: $!");
    }
    Trace("All done:\n" . $stats->Show()) if T(2);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3