[Bio] / Sprout / CopyCombinedMaps.pl Repository:
ViewVC logotype

View of /Sprout/CopyCombinedMaps.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Sat Mar 26 12:43:36 2011 UTC (8 years, 5 months ago) by parrello
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_release_3_0_4, mgrast_dev_06072011, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2014_0729, mgrast_dev_04012011, myrast_33, mgrast_release_3_1_2, mgrast_release_3_1_1, rast_rel_2011_0928, mgrast_dev_04132011, mgrast_dev_04052011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_10262011, mgrast_dev_04082011, mgrast_release_3_1_0, mgrast_dev_03312011, mgrast_release_3_0_3, HEAD
Set up directory pointer for expression data. Added CopyCombinedMaps.

#!/usr/bin/perl -w

=head1 Copy Combined Maps

This script takes a directory name as its parameter and parses the combined map
files it contains to break them into component parts. The component parts are then
copied to the appropriate directory in the organism cache so they can be read
as correspondence-map files by the servers.

This script connects to the Sapling server to store the correspondences, so the
environment variables should be set to point to the correct server.

The currently-supported command-line options are as follows.

=over 4

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will be written directly to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<out>I<User>C<.log> and
C<err>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item passive

If specified, then correspondences that already exist will not be updated on the server.

=back

=cut

    use strict;
    use Tracer;
    use SeedUtils;
    use Cwd;
    use Stats;
    use ServerThing;
    use SAPserver;
    
    # Driver: find every genome file in the target directory, split each one
    # into "//"-terminated sections, and hand each section to ProcessSection
    # for submission to the Sapling server.

    # Get the command-line options and parameters. The only script-specific
    # option is "passive"; the single positional parameter is the directory.
    my ($options, @parameters) = StandardSetup([],
                                               { passive => ["", "if specified, existing correspondence files will not be overwritten"] },
                                               "<dirname>",
                                               @ARGV);
    # We'll keep statistics in here.
    my $stats = Stats->new();
    # Get the directory name, defaulting to the current working directory.
    my $dirName = $parameters[0];
    if (! $dirName) {
        $dirName = getcwd();
        Trace("No directory specified. Current directory $dirName used.") if T(1);
    } elsif (! -d $dirName) {
        # Without a directory there is nothing to process, so report the
        # problem and stop instead of falling through to the directory scan.
        Trace("Directory $dirName not found.") if T(0);
        die "Directory $dirName not found.\n";
    } else {
        Trace("Working in directory $dirName.") if T(2);
    }
    # Get the names of all the genome files in the target directory. A genome
    # file is one whose whole name is a genome ID (digits, a period, digits).
    my @genomes = sort grep { $_ =~ /^\d+\.\d+$/ } OpenDir($dirName);
    Trace(scalar(@genomes) . " genome files found in $dirName.") if T(2);
    # Connect to the server.
    my $sapObject = SAPserver->new();
    Trace("Connected to Sapling server.") if T(2);
    # Loop through the genomes.
    for my $genome (@genomes) {
        # Open the genome's file.
        my $ih = Open(undef, "<$dirName/$genome");
        Trace("Processing $genome file.") if T(2);
        $stats->Add(genomes => 1);
        # Loop through the file. We'll accumulate data rows in here.
        my @rows;
        while (! eof $ih) {
            # Get the current row of data. A fresh @row is declared on every
            # pass, so the references saved in @rows never alias each other.
            my @row = Tracer::GetLine($ih);
            $stats->Add(lines => 1);
            # Is this the end of a section?
            if ($row[0] eq '//') {
                # Yes, output the section.
                ProcessSection($sapObject, $stats, $options, $genome, \@rows);
                $stats->Add(groups => 1);
                # Clear the row list to start the next section.
                @rows = ();
            } else {
                # No, add this row to the current section.
                push @rows, \@row;
                $stats->Add(rows => 1);
            }
        }
        # All done with this file: release the handle before moving on.
        close $ih;
        if (@rows) {
            # Here we have a section without an end marker.
            ProcessSection($sapObject, $stats, $options, $genome, \@rows);
            $stats->Add(orphanGroup => 1);
        }
    }
    # All done. Output the statistics.
    Trace("All done.\n" . $stats->Show()) if T(2);

# Submit one section of correspondence data accumulated from the current file.
#
# Parameters:
#   $sapObject  server object; its submit_gene_correspondence method is called
#   $stats      statistics object; counters are updated through its Add method
#   $options    reference to the option hash; only the "passive" entry is read
#   $genome     ID of the source genome (the name of the file being processed)
#   $rows       reference to the list of row references making up the section
#
# The target genome ID is taken from the second column of the first row, which
# must begin with "fig|" followed by a genome ID. Empty sections, and sections
# whose first row has no such gene ID, are counted and ignored. Errors thrown
# by the server call are trapped and counted rather than propagated.
sub ProcessSection {
    # Get the parameters.
    my ($sapObject, $stats, $options, $genome, $rows) = @_;
    # Look at the first row to determine the target genome.
    my $firstRow = $rows->[0];
    # A row with fewer than two columns has no gene ID at all. Treat that as
    # an empty string so the checks below never operate on an undefined value.
    my $geneID = ($firstRow && defined $firstRow->[1]) ? $firstRow->[1] : '';
    # Capture the genome ID explicitly instead of relying on $1 surviving
    # from a condition into a later branch.
    my ($genome2) = ($geneID =~ /^fig\|(\d+\.\d+)/);
    if (! $firstRow) {
        # No first row, so the group is empty.
        Trace("Empty group found in file for $genome: group ignored.") if T(1);
        $stats->Add(emptyGroup => 1);
    } elsif (! defined $genome2) {
        # Invalid row contents, because the second column is not a gene ID. This should never
        # happen.
        Trace("Invalid entry \"$geneID\" found in group for $genome: group ignored.") if T(0);
        $stats->Add(errorGroup => 1);
    } else {
        # Determine the passivity option.
        my $passive = $options->{passive};
        # Ask the server to store this correspondence.
        Trace("Submitting correspondence from $genome to $genome2.") if T(3);
        $stats->Add(submission => 1);
        my $status;
        # Trap server errors so one bad section does not abort the whole run.
        eval {
            $status = $sapObject->submit_gene_correspondence(-genome1 => $genome,
                                                             -genome2 => $genome2,
                                                             -correspondences => $rows,
                                                             -passive => $passive);
        };
        if ($@) {
            Trace("Error in correspondence from $genome to $genome2:\n$@") if T(0);
            $stats->Add(submitError => 1);
        } elsif (! $status) {
            # The call completed but returned a false status.
            Trace("Correspondence from $genome to $genome2 not stored.") if T(1);
            $stats->Add(submitFailure => 1);
        } else {
            $stats->Add(submitSuccess => 1);
        }
    }
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3