[Bio] / Sprout / NewStuffCheck.pl Repository:
ViewVC logotype

View of /Sprout/NewStuffCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Thu Aug 24 08:18:09 2006 UTC (13 years, 2 months ago) by parrello
Branch: MAIN
Changes since 1.3: +6 -3 lines
*** empty log message ***

#!/usr/bin/perl -w

=head1 New Stuff Checker

This script compares the genomes, features, and annotations in
the old and new sprouts and lists the differences.

The currently-supported command-line options are as follows.

=over 4

=item summary

Do not display details, only difference summaries.

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will be directly to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item phone

Phone number to message when the script is complete.

=back

=cut

use strict;
use Tracer;
use DocUtils;
use TestUtils;
use Cwd;
use File::Copy;
use File::Path;
use FIG;
use SFXlate;
use Sprout;

# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(Sprout) ],
                                           {
                                              summary => ["", "if specified, detailed lists of the different items will not be displayed"],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                           },
                                           "",
                                           @ARGV);
# Set a variable to contain return type information.
my $rtype;
# Insure we catch errors.
eval {
    Trace("Processing genomes.") if T(2);
    # Get the old Sprout.
    my $oldSprout = SFXlate->new_sprout_only($FIG_Config::oldSproutDB);
    # Get its genomes in alphabetical order.
    my @oldGenomes = GetGenes($oldSprout);
    # Get the new Sprout.
    my $newSprout = SFXlate->new_sprout_only();
    # Get its genomes in alphabetical order.
    my @newGenomes = GetGenes($newSprout);
    # Compare the two gene lists.
    my ($insertedGenomes, $deletedGenomes) = Tracer::CompareLists(\@newGenomes, \@oldGenomes);
    # Display the lists.
    ShowLists(! $options->{summary},
              'New Genomes'     => $insertedGenomes,
              'Deleted Genomes' => $deletedGenomes);
    # Next, we get the subsystems.
    Trace("Processing subsystems.") if T(2);
    my @oldSubsystems = GetSubsystems($oldSprout);
    my @newSubsystems = GetSubsystems($newSprout);
    # Compare and display the subsystem lists.
    my ($insertedSubs, $deletedSubs) = Tracer::CompareLists(\@newSubsystems, \@oldSubsystems);
    ShowLists(! $options->{summary},
              'New Subsystems'     => $insertedSubs,
              'Deleted Subsystems' => $deletedSubs);
    # Now we process the features of the common genes. First we need a hash
    # of the inserted stuff so we know to skip it.
    my %skipGenes = map { $_->[0] => 1 } @{$insertedGenomes};
    # Loop through the genomees.
    for my $genome (@newGenomes) {
        # Only process this gene if it's common to both
        my ($genomeID, $genomeName) = @{$genome};
        if (! exists $skipGenes{$genomeID}) {
            Trace("Processing $genomeID.") if T(3);
            # Get the new and old features. This will be very stressful to the machine,
            # because there are lots of features.
            my @oldFeatures = GetFeatures($oldSprout, $genomeID);
            my @newFeatures = GetFeatures($newSprout, $genomeID);
            Trace("Comparing features for $genomeID.") if T(3);
            # Compare the lists.
            my ($insertedFeatures, $deletedFeatures) = Tracer::CompareLists(\@newFeatures, \@oldFeatures);
            # If either list has data, we want to display it.
            if (scalar @{$insertedFeatures} + scalar @{$deletedFeatures} > 0) {
                Trace("Displaying feature differences.") if T(3);
                ShowLists($options->{summary},
                          "New Features for $genomeID"      => $insertedFeatures,
                          "Features Deleted from $genomeID" => $deletedFeatures);
            }
        }
    }
};
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "Subsystem Checker terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

=head3 GetGenes

C<< my @geneList = GetGenes($sprout); >>

Return a list of the genomes in the specified Sprout instance. The genomes
are returned in alphabetical order by genome ID.

=over 4

=item sprout

Sprout instance whose gene list is desired.

=item RETURN

Returns a list of two-tuples. The first element in each tuple is the genome ID,
and the second is the genome name (genus, species, strain).

=back

=cut

sub GetGenes {
    # Get the parameters.
    my ($sprout) = @_;
    # Get the desired data.
    my @genomes = $sprout->GetAll(['Genome'], "ORDER BY Genome(id)", [], ['Genome(id)',
                                                                          'Genome(genus)',
                                                                          'Genome(species)',
                                                                          'Genome(unique-characterization)']);
    # Create the genome names from the three pieces of the name.
    my @retVal = map { [$_->[0], join(" ", @{$_}[1..3])] } @genomes;
    # Return the result.
    return @retVal;
}

=head3 GetSubsystems

C<< my @subsystems = GetSubsystems($sprout); >>

Get a list of the subsystems in the specified Sprout instance.

=over 4

=item sprout

Sprout instance whose subsystems are desired.

=item RETURN

Returns a list of 2-tuples, each consisting of the subsystem name followed by
the name of the curator.

=back

=cut

sub GetSubsystems {
    # Get the parameters.
    my ($sprout) = @_;
    # Declare the return variable.
    my @retVal = $sprout->GetAll(['Subsystem'], "ORDER BY Subsystem(id)",
                                 [], ['Subsystem(id)', 'Subsystem(curator)']);
    # Return the result.
    return @retVal;
}

=head3 GetFeatures

C<< my @features = GetFeatures($sprout, $genomeID); >>

Return the features of the specified genome in the specified Sprout instance.

=over 4

=item sprout

Sprout instance to use to get the features.

=item genomeID

ID of the genome in question.

=item RETURN

Returns a list of 2-tuples, the first element being the feature ID and the second its
functional assignment (if any).

=back

=cut

sub GetFeatures {
    # Get the parameters.
    my ($sprout, $genomeID) = @_;
    # Get a list of the feature IDs and map them to their functional assignments.
    my @retVal = map { [$_, $sprout->FunctionOf($_)] } $sprout->GetFlat(['HasFeature'],
                                                                        "HasFeature(from-link) = ? ORDER BY HasFeature(to-link)",
                                                                        [$genomeID], 'HasFeature(to-link)');
    # Return the result.
    return @retVal;
}

=head3 ShowLists

C<< ShowLists($all, %lists); >>

Display a set of lists. Each list should consist of 2-tuples.

=over 4

=item all

TRUE if details should be displayed; FALSE if only summaries should be displayed.

=item lists

A hash mapping list names to list references.

=cut

sub ShowLists {
    # Get the parameters.
    my $all = shift @_;
    my %lists = @_;
    # Loop through the lists in alphabetical order by list name.
    for my $listName (keys %lists) {
        # Get the list itself.
        my $list = $lists{$listName};
        # Get the number of list items.
        my $listSize = scalar @{$list};
        # Display the header.
        my $header;
        if ($listSize == 0) {
            $header = "*** $listName: no entries";
        } elsif ($listSize == 1) {
            $header = "*** $listName: one entry";
        } else {
            $header = "*** $listName: $listSize entries";
        }
        print "$header\n";
        Trace($header) if T(3);
        # If we're at trace level 3, display the list.
        if ($all) {
            # Put a spacer under the title.
            print "\n";
            # Get the width of the name column.
            my $width = 0;
            for my $entryLen (map { length $_->[0] } @{$list}) {
                $width = $entryLen if $entryLen > $width;
            }
            # Now display the list.
            for my $entry (@{$list}) {
                my ($name, $data) = @{$entry};
                print "    $name" . (" " x ($width - length $name)) . " $data\n";
            }
            print "\n\n";
        }
    }
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3