[Bio] / Sprout / BBHCheck.pl Repository:
ViewVC logotype

View of /Sprout/BBHCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.2 - (download) (as text) (annotate)
Mon Aug 14 05:25:50 2006 UTC (13 years, 7 months ago) by parrello
Branch: MAIN
Changes since 1.1: +22 -1 lines
Added the ability to check bad genomes against the SEED database.

#!/usr/bin/perl -w

=head1 BBH Check

Find all genomes in Sprout without any BBHs. This can be an indicator of bad
data in the SEED.

The currently-supported command-line options are as follows.

=over 4

=item user

Name suffix to be used for log files. If omitted, the PID is used.

=item trace

Numeric trace level. A higher trace level causes more messages to appear. The
default trace level is 2. Tracing will be directed to the standard output
as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
where I<User> is the value of the B<user> option above.

=item fig

Check the SEED for genomes that have no BBHs in Sprout.

=item sql

If specified, turns on tracing of SQL activity.

=item background

Save the standard and error output to files. The files will be created
in the FIG temporary directory and will be named C<err>I<User>C<.log> and
C<out>I<User>C<.log>, respectively, where I<User> is the value of the
B<user> option above.

=item h

Display this command's parameters and options.

=item phone

Phone number to message when the script is complete.

=back

=cut

use strict;
use Tracer;
use DocUtils;
use TestUtils;
use Cwd;
use File::Copy;
use File::Path;
use FIG;
use Sprout;
use SFXlate;

# Get the command-line options and parameters. Each script-specific option
# maps to a [default value, help text] pair.
# NOTE(review): the option-hash wrapper and the trailing parameter-help and
# @ARGV arguments follow the usual Tracer::StandardSetup calling convention --
# confirm against Tracer.pm.
my ($options, @parameters) = StandardSetup([qw(Sprout) ],
                                           {
                                              fig => [0, "check the SEED database as well as the Sprout"],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                           },
                                           "",
                                           @ARGV);
# Set a variable to contain return type information. It is reported in the
# completion phone message.
my $rtype;
# Ensure we catch errors, so that the completion message is still sent when
# the check fails part-way through.
eval {
    # Get a sprout object.
    my $sprout = SFXlate->new_sprout_only();
    # Get the FIG object's DB handle. It is only queried when the "fig"
    # option is set.
    my $fig = FIG->new();
    my $fDBH = $fig->db_handle();
    # Get the list of genomes.
    my @genomes = $sprout->Genomes();
    # Map each display name (the GenusSpecies result followed by the
    # bracketed genome ID) to its genome ID, so that the genomes can be
    # processed in name order.
    my %genomeNames = ();
    for my $genome (@genomes) {
        my $name = $sprout->GenusSpecies($genome) . " [$genome]";
        $genomeNames{$name} = $genome;
    }
    # Count the bad genomes.
    my $badGenomes = 0;
    # Process the genomes in name order. The hash key is already the display
    # name, so it does not need to be rebuilt inside the loop.
    for my $name (sort keys %genomeNames) {
        my $genome = $genomeNames{$name};
        # Count this genome's BBHs.
        my $count = $sprout->GetCount(['IsBidirectionalBestHitOf', 'HasFeature', 'Genome'],
                                      "HasFeature(from-link) = ?", [$genome]);
        # A count of 0 is bad.
        if ($count) {
            Trace("$name BBH count is $count.") if T(3);
        } else {
            # Record the bad genome so the final summary is accurate.
            $badGenomes++;
            if ($options->{fig}) {
                # Check to see if the SEED is bad, too.
                # NOTE(review): the bind value is passed as an array reference;
                # confirm this matches the signature of the DB handle's SQL method.
                my $response = $fDBH->SQL("SELECT * FROM bbh WHERE peg1 LIKE ? LIMIT 10", "", ["fig|$genome.peg%"]);
                if (@$response == 0) {
                    # Here there are no BBHs anywhere.
                    Trace("$name has no BBHs in SEED or Sprout. ***") if T(1);
                } else {
                    # Here we can fix the problem by reloading the Sprout.
                    Trace("$name has no BBHs in Sprout but is okay in SEED.") if T(1);
                }
            } else {
                # Here we don't care about the SEED.
                Trace("$name has no BBHs. ***") if T(1);
            }
        }
    }
    # Tell the user how bad things are.
    my $total = scalar @genomes;
    Trace("$badGenomes out of $total genomes had no BBHs.") if T(2);
};
# Record the outcome. $@ is inspected immediately after the eval so that
# nothing else can overwrite it first.
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Send the completion message if a phone number was supplied.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "BBH Check terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3