[Bio] / Sprout / BBHCheck.pl Repository:
ViewVC logotype

Annotation of /Sprout/BBHCheck.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     =head1 BBH Check
4 :    
5 :     Find all genomes in Sprout without any BBHs. This can be an indicator of bad
6 :     data in the SEED.
7 :    
8 :     The currently-supported command-line options are as follows.
9 :    
10 :     =over 4
11 :    
12 :     =item user
13 :    
14 :     Name suffix to be used for log files. If omitted, the PID is used.
15 :    
16 :     =item trace
17 :    
18 :     Numeric trace level. A higher trace level causes more messages to appear. The
19 :     default trace level is 2. Tracing will be directly to the standard output
20 :     as well as to a C<trace>I<User>C<.log> file in the FIG temporary directory,
21 :     where I<User> is the value of the B<user> option above.
22 :    
23 : parrello 1.2 =item fig
24 :    
25 :     Check the SEED for genomes that have no BBHs in Sprout.
26 :    
27 : parrello 1.1 =item sql
28 :    
29 :     If specified, turns on tracing of SQL activity.
30 :    
31 :     =item background
32 :    
33 :     Save the standard and error output to files. The files will be created
34 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
35 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
36 :     B<user> option above.
37 :    
38 :     =item h
39 :    
40 :     Display this command's parameters and options.
41 :    
42 :     =item phone
43 :    
44 :     Phone number to message when the script is complete.
45 :    
46 :     =back
47 :    
48 :     =cut
49 :    
50 :     use strict;
51 :     use Tracer;
52 :     use DocUtils;
53 :     use TestUtils;
54 :     use Cwd;
55 :     use File::Copy;
56 :     use File::Path;
57 :     use FIG;
58 :     use Sprout;
59 :     use SFXlate;
60 :    
61 :     # Get the command-line options and parameters.
62 :     my ($options, @parameters) = StandardSetup([qw(Sprout) ],
63 :     {
64 : parrello 1.2 fig => [0, "check the SEED database as well as the Sprout"],
65 : parrello 1.1 phone => ["", "phone number (international format) to call when load finishes"],
66 :     },
67 :     "",
68 :     @ARGV);
69 :     # Set a variable to contain return type information.
70 :     my $rtype;
71 :     # Insure we catch errors.
72 :     eval {
73 :     # Get a sprout object.
74 :     my $sprout = SFXlate->new_sprout_only();
75 : parrello 1.2 # Get the FIG object's DB handle.
76 :     my $fig = FIG->new();
77 : parrello 1.1 # Get the list of genomes.
78 :     my @genomes = $sprout->Genomes();
79 :     # Get the genome names.
80 :     my %genomeNames = ();
81 :     for my $genome (@genomes) {
82 :     my $name = $sprout->GenusSpecies($genome) . " [$genome]";
83 :     $genomeNames{$name} = $genome;
84 :     }
85 :     # Count the bad genomes.
86 :     my $badGenomes = 0;
87 :     # Process the genomes in name order.
88 :     for my $name (sort keys %genomeNames) {
89 :     my $genome = $genomeNames{$name};
90 :     # Count this genome's BBHs.
91 :     my $count = $sprout->GetCount(['IsBidirectionalBestHitOf', 'HasFeature', 'Genome'],
92 :     "HasFeature(from-link) = ?", [$genome]);
93 :     # Get the genome name.
94 :     my $name = $sprout->GenusSpecies($genome) . " [$genome]";
95 :     # A count of 0 is bad.
96 :     if ($count) {
97 :     Trace("$name BBH count is $count.") if T(3);
98 :     } else {
99 : parrello 1.2 if ($options->{fig}) {
100 :     # Check to see if the SEED is bad, too.
101 : parrello 1.3 my @fids = $fig->all_features($genome);
102 :     my $seedCount = 0;
103 :     for my $fid (@fids) {
104 :     my $bbc = scalar ($fig->bbhs($fid));
105 :     $seedCount += $bbc;
106 :     }
107 :     if ($seedCount == 0) {
108 : parrello 1.2 # Here there are no BBHs anywhere.
109 :     Trace("$name has no BBHs in SEED or Sprout. ***") if T(1);
110 :     } else {
111 :     # Here we can fix the problem by reloading the Sprout.
112 : parrello 1.3 Trace("$name has no BBHs in Sprout but $seedCount in SEED.") if T(1);
113 : parrello 1.2 }
114 :     } else {
115 : parrello 1.3 # Here we don't care about the SEED.
116 :     Trace("$name has no BBHs. ***") if T(1);
117 : parrello 1.2 }
118 : parrello 1.1 $badGenomes++;
119 :     }
120 :     }
121 :     # Tell the user how bad things are.
122 :     my $total = scalar @genomes;
123 :     Trace("$badGenomes out of $total genomes had no BBHs.") if T(2);
124 :     };
125 :     if ($@) {
126 :     Trace("Script failed with error: $@") if T(0);
127 :     $rtype = "error";
128 :     } else {
129 :     Trace("Script complete.") if T(2);
130 :     $rtype = "no error";
131 :     }
132 :     if ($options->{phone}) {
133 :     my $msgID = Tracer::SendSMS($options->{phone}, "BBH Check terminated with $rtype.");
134 :     if ($msgID) {
135 :     Trace("Phone message sent with ID $msgID.") if T(2);
136 :     } else {
137 :     Trace("Phone message not sent.") if T(2);
138 :     }
139 :     }
140 :    
141 :    
142 :    
143 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3