[Bio] / Sprout / EvCodeRefresh.pl Repository:
ViewVC logotype

Annotation of /Sprout/EvCodeRefresh.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     use strict;
21 :     use Tracer;
22 :     use CustomAttributes;
23 :     use Stats;
24 :    
25 :     =head1 EvCodeRefresh Script
26 :    
27 :     EvCodeRefresh [options] <filename>
28 :    
29 :     Refresh evidence codes from a sequential file
30 :    
31 :     =head2 Introduction
32 :    
33 :     This script loads evidence codes from a tab-delimited file into the
34 :     B<IsEvidencedBy> table. The incoming file should contain two columns of data-- a
35 :     feature ID followed by an evidence code. For compatibility, there may be an
36 :     intervening column that is ignored.
37 :    
38 :     =head2 Positional Parameters
39 :    
40 :     =over 4
41 :    
42 :     =item filename
43 :    
44 :     Name of the file from which the evidence codes are to be loaded. The file must
45 :     be a tab-delimited file containing two or three columns. The first column must
46 :     contain a feature ID, and the last column an evidence code to be applied to that
47 :     feature. The evidence code will automatically be split into a class (e.g.
48 :     C<ilit>, C<ff>) and a modifier.
49 :    
50 :     =back
51 :    
52 :     =head2 Command-Line Options
53 :    
54 :     =over 4
55 :    
56 :     =item trace
57 :    
58 :     Specifies the tracing level. The higher the tracing level, the more messages
59 :     will appear in the trace log. Use E to specify emergency tracing.
60 :    
61 :     =item append
62 :    
63 :     Normally, the existing evidence codes are erased before the data from the file
64 :     is loaded. If C<append> is specified, then the erase is suppressed, and the
65 :     existing codes are kept.
66 :    
67 :     =item classes
68 :    
69 :     If this option is specified, it should be the name of a tab-delimited file
70 :     containing the evidence classes. The evidence class table will be deleted and
71 :     reloaded from the file, which should be a valid ERDB load file for the
72 :     B<EvidenceClass> table.
73 :    
74 :     =item user
75 :    
76 :     Name suffix to be used for log files. If omitted, the PID is used.
77 :    
78 :     =item sql
79 :    
80 :     If specified, turns on tracing of SQL activity.
81 :    
82 :     =item background
83 :    
84 :     Save the standard and error output to files. The files will be created
85 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
86 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
87 :     B<user> option above.
88 :    
89 :     =item help
90 :    
91 :     Display this command's parameters and options.
92 :    
93 :     =item warn
94 :    
95 :     Create an event in the RSS feed when an error occurs.
96 :    
97 :     =item phone
98 :    
99 :     Phone number to message when the script is complete.
100 :    
101 :     =back
102 :    
103 :     =cut
104 :    
# Parse the command line. Each entry in the option table below is a
# two-element list; judging by the values, the first element is the default
# and the second is the help text (confirm against Tracer::StandardSetup).
my %optionSpecs = (
    trace   => ["2", "tracing level"],
    append  => ["", "if specified, the evidence codes will be appended to existing codes"],
    classes => ["", "evidence class file name (optional)"],
    phone   => ["", "phone number (international format) to call when load finishes"],
);
# Names handed to StandardSetup as its first argument -- presumably the
# tracing categories to activate; verify against Tracer::StandardSetup.
my @categories = qw(ERDB CustomAttributes);
# $options receives the parsed option hash; @parameters receives the
# positional parameters (here, the input file name).
my ($options, @parameters) = StandardSetup(\@categories, \%optionSpecs,
                                           "<filename>", @ARGV);
# Outcome description ("error" or "no error") reported at the end of the run.
my $rtype;
# Ensure we catch errors. The block ends with a true value so that success is
# detected from eval's return value rather than from $@, which can be
# clobbered while the block unwinds.
my $ok = eval {
    # Get the attributes database.
    my $attr = CustomAttributes->new();
    # Check for a class file.
    if ($options->{classes}) {
        # We have one. Load it into the evidence class table, replacing the
        # current contents.
        Trace("Loading evidence classes from $options->{classes}.") if T(2);
        $attr->LoadTable($options->{classes}, 'EvidenceClass', truncate => 1);
        Trace("Evidence classes loaded.") if T(2);
    }
    # Check that an input file name was specified. Without one there are no
    # evidence codes to load (only the optional class load above is done).
    my $fileName = $parameters[0];
    if (! $fileName) {
        Trace("No input file name specified.") if T(2);
    } else {
        # Now we convert the input file into a load file. First, we open it.
        my $ih = Open(undef, "<$fileName");
        # We'll count the number of codes in here.
        my $stats = Stats->new();
        # Create the load file. We sort it to speed up the load. The PID in
        # the name keeps concurrent runs from sharing a file.
        my $loadFileName = "$FIG_Config::temp/IsEvidencedBy$$.dtx";
        my $oh = Open(undef, "| sort >$loadFileName");
        # Finally, we use this hash to track all the evidence classes.
        my %classes = ();
        Trace("Reading evidence codes.") if T(3);
        # Loop through the input file, writing load records.
        while (! eof $ih) {
            # Read the input record.
            my @fields = Tracer::GetLine($ih);
            my $line = $.;
            Trace("$line input lines processed.") if T(3) && ($. % 10000 == 0);
            # Ensure it's valid: only two or three columns are acceptable,
            # that is, a last index of 1 or 2.
            my $last = $#fields;
            if ($last >= 3 || $last < 1) {
                Trace("Record $line in input file has incorrect number of columns.") if T(3);
                $stats->Add(errors => 1);
            } else {
                # Get the feature ID and the code. The code is always in the
                # last column; a middle column, if present, is ignored.
                my ($fid, $code) = @fields[0, $last];
                # Validate the feature ID and the evidence code.
                if (! ($fid =~ /^fig\|\d+/)) {
                    Trace("Record $line in input file has invalid feature ID \"$fid\".") if T(3);
                    $stats->Add(errors => 1);
                } elsif (! ($code =~ /^([a-z]+)(.*)/)) {
                    Trace("Record $line in input file has invalid evidence code \"$code\".") if T(3);
                    $stats->Add(errors => 1);
                } else {
                    # We have a valid input row. Produce the output line. Note
                    # that as a result of the pattern match that validated the
                    # evidence code, $1 contains the class and $2 the modifier.
                    my ($class, $modifier) = ($1, $2);
                    Tracer::PutLine($oh, [$fid, $class, $modifier]);
                    # Count this as an output row and as a member of the
                    # specified class.
                    $stats->Add(rows => 1);
                    $stats->Add($class => 1);
                    $classes{$class} = 1;
                }
            }
        }
        Trace("Evidence codes reformatted.") if T(2);
        # Close the files. The output handle is a pipe to "sort", so its close
        # waits for the sort to finish and reports failure if the sort exited
        # with a non-zero status. We must stop here in that case: loading a
        # partial or empty file with truncation on would destroy the existing
        # evidence codes.
        close $ih;
        if (! close $oh) {
            my $exitCode = $? >> 8;
            die "Sort of load file $loadFileName failed (exit code $exitCode, system error \"$!\").\n";
        }
        Trace("Evidence codes will be loaded from $loadFileName.") if T(2);
        # Now we need to verify the incoming evidence codes against the known
        # evidence classes. We issue a message for every non-existent class.
        # It's not a serious error, but it's something the user should know.
        Trace("Verifying evidence classes.") if T(2);
        for my $class (keys %classes) {
            if (! $attr->Exists(EvidenceClass => $class)) {
                Trace("Evidence class \"$class\" not found in database!") if T(2);
                $stats->Add(bad_class => 1);
            }
        }
        # We are almost ready to reload the evidence codes. Set up the append
        # option. It's passed in as the truncate option flag on the load:
        # appending means NOT truncating.
        my $truncate = ($options->{append} ? 0 : 1);
        # Now we load.
        Trace("Loading evidence codes.") if T(2);
        $attr->LoadTable($loadFileName, 'IsEvidencedBy', truncate => $truncate,
                         mode => 'concurrent');
        # Tell the user we're done, and show the statistics.
        Trace("Evidence codes loaded.\n" . $stats->Show()) if T(2);
    }
    # Signal success to the enclosing eval.
    1;
};
if (! $ok) {
    # Fall back to a generic message if $@ was lost during unwinding.
    my $error = $@ || "unknown error";
    Trace("Script failed with error: $error") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# If a phone number was supplied, text the outcome of the run to it. A true
# return value from SendSMS is treated as the ID of a successfully queued
# message.
if (my $phoneNumber = $options->{phone}) {
    my $smsID = Tracer::SendSMS($phoneNumber, "EvCodeRefresh terminated with $rtype.");
    my $report = ($smsID ? "Phone message sent with ID $smsID."
                         : "Phone message not sent.");
    Trace($report) if T(2);
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3