[Bio] / Sprout / AttrKeyConvert.pl Repository:
ViewVC logotype

Annotation of /Sprout/AttrKeyConvert.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     use strict;
21 :     use Tracer;
22 :     use Stats;
23 :    
24 :     =head1 AttrKeyConvert Script
25 :    
26 :     AttrKeyConvert [options] <inFile> <outFile>
27 :    
28 :     Convert an old attribute key dump to the new format
29 :    
30 :     =head2 Introduction
31 :    
32 :     The new attribute system adds the ability to store a specified keyword's values
33 :     in a separate table, which requires a new field in the B<AttributeKey> table.
34 :     This script takes a key dump created in the old attribute system and modifies it
35 :     so that it can be used to load keys into the new system.
36 :    
37 :     =head2 Positional Parameters
38 :    
39 :     =over 4
40 :    
41 :     =item inFile
42 :    
43 :     Name of the file containing the old attribute key dump. If this parameter is
44 :     not specified, the standard input will be used as the input and the output will
45 :     be to the standard output.
46 :    
47 :     =item outFile
48 :    
49 :     Name of the file to be created with the attribute key dump in the new format.
50 :     If this parameter is not specified, the standard output will be used as the
51 :     output. When this happens, ensure the tracing output is not also directed
52 :     to the standard output by using the default trace value (C<2->) or specifying
53 :     a minus sign in your trace value override (e.g., C<3-> instead of C<3>).
54 :    
55 :     =back
56 :    
57 :     =head2 Command-Line Options
58 :    
59 :     =over 4
60 :    
61 :     =item trace
62 :    
63 :     Specifies the tracing level. The higher the tracing level, the more messages
64 :     will appear in the trace log. Use E to specify emergency tracing.
65 :    
66 :     =item user
67 :    
68 :     Name suffix to be used for log files. If omitted, the PID is used.
69 :    
70 :     =item sql
71 :    
72 :     If specified, turns on tracing of SQL activity.
73 :    
74 :     =item background
75 :    
76 :     Save the standard and error output to files. The files will be created
77 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
78 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
79 :     B<user> option above.
80 :    
81 :     =item help
82 :    
83 :     Display this command's parameters and options.
84 :    
85 :     =item warn
86 :    
87 :     Create an event in the RSS feed when an error occurs.
88 :    
89 :     =item phone
90 :    
91 :     Phone number to message when the script is complete.
92 :    
93 :     =item tableName
94 :    
95 :     Name of the default table. This value is inserted as a new third column,
96 :     ahead of the original third column, in each data line written to the output.
97 :    
98 :     =back
99 :    
100 :     =cut
101 :    
# Get the command-line options and parameters.
my ($options, @parameters) = StandardSetup([qw(CustomAttributes) ],
                                           {
                                              trace => ["2-", "tracing level"],
                                              phone => ["", "phone number (international format) to call when load finishes"],
                                              tableName => ["HasValueFor", "name to be inserted as the new table name"]
                                           },
                                           "<inFile> <outFile>",
                                           @ARGV);
# This will describe how the run ended ("error" or "no error"); it is used
# in the optional phone message at the end.
my $rtype;
# Run the conversion inside an eval so that any failure is traced and
# reported instead of killing the script before the phone message is sent.
eval {
    # Get the names of the input and output files. Either or both may be
    # omitted, in which case the corresponding standard stream is used.
    my ($inFile, $outFile) = @parameters;
    # These variables will contain the input and output file handles.
    my ($ih, $oh);
    if (! defined $inFile) {
        # No parameters, so we pipe from the standard input to the standard
        # output.
        $ih = \*STDIN;
        $oh = \*STDOUT;
        Trace("Data piped from standard input to standard output.") if T(2);
    } elsif (! defined $outFile) {
        # Only an input file: read it and write to the standard output.
        $ih = Open(undef, "<$inFile");
        $oh = \*STDOUT;
        Trace("Data from \"$inFile\" piped to standard output.") if T(2);
    } else {
        # Both files were specified.
        $ih = Open(undef, "<$inFile");
        $oh = Open(undef, ">$outFile");
        Trace("Data from \"$inFile\" written to \"$outFile\".") if T(2);
    }
    # Create a statistics object to track our activity.
    my $stats = Stats->new(qw(input output groupLines errors dataLines));
    # Get the table name.
    my $tableName = $options->{tableName};
    Trace("Table name will be \"$tableName\".") if T(2);
    # Loop through the input.
    while (! eof $ih) {
        # Read this line and split it into sections.
        my @cols = Tracer::GetLine($ih);
        # Count the line BEFORE testing the trace level. The count must not
        # sit behind a short-circuiting T(3) test, or the "input" statistic
        # (used for the line number in error messages below) would only
        # advance when tracing at level 3 or higher.
        # NOTE(review): assumes Stats::Check increments the named counter and
        # returns TRUE on every 50th call -- confirm against Stats.pm.
        my $checkpoint = $stats->Check(input => 50);
        Trace($stats->Ask('input') . " input lines processed.") if $checkpoint && T(3);
        # Guard against a line that produced no columns at all, so the
        # pattern match and the error message never touch an undefined value.
        my $firstCol = (defined $cols[0] ? $cols[0] : "");
        # Parse the line.
        if ($firstCol =~ /^#GROUPS/) {
            # This is a group list. Spit it out the way we found it.
            $stats->Add(groupLines => 1);
            Tracer::PutLine($oh, \@cols);
            $stats->Add(output => 1);
        } elsif (scalar(@cols) != 3) {
            # This line has an invalid format. Complain about it and skip it.
            $stats->Add(errors => 1);
            my $lineCount = $stats->Ask('input');
            Trace("Error in line $lineCount (starts with \"$firstCol\"): wrong number of columns (should be 3).") if T(0);
        } else {
            # We have a valid data line. Insert the table name at index 2 so
            # that it becomes the new third column, ahead of the old one.
            $stats->Add(dataLines => 1);
            splice @cols, 2, 0, $tableName;
            Tracer::PutLine($oh, \@cols);
            $stats->Add(output => 1);
        }
    }
    # Close the files we opened ourselves (the standard streams are left
    # alone). Buffered write errors only surface at close time, so a failed
    # close of the output file is treated as a failure of the whole run.
    if (defined $inFile) {
        close $ih;
    }
    if (defined $outFile) {
        close $oh or die "Could not close output file \"$outFile\": $!\n";
    }
    # All done. Output the statistics.
    Trace("Statistics for this run:\n" . $stats->Show()) if T(2);
};
# Record how the run ended.
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Send the completion message if a phone number was supplied.
if ($options->{phone}) {
    my $msgID = Tracer::SendSMS($options->{phone}, "AttrKeyConvert terminated with $rtype.");
    if ($msgID) {
        Trace("Phone message sent with ID $msgID.") if T(2);
    } else {
        Trace("Phone message not sent.") if T(2);
    }
}

1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3