[Bio] / Sprout / LoadExpressionGenome.pl Repository:
ViewVC logotype

Annotation of /Sprout/LoadExpressionGenome.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     use strict;
21 :     use Tracer;
22 :     use Stats;
23 :     use SaplingExpressionLoader;
24 :     use Sapling;
25 :     use SaplingGenomeLoader;
26 :     use File::Spec;
27 :     use FIG_Config;
28 :    
29 :     =head1 LoadExpressionGenome Script
30 :    
31 :     =head2 Introduction
32 :    
33 :     LoadExpressionGenome [options] genome1 genome2 ...
34 :    
35 :     This script loads the expression data directory for each specified genome into a Sapling
35 :     database. If the C<genome> option is specified, the associated genome is loaded first.
36 :    
37 :     =head2 Command-Line Options
38 :    
39 :     =over 4
40 :    
41 :     =item trace
42 :    
43 :     Specifies the tracing level. The higher the tracing level, the more messages
44 :     will appear in the trace log. Use E to specify emergency tracing.
45 :    
46 :     =item root
47 :    
48 :     Directory in which the expression data is found. Each genome's expression data should be
49 :     in a subdirectory of this one with the same name as the genome ID. So, if the value
50 :     were C</vol/expression-data/current>, the expression data for genome C<100226.1> would
51 :     be in the directory C</vol/expression-data/current/100226.1>.
52 :    
53 :     =item user
54 :    
55 :     Name suffix to be used for log files. If omitted, the PID is used.
56 :    
57 :     =item sql
58 :    
59 :     If specified, turns on tracing of SQL activity.
60 :    
61 :     =item background
62 :    
63 :     Save the standard and error output to files. The files will be created
64 :     in the FIG temporary directory and will be named C<err>I<User>C<.log> and
65 :     C<out>I<User>C<.log>, respectively, where I<User> is the value of the
66 :     B<user> option above.
67 :    
68 :     =item help
69 :    
70 :     Display this command's parameters and options.
71 :    
72 :     =item warn
73 :    
74 :     Create an event in the RSS feed when an error occurs.
75 :    
76 :     =item genome
77 :    
78 :     For each genome, also load the associated genome directory from the current SEED. This
79 :     will take place before loading the expression data.
80 :    
81 :     =item init
82 :    
83 :     If specified, the database will be initialized before loading.
84 :    
85 :     =item dbName
86 :    
87 :     Name of the database to contain the data, if different from the Sapling master database.
88 :    
89 :     =back
90 :    
91 :     =cut
92 :    
# Main script body: parse the command line, connect to the Sapling database,
# and then, for each genome ID given as a positional parameter, optionally
# reload the genome itself and always reload its expression data. All work is
# done inside an eval so that a failure is traced rather than killing the
# script before the statistics are displayed.

# Get the command-line options and parameters. The first argument lists the
# tracing categories to activate; the hash gives each script-specific option's
# default value and help text.
my ($options, @parameters) = StandardSetup([qw(SaplingExpressionLoader SaplingGenomeLoader SaplingDataLoader) ],
                                           {
                                              trace => ["3", "tracing level"],
                                              root => ["/vol/expression/Jan5.processed/", "root directory for expression data subdirectories"],
                                              genome => ["", "if specified, the genome directory will be loaded before the expression data"],
                                              init => ["", "if specified, the database will be initialized before loading"],
                                              dbName => ["", "name of the database, if not the standard Sapling"],
                                           },
                                           "genome1 genome2 ...",
                                           @ARGV);
# Set a variable to contain return type information. It is set to "error" or
# "no error" once the eval below completes.
my $rtype;
# Create a statistics object. Every loader's counts are rolled up into this.
my $stats = Stats->new();
# Ensure we catch errors.
eval {
    # Get the Sapling database. $dbName stays undefined when the option is
    # omitted, and is passed to the constructor that way.
    my $dbName;
    if ($options->{dbName}) {
        $dbName = $options->{dbName};
        Trace("Connecting to Sapling database $dbName.") if T(2);
    } else {
        Trace("Connecting to default Sapling database.") if T(2);
    }
    my $sap = Sapling->new(dbName => $dbName);
    # Initialize if we need to. Note that if we DO initialize, we indicate that we don't need
    # to clear genomes before loading.
    my $clearNeeded = 1;
    if ($options->{init}) {
        Trace("Initializing database.") if T(2);
        $sap->CreateTables();
        $sap->InternalizeDBD();
        $clearNeeded = 0;
    }
    # Get the expression directory.
    my $rootDirectory = $options->{root};
    if (! -d $rootDirectory) {
        Trace("Root directory $rootDirectory not found.") if T(0);
    } else {
        # Loop through the genomes.
        for my $genome (@parameters) {
            Trace("Processing genome $genome.") if T(2);
            # Compute the expression directory name. This is a directory, not
            # a file, so catdir is the appropriate File::Spec method.
            my $expDirectory = File::Spec->catdir($rootDirectory, $genome);
            if (! -d $expDirectory) {
                # The directory is invalid, so we skip this genome.
                Trace("Expression directory $expDirectory not found.") if T(1);
                $stats->Add(missingDirectory => 1);
            } else {
                # The directory is valid. Check to see if we need to do the genome.
                if ($options->{genome}) {
                    # Here we have to load the genome first. Check to see if we have to clear.
                    if ($clearNeeded) {
                        # Clear the old genome data.
                        Trace("Deleting old data for genome $genome.") if T(3);
                        my $deleteStats = SaplingGenomeLoader::ClearGenome($sap, $genome);
                        # Update the delete counts.
                        AccumulateDeletions($stats, $deleteStats);
                    }
                    # Now load the genome from its organism directory.
                    my $genomeStats = SaplingGenomeLoader::Load($sap, $genome, "$FIG_Config::organisms/$genome");
                    # Roll up the statistics.
                    $stats->Accumulate($genomeStats);
                }
                # The genome is safe, so now we process the expression data.
                if ($clearNeeded) {
                    # Clear the old expression data.
                    Trace("Deleting old expression data for genome $genome.") if T(3);
                    my $deleteStats = SaplingExpressionLoader::ClearExpressionData($sap, $genome);
                    # Update the delete counts.
                    AccumulateDeletions($stats, $deleteStats);
                }
                # Finally, load the expression data itself.
                Trace("Loading expression data for genome $genome.") if T(3);
                my $expressionStats = SaplingExpressionLoader::Load($sap, $genome, $expDirectory);
                # Roll up the statistics, so the expression load is included in
                # the final report just like the genome load above.
                $stats->Accumulate($expressionStats);
            }
        }
    }
};
if ($@) {
    Trace("Script failed with error: $@") if T(0);
    $rtype = "error";
} else {
    Trace("Script complete.") if T(2);
    $rtype = "no error";
}
# Display the statistics. This happens whether or not the run failed, so
# partial counts are still visible after an error.
Trace("Statistics for this run:\n" . $stats->Show()) if T(2);
182 :    
183 :     =head2 Utility Methods
184 :    
185 :     =head3 AccumulateDeletions
186 :    
187 :     AccumulateDeletions($stats, $deleteStats);
188 :    
189 :     Accumulate the deletion counts from one statistics object into another statistics
190 :     object. As they are transferred, the prefix C<delete-> is added to each table name.
191 :    
192 :     =over 4
193 :    
194 :     =item stats
195 :    
196 :     Target statistics object.
197 :    
198 :     =item deleteStats
199 :    
200 :     Statistics object containing the record-deletion counts for each table.
201 :    
202 :     =back
203 :    
204 :     =cut
205 :    
sub AccumulateDeletions {
    # Unpack the target statistics object and the object holding the
    # per-table deletion counts.
    my ($stats, $deleteStats) = @_;
    # Ask the deletion statistics for its name-to-count hash.
    my $countOf = $deleteStats->Map();
    # Copy each count into the target under "delete-" plus the original name.
    my @tableNames = keys %{$countOf};
    foreach my $tableName (@tableNames) {
        my $targetName = "delete-$tableName";
        $stats->Add($targetName, $countOf->{$tableName});
    }
}
216 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3