Parent Directory
|
Revision Log
Revision 1.13 - (view) (download) (as text)
1 : | parrello | 1.1 | #!/usr/bin/perl -w |
2 : | |||
3 : | =head1 Load Sprout Tables | ||
4 : | |||
5 : | parrello | 1.12 | =head2 Introduction |
6 : | |||
7 : | This script creates the load files for Sprout tables and optionally loads them. | ||
8 : | The parameters are the names of the table groups whose data is to be created. | ||
9 : | The legal table group names are given below. | ||
10 : | parrello | 1.1 | |
11 : | =over 4 | ||
12 : | |||
13 : | =item Genome | ||
14 : | |||
15 : | Loads B<Genome>, B<HasContig>, B<Contig>, B<IsMadeUpOf>, and B<Sequence>. | ||
16 : | |||
17 : | =item Coupling | ||
18 : | |||
19 : | Loads B<Coupling>, B<IsEvidencedBy>, B<PCH>, B<ParticipatesInCoupling>, | ||
20 : | B<UsesAsEvidence>. | ||
21 : | |||
22 : | =item Feature | ||
23 : | |||
24 : | Loads B<Feature>, B<FeatureAlias>, B<FeatureTranslation>, B<FeatureUpstream>, | ||
25 : | parrello | 1.2 | B<IsLocatedIn>, B<FeatureLink>. |
26 : | parrello | 1.1 | |
27 : | =item Subsystem | ||
28 : | |||
29 : | parrello | 1.2 | Loads B<Subsystem>, B<Role>, B<SSCell>, B<ContainsFeature>, B<IsGenomeOf>, |
30 : | parrello | 1.8 | B<IsRoleOf>, B<OccursInSubsystem>, B<ParticipatesIn>, B<HasSSCell>, |
31 : | parrello | 1.11 | B<Catalyzes>, B<ConsistsOfRoles>, B<RoleSubset>, B<HasRoleSubset>, |
32 : | parrello | 1.13 | B<ConsistsOfGenomes>, B<GenomeSubset>, B<HasGenomeSubset>, B<Diagram>, |
33 : | B<RoleOccursIn>. | ||
34 : | parrello | 1.1 | |
35 : | parrello | 1.2 | =item Annotation |
36 : | |||
37 : | Loads B<SproutUser>, B<UserAccess>, B<Annotation>, B<IsTargetOfAnnotation>, | ||
38 : | B<MadeAnnotation>. | ||
39 : | |||
40 : | =item Property | ||
41 : | |||
42 : | Loads B<Property>, B<HasProperty>. | ||
43 : | |||
44 : | =item BBH | ||
45 : | |||
46 : | Loads B<IsBidirectionalBestHitOf>. | ||
47 : | |||
48 : | parrello | 1.3 | =item Group |
49 : | |||
50 : | Loads B<GenomeGroups>. | ||
51 : | |||
52 : | =item Source | ||
53 : | |||
54 : | Loads B<Source>, B<ComesFrom>, B<SourceURL>. | ||
55 : | |||
56 : | parrello | 1.4 | =item External |
57 : | |||
58 : | Loads B<ExternalAliasOrg>, B<ExternalAliasFunc>. | ||
59 : | |||
60 : | parrello | 1.8 | =item Reaction |
61 : | |||
62 : | Loads B<ReactionURL>, B<Compound>, B<CompoundName>, | ||
63 : | parrello | 1.11 | B<CompoundCAS>, B<IsAComponentOf>, B<Reaction>. |
64 : | parrello | 1.8 | |
65 : | parrello | 1.3 | =item * |
66 : | |||
67 : | Loads all of the above tables. | ||
68 : | |||
69 : | parrello | 1.1 | =back |
70 : | |||
71 : | parrello | 1.7 | The command-line options are given below. |
72 : | parrello | 1.1 | |
73 : | =over 4 | ||
74 : | |||
75 : | =item geneFile | ||
76 : | |||
77 : | The name of the file containing the genomes and their associated access codes. The | ||
78 : | file should have one line per genome, each line consisting of the genome ID followed | ||
79 : | by the access code, separated by a tab. If no file is specified, all complete genomes | ||
80 : | will be processed and the access code will be 1. | ||
81 : | |||
82 : | =item subsysFile | ||
83 : | |||
84 : | The name of the file containing the trusted subsystems. The file should have one line | ||
85 : | per trusted subsystem. If no file is specified, all subsystems will be trusted. | ||
86 : | |||
87 : | =item trace | ||
88 : | |||
89 : | Desired tracing level. The default is 3. | ||
90 : | |||
91 : | parrello | 1.7 | =item limitedFeatures |
92 : | |||
93 : | Only generate the B<Feature> and B<IsLocatedIn> tables when processing the feature group. | ||
94 : | |||
95 : | parrello | 1.10 | =item dbLoad |
96 : | |||
97 : | If TRUE, the database tables will be loaded automatically from the load files created. | ||
98 : | |||
99 : | parrello | 1.1 | =back |
100 : | |||
101 : | parrello | 1.12 | =head2 Usage |
102 : | |||
103 : | To load all the Sprout tables and then validate the result, you need to issue three | ||
104 : | commands. | ||
105 : | |||
106 : | LoadSproutTables -dbLoad "*" | ||
107 : | TestSproutLoad | ||
108 : | index_sprout | ||
109 : | |||
110 : | All three commands send output to the console. In addition, C<LoadSproutTables> and | ||
111 : | C<TestSproutLoad> write tracing information to C<trace.log> in the FIG temporary | ||
112 : | directory (B<$FIG_Config::temp>). At the bottom of the log file will be a complete | ||
113 : | list of errors. If errors occur in C<LoadSproutTables>, then the data must be corrected | ||
114 : | and the offending table group reloaded. So, for example, if there are errors in the | ||
115 : | load of the B<MadeAnnotation> and B<Compound> tables, you would need to run | ||
116 : | |||
117 : | LoadSproutTables -dbLoad Annotation Reaction | ||
118 : | |||
119 : | because B<MadeAnnotation> is in the C<Annotation> group, and B<Compound> is in the | ||
120 : | C<Reaction> group. You can omit the C<dbLoad> option to create the load files without | ||
121 : | loading the database, and you can add a C<trace> option to change the trace level. | ||
122 : | The command below creates the Genome-related load files with a trace level of 3 and | ||
123 : | does not load them into the Sprout database. | ||
124 : | |||
125 : | LoadSproutTables -trace=3 Genome | ||
126 : | |||
127 : | C<LoadSproutTables> takes a long time to run, so keeping the trace level at 3 (the | ||
128 : | default) helps to give you an idea of the progress. | ||
129 : | |||
130 : | Once the Sprout database is loaded, B<TestSproutLoad> can be used to verify the load | ||
131 : | against the FIG data. Again, the end of the C<trace.log> file will contain a summary | ||
132 : | of the errors found. Like C<LoadSproutTables>, C<TestSproutLoad> is a time-consuming | ||
133 : | script, so you may want to set the trace level to 3 to see visible progress. | ||
134 : | |||
135 : | TestSproutLoad -trace=3 | ||
136 : | |||
137 : | Unlike C<LoadSproutTables>, in C<TestSproutLoad>, the individual errors found are | ||
138 : | mixed in with the trace messages. They are all, however, marked with a trace type | ||
139 : | of B<Problem>, as shown in the fragment below. | ||
140 : | |||
141 : | 11/02/2005 19:15:16 <main>: Processing feature fig|100226.1.peg.7742. | ||
142 : | 11/02/2005 19:15:17 <main>: Processing feature fig|100226.1.peg.7741. | ||
143 : | 11/02/2005 19:15:17 <Problem>: assignment "Short-chain dehydrodenase ... | ||
144 : | 11/02/2005 19:15:17 <Problem>: assignment "putative oxidoreductase." ... | ||
145 : | 11/02/2005 19:15:17 <Problem>: Incorrect assignment for fig|100226.1.peg.7741... | ||
146 : | 11/02/2005 19:15:17 <Problem>: Incorrect number of annotations found in ... | ||
147 : | 11/02/2005 19:15:17 <main>: Processing feature fig|100226.1.peg.7740. | ||
148 : | 11/02/2005 19:15:18 <main>: Processing feature fig|100226.1.peg.7739. | ||
149 : | |||
150 : | The test may reveal that some tables need to be reloaded, or that a software | ||
151 : | problem has crept into the Sprout. | ||
152 : | |||
153 : | Once all the tables have the correct data, C<index_sprout> can be run to create the | ||
154 : | Glimpse indexes. | ||
155 : | |||
156 : | parrello | 1.1 | =cut |
157 : | |||
158 : | use strict; | ||
159 : | use Tracer; | ||
160 : | use DocUtils; | ||
161 : | use Cwd; | ||
162 : | use FIG; | ||
163 : | use SFXlate; | ||
164 : | use File::Copy; | ||
165 : | use File::Path; | ||
166 : | use SproutLoad; | ||
167 : | use Stats; | ||
168 : | parrello | 1.9 | use SFXlate; |
169 : | parrello | 1.1 | |
# Main script body: parse the command line, build the loader object, and run
# the load method for each table group named in the positional parameters.

# Get the command-line parameters and options. The hash gives the default
# value for each option; whatever is left over after option parsing comes
# back in @parameters and is treated as the list of table groups to process.
my ($options, @parameters) = Tracer::ParseCommand({ geneFile => "", subsysFile => "",
                                                    trace => 3, limitedFeatures => 0,
                                                    dbLoad => 0 }, @ARGV);
# Set up tracing. The first argument is the trace level followed by the
# trace categories; the second sends the output to trace.log in the FIG
# temporary directory.
TSetup("$options->{trace} SproutLoad ERDBLoad ERDB Stats Tracer Load", "+>$FIG_Config::temp/trace.log");
# Create the sprout loader object. Note that the Sprout object does not
# open the database unless the "dbLoad" option is turned on.
my $fig = FIG->new();
my $sprout = SFXlate->new_sprout_only(undef, undef, undef, ! $options->{dbLoad});
my $spl = SproutLoad->new($sprout, $fig, $options->{geneFile}, $options->{subsysFile}, $options);
# The table groups, in the order they are processed when "*" is specified.
# Group "Xyz" is loaded by the SproutLoad method "LoadXyzData".
my @groupNames = qw(Genome Feature Coupling Subsystem Property Annotation
                    BBH Group Source External Reaction);
# Complain if there is nothing to do, so that an accidentally empty command
# line is not mistaken for a successful load.
if (! @parameters) {
    warn "No load groups specified: nothing will be loaded. Valid groups: @groupNames *\n";
}
# Process the parameters.
for my $group (@parameters) {
    Trace("Processing load group $group.") if T(2);
    # Expand "*" to every group; otherwise pick out the single matching group.
    my @selected = ($group eq '*' ? @groupNames : grep { $_ eq $group } @groupNames);
    # A name that matches nothing is almost certainly a typo. Report it
    # instead of silently skipping it, then carry on with the other groups.
    if (! @selected) {
        warn "Unrecognized load group \"$group\" ignored. Valid groups: @groupNames *\n";
        next;
    }
    # Run the load method for each selected group.
    for my $name (@selected) {
        my $method = "Load${name}Data";
        $spl->$method();
    }
}
Trace("Load complete.") if T(2);

1;
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |