[Bio] / Sprout / SubsystemSaplingLoader.pm Repository:
ViewVC logotype

Annotation of /Sprout/SubsystemSaplingLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package SubsystemSaplingLoader;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use ERDB;
25 :     use base 'BaseSaplingLoader';
26 :    
27 :     =head1 Sapling Subsystem Load Group Class
28 :    
29 :     =head2 Introduction
30 :    
31 :     The Subsystem Load Group includes all of the major subsystem-related tables.
32 :    
33 :     =head3 new
34 :    
35 :     my $sl = SubsystemSaplingLoader->new($erdb, $options, @tables);
36 :    
37 :     Construct a new SubsystemSaplingLoader object.
38 :    
39 :     =over 4
40 :    
41 :     =item erdb
42 :    
43 :     [[SaplingPm]] object for the database being loaded.
44 :    
45 :     =item options
46 :    
47 :     Reference to a hash of command-line options.
48 :    
49 :     =item tables
50 :    
51 :     List of tables in this load group.
52 :    
53 :     =back
54 :    
55 :     =cut
56 :    
sub new {
    # Unpack the class name, the database object, and the option hash. Any
    # further arguments are not used: the table list is fixed below.
    my ($class, $erdb, $options) = @_;
    # The tables belonging to this load group, in Perl's default sort order.
    my @tables = sort qw(
        Subsystem IsClassFor SubsystemClass IsSuperclassOf Includes
        Describes Role Variant IsRoleOf IsImplementedBy MachineRole
        IsMachineOf MolecularMachine IsContainedIn Uses
    );
    # Let the base loader build the object for the requested class.
    my $loader = BaseSaplingLoader::new($class, $erdb, $options, @tables);
    return $loader;
}
69 :    
70 :     =head2 Public Methods
71 :    
72 :     =head3 Generate
73 :    
74 :     $sl->Generate();
75 :    
76 :     Generate the data for the subsystem-related files.
77 :    
78 :     =cut
79 :    
sub Generate {
    my ($self) = @_;
    # Fetch the target database object and the source object up front; both
    # generators need them.
    my $sapling = $self->db();
    my $source = $self->source();
    # The global section builds the subsystem framework.
    return $self->GenerateSubsystems($source, $sapling) if $self->global();
    # Otherwise the section ID is used as a genome ID, and we generate the
    # subsystem data for that genome.
    my $genomeID = $self->section();
    return $self->GenerateSubsystemData($source, $sapling, $genomeID);
}
98 :    
99 :     =head3 GenerateSubsystems
100 :    
101 :     $sl->GenerateSubsystems($fig, $erdb);
102 :    
103 :     Generate the subsystems, variants, and roles for this database. This
104 :     method concerns itself primarily with the genome-independent part of the
105 :     subsystem framework. This includes the following tables:
106 :    
107 :     Subsystem
108 :     Describes
109 :     Variant
110 :     Includes
111 :     Role
112 :     IsClassFor
113 :     SubsystemClass
114 :     IsSuperclassOf
115 :    
116 :     =over 4
117 :    
118 :     =item fig
119 :    
120 :     Source object from which the subsystem data will be extracted.
121 :    
122 :     =item erdb
123 :    
124 :     Database object for the Sapling database.
125 :    
126 :     =back
127 :    
128 :     =cut
129 :    
sub GenerateSubsystems {
    # Get the parameters: the loader itself, the source object the subsystem
    # data is read from, and the Sapling database object.
    my ($self, $fig, $erdb) = @_;
    # Get the subsystem hash for this Sapling instance. Its key list is the
    # list of subsystems to put in the database.
    my $subHash = $erdb->SubsystemHash();
    # Loop through the subsystems.
    for my $subsystem (keys %$subHash) {
        Trace("Processing subsystem $subsystem.") if T(ERDBLoadGroup => 3);
        # Get the FIG subsystem object.
        my $ssData = $fig->get_subsystem($subsystem);
        # Get the subsystem properties and write the subsystem record.
        my $curator = $ssData->get_curator();
        my $description = $ssData->get_description();
        my $notes = $ssData->get_notes();
        my $version = $ssData->get_version();
        $self->PutE(Subsystem => $subsystem, curator => $curator,
                    description => $description, notes => $notes,
                    version => $version);
        # Get this subsystem's roles.
        my @roles = $ssData->get_roles();
        # This tracks the column number (sequence) of the role.
        my $col = 0;
        # Loop through the roles.
        for my $role (@roles) {
            # A role is flagged hypothetical when its name says so.
            my $hypothetical = ($role =~ /hypothetical/i ? 1 : 0);
            # Create its entity.
            $self->PutE(Role => $role, hypothetical => $hypothetical);
            # Fetch the abbreviation into a scalar first. Calling the
            # accessor directly inside the argument list would put it in
            # list context, and an empty-list return would shift the
            # "sequence" pair out of position.
            my $abbreviation = $ssData->get_abbr_for_role($role);
            # Connect the role to the subsystem.
            $self->PutR(Includes => $subsystem, $role,
                        abbreviation => $abbreviation,
                        sequence => $col++);
        }
        # Put the subsystem in its classes. We walk a reversed COPY of the
        # classification list, most specific class first, instead of popping
        # the returned array.
        # NOTE(review): get_classification may hand back a reference to the
        # subsystem object's own array; copying avoids emptying it -- confirm
        # against the subsystem class.
        my @hierarchy = reverse @{ $ssData->get_classification() || [] };
        my $class = shift @hierarchy;
        if (defined $class) {
            # Create the class record.
            $self->CreateClass($class);
            # Connect it to the subsystem.
            $self->PutR(IsClassFor => $class, $subsystem);
            # Move up the hierarchy. As before, the walk stops at the first
            # class name that is false (undefined, empty, or "0").
            while (my $newClass = shift @hierarchy) {
                $self->CreateClass($newClass);
                $self->PutR(IsSuperclassOf => $newClass, $class);
                $class = $newClass;
            }
        }
        # Next come the variants. Variant data is sparse in the SEED. We
        # start by getting all the known variant codes, cleaned up by
        # Starless, each with an empty comment.
        my %variants = map { BaseSaplingLoader::Starless($_) => '' } $ssData->get_variant_codes();
        # -1 and 0 are always present.
        $variants{'0'} = 'Subsystem functionality is incomplete.';
        $variants{'-1'} = 'Subsystem is not functional.';
        # Now get notes from any variants that have them. Note that we need
        # to clean up the variant code with a call to Starless. A note found
        # here replaces whatever comment the variant had so far.
        my $variantHash = $ssData->get_variants();
        for my $variant (keys %$variantHash) {
            my $realVariantID = BaseSaplingLoader::Starless($variant);
            $variants{$realVariantID} = $variantHash->{$variant};
        }
        # Create the variants.
        for my $variant (keys %variants) {
            # The variant key is the subsystem ID plus the variant code.
            my $variantID = "$subsystem:$variant";
            # At this time, the role rule is not available, so we only have
            # the comment.
            $self->PutE(Variant => $variantID, comment => $variants{$variant},
                        role_rule => '');
            # Link the subsystem to the variant.
            $self->PutR(Describes => $subsystem, $variantID);
        }
        # Clear the subsystem cache to keep memory under control.
        $fig->clear_subsystem_cache();
    }
}
209 :    
210 :     =head3 GenerateSubsystemData
211 :    
212 :     $sl->GenerateSubsystemData($fig, $erdb, $genomeID);
213 :    
214 :     Generate the molecular machines and subsystem spreadsheet cells for this
215 :     database. This method concerns itself primarily with the genome-dependent
216 :     part of the subsystem framework. This includes the following tables.
217 :    
218 :     IsImplementedBy
219 :     MolecularMachine
220 :     IsMachineOf
221 :     MachineRole
222 :     Uses
223 :     IsContainedIn
224 :     IsRoleOf
225 :    
226 :     =over 4
227 :    
228 :     =item fig
229 :    
230 :     Source object from which the subsystem data will be extracted.
231 :    
232 :     =item erdb
233 :    
234 :     Database object for the Sapling database.
235 :    
236 :     =item genomeID
237 :    
238 :     ID of the relevant genome.
239 :    
240 :     =back
241 :    
242 :     =cut
243 :    
sub GenerateSubsystemData {
    # Get the parameters: the loader itself, the source object, the Sapling
    # database object, and the ID of the genome being processed.
    my ($self, $fig, $erdb, $genomeID) = @_;
    # Get the subsystem hash for this Sapling instance. Its key list is the
    # list of subsystems being put in the database.
    my $subHash = $erdb->SubsystemHash();
    # Get the list of subsystems for this genome. The "1" indicates we want
    # all of them, including the ones for 0 and -1 variants. Note we grep
    # against the subsystem hash to exclude subsystems that are not flagged
    # for Sapling.
    my @subNames = grep { exists $subHash->{$_} }
        $fig->subsystems_for_genome($genomeID, 1);
    # Loop through the named subsystems. Each one corresponds to a molecular
    # machine.
    for my $subName (@subNames) {
        $self->Track(MolecularMachines => $subName, 100);
        # Get the subsystem object.
        my $ssData = $fig->get_subsystem($subName);
        # The role list belongs to the subsystem, not to a spreadsheet row,
        # so fetch it once here rather than once per matching row.
        my @roles = $ssData->get_roles();
        # Now we find the molecular machines for this subsystem/genome pair.
        # Each row is "genome" or "genome:region"; the row index is also the
        # index used to look up the row's variant code.
        my @rows = $ssData->get_genomes();
        for my $gidx (0 .. $#rows) {
            my ($rowGenome, $regionString) = split /:/, $rows[$gidx], 2;
            # Skip rows that belong to other genomes.
            next unless $rowGenome eq $genomeID;
            # Here we're positioned on a row for our genome. If it is a
            # region-restricted molecular machine, then the region string
            # will be defined. If it's global, we use an empty string for
            # the region.
            $regionString ||= "";
            # Create the molecular machine. To do that, we need the variant
            # code for this row.
            my $raw_variant_code = $ssData->get_variant_code($gidx);
            # Check for a leading asterisk. This means the variant assignment
            # is not curated.
            my $curated = ($raw_variant_code =~ /^\s*\*/ ? 0 : 1);
            # Clear any waste from the variant code.
            my $variant_code = BaseSaplingLoader::Starless($raw_variant_code);
            # Compute its type: codes starting with "0" are incomplete, codes
            # starting with "-" are vacant, everything else is normal.
            my $variant_type = ($variant_code =~ /^0/ ? 'incomplete' :
                                $variant_code =~ /^-/ ? 'vacant' : 'normal');
            # Create the variant and machine IDs.
            my $variantID = "$subName:$variant_code";
            my $machineID = ERDB::DigestKey("$variantID:$genomeID:$regionString");
            # Create the molecular machine and connect it to the genome and
            # the subsystem variant.
            $self->PutE(MolecularMachine => $machineID, type => $variant_type,
                        curated => $curated, region => $regionString);
            $self->PutR(IsImplementedBy => $variantID, $machineID);
            $self->PutR(Uses => $genomeID, $machineID);
            # Now we loop through the subsystem's roles, creating the
            # MachineRoles. Molecular machines function as spreadsheet rows;
            # machine roles are spreadsheet cells.
            for my $role (@roles) {
                # Get this role's abbreviation.
                my $ridx = $ssData->get_role_index($role);
                my $abbr = $ssData->get_role_abbr($ridx);
                # Create the machine-role ID.
                my $machineRoleID = "$machineID:$abbr";
                # Create the machine-role and connect it to the role and the
                # machine.
                $self->PutE(MachineRole => $machineRoleID);
                $self->PutR(IsMachineOf => $machineID, $machineRoleID);
                $self->PutR(IsRoleOf => $role, $machineRoleID);
                # Now get a list of the features in this cell.
                my @pegs = $ssData->get_pegs_from_cell($genomeID, $ridx);
                # Connect them to the cell.
                for my $peg (@pegs) {
                    $self->PutR(IsContainedIn => $peg, $machineRoleID);
                }
            }
        }
        # Clear the subsystem cache to save space.
        $fig->clear_subsystem_cache();
    }
}
320 :    
321 :     =head3 CreateClass
322 :    
323 :     $sl->CreateClass($className);
324 :    
325 :     Create a SubsystemClass record with the specified class name.
326 :    
327 :     =over 4
328 :    
329 :     =item className
330 :    
331 :     Name of the subsystem classification to create.
332 :    
333 :     =back
334 :    
335 :     =cut
336 :    
sub CreateClass {
    my $self = shift;
    my ($className) = @_;
    # Write a SubsystemClass entity keyed by the class name; it has no
    # other fields.
    $self->PutE('SubsystemClass', $className);
}
343 :    
344 :    
345 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3