[Bio] / Sprout / SubsystemSaplingLoader.pm Repository:
ViewVC logotype

Annotation of /Sprout/SubsystemSaplingLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package SubsystemSaplingLoader;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use ERDB;
25 :     use base 'BaseSaplingLoader';
26 :    
27 :     =head1 Sapling Subsystem Load Group Class
28 :    
29 :     =head2 Introduction
30 :    
31 :     The Subsystem Load Group includes all of the major subsystem-related tables.
32 :    
33 :     =head3 new
34 :    
35 :     my $sl = SubsystemSaplingLoader->new($erdb, $options, @tables);
36 :    
37 :     Construct a new SubsystemSaplingLoader object.
38 :    
39 :     =over 4
40 :    
41 :     =item erdb
42 :    
43 :     [[SaplingPm]] object for the database being loaded.
44 :    
45 :     =item options
46 :    
47 :     Reference to a hash of command-line options.
48 :    
49 :     =item tables
50 :    
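51 :     List of tables in this load group. Note that the constructor currently ignores this argument and always uses its own built-in table list.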
52 :    
53 :     =back
54 :    
55 :     =cut
56 :    
sub new {
    # Only the class, database object, and option hash are read from the
    # argument list; anything after them is not used.
    my ($class, $erdb, $options) = @_;
    # The tables of this load group are fixed. They are handed to the base
    # class in sorted order.
    my @groupTables = sort qw(
        Subsystem Describes Variant Includes Role IsClassFor
        SubsystemClass IsSuperclassOf IsImplementedBy
        MolecularMachine IsMachineOf MachineRole Uses
        IsContainedIn IsRoleOf
    );
    # Delegate construction to the BaseSaplingLoader constructor, called as a
    # plain function with the class name as its first argument. The call is
    # made in scalar context and its result is returned as the new object.
    return scalar(BaseSaplingLoader::new($class, $erdb, $options, @groupTables));
}
70 :    
71 :     =head2 Public Methods
72 :    
73 :     =head3 Generate
74 :    
75 :     $sl->Generate();
76 :    
77 :     Generate the data for the subsystem-related files.
78 :    
79 :     =cut
80 :    
sub Generate {
    my ($self) = @_;
    # Both generators need the target database object and the source object.
    my $sapling = $self->db();
    my $source = $self->source();
    # The global section produces the genome-independent subsystem framework.
    if ($self->global()) {
        return $self->GenerateSubsystems($source, $sapling);
    }
    # Any other section's ID is used as a genome ID, and the subsystem data
    # for that genome is generated.
    my $genomeID = $self->section();
    return $self->GenerateSubsystemData($source, $sapling, $genomeID);
}
99 :    
100 :     =head3 GenerateSubsystems
101 :    
102 :     $sl->GenerateSubsystems($fig, $erdb);
103 :    
104 :     Generate the subsystems, variants, and roles for this database. This
105 :     method concerns itself primarily with the genome-independent part of the
106 :     subsystem framework. This includes the following tables:
107 :    
108 :     Subsystem
109 :     Describes
110 :     Variant
111 :     Includes
112 :     Role
113 :     IsClassFor
114 :     SubsystemClass
115 :     IsSuperclassOf
116 :    
117 :     =over 4
118 :    
119 :     =item fig
120 :    
121 :     Source object from which the subsystem data will be extracted.
122 :    
123 :     =item erdb
124 :    
125 :     Database object for the Sapling database.
126 :    
127 :     =back
128 :    
129 :     =cut
130 :    
sub GenerateSubsystems {
    # Get the parameters: the loader, the source object the subsystem data is
    # read from, and the Sapling database object.
    my ($self, $fig, $erdb) = @_;
    # Get the subsystem hash for this Sapling instance. Its key list is the
    # list of subsystems to put in the database.
    my $subHash = $erdb->SubsystemHash();
    # Loop through the subsystems.
    for my $subsystem (keys %$subHash) {
        # Compute this subsystem's database ID. It is only used for tracing;
        # the ID used in the output comes from AnalyzeSubsystemName below.
        my $subsystemID = $erdb->SubsystemID($subsystem);
        Trace("Processing subsystem $subsystemID ($subsystem).") if T(3);
        # Get the source subsystem object and the subsystem's name and ID.
        my $ssData = $fig->get_subsystem($subsystem);
        my ($subsystemName, $subID) = $self->AnalyzeSubsystemName($subsystem);
        # Emit the subsystem entity with its properties.
        my $curator = $ssData->get_curator();
        my $description = $ssData->get_description();
        my $notes = $ssData->get_notes();
        my $version = $ssData->get_version();
        $self->PutE(Subsystem => $subID, curator => $curator,
                    description => $description, notes => $notes,
                    name => $subsystemName, version => $version);
        # Emit the roles. The position of each role in the subsystem's role
        # list is recorded as its sequence number, starting at 0.
        my @roles = $ssData->get_roles();
        my $col = 0;
        for my $role (@roles) {
            # Compute this role's name and ID. A role counts as hypothetical
            # when the word appears anywhere in it, regardless of case.
            my ($roleName, $roleID) = $self->AnalyzeSubsystemName($role);
            my $hypothetical = ($role =~ /hypothetical/i ? 1 : 0);
            # Create the role entity and connect it to the subsystem.
            $self->PutE(Role => $roleID, hypothetical => $hypothetical,
                        name => $roleName);
            $self->PutR(Includes => $subID, $roleID,
                        abbreviation => $ssData->get_abbr_for_role($role),
                        sequence => $col++);
        }
        # Put the subsystem in its classes. The classification list is walked
        # from its last element (attached directly to the subsystem) back to
        # its first. We walk a private copy so that the array handed back by
        # the source object is never modified, and we drop undefined or empty
        # names so that a blank level neither becomes a class with an empty
        # ID nor cuts off the levels above it.
        my $classes = $ssData->get_classification();
        my @classPath = grep { defined $_ && length $_ } @{ $classes || [] };
        my $class = pop @classPath;
        if (defined $class) {
            # Create the innermost class and connect it to the subsystem.
            $self->CreateClass($class);
            $self->PutR(IsClassFor => $class, $subID);
            # Move up the hierarchy, linking each class to the one below it.
            while (@classPath) {
                my $newClass = pop @classPath;
                $self->CreateClass($newClass);
                $self->PutR(IsSuperclassOf => $newClass, $class);
                $class = $newClass;
            }
        }
        # Next come the variants. Start with every known variant code (with
        # any asterisk markers removed by Starless) and an empty comment.
        my %variants = map { ($self->Starless($_) => '') } $ssData->get_variant_codes();
        # 0 and -1 are always present.
        $variants{'0'} = 'Subsystem functionality is incomplete.';
        $variants{'-1'} = 'Subsystem is not functional.';
        # Notes recorded for a variant replace the comment chosen above.
        my $variantHash = $ssData->get_variants();
        for my $variant (keys %$variantHash) {
            $variants{$variant} = $variantHash->{$variant};
        }
        # Create the variants.
        for my $variant (keys %variants) {
            # The variant key is the subsystem ID plus the variant code.
            my $variantID = "$subID:$variant";
            # At this time, the role rule is not available, so we only have
            # the comment.
            $self->PutE(Variant => $variantID, comment => $variants{$variant},
                        role_rule => '');
            # Link the subsystem to the variant.
            $self->PutR(Describes => $subID, $variantID);
        }
    }
}
211 :    
212 :     =head3 GenerateSubsystemData
213 :    
214 :     $sl->GenerateSubsystemData($fig, $erdb, $genomeID);
215 :    
216 :     Generate the molecular machines and subsystem spreadsheet cells for this
217 :     database. This method concerns itself primarily with the genome-dependent
218 :     part of the subsystem framework. This includes the following tables.
219 :    
220 :     IsImplementedBy
221 :     MolecularMachine
222 :     IsMachineOf
223 :     MachineRole
224 :     Uses
225 :     IsContainedIn
226 :     IsRoleOf
227 :    
228 :     =over 4
229 :    
230 :     =item fig
231 :    
232 :     Source object from which the subsystem data will be extracted.
233 :    
234 :     =item erdb
235 :    
236 :     Database object for the Sapling database.
237 :    
238 :     =item genomeID
239 :    
240 :     ID of the relevant genome.
241 :    
242 :     =back
243 :    
244 :     =cut
245 :    
sub GenerateSubsystemData {
    # Get the parameters: the loader, the source object, the Sapling database
    # object, and the ID of the genome whose data is being generated.
    my ($self, $fig, $erdb, $genomeID) = @_;
    # Needed to decide whether a variant code can be compared numerically.
    require Scalar::Util;
    # Get the subsystem hash for this Sapling instance. Its key list is the
    # list of subsystems being put in the database.
    my $subHash = $erdb->SubsystemHash();
    # Get the list of subsystems for this genome. The "1" indicates we want
    # all of them, including the ones for 0 and -1 variants. Note we grep
    # against the subsystem hash to exclude subsystems that are not flagged
    # for Sapling.
    my @subNames = grep { exists $subHash->{$_} }
                   $fig->subsystems_for_genome($genomeID, 1);
    # Loop through the named subsystems. Each one corresponds to a molecular
    # machine.
    for my $subName (@subNames) {
        $self->Track(MolecularMachines => $subName, 100);
        # Compute the subsystem ID and get the subsystem object.
        my (undef, $subID) = $self->AnalyzeSubsystemName($subName);
        my $ssData = $fig->get_subsystem($subName);
        # To create the molecular machine we need the variant code for this
        # genome.
        my $gidx = $ssData->get_genome_index($genomeID);
        my $raw_variant_code = $ssData->get_variant_code($gidx);
        # Check for a leading asterisk. This means the variant assignment is
        # not curated.
        my $curated = ($raw_variant_code =~ /^\s*\*/ ? 0 : 1);
        # Clear any waste from the variant code.
        my $variant_code = $self->Starless($raw_variant_code);
        # Compute its type. Variant codes are text, so a numeric comparison
        # is only meaningful when the code really is a number; otherwise a
        # code such as "A" would numify to 0 and be mislabeled as incomplete.
        my $variant_type = 'normal';
        if (! defined $variant_code || $variant_code !~ /\S/) {
            # A missing or blank code has always been treated as 0.
            $variant_type = 'incomplete';
        } elsif (Scalar::Util::looks_like_number($variant_code)) {
            if ($variant_code == 0) {
                $variant_type = 'incomplete';
            } elsif ($variant_code < 0) {
                $variant_type = 'vacant';
            }
        }
        # Create the variant and machine IDs.
        my $variantID = "$subID:$variant_code";
        my $machineID = "$variantID:$genomeID";
        # Create the molecular machine and connect it to the genome and
        # the subsystem variant.
        $self->PutE(MolecularMachine => $machineID, type => $variant_type,
                    curated => $curated);
        $self->PutR(IsImplementedBy => $variantID, $machineID);
        $self->PutR(Uses => $genomeID, $machineID);
        # Now we loop through the subsystem's roles, creating the MachineRoles.
        # Molecular machines function as spreadsheet rows; machine roles are
        # spreadsheet cells.
        my @roles = $ssData->get_roles();
        for my $role (@roles) {
            # Get this role's index and abbreviation.
            my $ridx = $ssData->get_role_index($role);
            my $abbr = $ssData->get_role_abbr($ridx);
            # Compute the role's ID.
            my (undef, $roleID) = $self->AnalyzeSubsystemName($role);
            # The machine-role ID is the machine ID plus the abbreviation.
            my $machineRoleID = "$machineID:$abbr";
            # Create the machine-role and connect it to the role and the
            # machine.
            $self->PutE(MachineRole => $machineRoleID);
            $self->PutR(IsMachineOf => $machineID, $machineRoleID);
            $self->PutR(IsRoleOf => $roleID, $machineRoleID);
            # Connect every feature in this cell to the machine-role.
            my @pegs = $ssData->get_pegs_from_cell($genomeID, $ridx);
            for my $peg (@pegs) {
                $self->PutR(IsContainedIn => $peg, $machineRoleID);
            }
        }
    }
}
313 :    
314 :     =head3 CreateClass
315 :    
316 :     $sl->CreateClass($className);
317 :    
318 :     Create a SubsystemClass record with the specified class name.
319 :    
320 :     =over 4
321 :    
322 :     =item className
323 :    
324 :     Name of the subsystem classification to create.
325 :    
326 :     =back
327 :    
328 :     =cut
329 :    
sub CreateClass {
    my ($self, $className) = @_;
    # A subsystem class is an entity whose ID is the class name itself; no
    # other fields are supplied.
    return $self->PutE(SubsystemClass => $className);
}
336 :    
337 :    
338 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3