[Bio] / Sprout / SproutSubsys.pm Repository:
ViewVC logotype

Annotation of /Sprout/SproutSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package SproutSubsys;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use PageBuilder;
8 :     use FIG;
9 :     use Sprout;
10 :    
11 :     =head1 Sprout Subsystem Object
12 :    
13 :     =head2 Introduction
14 :    
15 :     This object emulates the capabilities of the FIG-style C<Subsystem> object, but
16 :     uses Sprout methods to retrieve the data. This object can be dropped in place of
17 :     the UnvSubsys object to create subsystem displays for the Sprout rather than the
18 :     SEED.
19 :    
20 :     The structure created by the constructor contains the following data members.
21 :    
22 :     =over 4
23 :    
24 :     =item name
25 :    
26 :     Name of the subsystem. This is needed for any further database accesses required.
27 :    
28 :     =item curator
29 :    
30 :     Name of the subsystem's official curator.
31 :    
32 :     =item notes
33 :    
34 :     General notes about the subsystem.
35 :    
36 :     =item sprout
37 :    
38 :     Sprout object for accessing the database. This is a genuine Sprout object, not
39 :     an SFXlate object.
40 :    
41 :     =item genomeHash
42 :    
43 :     Map of genome IDs to row indices.
44 :    
45 :     =item genomes
46 :    
47 :     List of [genomeID, variantCode] tuples in row order.
48 :    
49 :     =item roleHash
50 :    
51 :     Map of role IDs and abbreviations to column indices. In other words, plugging
52 :     either a full-blown role ID or its abbreviation into this hash will return
53 :     the role's column index.
54 :    
55 :     =item roles
56 :    
57 :     List of [roleID, abbreviation] tuples in column order.
58 :    
59 :     =item reactionHash
60 :    
61 :     Map of role IDs to a list of the reactions catalyzed by the role.
62 :    
63 :     =item colorHash
64 :    
65 :     Map of PEG IDs to cluster numbers. This is used to create color maps for
66 :     display of a subsystem's PEGs.
67 :    
68 :     =back
69 :    
70 :     =cut
71 :    
72 :     #: Constructor SproutSubsys->new();
73 :    
74 :     =head2 Public Methods
75 :    
76 :     =head3 new
77 :    
78 :     C<< my $sub = Subsystem->new($subName, $sprout); >>
79 :    
80 :     Load the subsystem.
81 :    
82 :     =over 4
83 :    
84 :     =item subName
85 :    
86 :     Name of the desired subsystem.
87 :    
88 :     =item sprout
89 :    
90 :     Sprout or SFXlate object for accessing the Sprout data store.
91 :    
92 :     =back
93 :    
94 :     =cut
95 :    
96 :     sub new {
97 :     # Get the parameters.
98 :     my ($class, $subName, $sprout) = @_;
99 :     # Insure we have a Sprout object.
100 :     if (ref $sprout eq 'SFXlate') {
101 :     $sprout = $sprout->{sprout};
102 :     }
103 :     # Declare the return value.
104 :     my $retVal;
105 :     # Get the subsystem's data fields.
106 :     my ($curator, $notes) = $sprout->GetEntityValues('Subsystem', $subName, ['Subsystem(curator)',
107 :     'Subsystem(notes)']);
108 :     # Only proceed if we found the subsystem.
109 :     if (defined $curator) {
110 :     # Get the genome IDs and variant codes for the rows. The list returned
111 :     # by GetAll will be a list of 2-tuples, each consisting of a genome ID
112 :     # and a subsystem variant code.
113 :     my @genomes = $sprout->GetAll(['ParticipatesIn'],
114 :     'ParticipatesIn(to-link) = ? ORDER BY ParticipatesIn(variant-code), ParticipatesIn(from-link)',
115 :     [$subName], ['ParticipatesIn(from-link)',
116 :     'ParticipatesIn(variant-code)']);
117 :     # Create the genome ID directory. This is a hash that maps a genome ID to its
118 :     # row index.
119 :     my $idx = 0;
120 :     my %genomeHash = map { $_->[0] => $idx++ } @genomes;
121 :     # Get the role IDs and abbreviations. The list returned by GetAll will be
122 :     # a list of 2-tuples, each consisting of a role ID and abbreviation. The
123 :     # 2-tuples will be ordered by the spreadsheet column number.
124 :     my @roles = $sprout->GetAll(['OccursInSubsystem', 'Role'],
125 :     'OccursInSubsystem(to-link) = ? ORDER BY OccursInSubsystem(column-number)',
126 :     [$subName], ['OccursInSubsystem(from-link)', 'Role(abbr)']);
127 :     # Now we need to create the role ID directory and the reaction hash.
128 :     # The role ID directory maps role IDs and their abbreviations to column numbers.
129 :     # The reaction hash maps a role ID to a list of the IDs for the reactions it
130 :     # catalyzes.
131 :     my %roleHash = ();
132 :     my %reactionHash = ();
133 :     for ($idx = 0; $idx <= $#roles; $idx++) {
134 :     # Get the role ID and abbreviation for this column's role.
135 :     my ($roleID, $abbr) = @{$roles[$idx]};
136 :     # Put them both in the role directory.
137 :     $roleHash{$roleID} = $idx;
138 :     $roleHash{$abbr} = $idx;
139 :     # Get this role's reactions.
140 :     my @reactions = $sprout->GetFlat(['Catalyzes'], 'Catalyzes(from-link) = ?',
141 :     [$roleID], 'Catalyzes(to-link)');
142 :     # Put them in the reaction hash.
143 :     $reactionHash{$roleID} = \@reactions;
144 :     }
145 :     # Create the subsystem object.
146 :     $retVal = {
147 :     # Name of the subsystem. This is needed for any further database
148 :     # accesses required.
149 :     name => $subName,
150 :     # Name of the subsystem's official curator.
151 :     curator => $curator,
152 :     # General notes about the subsystem.
153 :     notes => $notes,
154 :     # Sprout object for accessing the database.
155 :     sprout => $sprout,
156 :     # Map of genome IDs to row indices.
157 :     genomeHash => \%genomeHash,
158 :     # List of [genomeID, variantCode] tuples in row order.
159 :     genomes => \@genomes,
160 :     # Map of role IDs and abbreviations to column indices.
161 :     roleHash => \%roleHash,
162 :     # List of [roleID, abbreviation] tuples in column order.
163 :     roles => \@roles,
164 :     # Map of PEG IDs to cluster numbers.
165 :     colorHash => {},
166 :     };
167 :     # Bless and return it.
168 :     bless $retVal, $class;
169 :     }
170 :     return $retVal;
171 :     }
172 :    
173 :     =head3 get_genomes
174 :    
175 :     C<< my @genomeList = $sub->get_genomes(); >>
176 :    
177 :     Return a list of the genome IDs for this subsystem. Each genome corresponds to a row
178 :     in the subsystem spreadsheet. Indexing into this list returns the ID of the genome
179 :     in the specified row.
180 :    
181 :     =cut
182 :    
183 :     sub get_genomes {
184 :     # Get the parameters.
185 :     my ($self) = @_;
186 :     # Return a list of the genome IDs. The "genomes" member contains a 2-tuple
187 :     # with the genome ID followed by the variant code. We only return the
188 :     # genome IDs.
189 :     my @retVal = map { $_->[0] } @{$self->{genomes}};
190 :     return @retVal;
191 :     }
192 :    
193 :     =head3 get_variant_code
194 :    
195 :     C<< my $code = $sub->get_variant_code($gidx); >>
196 :    
197 :     Return the variant code for the specified genome. Each subsystem has multiple
198 :     variants which involve slightly different chemical reactions, and each variant
199 :     has an associated variant code. When a genome is connected to the spreadsheet,
200 :     the subsystem variant used by the genome must be specified.
201 :    
202 :     =over 4
203 :    
204 :     =item gidx
205 :    
206 :     Row index for the genome whose variant code is desired.
207 :    
208 :     =item RETURN
209 :    
210 :     Returns the variant code for the specified genome.
211 :    
212 :     =back
213 :    
214 :     =cut
215 :    
216 :     sub get_variant_code {
217 :     # Get the parameters.
218 :     my ($self, $gidx) = @_;
219 :     # Extract the variant code for the specified row index. It is the second
220 :     # element of the tuple from the "genomes" member.
221 :     my $retVal = $self->{genomes}->{$gidx}->[1];
222 :     return $retVal;
223 :     }
224 :    
225 :     =head3 get_curator
226 :    
227 :     C<< my $userName = $sub->get_curator(); >>
228 :    
229 :     Return the name of this subsystem's official curator.
230 :    
231 :     =cut
232 :    
233 :     sub get_curator {
234 :     # Get the parameters.
235 :     my ($self) = @_;
236 :     # Return the curator member.
237 :     return $self->{curator};
238 :     }
239 :    
240 :     =head3 get_notes
241 :    
242 :     C<< my $text = $sub->get_notes(); >>
243 :    
244 :     Return the descriptive notes for this subsystem.
245 :    
246 :     =cut
247 :    
248 :     sub get_notes {
249 :     # Get the parameters.
250 :     my ($self) = @_;
251 :     # Return the notes member.
252 :     return $self->{notes};
253 :     }
254 :    
255 :     =head3 get_roles
256 :    
257 :     C<< my @roles = $sub->get_roles(); >>
258 :    
259 :     Return a list of the subsystem's roles. Each role corresponds to a column
260 :     in the subsystem spreadsheet. The list entry at a specified position in
261 :     the list will contain the ID of that column's role.
262 :    
263 :     =cut
264 :    
265 :     sub get_roles {
266 :     # Get the parameters.
267 :     my ($self) = @_;
268 :     # Return the list of role IDs. The role IDs are stored as the first
269 :     # element of each 2-tuple in the "roles" member.
270 :     my @retVal = map { $_->[0] } @{$self->{roles}};
271 :     return @retVal;
272 :     }
273 :    
274 :     =head3 get_reactions
275 :    
276 :     C<< my $reactHash = $sub->get_reactions(); >>
277 :    
278 :     Return a reference to a hash that maps each role ID to a list of the reactions
279 :     catalyzed by the role.
280 :    
281 :     =cut
282 :    
283 :     sub get_reactions {
284 :     # Get the parameters.
285 :     my ($self) = @_;
286 :     # Return the reaction hash member.
287 :     return $self->{reactionHash};
288 :     }
289 :    
290 :     =head3 get_subset_namesC
291 :    
292 :     C<< my @subsetNames = $sub->get_subset_namesC(); >>
293 :    
294 :     Return a list of the names for all the column (role) subsets. Given a subset
295 :     name, you can use the L</get_subsetC_roles> method to get the roles in the
296 :     subset.
297 :    
298 :     =cut
299 :    
300 :     sub get_subset_namesC {
301 :     # Get the parameters.
302 :     my ($self) = @_;
303 :     # Get the sprout object and use it to retrieve the subset names.
304 :     my $sprout = $self->{sprout};
305 :     my @subsets = $sprout->GetFlat(['HasRoleSubset'], 'HasRoleSubset(from-link) = ?',
306 :     [$self->{name}], 'HasRoleSubset(to-link)');
307 :     # The sprout subset names are prefixed by the subsystem name. We need to pull the
308 :     # prefix off before we return the results. The prefixing character is a colon (:),
309 :     # so we search for the last colon to get ourselves the true subset name.
310 :     my @retVal = map { $_ =~ /:([^:]+)$/; $1 } @subsets;
311 :     return @retVal;
312 :     }
313 :    
314 :     =head3 get_role_abbr
315 :    
316 :     C<< my $abbr = $sub->get_role_abbr($ridx); >>
317 :    
318 :     Return the abbreviation for the role in the specified column. The abbreviation
319 :     is a shortened identifier that is not necessarily unique, but is more likely to
320 :     fit in a column heading.
321 :    
322 :     =over 4
323 :    
324 :     =item ridx
325 :    
326 :     Column index for the role whose abbreviation is desired.
327 :    
328 :     =item RETURN
329 :    
330 :     Returns an abbreviated name for the role corresponding to the indexed column.
331 :    
332 :     =back
333 :    
334 :     =cut
335 :    
336 :     sub get_role_abbr {
337 :     # Get the parameters.
338 :     my ($self, $ridx) = @_;
339 :     # Return the role abbreviation. The abbreviation is the second element
340 :     # in the 2-tuple for the specified column in the "roles" member.
341 :     my $retVal = $self->{roles}->[$ridx]->[1];
342 :     return $retVal;
343 :     }
344 :    
345 :     =head3 get_role_index
346 :    
347 :     C<< my $idx = $sub->get_role_index($role); >>
348 :    
349 :     Return the column index for the role with the specified ID.
350 :    
351 :     =over 4
352 :    
353 :     =item role
354 :    
355 :     ID (full name) or abbreviation of the role whose column index is desired.
356 :    
357 :     =item RETURN
358 :    
359 :     Returns the column index for the role with the specified name or abbreviation.
360 :    
361 :     =back
362 :    
363 :     =cut
364 :    
365 :     sub get_role_index {
366 :     # Get the parameters.
367 :     my ($self, $role) = @_;
368 :     # The role index is directly available from the "roleHash" member.
369 :     my $retVal = $self->{roleHash}->{$role};
370 :     return $retVal;
371 :     }
372 :    
373 :     =head3 get_subsetC_roles
374 :    
375 :     C<< my @roles = $sub->get_subsetC_roles($subname); >>
376 :    
377 :     Return the names of the roles contained in the specified role (column) subset.
378 :    
379 :     =over 4
380 :    
381 :     =item subname
382 :    
383 :     Name of the role subset whose roles are desired.
384 :    
385 :     =item RETURN
386 :    
387 :     Returns a list of the role names for the columns in the named subset.
388 :    
389 :     =back
390 :    
391 :     =cut
392 :    
393 :     sub get_subsetC_roles {
394 :     # Get the parameters.
395 :     my ($self, $subname) = @_;
396 :     # Get the sprout object. We need it to be able to get the subset data.
397 :     my $sprout = $self->{sprout};
398 :     # Convert the subset name to Sprout format. In Sprout, the subset name is
399 :     # prefixed by the subsystem name in order to get a unique subset ID.
400 :     my $subsetID = $self->{name} . ":$subname";
401 :     # Get a list of the role names for this subset.
402 :     my @roleNames = $sprout->GetFlat(['ConsistsOfRoles'], 'ConsistsOfRoles(from-link) = ?',
403 :     [$subsetID], 'ConsistsOfRoles(to-link)');
404 :     # Sort them by column number. We get the column number from the role hash.
405 :     my $roleHash = $self->{roleHash};
406 :     my @retVal = sort { $roleHash->{$a} <=> $roleHash->{$b} } @roleNames;
407 :     # Return the sorted list.
408 :     return @retVal;
409 :     }
410 :    
411 :     =head3 get_genome_index
412 :    
413 :     C<< my $idx = $sub->get_genome_index($genome); >>
414 :    
415 :     Return the row index for the genome with the specified ID.
416 :    
417 :     =over 4
418 :    
419 :     =item genome
420 :    
421 :     ID of the genome whose row index is desired.
422 :    
423 :     =item RETURN
424 :    
425 :     Returns the row index for the genome with the specified ID, or an undefined
426 :     value if the genome does not participate in the subsystem.
427 :    
428 :     =back
429 :    
430 :     =cut
431 :    
432 :     sub get_genome_index {
433 :     # Get the parameters.
434 :     my ($self, $genome) = @_;
435 :     # Get the genome row index from the "genomeHash" member.
436 :     my $retVal = $self->{genomeHash}->{$genome};
437 :     return $retVal;
438 :     }
439 :    
440 :     =head3 get_cluster_number
441 :    
442 :     C<< my $number = $sub->get_cluster_number($pegID); >>
443 :    
444 :     Return the cluster number for the specified PEG, or C<-1> if the
445 :     cluster number for the PEG is unknown or it is not clustered.
446 :    
447 :     The cluster number is read into the color hash by the
448 :     L</get_pegs_from_cell> method. If the incoming PEG IDs do not
449 :     come from the most recent cell retrievals, the information returned
450 :     will be invalid. This is a serious design flaw which needs to be
451 :     fixed soon.
452 :    
453 :     =over 4
454 :    
455 :     =item pegID
456 :    
457 :     ID of the PEG whose cluster number is desired.
458 :     TODO: items
459 :    
460 :     =back
461 :    
462 :     =cut
463 :     #: Return Type $;
464 :     sub get_cluster_number {
465 :     # Get the parameters.
466 :     my ($self, $pegID) = @_;
467 :     # Declare the return variable.
468 :     my $retVal = -1;
469 :     # Check for a cluster number in the color hash.
470 :     if (exists $self->{colorHash}->{$pegID}) {
471 :     $retVal = $self->{colorHash}->{$pegID};
472 :     }
473 :     # Return the result.
474 :     return $retVal;
475 :     }
476 :    
477 :     =head3 get_pegs_from_cell
478 :    
479 :     C<< my @pegs = $sub->get_pegs_from_cell($rowstr, $colstr); >>
480 :    
481 :     Return a list of the peg IDs for the features in the specified spreadsheet cell.
482 :    
483 :     =over 4
484 :    
485 :     =item rowstr
486 :    
487 :     Genome row, specified either as a row index or a genome ID.
488 :    
489 :     =item colstr
490 :    
491 :     Role column, specified either as a column index, a role name, or a role
492 :     abbreviation.
493 :    
494 :     =item RETURN
495 :    
496 :     Returns a list of PEG IDs. The PEGs in the list belong to the genome in the
497 :     specified row and perform the role in the specified column. If the indicated
498 :     row and column does not exist, returns an empty list.
499 :    
500 :     =back
501 :    
502 :     =cut
503 :    
504 :     sub get_pegs_from_cell {
505 :     # Get the parameters.
506 :     my ($self, $rowstr, $colstr) = @_;
507 :     # Get the sprout object for accessing the database.
508 :     my $sprout = $self->{sprout};
509 :     # We need to convert the incoming row and column identifiers. We need a
510 :     # numeric column index and a character genome ID to create the ID for the
511 :     # subsystem spreadsheet cell. First, the column index: note that our version
512 :     # of "get_role_index" conveniently works for both abbreviations and full role IDs.
513 :     my $colIdx = ($colstr =~ /^(\d+)$/ ? $colstr : $self->get_role_index($colstr));
514 :     # Next the genome ID. In this case, we convert any number we find to a string.
515 :     # This requires a little care to avoid a run-time error if the row number is
516 :     # out of range.
517 :     my $genomeID = $rowstr;
518 :     if ($rowstr =~ /^(\d+)$/) {
519 :     # Here we need to convert the row number to an ID. Insure the number is in
520 :     # range. Note that if we do have a row number out of range, the genome ID
521 :     # will be invalid, and our attempt to read from the database will return an
522 :     # empty list.
523 :     my $genomeList = $self->{genomes};
524 :     if ($rowstr >= 0 && $rowstr < @{$genomeList}) {
525 :     $genomeID = $genomeList->[$rowstr]->[0];
526 :     }
527 :     }
528 :     # Construct the spreadsheet cell ID from the information we have.
529 :     my $cellID = $self->{name} . ":$genomeID:$colIdx";
530 :     # Get the list of PEG IDs and cluster numbers for the indicated cell.
531 :     my @pegList = $sprout->GetAll(['ContainsFeature'], 'ContainsFeature(from-link) = ?',
532 :     [$cellID], ['ContainsFeature(to-link)',
533 :     'ContainsFeature(cluster-number)']);
534 :     # Copy the pegs into the return list, and save the cluster numbers in the color hash.
535 :     my @retVal = ();
536 :     for my $pegEntry (@pegList) {
537 :     my ($peg, $cluster) = @{$pegEntry};
538 :     $self->{colorHash}->{$peg} = $cluster;
539 :     push @retVal, $peg;
540 :     }
541 :     # Return the list. If the spreadsheet cell was empty or non-existent, we'll end
542 :     # up returning an empty list.
543 :     return @retVal;
544 :     }
545 :    
546 :     1;
547 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3