[Bio] / Sprout / SproutSubsys.pm Repository:
ViewVC logotype

Annotation of /Sprout/SproutSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package SproutSubsys;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use PageBuilder;
8 :     use FIG;
9 :     use Sprout;
10 :    
11 :     =head1 Sprout Subsystem Object
12 :    
13 :     =head2 Introduction
14 :    
15 :     This object emulates the capabilities of the FIG-style C<Subsystem> object, but
16 :     uses Sprout methods to retrieve the data. This object can be dropped in place of
17 :     the UnvSubsys object to create subsystem displays for the Sprout rather than the
18 :     SEED.
19 :    
20 :     The structure created by the constructor contains the following data members.
21 :    
22 :     =over 4
23 :    
24 :     =item name
25 :    
26 :     Name of the subsystem. This is needed for any further database accesses required.
27 :    
28 :     =item curator
29 :    
30 :     Name of the subsystem's official curator.
31 :    
32 :     =item notes
33 :    
34 :     General notes about the subsystem.
35 :    
36 :     =item sprout
37 :    
38 :     Sprout object for accessing the database. This is a genuine Sprout object, not
39 :     an SFXlate object.
40 :    
41 :     =item genomeHash
42 :    
43 :     Map of genome IDs to row indices.
44 :    
45 :     =item genomes
46 :    
47 :     List of [genomeID, variantCode] tuples in row order.
48 :    
49 :     =item roleHash
50 :    
51 :     Map of role IDs and abbreviations to column indices. In other words, plugging
52 :     either a full-blown role ID or its abbreviation into this hash will return
53 :     the role's column index.
54 :    
55 :     =item roles
56 :    
57 :     List of [roleID, abbreviation] tuples in column order.
58 :    
59 :     =item reactionHash
60 :    
61 :     Map of role IDs to a list of the reactions catalyzed by the role.
62 :    
63 :     =item colorHash
64 :    
65 :     Map of PEG IDs to cluster numbers. This is used to create color maps for
66 :     display of a subsystem's PEGs.
67 :    
68 :     =back
69 :    
70 :     =cut
71 :    
72 :     #: Constructor SproutSubsys->new();
73 :    
74 :     =head2 Public Methods
75 :    
76 :     =head3 new
77 :    
78 :     C<< my $sub = Subsystem->new($subName, $sprout); >>
79 :    
80 :     Load the subsystem.
81 :    
82 :     =over 4
83 :    
84 :     =item subName
85 :    
86 :     Name of the desired subsystem.
87 :    
88 :     =item sprout
89 :    
90 :     Sprout or SFXlate object for accessing the Sprout data store.
91 :    
92 :     =back
93 :    
94 :     =cut
95 :    
96 :     sub new {
97 :     # Get the parameters.
98 :     my ($class, $subName, $sprout) = @_;
99 :     # Insure we have a Sprout object.
100 :     if (ref $sprout eq 'SFXlate') {
101 :     $sprout = $sprout->{sprout};
102 :     }
103 :     # Declare the return value.
104 :     my $retVal;
105 :     # Get the subsystem's data fields.
106 :     my ($curator, $notes) = $sprout->GetEntityValues('Subsystem', $subName, ['Subsystem(curator)',
107 :     'Subsystem(notes)']);
108 :     # Only proceed if we found the subsystem.
109 :     if (defined $curator) {
110 :     # Get the genome IDs and variant codes for the rows. The list returned
111 :     # by GetAll will be a list of 2-tuples, each consisting of a genome ID
112 :     # and a subsystem variant code.
113 :     my @genomes = $sprout->GetAll(['ParticipatesIn'],
114 :     'ParticipatesIn(to-link) = ? ORDER BY ParticipatesIn(variant-code), ParticipatesIn(from-link)',
115 :     [$subName], ['ParticipatesIn(from-link)',
116 :     'ParticipatesIn(variant-code)']);
117 :     # Create the genome ID directory. This is a hash that maps a genome ID to its
118 :     # row index.
119 :     my $idx = 0;
120 :     my %genomeHash = map { $_->[0] => $idx++ } @genomes;
121 :     # Get the role IDs and abbreviations. The list returned by GetAll will be
122 :     # a list of 2-tuples, each consisting of a role ID and abbreviation. The
123 :     # 2-tuples will be ordered by the spreadsheet column number.
124 :     my @roles = $sprout->GetAll(['OccursInSubsystem', 'Role'],
125 :     'OccursInSubsystem(to-link) = ? ORDER BY OccursInSubsystem(column-number)',
126 :     [$subName], ['OccursInSubsystem(from-link)', 'Role(abbr)']);
127 :     # Now we need to create the role ID directory and the reaction hash.
128 :     # The role ID directory maps role IDs and their abbreviations to column numbers.
129 :     # The reaction hash maps a role ID to a list of the IDs for the reactions it
130 :     # catalyzes.
131 :     my %roleHash = ();
132 :     my %reactionHash = ();
133 :     for ($idx = 0; $idx <= $#roles; $idx++) {
134 :     # Get the role ID and abbreviation for this column's role.
135 :     my ($roleID, $abbr) = @{$roles[$idx]};
136 :     # Put them both in the role directory.
137 :     $roleHash{$roleID} = $idx;
138 :     $roleHash{$abbr} = $idx;
139 :     # Get this role's reactions.
140 :     my @reactions = $sprout->GetFlat(['Catalyzes'], 'Catalyzes(from-link) = ?',
141 :     [$roleID], 'Catalyzes(to-link)');
142 :     # Put them in the reaction hash.
143 : parrello 1.3 if (@reactions > 0) {
144 :     $reactionHash{$roleID} = \@reactions;
145 :     }
146 : parrello 1.1 }
147 :     # Create the subsystem object.
148 :     $retVal = {
149 :     # Name of the subsystem. This is needed for any further database
150 :     # accesses required.
151 :     name => $subName,
152 :     # Name of the subsystem's official curator.
153 :     curator => $curator,
154 :     # General notes about the subsystem.
155 :     notes => $notes,
156 :     # Sprout object for accessing the database.
157 :     sprout => $sprout,
158 :     # Map of genome IDs to row indices.
159 :     genomeHash => \%genomeHash,
160 :     # List of [genomeID, variantCode] tuples in row order.
161 :     genomes => \@genomes,
162 :     # Map of role IDs and abbreviations to column indices.
163 :     roleHash => \%roleHash,
164 :     # List of [roleID, abbreviation] tuples in column order.
165 :     roles => \@roles,
166 :     # Map of PEG IDs to cluster numbers.
167 :     colorHash => {},
168 : parrello 1.2 # Map of role IDs to reactions.
169 :     reactionHash => \%reactionHash,
170 : parrello 1.1 };
171 :     # Bless and return it.
172 :     bless $retVal, $class;
173 :     }
174 :     return $retVal;
175 :     }
176 :    
177 :     =head3 get_genomes
178 :    
179 :     C<< my @genomeList = $sub->get_genomes(); >>
180 :    
181 :     Return a list of the genome IDs for this subsystem. Each genome corresponds to a row
182 :     in the subsystem spreadsheet. Indexing into this list returns the ID of the genome
183 :     in the specified row.
184 :    
185 :     =cut
186 :    
187 :     sub get_genomes {
188 :     # Get the parameters.
189 :     my ($self) = @_;
190 :     # Return a list of the genome IDs. The "genomes" member contains a 2-tuple
191 :     # with the genome ID followed by the variant code. We only return the
192 :     # genome IDs.
193 :     my @retVal = map { $_->[0] } @{$self->{genomes}};
194 :     return @retVal;
195 :     }
196 :    
197 :     =head3 get_variant_code
198 :    
199 :     C<< my $code = $sub->get_variant_code($gidx); >>
200 :    
201 :     Return the variant code for the specified genome. Each subsystem has multiple
202 :     variants which involve slightly different chemical reactions, and each variant
203 :     has an associated variant code. When a genome is connected to the spreadsheet,
204 :     the subsystem variant used by the genome must be specified.
205 :    
206 :     =over 4
207 :    
208 :     =item gidx
209 :    
210 :     Row index for the genome whose variant code is desired.
211 :    
212 :     =item RETURN
213 :    
214 :     Returns the variant code for the specified genome.
215 :    
216 :     =back
217 :    
218 :     =cut
219 :    
220 :     sub get_variant_code {
221 :     # Get the parameters.
222 :     my ($self, $gidx) = @_;
223 :     # Extract the variant code for the specified row index. It is the second
224 :     # element of the tuple from the "genomes" member.
225 : parrello 1.5 my $retVal = $self->{genomes}->[$gidx]->[1];
226 : parrello 1.1 return $retVal;
227 :     }
228 :    
229 :     =head3 get_curator
230 :    
231 :     C<< my $userName = $sub->get_curator(); >>
232 :    
233 :     Return the name of this subsystem's official curator.
234 :    
235 :     =cut
236 :    
237 :     sub get_curator {
238 :     # Get the parameters.
239 :     my ($self) = @_;
240 :     # Return the curator member.
241 :     return $self->{curator};
242 :     }
243 :    
244 :     =head3 get_notes
245 :    
246 :     C<< my $text = $sub->get_notes(); >>
247 :    
248 :     Return the descriptive notes for this subsystem.
249 :    
250 :     =cut
251 :    
252 :     sub get_notes {
253 :     # Get the parameters.
254 :     my ($self) = @_;
255 :     # Return the notes member.
256 :     return $self->{notes};
257 :     }
258 :    
259 :     =head3 get_roles
260 :    
261 :     C<< my @roles = $sub->get_roles(); >>
262 :    
263 :     Return a list of the subsystem's roles. Each role corresponds to a column
264 :     in the subsystem spreadsheet. The list entry at a specified position in
265 :     the list will contain the ID of that column's role.
266 :    
267 :     =cut
268 :    
269 :     sub get_roles {
270 :     # Get the parameters.
271 :     my ($self) = @_;
272 :     # Return the list of role IDs. The role IDs are stored as the first
273 :     # element of each 2-tuple in the "roles" member.
274 :     my @retVal = map { $_->[0] } @{$self->{roles}};
275 :     return @retVal;
276 :     }
277 :    
278 :     =head3 get_reactions
279 :    
280 :     C<< my $reactHash = $sub->get_reactions(); >>
281 :    
282 :     Return a reference to a hash that maps each role ID to a list of the reactions
283 :     catalyzed by the role.
284 :    
285 :     =cut
286 :    
287 :     sub get_reactions {
288 :     # Get the parameters.
289 :     my ($self) = @_;
290 :     # Return the reaction hash member.
291 :     return $self->{reactionHash};
292 :     }
293 :    
294 :     =head3 get_subset_namesC
295 :    
296 :     C<< my @subsetNames = $sub->get_subset_namesC(); >>
297 :    
298 :     Return a list of the names for all the column (role) subsets. Given a subset
299 :     name, you can use the L</get_subsetC_roles> method to get the roles in the
300 :     subset.
301 :    
302 :     =cut
303 :    
304 :     sub get_subset_namesC {
305 :     # Get the parameters.
306 :     my ($self) = @_;
307 :     # Get the sprout object and use it to retrieve the subset names.
308 :     my $sprout = $self->{sprout};
309 :     my @subsets = $sprout->GetFlat(['HasRoleSubset'], 'HasRoleSubset(from-link) = ?',
310 :     [$self->{name}], 'HasRoleSubset(to-link)');
311 :     # The sprout subset names are prefixed by the subsystem name. We need to pull the
312 :     # prefix off before we return the results. The prefixing character is a colon (:),
313 :     # so we search for the last colon to get ourselves the true subset name.
314 :     my @retVal = map { $_ =~ /:([^:]+)$/; $1 } @subsets;
315 :     return @retVal;
316 :     }
317 :    
318 :     =head3 get_role_abbr
319 :    
320 :     C<< my $abbr = $sub->get_role_abbr($ridx); >>
321 :    
322 :     Return the abbreviation for the role in the specified column. The abbreviation
323 :     is a shortened identifier that is not necessarily unique, but is more likely to
324 :     fit in a column heading.
325 :    
326 :     =over 4
327 :    
328 :     =item ridx
329 :    
330 :     Column index for the role whose abbreviation is desired.
331 :    
332 :     =item RETURN
333 :    
334 :     Returns an abbreviated name for the role corresponding to the indexed column.
335 :    
336 :     =back
337 :    
338 :     =cut
339 :    
340 :     sub get_role_abbr {
341 :     # Get the parameters.
342 :     my ($self, $ridx) = @_;
343 :     # Return the role abbreviation. The abbreviation is the second element
344 :     # in the 2-tuple for the specified column in the "roles" member.
345 :     my $retVal = $self->{roles}->[$ridx]->[1];
346 :     return $retVal;
347 :     }
348 :    
349 :     =head3 get_role_index
350 :    
351 :     C<< my $idx = $sub->get_role_index($role); >>
352 :    
353 :     Return the column index for the role with the specified ID.
354 :    
355 :     =over 4
356 :    
357 :     =item role
358 :    
359 :     ID (full name) or abbreviation of the role whose column index is desired.
360 :    
361 :     =item RETURN
362 :    
363 :     Returns the column index for the role with the specified name or abbreviation.
364 :    
365 :     =back
366 :    
367 :     =cut
368 :    
369 :     sub get_role_index {
370 :     # Get the parameters.
371 :     my ($self, $role) = @_;
372 :     # The role index is directly available from the "roleHash" member.
373 :     my $retVal = $self->{roleHash}->{$role};
374 :     return $retVal;
375 :     }
376 :    
377 :     =head3 get_subsetC_roles
378 :    
379 :     C<< my @roles = $sub->get_subsetC_roles($subname); >>
380 :    
381 :     Return the names of the roles contained in the specified role (column) subset.
382 :    
383 :     =over 4
384 :    
385 :     =item subname
386 :    
387 :     Name of the role subset whose roles are desired.
388 :    
389 :     =item RETURN
390 :    
391 :     Returns a list of the role names for the columns in the named subset.
392 :    
393 :     =back
394 :    
395 :     =cut
396 :    
397 :     sub get_subsetC_roles {
398 :     # Get the parameters.
399 :     my ($self, $subname) = @_;
400 :     # Get the sprout object. We need it to be able to get the subset data.
401 :     my $sprout = $self->{sprout};
402 :     # Convert the subset name to Sprout format. In Sprout, the subset name is
403 :     # prefixed by the subsystem name in order to get a unique subset ID.
404 :     my $subsetID = $self->{name} . ":$subname";
405 :     # Get a list of the role names for this subset.
406 :     my @roleNames = $sprout->GetFlat(['ConsistsOfRoles'], 'ConsistsOfRoles(from-link) = ?',
407 :     [$subsetID], 'ConsistsOfRoles(to-link)');
408 :     # Sort them by column number. We get the column number from the role hash.
409 :     my $roleHash = $self->{roleHash};
410 :     my @retVal = sort { $roleHash->{$a} <=> $roleHash->{$b} } @roleNames;
411 :     # Return the sorted list.
412 :     return @retVal;
413 :     }
414 :    
415 :     =head3 get_genome_index
416 :    
417 :     C<< my $idx = $sub->get_genome_index($genome); >>
418 :    
419 :     Return the row index for the genome with the specified ID.
420 :    
421 :     =over 4
422 :    
423 :     =item genome
424 :    
425 :     ID of the genome whose row index is desired.
426 :    
427 :     =item RETURN
428 :    
429 :     Returns the row index for the genome with the specified ID, or an undefined
430 :     value if the genome does not participate in the subsystem.
431 :    
432 :     =back
433 :    
434 :     =cut
435 :    
436 :     sub get_genome_index {
437 :     # Get the parameters.
438 :     my ($self, $genome) = @_;
439 :     # Get the genome row index from the "genomeHash" member.
440 :     my $retVal = $self->{genomeHash}->{$genome};
441 :     return $retVal;
442 :     }
443 :    
444 :     =head3 get_cluster_number
445 :    
446 :     C<< my $number = $sub->get_cluster_number($pegID); >>
447 :    
448 :     Return the cluster number for the specified PEG, or C<-1> if the
449 :     cluster number for the PEG is unknown or it is not clustered.
450 :    
451 :     The cluster number is read into the color hash by the
452 :     L</get_pegs_from_cell> method. If the incoming PEG IDs do not
453 :     come from the most recent cell retrievals, the information returned
454 :     will be invalid. This is a serious design flaw which needs to be
455 :     fixed soon.
456 :    
457 :     =over 4
458 :    
459 :     =item pegID
460 :    
461 :     ID of the PEG whose cluster number is desired.
462 : parrello 1.4
463 :     =item RETURN
464 :    
465 :     Returns the appropriate cluster number.
466 : parrello 1.1
467 :     =back
468 :    
469 :     =cut
470 :     #: Return Type $;
471 :     sub get_cluster_number {
472 :     # Get the parameters.
473 :     my ($self, $pegID) = @_;
474 :     # Declare the return variable.
475 :     my $retVal = -1;
476 :     # Check for a cluster number in the color hash.
477 :     if (exists $self->{colorHash}->{$pegID}) {
478 :     $retVal = $self->{colorHash}->{$pegID};
479 :     }
480 :     # Return the result.
481 :     return $retVal;
482 :     }
483 :    
484 :     =head3 get_pegs_from_cell
485 :    
486 :     C<< my @pegs = $sub->get_pegs_from_cell($rowstr, $colstr); >>
487 :    
488 :     Return a list of the peg IDs for the features in the specified spreadsheet cell.
489 :    
490 :     =over 4
491 :    
492 :     =item rowstr
493 :    
494 :     Genome row, specified either as a row index or a genome ID.
495 :    
496 :     =item colstr
497 :    
498 :     Role column, specified either as a column index, a role name, or a role
499 :     abbreviation.
500 :    
501 :     =item RETURN
502 :    
503 :     Returns a list of PEG IDs. The PEGs in the list belong to the genome in the
504 :     specified row and perform the role in the specified column. If the indicated
505 :     row and column does not exist, returns an empty list.
506 :    
507 :     =back
508 :    
509 :     =cut
510 :    
511 :     sub get_pegs_from_cell {
512 :     # Get the parameters.
513 :     my ($self, $rowstr, $colstr) = @_;
514 :     # Get the sprout object for accessing the database.
515 :     my $sprout = $self->{sprout};
516 :     # We need to convert the incoming row and column identifiers. We need a
517 :     # numeric column index and a character genome ID to create the ID for the
518 :     # subsystem spreadsheet cell. First, the column index: note that our version
519 :     # of "get_role_index" conveniently works for both abbreviations and full role IDs.
520 :     my $colIdx = ($colstr =~ /^(\d+)$/ ? $colstr : $self->get_role_index($colstr));
521 :     # Next the genome ID. In this case, we convert any number we find to a string.
522 :     # This requires a little care to avoid a run-time error if the row number is
523 :     # out of range.
524 :     my $genomeID = $rowstr;
525 :     if ($rowstr =~ /^(\d+)$/) {
526 :     # Here we need to convert the row number to an ID. Insure the number is in
527 :     # range. Note that if we do have a row number out of range, the genome ID
528 :     # will be invalid, and our attempt to read from the database will return an
529 :     # empty list.
530 :     my $genomeList = $self->{genomes};
531 :     if ($rowstr >= 0 && $rowstr < @{$genomeList}) {
532 :     $genomeID = $genomeList->[$rowstr]->[0];
533 :     }
534 :     }
535 :     # Construct the spreadsheet cell ID from the information we have.
536 :     my $cellID = $self->{name} . ":$genomeID:$colIdx";
537 :     # Get the list of PEG IDs and cluster numbers for the indicated cell.
538 :     my @pegList = $sprout->GetAll(['ContainsFeature'], 'ContainsFeature(from-link) = ?',
539 :     [$cellID], ['ContainsFeature(to-link)',
540 :     'ContainsFeature(cluster-number)']);
541 :     # Copy the pegs into the return list, and save the cluster numbers in the color hash.
542 :     my @retVal = ();
543 :     for my $pegEntry (@pegList) {
544 :     my ($peg, $cluster) = @{$pegEntry};
545 :     $self->{colorHash}->{$peg} = $cluster;
546 :     push @retVal, $peg;
547 :     }
548 :     # Return the list. If the spreadsheet cell was empty or non-existent, we'll end
549 :     # up returning an empty list.
550 :     return @retVal;
551 :     }
552 :    
553 : parrello 1.4 =head3 get_diagrams
554 :    
555 :     C<< my @list = $sub->get_diagrams(); >>
556 :    
557 :     Return a list of the diagrams associated with this subsystem. Each diagram
558 :     is represented in the return list as a 4-tuple C<[diagram_id, diagram_name,
559 :     page_link, img_link]> where
560 :    
561 :     =over 4
562 :    
563 :     =item diagram_id
564 :    
565 :     ID code for this diagram.
566 :    
567 :     =item diagram_name
568 :    
569 :     Displayable name of the diagram.
570 :    
571 :     =item page_link
572 :    
573 :     URL of an HTML page containing information about the diagram.
574 :    
575 :     =item img_link
576 :    
577 :     URL of an HTML page containing an image for the diagram.
578 :    
579 :     =back
580 :    
581 :     Note that the URLs are in fact for CGI scripts with parameters that point them
582 :     to the correct place. Though Sprout has diagram information in it, it has
583 :     no relationship to the diagrams displayed in SEED, so the work is done entirely
584 :     on the SEED side.
585 :    
586 :     =cut
587 :    
588 :     sub get_diagrams {
589 :     # Get the parameters.
590 :     my ($self) = @_;
591 :     # Find the subsystem directory.
592 :     my $subDir = Subsystem::get_dir_from_name($self->{name});
593 : parrello 1.6 Trace("Subsystem directory is $subDir.") if T(3);
594 : parrello 1.4 # Get the diagram IDs.
595 :     my @diagramIDs = Subsystem::GetDiagramIDs($subDir);
596 : parrello 1.6 Trace("Diagram IDs are " . join(", ", @diagramIDs)) if T(3);
597 : parrello 1.4 # Create the return variable.
598 :     my @retVal = ();
599 :     # Loop through the diagram IDs.
600 :     for my $diagramID (@diagramIDs) {
601 : parrello 1.6 Trace("Processing diagram $diagramID.") if T(3);
602 : parrello 1.4 # Get the diagram name.
603 : parrello 1.6 my $name = Subsystem::GetDiagramName($subDir, $diagramID);
604 :     Trace("Diagram name is $name.") if T(3);
605 :     # Get the URLs.
606 :     my ($link, $imgLink) = Subsystem::ComputeDiagramURLs($self->{name},
607 : parrello 1.4 $diagramID);
608 : parrello 1.6 Trace("Diagram URLs are \"$link\" and \"$imgLink\".") if T(3);
609 :     push @retVal, [$diagramID, $name, $link, $imgLink];
610 : parrello 1.4 }
611 : parrello 1.6 # Return the result.
612 :     return @retVal;
613 : parrello 1.4 }
614 :    
615 : parrello 1.1
616 : parrello 1.4 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3