[Bio] / Sprout / SproutSubsys.pm Repository:
ViewVC logotype

Annotation of /Sprout/SproutSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.17 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package SproutSubsys;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use PageBuilder;
8 :     use FIG;
9 :     use Sprout;
10 :    
11 :     =head1 Sprout Subsystem Object
12 :    
13 :     =head2 Introduction
14 :    
15 :     This object emulates the capabilities of the FIG-style C<Subsystem> object, but
16 :     uses Sprout methods to retrieve the data. This object can be dropped in place of
17 :     the UnvSubsys object to create subsystem displays for the Sprout rather than the
18 :     SEED.
19 :    
20 :     The structure created by the constructor contains the following data members.
21 :    
22 :     =over 4
23 :    
24 :     =item name
25 :    
26 :     Name of the subsystem. This is needed for any further database accesses required.
27 :    
28 :     =item curator
29 :    
30 :     Name of the subsystem's official curator.
31 :    
32 :     =item notes
33 :    
34 :     General notes about the subsystem.
35 :    
36 :     =item sprout
37 :    
38 :     Sprout object for accessing the database. This is a genuine Sprout object, not
39 :     an SFXlate object.
40 :    
41 :     =item genomeHash
42 :    
43 :     Map of genome IDs to row indices.
44 :    
45 :     =item genomes
46 :    
47 :     List of [genomeID, variantCode] tuples in row order.
48 :    
49 :     =item roleHash
50 :    
51 :     Map of role IDs and abbreviations to column indices. In other words, plugging
52 :     either a full-blown role ID or its abbreviation into this hash will return
53 :     the role's column index.
54 :    
55 :     =item roles
56 :    
57 :     List of [roleID, abbreviation] tuples in column order.
58 :    
59 : parrello 1.9 =item dir
60 :    
61 :     Directory root for the diagram and image files.
62 :    
63 : parrello 1.1 =item reactionHash
64 :    
65 :     Map of role IDs to a list of the reactions catalyzed by the role.
66 :    
67 :     =item colorHash
68 :    
69 :     Map of PEG IDs to cluster numbers. This is used to create color maps for
70 :     display of a subsystem's PEGs.
71 :    
72 : parrello 1.12 =item hopeReactions
73 :    
74 :     Map of roles to EC numbers for the Hope reactions. This object is not loaded
75 :     until it is needed.
76 :    
77 : parrello 1.1 =back
78 :    
79 :     =cut
80 :    
81 :     #: Constructor SproutSubsys->new();
82 :    
83 :     =head2 Public Methods
84 :    
85 :     =head3 new
86 :    
87 : parrello 1.12 my $sub = Subsystem->new($subName, $sprout);
88 : parrello 1.1
89 :     Load the subsystem.
90 :    
91 :     =over 4
92 :    
93 :     =item subName
94 :    
95 :     Name of the desired subsystem.
96 :    
97 :     =item sprout
98 :    
99 :     Sprout or SFXlate object for accessing the Sprout data store.
100 :    
101 :     =back
102 :    
103 :     =cut
104 :    
105 :     sub new {
106 :     # Get the parameters.
107 :     my ($class, $subName, $sprout) = @_;
108 :     # Insure we have a Sprout object.
109 :     if (ref $sprout eq 'SFXlate') {
110 :     $sprout = $sprout->{sprout};
111 :     }
112 :     # Declare the return value.
113 :     my $retVal;
114 : parrello 1.14 # Get the subsystem's object.
115 :     my $subsystemObject = $sprout->GetEntity('Subsystem', $subName);
116 :     if (! defined $subsystemObject) {
117 :     # Here we're stuck.
118 :     Confess("Subsystem \"$subName\" not found in database.");
119 :     } else {
120 :     # We've found it, so get the major data.
121 :     my ($curator, $notes, $description) = $subsystemObject->Values(['Subsystem(curator)', 'Subsystem(notes)',
122 :     'Subsystem(description)']);
123 : parrello 1.1 # Get the genome IDs and variant codes for the rows. The list returned
124 :     # by GetAll will be a list of 2-tuples, each consisting of a genome ID
125 :     # and a subsystem variant code.
126 :     my @genomes = $sprout->GetAll(['ParticipatesIn'],
127 :     'ParticipatesIn(to-link) = ? ORDER BY ParticipatesIn(variant-code), ParticipatesIn(from-link)',
128 :     [$subName], ['ParticipatesIn(from-link)',
129 :     'ParticipatesIn(variant-code)']);
130 :     # Create the genome ID directory. This is a hash that maps a genome ID to its
131 :     # row index.
132 :     my $idx = 0;
133 :     my %genomeHash = map { $_->[0] => $idx++ } @genomes;
134 :     # Get the role IDs and abbreviations. The list returned by GetAll will be
135 :     # a list of 2-tuples, each consisting of a role ID and abbreviation. The
136 :     # 2-tuples will be ordered by the spreadsheet column number.
137 : parrello 1.11 my @roles = $sprout->GetAll(['OccursInSubsystem'],
138 : parrello 1.1 'OccursInSubsystem(to-link) = ? ORDER BY OccursInSubsystem(column-number)',
139 : parrello 1.11 [$subName], ['OccursInSubsystem(from-link)', 'OccursInSubsystem(abbr)']);
140 : parrello 1.1 # Now we need to create the role ID directory and the reaction hash.
141 :     # The role ID directory maps role IDs and their abbreviations to column numbers.
142 :     # The reaction hash maps a role ID to a list of the IDs for the reactions it
143 :     # catalyzes.
144 :     my %roleHash = ();
145 : parrello 1.11 my %abbrHash = ();
146 : parrello 1.1 my %reactionHash = ();
147 :     for ($idx = 0; $idx <= $#roles; $idx++) {
148 :     # Get the role ID and abbreviation for this column's role.
149 :     my ($roleID, $abbr) = @{$roles[$idx]};
150 :     # Put them both in the role directory.
151 :     $roleHash{$roleID} = $idx;
152 :     $roleHash{$abbr} = $idx;
153 : parrello 1.11 # Put the full name in the abbreviation directory.
154 :     $abbrHash{$abbr} = $roleID;
155 : parrello 1.1 # Get this role's reactions.
156 :     my @reactions = $sprout->GetFlat(['Catalyzes'], 'Catalyzes(from-link) = ?',
157 :     [$roleID], 'Catalyzes(to-link)');
158 :     # Put them in the reaction hash.
159 : parrello 1.3 if (@reactions > 0) {
160 :     $reactionHash{$roleID} = \@reactions;
161 :     }
162 : parrello 1.1 }
163 : parrello 1.9 # Find the subsystem directory.
164 :     my $subDir = Subsystem::get_dir_from_name($subName);
165 :     Trace("Subsystem directory is $subDir.") if T(3);
166 : parrello 1.1 # Create the subsystem object.
167 :     $retVal = {
168 :     # Name of the subsystem. This is needed for any further database
169 :     # accesses required.
170 :     name => $subName,
171 : parrello 1.9 # Directory root for diagram and image files.
172 :     dir => $subDir,
173 : parrello 1.1 # Name of the subsystem's official curator.
174 :     curator => $curator,
175 :     # General notes about the subsystem.
176 :     notes => $notes,
177 :     # Sprout object for accessing the database.
178 :     sprout => $sprout,
179 :     # Map of genome IDs to row indices.
180 :     genomeHash => \%genomeHash,
181 :     # List of [genomeID, variantCode] tuples in row order.
182 :     genomes => \@genomes,
183 :     # Map of role IDs and abbreviations to column indices.
184 :     roleHash => \%roleHash,
185 :     # List of [roleID, abbreviation] tuples in column order.
186 :     roles => \@roles,
187 :     # Map of PEG IDs to cluster numbers.
188 :     colorHash => {},
189 : parrello 1.11 # Map of abbreviations to role names.
190 :     abbrHash => \%abbrHash,
191 : parrello 1.2 # Map of role IDs to reactions.
192 :     reactionHash => \%reactionHash,
193 : parrello 1.1 };
194 :     # Bless and return it.
195 :     bless $retVal, $class;
196 :     }
197 :     return $retVal;
198 :     }
199 :    
200 : parrello 1.12 =head3 get_row
201 :    
202 :     my $rowData = $sub->get_row($rowIndex);
203 :    
204 :     Return the specified row in the subsystem spreadsheet. The row consists
205 :     of a list of lists. Each position in the major list represents the role
206 :     for that position, and contains a list of the IDs for the features that
207 :     perform the role.
208 :    
209 :     =over 4
210 :    
211 :     =item rowIndex
212 :    
213 :     Index of the row to return. A row contains data for a single genome.
214 :    
215 :     =item RETURN
216 :    
217 :     Returns a reference to a list of lists. Each element in the list represents
218 :     a spreadsheet column (role) and contains a list of features that perform the
219 :     role.
220 :    
221 :     =back
222 :    
223 :     =cut
224 :    
225 :     sub get_row {
226 :     # Get the parameters.
227 :     my ($self, $rowIndex) = @_;
228 :     # Get the genome ID for the specified row's genome.
229 :     my $genomeID = $self->{genomes}->[$rowIndex]->[0];
230 :     # Read the row from the database. We won't get exactly what we want. Instead, we'll
231 :     # get a list of triplets, each consisting of a role name, a feature ID, and a cluster
232 :     # number. We need to convert this into a list of lists and stash the clustering information
233 :     # in the color hash.
234 :     my @rowData = $self->{sprout}->GetAll([qw(Subsystem HasSSCell IsGenomeOf IsRoleOf ContainsFeature)],
235 :     "Subsystem(id) = ? AND IsGenomeOf(from-link) = ?",
236 :     [$self->{name}, $genomeID],
237 :     [qw(IsRoleOf(from-link) ContainsFeature(to-link)
238 :     ContainsFeature(cluster-number))]);
239 :     # Now we do the conversion. We must first create an array of empty lists, one per
240 :     # row index.
241 :     my @retVal = map { [] } @{$self->{roles}};
242 :     # Get the hash for converting role IDs to role indexes.
243 :     my $roleHash = $self->{roleHash};
244 :     # Now we stash all the feature IDs in the appropriate columns of the row list.
245 :     for my $rowDatum (@rowData) {
246 :     # Get the role ID, the peg ID, and the cluster number.
247 :     my ($role, $peg, $cluster) = @{$rowDatum};
248 :     # Put the peg in the role's peg list.
249 :     push @{$retVal[$roleHash->{$role}]}, $peg;
250 :     # Put the cluster number in the color hash.
251 :     $self->{colorHash}->{$peg} = $cluster;
252 :     }
253 :     # Return the result.
254 :     return \@retVal;
255 :     }
256 :    
257 : parrello 1.14 =head3 get_roles_for_genome
258 :    
259 :     my @roles = $sub->get_roles_for_genome($genome_id);
260 :    
261 :     Return a list of the roles in this subsystem that have nonempty
262 :     spreadsheet cells for the given genome.
263 :    
264 :     =over 4
265 :    
266 :     =item genome_id
267 :    
268 :     ID of the relevant genome.
269 :    
270 :     =item RETURN
271 :    
272 :     Returns a list of role IDs.
273 :    
274 :     =back
275 :    
276 :     =cut
277 :    
278 :     sub get_roles_for_genome {
279 :     # Get the parameters.
280 :     my ($self, $genome_id) = @_;
281 :     # This next statement gets all of the nonempty cells for the genome's row and memorizes
282 :     # the roles by rolling them into a hash. The query connects four relationship tables on
283 :     # a single common key-- the spreadsheet cell ID. The IsGenomeOf table insures the cell is for the
284 :     # correct genome. The HasSSCell table insures that it belongs to the correct subsystem. The
285 :     # ContainsFeature table insures that it contains at least one feature. Finally, IsRoleOf tells
286 :     # us the cell's role. If a cell has more than one feature, the result list from the query will return
287 :     # one instance of the role for every distinct feature. The hash collapses the duplicates automatically.
288 :     my %retVal = map { $_ => 1 } $self->{sprout}->GetFlat([qw(ContainsFeature HasSSCell IsGenomeOf IsRoleOf)],
289 :     "HasSSCell(from-link) = ? AND IsGenomeOf(from-link) = ?",
290 :     [$self->{name}, $genome_id], 'IsRoleOf(from-link)');
291 :     # Return the result.
292 :     return keys %retVal;
293 :     }
294 :    
295 : parrello 1.12 =head3 get_abbr_for_role
296 :    
297 :     my $abbr = $sub->get_abbr_for_role($name);
298 :    
299 :     Get this subsystem's abbreviation for the specified role.
300 :    
301 :     =over 4
302 :    
303 :     =item name
304 :    
305 :     Name of the relevant role.
306 :    
307 :     =item RETURN
308 :    
309 :     Returns the abbreviation for the role. Each subsystem has its own abbreviation
310 :     system; the abbreviations make it easier to display the subsystem spreadsheet.
311 :    
312 :     =back
313 :    
314 :     =cut
315 :    
316 : parrello 1.14 sub get_abbr_for_role {
317 : parrello 1.12 # Get the parameters.
318 :     my ($self, $name) = @_;
319 :     # Get the index for this role.
320 :     my $idx = $self->get_role_index($name);
321 :     # Return the abbreviation.
322 :     return $self->get_role_abbr($idx);
323 :     }
324 :    
325 :     =head3 get_subsetC
326 :    
327 :     my @columns = $sub->get_subsetC($subsetName);
328 :    
329 :     Return a list of the column numbers for the columns in the named role
330 :     subset.
331 :    
332 :     =over 4
333 :    
334 :     =item subsetName
335 :    
336 :     Name of the subset whose columns are desired.
337 :    
338 :     =item RETURN
339 :    
340 :     Returns a list of the indices for the columns in the named subset.
341 :    
342 :     =back
343 :    
344 :     =cut
345 :    
346 :     sub get_subsetC {
347 :     # Get the parameters.
348 :     my ($self, $subsetName) = @_;
349 :     # Get the roles in the subset.
350 :     my @roles = $self->get_subsetC_roles($subsetName);
351 :     # Convert them to indices.
352 :     my $roleHash = $self->{roleHash};
353 :     my @retVal = map { $roleHash->{$_} } @roles;
354 :     # Return the result.
355 :     return @retVal;
356 :     }
357 :    
358 : parrello 1.1 =head3 get_genomes
359 :    
360 : parrello 1.12 my @genomeList = $sub->get_genomes();
361 : parrello 1.1
362 :     Return a list of the genome IDs for this subsystem. Each genome corresponds to a row
363 :     in the subsystem spreadsheet. Indexing into this list returns the ID of the genome
364 :     in the specified row.
365 :    
366 :     =cut
367 :    
368 :     sub get_genomes {
369 :     # Get the parameters.
370 :     my ($self) = @_;
371 :     # Return a list of the genome IDs. The "genomes" member contains a 2-tuple
372 :     # with the genome ID followed by the variant code. We only return the
373 :     # genome IDs.
374 :     my @retVal = map { $_->[0] } @{$self->{genomes}};
375 :     return @retVal;
376 :     }
377 :    
378 :     =head3 get_variant_code
379 :    
380 : parrello 1.12 my $code = $sub->get_variant_code($gidx);
381 : parrello 1.1
382 :     Return the variant code for the specified genome. Each subsystem has multiple
383 :     variants which involve slightly different chemical reactions, and each variant
384 :     has an associated variant code. When a genome is connected to the spreadsheet,
385 :     the subsystem variant used by the genome must be specified.
386 :    
387 :     =over 4
388 :    
389 :     =item gidx
390 :    
391 :     Row index for the genome whose variant code is desired.
392 :    
393 :     =item RETURN
394 :    
395 :     Returns the variant code for the specified genome.
396 :    
397 :     =back
398 :    
399 :     =cut
400 :    
401 :     sub get_variant_code {
402 :     # Get the parameters.
403 :     my ($self, $gidx) = @_;
404 :     # Extract the variant code for the specified row index. It is the second
405 :     # element of the tuple from the "genomes" member.
406 : parrello 1.5 my $retVal = $self->{genomes}->[$gidx]->[1];
407 : parrello 1.1 return $retVal;
408 :     }
409 :    
410 :     =head3 get_curator
411 :    
412 : parrello 1.12 my $userName = $sub->get_curator();
413 : parrello 1.1
414 :     Return the name of this subsystem's official curator.
415 :    
416 :     =cut
417 :    
418 :     sub get_curator {
419 :     # Get the parameters.
420 :     my ($self) = @_;
421 :     # Return the curator member.
422 :     return $self->{curator};
423 :     }
424 :    
425 :     =head3 get_notes
426 :    
427 : parrello 1.12 my $text = $sub->get_notes();
428 : parrello 1.1
429 :     Return the descriptive notes for this subsystem.
430 :    
431 :     =cut
432 :    
433 :     sub get_notes {
434 :     # Get the parameters.
435 :     my ($self) = @_;
436 :     # Return the notes member.
437 :     return $self->{notes};
438 :     }
439 :    
440 : parrello 1.13 =head3 get_description
441 :    
442 :     my $text = $sub->get_description();
443 :    
444 :     Return the description for this subsystem.
445 :    
446 :     =cut
447 :    
448 :     sub get_description
449 :     {
450 :     my($self) = @_;
451 :     return $self->{description};
452 :     }
453 :    
454 : parrello 1.1 =head3 get_roles
455 :    
456 : parrello 1.12 my @roles = $sub->get_roles();
457 : parrello 1.1
458 :     Return a list of the subsystem's roles. Each role corresponds to a column
459 :     in the subsystem spreadsheet. The list entry at a specified position in
460 :     the list will contain the ID of that column's role.
461 :    
462 :     =cut
463 :    
464 :     sub get_roles {
465 :     # Get the parameters.
466 :     my ($self) = @_;
467 :     # Return the list of role IDs. The role IDs are stored as the first
468 :     # element of each 2-tuple in the "roles" member.
469 :     my @retVal = map { $_->[0] } @{$self->{roles}};
470 :     return @retVal;
471 :     }
472 :    
473 :     =head3 get_reactions
474 :    
475 : parrello 1.12 my $reactHash = $sub->get_reactions();
476 : parrello 1.1
477 :     Return a reference to a hash that maps each role ID to a list of the reactions
478 :     catalyzed by the role.
479 :    
480 :     =cut
481 :    
482 :     sub get_reactions {
483 :     # Get the parameters.
484 :     my ($self) = @_;
485 :     # Return the reaction hash member.
486 :     return $self->{reactionHash};
487 :     }
488 :    
489 :     =head3 get_subset_namesC
490 :    
491 : parrello 1.12 my @subsetNames = $sub->get_subset_namesC();
492 : parrello 1.1
493 :     Return a list of the names for all the column (role) subsets. Given a subset
494 :     name, you can use the L</get_subsetC_roles> method to get the roles in the
495 :     subset.
496 :    
497 :     =cut
498 :    
499 :     sub get_subset_namesC {
500 :     # Get the parameters.
501 :     my ($self) = @_;
502 :     # Get the sprout object and use it to retrieve the subset names.
503 :     my $sprout = $self->{sprout};
504 :     my @subsets = $sprout->GetFlat(['HasRoleSubset'], 'HasRoleSubset(from-link) = ?',
505 :     [$self->{name}], 'HasRoleSubset(to-link)');
506 :     # The sprout subset names are prefixed by the subsystem name. We need to pull the
507 :     # prefix off before we return the results. The prefixing character is a colon (:),
508 :     # so we search for the last colon to get ourselves the true subset name.
509 :     my @retVal = map { $_ =~ /:([^:]+)$/; $1 } @subsets;
510 :     return @retVal;
511 :     }
512 :    
513 : parrello 1.12 =head3 get_subset_names
514 :    
515 :     my @subsetNames = $sub->get_subset_names();
516 :    
517 :     Return the names of the column subsets.
518 :    
519 :     =cut
520 :    
521 :     sub get_subset_names{
522 :     # Get the parameters.
523 :     my ($self) = @_;
524 :     # Return the result.
525 :     return $self->get_subset_namesC();
526 :     }
527 :    
528 : parrello 1.1 =head3 get_role_abbr
529 :    
530 : parrello 1.12 my $abbr = $sub->get_role_abbr($ridx);
531 : parrello 1.1
532 :     Return the abbreviation for the role in the specified column. The abbreviation
533 :     is a shortened identifier that is not necessarily unique, but is more likely to
534 :     fit in a column heading.
535 :    
536 :     =over 4
537 :    
538 :     =item ridx
539 :    
540 :     Column index for the role whose abbreviation is desired.
541 :    
542 :     =item RETURN
543 :    
544 :     Returns an abbreviated name for the role corresponding to the indexed column.
545 :    
546 :     =back
547 :    
548 :     =cut
549 :    
550 :     sub get_role_abbr {
551 :     # Get the parameters.
552 :     my ($self, $ridx) = @_;
553 :     # Return the role abbreviation. The abbreviation is the second element
554 :     # in the 2-tuple for the specified column in the "roles" member.
555 :     my $retVal = $self->{roles}->[$ridx]->[1];
556 :     return $retVal;
557 :     }
558 :    
559 : parrello 1.15
560 :     =head3 get_hope_reactions_for_genome
561 :    
562 :     my %ss_reactions = $subsys->get_hope_reactions_for_genome($genome);
563 :    
564 :     This method returns a hash that maps reactions to the pegs that catalyze
565 :     them for the specified genome. For each role in the subsystem, the pegs
566 :     are computed, and these are attached to the reactions for the role.
567 :    
568 :     =over 4
569 :    
570 :     =item genome
571 :    
572 :     ID of the genome whose reactions are to be put into the hash.
573 :    
574 :     =item RETURN
575 :    
576 :     Returns a hash mapping reactions in the subsystem to pegs in the
577 :     specified genome, or C<undef> if the genome is not found in the
578 :     subsystem.
579 :    
580 :     =back
581 :    
582 :     =cut
583 :    
584 :     sub get_hope_reactions_for_genome {
585 :     my($self, $genome) = @_;
586 :     my $index = $self->{genome_index}->{$genome};
587 :     if (defined $index) {
588 :     my @roles = $self->get_roles;
589 :     my %hope_reactions = $self->get_hope_reactions;
590 :    
591 :     my %ss_reactions;
592 :    
593 :     foreach my $role (@roles)
594 :     {
595 :     my @peg_list = $self->get_pegs_from_cell($genome,$role);
596 :    
597 :     if (defined $hope_reactions{$role} && scalar @peg_list > 0)
598 :    
599 :     {
600 :     foreach my $reaction (@{$hope_reactions{$role}})
601 :     {
602 :     push @{$ss_reactions{$reaction}}, @peg_list;
603 :     }
604 :     }
605 :     }
606 :    
607 :     return %ss_reactions;
608 :     }
609 :     else {
610 :     return undef;
611 :     }
612 :     }
613 :    
614 :    
615 :     =head3 get_hope_additional_reactions
616 :    
617 :     my %ss_reactions = $subsys->get_hope_additional_reactions($scenario_name);
618 :    
619 :     Return a list of the additional reactions for the specified scenario.
620 :    
621 :     =over 4
622 :    
623 :     =item scenario_name
624 :    
625 :     Name of the scenario whose additional reactions are desired.
626 :    
627 :     =item RETURN
628 :    
629 :     Returns a list of the additional reactions attached to the named scenario.
630 :    
631 :     =back
632 :    
633 :     =cut
634 :    
635 :     sub get_hope_additional_reactions
636 :     {
637 :     my($self,$scenario_name) = @_;
638 : parrello 1.16 Trace("Hope additional reactions not available in NMPDR.") if T(0); ##HACK
639 : parrello 1.15 my @retVal;
640 :     return @retVal;
641 :     }
642 :    
643 :    
644 : parrello 1.12 =head3 get_hope_reactions
645 :    
646 : parrello 1.14 my %reactionHash = $subsys->get_hope_reactions();
647 : parrello 1.12
648 :     Return a hash mapping the roles of this subsystem to the EC numbers for
649 :     the reactions used in scenarios (if any). It may return an empty hash
650 :     if the Hope reactions are not yet known.
651 :    
652 :     =cut
653 :    
654 :     sub get_hope_reactions {
655 :     # Get the parameters.
656 :     my ($self) = @_;
657 :     # Try to get the hope reactions from the object.
658 :     my $retVal = $self->{hopeReactions};
659 :     if (! defined($retVal)) {
660 :     # They do not exist, so we must create them.
661 : parrello 1.16 $retVal = FIGRules::GetHopeReactions($self, $self->{dir}); ##HACK
662 : parrello 1.12 # Insure we have it if we need it again.
663 :     $self->{hopeReactions} = $retVal;
664 :     }
665 :     # Return the result.
666 : parrello 1.14 return %{$retVal};
667 : parrello 1.12 }
668 :    
669 : parrello 1.1 =head3 get_role_index
670 :    
671 : parrello 1.12 my $idx = $sub->get_role_index($role);
672 : parrello 1.1
673 :     Return the column index for the role with the specified ID.
674 :    
675 :     =over 4
676 :    
677 :     =item role
678 :    
679 :     ID (full name) or abbreviation of the role whose column index is desired.
680 :    
681 :     =item RETURN
682 :    
683 :     Returns the column index for the role with the specified name or abbreviation.
684 :    
685 :     =back
686 :    
687 :     =cut
688 :    
689 :     sub get_role_index {
690 :     # Get the parameters.
691 :     my ($self, $role) = @_;
692 :     # The role index is directly available from the "roleHash" member.
693 :     my $retVal = $self->{roleHash}->{$role};
694 :     return $retVal;
695 :     }
696 :    
697 :     =head3 get_subsetC_roles
698 :    
699 : parrello 1.12 my @roles = $sub->get_subsetC_roles($subname);
700 : parrello 1.1
701 :     Return the names of the roles contained in the specified role (column) subset.
702 :    
703 :     =over 4
704 :    
705 :     =item subname
706 :    
707 :     Name of the role subset whose roles are desired.
708 :    
709 :     =item RETURN
710 :    
711 :     Returns a list of the role names for the columns in the named subset.
712 :    
713 :     =back
714 :    
715 :     =cut
716 :    
717 :     sub get_subsetC_roles {
718 :     # Get the parameters.
719 :     my ($self, $subname) = @_;
720 :     # Get the sprout object. We need it to be able to get the subset data.
721 :     my $sprout = $self->{sprout};
722 :     # Convert the subset name to Sprout format. In Sprout, the subset name is
723 :     # prefixed by the subsystem name in order to get a unique subset ID.
724 :     my $subsetID = $self->{name} . ":$subname";
725 :     # Get a list of the role names for this subset.
726 :     my @roleNames = $sprout->GetFlat(['ConsistsOfRoles'], 'ConsistsOfRoles(from-link) = ?',
727 :     [$subsetID], 'ConsistsOfRoles(to-link)');
728 :     # Sort them by column number. We get the column number from the role hash.
729 :     my $roleHash = $self->{roleHash};
730 :     my @retVal = sort { $roleHash->{$a} <=> $roleHash->{$b} } @roleNames;
731 :     # Return the sorted list.
732 :     return @retVal;
733 :     }
734 :    
735 :     =head3 get_genome_index
736 :    
737 : parrello 1.12 my $idx = $sub->get_genome_index($genome);
738 : parrello 1.1
739 :     Return the row index for the genome with the specified ID.
740 :    
741 :     =over 4
742 :    
743 :     =item genome
744 :    
745 :     ID of the genome whose row index is desired.
746 :    
747 :     =item RETURN
748 :    
749 :     Returns the row index for the genome with the specified ID, or an undefined
750 :     value if the genome does not participate in the subsystem.
751 :    
752 :     =back
753 :    
754 :     =cut
755 :    
756 :     sub get_genome_index {
757 :     # Get the parameters.
758 :     my ($self, $genome) = @_;
759 :     # Get the genome row index from the "genomeHash" member.
760 :     my $retVal = $self->{genomeHash}->{$genome};
761 :     return $retVal;
762 :     }
763 :    
764 :     =head3 get_cluster_number
765 :    
766 : parrello 1.12 my $number = $sub->get_cluster_number($pegID);
767 : parrello 1.1
768 :     Return the cluster number for the specified PEG, or C<-1> if the
769 :     cluster number for the PEG is unknown or it is not clustered.
770 :    
771 :     The cluster number is read into the color hash by the
772 :     L</get_pegs_from_cell> method. If the incoming PEG IDs do not
773 :     come from the most recent cell retrievals, the information returned
774 :     will be invalid. This is a serious design flaw which needs to be
775 :     fixed soon.
776 :    
777 :     =over 4
778 :    
779 :     =item pegID
780 :    
781 :     ID of the PEG whose cluster number is desired.
782 : parrello 1.4
783 :     =item RETURN
784 :    
785 :     Returns the appropriate cluster number.
786 : parrello 1.1
787 :     =back
788 :    
789 :     =cut
790 :     #: Return Type $;
791 :     sub get_cluster_number {
792 :     # Get the parameters.
793 :     my ($self, $pegID) = @_;
794 :     # Declare the return variable.
795 :     my $retVal = -1;
796 :     # Check for a cluster number in the color hash.
797 :     if (exists $self->{colorHash}->{$pegID}) {
798 :     $retVal = $self->{colorHash}->{$pegID};
799 :     }
800 :     # Return the result.
801 :     return $retVal;
802 :     }
803 :    
804 :     =head3 get_pegs_from_cell
805 :    
806 : parrello 1.12 my @pegs = $sub->get_pegs_from_cell($rowstr, $colstr);
807 : parrello 1.1
808 :     Return a list of the peg IDs for the features in the specified spreadsheet cell.
809 :    
810 :     =over 4
811 :    
812 :     =item rowstr
813 :    
814 :     Genome row, specified either as a row index or a genome ID.
815 :    
816 :     =item colstr
817 :    
818 :     Role column, specified either as a column index, a role name, or a role
819 :     abbreviation.
820 :    
821 :     =item RETURN
822 :    
823 :     Returns a list of PEG IDs. The PEGs in the list belong to the genome in the
824 :     specified row and perform the role in the specified column. If the indicated
825 :     row and column does not exist, returns an empty list.
826 :    
827 :     =back
828 :    
829 :     =cut
830 :    
831 :     sub get_pegs_from_cell {
832 :     # Get the parameters.
833 :     my ($self, $rowstr, $colstr) = @_;
834 :     # Get the sprout object for accessing the database.
835 :     my $sprout = $self->{sprout};
836 :     # We need to convert the incoming row and column identifiers. We need a
837 :     # numeric column index and a character genome ID to create the ID for the
838 :     # subsystem spreadsheet cell. First, the column index: note that our version
839 :     # of "get_role_index" conveniently works for both abbreviations and full role IDs.
840 :     my $colIdx = ($colstr =~ /^(\d+)$/ ? $colstr : $self->get_role_index($colstr));
841 :     # Next the genome ID. In this case, we convert any number we find to a string.
842 :     # This requires a little care to avoid a run-time error if the row number is
843 :     # out of range.
844 :     my $genomeID = $rowstr;
845 :     if ($rowstr =~ /^(\d+)$/) {
846 :     # Here we need to convert the row number to an ID. Insure the number is in
847 :     # range. Note that if we do have a row number out of range, the genome ID
848 :     # will be invalid, and our attempt to read from the database will return an
849 :     # empty list.
850 :     my $genomeList = $self->{genomes};
851 :     if ($rowstr >= 0 && $rowstr < @{$genomeList}) {
852 :     $genomeID = $genomeList->[$rowstr]->[0];
853 :     }
854 :     }
855 :     # Construct the spreadsheet cell ID from the information we have.
856 : parrello 1.7 my $cellID = $sprout->DigestKey($self->{name} . ":$genomeID:$colIdx");
857 : parrello 1.1 # Get the list of PEG IDs and cluster numbers for the indicated cell.
858 :     my @pegList = $sprout->GetAll(['ContainsFeature'], 'ContainsFeature(from-link) = ?',
859 :     [$cellID], ['ContainsFeature(to-link)',
860 :     'ContainsFeature(cluster-number)']);
861 :     # Copy the pegs into the return list, and save the cluster numbers in the color hash.
862 :     my @retVal = ();
863 :     for my $pegEntry (@pegList) {
864 :     my ($peg, $cluster) = @{$pegEntry};
865 :     $self->{colorHash}->{$peg} = $cluster;
866 :     push @retVal, $peg;
867 :     }
868 :     # Return the list. If the spreadsheet cell was empty or non-existent, we'll end
869 :     # up returning an empty list.
870 :     return @retVal;
871 :     }
872 :    
873 : parrello 1.10 =head3 get_subsetR
874 : parrello 1.8
875 : parrello 1.12 my @genomes = $sub->get_subsetR($subName);
876 : parrello 1.10
877 :     Return the genomes in the row subset indicated by the specified subset name.
878 :    
879 :     =over 4
880 :    
881 :     =item subName
882 :    
883 :     Name of the desired row subset, or C<All> to get all of the rows.
884 :    
885 :     =item RETURN
886 :    
887 :     Returns a list of genome IDs corresponding to the named subset.
888 :    
889 :     =back
890 :    
891 :     =cut
892 :    
893 :     sub get_subsetR {
894 :     # Get the parameters.
895 :     my ($self, $subName) = @_;
896 :     # Look for the specified row subset in the database. A row subset is identified using
897 :     # the subsystem name and the subset name. The special subset "All" is actually
898 :     # represented in the database, so we don't need to check for it.
899 :     my @rows = $self->{sprout}->GetFlat(['ConsistsOfGenomes'], "ConsistsOfGenomes(from-link) = ?",
900 :     ["$self->{name}:$subName"], 'ConsistsOfGenomes(to-link)');
901 :     return @rows;
902 :     }
903 : parrello 1.8
904 : parrello 1.4 =head3 get_diagrams
905 :    
906 : parrello 1.12 my @list = $sub->get_diagrams();
907 : parrello 1.4
908 :     Return a list of the diagrams associated with this subsystem. Each diagram
909 :     is represented in the return list as a 4-tuple C<[diagram_id, diagram_name,
910 :     page_link, img_link]> where
911 :    
912 :     =over 4
913 :    
914 :     =item diagram_id
915 :    
916 :     ID code for this diagram.
917 :    
918 :     =item diagram_name
919 :    
920 :     Displayable name of the diagram.
921 :    
922 :     =item page_link
923 :    
924 :     URL of an HTML page containing information about the diagram.
925 :    
926 :     =item img_link
927 :    
928 :     URL of an HTML page containing an image for the diagram.
929 :    
930 :     =back
931 :    
932 :     Note that the URLs are in fact for CGI scripts with parameters that point them
933 :     to the correct place. Though Sprout has diagram information in it, it has
934 :     no relationship to the diagrams displayed in SEED, so the work is done entirely
935 :     on the SEED side.
936 :    
937 :     =cut
938 :    
939 :     sub get_diagrams {
940 :     # Get the parameters.
941 :     my ($self) = @_;
942 :     # Get the diagram IDs.
943 : parrello 1.9 my @diagramIDs = Subsystem::GetDiagramIDs($self->{dir});
944 : parrello 1.6 Trace("Diagram IDs are " . join(", ", @diagramIDs)) if T(3);
945 : parrello 1.4 # Create the return variable.
946 :     my @retVal = ();
947 :     # Loop through the diagram IDs.
948 :     for my $diagramID (@diagramIDs) {
949 : parrello 1.6 Trace("Processing diagram $diagramID.") if T(3);
950 : parrello 1.9 my ($name, $link, $imgLink) = $self->get_diagram($diagramID);
951 :     Trace("Diagram $name URLs are \"$link\" and \"$imgLink\".") if T(3);
952 : parrello 1.6 push @retVal, [$diagramID, $name, $link, $imgLink];
953 : parrello 1.4 }
954 : parrello 1.6 # Return the result.
955 :     return @retVal;
956 : parrello 1.4 }
957 :    
958 : parrello 1.9 =head3 get_diagram
959 :    
960 : parrello 1.12 my ($name, $pageURL, $imgURL) = $sub->get_diagram($id);
961 : parrello 1.9
962 :     Get the information (if any) for the specified diagram. The diagram corresponds
963 :     to a subdirectory of the subsystem's C<diagrams> directory. For example, if the
964 :     diagram ID is C<d03>, the diagram's subdirectory would be C<$dir/diagrams/d03>,
965 :     where I<$dir> is the subsystem directory. The diagram's name is extracted from
966 :     a tiny file containing the name, and then the links are computed using the
967 :     subsystem name and the diagram ID. The parameters are as follows.
968 :    
969 :     =over 4
970 :    
971 :     =item id
972 :    
973 :     ID code for the desired diagram.
974 :    
975 :     =item RETURN
976 :    
977 :     Returns a three-element list. The first element is the diagram name, the second
978 :     a URL for displaying information about the diagram, and the third a URL for
979 :     displaying the diagram image.
980 :    
981 :     =back
982 :    
983 :     =cut
984 :    
985 :     sub get_diagram {
986 :     my($self, $id) = @_;
987 :     my $name = Subsystem::GetDiagramName($self->{dir}, $id);
988 : parrello 1.11 my ($link, $img_link) = Subsystem::ComputeDiagramURLs($self, $self->{name}, $id, 1);
989 : parrello 1.9 return($name, $link, $img_link);
990 :     }
991 :    
992 :    
993 :     =head3 get_diagram_html_file
994 :    
995 : parrello 1.12 my $fileName = $sub->get_diagram_html_file($id);
996 : parrello 1.9
997 :     Get the HTML file (if any) for the specified diagram. The diagram corresponds
998 :     to a subdirectory of the subsystem's C<diagrams> directory. For example, if the
999 :     diagram ID is C<d03>, the diagram's subdirectory would be C<$dir/diagrams/d03>,
1000 :     where I<$dir> is the subsystem directory. If an HTML file exists, it will be
1001 :     named C<diagram.html> in the diagram directory. The parameters are as follows.
1002 :    
1003 :     =over 4
1004 :    
1005 :     =item id
1006 :    
1007 :     ID code for the desired diagram.
1008 :    
1009 :     =item RETURN
1010 :    
1011 :     Returns the name of an HTML diagram file, or C<undef> if no such file exists.
1012 :    
1013 :     =back
1014 :    
1015 :     =cut
1016 :    
1017 :     sub get_diagram_html_file {
1018 :     my ($self, $id) = @_;
1019 :     my $retVal;
1020 :     my $ddir = "$self->{dir}/diagrams/$id";
1021 :     if (-d $ddir) {
1022 :     my $html = "$ddir/diagram.html";
1023 :     if (-f $html) {
1024 :     $retVal = $html;
1025 :     }
1026 :     }
1027 :     return $retVal;
1028 :     }
1029 :    
1030 : parrello 1.11 =head3 is_new_diagram
1031 :    
1032 : parrello 1.12 my $flag = $sub->is_new_diagram($id);
1033 : parrello 1.11
1034 :     Return TRUE if the specified diagram is in the new format, else FALSE.
1035 :    
1036 :     =over 4
1037 :    
1038 :     =item id
1039 :    
1040 :     ID code (e.g. C<d03>) of the relevant diagram.
1041 :    
1042 :     =item RETURN
1043 :    
1044 :     Returns TRUE if the diagram is in the new format, else FALSE.
1045 :    
1046 :     =back
1047 :    
1048 :     =cut
1049 :    
1050 :     sub is_new_diagram {
1051 :     my ($self, $id) = @_;
1052 :    
1053 :     my $image_map = $self->get_diagram_html_file($id);
1054 :     if ($image_map) {
1055 :     Trace("Image map found for diagram $id at $image_map.") if T(3);
1056 : parrello 1.17 Open(\*IN, "<$image_map");
1057 : parrello 1.11 my $header = <IN>;
1058 :     close(IN);
1059 : parrello 1.12
1060 : parrello 1.11 if ($header =~ /\<map name=\"GraffleExport\"\>/) {
1061 :     return 1;
1062 :     }
1063 :     }
1064 :    
1065 :     return undef;
1066 :     }
1067 :    
1068 :     =head3 get_role_from_abbr
1069 :    
1070 : parrello 1.12 my $roleName = $sub->get_role_from_abbr($abbr);
1071 : parrello 1.11
1072 :     Return the role name corresponding to an abbreviation.
1073 :    
1074 :     =over 4
1075 :    
1076 :     =item abbr
1077 :    
1078 :     Abbreviation name of the relevant role.
1079 :    
1080 :     =item RETURN
1081 :    
1082 :     Returns the full name of the specified role.
1083 :    
1084 :     =back
1085 :    
1086 :     =cut
1087 :    
1088 :     sub get_role_from_abbr {
1089 :     # Get the parameters.
1090 :     my($self, $abbr) = @_;
1091 :     # Get the role name from the abbreviation hash.
1092 :     my $retVal = $self->{abbrHash}->{$abbr};
1093 :     # Check for a case incompatability.
1094 :     if (! defined $retVal) {
1095 :     $retVal = $self->{abbrHash}->{lcfirst $abbr};
1096 :     }
1097 :     # Return the result.
1098 :     return $retVal;
1099 :     }
1100 :    
1101 :    
1102 : parrello 1.9 =head3 get_name
1103 :    
1104 : parrello 1.12 my $name = $sub->get_name();
1105 : parrello 1.9
1106 :     Return the name of this subsystem.
1107 :    
1108 :     =cut
1109 :    
1110 :     sub get_name {
1111 :     # Get the parameters.
1112 :     my ($self) = @_;
1113 :     # Return the result.
1114 :     return $self->{name};
1115 :     }
1116 :    
1117 :     =head3 open_diagram_image
1118 :    
1119 : parrello 1.12 my ($type, $fh) = $sub->open_diagram_image($id);
1120 : parrello 1.9
1121 :     Open a diagram's image file and return the type and file handle.
1122 :    
1123 :     =over 4
1124 :    
1125 :     =item id
1126 :    
1127 :     ID of the desired diagram
1128 :    
1129 :     =item RETURN
1130 :    
1131 :     Returns a 2-tuple containing the diagram's MIME type and an open filehandle
1132 :     for the diagram's data. If the diagram does not exist, the type will be
1133 :     returned as <undef>.
1134 :    
1135 :     =back
1136 :    
1137 :     =cut
1138 :    
1139 :     sub open_diagram_image {
1140 :     # Get the parameters.
1141 :     my ($self, $id) = @_;
1142 :     # Declare the return variables.
1143 :     my ($type, $fh);
1144 :     # Get the diagram directory.
1145 :     my $img_base = "$self->{dir}/diagrams/$id/diagram";
1146 :     # Get a list of file extensions and types.
1147 :     my %types = (png => "image/png",
1148 :     gif => "image/gif",
1149 :     jpg => "image/jpeg");
1150 :     # This is my new syntax for the for-each-while loop.
1151 :     # We loop until we run out of keys or come up with a type value.
1152 :     for my $ext (keys %types) { last if (defined $type);
1153 :     my $myType = $types{$ext};
1154 :     # Compute a file name for this diagram.
1155 :     my $file = "$img_base.$ext";
1156 :     # If it exists, try to open it.
1157 :     if (-f $file) {
1158 :     $fh = Open(undef, "<$file");
1159 :     $type = $myType;
1160 :     }
1161 :     }
1162 :     # Return the result.
1163 :     return ($type, $fh);
1164 :     }
1165 :    
1166 : parrello 1.1
1167 : parrello 1.12 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3