[Bio] / Sprout / SproutSubsys.pm Repository:
ViewVC logotype

Annotation of /Sprout/SproutSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package SproutSubsys;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use PageBuilder;
8 :     use FIG;
9 :     use Sprout;
10 :    
11 :     =head1 Sprout Subsystem Object
12 :    
13 :     =head2 Introduction
14 :    
15 :     This object emulates the capabilities of the FIG-style C<Subsystem> object, but
16 :     uses Sprout methods to retrieve the data. This object can be dropped in place of
17 :     the UnvSubsys object to create subsystem displays for the Sprout rather than the
18 :     SEED.
19 :    
20 :     The structure created by the constructor contains the following data members.
21 :    
22 :     =over 4
23 :    
24 :     =item name
25 :    
26 :     Name of the subsystem. This is needed for any further database accesses required.
27 :    
28 :     =item curator
29 :    
30 :     Name of the subsystem's official curator.
31 :    
32 :     =item notes
33 :    
34 :     General notes about the subsystem.
35 :    
36 :     =item sprout
37 :    
38 :     Sprout object for accessing the database. This is a genuine Sprout object, not
39 :     an SFXlate object.
40 :    
41 :     =item genomeHash
42 :    
43 :     Map of genome IDs to row indices.
44 :    
45 :     =item genomes
46 :    
47 :     List of [genomeID, variantCode] tuples in row order.
48 :    
49 :     =item roleHash
50 :    
51 :     Map of role IDs and abbreviations to column indices. In other words, plugging
52 :     either a full-blown role ID or its abbreviation into this hash will return
53 :     the role's column index.
54 :    
55 :     =item roles
56 :    
57 :     List of [roleID, abbreviation] tuples in column order.
58 :    
59 :     =item reactionHash
60 :    
61 :     Map of role IDs to a list of the reactions catalyzed by the role.
62 :    
63 :     =item colorHash
64 :    
65 :     Map of PEG IDs to cluster numbers. This is used to create color maps for
66 :     display of a subsystem's PEGs.
67 :    
68 :     =back
69 :    
70 :     =cut
71 :    
72 :     #: Constructor SproutSubsys->new();
73 :    
74 :     =head2 Public Methods
75 :    
76 :     =head3 new
77 :    
78 :     C<< my $sub = Subsystem->new($subName, $sprout); >>
79 :    
80 :     Load the subsystem.
81 :    
82 :     =over 4
83 :    
84 :     =item subName
85 :    
86 :     Name of the desired subsystem.
87 :    
88 :     =item sprout
89 :    
90 :     Sprout or SFXlate object for accessing the Sprout data store.
91 :    
92 :     =back
93 :    
94 :     =cut
95 :    
96 :     sub new {
97 :     # Get the parameters.
98 :     my ($class, $subName, $sprout) = @_;
99 :     # Insure we have a Sprout object.
100 :     if (ref $sprout eq 'SFXlate') {
101 :     $sprout = $sprout->{sprout};
102 :     }
103 :     # Declare the return value.
104 :     my $retVal;
105 :     # Get the subsystem's data fields.
106 :     my ($curator, $notes) = $sprout->GetEntityValues('Subsystem', $subName, ['Subsystem(curator)',
107 :     'Subsystem(notes)']);
108 :     # Only proceed if we found the subsystem.
109 :     if (defined $curator) {
110 :     # Get the genome IDs and variant codes for the rows. The list returned
111 :     # by GetAll will be a list of 2-tuples, each consisting of a genome ID
112 :     # and a subsystem variant code.
113 :     my @genomes = $sprout->GetAll(['ParticipatesIn'],
114 :     'ParticipatesIn(to-link) = ? ORDER BY ParticipatesIn(variant-code), ParticipatesIn(from-link)',
115 :     [$subName], ['ParticipatesIn(from-link)',
116 :     'ParticipatesIn(variant-code)']);
117 :     # Create the genome ID directory. This is a hash that maps a genome ID to its
118 :     # row index.
119 :     my $idx = 0;
120 :     my %genomeHash = map { $_->[0] => $idx++ } @genomes;
121 :     # Get the role IDs and abbreviations. The list returned by GetAll will be
122 :     # a list of 2-tuples, each consisting of a role ID and abbreviation. The
123 :     # 2-tuples will be ordered by the spreadsheet column number.
124 :     my @roles = $sprout->GetAll(['OccursInSubsystem', 'Role'],
125 :     'OccursInSubsystem(to-link) = ? ORDER BY OccursInSubsystem(column-number)',
126 :     [$subName], ['OccursInSubsystem(from-link)', 'Role(abbr)']);
127 :     # Now we need to create the role ID directory and the reaction hash.
128 :     # The role ID directory maps role IDs and their abbreviations to column numbers.
129 :     # The reaction hash maps a role ID to a list of the IDs for the reactions it
130 :     # catalyzes.
131 :     my %roleHash = ();
132 :     my %reactionHash = ();
133 :     for ($idx = 0; $idx <= $#roles; $idx++) {
134 :     # Get the role ID and abbreviation for this column's role.
135 :     my ($roleID, $abbr) = @{$roles[$idx]};
136 :     # Put them both in the role directory.
137 :     $roleHash{$roleID} = $idx;
138 :     $roleHash{$abbr} = $idx;
139 :     # Get this role's reactions.
140 :     my @reactions = $sprout->GetFlat(['Catalyzes'], 'Catalyzes(from-link) = ?',
141 :     [$roleID], 'Catalyzes(to-link)');
142 :     # Put them in the reaction hash.
143 : parrello 1.3 if (@reactions > 0) {
144 :     $reactionHash{$roleID} = \@reactions;
145 :     }
146 : parrello 1.1 }
147 :     # Create the subsystem object.
148 :     $retVal = {
149 :     # Name of the subsystem. This is needed for any further database
150 :     # accesses required.
151 :     name => $subName,
152 :     # Name of the subsystem's official curator.
153 :     curator => $curator,
154 :     # General notes about the subsystem.
155 :     notes => $notes,
156 :     # Sprout object for accessing the database.
157 :     sprout => $sprout,
158 :     # Map of genome IDs to row indices.
159 :     genomeHash => \%genomeHash,
160 :     # List of [genomeID, variantCode] tuples in row order.
161 :     genomes => \@genomes,
162 :     # Map of role IDs and abbreviations to column indices.
163 :     roleHash => \%roleHash,
164 :     # List of [roleID, abbreviation] tuples in column order.
165 :     roles => \@roles,
166 :     # Map of PEG IDs to cluster numbers.
167 :     colorHash => {},
168 : parrello 1.2 # Map of role IDs to reactions.
169 :     reactionHash => \%reactionHash,
170 : parrello 1.1 };
171 :     # Bless and return it.
172 :     bless $retVal, $class;
173 :     }
174 :     return $retVal;
175 :     }
176 :    
177 :     =head3 get_genomes
178 :    
179 :     C<< my @genomeList = $sub->get_genomes(); >>
180 :    
181 :     Return a list of the genome IDs for this subsystem. Each genome corresponds to a row
182 :     in the subsystem spreadsheet. Indexing into this list returns the ID of the genome
183 :     in the specified row.
184 :    
185 :     =cut
186 :    
187 :     sub get_genomes {
188 :     # Get the parameters.
189 :     my ($self) = @_;
190 :     # Return a list of the genome IDs. The "genomes" member contains a 2-tuple
191 :     # with the genome ID followed by the variant code. We only return the
192 :     # genome IDs.
193 :     my @retVal = map { $_->[0] } @{$self->{genomes}};
194 :     return @retVal;
195 :     }
196 :    
197 :     =head3 get_variant_code
198 :    
199 :     C<< my $code = $sub->get_variant_code($gidx); >>
200 :    
201 :     Return the variant code for the specified genome. Each subsystem has multiple
202 :     variants which involve slightly different chemical reactions, and each variant
203 :     has an associated variant code. When a genome is connected to the spreadsheet,
204 :     the subsystem variant used by the genome must be specified.
205 :    
206 :     =over 4
207 :    
208 :     =item gidx
209 :    
210 :     Row index for the genome whose variant code is desired.
211 :    
212 :     =item RETURN
213 :    
214 :     Returns the variant code for the specified genome.
215 :    
216 :     =back
217 :    
218 :     =cut
219 :    
220 :     sub get_variant_code {
221 :     # Get the parameters.
222 :     my ($self, $gidx) = @_;
223 :     # Extract the variant code for the specified row index. It is the second
224 :     # element of the tuple from the "genomes" member.
225 :     my $retVal = $self->{genomes}->{$gidx}->[1];
226 :     return $retVal;
227 :     }
228 :    
229 :     =head3 get_curator
230 :    
231 :     C<< my $userName = $sub->get_curator(); >>
232 :    
233 :     Return the name of this subsystem's official curator.
234 :    
235 :     =cut
236 :    
237 :     sub get_curator {
238 :     # Get the parameters.
239 :     my ($self) = @_;
240 :     # Return the curator member.
241 :     return $self->{curator};
242 :     }
243 :    
244 :     =head3 get_notes
245 :    
246 :     C<< my $text = $sub->get_notes(); >>
247 :    
248 :     Return the descriptive notes for this subsystem.
249 :    
250 :     =cut
251 :    
252 :     sub get_notes {
253 :     # Get the parameters.
254 :     my ($self) = @_;
255 :     # Return the notes member.
256 :     return $self->{notes};
257 :     }
258 :    
259 :     =head3 get_roles
260 :    
261 :     C<< my @roles = $sub->get_roles(); >>
262 :    
263 :     Return a list of the subsystem's roles. Each role corresponds to a column
264 :     in the subsystem spreadsheet. The list entry at a specified position in
265 :     the list will contain the ID of that column's role.
266 :    
267 :     =cut
268 :    
269 :     sub get_roles {
270 :     # Get the parameters.
271 :     my ($self) = @_;
272 :     # Return the list of role IDs. The role IDs are stored as the first
273 :     # element of each 2-tuple in the "roles" member.
274 :     my @retVal = map { $_->[0] } @{$self->{roles}};
275 :     return @retVal;
276 :     }
277 :    
278 :     =head3 get_reactions
279 :    
280 :     C<< my $reactHash = $sub->get_reactions(); >>
281 :    
282 :     Return a reference to a hash that maps each role ID to a list of the reactions
283 :     catalyzed by the role.
284 :    
285 :     =cut
286 :    
287 :     sub get_reactions {
288 :     # Get the parameters.
289 :     my ($self) = @_;
290 :     # Return the reaction hash member.
291 :     return $self->{reactionHash};
292 :     }
293 :    
294 :     =head3 get_subset_namesC
295 :    
296 :     C<< my @subsetNames = $sub->get_subset_namesC(); >>
297 :    
298 :     Return a list of the names for all the column (role) subsets. Given a subset
299 :     name, you can use the L</get_subsetC_roles> method to get the roles in the
300 :     subset.
301 :    
302 :     =cut
303 :    
304 :     sub get_subset_namesC {
305 :     # Get the parameters.
306 :     my ($self) = @_;
307 :     # Get the sprout object and use it to retrieve the subset names.
308 :     my $sprout = $self->{sprout};
309 :     my @subsets = $sprout->GetFlat(['HasRoleSubset'], 'HasRoleSubset(from-link) = ?',
310 :     [$self->{name}], 'HasRoleSubset(to-link)');
311 :     # The sprout subset names are prefixed by the subsystem name. We need to pull the
312 :     # prefix off before we return the results. The prefixing character is a colon (:),
313 :     # so we search for the last colon to get ourselves the true subset name.
314 :     my @retVal = map { $_ =~ /:([^:]+)$/; $1 } @subsets;
315 :     return @retVal;
316 :     }
317 :    
318 :     =head3 get_role_abbr
319 :    
320 :     C<< my $abbr = $sub->get_role_abbr($ridx); >>
321 :    
322 :     Return the abbreviation for the role in the specified column. The abbreviation
323 :     is a shortened identifier that is not necessarily unique, but is more likely to
324 :     fit in a column heading.
325 :    
326 :     =over 4
327 :    
328 :     =item ridx
329 :    
330 :     Column index for the role whose abbreviation is desired.
331 :    
332 :     =item RETURN
333 :    
334 :     Returns an abbreviated name for the role corresponding to the indexed column.
335 :    
336 :     =back
337 :    
338 :     =cut
339 :    
340 :     sub get_role_abbr {
341 :     # Get the parameters.
342 :     my ($self, $ridx) = @_;
343 :     # Return the role abbreviation. The abbreviation is the second element
344 :     # in the 2-tuple for the specified column in the "roles" member.
345 :     my $retVal = $self->{roles}->[$ridx]->[1];
346 :     return $retVal;
347 :     }
348 :    
349 :     =head3 get_role_index
350 :    
351 :     C<< my $idx = $sub->get_role_index($role); >>
352 :    
353 :     Return the column index for the role with the specified ID.
354 :    
355 :     =over 4
356 :    
357 :     =item role
358 :    
359 :     ID (full name) or abbreviation of the role whose column index is desired.
360 :    
361 :     =item RETURN
362 :    
363 :     Returns the column index for the role with the specified name or abbreviation.
364 :    
365 :     =back
366 :    
367 :     =cut
368 :    
369 :     sub get_role_index {
370 :     # Get the parameters.
371 :     my ($self, $role) = @_;
372 :     # The role index is directly available from the "roleHash" member.
373 :     my $retVal = $self->{roleHash}->{$role};
374 :     return $retVal;
375 :     }
376 :    
377 :     =head3 get_subsetC_roles
378 :    
379 :     C<< my @roles = $sub->get_subsetC_roles($subname); >>
380 :    
381 :     Return the names of the roles contained in the specified role (column) subset.
382 :    
383 :     =over 4
384 :    
385 :     =item subname
386 :    
387 :     Name of the role subset whose roles are desired.
388 :    
389 :     =item RETURN
390 :    
391 :     Returns a list of the role names for the columns in the named subset.
392 :    
393 :     =back
394 :    
395 :     =cut
396 :    
397 :     sub get_subsetC_roles {
398 :     # Get the parameters.
399 :     my ($self, $subname) = @_;
400 :     # Get the sprout object. We need it to be able to get the subset data.
401 :     my $sprout = $self->{sprout};
402 :     # Convert the subset name to Sprout format. In Sprout, the subset name is
403 :     # prefixed by the subsystem name in order to get a unique subset ID.
404 :     my $subsetID = $self->{name} . ":$subname";
405 :     # Get a list of the role names for this subset.
406 :     my @roleNames = $sprout->GetFlat(['ConsistsOfRoles'], 'ConsistsOfRoles(from-link) = ?',
407 :     [$subsetID], 'ConsistsOfRoles(to-link)');
408 :     # Sort them by column number. We get the column number from the role hash.
409 :     my $roleHash = $self->{roleHash};
410 :     my @retVal = sort { $roleHash->{$a} <=> $roleHash->{$b} } @roleNames;
411 :     # Return the sorted list.
412 :     return @retVal;
413 :     }
414 :    
415 :     =head3 get_genome_index
416 :    
417 :     C<< my $idx = $sub->get_genome_index($genome); >>
418 :    
419 :     Return the row index for the genome with the specified ID.
420 :    
421 :     =over 4
422 :    
423 :     =item genome
424 :    
425 :     ID of the genome whose row index is desired.
426 :    
427 :     =item RETURN
428 :    
429 :     Returns the row index for the genome with the specified ID, or an undefined
430 :     value if the genome does not participate in the subsystem.
431 :    
432 :     =back
433 :    
434 :     =cut
435 :    
436 :     sub get_genome_index {
437 :     # Get the parameters.
438 :     my ($self, $genome) = @_;
439 :     # Get the genome row index from the "genomeHash" member.
440 :     my $retVal = $self->{genomeHash}->{$genome};
441 :     return $retVal;
442 :     }
443 :    
444 :     =head3 get_cluster_number
445 :    
446 :     C<< my $number = $sub->get_cluster_number($pegID); >>
447 :    
448 :     Return the cluster number for the specified PEG, or C<-1> if the
449 :     cluster number for the PEG is unknown or it is not clustered.
450 :    
451 :     The cluster number is read into the color hash by the
452 :     L</get_pegs_from_cell> method. If the incoming PEG IDs do not
453 :     come from the most recent cell retrievals, the information returned
454 :     will be invalid. This is a serious design flaw which needs to be
455 :     fixed soon.
456 :    
457 :     =over 4
458 :    
459 :     =item pegID
460 :    
461 :     ID of the PEG whose cluster number is desired.
462 :     TODO: items
463 :    
464 :     =back
465 :    
466 :     =cut
467 :     #: Return Type $;
468 :     sub get_cluster_number {
469 :     # Get the parameters.
470 :     my ($self, $pegID) = @_;
471 :     # Declare the return variable.
472 :     my $retVal = -1;
473 :     # Check for a cluster number in the color hash.
474 :     if (exists $self->{colorHash}->{$pegID}) {
475 :     $retVal = $self->{colorHash}->{$pegID};
476 :     }
477 :     # Return the result.
478 :     return $retVal;
479 :     }
480 :    
481 :     =head3 get_pegs_from_cell
482 :    
483 :     C<< my @pegs = $sub->get_pegs_from_cell($rowstr, $colstr); >>
484 :    
485 :     Return a list of the peg IDs for the features in the specified spreadsheet cell.
486 :    
487 :     =over 4
488 :    
489 :     =item rowstr
490 :    
491 :     Genome row, specified either as a row index or a genome ID.
492 :    
493 :     =item colstr
494 :    
495 :     Role column, specified either as a column index, a role name, or a role
496 :     abbreviation.
497 :    
498 :     =item RETURN
499 :    
500 :     Returns a list of PEG IDs. The PEGs in the list belong to the genome in the
501 :     specified row and perform the role in the specified column. If the indicated
502 :     row and column does not exist, returns an empty list.
503 :    
504 :     =back
505 :    
506 :     =cut
507 :    
508 :     sub get_pegs_from_cell {
509 :     # Get the parameters.
510 :     my ($self, $rowstr, $colstr) = @_;
511 :     # Get the sprout object for accessing the database.
512 :     my $sprout = $self->{sprout};
513 :     # We need to convert the incoming row and column identifiers. We need a
514 :     # numeric column index and a character genome ID to create the ID for the
515 :     # subsystem spreadsheet cell. First, the column index: note that our version
516 :     # of "get_role_index" conveniently works for both abbreviations and full role IDs.
517 :     my $colIdx = ($colstr =~ /^(\d+)$/ ? $colstr : $self->get_role_index($colstr));
518 :     # Next the genome ID. In this case, we convert any number we find to a string.
519 :     # This requires a little care to avoid a run-time error if the row number is
520 :     # out of range.
521 :     my $genomeID = $rowstr;
522 :     if ($rowstr =~ /^(\d+)$/) {
523 :     # Here we need to convert the row number to an ID. Insure the number is in
524 :     # range. Note that if we do have a row number out of range, the genome ID
525 :     # will be invalid, and our attempt to read from the database will return an
526 :     # empty list.
527 :     my $genomeList = $self->{genomes};
528 :     if ($rowstr >= 0 && $rowstr < @{$genomeList}) {
529 :     $genomeID = $genomeList->[$rowstr]->[0];
530 :     }
531 :     }
532 :     # Construct the spreadsheet cell ID from the information we have.
533 :     my $cellID = $self->{name} . ":$genomeID:$colIdx";
534 :     # Get the list of PEG IDs and cluster numbers for the indicated cell.
535 :     my @pegList = $sprout->GetAll(['ContainsFeature'], 'ContainsFeature(from-link) = ?',
536 :     [$cellID], ['ContainsFeature(to-link)',
537 :     'ContainsFeature(cluster-number)']);
538 :     # Copy the pegs into the return list, and save the cluster numbers in the color hash.
539 :     my @retVal = ();
540 :     for my $pegEntry (@pegList) {
541 :     my ($peg, $cluster) = @{$pegEntry};
542 :     $self->{colorHash}->{$peg} = $cluster;
543 :     push @retVal, $peg;
544 :     }
545 :     # Return the list. If the spreadsheet cell was empty or non-existent, we'll end
546 :     # up returning an empty list.
547 :     return @retVal;
548 :     }
549 :    
550 :     1;
551 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3