[Bio] / FigKernelPackages / UnvSubsys.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/UnvSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.17 - (view) (download) (as text)

1 : olson 1.15 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : overbeek 1.1 package UnvSubsys;
19 :    
20 : overbeek 1.2 use Subsystem;
21 : overbeek 1.1 use Carp;
22 :     use FIG;
23 : parrello 1.12 use SFXlate;
24 : overbeek 1.1 use Data::Dumper;
25 :     use strict;
26 : parrello 1.12 use Tracer;
27 : overbeek 1.1
28 : parrello 1.12 =head1 Universal Subsystem Object
29 :    
30 :     =head2 Introduction
31 :    
32 :     The universal subsystem object provides methods used to display useful information
33 :     about a subsystem. Its intent is to support both Sprout and SEED.
34 :    
35 :     The soul of a subsystem is its spreadsheet. The columns of the spreadsheet are
36 :     the subsystem roles. The rows are the genomes that contain the subsystem. PEGs
37 :     are stored in the spreadsheet's cells. Each genome can be identified by genome
38 :     ID or row index. Each role can be identified by role name or column index.
39 :    
40 :     It is worth noting that the term I<subset> has two meanings-- a subset of roles
41 :     or a subset of genomes. A subset of roles is called a I<column subset>. A
42 :     subset of genomes is called a I<row subset>.
43 :    
44 :     The object created contains a great deal of information, so it's worthwhile to
45 :     stop for a moment and discuss it. The object will have the following members.
46 :    
47 :     =over 4
48 :    
49 :     =item Roles
50 :    
51 :     A list of 3-tuples, each consisting of a role name, the abbreviated role name,
52 :     and a list of URLs for the reactions. Each role is a column in the
53 :     subsystem spreadsheet. Indexing into the list by column index yields the
54 :     ID, abbreviation, and reactions for the role in that column.
55 :    
56 :     =item RoleIndex
57 :    
58 :     A hash mapping each role to its column index in the spreadsheet.
59 :    
60 :     =item RoleSubsets
61 :    
62 :     A list of 2-tuples, each consisting of a subset name followed by a list of the
63 :     column indices for the roles in the subset.
64 :    
65 :     =item Genomes
66 :    
67 :     A list of 2-tuples, each containing the genome ID for the relevant row and the
68 :     variant code for that genome. Subsystems can have several variations, and the
69 :     variant code indicates which variant of the subsystem that the genome uses.
70 :     There is one genome for each row in the spreadsheet. Indexing into the list
71 :     yields the ID of the genome for that row and its variant code.
72 :    
73 :     =item GenomeIndex
74 :    
75 :     A hash mapping each genome ID to its row index in the spreadsheet.
76 :    
77 :     =item PegHash
78 :    
79 :     A hash of hashes containing the spreadsheet cells. If C<$pegHash> is the hash
80 :     of hashes, C<$row> is a genome index, and C<$col> is a role index, then
81 :    
82 :     $pegHash->{$row}->{$col}
83 :    
84 :     returns a reference to a list of the IDs for the PEGs in the relevant
85 :     spreadsheet cell.
86 :    
87 :     =item ColorHash
88 :    
89 :     A hash mapping each PEG ID to the color that should be used to represent that
90 :     PEG in the display.
91 :    
92 :     =item AliasHash
93 :    
94 :     A hash mapping each PEG ID to a list of its aliases.
95 :    
96 :     =item ReactionHash
97 :    
98 :     A hash mapping each role to the list of reactions it catalyzes.
99 :    
100 :     =back
101 :    
102 :     =head2 Public Methods
103 :    
104 :     =head3 new
105 :    
106 :     C<< my $usub = UnvSubsys->new($ssa, $fig, $active_subsetR, $focus, $show_clusters, $aliases, \@peg_colors); >>
107 :    
108 :     Construct a new universal subsystem object for a specified subsystem.
109 :    
110 :     =over 4
111 :    
112 :     =item ssa
113 :    
114 :     Name of the subsystem.
115 :    
116 :     =item fig
117 :    
118 :     Data access object, either a FIG object or an SFXlate object.
119 :    
120 :     =item active_subsetR
121 :    
122 :     Name of the active row subset. A row subset names a group of genomes.
123 :    
124 :     =item focus
125 :    
126 :     ID of the genome currently in focus.
127 :    
128 :     =item show_clusters
129 :    
130 :     TRUE if clusters should be painted by color, else FALSE.
131 :    
132 :     =item aliases
133 :    
134 :     TRUE if PEG aliases should be shown, else FALSE.
135 :    
136 :     =item peg_colors (optional)
137 : overbeek 1.1
138 : parrello 1.12 Reference to a list of 2-tuples, each 2-tuple consisting of a PEG ID and the color
139 :     to be assigned to the PEG.
140 : overbeek 1.1
141 : parrello 1.12 =back
142 : overbeek 1.1
143 : parrello 1.12 =cut
144 : overbeek 1.1
145 : parrello 1.12 sub new {
146 :     # Get the parameters.
147 :     my($class, $ssa, $fig, $active_subsetR, $focus, $show_clusters, $aliases, $peg_colors) = @_;
148 :     # Fix the subsystem name. Spaces are replaced by underscores to avoid naming problems
149 :     # in the seed's file system.
150 :     $ssa =~ s/ /_/g;
151 :     # Get the FIG object. At this point in time, we're only getting data from the SEED. On
152 :     # a future pass through the code, we'll get data from a SEED or Sprout.
153 :     if ((ref($fig) eq "FIG") || (ref($fig) eq 'SFXlate')) {
154 :     # Create a subsystem object. The "get_subsystem" method is provided by both FIG
155 :     # and SFXlate; however, the object returned in each case is different: FIG
156 :     # returns a "Subsystem" object; SFXlate returns a "SproutSubsys" object.
157 :     my $subsystem = $fig->get_subsystem($ssa);
158 :     # Get the key subsystem data. Note that CRs in the notes are converted to LFs
159 :     # in case the notes come from a Mac.
160 : parrello 1.5 my $curator = $subsystem->get_curator;
161 :     my $notes = $subsystem->get_notes;
162 : parrello 1.12 $notes =~ s/\r/\n/g;
163 : parrello 1.5 my @roles = $subsystem->get_roles;
164 :     my $reactions = $subsystem->get_reactions;
165 :     my @genomes = $subsystem->get_genomes;
166 : overbeek 1.1 my @col_subsets = $subsystem->get_subset_namesC;
167 : parrello 1.14 my @diagrams = $subsystem->get_diagrams();
168 : parrello 1.12 # Create the data structures for the role list and the role index.
169 : parrello 1.5 my $role_info = [];
170 :     my $roleH = {};
171 : parrello 1.12 # Loop through the roles to create the role list. The $i index will move
172 :     # through the columns of the spreadsheet.
173 : parrello 1.5 my($i,$j,$subset,$peg);
174 :     for ($i=0; ($i < @roles); $i++)
175 :     {
176 : parrello 1.12 # Get the ID of the role in the current column.
177 : parrello 1.5 my $role = $roles[$i];
178 : parrello 1.12 # Extract its abbreviation.
179 :     my $abbrev = $subsystem->get_role_abbr($i);
180 :     # Get the reactions. Note that its possible no reactions were found. If there were
181 :     # reactions found, however, we convert from reaction IDs to a comma-delimited
182 :     # list of HTML code full of hyperlinks.
183 : parrello 1.5 my $react = $reactions ? join(",", map { &HTML::reaction_link($_) } @{$reactions->{$role}}) : [];
184 : parrello 1.12 # Form the role name, abbreviation, and reaction list into a 3-tuple and push
185 :     # them onto the main role list.
186 : parrello 1.5 push(@$role_info,[$role,$abbrev,$react]);
187 : parrello 1.12 # Set the role hash so that we can get back the column index for a given
188 :     # role name.
189 : parrello 1.5 $roleH->{$role} = $i;
190 :     }
191 : parrello 1.12 # Get the column subsets. A column subset is a list of role IDs, so we need to
192 :     # convert it to a set of column indices. We ignore the special "All" subset
193 :     # that contains everything.
194 : parrello 1.5 my $subset_info = [];
195 :     foreach $subset (@col_subsets)
196 :     {
197 :     if ($subset ne 'All')
198 :     {
199 :     push(@$subset_info,[$subset,[map { $roleH->{$_} } $subsystem->get_subsetC_roles($subset)]]);
200 :     }
201 :     }
202 : parrello 1.12 # Now we create the genome directories. For each genome we need to be able to
203 :     # hash from the genomeID to the specified role index and we must be able to
204 :     # get the genome's variant code. (The variant code indicates which subsystem
205 :     # variant the genome uses.
206 : parrello 1.5 my $genomes_info = [];
207 :     my $genomeH = {};
208 :     for ($i=0; ($i < @genomes); $i++)
209 :     {
210 : parrello 1.12 # Get the genome ID for this row.
211 : parrello 1.5 my $genome = $genomes[$i];
212 : parrello 1.12 # Get its variant code.
213 :     my $variant = $subsystem->get_variant_code($i);
214 :     # Form them into a 2-tuple and add the result to the genome list.
215 : parrello 1.5 push(@$genomes_info,[$genome,$variant]);
216 : parrello 1.12 # Set up the hash to get from the genome ID to the row index.
217 : parrello 1.5 $genomeH->{$genome} = $i;
218 :     }
219 : parrello 1.12
220 :     # Next we gather the data from the actual spreadsheet cells. For an SFXlate
221 :     # object, this is the most expensive part of the operation, since it requires
222 :     # a database call for each cell.
223 : parrello 1.5 my $pegH = {};
224 : parrello 1.12 # $i is the row index, which cycles through genomes.
225 : parrello 1.5 for ($i=0; ($i < @genomes); $i++)
226 :     {
227 : parrello 1.12 # $j is the column index, which cycles through roles.
228 : parrello 1.5 for ($j=0; ($j < @roles); $j++)
229 :     {
230 : parrello 1.12 my @pegs = $subsystem->get_pegs_from_cell($i,$j);
231 : parrello 1.5 $pegH->{$i}->{$j} = [@pegs];
232 :     }
233 :     }
234 : overbeek 1.7
235 : parrello 1.12 # Get the row subsets. Row subsets are determined by the genome
236 :     # taxonomy, so these are different from the row subsets stored
237 :     # in the subsystem object.
238 :     my $row_subsets = &row_subsets($fig, $genomeH);
239 :     # Here we try to get a list of the active genomes. The caller
240 :     # gives us hints in the form of the "$focus" and "$active_subsetR"
241 :     # parameters. If, for example, we are using this object to generate
242 :     # a subsystem web page, the focus information would steer us to
243 :     # whatever the user wants to look at on the page.
244 : olson 1.13 my $active_genomes = &active_genomes($fig, $row_subsets,$active_subsetR,$focus,
245 : parrello 1.12 $genomeH,$genomes_info);
246 :    
247 :     # Now we generate a table of colors for the various PEGs. If the
248 :     # caller gave us a map of peg IDs to colors, we use that. Otherwise,
249 :     # we allow the option of painting the PEGs by cluster number. (The
250 :     # caller indicates this by setting the "show_clusters" flag in the
251 :     # parameter list.)
252 :     my $colorsH;
253 :     if ($peg_colors)
254 :     {
255 :     # Here the caller gave us a list of peg colors. The list contains
256 :     # 2-tuples, each consisting of a PEG ID followed by the
257 :     # color value. The loop below extracts the pairs and stuffs them
258 :     # into the color hash.
259 :     $colorsH = {};
260 :     foreach $_ (@$peg_colors)
261 :     {
262 :     my($peg,$color) = @$_;
263 :     $colorsH->{$peg} = $color;
264 :     }
265 :     }
266 :     elsif ($show_clusters)
267 :     {
268 :     # Here the user wants us to base the colors on the genome clustering
269 :     # information.
270 :     $colorsH = &set_colors($fig,$subsystem,$pegH,$active_genomes);
271 :     }
272 :     else
273 :     {
274 :     # Here the user is not interested in coloring the PEGs.
275 :     $colorsH = {};
276 :     }
277 :     # If the user wants to see aliases, compute the alias hash. Aliases
278 :     # will only be computed for PEGs belonging to active (highlighted)
279 :     # genomes, and there is a maximum of one alias per PEG.
280 : overbeek 1.7 my $aliasesH = $aliases ? &set_aliases($fig,$pegH,$active_genomes) : {};
281 : parrello 1.12 # Create and bless the UnvSubsys object.
282 : paarmann 1.17 my $self = { SSA => $ssa,
283 :     Roles => $role_info,
284 : parrello 1.5 RoleIndex => $roleH,
285 :     RoleSubsets => $subset_info,
286 :     Genomes => $genomes_info,
287 :     GenomeIndex => $genomeH,
288 : parrello 1.12 GenomeSubsets => $row_subsets,
289 : parrello 1.5 PegHash => $pegH,
290 :     Colors => $colorsH,
291 :     Aliases => $aliasesH,
292 :     Curator => $curator,
293 :     Notes => $notes,
294 : parrello 1.14 Reactions => $reactions,
295 :     Diagrams => \@diagrams,
296 : parrello 1.5 };
297 :     bless($self, $class);
298 : parrello 1.12 # Return the object.
299 : parrello 1.5 return $self;
300 : overbeek 1.1 }
301 :     else
302 :     {
303 : parrello 1.12 # Here the FIG-like object was not recognized, so we return an
304 :     # undefined value.
305 : parrello 1.5 return undef;
306 : overbeek 1.1 }
307 :     }
308 :    
309 : paarmann 1.17 =head3 get_ssa
310 :    
311 :     C<< my $ssa = $unvsub->get_ssa(); >>
312 :    
313 :     Return the name of the subsystem
314 :    
315 :     =cut
316 :    
317 :     sub get_ssa {
318 :     my($self) = @_;
319 :     return $self->{SSA};
320 :     }
321 :    
322 :     =head3 get_ssa_pretty
323 :    
324 :     C<< my $ssa = $unvsub->get_ssa_pretty(); >>
325 :    
326 :     Return the 'prettyfied' name of the subsystem
327 :    
328 :     =cut
329 :    
330 :     sub get_ssa_pretty{
331 :     my($self) = @_;
332 :     my $ssa = $self->{SSA};
333 :     $ssa =~ s/_/ /g;
334 :     return $ssa;
335 :     }
336 :    
337 :    
338 : parrello 1.12 =head3 get_subset_namesR
339 :    
340 :     C<< my @names = $unvsub->get_subset_namesR(); >>
341 :    
342 :     Return the names of the genome (row) subsets.
343 :    
344 :     =cut
345 :    
346 : overbeek 1.6 sub get_subset_namesR {
347 :     my($self) = @_;
348 :    
349 :     return map { $_->[0] } @{$self->{GenomeSubsets}};
350 :     }
351 :    
352 : parrello 1.12 =head3 get_subsetR
353 :    
354 :     C<< my @genomes = $unvsub->get_subsetR($set); >>
355 :    
356 :     Return a list of the genome IDs covered by a row subset.
357 :    
358 :     =over 4
359 :    
360 :     =item set
361 :    
362 :     Name of the row subset whose genomes are desired.
363 :    
364 :     =item RETURN
365 :    
366 :     Returns a list of the IDs for the genomes found in the specified row
367 :     set.
368 :    
369 :     =back
370 :    
371 :     =cut
372 :    
373 : overbeek 1.6 sub get_subsetR {
374 : parrello 1.12 # Get the parameters.
375 : overbeek 1.6 my($self,$set) = @_;
376 :     my($i);
377 : parrello 1.12 # Get the list of row subsets.
378 : overbeek 1.6 my $sets = $self->{GenomeSubsets};
379 : parrello 1.12 # Find the row subset with the specified name. The row subset list is a
380 :     # list of 2-tuples, and the first element of each tuple is the set
381 :     # name.
382 : overbeek 1.6 for ($i=0; ($i < @$sets) && ($sets->[$i]->[0] ne $set); $i++) {}
383 :     if ($i < @$sets)
384 :     {
385 : parrello 1.12 # Here we found the named subset. The subset tuple's second element is
386 :     # the list of row indices. We map these to genome IDs before returning
387 :     # them.
388 :     return map { $self->{Genomes}->[$_]->[0] } @{$sets->[$i]->[1]}
389 : overbeek 1.6 }
390 : parrello 1.12 # Here we subset was not found, so we return the undefined value.
391 : overbeek 1.6 return undef;
392 :     }
393 :    
394 : parrello 1.12 =head3 get_subsetR
395 :    
396 :     C<< my @pairs = $unvsub->get_subsetsR(); >>
397 :    
398 :     Return a list of all the row subsets. The subsets are returned in the form
399 :     of 2-tuples, each consisting of a subset name followed by a reference to a
400 :     list of genome IDs. The genome IDs correspond to the rows in the subset.
401 :    
402 :     =cut
403 :    
404 : overbeek 1.6 sub get_subsetsR {
405 : parrello 1.12 # Get the parameters.
406 : overbeek 1.6 my($self) = @_;
407 : parrello 1.12 # Extract the list of genome subsets. This list is in the form of
408 :     # 2-tuples, but the rows are identified by row index, not genome ID.
409 : overbeek 1.6 my $sets = $self->{GenomeSubsets};
410 : parrello 1.12 # Create the return list.
411 : overbeek 1.6 my @pairs = ();
412 : parrello 1.12 # Loop through the subsets.
413 : overbeek 1.6 my $pair;
414 :     foreach $pair (@$sets)
415 :     {
416 : parrello 1.12 # Convert this subset's member list from row indices to genome IDs
417 :     # and stash the result in the return list.
418 :     my($id,$members) = @$pair;
419 :     push(@pairs,[$id,[map { $self->{Genomes}->[$_]->[0] } @$members]]);
420 : overbeek 1.6 }
421 : parrello 1.12 # Return the list constructed.
422 : overbeek 1.6 return @pairs;
423 :     }
424 :    
425 : parrello 1.12 =head3 row_subsets
426 :    
427 :     C<< my $subsetList = UnvSubsys::row_subsets($fig, \%genomeH); >>
428 :    
429 :     This method computes the taxonomic row subsets for a subsystem. It takes
430 :     as input a hash that maps genome IDs to column indices and a FIG object.
431 :     The FIG object provides a list of taxonomic groups of 10 or more complete
432 :     genomes. From the list, we extract subsets which have more than 10
433 :     genomes in the list of subsystem genomes. If no such subsets exist,
434 :     we extract subsets which have at least 1 genome from the list of
435 :     subsystem genomes. The subsets are returned as 2-tuples, the first
436 :     element being the subset ID and the second being a list of row indices
437 :     for the genomes in the subset.
438 :    
439 :     =over 4
440 :    
441 :     =item fig
442 :    
443 :     A FIG-like object for accessing the data store.
444 :    
445 :     =item genomeH
446 :    
447 :     Reference to a hash that maps each genome ID to its row index in the
448 :     subsystem spreadsheet.
449 :    
450 :     =item RETURN
451 :    
452 :     Returns a reference to a list of 2-tuples. Each 2-tuple consists of a
453 :     subset ID followed by a list of the row indices for the genomes in the
454 :     subset.
455 :    
456 :     =back
457 :    
458 :     =cut
459 :    
460 : overbeek 1.6 sub row_subsets {
461 : parrello 1.12 my ($fig, $genomeH) = @_;
462 : overbeek 1.6
463 : parrello 1.12 # We need a real FIG object, since SFXlate does not yet support
464 :     # taxonomy trees. The "FIG" method does this for us.
465 :     $fig = $fig->FIG();
466 :     # Initialize the return value.
467 : overbeek 1.6 my $subsets = [];
468 : parrello 1.12 # Get a list of taxonomic groups. This will come back as a list of
469 :     # 2-tuples.
470 : overbeek 1.11 my $taxonomic_groups = $fig->taxonomic_groups_of_complete(5);
471 : parrello 1.12 # Loop through the 2-tuples. We're looking for subsets which
472 :     # contain at least one genome on the subsystem's spreadsheet.
473 : overbeek 1.6 my($pair,$id,$members);
474 :     foreach $pair (@$taxonomic_groups)
475 :     {
476 :     ($id,$members) = @$pair;
477 : parrello 1.12 # Extract the genomes in the member list that participate in this
478 :     # subsystem. To do this, we convert each genome ID to its row
479 :     # index. If no row index exists, the GREP condition discards the
480 :     # member.
481 :     my @mem = grep { defined($_) } map { $genomeH->{$_} } @$members;
482 :     # If there are enough members, save the subset.
483 :     if (@mem > 0)
484 :     {
485 :     push(@$subsets,[$id,[@mem]]);
486 :     }
487 : overbeek 1.6 }
488 : parrello 1.12 # Return the list of row subsets.
489 : overbeek 1.6 return $subsets;
490 :     }
491 :    
492 : parrello 1.12 =head3 set_aliases
493 :    
494 :     C<< my $aliasHash = UnvSubsys::set_aliases($fig, $pegH, $active_genomes); >>
495 :    
496 :     Return a hash mapping PEG IDs to aliases.
497 :    
498 :     =over 4
499 :    
500 :     =item fig
501 :    
502 :     FIG-like object that can be used to access the data store.
503 :    
504 :     =item pegH
505 :    
506 :     Reference to the spreadsheet hash table. Given a row index I<$row> and a
507 :     column index I<$col>,
508 :    
509 :     $pegH->{$row}->{$col}
510 :    
511 :     will return a reference to a list of PEGs in the specified spreadsheet cell.
512 :    
513 :     =item active_genomes
514 :    
515 :     Reference to a hash whose keys correspond to the spreadsheet row indices
516 :     of genomes that should be highlighted.
517 :    
518 :     =item RETURN
519 :    
520 :     Returns a hash that takes as input a PEG ID and returns an alias. Only PEGs
521 :     for active genomes will be processed.
522 :    
523 :     =back
524 :    
525 :     =cut
526 :    
527 : overbeek 1.4 sub set_aliases {
528 : parrello 1.12 # Get the parameters.
529 : overbeek 1.7 my($fig,$pegH,$active_genomes) = @_;
530 : overbeek 1.4 my($genomeI,$roleI,$pegs,$peg,$roleH);
531 :    
532 : parrello 1.12 # Create the return hash.
533 : overbeek 1.4 my $aliasesH = {};
534 :    
535 : parrello 1.12 # Loop through each row that corresponds to an active genome.
536 :     # The active genome list contains row indices, and there is
537 :     # one genome per row. Note that the genome ID is never used,
538 :     # only the row index.
539 : overbeek 1.7 foreach $genomeI (grep { $active_genomes->{$_} } keys(%$pegH))
540 : overbeek 1.4 {
541 : parrello 1.12 # Get the role hash for the specified genome. The role hash
542 :     # maps column indices (representing roles) to lists of PEGs.
543 : parrello 1.5 $roleH = $pegH->{$genomeI};
544 : parrello 1.12 # Loop through the role (column) indices.
545 : parrello 1.5 foreach $roleI (keys(%$roleH))
546 :     {
547 : parrello 1.12 # Get the PEG list for this row/column combination.
548 : parrello 1.5 $pegs = $roleH->{$roleI};
549 : parrello 1.12 # Only proceed if data was found in the cell.
550 :     if (defined $pegs) {
551 :     # Loop through the pegs in the cell.
552 :     foreach $peg (@$pegs)
553 : parrello 1.5 {
554 : parrello 1.12 # If we do not already have an alias for this PEG,
555 :     # compute one.
556 :     if (! $aliasesH->{$peg})
557 :     {
558 :     $aliasesH->{$peg} = scalar &ext_id($fig,$peg);
559 :     }
560 : parrello 1.5 }
561 :     }
562 :     }
563 : overbeek 1.4 }
564 : parrello 1.12 # Return the hash we built.
565 : overbeek 1.4 return $aliasesH;
566 :     }
567 :    
568 : parrello 1.12 =head3 set_colors
569 :    
570 :     C<< my $colorHash = UnvSubsys::set_colors($fig, $sub, \%pegH, \%active_genomes); >>
571 :    
572 :     Return a hash that maps each PEG in the subsystem spreadsheet to a display
573 :     color. Not all PEGs need to be mapped. Those that do not have a color
574 :     assigned will generally be displayed in white.
575 :    
576 :     =over 4
577 :    
578 :     =item fig
579 :    
580 :     FIG-like object that can be used to access the data store.
581 :    
582 :     =item sub
583 :    
584 :     Subsystem object for the current subsystem.
585 :    
586 :     =item pegH
587 :    
588 :     Reference to the spreadsheet hash table. Given a row index I<$row> and a
589 :     column index I<$col>,
590 :    
591 :     $pegH->{$row}->{$col}
592 :    
593 :     will return a reference to a list of PEGs in the specified spreadsheet cell.
594 :    
595 :     =item active_genomes
596 :    
597 :     Reference to a hash whose keys correspond to the spreadsheet row indices
598 :     of genomes that should be highlighted.
599 :    
600 :     =item RETURN
601 :    
602 :     Returns a hash that takes as input a PEG ID and returns a color. These colors
603 :     are used when displaying the PEGs in the subsystem spreadsheet. Only PEGs
604 :     for active genomes will be colored.
605 :    
606 :     =back
607 :    
608 :     =cut
609 :    
610 : overbeek 1.4 sub set_colors {
611 : parrello 1.12 # Get the parameters.
612 :     my($fig,$sub,$pegH,$active_genomes) = @_;
613 :    
614 :     my($genomeI,$roleI,$pegs,$peg,$roleH,%pegs_in_genome);
615 :     # Create the return hash.
616 : overbeek 1.4 my $colorsH = {};
617 : parrello 1.12 # Loop through the active genomes. The keys of "%$pegH" are the row indices
618 :     # for rows that have at least one occupied cell in the spreadsheet. The
619 :     # Grep then reduces this list to those rows that are highlighted.
620 : overbeek 1.7 foreach $genomeI (grep { $active_genomes->{$_} } keys(%$pegH))
621 : overbeek 1.4 {
622 : parrello 1.12 # We will use the following hash to compile a list of all the PEGs
623 :     # in the spreadsheet for the current genome, that is, the genome
624 :     # represented by row index "$genomeI".
625 : parrello 1.5 undef %pegs_in_genome;
626 : parrello 1.12 # Get the hash for the current row. This hash maps column indices to
627 :     # lists of pegs.
628 : parrello 1.5 $roleH = $pegH->{$genomeI};
629 : parrello 1.12 # Loop through the column indices for the specified row.
630 : parrello 1.5 foreach $roleI (keys(%$roleH))
631 :     {
632 : parrello 1.12 # Here we can finally get a list of the pegs in this spreadsheet
633 :     # cell. We loop through them, marking them in the "%pegs_in_genome"
634 :     # hash.
635 : parrello 1.5 $pegs = $roleH->{$roleI};
636 :     foreach $peg (@$pegs)
637 :     {
638 :     $pegs_in_genome{$peg} = 1;
639 :     }
640 :     }
641 : parrello 1.12 # Extract the "%pegs_in_genome" hash keys. This gives us a duplicate-free
642 :     # list of all the pegs for the current spreadsheet role.
643 : parrello 1.5 my @pegs = keys(%pegs_in_genome);
644 :     my($tuple,$peg,$color);
645 : parrello 1.12 # Get a hash that maps the PEG IDs to colors.
646 :     my $colors_for_one_genome = &set_colors_for_genome($fig,$sub, \@pegs);
647 :     # Loop through the hash we got back and assign the colors from that
648 :     # hash to the master hash we're returning to the caller.
649 : parrello 1.5 while (($peg,$color) = each %$colors_for_one_genome)
650 :     {
651 :     $colorsH->{$peg} = $colors_for_one_genome->{$peg};
652 :     }
653 : overbeek 1.4 }
654 : parrello 1.12 # Return the color hash.
655 : overbeek 1.4 return $colorsH;
656 :     }
657 :    
658 : parrello 1.12 =head3 set_colors_for_genome
659 :    
660 :     C<< my $colorHash = UnvSubsys::set_colors_for_genome($fig, $sub, \@pegs); >>
661 :    
662 :     Return a reference to a hash mapping the specified pegs to colors. PEGs that
663 :     are physically close to each other will be painted the same color.
664 :    
665 :     =over 4
666 :    
667 :     =item fig
668 :    
669 :     A fig-like object that can be used to access the data store.
670 :    
671 :     =item sub
672 :    
673 :     Subsystem object for the relevant subsystem.
674 : overbeek 1.4
675 : parrello 1.12 =item pegs
676 : overbeek 1.4
677 : parrello 1.12 Reference to a list of PEG IDs. All of the peg IDs should be for the
678 :     same genome.
679 : overbeek 1.4
680 : parrello 1.12 =item RETURN
681 : overbeek 1.4
682 : parrello 1.12 Returns a reference to a hash that maps each PEG ID to a color.
683 : overbeek 1.4
684 : parrello 1.12 =back
685 : overbeek 1.4
686 : parrello 1.12 =cut
687 : overbeek 1.4
688 : parrello 1.12 sub set_colors_for_genome {
689 :     # Get the parameters.
690 :     my($fig, $sub, $pegs) = @_;
691 :     # Default all the PEGs to white.
692 :     my %color_of = map { $_ => '#FFFFFF' } @$pegs;
693 :     # Divide the pegs into clusters.
694 :     my @clusters = $fig->compute_clusters($pegs, $sub);
695 :     # Get a list of useful colors.
696 :     my @colors = &cool_colors();
697 :     # If we have too many clusters, chop off the big ones at the end. These
698 :     # are least likely to be important.
699 :     if (@clusters > @colors) { splice(@clusters, 0, (@clusters - @colors)) }
700 :     # Loop through the clusters.
701 :     for my $cluster (@clusters) {
702 :     # Get the color for this cluster.
703 :     my $color = shift @colors;
704 :     # Loop through this cluster, putting this color into the color_of
705 :     # entries for each PEG.
706 :     for my $peg (@$cluster) {
707 :     $color_of{$peg} = $color;
708 : parrello 1.5 }
709 : overbeek 1.4 }
710 : parrello 1.12 # Return the color map.
711 :     return \%color_of;
712 : overbeek 1.4 }
713 :    
714 : parrello 1.12 =head3 cool_colors
715 :    
716 :     C<< my @colorList = UnvSubsys::cool_colors(); >>
717 :    
718 :     Return a list of web-safe colors.
719 :    
720 :     =cut
721 :    
722 : overbeek 1.4 sub cool_colors {
723 :     # just an array of "websafe" colors or whatever colors we want to use. Feel free to remove bad colors (hence the lines not being equal length!)
724 :     return (
725 :     '#C0C0C0', '#FF40C0', '#FF8040', '#FF0080', '#FFC040', '#40C0FF', '#40FFC0', '#C08080', '#C0FF00', '#00FF80', '#00C040',
726 :     "#6B8E23", "#483D8B", "#2E8B57", "#008000", "#006400", "#800000", "#00FF00", "#7FFFD4",
727 :     "#87CEEB", "#A9A9A9", "#90EE90", "#D2B48C", "#8DBC8F", "#D2691E", "#87CEFA", "#E9967A", "#FFE4C4", "#FFB6C1",
728 :     "#E0FFFF", "#FFA07A", "#DB7093", "#9370DB", "#008B8B", "#FFDEAD", "#DA70D6", "#DCDCDC", "#FF00FF", "#6A5ACD",
729 :     "#00FA9A", "#228B22", "#1E90FF", "#FA8072", "#CD853F", "#DC143C", "#FF6347", "#98FB98", "#4682B4",
730 :     "#D3D3D3", "#7B68EE", "#2F4F4F", "#FF7F50", "#FF69B4", "#BC8F8F", "#A0522D", "#DEB887", "#00DED1",
731 :     "#6495ED", "#800080", "#FFD700", "#F5DEB3", "#66CDAA", "#FF4500", "#4B0082", "#CD5C5C",
732 :     "#EE82EE", "#7CFC00", "#FFFF00", "#191970", "#FFFFE0", "#DDA0DD", "#00BFFF", "#DAA520", "#008080",
733 :     "#00FF7F", "#9400D3", "#BA55D3", "#D8BFD8", "#8B4513", "#3CB371", "#00008B", "#5F9EA0",
734 :     "#4169E1", "#20B2AA", "#8A2BE2", "#ADFF2F", "#556B2F",
735 :     "#F0FFFF", "#B0E0E6", "#FF1493", "#B8860B", "#FF0000", "#F08080", "#7FFF00", "#8B0000",
736 :     "#40E0D0", "#0000CD", "#48D1CC", "#8B008B", "#696969", "#AFEEEE", "#FF8C00", "#EEE8AA", "#A52A2A",
737 :     "#FFE4B5", "#B0C4DE", "#FAF0E6", "#9ACD32", "#B22222", "#FAFAD2", "#808080", "#0000FF",
738 :     "#000080", "#32CD32", "#FFFACD", "#9932CC", "#FFA500", "#F0E68C", "#E6E6FA", "#F4A460", "#C71585",
739 :     "#BDB76B", "#00FFFF", "#FFDAB9", "#ADD8E6", "#778899",
740 :     );
741 :     }
742 :    
743 : parrello 1.12 =head3 ext_id
744 :    
745 :     C<< my $externalID = UnvSubsys::ext_id($fig, $peg); >>
746 :    
747 :     or
748 :    
749 :     C<< my @externalIDs = UnvSubsys::ext_id($fig, $peg); >>
750 :    
751 :     Return a list of non-FIG IDs for the specified feature. In a scalar context, return
752 :     a single non-FIG ID for the specified feature.
753 :    
754 :     This method returns IDs that are all of the same type, that is, all UniProt IDs, or
755 :     all KEGG IDs, and so forth. To do this, it checks the feature's alias list for IDs
756 :     of a given type. If it finds at least one, then all IDs of that type are returned.
757 :     Highest priority is given to the UniProt IDs, then SP IDs, GI IDs, and finally
758 :     KEGG IDs.
759 :    
760 :     =over 4
761 :    
762 :     =item fig
763 :    
764 :     A FIG-like object for accessing the data store.
765 :    
766 :     =item peg
767 :    
768 :     ID of the feature whose aliases are desired.
769 :    
770 :     =item RETURN
771 :    
772 :     In list context, a list of non-FIG IDs for the feature that are all of the same
773 :     type. In scalar context, the first non-FIG ID for the feature of the
774 :     highest-priority available type.
775 :    
776 :     =back
777 :    
778 :     =cut
779 :    
780 : overbeek 1.4 sub ext_id {
781 :     my($fig,$peg) = @_;
782 :    
783 :     my @tmp;
784 :     my @aliases = $fig->feature_aliases($peg);
785 :     if ((@tmp = grep { $_ =~ /^uni\|/ } @aliases) > 0)
786 :     {
787 : parrello 1.5 @aliases = @tmp;
788 : overbeek 1.4 }
789 :     elsif ((@tmp = grep { $_ =~ /^sp\|/ } @aliases) > 0)
790 :     {
791 : parrello 1.5 @aliases = @tmp;
792 : overbeek 1.4 }
793 :     elsif ((@tmp = grep { $_ =~ /^gi\|/ } @aliases) > 0)
794 :     {
795 : parrello 1.5 @aliases = @tmp;
796 : overbeek 1.4 }
797 :     elsif ((@tmp = grep { $_ =~ /^kegg\|/ } @aliases) > 0)
798 :     {
799 : parrello 1.5 @aliases = @tmp;
800 : overbeek 1.4 }
801 :     else
802 :     {
803 : parrello 1.5 @aliases = ();
804 : overbeek 1.4 }
805 :    
806 :     if (wantarray())
807 :     {
808 : parrello 1.5 return @aliases;
809 : overbeek 1.4 }
810 :     else
811 :     {
812 : parrello 1.5 return $aliases[0];
813 : overbeek 1.4 }
814 :     }
815 :    
816 : parrello 1.12 =head3 subsystem_curator
817 :    
818 :     C<< my $name = $unvsub->subsystem_curator(); >>
819 :    
820 :     Return the name of the subsystem curator. The database stores user names as
821 :     C<master:>I<name>. This method strips off the C<master:> prefix before it
822 :     passes the result back to the caller.
823 :    
824 :     =cut
825 : overbeek 1.4
826 : overbeek 1.2 sub subsystem_curator {
827 :     my($self) = @_;
828 :    
829 :     my $curator = $self->{Curator};
830 :     $curator =~ s/master://;
831 :     return $curator;
832 :     }
833 :    
834 : parrello 1.12 =head3 get_roles
835 :    
836 :     C<< my @roles = $unvsub->get_roles(); >>
837 :    
838 :     Return a list of the roles (columns) for this subsystem. The roles will be
839 :     returned in column order, so that if you access the sixth element of the
840 :     return list, you'll get the name of the role for the sixth column.
841 :    
842 :     =cut
843 :    
844 : overbeek 1.2 sub get_roles {
845 :     my($self) = @_;
846 : parrello 1.12 # The role index in this object is a list of 3-tuples. The caller only
847 :     # wants the first element of each tuple, which is the role name.
848 : overbeek 1.2 return map { $_->[0] } @{$self->{Roles}};
849 :     }
850 :    
851 : parrello 1.12 =head3 get_genome_index
852 :    
853 :     C<< my $index = $unvsub->get_genome_index($genome); >>
854 :    
855 :     Return the row index of the specified genome.
856 :    
857 :     =over 4
858 :    
859 :     =item genome
860 :    
861 :     ID of the genome whose row index is desired.
862 :    
863 :     =item RETURN
864 :    
865 :     Returns the index of the row corresponding to the specified genome, or an
866 :     undefined value if the genome is not represented in the subsystem
867 :     spreadsheet.
868 :    
869 :     =back
870 :    
871 :     =cut
872 :    
873 : overbeek 1.3 sub get_genome_index {
874 :     my($self,$genome) = @_;
875 :    
876 :     return $self->{GenomeIndex}->{$genome};
877 :     }
878 :    
879 : parrello 1.12 =head3 get_genomes
880 :    
881 :     C<< my @genomes = $unvsub->get_genomes(); >>
882 :    
883 :     Return a list of the genome IDs for the subsystem. The genomes will be
884 :     presented in row order. In other words, if you index into the sixth
885 :     element of the return list, you will retrieve the ID of the genome for
886 :     the sixth row.
887 :    
888 :     =cut
889 :    
890 : overbeek 1.3 sub get_genomes {
891 :     my($self) = @_;
892 : parrello 1.12 # The genome array is a list of 2-tuples. We extract the first
893 :     # element of each tuple, which is the genome ID.
894 : overbeek 1.3 return map { $_->[0] } @{$self->{Genomes}};
895 :     }
896 :    
897 : parrello 1.12 =head3 get_variant_code
898 :    
899 :     C<< my $code = $unvsub->get_variant_code($genome); >>
900 :    
901 :     Return the variant code for a genome. Each subsystem has several variations.
902 :     The variant code indicates which variation of a subsystem is used by a
903 :     particular genome.
904 :    
905 :     Genome data is stored in a list of 2-tuples. The first element is the genome
906 :     ID; the second is the variant code.
907 :    
908 :     =over 4
909 :    
910 :     =item genome
911 :    
912 :     ID or row index of the genome whose variant code is desired.
913 :    
914 :     =item RETURN
915 :    
916 :     Returns the variant code for the specified genome.
917 :    
918 :     =back
919 :    
920 :     =cut
921 :    
922 : overbeek 1.3 sub get_variant_code {
923 :     my($self,$genome) = @_;
924 : parrello 1.12 # Check to see if we have a row index.
925 : overbeek 1.3 if ($genome =~ /^\d+$/)
926 :     {
927 : parrello 1.12 # Here we have a row index, so use it to find the genome's variant
928 :     # code.
929 : parrello 1.5 return $self->{Genomes}->[$genome]->[1];
930 : overbeek 1.3 }
931 :     else
932 :     {
933 : parrello 1.12 # Here we have a genome ID, so we need to convert it to a row index.
934 : parrello 1.5 my $genomeI = $self->{GenomeIndex}->{$genome};
935 :     return $self->{Genomes}->[$genomeI]->[1];
936 : overbeek 1.3 }
937 :     }
938 :    
939 : parrello 1.12 =head3 get_pegs_from_cell
940 :    
941 :     C<< my @pegs = $unvsub->get_pegs_from_cell($genome, $role); >>
942 :    
943 :     Return a list of the features in a specified spreadsheet cell. The cell is specified
944 :     by genome ID and role ID.
945 :    
946 :     =over 4
947 :    
948 :     =item genome
949 :    
950 :     ID of the genome relevant to the cell.
951 :    
952 :     =item role
953 :    
954 :     ID of the role relevant to the cell.
955 :    
956 :     =item RETURN
957 :    
958 :     Returns a list of the features in the cell, or an empty list if the cell is empty.
959 :    
960 :     =back
961 :    
962 :     =cut
963 :    
964 : overbeek 1.3 sub get_pegs_from_cell {
965 :     my($self,$genome,$role) = @_;
966 : parrello 1.12 # Convert the genome and role IDs to row and column indices.
967 : overbeek 1.3 my $genomeI = $self->{GenomeIndex}->{$genome};
968 :     my $roleI = $self->{RoleIndex}->{$role};
969 : parrello 1.12 # Get the pegs from the cell and return them.
970 : overbeek 1.3 my $pegs = $self->{PegHash}->{$genomeI}->{$roleI};
971 :     return $pegs ? @$pegs : ();
972 :     }
973 :    
974 : parrello 1.14 =head3 get_diagrams
975 :    
976 :     C<< my @list = $sub->get_diagrams(); >>
977 :    
978 :     Return a list of the diagrams associated with this subsystem. Each diagram
979 :     is represented in the return list as a 4-tuple C<[diagram_id, diagram_name,
980 :     page_link, img_link]> where
981 :    
982 :     =over 4
983 :    
984 :     =item diagram_id
985 :    
986 :     ID code for this diagram.
987 :    
988 :     =item diagram_name
989 :    
990 :     Displayable name of the diagram.
991 :    
992 :     =item page_link
993 :    
994 :     URL of an HTML page containing information about the diagram.
995 :    
996 :     =item img_link
997 :    
998 :     URL of an HTML page containing an image for the diagram.
999 :    
1000 :     =back
1001 :    
1002 :     Note that the URLs are in fact for CGI scripts with parameters that point them
1003 :     to the correct place. Though Sprout has diagram information in it, it has
1004 :     no relationship to the diagrams displayed in SEED, so the work is done entirely
1005 :     on the SEED side.
1006 :    
1007 :     =cut
1008 :    
1009 :     sub get_diagrams {
1010 :     # Get the parameters.
1011 :     my ($self) = @_;
1012 :     # Return the diagram list.
1013 :     return @{$self->{Diagrams}};
1014 :     }
1015 :    
1016 : overbeek 1.3 sub get_notes {
1017 :     my($self) = @_;
1018 :    
1019 :     return $self->{Notes};
1020 :     }
1021 :    
1022 : overbeek 1.2 sub get_role_index {
1023 :     my($self,$role) = @_;
1024 :    
1025 :     return $self->{RoleIndex}->{$role};
1026 :     }
1027 :    
1028 :     sub get_role_abbr {
1029 :     my($self,$roleI) = @_;
1030 :    
1031 :     if ($roleI !~ /^\d+$/)
1032 :     {
1033 : parrello 1.5 $roleI = $self->{RoleIndex}->{$roleI};
1034 : overbeek 1.2 }
1035 :     my $roles = $self->{Roles};
1036 :     return $roles->[$roleI]->[1];
1037 :     }
1038 :    
1039 :     sub get_reactions {
1040 :     my($self) = @_;
1041 :    
1042 :     return $self->{Reactions};
1043 :     }
1044 :    
1045 :     sub get_subset_namesC {
1046 :     my($self) = @_;
1047 :    
1048 :     return map { $_->[0] } @{$self->{RoleSubsets}};
1049 :     }
1050 :    
1051 :     sub get_subsetC_roles {
1052 :     my($self,$subset) = @_;
1053 :     my($i,$j);
1054 :    
1055 :     my $subset_info = $self->{RoleSubsets};
1056 :     for ($i=0; ($i < @$subset_info) && ($subset_info->[$i]->[0] ne $subset); $i++) {}
1057 :     if ($i < @$subset_info)
1058 :     {
1059 : parrello 1.5 my @roles = ();
1060 :     foreach $j (@{$subset_info->[$i]->[1]})
1061 :     {
1062 :     push(@roles,$self->{Roles}->[$j]->[0]);
1063 :     }
1064 :     return @roles;
1065 : overbeek 1.2 }
1066 :     return undef;
1067 :     }
1068 :    
1069 : overbeek 1.4 sub get_color_of {
1070 :     my($self,$peg) = @_;
1071 :    
1072 :     return $self->{Colors}->{$peg};
1073 :     }
1074 :    
1075 : parrello 1.12 =head3 active_genomes
1076 :    
1077 :     C<< my $activeHash = UnvSubsys::active_genomes(\@row_subsets, $active_subsetR, $focus, \%genomeH, \@genomes_info); >>
1078 :    
1079 :     Return a hash containing the active genomes for this subsystem display. The
1080 :     keys of the hash will be the row indices of the genomes to be highlighted on the
1081 :     display. Each genome ID will map to 1. Thus, if C<< $activeHash->{3} >>
1082 :     tests TRUE, the fourth row should be highlighted.
1083 :    
1084 :     The rules for determining the active genomes are as follows. If I<$active_subsetR> is
1085 :     specified, it is presumed to be the ID of the subset containing the active genomes.
1086 :     If it is not specified and I<$focus> is specified, then I<$focus> is presumed to be the
1087 :     ID of the genome currently in focus, and the active genomes will be the ones in the
1088 :     smallest row subset containing the genome in focus. If neither I<$active_subsetR> nor
1089 :     I<$focus> are specified, then all genomes are active.
1090 :    
1091 :     =over 4
1092 :    
1093 :     =item row_subsets
1094 :    
1095 :     Reference to a list of 2-tuples. Each tuple consists of a row subset ID followed by
1096 :     a reference to a list of the row indices for the rows in the identified subset.
1097 :    
1098 :     =item active_subsetR (optional)
1099 :    
1100 :     ID of the active subset (if any).
1101 :    
1102 :     =item focus
1103 :    
1104 :     ID of the genome currently in focus (if any). If there is no active subset, then
1105 :     the smallest subset containing this genome will be made active.
1106 :    
1107 :     =item genomeH
1108 :    
1109 :     Reference to a hash of genome IDs to row indices. The keys of this hash are
1110 :     the genomes in this subsystem, which also form the subsystem spreadsheet's
1111 :     rows.
1112 :    
1113 :     =item genomes_info
1114 :    
1115 :     Reference to a list of 2-tuples. The first element of each 2-tuple is the
1116 :     ID of a genome; the second is the variant code for the subsystem variant
1117 :     used by the genome. The tuples are ordered by row index, so that the ID
1118 :     and variant code of the genome in a particular row can be located by indexing
1119 :     into this parameter using the subsystem spreadsheet row number.
1120 :    
1121 :     =item RETURN
1122 :    
1123 :     Returns a reference to a hash that maps the row indices of the active genomes
1124 :     to 1. This hash can be used to quickly determine whether or not a particular
1125 :     row is to be highlighted.
1126 :    
1127 :     =back
1128 :    
1129 :     =cut
1130 :    
1131 : overbeek 1.7 sub active_genomes {
1132 : parrello 1.12 # Get the parameters.
1133 :     my($fig, $row_subsets, $active_subsetR, $focus, $genomeH, $genomes_info) = @_;
1134 : overbeek 1.7 my($i,@bestL);
1135 : parrello 1.12 # Declare the return variable.
1136 : overbeek 1.7 my $active_genomes = {};
1137 : parrello 1.12 # Check for an active subset.
1138 : overbeek 1.7 if ($active_subsetR)
1139 :     {
1140 : parrello 1.12 # Search for the active subset in the row subset array.
1141 :     for ($i=0; ($i < @$row_subsets) && ($row_subsets->[$i]->[0] ne $active_subsetR); $i++) {}
1142 :     if ($i < @$row_subsets)
1143 :     {
1144 :     # Here we found the active subset, so we extract its list of row indices.
1145 :     @bestL = @{$row_subsets->[$i]->[1]};
1146 :     }
1147 :     else {
1148 :     # Here we have the ID of the active subset. First, we search for that ID
1149 :     # in the row subset list.
1150 :     for ($i=0; ($i < @$row_subsets) && ($row_subsets->[$i]->[0] ne $active_subsetR); $i++) {}
1151 :     if ($i < @$row_subsets)
1152 :     {
1153 :     # Here we found the named subset, so we return its member list.
1154 :     @bestL = @{$row_subsets->[$i]->[1]};
1155 :     }
1156 :     else
1157 :     {
1158 :     # Here the active subset does not exist. We punt by extracting a
1159 :     # list of all the row indices in the spreadsheet.
1160 :     $active_subsetR = 'All';
1161 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1162 :     }
1163 :     }
1164 : overbeek 1.7 }
1165 : parrello 1.12 elsif ($focus)
1166 : overbeek 1.7 {
1167 : parrello 1.12 # Here we don't have an active row subset, but a particular genome is in
1168 :     # focus. We'll look for the smallest row subset containing the genome
1169 :     # in focus. First, we need to prime the loop. "$bestN" will be the ID
1170 :     # of the best subset found so far; "@bestL" is where we stash the list
1171 :     # of IDs in the subset. Our initial selection, then, will be the
1172 :     # fake "All" subset, which contains the entire collection of rows.
1173 : overbeek 1.16
1174 : parrello 1.12 if (! $fig->is_complete($focus))
1175 :     {
1176 :     # Here the gnome in focus is incomplete, so it won't be anywhere
1177 :     # in our list. We default to making everything active.
1178 :     $active_subsetR = 'All';
1179 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1180 :     } else {
1181 :     my $bestN = "All";
1182 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1183 :     # Next, we get the row index for the genome in focus.
1184 :     my $focusIndex = $genomeH->{$focus};
1185 :     # Now we loop through all the row subsets.
1186 :     my $tuple;
1187 :     foreach $tuple (@$row_subsets)
1188 :     {
1189 :     # Extract the subset ID and its list of row indices. The latter is
1190 :     # in "$genomeIs".
1191 :     my($id,$genomeIs) = @$tuple;
1192 :     # Search for the index of the focus row in the current subset's list
1193 :     # of row indices.
1194 :     for ($i=0; ($i < @$genomeIs) && ($genomeIs->[$i] != $focusIndex); $i++) {}
1195 :     # Now either $i will be the index of the focus row in the subset, or
1196 :     # it is going to be equal to the number of rows in the subset.
1197 :     if ($i < @$genomeIs)
1198 :     {
1199 :     # We've found the focus row in this subset. Select it as the new
1200 :     # best subset if it's smaller than the last one we found.
1201 :     if (@$genomeIs < @bestL)
1202 :     {
1203 :     $bestN = $id;
1204 :     @bestL = @$genomeIs;
1205 :     }
1206 :     }
1207 :     }
1208 :     # Save the best subset found as the active one.
1209 :     $active_subsetR = $bestN;
1210 :     }
1211 :     } else {
1212 :     # Here we have nothing: no active subset, and no focus row. We make
1213 :     # all rows active.
1214 :     $active_subsetR = 'All';
1215 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1216 : overbeek 1.7 }
1217 : parrello 1.12 # "@bestL" now contains a list of the row indices for the active genomes.
1218 :     # We convert it to a hash and return it.
1219 : overbeek 1.7 my %active_genomes = map { $_ => 1 } @bestL;
1220 :     return \%active_genomes;
1221 :     }
1222 :    
1223 : olson 1.13 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3