[Bio] / FigKernelPackages / UnvSubsys.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/UnvSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.14 - (view) (download) (as text)

1 : overbeek 1.1 package UnvSubsys;
2 :    
3 : overbeek 1.2 use Subsystem;
4 : overbeek 1.1 use Carp;
5 :     use FIG;
6 : parrello 1.12 use SFXlate;
7 : overbeek 1.1 use Data::Dumper;
8 :     use strict;
9 : parrello 1.12 use Tracer;
10 : overbeek 1.1
11 : parrello 1.12 =head1 Universal Subsystem Object
12 :    
13 :     =head2 Introduction
14 :    
15 :     The universal subsystem object provides methods used to display useful information
16 :     about a subsystem. Its intent is to support both Sprout and SEED.
17 :    
18 :     The soul of a subsystem is its spreadsheet. The columns of the spreadsheet are
19 :     the subsystem roles. The rows are the genomes that contain the subsystem. PEGs
20 :     are stored in the spreadsheet's cells. Each genome can be identified by genome
21 :     ID or row index. Each role can be identified by role name or column index.
22 :    
23 :     It is worth noting that the term I<subset> has two meanings-- a subset of roles
24 :     or a subset of genomes. A subset of roles is called a I<column subset>. A
25 :     subset of genomes is called a I<row subset>.
26 :    
27 :     The object created contains a great deal of information, so it's worthwhile to
28 :     stop for a moment and discuss it. The object will have the following members.
29 :    
30 :     =over 4
31 :    
32 :     =item Roles
33 :    
34 :     A list of 3-tuples, each consisting of a role name, the abbreviated role name,
35 :     and a list of URLs for the reactions. Each role is a column in the
36 :     subsystem spreadsheet. Indexing into the list by column index yields the
37 :     ID, abbreviation, and reactions for the role in that column.
38 :    
39 :     =item RoleIndex
40 :    
41 :     A hash mapping each role to its column index in the spreadsheet.
42 :    
43 :     =item RoleSubsets
44 :    
45 :     A list of 2-tuples, each consisting of a subset name followed by a list of the
46 :     column indices for the roles in the subset.
47 :    
48 :     =item Genomes
49 :    
50 :     A list of 2-tuples, each containing the genome ID for the relevant row and the
51 :     variant code for that genome. Subsystems can have several variations, and the
52 :     variant code indicates which variant of the subsystem that the genome uses.
53 :     There is one genome for each row in the spreadsheet. Indexing into the list
54 :     yields the ID of the genome for that row and its variant code.
55 :    
56 :     =item GenomeIndex
57 :    
58 :     A hash mapping each genome ID to its row index in the spreadsheet.
59 :    
60 :     =item PegHash
61 :    
62 :     A hash of hashes containing the spreadsheet cells. If C<$pegHash> is the hash
63 :     of hashes, C<$row> is a genome index, and C<$col> is a role index, then
64 :    
65 :     $pegHash->{$row}->{$col}
66 :    
67 :     returns a reference to a list of the IDs for the PEGs in the relevant
68 :     spreadsheet cell.
69 :    
70 :     =item ColorHash
71 :    
72 :     A hash mapping each PEG ID to the color that should be used to represent that
73 :     PEG in the display.
74 :    
75 :     =item AliasHash
76 :    
77 :     A hash mapping each PEG ID to a list of its aliases.
78 :    
79 :     =item ReactionHash
80 :    
81 :     A hash mapping each role to the list of reactions it catalyzes.
82 :    
83 :     =back
84 :    
85 :     =head2 Public Methods
86 :    
87 :     =head3 new
88 :    
89 :     C<< my $usub = UnvSubsys->new($ssa, $fig, $active_subsetR, $focus, $show_clusters, $aliases, \@peg_colors); >>
90 :    
91 :     Construct a new universal subsystem object for a specified subsystem.
92 :    
93 :     =over 4
94 :    
95 :     =item ssa
96 :    
97 :     Name of the subsystem.
98 :    
99 :     =item fig
100 :    
101 :     Data access object, either a FIG object or an SFXlate object.
102 :    
103 :     =item active_subsetR
104 :    
105 :     Name of the active row subset. A row subset names a group of genomes.
106 :    
107 :     =item focus
108 :    
109 :     ID of the genome currently in focus.
110 :    
111 :     =item show_clusters
112 :    
113 :     TRUE if clusters should be painted by color, else FALSE.
114 :    
115 :     =item aliases
116 :    
117 :     TRUE if PEG aliases should be shown, else FALSE.
118 :    
119 :     =item peg_colors (optional)
120 : overbeek 1.1
121 : parrello 1.12 Reference to a list of 2-tuples, each 2-tuple consisting of a PEG ID and the color
122 :     to be assigned to the PEG.
123 : overbeek 1.1
124 : parrello 1.12 =back
125 : overbeek 1.1
126 : parrello 1.12 =cut
127 : overbeek 1.1
128 : parrello 1.12 sub new {
129 :     # Get the parameters.
130 :     my($class, $ssa, $fig, $active_subsetR, $focus, $show_clusters, $aliases, $peg_colors) = @_;
131 :     # Fix the subsystem name. Spaces are replaced by underscores to avoid naming problems
132 :     # in the seed's file system.
133 :     $ssa =~ s/ /_/g;
134 :     # Get the FIG object. At this point in time, we're only getting data from the SEED. On
135 :     # a future pass through the code, we'll get data from a SEED or Sprout.
136 :     if ((ref($fig) eq "FIG") || (ref($fig) eq 'SFXlate')) {
137 :     # Create a subsystem object. The "get_subsystem" method is provided by both FIG
138 :     # and SFXlate; however, the object returned in each case is different: FIG
139 :     # returns a "Subsystem" object; SFXlate returns a "SproutSubsys" object.
140 :     my $subsystem = $fig->get_subsystem($ssa);
141 :     # Get the key subsystem data. Note that CRs in the notes are converted to LFs
142 :     # in case the notes come from a Mac.
143 : parrello 1.5 my $curator = $subsystem->get_curator;
144 :     my $notes = $subsystem->get_notes;
145 : parrello 1.12 $notes =~ s/\r/\n/g;
146 : parrello 1.5 my @roles = $subsystem->get_roles;
147 :     my $reactions = $subsystem->get_reactions;
148 :     my @genomes = $subsystem->get_genomes;
149 : overbeek 1.1 my @col_subsets = $subsystem->get_subset_namesC;
150 : parrello 1.14 my @diagrams = $subsystem->get_diagrams();
151 : parrello 1.12 # Create the data structures for the role list and the role index.
152 : parrello 1.5 my $role_info = [];
153 :     my $roleH = {};
154 : parrello 1.12 # Loop through the roles to create the role list. The $i index will move
155 :     # through the columns of the spreadsheet.
156 : parrello 1.5 my($i,$j,$subset,$peg);
157 :     for ($i=0; ($i < @roles); $i++)
158 :     {
159 : parrello 1.12 # Get the ID of the role in the current column.
160 : parrello 1.5 my $role = $roles[$i];
161 : parrello 1.12 # Extract its abbreviation.
162 :     my $abbrev = $subsystem->get_role_abbr($i);
163 :     # Get the reactions. Note that its possible no reactions were found. If there were
164 :     # reactions found, however, we convert from reaction IDs to a comma-delimited
165 :     # list of HTML code full of hyperlinks.
166 : parrello 1.5 my $react = $reactions ? join(",", map { &HTML::reaction_link($_) } @{$reactions->{$role}}) : [];
167 : parrello 1.12 # Form the role name, abbreviation, and reaction list into a 3-tuple and push
168 :     # them onto the main role list.
169 : parrello 1.5 push(@$role_info,[$role,$abbrev,$react]);
170 : parrello 1.12 # Set the role hash so that we can get back the column index for a given
171 :     # role name.
172 : parrello 1.5 $roleH->{$role} = $i;
173 :     }
174 : parrello 1.12 # Get the column subsets. A column subset is a list of role IDs, so we need to
175 :     # convert it to a set of column indices. We ignore the special "All" subset
176 :     # that contains everything.
177 : parrello 1.5 my $subset_info = [];
178 :     foreach $subset (@col_subsets)
179 :     {
180 :     if ($subset ne 'All')
181 :     {
182 :     push(@$subset_info,[$subset,[map { $roleH->{$_} } $subsystem->get_subsetC_roles($subset)]]);
183 :     }
184 :     }
185 : parrello 1.12 # Now we create the genome directories. For each genome we need to be able to
186 :     # hash from the genomeID to the specified role index and we must be able to
187 :     # get the genome's variant code. (The variant code indicates which subsystem
188 :     # variant the genome uses.
189 : parrello 1.5 my $genomes_info = [];
190 :     my $genomeH = {};
191 :     for ($i=0; ($i < @genomes); $i++)
192 :     {
193 : parrello 1.12 # Get the genome ID for this row.
194 : parrello 1.5 my $genome = $genomes[$i];
195 : parrello 1.12 # Get its variant code.
196 :     my $variant = $subsystem->get_variant_code($i);
197 :     # Form them into a 2-tuple and add the result to the genome list.
198 : parrello 1.5 push(@$genomes_info,[$genome,$variant]);
199 : parrello 1.12 # Set up the hash to get from the genome ID to the row index.
200 : parrello 1.5 $genomeH->{$genome} = $i;
201 :     }
202 : parrello 1.12
203 :     # Next we gather the data from the actual spreadsheet cells. For an SFXlate
204 :     # object, this is the most expensive part of the operation, since it requires
205 :     # a database call for each cell.
206 : parrello 1.5 my $pegH = {};
207 : parrello 1.12 # $i is the row index, which cycles through genomes.
208 : parrello 1.5 for ($i=0; ($i < @genomes); $i++)
209 :     {
210 : parrello 1.12 # $j is the column index, which cycles through roles.
211 : parrello 1.5 for ($j=0; ($j < @roles); $j++)
212 :     {
213 : parrello 1.12 my @pegs = $subsystem->get_pegs_from_cell($i,$j);
214 : parrello 1.5 $pegH->{$i}->{$j} = [@pegs];
215 :     }
216 :     }
217 : overbeek 1.7
218 : parrello 1.12 # Get the row subsets. Row subsets are determined by the genome
219 :     # taxonomy, so these are different from the row subsets stored
220 :     # in the subsystem object.
221 :     my $row_subsets = &row_subsets($fig, $genomeH);
222 :     # Here we try to get a list of the active genomes. The caller
223 :     # gives us hints in the form of the "$focus" and "$active_subsetR"
224 :     # parameters. If, for example, we are using this object to generate
225 :     # a subsystem web page, the focus information would steer us to
226 :     # whatever the user wants to look at on the page.
227 : olson 1.13 my $active_genomes = &active_genomes($fig, $row_subsets,$active_subsetR,$focus,
228 : parrello 1.12 $genomeH,$genomes_info);
229 :    
230 :     # Now we generate a table of colors for the various PEGs. If the
231 :     # caller gave us a map of peg IDs to colors, we use that. Otherwise,
232 :     # we allow the option of painting the PEGs by cluster number. (The
233 :     # caller indicates this by setting the "show_clusters" flag in the
234 :     # parameter list.)
235 :     my $colorsH;
236 :     if ($peg_colors)
237 :     {
238 :     # Here the caller gave us a list of peg colors. The list contains
239 :     # 2-tuples, each consisting of a PEG ID followed by the
240 :     # color value. The loop below extracts the pairs and stuffs them
241 :     # into the color hash.
242 :     $colorsH = {};
243 :     foreach $_ (@$peg_colors)
244 :     {
245 :     my($peg,$color) = @$_;
246 :     $colorsH->{$peg} = $color;
247 :     }
248 :     }
249 :     elsif ($show_clusters)
250 :     {
251 :     # Here the user wants us to base the colors on the genome clustering
252 :     # information.
253 :     $colorsH = &set_colors($fig,$subsystem,$pegH,$active_genomes);
254 :     }
255 :     else
256 :     {
257 :     # Here the user is not interested in coloring the PEGs.
258 :     $colorsH = {};
259 :     }
260 :     # If the user wants to see aliases, compute the alias hash. Aliases
261 :     # will only be computed for PEGs belonging to active (highlighted)
262 :     # genomes, and there is a maximum of one alias per PEG.
263 : overbeek 1.7 my $aliasesH = $aliases ? &set_aliases($fig,$pegH,$active_genomes) : {};
264 : parrello 1.12 # Create and bless the UnvSubsys object.
265 : parrello 1.5 my $self = { Roles => $role_info,
266 :     RoleIndex => $roleH,
267 :     RoleSubsets => $subset_info,
268 :     Genomes => $genomes_info,
269 :     GenomeIndex => $genomeH,
270 : parrello 1.12 GenomeSubsets => $row_subsets,
271 : parrello 1.5 PegHash => $pegH,
272 :     Colors => $colorsH,
273 :     Aliases => $aliasesH,
274 :     Curator => $curator,
275 :     Notes => $notes,
276 : parrello 1.14 Reactions => $reactions,
277 :     Diagrams => \@diagrams,
278 : parrello 1.5 };
279 :     bless($self, $class);
280 : parrello 1.12 # Return the object.
281 : parrello 1.5 return $self;
282 : overbeek 1.1 }
283 :     else
284 :     {
285 : parrello 1.12 # Here the FIG-like object was not recognized, so we return an
286 :     # undefined value.
287 : parrello 1.5 return undef;
288 : overbeek 1.1 }
289 :     }
290 :    
291 : parrello 1.12 =head3 get_subset_namesR
292 :    
293 :     C<< my @names = $unvsub->get_subset_namesR(); >>
294 :    
295 :     Return the names of the genome (row) subsets.
296 :    
297 :     =cut
298 :    
299 : overbeek 1.6 sub get_subset_namesR {
300 :     my($self) = @_;
301 :    
302 :     return map { $_->[0] } @{$self->{GenomeSubsets}};
303 :     }
304 :    
305 : parrello 1.12 =head3 get_subsetR
306 :    
307 :     C<< my @genomes = $unvsub->get_subsetR($set); >>
308 :    
309 :     Return a list of the genome IDs covered by a row subset.
310 :    
311 :     =over 4
312 :    
313 :     =item set
314 :    
315 :     Name of the row subset whose genomes are desired.
316 :    
317 :     =item RETURN
318 :    
319 :     Returns a list of the IDs for the genomes found in the specified row
320 :     set.
321 :    
322 :     =back
323 :    
324 :     =cut
325 :    
326 : overbeek 1.6 sub get_subsetR {
327 : parrello 1.12 # Get the parameters.
328 : overbeek 1.6 my($self,$set) = @_;
329 :     my($i);
330 : parrello 1.12 # Get the list of row subsets.
331 : overbeek 1.6 my $sets = $self->{GenomeSubsets};
332 : parrello 1.12 # Find the row subset with the specified name. The row subset list is a
333 :     # list of 2-tuples, and the first element of each tuple is the set
334 :     # name.
335 : overbeek 1.6 for ($i=0; ($i < @$sets) && ($sets->[$i]->[0] ne $set); $i++) {}
336 :     if ($i < @$sets)
337 :     {
338 : parrello 1.12 # Here we found the named subset. The subset tuple's second element is
339 :     # the list of row indices. We map these to genome IDs before returning
340 :     # them.
341 :     return map { $self->{Genomes}->[$_]->[0] } @{$sets->[$i]->[1]}
342 : overbeek 1.6 }
343 : parrello 1.12 # Here we subset was not found, so we return the undefined value.
344 : overbeek 1.6 return undef;
345 :     }
346 :    
347 : parrello 1.12 =head3 get_subsetR
348 :    
349 :     C<< my @pairs = $unvsub->get_subsetsR(); >>
350 :    
351 :     Return a list of all the row subsets. The subsets are returned in the form
352 :     of 2-tuples, each consisting of a subset name followed by a reference to a
353 :     list of genome IDs. The genome IDs correspond to the rows in the subset.
354 :    
355 :     =cut
356 :    
357 : overbeek 1.6 sub get_subsetsR {
358 : parrello 1.12 # Get the parameters.
359 : overbeek 1.6 my($self) = @_;
360 : parrello 1.12 # Extract the list of genome subsets. This list is in the form of
361 :     # 2-tuples, but the rows are identified by row index, not genome ID.
362 : overbeek 1.6 my $sets = $self->{GenomeSubsets};
363 : parrello 1.12 # Create the return list.
364 : overbeek 1.6 my @pairs = ();
365 : parrello 1.12 # Loop through the subsets.
366 : overbeek 1.6 my $pair;
367 :     foreach $pair (@$sets)
368 :     {
369 : parrello 1.12 # Convert this subset's member list from row indices to genome IDs
370 :     # and stash the result in the return list.
371 :     my($id,$members) = @$pair;
372 :     push(@pairs,[$id,[map { $self->{Genomes}->[$_]->[0] } @$members]]);
373 : overbeek 1.6 }
374 : parrello 1.12 # Return the list constructed.
375 : overbeek 1.6 return @pairs;
376 :     }
377 :    
378 : parrello 1.12 =head3 row_subsets
379 :    
380 :     C<< my $subsetList = UnvSubsys::row_subsets($fig, \%genomeH); >>
381 :    
382 :     This method computes the taxonomic row subsets for a subsystem. It takes
383 :     as input a hash that maps genome IDs to column indices and a FIG object.
384 :     The FIG object provides a list of taxonomic groups of 10 or more complete
385 :     genomes. From the list, we extract subsets which have more than 10
386 :     genomes in the list of subsystem genomes. If no such subsets exist,
387 :     we extract subsets which have at least 1 genome from the list of
388 :     subsystem genomes. The subsets are returned as 2-tuples, the first
389 :     element being the subset ID and the second being a list of row indices
390 :     for the genomes in the subset.
391 :    
392 :     =over 4
393 :    
394 :     =item fig
395 :    
396 :     A FIG-like object for accessing the data store.
397 :    
398 :     =item genomeH
399 :    
400 :     Reference to a hash that maps each genome ID to its row index in the
401 :     subsystem spreadsheet.
402 :    
403 :     =item RETURN
404 :    
405 :     Returns a reference to a list of 2-tuples. Each 2-tuple consists of a
406 :     subset ID followed by a list of the row indices for the genomes in the
407 :     subset.
408 :    
409 :     =back
410 :    
411 :     =cut
412 :    
413 : overbeek 1.6 sub row_subsets {
414 : parrello 1.12 my ($fig, $genomeH) = @_;
415 : overbeek 1.6
416 : parrello 1.12 # We need a real FIG object, since SFXlate does not yet support
417 :     # taxonomy trees. The "FIG" method does this for us.
418 :     $fig = $fig->FIG();
419 :     # Initialize the return value.
420 : overbeek 1.6 my $subsets = [];
421 : parrello 1.12 # Get a list of taxonomic groups. This will come back as a list of
422 :     # 2-tuples.
423 : overbeek 1.11 my $taxonomic_groups = $fig->taxonomic_groups_of_complete(5);
424 : parrello 1.12 # Loop through the 2-tuples. We're looking for subsets which
425 :     # contain at least one genome on the subsystem's spreadsheet.
426 : overbeek 1.6 my($pair,$id,$members);
427 :     foreach $pair (@$taxonomic_groups)
428 :     {
429 :     ($id,$members) = @$pair;
430 : parrello 1.12 # Extract the genomes in the member list that participate in this
431 :     # subsystem. To do this, we convert each genome ID to its row
432 :     # index. If no row index exists, the GREP condition discards the
433 :     # member.
434 :     my @mem = grep { defined($_) } map { $genomeH->{$_} } @$members;
435 :     # If there are enough members, save the subset.
436 :     if (@mem > 0)
437 :     {
438 :     push(@$subsets,[$id,[@mem]]);
439 :     }
440 : overbeek 1.6 }
441 : parrello 1.12 # Return the list of row subsets.
442 : overbeek 1.6 return $subsets;
443 :     }
444 :    
445 : parrello 1.12 =head3 set_aliases
446 :    
447 :     C<< my $aliasHash = UnvSubsys::set_aliases($fig, $pegH, $active_genomes); >>
448 :    
449 :     Return a hash mapping PEG IDs to aliases.
450 :    
451 :     =over 4
452 :    
453 :     =item fig
454 :    
455 :     FIG-like object that can be used to access the data store.
456 :    
457 :     =item pegH
458 :    
459 :     Reference to the spreadsheet hash table. Given a row index I<$row> and a
460 :     column index I<$col>,
461 :    
462 :     $pegH->{$row}->{$col}
463 :    
464 :     will return a reference to a list of PEGs in the specified spreadsheet cell.
465 :    
466 :     =item active_genomes
467 :    
468 :     Reference to a hash whose keys correspond to the spreadsheet row indices
469 :     of genomes that should be highlighted.
470 :    
471 :     =item RETURN
472 :    
473 :     Returns a hash that takes as input a PEG ID and returns an alias. Only PEGs
474 :     for active genomes will be processed.
475 :    
476 :     =back
477 :    
478 :     =cut
479 :    
480 : overbeek 1.4 sub set_aliases {
481 : parrello 1.12 # Get the parameters.
482 : overbeek 1.7 my($fig,$pegH,$active_genomes) = @_;
483 : overbeek 1.4 my($genomeI,$roleI,$pegs,$peg,$roleH);
484 :    
485 : parrello 1.12 # Create the return hash.
486 : overbeek 1.4 my $aliasesH = {};
487 :    
488 : parrello 1.12 # Loop through each row that corresponds to an active genome.
489 :     # The active genome list contains row indices, and there is
490 :     # one genome per row. Note that the genome ID is never used,
491 :     # only the row index.
492 : overbeek 1.7 foreach $genomeI (grep { $active_genomes->{$_} } keys(%$pegH))
493 : overbeek 1.4 {
494 : parrello 1.12 # Get the role hash for the specified genome. The role hash
495 :     # maps column indices (representing roles) to lists of PEGs.
496 : parrello 1.5 $roleH = $pegH->{$genomeI};
497 : parrello 1.12 # Loop through the role (column) indices.
498 : parrello 1.5 foreach $roleI (keys(%$roleH))
499 :     {
500 : parrello 1.12 # Get the PEG list for this row/column combination.
501 : parrello 1.5 $pegs = $roleH->{$roleI};
502 : parrello 1.12 # Only proceed if data was found in the cell.
503 :     if (defined $pegs) {
504 :     # Loop through the pegs in the cell.
505 :     foreach $peg (@$pegs)
506 : parrello 1.5 {
507 : parrello 1.12 # If we do not already have an alias for this PEG,
508 :     # compute one.
509 :     if (! $aliasesH->{$peg})
510 :     {
511 :     $aliasesH->{$peg} = scalar &ext_id($fig,$peg);
512 :     }
513 : parrello 1.5 }
514 :     }
515 :     }
516 : overbeek 1.4 }
517 : parrello 1.12 # Return the hash we built.
518 : overbeek 1.4 return $aliasesH;
519 :     }
520 :    
521 : parrello 1.12 =head3 set_colors
522 :    
523 :     C<< my $colorHash = UnvSubsys::set_colors($fig, $sub, \%pegH, \%active_genomes); >>
524 :    
525 :     Return a hash that maps each PEG in the subsystem spreadsheet to a display
526 :     color. Not all PEGs need to be mapped. Those that do not have a color
527 :     assigned will generally be displayed in white.
528 :    
529 :     =over 4
530 :    
531 :     =item fig
532 :    
533 :     FIG-like object that can be used to access the data store.
534 :    
535 :     =item sub
536 :    
537 :     Subsystem object for the current subsystem.
538 :    
539 :     =item pegH
540 :    
541 :     Reference to the spreadsheet hash table. Given a row index I<$row> and a
542 :     column index I<$col>,
543 :    
544 :     $pegH->{$row}->{$col}
545 :    
546 :     will return a reference to a list of PEGs in the specified spreadsheet cell.
547 :    
548 :     =item active_genomes
549 :    
550 :     Reference to a hash whose keys correspond to the spreadsheet row indices
551 :     of genomes that should be highlighted.
552 :    
553 :     =item RETURN
554 :    
555 :     Returns a hash that takes as input a PEG ID and returns a color. These colors
556 :     are used when displaying the PEGs in the subsystem spreadsheet. Only PEGs
557 :     for active genomes will be colored.
558 :    
559 :     =back
560 :    
561 :     =cut
562 :    
563 : overbeek 1.4 sub set_colors {
564 : parrello 1.12 # Get the parameters.
565 :     my($fig,$sub,$pegH,$active_genomes) = @_;
566 :    
567 :     my($genomeI,$roleI,$pegs,$peg,$roleH,%pegs_in_genome);
568 :     # Create the return hash.
569 : overbeek 1.4 my $colorsH = {};
570 : parrello 1.12 # Loop through the active genomes. The keys of "%$pegH" are the row indices
571 :     # for rows that have at least one occupied cell in the spreadsheet. The
572 :     # Grep then reduces this list to those rows that are highlighted.
573 : overbeek 1.7 foreach $genomeI (grep { $active_genomes->{$_} } keys(%$pegH))
574 : overbeek 1.4 {
575 : parrello 1.12 # We will use the following hash to compile a list of all the PEGs
576 :     # in the spreadsheet for the current genome, that is, the genome
577 :     # represented by row index "$genomeI".
578 : parrello 1.5 undef %pegs_in_genome;
579 : parrello 1.12 # Get the hash for the current row. This hash maps column indices to
580 :     # lists of pegs.
581 : parrello 1.5 $roleH = $pegH->{$genomeI};
582 : parrello 1.12 # Loop through the column indices for the specified row.
583 : parrello 1.5 foreach $roleI (keys(%$roleH))
584 :     {
585 : parrello 1.12 # Here we can finally get a list of the pegs in this spreadsheet
586 :     # cell. We loop through them, marking them in the "%pegs_in_genome"
587 :     # hash.
588 : parrello 1.5 $pegs = $roleH->{$roleI};
589 :     foreach $peg (@$pegs)
590 :     {
591 :     $pegs_in_genome{$peg} = 1;
592 :     }
593 :     }
594 : parrello 1.12 # Extract the "%pegs_in_genome" hash keys. This gives us a duplicate-free
595 :     # list of all the pegs for the current spreadsheet role.
596 : parrello 1.5 my @pegs = keys(%pegs_in_genome);
597 :     my($tuple,$peg,$color);
598 : parrello 1.12 # Get a hash that maps the PEG IDs to colors.
599 :     my $colors_for_one_genome = &set_colors_for_genome($fig,$sub, \@pegs);
600 :     # Loop through the hash we got back and assign the colors from that
601 :     # hash to the master hash we're returning to the caller.
602 : parrello 1.5 while (($peg,$color) = each %$colors_for_one_genome)
603 :     {
604 :     $colorsH->{$peg} = $colors_for_one_genome->{$peg};
605 :     }
606 : overbeek 1.4 }
607 : parrello 1.12 # Return the color hash.
608 : overbeek 1.4 return $colorsH;
609 :     }
610 :    
611 : parrello 1.12 =head3 set_colors_for_genome
612 :    
613 :     C<< my $colorHash = UnvSubsys::set_colors_for_genome($fig, $sub, \@pegs); >>
614 :    
615 :     Return a reference to a hash mapping the specified pegs to colors. PEGs that
616 :     are physically close to each other will be painted the same color.
617 :    
618 :     =over 4
619 :    
620 :     =item fig
621 :    
622 :     A fig-like object that can be used to access the data store.
623 :    
624 :     =item sub
625 :    
626 :     Subsystem object for the relevant subsystem.
627 : overbeek 1.4
628 : parrello 1.12 =item pegs
629 : overbeek 1.4
630 : parrello 1.12 Reference to a list of PEG IDs. All of the peg IDs should be for the
631 :     same genome.
632 : overbeek 1.4
633 : parrello 1.12 =item RETURN
634 : overbeek 1.4
635 : parrello 1.12 Returns a reference to a hash that maps each PEG ID to a color.
636 : overbeek 1.4
637 : parrello 1.12 =back
638 : overbeek 1.4
639 : parrello 1.12 =cut
640 : overbeek 1.4
641 : parrello 1.12 sub set_colors_for_genome {
642 :     # Get the parameters.
643 :     my($fig, $sub, $pegs) = @_;
644 :     # Default all the PEGs to white.
645 :     my %color_of = map { $_ => '#FFFFFF' } @$pegs;
646 :     # Divide the pegs into clusters.
647 :     my @clusters = $fig->compute_clusters($pegs, $sub);
648 :     # Get a list of useful colors.
649 :     my @colors = &cool_colors();
650 :     # If we have too many clusters, chop off the big ones at the end. These
651 :     # are least likely to be important.
652 :     if (@clusters > @colors) { splice(@clusters, 0, (@clusters - @colors)) }
653 :     # Loop through the clusters.
654 :     for my $cluster (@clusters) {
655 :     # Get the color for this cluster.
656 :     my $color = shift @colors;
657 :     # Loop through this cluster, putting this color into the color_of
658 :     # entries for each PEG.
659 :     for my $peg (@$cluster) {
660 :     $color_of{$peg} = $color;
661 : parrello 1.5 }
662 : overbeek 1.4 }
663 : parrello 1.12 # Return the color map.
664 :     return \%color_of;
665 : overbeek 1.4 }
666 :    
667 : parrello 1.12 =head3 cool_colors
668 :    
669 :     C<< my @colorList = UnvSubsys::cool_colors(); >>
670 :    
671 :     Return a list of web-safe colors.
672 :    
673 :     =cut
674 :    
675 : overbeek 1.4 sub cool_colors {
676 :     # just an array of "websafe" colors or whatever colors we want to use. Feel free to remove bad colors (hence the lines not being equal length!)
677 :     return (
678 :     '#C0C0C0', '#FF40C0', '#FF8040', '#FF0080', '#FFC040', '#40C0FF', '#40FFC0', '#C08080', '#C0FF00', '#00FF80', '#00C040',
679 :     "#6B8E23", "#483D8B", "#2E8B57", "#008000", "#006400", "#800000", "#00FF00", "#7FFFD4",
680 :     "#87CEEB", "#A9A9A9", "#90EE90", "#D2B48C", "#8DBC8F", "#D2691E", "#87CEFA", "#E9967A", "#FFE4C4", "#FFB6C1",
681 :     "#E0FFFF", "#FFA07A", "#DB7093", "#9370DB", "#008B8B", "#FFDEAD", "#DA70D6", "#DCDCDC", "#FF00FF", "#6A5ACD",
682 :     "#00FA9A", "#228B22", "#1E90FF", "#FA8072", "#CD853F", "#DC143C", "#FF6347", "#98FB98", "#4682B4",
683 :     "#D3D3D3", "#7B68EE", "#2F4F4F", "#FF7F50", "#FF69B4", "#BC8F8F", "#A0522D", "#DEB887", "#00DED1",
684 :     "#6495ED", "#800080", "#FFD700", "#F5DEB3", "#66CDAA", "#FF4500", "#4B0082", "#CD5C5C",
685 :     "#EE82EE", "#7CFC00", "#FFFF00", "#191970", "#FFFFE0", "#DDA0DD", "#00BFFF", "#DAA520", "#008080",
686 :     "#00FF7F", "#9400D3", "#BA55D3", "#D8BFD8", "#8B4513", "#3CB371", "#00008B", "#5F9EA0",
687 :     "#4169E1", "#20B2AA", "#8A2BE2", "#ADFF2F", "#556B2F",
688 :     "#F0FFFF", "#B0E0E6", "#FF1493", "#B8860B", "#FF0000", "#F08080", "#7FFF00", "#8B0000",
689 :     "#40E0D0", "#0000CD", "#48D1CC", "#8B008B", "#696969", "#AFEEEE", "#FF8C00", "#EEE8AA", "#A52A2A",
690 :     "#FFE4B5", "#B0C4DE", "#FAF0E6", "#9ACD32", "#B22222", "#FAFAD2", "#808080", "#0000FF",
691 :     "#000080", "#32CD32", "#FFFACD", "#9932CC", "#FFA500", "#F0E68C", "#E6E6FA", "#F4A460", "#C71585",
692 :     "#BDB76B", "#00FFFF", "#FFDAB9", "#ADD8E6", "#778899",
693 :     );
694 :     }
695 :    
696 : parrello 1.12 =head3 ext_id
697 :    
698 :     C<< my $externalID = UnvSubsys::ext_id($fig, $peg); >>
699 :    
700 :     or
701 :    
702 :     C<< my @externalIDs = UnvSubsys::ext_id($fig, $peg); >>
703 :    
704 :     Return a list of non-FIG IDs for the specified feature. In a scalar context, return
705 :     a single non-FIG ID for the specified feature.
706 :    
707 :     This method returns IDs that are all of the same type, that is, all UniProt IDs, or
708 :     all KEGG IDs, and so forth. To do this, it checks the feature's alias list for IDs
709 :     of a given type. If it finds at least one, then all IDs of that type are returned.
710 :     Highest priority is given to the UniProt IDs, then SP IDs, GI IDs, and finally
711 :     KEGG IDs.
712 :    
713 :     =over 4
714 :    
715 :     =item fig
716 :    
717 :     A FIG-like object for accessing the data store.
718 :    
719 :     =item peg
720 :    
721 :     ID of the feature whose aliases are desired.
722 :    
723 :     =item RETURN
724 :    
725 :     In list context, a list of non-FIG IDs for the feature that are all of the same
726 :     type. In scalar context, the first non-FIG ID for the feature of the
727 :     highest-priority available type.
728 :    
729 :     =back
730 :    
731 :     =cut
732 :    
733 : overbeek 1.4 sub ext_id {
734 :     my($fig,$peg) = @_;
735 :    
736 :     my @tmp;
737 :     my @aliases = $fig->feature_aliases($peg);
738 :     if ((@tmp = grep { $_ =~ /^uni\|/ } @aliases) > 0)
739 :     {
740 : parrello 1.5 @aliases = @tmp;
741 : overbeek 1.4 }
742 :     elsif ((@tmp = grep { $_ =~ /^sp\|/ } @aliases) > 0)
743 :     {
744 : parrello 1.5 @aliases = @tmp;
745 : overbeek 1.4 }
746 :     elsif ((@tmp = grep { $_ =~ /^gi\|/ } @aliases) > 0)
747 :     {
748 : parrello 1.5 @aliases = @tmp;
749 : overbeek 1.4 }
750 :     elsif ((@tmp = grep { $_ =~ /^kegg\|/ } @aliases) > 0)
751 :     {
752 : parrello 1.5 @aliases = @tmp;
753 : overbeek 1.4 }
754 :     else
755 :     {
756 : parrello 1.5 @aliases = ();
757 : overbeek 1.4 }
758 :    
759 :     if (wantarray())
760 :     {
761 : parrello 1.5 return @aliases;
762 : overbeek 1.4 }
763 :     else
764 :     {
765 : parrello 1.5 return $aliases[0];
766 : overbeek 1.4 }
767 :     }
768 :    
769 : parrello 1.12 =head3 subsystem_curator
770 :    
771 :     C<< my $name = $unvsub->subsystem_curator(); >>
772 :    
773 :     Return the name of the subsystem curator. The database stores user names as
774 :     C<master:>I<name>. This method strips off the C<master:> prefix before it
775 :     passes the result back to the caller.
776 :    
777 :     =cut
778 : overbeek 1.4
779 : overbeek 1.2 sub subsystem_curator {
780 :     my($self) = @_;
781 :    
782 :     my $curator = $self->{Curator};
783 :     $curator =~ s/master://;
784 :     return $curator;
785 :     }
786 :    
787 : parrello 1.12 =head3 get_roles
788 :    
789 :     C<< my @roles = $unvsub->get_roles(); >>
790 :    
791 :     Return a list of the roles (columns) for this subsystem. The roles will be
792 :     returned in column order, so that if you access the sixth element of the
793 :     return list, you'll get the name of the role for the sixth column.
794 :    
795 :     =cut
796 :    
797 : overbeek 1.2 sub get_roles {
798 :     my($self) = @_;
799 : parrello 1.12 # The role index in this object is a list of 3-tuples. The caller only
800 :     # wants the first element of each tuple, which is the role name.
801 : overbeek 1.2 return map { $_->[0] } @{$self->{Roles}};
802 :     }
803 :    
804 : parrello 1.12 =head3 get_genome_index
805 :    
806 :     C<< my $index = $unvsub->get_genome_index($genome); >>
807 :    
808 :     Return the row index of the specified genome.
809 :    
810 :     =over 4
811 :    
812 :     =item genome
813 :    
814 :     ID of the genome whose row index is desired.
815 :    
816 :     =item RETURN
817 :    
818 :     Returns the index of the row corresponding to the specified genome, or an
819 :     undefined value if the genome is not represented in the subsystem
820 :     spreadsheet.
821 :    
822 :     =back
823 :    
824 :     =cut
825 :    
826 : overbeek 1.3 sub get_genome_index {
827 :     my($self,$genome) = @_;
828 :    
829 :     return $self->{GenomeIndex}->{$genome};
830 :     }
831 :    
832 : parrello 1.12 =head3 get_genomes
833 :    
834 :     C<< my @genomes = $unvsub->get_genomes(); >>
835 :    
836 :     Return a list of the genome IDs for the subsystem. The genomes will be
837 :     presented in row order. In other words, if you index into the sixth
838 :     element of the return list, you will retrieve the ID of the genome for
839 :     the sixth row.
840 :    
841 :     =cut
842 :    
843 : overbeek 1.3 sub get_genomes {
844 :     my($self) = @_;
845 : parrello 1.12 # The genome array is a list of 2-tuples. We extract the first
846 :     # element of each tuple, which is the genome ID.
847 : overbeek 1.3 return map { $_->[0] } @{$self->{Genomes}};
848 :     }
849 :    
850 : parrello 1.12 =head3 get_variant_code
851 :    
852 :     C<< my $code = $unvsub->get_variant_code($genome); >>
853 :    
854 :     Return the variant code for a genome. Each subsystem has several variations.
855 :     The variant code indicates which variation of a subsystem is used by a
856 :     particular genome.
857 :    
858 :     Genome data is stored in a list of 2-tuples. The first element is the genome
859 :     ID; the second is the variant code.
860 :    
861 :     =over 4
862 :    
863 :     =item genome
864 :    
865 :     ID or row index of the genome whose variant code is desired.
866 :    
867 :     =item RETURN
868 :    
869 :     Returns the variant code for the specified genome.
870 :    
871 :     =back
872 :    
873 :     =cut
874 :    
875 : overbeek 1.3 sub get_variant_code {
876 :     my($self,$genome) = @_;
877 : parrello 1.12 # Check to see if we have a row index.
878 : overbeek 1.3 if ($genome =~ /^\d+$/)
879 :     {
880 : parrello 1.12 # Here we have a row index, so use it to find the genome's variant
881 :     # code.
882 : parrello 1.5 return $self->{Genomes}->[$genome]->[1];
883 : overbeek 1.3 }
884 :     else
885 :     {
886 : parrello 1.12 # Here we have a genome ID, so we need to convert it to a row index.
887 : parrello 1.5 my $genomeI = $self->{GenomeIndex}->{$genome};
888 :     return $self->{Genomes}->[$genomeI]->[1];
889 : overbeek 1.3 }
890 :     }
891 :    
892 : parrello 1.12 =head3 get_pegs_from_cell
893 :    
894 :     C<< my @pegs = $unvsub->get_pegs_from_cell($genome, $role); >>
895 :    
896 :     Return a list of the features in a specified spreadsheet cell. The cell is specified
897 :     by genome ID and role ID.
898 :    
899 :     =over 4
900 :    
901 :     =item genome
902 :    
903 :     ID of the genome relevant to the cell.
904 :    
905 :     =item role
906 :    
907 :     ID of the role relevant to the cell.
908 :    
909 :     =item RETURN
910 :    
911 :     Returns a list of the features in the cell, or an empty list if the cell is empty.
912 :    
913 :     =back
914 :    
915 :     =cut
916 :    
917 : overbeek 1.3 sub get_pegs_from_cell {
918 :     my($self,$genome,$role) = @_;
919 : parrello 1.12 # Convert the genome and role IDs to row and column indices.
920 : overbeek 1.3 my $genomeI = $self->{GenomeIndex}->{$genome};
921 :     my $roleI = $self->{RoleIndex}->{$role};
922 : parrello 1.12 # Get the pegs from the cell and return them.
923 : overbeek 1.3 my $pegs = $self->{PegHash}->{$genomeI}->{$roleI};
924 :     return $pegs ? @$pegs : ();
925 :     }
926 :    
927 : parrello 1.14 =head3 get_diagrams
928 :    
929 :     C<< my @list = $sub->get_diagrams(); >>
930 :    
931 :     Return a list of the diagrams associated with this subsystem. Each diagram
932 :     is represented in the return list as a 4-tuple C<[diagram_id, diagram_name,
933 :     page_link, img_link]> where
934 :    
935 :     =over 4
936 :    
937 :     =item diagram_id
938 :    
939 :     ID code for this diagram.
940 :    
941 :     =item diagram_name
942 :    
943 :     Displayable name of the diagram.
944 :    
945 :     =item page_link
946 :    
947 :     URL of an HTML page containing information about the diagram.
948 :    
949 :     =item img_link
950 :    
951 :     URL of an HTML page containing an image for the diagram.
952 :    
953 :     =back
954 :    
955 :     Note that the URLs are in fact for CGI scripts with parameters that point them
956 :     to the correct place. Though Sprout has diagram information in it, it has
957 :     no relationship to the diagrams displayed in SEED, so the work is done entirely
958 :     on the SEED side.
959 :    
960 :     =cut
961 :    
962 :     sub get_diagrams {
963 :     # Get the parameters.
964 :     my ($self) = @_;
965 :     # Return the diagram list.
966 :     return @{$self->{Diagrams}};
967 :     }
968 :    
969 : overbeek 1.3 sub get_notes {
970 :     my($self) = @_;
971 :    
972 :     return $self->{Notes};
973 :     }
974 :    
975 : overbeek 1.2 sub get_role_index {
976 :     my($self,$role) = @_;
977 :    
978 :     return $self->{RoleIndex}->{$role};
979 :     }
980 :    
981 :     sub get_role_abbr {
982 :     my($self,$roleI) = @_;
983 :    
984 :     if ($roleI !~ /^\d+$/)
985 :     {
986 : parrello 1.5 $roleI = $self->{RoleIndex}->{$roleI};
987 : overbeek 1.2 }
988 :     my $roles = $self->{Roles};
989 :     return $roles->[$roleI]->[1];
990 :     }
991 :    
992 :     sub get_reactions {
993 :     my($self) = @_;
994 :    
995 :     return $self->{Reactions};
996 :     }
997 :    
998 :     sub get_subset_namesC {
999 :     my($self) = @_;
1000 :    
1001 :     return map { $_->[0] } @{$self->{RoleSubsets}};
1002 :     }
1003 :    
1004 :     sub get_subsetC_roles {
1005 :     my($self,$subset) = @_;
1006 :     my($i,$j);
1007 :    
1008 :     my $subset_info = $self->{RoleSubsets};
1009 :     for ($i=0; ($i < @$subset_info) && ($subset_info->[$i]->[0] ne $subset); $i++) {}
1010 :     if ($i < @$subset_info)
1011 :     {
1012 : parrello 1.5 my @roles = ();
1013 :     foreach $j (@{$subset_info->[$i]->[1]})
1014 :     {
1015 :     push(@roles,$self->{Roles}->[$j]->[0]);
1016 :     }
1017 :     return @roles;
1018 : overbeek 1.2 }
1019 :     return undef;
1020 :     }
1021 :    
1022 : overbeek 1.4 sub get_color_of {
1023 :     my($self,$peg) = @_;
1024 :    
1025 :     return $self->{Colors}->{$peg};
1026 :     }
1027 :    
1028 : parrello 1.12 =head3 active_genomes
1029 :    
1030 :     C<< my $activeHash = UnvSubsys::active_genomes(\@row_subsets, $active_subsetR, $focus, \%genomeH, \@genomes_info); >>
1031 :    
1032 :     Return a hash containing the active genomes for this subsystem display. The
1033 :     keys of the hash will be the row indices of the genomes to be highlighted on the
1034 :     display. Each genome ID will map to 1. Thus, if C<< $activeHash->{3} >>
1035 :     tests TRUE, the fourth row should be highlighted.
1036 :    
1037 :     The rules for determining the active genomes are as follows. If I<$active_subsetR> is
1038 :     specified, it is presumed to be the ID of the subset containing the active genomes.
1039 :     If it is not specified and I<$focus> is specified, then I<$focus> is presumed to be the
1040 :     ID of the genome currently in focus, and the active genomes will be the ones in the
1041 :     smallest row subset containing the genome in focus. If neither I<$active_subsetR> nor
1042 :     I<$focus> are specified, then all genomes are active.
1043 :    
1044 :     =over 4
1045 :    
1046 :     =item row_subsets
1047 :    
1048 :     Reference to a list of 2-tuples. Each tuple consists of a row subset ID followed by
1049 :     a reference to a list of the row indices for the rows in the identified subset.
1050 :    
1051 :     =item active_subsetR (optional)
1052 :    
1053 :     ID of the active subset (if any).
1054 :    
1055 :     =item focus
1056 :    
1057 :     ID of the genome currently in focus (if any). If there is no active subset, then
1058 :     the smallest subset containing this genome will be made active.
1059 :    
1060 :     =item genomeH
1061 :    
1062 :     Reference to a hash of genome IDs to row indices. The keys of this hash are
1063 :     the genomes in this subsystem, which also form the subsystem spreadsheet's
1064 :     rows.
1065 :    
1066 :     =item genomes_info
1067 :    
1068 :     Reference to a list of 2-tuples. The first element of each 2-tuple is the
1069 :     ID of a genome; the second is the variant code for the subsystem variant
1070 :     used by the genome. The tuples are ordered by row index, so that the ID
1071 :     and variant code of the genome in a particular row can be located by indexing
1072 :     into this parameter using the subsystem spreadsheet row number.
1073 :    
1074 :     =item RETURN
1075 :    
1076 :     Returns a reference to a hash that maps the row indices of the active genomes
1077 :     to 1. This hash can be used to quickly determine whether or not a particular
1078 :     row is to be highlighted.
1079 :    
1080 :     =back
1081 :    
1082 :     =cut
1083 :    
1084 : overbeek 1.7 sub active_genomes {
1085 : parrello 1.12 # Get the parameters.
1086 :     my($fig, $row_subsets, $active_subsetR, $focus, $genomeH, $genomes_info) = @_;
1087 : overbeek 1.7 my($i,@bestL);
1088 : parrello 1.12 # Declare the return variable.
1089 : overbeek 1.7 my $active_genomes = {};
1090 : parrello 1.12 # Check for an active subset.
1091 : overbeek 1.7 if ($active_subsetR)
1092 :     {
1093 : parrello 1.12 # Search for the active subset in the row subset array.
1094 :     for ($i=0; ($i < @$row_subsets) && ($row_subsets->[$i]->[0] ne $active_subsetR); $i++) {}
1095 :     if ($i < @$row_subsets)
1096 :     {
1097 :     # Here we found the active subset, so we extract its list of row indices.
1098 :     @bestL = @{$row_subsets->[$i]->[1]};
1099 :     }
1100 :     else {
1101 :     # Here we have the ID of the active subset. First, we search for that ID
1102 :     # in the row subset list.
1103 :     for ($i=0; ($i < @$row_subsets) && ($row_subsets->[$i]->[0] ne $active_subsetR); $i++) {}
1104 :     if ($i < @$row_subsets)
1105 :     {
1106 :     # Here we found the named subset, so we return its member list.
1107 :     @bestL = @{$row_subsets->[$i]->[1]};
1108 :     }
1109 :     else
1110 :     {
1111 :     # Here the active subset does not exist. We punt by extracting a
1112 :     # list of all the row indices in the spreadsheet.
1113 :     $active_subsetR = 'All';
1114 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1115 :     }
1116 :     }
1117 : overbeek 1.7 }
1118 : parrello 1.12 elsif ($focus)
1119 : overbeek 1.7 {
1120 : parrello 1.12 # Here we don't have an active row subset, but a particular genome is in
1121 :     # focus. We'll look for the smallest row subset containing the genome
1122 :     # in focus. First, we need to prime the loop. "$bestN" will be the ID
1123 :     # of the best subset found so far; "@bestL" is where we stash the list
1124 :     # of IDs in the subset. Our initial selection, then, will be the
1125 :     # fake "All" subset, which contains the entire collection of rows.
1126 :     if (! $fig->is_complete($focus))
1127 :     {
1128 :     # Here the gnome in focus is incomplete, so it won't be anywhere
1129 :     # in our list. We default to making everything active.
1130 :     $active_subsetR = 'All';
1131 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1132 :     } else {
1133 :     my $bestN = "All";
1134 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1135 :     # Next, we get the row index for the genome in focus.
1136 :     my $focusIndex = $genomeH->{$focus};
1137 :     # Now we loop through all the row subsets.
1138 :     my $tuple;
1139 :     foreach $tuple (@$row_subsets)
1140 :     {
1141 :     # Extract the subset ID and its list of row indices. The latter is
1142 :     # in "$genomeIs".
1143 :     my($id,$genomeIs) = @$tuple;
1144 :     # Search for the index of the focus row in the current subset's list
1145 :     # of row indices.
1146 :     for ($i=0; ($i < @$genomeIs) && ($genomeIs->[$i] != $focusIndex); $i++) {}
1147 :     # Now either $i will be the index of the focus row in the subset, or
1148 :     # it is going to be equal to the number of rows in the subset.
1149 :     if ($i < @$genomeIs)
1150 :     {
1151 :     # We've found the focus row in this subset. Select it as the new
1152 :     # best subset if it's smaller than the last one we found.
1153 :     if (@$genomeIs < @bestL)
1154 :     {
1155 :     $bestN = $id;
1156 :     @bestL = @$genomeIs;
1157 :     }
1158 :     }
1159 :     }
1160 :     # Save the best subset found as the active one.
1161 :     $active_subsetR = $bestN;
1162 :     }
1163 :     } else {
1164 :     # Here we have nothing: no active subset, and no focus row. We make
1165 :     # all rows active.
1166 :     $active_subsetR = 'All';
1167 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1168 : overbeek 1.7 }
1169 : parrello 1.12 # "@bestL" now contains a list of the row indices for the active genomes.
1170 :     # We convert it to a hash and return it.
1171 : overbeek 1.7 my %active_genomes = map { $_ => 1 } @bestL;
1172 :     return \%active_genomes;
1173 :     }
1174 :    
1175 : olson 1.13 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3