[Bio] / FigKernelPackages / UnvSubsys.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/UnvSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.19 - (view) (download) (as text)

1 : olson 1.15 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 : parrello 1.19 #
7 : olson 1.15 # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 : parrello 1.19 # Public License.
10 : olson 1.15 #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : overbeek 1.1 package UnvSubsys;
19 :    
20 : overbeek 1.2 use Subsystem;
21 : overbeek 1.1 use Carp;
22 :     use FIG;
23 : parrello 1.12 use SFXlate;
24 : overbeek 1.1 use Data::Dumper;
25 :     use strict;
26 : parrello 1.12 use Tracer;
27 : overbeek 1.1
28 : parrello 1.12 =head1 Universal Subsystem Object
29 :    
30 :     =head2 Introduction
31 :    
32 :     The universal subsystem object provides methods used to display useful information
33 :     about a subsystem. Its intent is to support both Sprout and SEED.
34 :    
35 :     The soul of a subsystem is its spreadsheet. The columns of the spreadsheet are
36 :     the subsystem roles. The rows are the genomes that contain the subsystem. PEGs
37 :     are stored in the spreadsheet's cells. Each genome can be identified by genome
38 :     ID or row index. Each role can be identified by role name or column index.
39 :    
40 :     It is worth noting that the term I<subset> has two meanings-- a subset of roles
41 :     or a subset of genomes. A subset of roles is called a I<column subset>. A
42 :     subset of genomes is called a I<row subset>.
43 :    
44 :     The object created contains a great deal of information, so it's worthwhile to
45 :     stop for a moment and discuss it. The object will have the following members.
46 :    
47 :     =over 4
48 :    
49 :     =item Roles
50 :    
51 :     A list of 3-tuples, each consisting of a role name, the abbreviated role name,
52 :     and a list of URLs for the reactions. Each role is a column in the
53 :     subsystem spreadsheet. Indexing into the list by column index yields the
54 :     ID, abbreviation, and reactions for the role in that column.
55 :    
56 :     =item RoleIndex
57 :    
58 :     A hash mapping each role to its column index in the spreadsheet.
59 :    
60 :     =item RoleSubsets
61 :    
62 :     A list of 2-tuples, each consisting of a subset name followed by a list of the
63 :     column indices for the roles in the subset.
64 :    
65 :     =item Genomes
66 :    
67 :     A list of 2-tuples, each containing the genome ID for the relevant row and the
68 :     variant code for that genome. Subsystems can have several variations, and the
69 :     variant code indicates which variant of the subsystem that the genome uses.
70 :     There is one genome for each row in the spreadsheet. Indexing into the list
71 :     yields the ID of the genome for that row and its variant code.
72 :    
73 :     =item GenomeIndex
74 :    
75 :     A hash mapping each genome ID to its row index in the spreadsheet.
76 :    
77 :     =item PegHash
78 :    
79 :     A hash of hashes containing the spreadsheet cells. If C<$pegHash> is the hash
80 :     of hashes, C<$row> is a genome index, and C<$col> is a role index, then
81 :    
82 :     $pegHash->{$row}->{$col}
83 :    
84 :     returns a reference to a list of the IDs for the PEGs in the relevant
85 :     spreadsheet cell.
86 :    
87 :     =item ColorHash
88 :    
89 :     A hash mapping each PEG ID to the color that should be used to represent that
90 :     PEG in the display.
91 :    
92 :     =item AliasHash
93 :    
94 :     A hash mapping each PEG ID to a list of its aliases.
95 :    
96 :     =item ReactionHash
97 :    
98 :     A hash mapping each role to the list of reactions it catalyzes.
99 :    
100 :     =back
101 :    
102 :     =head2 Public Methods
103 :    
104 :     =head3 new
105 :    
106 : parrello 1.19 my $usub = UnvSubsys->new($ssa, $fig, $active_subsetR, $focus, $show_clusters, $aliases, \@peg_colors);
107 : parrello 1.12
108 :     Construct a new universal subsystem object for a specified subsystem.
109 :    
110 :     =over 4
111 :    
112 :     =item ssa
113 :    
114 :     Name of the subsystem.
115 :    
116 :     =item fig
117 :    
118 :     Data access object, either a FIG object or an SFXlate object.
119 :    
120 :     =item active_subsetR
121 :    
122 :     Name of the active row subset. A row subset names a group of genomes.
123 :    
124 :     =item focus
125 :    
126 :     ID of the genome currently in focus.
127 :    
128 :     =item show_clusters
129 :    
130 :     TRUE if clusters should be painted by color, else FALSE.
131 :    
132 :     =item aliases
133 :    
134 :     TRUE if PEG aliases should be shown, else FALSE.
135 :    
136 :     =item peg_colors (optional)
137 : overbeek 1.1
138 : parrello 1.12 Reference to a list of 2-tuples, each 2-tuple consisting of a PEG ID and the color
139 :     to be assigned to the PEG.
140 : overbeek 1.1
141 : parrello 1.12 =back
142 : overbeek 1.1
143 : parrello 1.12 =cut
144 : overbeek 1.1
145 : parrello 1.12 sub new {
146 :     # Get the parameters.
147 :     my($class, $ssa, $fig, $active_subsetR, $focus, $show_clusters, $aliases, $peg_colors) = @_;
148 :     # Fix the subsystem name. Spaces are replaced by underscores to avoid naming problems
149 :     # in the seed's file system.
150 :     $ssa =~ s/ /_/g;
151 :     # Get the FIG object. At this point in time, we're only getting data from the SEED. On
152 :     # a future pass through the code, we'll get data from a SEED or Sprout.
153 : olson 1.18 if ((ref($fig) eq "FIG") || (ref($fig) eq "FIGV") || (ref($fig) eq 'SFXlate')) {
154 : parrello 1.12 # Create a subsystem object. The "get_subsystem" method is provided by both FIG
155 :     # and SFXlate; however, the object returned in each case is different: FIG
156 :     # returns a "Subsystem" object; SFXlate returns a "SproutSubsys" object.
157 :     my $subsystem = $fig->get_subsystem($ssa);
158 :     # Get the key subsystem data. Note that CRs in the notes are converted to LFs
159 :     # in case the notes come from a Mac.
160 : parrello 1.5 my $curator = $subsystem->get_curator;
161 :     my $notes = $subsystem->get_notes;
162 : parrello 1.12 $notes =~ s/\r/\n/g;
163 : parrello 1.5 my @roles = $subsystem->get_roles;
164 :     my $reactions = $subsystem->get_reactions;
165 :     my @genomes = $subsystem->get_genomes;
166 : overbeek 1.1 my @col_subsets = $subsystem->get_subset_namesC;
167 : parrello 1.14 my @diagrams = $subsystem->get_diagrams();
168 : parrello 1.12 # Create the data structures for the role list and the role index.
169 : parrello 1.5 my $role_info = [];
170 :     my $roleH = {};
171 : parrello 1.12 # Loop through the roles to create the role list. The $i index will move
172 :     # through the columns of the spreadsheet.
173 : parrello 1.5 my($i,$j,$subset,$peg);
174 :     for ($i=0; ($i < @roles); $i++)
175 :     {
176 : parrello 1.12 # Get the ID of the role in the current column.
177 : parrello 1.5 my $role = $roles[$i];
178 : parrello 1.12 # Extract its abbreviation.
179 :     my $abbrev = $subsystem->get_role_abbr($i);
180 :     # Get the reactions. Note that its possible no reactions were found. If there were
181 :     # reactions found, however, we convert from reaction IDs to a comma-delimited
182 :     # list of HTML code full of hyperlinks.
183 : parrello 1.5 my $react = $reactions ? join(",", map { &HTML::reaction_link($_) } @{$reactions->{$role}}) : [];
184 : parrello 1.12 # Form the role name, abbreviation, and reaction list into a 3-tuple and push
185 :     # them onto the main role list.
186 : parrello 1.5 push(@$role_info,[$role,$abbrev,$react]);
187 : parrello 1.12 # Set the role hash so that we can get back the column index for a given
188 :     # role name.
189 : parrello 1.5 $roleH->{$role} = $i;
190 :     }
191 : parrello 1.12 # Get the column subsets. A column subset is a list of role IDs, so we need to
192 :     # convert it to a set of column indices. We ignore the special "All" subset
193 :     # that contains everything.
194 : parrello 1.5 my $subset_info = [];
195 :     foreach $subset (@col_subsets)
196 :     {
197 :     if ($subset ne 'All')
198 :     {
199 :     push(@$subset_info,[$subset,[map { $roleH->{$_} } $subsystem->get_subsetC_roles($subset)]]);
200 :     }
201 :     }
202 : parrello 1.12 # Now we create the genome directories. For each genome we need to be able to
203 :     # hash from the genomeID to the specified role index and we must be able to
204 :     # get the genome's variant code. (The variant code indicates which subsystem
205 :     # variant the genome uses.
206 : parrello 1.5 my $genomes_info = [];
207 :     my $genomeH = {};
208 :     for ($i=0; ($i < @genomes); $i++)
209 :     {
210 : parrello 1.12 # Get the genome ID for this row.
211 : parrello 1.5 my $genome = $genomes[$i];
212 : parrello 1.19 # Get its variant code.
213 : parrello 1.12 my $variant = $subsystem->get_variant_code($i);
214 :     # Form them into a 2-tuple and add the result to the genome list.
215 : parrello 1.5 push(@$genomes_info,[$genome,$variant]);
216 : parrello 1.12 # Set up the hash to get from the genome ID to the row index.
217 : parrello 1.5 $genomeH->{$genome} = $i;
218 :     }
219 : parrello 1.19
220 : parrello 1.12 # Next we gather the data from the actual spreadsheet cells. For an SFXlate
221 :     # object, this is the most expensive part of the operation, since it requires
222 :     # a database call for each cell.
223 : parrello 1.5 my $pegH = {};
224 : parrello 1.12 # $i is the row index, which cycles through genomes.
225 : parrello 1.5 for ($i=0; ($i < @genomes); $i++)
226 :     {
227 : parrello 1.12 # $j is the column index, which cycles through roles.
228 : parrello 1.5 for ($j=0; ($j < @roles); $j++)
229 :     {
230 : parrello 1.12 my @pegs = $subsystem->get_pegs_from_cell($i,$j);
231 : parrello 1.5 $pegH->{$i}->{$j} = [@pegs];
232 :     }
233 :     }
234 : overbeek 1.7
235 : parrello 1.12 # Get the row subsets. Row subsets are determined by the genome
236 :     # taxonomy, so these are different from the row subsets stored
237 :     # in the subsystem object.
238 :     my $row_subsets = &row_subsets($fig, $genomeH);
239 :     # Here we try to get a list of the active genomes. The caller
240 :     # gives us hints in the form of the "$focus" and "$active_subsetR"
241 :     # parameters. If, for example, we are using this object to generate
242 :     # a subsystem web page, the focus information would steer us to
243 :     # whatever the user wants to look at on the page.
244 : olson 1.13 my $active_genomes = &active_genomes($fig, $row_subsets,$active_subsetR,$focus,
245 : parrello 1.12 $genomeH,$genomes_info);
246 :    
247 :     # Now we generate a table of colors for the various PEGs. If the
248 :     # caller gave us a map of peg IDs to colors, we use that. Otherwise,
249 :     # we allow the option of painting the PEGs by cluster number. (The
250 :     # caller indicates this by setting the "show_clusters" flag in the
251 :     # parameter list.)
252 :     my $colorsH;
253 :     if ($peg_colors)
254 :     {
255 :     # Here the caller gave us a list of peg colors. The list contains
256 :     # 2-tuples, each consisting of a PEG ID followed by the
257 :     # color value. The loop below extracts the pairs and stuffs them
258 :     # into the color hash.
259 :     $colorsH = {};
260 :     foreach $_ (@$peg_colors)
261 :     {
262 :     my($peg,$color) = @$_;
263 :     $colorsH->{$peg} = $color;
264 :     }
265 :     }
266 :     elsif ($show_clusters)
267 :     {
268 :     # Here the user wants us to base the colors on the genome clustering
269 :     # information.
270 :     $colorsH = &set_colors($fig,$subsystem,$pegH,$active_genomes);
271 :     }
272 :     else
273 :     {
274 :     # Here the user is not interested in coloring the PEGs.
275 :     $colorsH = {};
276 :     }
277 :     # If the user wants to see aliases, compute the alias hash. Aliases
278 :     # will only be computed for PEGs belonging to active (highlighted)
279 :     # genomes, and there is a maximum of one alias per PEG.
280 : overbeek 1.7 my $aliasesH = $aliases ? &set_aliases($fig,$pegH,$active_genomes) : {};
281 : parrello 1.12 # Create and bless the UnvSubsys object.
282 : paarmann 1.17 my $self = { SSA => $ssa,
283 :     Roles => $role_info,
284 : parrello 1.5 RoleIndex => $roleH,
285 :     RoleSubsets => $subset_info,
286 :     Genomes => $genomes_info,
287 :     GenomeIndex => $genomeH,
288 : parrello 1.12 GenomeSubsets => $row_subsets,
289 : parrello 1.5 PegHash => $pegH,
290 :     Colors => $colorsH,
291 :     Aliases => $aliasesH,
292 :     Curator => $curator,
293 :     Notes => $notes,
294 : parrello 1.14 Reactions => $reactions,
295 :     Diagrams => \@diagrams,
296 : parrello 1.5 };
297 :     bless($self, $class);
298 : parrello 1.12 # Return the object.
299 : parrello 1.5 return $self;
300 : overbeek 1.1 }
301 :     else
302 :     {
303 : parrello 1.12 # Here the FIG-like object was not recognized, so we return an
304 :     # undefined value.
305 : parrello 1.5 return undef;
306 : overbeek 1.1 }
307 :     }
308 :    
309 : paarmann 1.17 =head3 get_ssa
310 :    
311 : parrello 1.19 my $ssa = $unvsub->get_ssa();
312 : paarmann 1.17
313 :     Return the name of the subsystem
314 :    
315 :     =cut
316 :    
317 :     sub get_ssa {
318 :     my($self) = @_;
319 :     return $self->{SSA};
320 :     }
321 :    
322 :     =head3 get_ssa_pretty
323 :    
324 : parrello 1.19 my $ssa = $unvsub->get_ssa_pretty();
325 : paarmann 1.17
326 : parrello 1.19 Return the 'prettyfied' name of the subsystem
327 : paarmann 1.17
328 :     =cut
329 :    
330 :     sub get_ssa_pretty{
331 :     my($self) = @_;
332 :     my $ssa = $self->{SSA};
333 :     $ssa =~ s/_/ /g;
334 :     return $ssa;
335 :     }
336 :    
337 :    
338 : parrello 1.12 =head3 get_subset_namesR
339 :    
340 : parrello 1.19 my @names = $unvsub->get_subset_namesR();
341 : parrello 1.12
342 :     Return the names of the genome (row) subsets.
343 :    
344 :     =cut
345 :    
346 : overbeek 1.6 sub get_subset_namesR {
347 :     my($self) = @_;
348 :    
349 :     return map { $_->[0] } @{$self->{GenomeSubsets}};
350 :     }
351 :    
352 : parrello 1.12 =head3 get_subsetR
353 :    
354 : parrello 1.19 my @genomes = $unvsub->get_subsetR($set);
355 : parrello 1.12
356 :     Return a list of the genome IDs covered by a row subset.
357 :    
358 :     =over 4
359 :    
360 :     =item set
361 :    
362 :     Name of the row subset whose genomes are desired.
363 :    
364 :     =item RETURN
365 :    
366 :     Returns a list of the IDs for the genomes found in the specified row
367 :     set.
368 :    
369 :     =back
370 :    
371 :     =cut
372 :    
373 : overbeek 1.6 sub get_subsetR {
374 : parrello 1.12 # Get the parameters.
375 : overbeek 1.6 my($self,$set) = @_;
376 :     my($i);
377 : parrello 1.12 # Get the list of row subsets.
378 : overbeek 1.6 my $sets = $self->{GenomeSubsets};
379 : parrello 1.12 # Find the row subset with the specified name. The row subset list is a
380 :     # list of 2-tuples, and the first element of each tuple is the set
381 :     # name.
382 : overbeek 1.6 for ($i=0; ($i < @$sets) && ($sets->[$i]->[0] ne $set); $i++) {}
383 :     if ($i < @$sets)
384 :     {
385 : parrello 1.12 # Here we found the named subset. The subset tuple's second element is
386 :     # the list of row indices. We map these to genome IDs before returning
387 :     # them.
388 :     return map { $self->{Genomes}->[$_]->[0] } @{$sets->[$i]->[1]}
389 : overbeek 1.6 }
390 : parrello 1.12 # Here we subset was not found, so we return the undefined value.
391 : overbeek 1.6 return undef;
392 :     }
393 :    
394 : parrello 1.12 =head3 get_subsetR
395 :    
396 : parrello 1.19 my @pairs = $unvsub->get_subsetsR();
397 : parrello 1.12
398 :     Return a list of all the row subsets. The subsets are returned in the form
399 :     of 2-tuples, each consisting of a subset name followed by a reference to a
400 :     list of genome IDs. The genome IDs correspond to the rows in the subset.
401 :    
402 :     =cut
403 :    
404 : overbeek 1.6 sub get_subsetsR {
405 : parrello 1.12 # Get the parameters.
406 : overbeek 1.6 my($self) = @_;
407 : parrello 1.12 # Extract the list of genome subsets. This list is in the form of
408 :     # 2-tuples, but the rows are identified by row index, not genome ID.
409 : overbeek 1.6 my $sets = $self->{GenomeSubsets};
410 : parrello 1.12 # Create the return list.
411 : overbeek 1.6 my @pairs = ();
412 : parrello 1.12 # Loop through the subsets.
413 : overbeek 1.6 my $pair;
414 :     foreach $pair (@$sets)
415 :     {
416 : parrello 1.12 # Convert this subset's member list from row indices to genome IDs
417 :     # and stash the result in the return list.
418 :     my($id,$members) = @$pair;
419 :     push(@pairs,[$id,[map { $self->{Genomes}->[$_]->[0] } @$members]]);
420 : overbeek 1.6 }
421 : parrello 1.12 # Return the list constructed.
422 : overbeek 1.6 return @pairs;
423 :     }
424 :    
425 : parrello 1.12 =head3 row_subsets
426 :    
427 : parrello 1.19 my $subsetList = UnvSubsys::row_subsets($fig, \%genomeH);
428 : parrello 1.12
429 :     This method computes the taxonomic row subsets for a subsystem. It takes
430 :     as input a hash that maps genome IDs to column indices and a FIG object.
431 :     The FIG object provides a list of taxonomic groups of 10 or more complete
432 :     genomes. From the list, we extract subsets which have more than 10
433 :     genomes in the list of subsystem genomes. If no such subsets exist,
434 :     we extract subsets which have at least 1 genome from the list of
435 :     subsystem genomes. The subsets are returned as 2-tuples, the first
436 :     element being the subset ID and the second being a list of row indices
437 :     for the genomes in the subset.
438 :    
439 :     =over 4
440 :    
441 :     =item fig
442 :    
443 :     A FIG-like object for accessing the data store.
444 :    
445 :     =item genomeH
446 :    
447 :     Reference to a hash that maps each genome ID to its row index in the
448 :     subsystem spreadsheet.
449 :    
450 :     =item RETURN
451 :    
452 :     Returns a reference to a list of 2-tuples. Each 2-tuple consists of a
453 :     subset ID followed by a list of the row indices for the genomes in the
454 :     subset.
455 :    
456 :     =back
457 :    
458 :     =cut
459 :    
460 : overbeek 1.6 sub row_subsets {
461 : parrello 1.12 my ($fig, $genomeH) = @_;
462 : overbeek 1.6
463 : parrello 1.12 # We need a real FIG object, since SFXlate does not yet support
464 :     # taxonomy trees. The "FIG" method does this for us.
465 :     $fig = $fig->FIG();
466 :     # Initialize the return value.
467 : overbeek 1.6 my $subsets = [];
468 : parrello 1.12 # Get a list of taxonomic groups. This will come back as a list of
469 :     # 2-tuples.
470 : overbeek 1.11 my $taxonomic_groups = $fig->taxonomic_groups_of_complete(5);
471 : parrello 1.12 # Loop through the 2-tuples. We're looking for subsets which
472 :     # contain at least one genome on the subsystem's spreadsheet.
473 : overbeek 1.6 my($pair,$id,$members);
474 :     foreach $pair (@$taxonomic_groups)
475 :     {
476 :     ($id,$members) = @$pair;
477 : olson 1.18 # warn "Group $id is @$members\n";
478 :     # if ($id eq 'All')
479 :     # {
480 :     # push(@$members, '372461.6');
481 :     # }
482 : parrello 1.19
483 : parrello 1.12 # Extract the genomes in the member list that participate in this
484 :     # subsystem. To do this, we convert each genome ID to its row
485 :     # index. If no row index exists, the GREP condition discards the
486 :     # member.
487 :     my @mem = grep { defined($_) } map { $genomeH->{$_} } @$members;
488 :     # If there are enough members, save the subset.
489 :     if (@mem > 0)
490 :     {
491 :     push(@$subsets,[$id,[@mem]]);
492 :     }
493 : overbeek 1.6 }
494 : parrello 1.12 # Return the list of row subsets.
495 : overbeek 1.6 return $subsets;
496 :     }
497 :    
498 : parrello 1.12 =head3 set_aliases
499 :    
500 : parrello 1.19 my $aliasHash = UnvSubsys::set_aliases($fig, $pegH, $active_genomes);
501 : parrello 1.12
502 :     Return a hash mapping PEG IDs to aliases.
503 :    
504 :     =over 4
505 :    
506 :     =item fig
507 :    
508 :     FIG-like object that can be used to access the data store.
509 :    
510 :     =item pegH
511 :    
512 :     Reference to the spreadsheet hash table. Given a row index I<$row> and a
513 :     column index I<$col>,
514 :    
515 :     $pegH->{$row}->{$col}
516 :    
517 :     will return a reference to a list of PEGs in the specified spreadsheet cell.
518 :    
519 :     =item active_genomes
520 :    
521 :     Reference to a hash whose keys correspond to the spreadsheet row indices
522 :     of genomes that should be highlighted.
523 :    
524 :     =item RETURN
525 :    
526 :     Returns a hash that takes as input a PEG ID and returns an alias. Only PEGs
527 :     for active genomes will be processed.
528 :    
529 :     =back
530 :    
531 :     =cut
532 :    
533 : overbeek 1.4 sub set_aliases {
534 : parrello 1.12 # Get the parameters.
535 : overbeek 1.7 my($fig,$pegH,$active_genomes) = @_;
536 : overbeek 1.4 my($genomeI,$roleI,$pegs,$peg,$roleH);
537 :    
538 : parrello 1.12 # Create the return hash.
539 : overbeek 1.4 my $aliasesH = {};
540 :    
541 : parrello 1.12 # Loop through each row that corresponds to an active genome.
542 :     # The active genome list contains row indices, and there is
543 :     # one genome per row. Note that the genome ID is never used,
544 :     # only the row index.
545 : overbeek 1.7 foreach $genomeI (grep { $active_genomes->{$_} } keys(%$pegH))
546 : overbeek 1.4 {
547 : parrello 1.12 # Get the role hash for the specified genome. The role hash
548 :     # maps column indices (representing roles) to lists of PEGs.
549 : parrello 1.5 $roleH = $pegH->{$genomeI};
550 : parrello 1.12 # Loop through the role (column) indices.
551 : parrello 1.5 foreach $roleI (keys(%$roleH))
552 :     {
553 : parrello 1.12 # Get the PEG list for this row/column combination.
554 : parrello 1.5 $pegs = $roleH->{$roleI};
555 : parrello 1.12 # Only proceed if data was found in the cell.
556 :     if (defined $pegs) {
557 :     # Loop through the pegs in the cell.
558 :     foreach $peg (@$pegs)
559 : parrello 1.5 {
560 : parrello 1.12 # If we do not already have an alias for this PEG,
561 :     # compute one.
562 :     if (! $aliasesH->{$peg})
563 :     {
564 :     $aliasesH->{$peg} = scalar &ext_id($fig,$peg);
565 :     }
566 : parrello 1.5 }
567 :     }
568 :     }
569 : overbeek 1.4 }
570 : parrello 1.12 # Return the hash we built.
571 : overbeek 1.4 return $aliasesH;
572 :     }
573 :    
574 : parrello 1.12 =head3 set_colors
575 :    
576 : parrello 1.19 my $colorHash = UnvSubsys::set_colors($fig, $sub, \%pegH, \%active_genomes);
577 : parrello 1.12
578 :     Return a hash that maps each PEG in the subsystem spreadsheet to a display
579 :     color. Not all PEGs need to be mapped. Those that do not have a color
580 :     assigned will generally be displayed in white.
581 :    
582 :     =over 4
583 :    
584 :     =item fig
585 :    
586 :     FIG-like object that can be used to access the data store.
587 :    
588 :     =item sub
589 :    
590 :     Subsystem object for the current subsystem.
591 :    
592 :     =item pegH
593 :    
594 :     Reference to the spreadsheet hash table. Given a row index I<$row> and a
595 :     column index I<$col>,
596 :    
597 :     $pegH->{$row}->{$col}
598 :    
599 :     will return a reference to a list of PEGs in the specified spreadsheet cell.
600 :    
601 :     =item active_genomes
602 :    
603 :     Reference to a hash whose keys correspond to the spreadsheet row indices
604 :     of genomes that should be highlighted.
605 :    
606 :     =item RETURN
607 :    
608 :     Returns a hash that takes as input a PEG ID and returns a color. These colors
609 :     are used when displaying the PEGs in the subsystem spreadsheet. Only PEGs
610 :     for active genomes will be colored.
611 :    
612 :     =back
613 :    
614 :     =cut
615 :    
616 : overbeek 1.4 sub set_colors {
617 : parrello 1.12 # Get the parameters.
618 :     my($fig,$sub,$pegH,$active_genomes) = @_;
619 : parrello 1.19
620 : parrello 1.12 my($genomeI,$roleI,$pegs,$peg,$roleH,%pegs_in_genome);
621 :     # Create the return hash.
622 : overbeek 1.4 my $colorsH = {};
623 : parrello 1.12 # Loop through the active genomes. The keys of "%$pegH" are the row indices
624 :     # for rows that have at least one occupied cell in the spreadsheet. The
625 :     # Grep then reduces this list to those rows that are highlighted.
626 : overbeek 1.7 foreach $genomeI (grep { $active_genomes->{$_} } keys(%$pegH))
627 : overbeek 1.4 {
628 : parrello 1.12 # We will use the following hash to compile a list of all the PEGs
629 :     # in the spreadsheet for the current genome, that is, the genome
630 :     # represented by row index "$genomeI".
631 : parrello 1.5 undef %pegs_in_genome;
632 : parrello 1.12 # Get the hash for the current row. This hash maps column indices to
633 :     # lists of pegs.
634 : parrello 1.5 $roleH = $pegH->{$genomeI};
635 : parrello 1.12 # Loop through the column indices for the specified row.
636 : parrello 1.5 foreach $roleI (keys(%$roleH))
637 :     {
638 : parrello 1.12 # Here we can finally get a list of the pegs in this spreadsheet
639 :     # cell. We loop through them, marking them in the "%pegs_in_genome"
640 :     # hash.
641 : parrello 1.5 $pegs = $roleH->{$roleI};
642 :     foreach $peg (@$pegs)
643 :     {
644 :     $pegs_in_genome{$peg} = 1;
645 :     }
646 :     }
647 : parrello 1.12 # Extract the "%pegs_in_genome" hash keys. This gives us a duplicate-free
648 :     # list of all the pegs for the current spreadsheet role.
649 : parrello 1.5 my @pegs = keys(%pegs_in_genome);
650 :     my($tuple,$peg,$color);
651 : parrello 1.12 # Get a hash that maps the PEG IDs to colors.
652 :     my $colors_for_one_genome = &set_colors_for_genome($fig,$sub, \@pegs);
653 :     # Loop through the hash we got back and assign the colors from that
654 :     # hash to the master hash we're returning to the caller.
655 : parrello 1.5 while (($peg,$color) = each %$colors_for_one_genome)
656 :     {
657 :     $colorsH->{$peg} = $colors_for_one_genome->{$peg};
658 :     }
659 : overbeek 1.4 }
660 : parrello 1.12 # Return the color hash.
661 : overbeek 1.4 return $colorsH;
662 :     }
663 :    
664 : parrello 1.12 =head3 set_colors_for_genome
665 :    
666 : parrello 1.19 my $colorHash = UnvSubsys::set_colors_for_genome($fig, $sub, \@pegs);
667 : parrello 1.12
668 :     Return a reference to a hash mapping the specified pegs to colors. PEGs that
669 :     are physically close to each other will be painted the same color.
670 :    
671 :     =over 4
672 :    
673 :     =item fig
674 :    
675 :     A fig-like object that can be used to access the data store.
676 :    
677 :     =item sub
678 :    
679 :     Subsystem object for the relevant subsystem.
680 : overbeek 1.4
681 : parrello 1.12 =item pegs
682 : overbeek 1.4
683 : parrello 1.12 Reference to a list of PEG IDs. All of the peg IDs should be for the
684 :     same genome.
685 : overbeek 1.4
686 : parrello 1.12 =item RETURN
687 : overbeek 1.4
688 : parrello 1.12 Returns a reference to a hash that maps each PEG ID to a color.
689 : overbeek 1.4
690 : parrello 1.12 =back
691 : overbeek 1.4
692 : parrello 1.12 =cut
693 : overbeek 1.4
694 : parrello 1.12 sub set_colors_for_genome {
695 :     # Get the parameters.
696 :     my($fig, $sub, $pegs) = @_;
697 :     # Default all the PEGs to white.
698 :     my %color_of = map { $_ => '#FFFFFF' } @$pegs;
699 :     # Divide the pegs into clusters.
700 :     my @clusters = $fig->compute_clusters($pegs, $sub);
701 :     # Get a list of useful colors.
702 :     my @colors = &cool_colors();
703 :     # If we have too many clusters, chop off the big ones at the end. These
704 :     # are least likely to be important.
705 :     if (@clusters > @colors) { splice(@clusters, 0, (@clusters - @colors)) }
706 :     # Loop through the clusters.
707 :     for my $cluster (@clusters) {
708 :     # Get the color for this cluster.
709 :     my $color = shift @colors;
710 :     # Loop through this cluster, putting this color into the color_of
711 :     # entries for each PEG.
712 :     for my $peg (@$cluster) {
713 :     $color_of{$peg} = $color;
714 : parrello 1.5 }
715 : overbeek 1.4 }
716 : parrello 1.12 # Return the color map.
717 :     return \%color_of;
718 : overbeek 1.4 }
719 :    
720 : parrello 1.12 =head3 cool_colors
721 :    
722 : parrello 1.19 my @colorList = UnvSubsys::cool_colors();
723 : parrello 1.12
724 :     Return a list of web-safe colors.
725 :    
726 :     =cut
727 :    
728 : overbeek 1.4 sub cool_colors {
729 :     # just an array of "websafe" colors or whatever colors we want to use. Feel free to remove bad colors (hence the lines not being equal length!)
730 :     return (
731 :     '#C0C0C0', '#FF40C0', '#FF8040', '#FF0080', '#FFC040', '#40C0FF', '#40FFC0', '#C08080', '#C0FF00', '#00FF80', '#00C040',
732 :     "#6B8E23", "#483D8B", "#2E8B57", "#008000", "#006400", "#800000", "#00FF00", "#7FFFD4",
733 :     "#87CEEB", "#A9A9A9", "#90EE90", "#D2B48C", "#8DBC8F", "#D2691E", "#87CEFA", "#E9967A", "#FFE4C4", "#FFB6C1",
734 :     "#E0FFFF", "#FFA07A", "#DB7093", "#9370DB", "#008B8B", "#FFDEAD", "#DA70D6", "#DCDCDC", "#FF00FF", "#6A5ACD",
735 :     "#00FA9A", "#228B22", "#1E90FF", "#FA8072", "#CD853F", "#DC143C", "#FF6347", "#98FB98", "#4682B4",
736 :     "#D3D3D3", "#7B68EE", "#2F4F4F", "#FF7F50", "#FF69B4", "#BC8F8F", "#A0522D", "#DEB887", "#00DED1",
737 :     "#6495ED", "#800080", "#FFD700", "#F5DEB3", "#66CDAA", "#FF4500", "#4B0082", "#CD5C5C",
738 :     "#EE82EE", "#7CFC00", "#FFFF00", "#191970", "#FFFFE0", "#DDA0DD", "#00BFFF", "#DAA520", "#008080",
739 :     "#00FF7F", "#9400D3", "#BA55D3", "#D8BFD8", "#8B4513", "#3CB371", "#00008B", "#5F9EA0",
740 :     "#4169E1", "#20B2AA", "#8A2BE2", "#ADFF2F", "#556B2F",
741 :     "#F0FFFF", "#B0E0E6", "#FF1493", "#B8860B", "#FF0000", "#F08080", "#7FFF00", "#8B0000",
742 :     "#40E0D0", "#0000CD", "#48D1CC", "#8B008B", "#696969", "#AFEEEE", "#FF8C00", "#EEE8AA", "#A52A2A",
743 :     "#FFE4B5", "#B0C4DE", "#FAF0E6", "#9ACD32", "#B22222", "#FAFAD2", "#808080", "#0000FF",
744 :     "#000080", "#32CD32", "#FFFACD", "#9932CC", "#FFA500", "#F0E68C", "#E6E6FA", "#F4A460", "#C71585",
745 :     "#BDB76B", "#00FFFF", "#FFDAB9", "#ADD8E6", "#778899",
746 :     );
747 :     }
748 :    
749 : parrello 1.12 =head3 ext_id
750 :    
751 : parrello 1.19 my $externalID = UnvSubsys::ext_id($fig, $peg);
752 : parrello 1.12
753 :     or
754 :    
755 : parrello 1.19 my @externalIDs = UnvSubsys::ext_id($fig, $peg);
756 : parrello 1.12
757 :     Return a list of non-FIG IDs for the specified feature. In a scalar context, return
758 :     a single non-FIG ID for the specified feature.
759 :    
760 :     This method returns IDs that are all of the same type, that is, all UniProt IDs, or
761 :     all KEGG IDs, and so forth. To do this, it checks the feature's alias list for IDs
762 :     of a given type. If it finds at least one, then all IDs of that type are returned.
763 :     Highest priority is given to the UniProt IDs, then SP IDs, GI IDs, and finally
764 :     KEGG IDs.
765 :    
766 :     =over 4
767 :    
768 :     =item fig
769 :    
770 :     A FIG-like object for accessing the data store.
771 :    
772 :     =item peg
773 :    
774 :     ID of the feature whose aliases are desired.
775 :    
776 :     =item RETURN
777 :    
778 :     In list context, a list of non-FIG IDs for the feature that are all of the same
779 :     type. In scalar context, the first non-FIG ID for the feature of the
780 :     highest-priority available type.
781 :    
782 :     =back
783 :    
784 :     =cut
785 :    
786 : overbeek 1.4 sub ext_id {
787 :     my($fig,$peg) = @_;
788 :    
789 :     my @tmp;
790 :     my @aliases = $fig->feature_aliases($peg);
791 :     if ((@tmp = grep { $_ =~ /^uni\|/ } @aliases) > 0)
792 :     {
793 : parrello 1.5 @aliases = @tmp;
794 : overbeek 1.4 }
795 :     elsif ((@tmp = grep { $_ =~ /^sp\|/ } @aliases) > 0)
796 :     {
797 : parrello 1.5 @aliases = @tmp;
798 : overbeek 1.4 }
799 :     elsif ((@tmp = grep { $_ =~ /^gi\|/ } @aliases) > 0)
800 :     {
801 : parrello 1.5 @aliases = @tmp;
802 : overbeek 1.4 }
803 :     elsif ((@tmp = grep { $_ =~ /^kegg\|/ } @aliases) > 0)
804 :     {
805 : parrello 1.5 @aliases = @tmp;
806 : overbeek 1.4 }
807 :     else
808 :     {
809 : parrello 1.5 @aliases = ();
810 : overbeek 1.4 }
811 :    
812 :     if (wantarray())
813 :     {
814 : parrello 1.5 return @aliases;
815 : overbeek 1.4 }
816 :     else
817 :     {
818 : parrello 1.5 return $aliases[0];
819 : overbeek 1.4 }
820 :     }
821 :    
822 : parrello 1.12 =head3 subsystem_curator
823 :    
824 : parrello 1.19 my $name = $unvsub->subsystem_curator();
825 : parrello 1.12
826 :     Return the name of the subsystem curator. The database stores user names as
827 :     C<master:>I<name>. This method strips off the C<master:> prefix before it
828 :     passes the result back to the caller.
829 :    
830 :     =cut
831 : overbeek 1.4
832 : overbeek 1.2 sub subsystem_curator {
833 :     my($self) = @_;
834 :    
835 :     my $curator = $self->{Curator};
836 :     $curator =~ s/master://;
837 :     return $curator;
838 :     }
839 :    
840 : parrello 1.12 =head3 get_roles
841 :    
842 : parrello 1.19 my @roles = $unvsub->get_roles();
843 : parrello 1.12
844 :     Return a list of the roles (columns) for this subsystem. The roles will be
845 :     returned in column order, so that if you access the sixth element of the
846 :     return list, you'll get the name of the role for the sixth column.
847 :    
848 :     =cut
849 :    
850 : overbeek 1.2 sub get_roles {
851 :     my($self) = @_;
852 : parrello 1.12 # The role index in this object is a list of 3-tuples. The caller only
853 :     # wants the first element of each tuple, which is the role name.
854 : overbeek 1.2 return map { $_->[0] } @{$self->{Roles}};
855 :     }
856 :    
857 : parrello 1.12 =head3 get_genome_index
858 :    
859 : parrello 1.19 my $index = $unvsub->get_genome_index($genome);
860 : parrello 1.12
861 :     Return the row index of the specified genome.
862 :    
863 :     =over 4
864 :    
865 :     =item genome
866 :    
867 :     ID of the genome whose row index is desired.
868 :    
869 :     =item RETURN
870 :    
871 :     Returns the index of the row corresponding to the specified genome, or an
872 :     undefined value if the genome is not represented in the subsystem
873 :     spreadsheet.
874 :    
875 :     =back
876 :    
877 :     =cut
878 :    
879 : overbeek 1.3 sub get_genome_index {
880 :     my($self,$genome) = @_;
881 :    
882 :     return $self->{GenomeIndex}->{$genome};
883 :     }
884 :    
885 : parrello 1.12 =head3 get_genomes
886 :    
887 : parrello 1.19 my @genomes = $unvsub->get_genomes();
888 : parrello 1.12
889 :     Return a list of the genome IDs for the subsystem. The genomes will be
890 :     presented in row order. In other words, if you index into the sixth
891 :     element of the return list, you will retrieve the ID of the genome for
892 :     the sixth row.
893 :    
894 :     =cut
895 :    
896 : overbeek 1.3 sub get_genomes {
897 :     my($self) = @_;
898 : parrello 1.12 # The genome array is a list of 2-tuples. We extract the first
899 :     # element of each tuple, which is the genome ID.
900 : overbeek 1.3 return map { $_->[0] } @{$self->{Genomes}};
901 :     }
902 :    
903 : parrello 1.12 =head3 get_variant_code
904 :    
905 : parrello 1.19 my $code = $unvsub->get_variant_code($genome);
906 : parrello 1.12
907 :     Return the variant code for a genome. Each subsystem has several variations.
908 :     The variant code indicates which variation of a subsystem is used by a
909 :     particular genome.
910 :    
911 :     Genome data is stored in a list of 2-tuples. The first element is the genome
912 :     ID; the second is the variant code.
913 :    
914 :     =over 4
915 :    
916 :     =item genome
917 :    
918 :     ID or row index of the genome whose variant code is desired.
919 :    
920 :     =item RETURN
921 :    
922 :     Returns the variant code for the specified genome.
923 :    
924 :     =back
925 :    
926 :     =cut
927 :    
928 : overbeek 1.3 sub get_variant_code {
929 :     my($self,$genome) = @_;
930 : parrello 1.12 # Check to see if we have a row index.
931 : overbeek 1.3 if ($genome =~ /^\d+$/)
932 :     {
933 : parrello 1.12 # Here we have a row index, so use it to find the genome's variant
934 :     # code.
935 : parrello 1.5 return $self->{Genomes}->[$genome]->[1];
936 : overbeek 1.3 }
937 :     else
938 :     {
939 : parrello 1.12 # Here we have a genome ID, so we need to convert it to a row index.
940 : parrello 1.5 my $genomeI = $self->{GenomeIndex}->{$genome};
941 :     return $self->{Genomes}->[$genomeI]->[1];
942 : overbeek 1.3 }
943 :     }
944 :    
945 : parrello 1.12 =head3 get_pegs_from_cell
946 :    
947 : parrello 1.19 my @pegs = $unvsub->get_pegs_from_cell($genome, $role);
948 : parrello 1.12
949 :     Return a list of the features in a specified spreadsheet cell. The cell is specified
950 :     by genome ID and role ID.
951 :    
952 :     =over 4
953 :    
954 :     =item genome
955 :    
956 :     ID of the genome relevant to the cell.
957 :    
958 :     =item role
959 :    
960 :     ID of the role relevant to the cell.
961 :    
962 :     =item RETURN
963 :    
964 :     Returns a list of the features in the cell, or an empty list if the cell is empty.
965 :    
966 :     =back
967 :    
968 : parrello 1.19 =cut
969 : parrello 1.12
970 : overbeek 1.3 sub get_pegs_from_cell {
971 :     my($self,$genome,$role) = @_;
972 : parrello 1.12 # Convert the genome and role IDs to row and column indices.
973 : overbeek 1.3 my $genomeI = $self->{GenomeIndex}->{$genome};
974 :     my $roleI = $self->{RoleIndex}->{$role};
975 : parrello 1.12 # Get the pegs from the cell and return them.
976 : overbeek 1.3 my $pegs = $self->{PegHash}->{$genomeI}->{$roleI};
977 :     return $pegs ? @$pegs : ();
978 :     }
979 :    
980 : parrello 1.14 =head3 get_diagrams
981 :    
982 : parrello 1.19 my @list = $sub->get_diagrams();
983 : parrello 1.14
984 :     Return a list of the diagrams associated with this subsystem. Each diagram
985 :     is represented in the return list as a 4-tuple C<[diagram_id, diagram_name,
986 :     page_link, img_link]> where
987 :    
988 :     =over 4
989 :    
990 :     =item diagram_id
991 :    
992 :     ID code for this diagram.
993 :    
994 :     =item diagram_name
995 :    
996 :     Displayable name of the diagram.
997 :    
998 :     =item page_link
999 :    
1000 :     URL of an HTML page containing information about the diagram.
1001 :    
1002 :     =item img_link
1003 :    
1004 :     URL of an HTML page containing an image for the diagram.
1005 :    
1006 :     =back
1007 :    
1008 :     Note that the URLs are in fact for CGI scripts with parameters that point them
1009 :     to the correct place. Though Sprout has diagram information in it, it has
1010 :     no relationship to the diagrams displayed in SEED, so the work is done entirely
1011 :     on the SEED side.
1012 :    
1013 :     =cut
1014 :    
1015 :     sub get_diagrams {
1016 :     # Get the parameters.
1017 :     my ($self) = @_;
1018 :     # Return the diagram list.
1019 :     return @{$self->{Diagrams}};
1020 :     }
1021 :    
1022 : overbeek 1.3 sub get_notes {
1023 :     my($self) = @_;
1024 :    
1025 :     return $self->{Notes};
1026 :     }
1027 :    
1028 : overbeek 1.2 sub get_role_index {
1029 :     my($self,$role) = @_;
1030 :    
1031 :     return $self->{RoleIndex}->{$role};
1032 :     }
1033 :    
1034 :     sub get_role_abbr {
1035 :     my($self,$roleI) = @_;
1036 :    
1037 :     if ($roleI !~ /^\d+$/)
1038 :     {
1039 : parrello 1.5 $roleI = $self->{RoleIndex}->{$roleI};
1040 : overbeek 1.2 }
1041 :     my $roles = $self->{Roles};
1042 :     return $roles->[$roleI]->[1];
1043 :     }
1044 :    
1045 :     sub get_reactions {
1046 :     my($self) = @_;
1047 :    
1048 :     return $self->{Reactions};
1049 :     }
1050 :    
1051 :     sub get_subset_namesC {
1052 :     my($self) = @_;
1053 :    
1054 :     return map { $_->[0] } @{$self->{RoleSubsets}};
1055 :     }
1056 :    
1057 :     sub get_subsetC_roles {
1058 :     my($self,$subset) = @_;
1059 :     my($i,$j);
1060 :    
1061 :     my $subset_info = $self->{RoleSubsets};
1062 :     for ($i=0; ($i < @$subset_info) && ($subset_info->[$i]->[0] ne $subset); $i++) {}
1063 :     if ($i < @$subset_info)
1064 :     {
1065 : parrello 1.5 my @roles = ();
1066 :     foreach $j (@{$subset_info->[$i]->[1]})
1067 :     {
1068 :     push(@roles,$self->{Roles}->[$j]->[0]);
1069 :     }
1070 :     return @roles;
1071 : overbeek 1.2 }
1072 :     return undef;
1073 :     }
1074 :    
1075 : overbeek 1.4 sub get_color_of {
1076 :     my($self,$peg) = @_;
1077 :    
1078 :     return $self->{Colors}->{$peg};
1079 :     }
1080 :    
1081 : parrello 1.12 =head3 active_genomes
1082 :    
1083 : parrello 1.19 my $activeHash = UnvSubsys::active_genomes(\@row_subsets, $active_subsetR, $focus, \%genomeH, \@genomes_info);
1084 : parrello 1.12
1085 :     Return a hash containing the active genomes for this subsystem display. The
1086 :     keys of the hash will be the row indices of the genomes to be highlighted on the
1087 :     display. Each genome ID will map to 1. Thus, if C<< $activeHash->{3} >>
1088 :     tests TRUE, the fourth row should be highlighted.
1089 :    
1090 :     The rules for determining the active genomes are as follows. If I<$active_subsetR> is
1091 :     specified, it is presumed to be the ID of the subset containing the active genomes.
1092 :     If it is not specified and I<$focus> is specified, then I<$focus> is presumed to be the
1093 :     ID of the genome currently in focus, and the active genomes will be the ones in the
1094 :     smallest row subset containing the genome in focus. If neither I<$active_subsetR> nor
1095 :     I<$focus> are specified, then all genomes are active.
1096 :    
1097 :     =over 4
1098 :    
1099 :     =item row_subsets
1100 :    
1101 :     Reference to a list of 2-tuples. Each tuple consists of a row subset ID followed by
1102 :     a reference to a list of the row indices for the rows in the identified subset.
1103 :    
1104 :     =item active_subsetR (optional)
1105 :    
1106 :     ID of the active subset (if any).
1107 :    
1108 :     =item focus
1109 :    
1110 :     ID of the genome currently in focus (if any). If there is no active subset, then
1111 :     the smallest subset containing this genome will be made active.
1112 :    
1113 :     =item genomeH
1114 :    
1115 :     Reference to a hash of genome IDs to row indices. The keys of this hash are
1116 :     the genomes in this subsystem, which also form the subsystem spreadsheet's
1117 :     rows.
1118 :    
1119 :     =item genomes_info
1120 :    
1121 :     Reference to a list of 2-tuples. The first element of each 2-tuple is the
1122 :     ID of a genome; the second is the variant code for the subsystem variant
1123 :     used by the genome. The tuples are ordered by row index, so that the ID
1124 :     and variant code of the genome in a particular row can be located by indexing
1125 :     into this parameter using the subsystem spreadsheet row number.
1126 :    
1127 :     =item RETURN
1128 :    
1129 :     Returns a reference to a hash that maps the row indices of the active genomes
1130 :     to 1. This hash can be used to quickly determine whether or not a particular
1131 :     row is to be highlighted.
1132 :    
1133 :     =back
1134 :    
1135 :     =cut
1136 :    
1137 : overbeek 1.7 sub active_genomes {
1138 : parrello 1.12 # Get the parameters.
1139 :     my($fig, $row_subsets, $active_subsetR, $focus, $genomeH, $genomes_info) = @_;
1140 : overbeek 1.7 my($i,@bestL);
1141 : parrello 1.12 # Declare the return variable.
1142 : overbeek 1.7 my $active_genomes = {};
1143 : parrello 1.12 # Check for an active subset.
1144 : overbeek 1.7 if ($active_subsetR)
1145 :     {
1146 : parrello 1.12 # Search for the active subset in the row subset array.
1147 :     for ($i=0; ($i < @$row_subsets) && ($row_subsets->[$i]->[0] ne $active_subsetR); $i++) {}
1148 :     if ($i < @$row_subsets)
1149 :     {
1150 :     # Here we found the active subset, so we extract its list of row indices.
1151 :     @bestL = @{$row_subsets->[$i]->[1]};
1152 :     }
1153 :     else {
1154 :     # Here we have the ID of the active subset. First, we search for that ID
1155 :     # in the row subset list.
1156 :     for ($i=0; ($i < @$row_subsets) && ($row_subsets->[$i]->[0] ne $active_subsetR); $i++) {}
1157 :     if ($i < @$row_subsets)
1158 :     {
1159 :     # Here we found the named subset, so we return its member list.
1160 :     @bestL = @{$row_subsets->[$i]->[1]};
1161 :     }
1162 :     else
1163 :     {
1164 :     # Here the active subset does not exist. We punt by extracting a
1165 :     # list of all the row indices in the spreadsheet.
1166 :     $active_subsetR = 'All';
1167 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1168 :     }
1169 :     }
1170 : overbeek 1.7 }
1171 : parrello 1.12 elsif ($focus)
1172 : overbeek 1.7 {
1173 : parrello 1.12 # Here we don't have an active row subset, but a particular genome is in
1174 :     # focus. We'll look for the smallest row subset containing the genome
1175 :     # in focus. First, we need to prime the loop. "$bestN" will be the ID
1176 :     # of the best subset found so far; "@bestL" is where we stash the list
1177 :     # of IDs in the subset. Our initial selection, then, will be the
1178 :     # fake "All" subset, which contains the entire collection of rows.
1179 : overbeek 1.16
1180 : parrello 1.12 if (! $fig->is_complete($focus))
1181 :     {
1182 :     # Here the gnome in focus is incomplete, so it won't be anywhere
1183 :     # in our list. We default to making everything active.
1184 :     $active_subsetR = 'All';
1185 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1186 :     } else {
1187 :     my $bestN = "All";
1188 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1189 :     # Next, we get the row index for the genome in focus.
1190 :     my $focusIndex = $genomeH->{$focus};
1191 :     # Now we loop through all the row subsets.
1192 :     my $tuple;
1193 :     foreach $tuple (@$row_subsets)
1194 :     {
1195 :     # Extract the subset ID and its list of row indices. The latter is
1196 :     # in "$genomeIs".
1197 :     my($id,$genomeIs) = @$tuple;
1198 :     # Search for the index of the focus row in the current subset's list
1199 :     # of row indices.
1200 :     for ($i=0; ($i < @$genomeIs) && ($genomeIs->[$i] != $focusIndex); $i++) {}
1201 :     # Now either $i will be the index of the focus row in the subset, or
1202 :     # it is going to be equal to the number of rows in the subset.
1203 :     if ($i < @$genomeIs)
1204 :     {
1205 :     # We've found the focus row in this subset. Select it as the new
1206 :     # best subset if it's smaller than the last one we found.
1207 :     if (@$genomeIs < @bestL)
1208 :     {
1209 :     $bestN = $id;
1210 :     @bestL = @$genomeIs;
1211 :     }
1212 :     }
1213 :     }
1214 :     # Save the best subset found as the active one.
1215 :     $active_subsetR = $bestN;
1216 :     }
1217 :     } else {
1218 :     # Here we have nothing: no active subset, and no focus row. We make
1219 :     # all rows active.
1220 :     $active_subsetR = 'All';
1221 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1222 : overbeek 1.7 }
1223 : parrello 1.12 # "@bestL" now contains a list of the row indices for the active genomes.
1224 :     # We convert it to a hash and return it.
1225 : overbeek 1.7 my %active_genomes = map { $_ => 1 } @bestL;
1226 :     return \%active_genomes;
1227 :     }
1228 :    
1229 : olson 1.13 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3