[Bio] / FigKernelPackages / UnvSubsys.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/UnvSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (view) (download) (as text)

1 : olson 1.15 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 : parrello 1.19 #
7 : olson 1.15 # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 : parrello 1.19 # Public License.
10 : olson 1.15 #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : overbeek 1.1 package UnvSubsys;
19 :    
20 : overbeek 1.2 use Subsystem;
21 : overbeek 1.1 use Carp;
22 :     use FIG;
23 : parrello 1.12 use SFXlate;
24 : overbeek 1.1 use Data::Dumper;
25 :     use strict;
26 : parrello 1.12 use Tracer;
27 : paczian 1.20 use HTML;
28 : overbeek 1.1
29 : parrello 1.12 =head1 Universal Subsystem Object
30 :    
31 :     =head2 Introduction
32 :    
33 :     The universal subsystem object provides methods used to display useful information
34 :     about a subsystem. Its intent is to support both Sprout and SEED.
35 :    
36 :     The soul of a subsystem is its spreadsheet. The columns of the spreadsheet are
37 :     the subsystem roles. The rows are the genomes that contain the subsystem. PEGs
38 :     are stored in the spreadsheet's cells. Each genome can be identified by genome
39 :     ID or row index. Each role can be identified by role name or column index.
40 :    
41 :     It is worth noting that the term I<subset> has two meanings-- a subset of roles
42 :     or a subset of genomes. A subset of roles is called a I<column subset>. A
43 :     subset of genomes is called a I<row subset>.
44 :    
45 :     The object created contains a great deal of information, so it's worthwhile to
46 :     stop for a moment and discuss it. The object will have the following members.
47 :    
48 :     =over 4
49 :    
50 :     =item Roles
51 :    
52 :     A list of 3-tuples, each consisting of a role name, the abbreviated role name,
53 :     and a list of URLs for the reactions. Each role is a column in the
54 :     subsystem spreadsheet. Indexing into the list by column index yields the
55 :     ID, abbreviation, and reactions for the role in that column.
56 :    
57 :     =item RoleIndex
58 :    
59 :     A hash mapping each role to its column index in the spreadsheet.
60 :    
61 :     =item RoleSubsets
62 :    
63 :     A list of 2-tuples, each consisting of a subset name followed by a list of the
64 :     column indices for the roles in the subset.
65 :    
66 :     =item Genomes
67 :    
68 :     A list of 2-tuples, each containing the genome ID for the relevant row and the
69 :     variant code for that genome. Subsystems can have several variations, and the
70 :     variant code indicates which variant of the subsystem that the genome uses.
71 :     There is one genome for each row in the spreadsheet. Indexing into the list
72 :     yields the ID of the genome for that row and its variant code.
73 :    
74 :     =item GenomeIndex
75 :    
76 :     A hash mapping each genome ID to its row index in the spreadsheet.
77 :    
78 :     =item PegHash
79 :    
80 :     A hash of hashes containing the spreadsheet cells. If C<$pegHash> is the hash
81 :     of hashes, C<$row> is a genome index, and C<$col> is a role index, then
82 :    
83 :     $pegHash->{$row}->{$col}
84 :    
85 :     returns a reference to a list of the IDs for the PEGs in the relevant
86 :     spreadsheet cell.
87 :    
88 :     =item ColorHash
89 :    
90 :     A hash mapping each PEG ID to the color that should be used to represent that
91 :     PEG in the display.
92 :    
93 :     =item AliasHash
94 :    
95 :     A hash mapping each PEG ID to a list of its aliases.
96 :    
97 :     =item ReactionHash
98 :    
99 :     A hash mapping each role to the list of reactions it catalyzes.
100 :    
101 :     =back
102 :    
103 :     =head2 Public Methods
104 :    
105 :     =head3 new
106 :    
107 : parrello 1.19 my $usub = UnvSubsys->new($ssa, $fig, $active_subsetR, $focus, $show_clusters, $aliases, \@peg_colors);
108 : parrello 1.12
109 :     Construct a new universal subsystem object for a specified subsystem.
110 :    
111 :     =over 4
112 :    
113 :     =item ssa
114 :    
115 :     Name of the subsystem.
116 :    
117 :     =item fig
118 :    
119 :     Data access object, either a FIG object or an SFXlate object.
120 :    
121 :     =item active_subsetR
122 :    
123 :     Name of the active row subset. A row subset names a group of genomes.
124 :    
125 :     =item focus
126 :    
127 :     ID of the genome currently in focus.
128 :    
129 :     =item show_clusters
130 :    
131 :     TRUE if clusters should be painted by color, else FALSE.
132 :    
133 :     =item aliases
134 :    
135 :     TRUE if PEG aliases should be shown, else FALSE.
136 :    
137 :     =item peg_colors (optional)
138 : overbeek 1.1
139 : parrello 1.12 Reference to a list of 2-tuples, each 2-tuple consisting of a PEG ID and the color
140 :     to be assigned to the PEG.
141 : overbeek 1.1
142 : parrello 1.12 =back
143 : overbeek 1.1
144 : parrello 1.12 =cut
145 : overbeek 1.1
146 : parrello 1.12 sub new {
147 :     # Get the parameters.
148 :     my($class, $ssa, $fig, $active_subsetR, $focus, $show_clusters, $aliases, $peg_colors) = @_;
149 :     # Fix the subsystem name. Spaces are replaced by underscores to avoid naming problems
150 :     # in the seed's file system.
151 :     $ssa =~ s/ /_/g;
152 :     # Get the FIG object. At this point in time, we're only getting data from the SEED. On
153 :     # a future pass through the code, we'll get data from a SEED or Sprout.
154 : olson 1.18 if ((ref($fig) eq "FIG") || (ref($fig) eq "FIGV") || (ref($fig) eq 'SFXlate')) {
155 : parrello 1.12 # Create a subsystem object. The "get_subsystem" method is provided by both FIG
156 :     # and SFXlate; however, the object returned in each case is different: FIG
157 :     # returns a "Subsystem" object; SFXlate returns a "SproutSubsys" object.
158 :     my $subsystem = $fig->get_subsystem($ssa);
159 :     # Get the key subsystem data. Note that CRs in the notes are converted to LFs
160 :     # in case the notes come from a Mac.
161 : parrello 1.5 my $curator = $subsystem->get_curator;
162 :     my $notes = $subsystem->get_notes;
163 : parrello 1.12 $notes =~ s/\r/\n/g;
164 : parrello 1.5 my @roles = $subsystem->get_roles;
165 :     my $reactions = $subsystem->get_reactions;
166 :     my @genomes = $subsystem->get_genomes;
167 : overbeek 1.1 my @col_subsets = $subsystem->get_subset_namesC;
168 : parrello 1.14 my @diagrams = $subsystem->get_diagrams();
169 : parrello 1.12 # Create the data structures for the role list and the role index.
170 : parrello 1.5 my $role_info = [];
171 :     my $roleH = {};
172 : parrello 1.12 # Loop through the roles to create the role list. The $i index will move
173 :     # through the columns of the spreadsheet.
174 : parrello 1.5 my($i,$j,$subset,$peg);
175 :     for ($i=0; ($i < @roles); $i++)
176 :     {
177 : parrello 1.12 # Get the ID of the role in the current column.
178 : parrello 1.5 my $role = $roles[$i];
179 : parrello 1.12 # Extract its abbreviation.
180 :     my $abbrev = $subsystem->get_role_abbr($i);
181 :     # Get the reactions. Note that its possible no reactions were found. If there were
182 :     # reactions found, however, we convert from reaction IDs to a comma-delimited
183 :     # list of HTML code full of hyperlinks.
184 : parrello 1.5 my $react = $reactions ? join(",", map { &HTML::reaction_link($_) } @{$reactions->{$role}}) : [];
185 : parrello 1.12 # Form the role name, abbreviation, and reaction list into a 3-tuple and push
186 :     # them onto the main role list.
187 : parrello 1.5 push(@$role_info,[$role,$abbrev,$react]);
188 : parrello 1.12 # Set the role hash so that we can get back the column index for a given
189 :     # role name.
190 : parrello 1.5 $roleH->{$role} = $i;
191 :     }
192 : parrello 1.12 # Get the column subsets. A column subset is a list of role IDs, so we need to
193 :     # convert it to a set of column indices. We ignore the special "All" subset
194 :     # that contains everything.
195 : parrello 1.5 my $subset_info = [];
196 :     foreach $subset (@col_subsets)
197 :     {
198 :     if ($subset ne 'All')
199 :     {
200 :     push(@$subset_info,[$subset,[map { $roleH->{$_} } $subsystem->get_subsetC_roles($subset)]]);
201 :     }
202 :     }
203 : parrello 1.12 # Now we create the genome directories. For each genome we need to be able to
204 :     # hash from the genomeID to the specified role index and we must be able to
205 :     # get the genome's variant code. (The variant code indicates which subsystem
206 :     # variant the genome uses.
207 : parrello 1.5 my $genomes_info = [];
208 :     my $genomeH = {};
209 :     for ($i=0; ($i < @genomes); $i++)
210 :     {
211 : parrello 1.12 # Get the genome ID for this row.
212 : parrello 1.5 my $genome = $genomes[$i];
213 : parrello 1.19 # Get its variant code.
214 : parrello 1.12 my $variant = $subsystem->get_variant_code($i);
215 :     # Form them into a 2-tuple and add the result to the genome list.
216 : parrello 1.5 push(@$genomes_info,[$genome,$variant]);
217 : parrello 1.12 # Set up the hash to get from the genome ID to the row index.
218 : parrello 1.5 $genomeH->{$genome} = $i;
219 :     }
220 : parrello 1.19
221 : parrello 1.12 # Next we gather the data from the actual spreadsheet cells. For an SFXlate
222 :     # object, this is the most expensive part of the operation, since it requires
223 :     # a database call for each cell.
224 : parrello 1.5 my $pegH = {};
225 : parrello 1.12 # $i is the row index, which cycles through genomes.
226 : parrello 1.5 for ($i=0; ($i < @genomes); $i++)
227 :     {
228 : parrello 1.12 # $j is the column index, which cycles through roles.
229 : parrello 1.5 for ($j=0; ($j < @roles); $j++)
230 :     {
231 : parrello 1.12 my @pegs = $subsystem->get_pegs_from_cell($i,$j);
232 : parrello 1.5 $pegH->{$i}->{$j} = [@pegs];
233 :     }
234 :     }
235 : overbeek 1.7
236 : parrello 1.12 # Get the row subsets. Row subsets are determined by the genome
237 :     # taxonomy, so these are different from the row subsets stored
238 :     # in the subsystem object.
239 :     my $row_subsets = &row_subsets($fig, $genomeH);
240 :     # Here we try to get a list of the active genomes. The caller
241 :     # gives us hints in the form of the "$focus" and "$active_subsetR"
242 :     # parameters. If, for example, we are using this object to generate
243 :     # a subsystem web page, the focus information would steer us to
244 :     # whatever the user wants to look at on the page.
245 : olson 1.13 my $active_genomes = &active_genomes($fig, $row_subsets,$active_subsetR,$focus,
246 : parrello 1.12 $genomeH,$genomes_info);
247 :    
248 :     # Now we generate a table of colors for the various PEGs. If the
249 :     # caller gave us a map of peg IDs to colors, we use that. Otherwise,
250 :     # we allow the option of painting the PEGs by cluster number. (The
251 :     # caller indicates this by setting the "show_clusters" flag in the
252 :     # parameter list.)
253 :     my $colorsH;
254 :     if ($peg_colors)
255 :     {
256 :     # Here the caller gave us a list of peg colors. The list contains
257 :     # 2-tuples, each consisting of a PEG ID followed by the
258 :     # color value. The loop below extracts the pairs and stuffs them
259 :     # into the color hash.
260 :     $colorsH = {};
261 :     foreach $_ (@$peg_colors)
262 :     {
263 :     my($peg,$color) = @$_;
264 :     $colorsH->{$peg} = $color;
265 :     }
266 :     }
267 :     elsif ($show_clusters)
268 :     {
269 :     # Here the user wants us to base the colors on the genome clustering
270 :     # information.
271 :     $colorsH = &set_colors($fig,$subsystem,$pegH,$active_genomes);
272 :     }
273 :     else
274 :     {
275 :     # Here the user is not interested in coloring the PEGs.
276 :     $colorsH = {};
277 :     }
278 :     # If the user wants to see aliases, compute the alias hash. Aliases
279 :     # will only be computed for PEGs belonging to active (highlighted)
280 :     # genomes, and there is a maximum of one alias per PEG.
281 : overbeek 1.7 my $aliasesH = $aliases ? &set_aliases($fig,$pegH,$active_genomes) : {};
282 : parrello 1.12 # Create and bless the UnvSubsys object.
283 : paarmann 1.17 my $self = { SSA => $ssa,
284 :     Roles => $role_info,
285 : parrello 1.5 RoleIndex => $roleH,
286 :     RoleSubsets => $subset_info,
287 :     Genomes => $genomes_info,
288 :     GenomeIndex => $genomeH,
289 : parrello 1.12 GenomeSubsets => $row_subsets,
290 : parrello 1.5 PegHash => $pegH,
291 :     Colors => $colorsH,
292 :     Aliases => $aliasesH,
293 :     Curator => $curator,
294 :     Notes => $notes,
295 : parrello 1.14 Reactions => $reactions,
296 :     Diagrams => \@diagrams,
297 : parrello 1.5 };
298 :     bless($self, $class);
299 : parrello 1.12 # Return the object.
300 : parrello 1.5 return $self;
301 : overbeek 1.1 }
302 :     else
303 :     {
304 : parrello 1.12 # Here the FIG-like object was not recognized, so we return an
305 :     # undefined value.
306 : parrello 1.5 return undef;
307 : overbeek 1.1 }
308 :     }
309 :    
310 : paarmann 1.17 =head3 get_ssa
311 :    
312 : parrello 1.19 my $ssa = $unvsub->get_ssa();
313 : paarmann 1.17
314 :     Return the name of the subsystem
315 :    
316 :     =cut
317 :    
318 :     sub get_ssa {
319 :     my($self) = @_;
320 :     return $self->{SSA};
321 :     }
322 :    
323 :     =head3 get_ssa_pretty
324 :    
325 : parrello 1.19 my $ssa = $unvsub->get_ssa_pretty();
326 : paarmann 1.17
327 : parrello 1.19 Return the 'prettyfied' name of the subsystem
328 : paarmann 1.17
329 :     =cut
330 :    
331 :     sub get_ssa_pretty{
332 :     my($self) = @_;
333 :     my $ssa = $self->{SSA};
334 :     $ssa =~ s/_/ /g;
335 :     return $ssa;
336 :     }
337 :    
338 :    
339 : parrello 1.12 =head3 get_subset_namesR
340 :    
341 : parrello 1.19 my @names = $unvsub->get_subset_namesR();
342 : parrello 1.12
343 :     Return the names of the genome (row) subsets.
344 :    
345 :     =cut
346 :    
347 : overbeek 1.6 sub get_subset_namesR {
348 :     my($self) = @_;
349 :    
350 :     return map { $_->[0] } @{$self->{GenomeSubsets}};
351 :     }
352 :    
353 : parrello 1.12 =head3 get_subsetR
354 :    
355 : parrello 1.19 my @genomes = $unvsub->get_subsetR($set);
356 : parrello 1.12
357 :     Return a list of the genome IDs covered by a row subset.
358 :    
359 :     =over 4
360 :    
361 :     =item set
362 :    
363 :     Name of the row subset whose genomes are desired.
364 :    
365 :     =item RETURN
366 :    
367 :     Returns a list of the IDs for the genomes found in the specified row
368 :     set.
369 :    
370 :     =back
371 :    
372 :     =cut
373 :    
374 : overbeek 1.6 sub get_subsetR {
375 : parrello 1.12 # Get the parameters.
376 : overbeek 1.6 my($self,$set) = @_;
377 :     my($i);
378 : parrello 1.12 # Get the list of row subsets.
379 : overbeek 1.6 my $sets = $self->{GenomeSubsets};
380 : parrello 1.12 # Find the row subset with the specified name. The row subset list is a
381 :     # list of 2-tuples, and the first element of each tuple is the set
382 :     # name.
383 : overbeek 1.6 for ($i=0; ($i < @$sets) && ($sets->[$i]->[0] ne $set); $i++) {}
384 :     if ($i < @$sets)
385 :     {
386 : parrello 1.12 # Here we found the named subset. The subset tuple's second element is
387 :     # the list of row indices. We map these to genome IDs before returning
388 :     # them.
389 :     return map { $self->{Genomes}->[$_]->[0] } @{$sets->[$i]->[1]}
390 : overbeek 1.6 }
391 : parrello 1.12 # Here we subset was not found, so we return the undefined value.
392 : overbeek 1.6 return undef;
393 :     }
394 :    
395 : parrello 1.12 =head3 get_subsetR
396 :    
397 : parrello 1.19 my @pairs = $unvsub->get_subsetsR();
398 : parrello 1.12
399 :     Return a list of all the row subsets. The subsets are returned in the form
400 :     of 2-tuples, each consisting of a subset name followed by a reference to a
401 :     list of genome IDs. The genome IDs correspond to the rows in the subset.
402 :    
403 :     =cut
404 :    
405 : overbeek 1.6 sub get_subsetsR {
406 : parrello 1.12 # Get the parameters.
407 : overbeek 1.6 my($self) = @_;
408 : parrello 1.12 # Extract the list of genome subsets. This list is in the form of
409 :     # 2-tuples, but the rows are identified by row index, not genome ID.
410 : overbeek 1.6 my $sets = $self->{GenomeSubsets};
411 : parrello 1.12 # Create the return list.
412 : overbeek 1.6 my @pairs = ();
413 : parrello 1.12 # Loop through the subsets.
414 : overbeek 1.6 my $pair;
415 :     foreach $pair (@$sets)
416 :     {
417 : parrello 1.12 # Convert this subset's member list from row indices to genome IDs
418 :     # and stash the result in the return list.
419 :     my($id,$members) = @$pair;
420 :     push(@pairs,[$id,[map { $self->{Genomes}->[$_]->[0] } @$members]]);
421 : overbeek 1.6 }
422 : parrello 1.12 # Return the list constructed.
423 : overbeek 1.6 return @pairs;
424 :     }
425 :    
426 : parrello 1.12 =head3 row_subsets
427 :    
428 : parrello 1.19 my $subsetList = UnvSubsys::row_subsets($fig, \%genomeH);
429 : parrello 1.12
430 :     This method computes the taxonomic row subsets for a subsystem. It takes
431 :     as input a hash that maps genome IDs to column indices and a FIG object.
432 :     The FIG object provides a list of taxonomic groups of 10 or more complete
433 :     genomes. From the list, we extract subsets which have more than 10
434 :     genomes in the list of subsystem genomes. If no such subsets exist,
435 :     we extract subsets which have at least 1 genome from the list of
436 :     subsystem genomes. The subsets are returned as 2-tuples, the first
437 :     element being the subset ID and the second being a list of row indices
438 :     for the genomes in the subset.
439 :    
440 :     =over 4
441 :    
442 :     =item fig
443 :    
444 :     A FIG-like object for accessing the data store.
445 :    
446 :     =item genomeH
447 :    
448 :     Reference to a hash that maps each genome ID to its row index in the
449 :     subsystem spreadsheet.
450 :    
451 :     =item RETURN
452 :    
453 :     Returns a reference to a list of 2-tuples. Each 2-tuple consists of a
454 :     subset ID followed by a list of the row indices for the genomes in the
455 :     subset.
456 :    
457 :     =back
458 :    
459 :     =cut
460 :    
461 : overbeek 1.6 sub row_subsets {
462 : parrello 1.12 my ($fig, $genomeH) = @_;
463 : overbeek 1.6
464 : parrello 1.12 # We need a real FIG object, since SFXlate does not yet support
465 :     # taxonomy trees. The "FIG" method does this for us.
466 :     $fig = $fig->FIG();
467 :     # Initialize the return value.
468 : overbeek 1.6 my $subsets = [];
469 : parrello 1.12 # Get a list of taxonomic groups. This will come back as a list of
470 :     # 2-tuples.
471 : overbeek 1.11 my $taxonomic_groups = $fig->taxonomic_groups_of_complete(5);
472 : parrello 1.12 # Loop through the 2-tuples. We're looking for subsets which
473 :     # contain at least one genome on the subsystem's spreadsheet.
474 : overbeek 1.6 my($pair,$id,$members);
475 :     foreach $pair (@$taxonomic_groups)
476 :     {
477 :     ($id,$members) = @$pair;
478 : olson 1.18 # warn "Group $id is @$members\n";
479 :     # if ($id eq 'All')
480 :     # {
481 :     # push(@$members, '372461.6');
482 :     # }
483 : parrello 1.19
484 : parrello 1.12 # Extract the genomes in the member list that participate in this
485 :     # subsystem. To do this, we convert each genome ID to its row
486 :     # index. If no row index exists, the GREP condition discards the
487 :     # member.
488 :     my @mem = grep { defined($_) } map { $genomeH->{$_} } @$members;
489 :     # If there are enough members, save the subset.
490 :     if (@mem > 0)
491 :     {
492 :     push(@$subsets,[$id,[@mem]]);
493 :     }
494 : overbeek 1.6 }
495 : parrello 1.12 # Return the list of row subsets.
496 : overbeek 1.6 return $subsets;
497 :     }
498 :    
499 : parrello 1.12 =head3 set_aliases
500 :    
501 : parrello 1.19 my $aliasHash = UnvSubsys::set_aliases($fig, $pegH, $active_genomes);
502 : parrello 1.12
503 :     Return a hash mapping PEG IDs to aliases.
504 :    
505 :     =over 4
506 :    
507 :     =item fig
508 :    
509 :     FIG-like object that can be used to access the data store.
510 :    
511 :     =item pegH
512 :    
513 :     Reference to the spreadsheet hash table. Given a row index I<$row> and a
514 :     column index I<$col>,
515 :    
516 :     $pegH->{$row}->{$col}
517 :    
518 :     will return a reference to a list of PEGs in the specified spreadsheet cell.
519 :    
520 :     =item active_genomes
521 :    
522 :     Reference to a hash whose keys correspond to the spreadsheet row indices
523 :     of genomes that should be highlighted.
524 :    
525 :     =item RETURN
526 :    
527 :     Returns a hash that takes as input a PEG ID and returns an alias. Only PEGs
528 :     for active genomes will be processed.
529 :    
530 :     =back
531 :    
532 :     =cut
533 :    
534 : overbeek 1.4 sub set_aliases {
535 : parrello 1.12 # Get the parameters.
536 : overbeek 1.7 my($fig,$pegH,$active_genomes) = @_;
537 : overbeek 1.4 my($genomeI,$roleI,$pegs,$peg,$roleH);
538 :    
539 : parrello 1.12 # Create the return hash.
540 : overbeek 1.4 my $aliasesH = {};
541 :    
542 : parrello 1.12 # Loop through each row that corresponds to an active genome.
543 :     # The active genome list contains row indices, and there is
544 :     # one genome per row. Note that the genome ID is never used,
545 :     # only the row index.
546 : overbeek 1.7 foreach $genomeI (grep { $active_genomes->{$_} } keys(%$pegH))
547 : overbeek 1.4 {
548 : parrello 1.12 # Get the role hash for the specified genome. The role hash
549 :     # maps column indices (representing roles) to lists of PEGs.
550 : parrello 1.5 $roleH = $pegH->{$genomeI};
551 : parrello 1.12 # Loop through the role (column) indices.
552 : parrello 1.5 foreach $roleI (keys(%$roleH))
553 :     {
554 : parrello 1.12 # Get the PEG list for this row/column combination.
555 : parrello 1.5 $pegs = $roleH->{$roleI};
556 : parrello 1.12 # Only proceed if data was found in the cell.
557 :     if (defined $pegs) {
558 :     # Loop through the pegs in the cell.
559 :     foreach $peg (@$pegs)
560 : parrello 1.5 {
561 : parrello 1.12 # If we do not already have an alias for this PEG,
562 :     # compute one.
563 :     if (! $aliasesH->{$peg})
564 :     {
565 :     $aliasesH->{$peg} = scalar &ext_id($fig,$peg);
566 :     }
567 : parrello 1.5 }
568 :     }
569 :     }
570 : overbeek 1.4 }
571 : parrello 1.12 # Return the hash we built.
572 : overbeek 1.4 return $aliasesH;
573 :     }
574 :    
575 : parrello 1.12 =head3 set_colors
576 :    
577 : parrello 1.19 my $colorHash = UnvSubsys::set_colors($fig, $sub, \%pegH, \%active_genomes);
578 : parrello 1.12
579 :     Return a hash that maps each PEG in the subsystem spreadsheet to a display
580 :     color. Not all PEGs need to be mapped. Those that do not have a color
581 :     assigned will generally be displayed in white.
582 :    
583 :     =over 4
584 :    
585 :     =item fig
586 :    
587 :     FIG-like object that can be used to access the data store.
588 :    
589 :     =item sub
590 :    
591 :     Subsystem object for the current subsystem.
592 :    
593 :     =item pegH
594 :    
595 :     Reference to the spreadsheet hash table. Given a row index I<$row> and a
596 :     column index I<$col>,
597 :    
598 :     $pegH->{$row}->{$col}
599 :    
600 :     will return a reference to a list of PEGs in the specified spreadsheet cell.
601 :    
602 :     =item active_genomes
603 :    
604 :     Reference to a hash whose keys correspond to the spreadsheet row indices
605 :     of genomes that should be highlighted.
606 :    
607 :     =item RETURN
608 :    
609 :     Returns a hash that takes as input a PEG ID and returns a color. These colors
610 :     are used when displaying the PEGs in the subsystem spreadsheet. Only PEGs
611 :     for active genomes will be colored.
612 :    
613 :     =back
614 :    
615 :     =cut
616 :    
617 : overbeek 1.4 sub set_colors {
618 : parrello 1.12 # Get the parameters.
619 :     my($fig,$sub,$pegH,$active_genomes) = @_;
620 : parrello 1.19
621 : parrello 1.12 my($genomeI,$roleI,$pegs,$peg,$roleH,%pegs_in_genome);
622 :     # Create the return hash.
623 : overbeek 1.4 my $colorsH = {};
624 : parrello 1.12 # Loop through the active genomes. The keys of "%$pegH" are the row indices
625 :     # for rows that have at least one occupied cell in the spreadsheet. The
626 :     # Grep then reduces this list to those rows that are highlighted.
627 : overbeek 1.7 foreach $genomeI (grep { $active_genomes->{$_} } keys(%$pegH))
628 : overbeek 1.4 {
629 : parrello 1.12 # We will use the following hash to compile a list of all the PEGs
630 :     # in the spreadsheet for the current genome, that is, the genome
631 :     # represented by row index "$genomeI".
632 : parrello 1.5 undef %pegs_in_genome;
633 : parrello 1.12 # Get the hash for the current row. This hash maps column indices to
634 :     # lists of pegs.
635 : parrello 1.5 $roleH = $pegH->{$genomeI};
636 : parrello 1.12 # Loop through the column indices for the specified row.
637 : parrello 1.5 foreach $roleI (keys(%$roleH))
638 :     {
639 : parrello 1.12 # Here we can finally get a list of the pegs in this spreadsheet
640 :     # cell. We loop through them, marking them in the "%pegs_in_genome"
641 :     # hash.
642 : parrello 1.5 $pegs = $roleH->{$roleI};
643 :     foreach $peg (@$pegs)
644 :     {
645 :     $pegs_in_genome{$peg} = 1;
646 :     }
647 :     }
648 : parrello 1.12 # Extract the "%pegs_in_genome" hash keys. This gives us a duplicate-free
649 :     # list of all the pegs for the current spreadsheet role.
650 : parrello 1.5 my @pegs = keys(%pegs_in_genome);
651 :     my($tuple,$peg,$color);
652 : parrello 1.12 # Get a hash that maps the PEG IDs to colors.
653 :     my $colors_for_one_genome = &set_colors_for_genome($fig,$sub, \@pegs);
654 :     # Loop through the hash we got back and assign the colors from that
655 :     # hash to the master hash we're returning to the caller.
656 : parrello 1.5 while (($peg,$color) = each %$colors_for_one_genome)
657 :     {
658 :     $colorsH->{$peg} = $colors_for_one_genome->{$peg};
659 :     }
660 : overbeek 1.4 }
661 : parrello 1.12 # Return the color hash.
662 : overbeek 1.4 return $colorsH;
663 :     }
664 :    
665 : parrello 1.12 =head3 set_colors_for_genome
666 :    
667 : parrello 1.19 my $colorHash = UnvSubsys::set_colors_for_genome($fig, $sub, \@pegs);
668 : parrello 1.12
669 :     Return a reference to a hash mapping the specified pegs to colors. PEGs that
670 :     are physically close to each other will be painted the same color.
671 :    
672 :     =over 4
673 :    
674 :     =item fig
675 :    
676 :     A fig-like object that can be used to access the data store.
677 :    
678 :     =item sub
679 :    
680 :     Subsystem object for the relevant subsystem.
681 : overbeek 1.4
682 : parrello 1.12 =item pegs
683 : overbeek 1.4
684 : parrello 1.12 Reference to a list of PEG IDs. All of the peg IDs should be for the
685 :     same genome.
686 : overbeek 1.4
687 : parrello 1.12 =item RETURN
688 : overbeek 1.4
689 : parrello 1.12 Returns a reference to a hash that maps each PEG ID to a color.
690 : overbeek 1.4
691 : parrello 1.12 =back
692 : overbeek 1.4
693 : parrello 1.12 =cut
694 : overbeek 1.4
695 : parrello 1.12 sub set_colors_for_genome {
696 :     # Get the parameters.
697 :     my($fig, $sub, $pegs) = @_;
698 :     # Default all the PEGs to white.
699 :     my %color_of = map { $_ => '#FFFFFF' } @$pegs;
700 :     # Divide the pegs into clusters.
701 :     my @clusters = $fig->compute_clusters($pegs, $sub);
702 :     # Get a list of useful colors.
703 :     my @colors = &cool_colors();
704 :     # If we have too many clusters, chop off the big ones at the end. These
705 :     # are least likely to be important.
706 :     if (@clusters > @colors) { splice(@clusters, 0, (@clusters - @colors)) }
707 :     # Loop through the clusters.
708 :     for my $cluster (@clusters) {
709 :     # Get the color for this cluster.
710 :     my $color = shift @colors;
711 :     # Loop through this cluster, putting this color into the color_of
712 :     # entries for each PEG.
713 :     for my $peg (@$cluster) {
714 :     $color_of{$peg} = $color;
715 : parrello 1.5 }
716 : overbeek 1.4 }
717 : parrello 1.12 # Return the color map.
718 :     return \%color_of;
719 : overbeek 1.4 }
720 :    
721 : parrello 1.12 =head3 cool_colors
722 :    
723 : parrello 1.19 my @colorList = UnvSubsys::cool_colors();
724 : parrello 1.12
725 :     Return a list of web-safe colors.
726 :    
727 :     =cut
728 :    
729 : overbeek 1.4 sub cool_colors {
730 :     # just an array of "websafe" colors or whatever colors we want to use. Feel free to remove bad colors (hence the lines not being equal length!)
731 :     return (
732 :     '#C0C0C0', '#FF40C0', '#FF8040', '#FF0080', '#FFC040', '#40C0FF', '#40FFC0', '#C08080', '#C0FF00', '#00FF80', '#00C040',
733 :     "#6B8E23", "#483D8B", "#2E8B57", "#008000", "#006400", "#800000", "#00FF00", "#7FFFD4",
734 :     "#87CEEB", "#A9A9A9", "#90EE90", "#D2B48C", "#8DBC8F", "#D2691E", "#87CEFA", "#E9967A", "#FFE4C4", "#FFB6C1",
735 :     "#E0FFFF", "#FFA07A", "#DB7093", "#9370DB", "#008B8B", "#FFDEAD", "#DA70D6", "#DCDCDC", "#FF00FF", "#6A5ACD",
736 :     "#00FA9A", "#228B22", "#1E90FF", "#FA8072", "#CD853F", "#DC143C", "#FF6347", "#98FB98", "#4682B4",
737 :     "#D3D3D3", "#7B68EE", "#2F4F4F", "#FF7F50", "#FF69B4", "#BC8F8F", "#A0522D", "#DEB887", "#00DED1",
738 :     "#6495ED", "#800080", "#FFD700", "#F5DEB3", "#66CDAA", "#FF4500", "#4B0082", "#CD5C5C",
739 :     "#EE82EE", "#7CFC00", "#FFFF00", "#191970", "#FFFFE0", "#DDA0DD", "#00BFFF", "#DAA520", "#008080",
740 :     "#00FF7F", "#9400D3", "#BA55D3", "#D8BFD8", "#8B4513", "#3CB371", "#00008B", "#5F9EA0",
741 :     "#4169E1", "#20B2AA", "#8A2BE2", "#ADFF2F", "#556B2F",
742 :     "#F0FFFF", "#B0E0E6", "#FF1493", "#B8860B", "#FF0000", "#F08080", "#7FFF00", "#8B0000",
743 :     "#40E0D0", "#0000CD", "#48D1CC", "#8B008B", "#696969", "#AFEEEE", "#FF8C00", "#EEE8AA", "#A52A2A",
744 :     "#FFE4B5", "#B0C4DE", "#FAF0E6", "#9ACD32", "#B22222", "#FAFAD2", "#808080", "#0000FF",
745 :     "#000080", "#32CD32", "#FFFACD", "#9932CC", "#FFA500", "#F0E68C", "#E6E6FA", "#F4A460", "#C71585",
746 :     "#BDB76B", "#00FFFF", "#FFDAB9", "#ADD8E6", "#778899",
747 :     );
748 :     }
749 :    
750 : parrello 1.12 =head3 ext_id
751 :    
752 : parrello 1.19 my $externalID = UnvSubsys::ext_id($fig, $peg);
753 : parrello 1.12
754 :     or
755 :    
756 : parrello 1.19 my @externalIDs = UnvSubsys::ext_id($fig, $peg);
757 : parrello 1.12
758 :     Return a list of non-FIG IDs for the specified feature. In a scalar context, return
759 :     a single non-FIG ID for the specified feature.
760 :    
761 :     This method returns IDs that are all of the same type, that is, all UniProt IDs, or
762 :     all KEGG IDs, and so forth. To do this, it checks the feature's alias list for IDs
763 :     of a given type. If it finds at least one, then all IDs of that type are returned.
764 :     Highest priority is given to the UniProt IDs, then SP IDs, GI IDs, and finally
765 :     KEGG IDs.
766 :    
767 :     =over 4
768 :    
769 :     =item fig
770 :    
771 :     A FIG-like object for accessing the data store.
772 :    
773 :     =item peg
774 :    
775 :     ID of the feature whose aliases are desired.
776 :    
777 :     =item RETURN
778 :    
779 :     In list context, a list of non-FIG IDs for the feature that are all of the same
780 :     type. In scalar context, the first non-FIG ID for the feature of the
781 :     highest-priority available type.
782 :    
783 :     =back
784 :    
785 :     =cut
786 :    
787 : overbeek 1.4 sub ext_id {
788 :     my($fig,$peg) = @_;
789 :    
790 :     my @tmp;
791 :     my @aliases = $fig->feature_aliases($peg);
792 :     if ((@tmp = grep { $_ =~ /^uni\|/ } @aliases) > 0)
793 :     {
794 : parrello 1.5 @aliases = @tmp;
795 : overbeek 1.4 }
796 :     elsif ((@tmp = grep { $_ =~ /^sp\|/ } @aliases) > 0)
797 :     {
798 : parrello 1.5 @aliases = @tmp;
799 : overbeek 1.4 }
800 :     elsif ((@tmp = grep { $_ =~ /^gi\|/ } @aliases) > 0)
801 :     {
802 : parrello 1.5 @aliases = @tmp;
803 : overbeek 1.4 }
804 :     elsif ((@tmp = grep { $_ =~ /^kegg\|/ } @aliases) > 0)
805 :     {
806 : parrello 1.5 @aliases = @tmp;
807 : overbeek 1.4 }
808 :     else
809 :     {
810 : parrello 1.5 @aliases = ();
811 : overbeek 1.4 }
812 :    
813 :     if (wantarray())
814 :     {
815 : parrello 1.5 return @aliases;
816 : overbeek 1.4 }
817 :     else
818 :     {
819 : parrello 1.5 return $aliases[0];
820 : overbeek 1.4 }
821 :     }
822 :    
823 : parrello 1.12 =head3 subsystem_curator
824 :    
825 : parrello 1.19 my $name = $unvsub->subsystem_curator();
826 : parrello 1.12
827 :     Return the name of the subsystem curator. The database stores user names as
828 :     C<master:>I<name>. This method strips off the C<master:> prefix before it
829 :     passes the result back to the caller.
830 :    
831 :     =cut
832 : overbeek 1.4
833 : overbeek 1.2 sub subsystem_curator {
834 :     my($self) = @_;
835 :    
836 :     my $curator = $self->{Curator};
837 :     $curator =~ s/master://;
838 :     return $curator;
839 :     }
840 :    
841 : parrello 1.12 =head3 get_roles
842 :    
843 : parrello 1.19 my @roles = $unvsub->get_roles();
844 : parrello 1.12
845 :     Return a list of the roles (columns) for this subsystem. The roles will be
846 :     returned in column order, so that if you access the sixth element of the
847 :     return list, you'll get the name of the role for the sixth column.
848 :    
849 :     =cut
850 :    
851 : overbeek 1.2 sub get_roles {
852 :     my($self) = @_;
853 : parrello 1.12 # The role index in this object is a list of 3-tuples. The caller only
854 :     # wants the first element of each tuple, which is the role name.
855 : overbeek 1.2 return map { $_->[0] } @{$self->{Roles}};
856 :     }
857 :    
858 : parrello 1.12 =head3 get_genome_index
859 :    
860 : parrello 1.19 my $index = $unvsub->get_genome_index($genome);
861 : parrello 1.12
862 :     Return the row index of the specified genome.
863 :    
864 :     =over 4
865 :    
866 :     =item genome
867 :    
868 :     ID of the genome whose row index is desired.
869 :    
870 :     =item RETURN
871 :    
872 :     Returns the index of the row corresponding to the specified genome, or an
873 :     undefined value if the genome is not represented in the subsystem
874 :     spreadsheet.
875 :    
876 :     =back
877 :    
878 :     =cut
879 :    
880 : overbeek 1.3 sub get_genome_index {
881 :     my($self,$genome) = @_;
882 :    
883 :     return $self->{GenomeIndex}->{$genome};
884 :     }
885 :    
886 : parrello 1.12 =head3 get_genomes
887 :    
888 : parrello 1.19 my @genomes = $unvsub->get_genomes();
889 : parrello 1.12
890 :     Return a list of the genome IDs for the subsystem. The genomes will be
891 :     presented in row order. In other words, if you index into the sixth
892 :     element of the return list, you will retrieve the ID of the genome for
893 :     the sixth row.
894 :    
895 :     =cut
896 :    
897 : overbeek 1.3 sub get_genomes {
898 :     my($self) = @_;
899 : parrello 1.12 # The genome array is a list of 2-tuples. We extract the first
900 :     # element of each tuple, which is the genome ID.
901 : overbeek 1.3 return map { $_->[0] } @{$self->{Genomes}};
902 :     }
903 :    
904 : parrello 1.12 =head3 get_variant_code
905 :    
906 : parrello 1.19 my $code = $unvsub->get_variant_code($genome);
907 : parrello 1.12
908 :     Return the variant code for a genome. Each subsystem has several variations.
909 :     The variant code indicates which variation of a subsystem is used by a
910 :     particular genome.
911 :    
912 :     Genome data is stored in a list of 2-tuples. The first element is the genome
913 :     ID; the second is the variant code.
914 :    
915 :     =over 4
916 :    
917 :     =item genome
918 :    
919 :     ID or row index of the genome whose variant code is desired.
920 :    
921 :     =item RETURN
922 :    
923 :     Returns the variant code for the specified genome.
924 :    
925 :     =back
926 :    
927 :     =cut
928 :    
929 : overbeek 1.3 sub get_variant_code {
930 :     my($self,$genome) = @_;
931 : parrello 1.12 # Check to see if we have a row index.
932 : overbeek 1.3 if ($genome =~ /^\d+$/)
933 :     {
934 : parrello 1.12 # Here we have a row index, so use it to find the genome's variant
935 :     # code.
936 : parrello 1.5 return $self->{Genomes}->[$genome]->[1];
937 : overbeek 1.3 }
938 :     else
939 :     {
940 : parrello 1.12 # Here we have a genome ID, so we need to convert it to a row index.
941 : parrello 1.5 my $genomeI = $self->{GenomeIndex}->{$genome};
942 :     return $self->{Genomes}->[$genomeI]->[1];
943 : overbeek 1.3 }
944 :     }
945 :    
946 : parrello 1.12 =head3 get_pegs_from_cell
947 :    
948 : parrello 1.19 my @pegs = $unvsub->get_pegs_from_cell($genome, $role);
949 : parrello 1.12
950 :     Return a list of the features in a specified spreadsheet cell. The cell is specified
951 :     by genome ID and role ID.
952 :    
953 :     =over 4
954 :    
955 :     =item genome
956 :    
957 :     ID of the genome relevant to the cell.
958 :    
959 :     =item role
960 :    
961 :     ID of the role relevant to the cell.
962 :    
963 :     =item RETURN
964 :    
965 :     Returns a list of the features in the cell, or an empty list if the cell is empty.
966 :    
967 :     =back
968 :    
969 : parrello 1.19 =cut
970 : parrello 1.12
971 : overbeek 1.3 sub get_pegs_from_cell {
972 :     my($self,$genome,$role) = @_;
973 : parrello 1.12 # Convert the genome and role IDs to row and column indices.
974 : overbeek 1.3 my $genomeI = $self->{GenomeIndex}->{$genome};
975 :     my $roleI = $self->{RoleIndex}->{$role};
976 : parrello 1.12 # Get the pegs from the cell and return them.
977 : overbeek 1.3 my $pegs = $self->{PegHash}->{$genomeI}->{$roleI};
978 :     return $pegs ? @$pegs : ();
979 :     }
980 :    
981 : parrello 1.14 =head3 get_diagrams
982 :    
983 : parrello 1.19 my @list = $sub->get_diagrams();
984 : parrello 1.14
985 :     Return a list of the diagrams associated with this subsystem. Each diagram
986 :     is represented in the return list as a 4-tuple C<[diagram_id, diagram_name,
987 :     page_link, img_link]> where
988 :    
989 :     =over 4
990 :    
991 :     =item diagram_id
992 :    
993 :     ID code for this diagram.
994 :    
995 :     =item diagram_name
996 :    
997 :     Displayable name of the diagram.
998 :    
999 :     =item page_link
1000 :    
1001 :     URL of an HTML page containing information about the diagram.
1002 :    
1003 :     =item img_link
1004 :    
1005 :     URL of an HTML page containing an image for the diagram.
1006 :    
1007 :     =back
1008 :    
1009 :     Note that the URLs are in fact for CGI scripts with parameters that point them
1010 :     to the correct place. Though Sprout has diagram information in it, it has
1011 :     no relationship to the diagrams displayed in SEED, so the work is done entirely
1012 :     on the SEED side.
1013 :    
1014 :     =cut
1015 :    
1016 :     sub get_diagrams {
1017 :     # Get the parameters.
1018 :     my ($self) = @_;
1019 :     # Return the diagram list.
1020 :     return @{$self->{Diagrams}};
1021 :     }
1022 :    
1023 : overbeek 1.3 sub get_notes {
1024 :     my($self) = @_;
1025 :    
1026 :     return $self->{Notes};
1027 :     }
1028 :    
1029 : overbeek 1.2 sub get_role_index {
1030 :     my($self,$role) = @_;
1031 :    
1032 :     return $self->{RoleIndex}->{$role};
1033 :     }
1034 :    
1035 :     sub get_role_abbr {
1036 :     my($self,$roleI) = @_;
1037 :    
1038 :     if ($roleI !~ /^\d+$/)
1039 :     {
1040 : parrello 1.5 $roleI = $self->{RoleIndex}->{$roleI};
1041 : overbeek 1.2 }
1042 :     my $roles = $self->{Roles};
1043 :     return $roles->[$roleI]->[1];
1044 :     }
1045 :    
1046 :     sub get_reactions {
1047 :     my($self) = @_;
1048 :    
1049 :     return $self->{Reactions};
1050 :     }
1051 :    
1052 :     sub get_subset_namesC {
1053 :     my($self) = @_;
1054 :    
1055 :     return map { $_->[0] } @{$self->{RoleSubsets}};
1056 :     }
1057 :    
1058 :     sub get_subsetC_roles {
1059 :     my($self,$subset) = @_;
1060 :     my($i,$j);
1061 :    
1062 :     my $subset_info = $self->{RoleSubsets};
1063 :     for ($i=0; ($i < @$subset_info) && ($subset_info->[$i]->[0] ne $subset); $i++) {}
1064 :     if ($i < @$subset_info)
1065 :     {
1066 : parrello 1.5 my @roles = ();
1067 :     foreach $j (@{$subset_info->[$i]->[1]})
1068 :     {
1069 :     push(@roles,$self->{Roles}->[$j]->[0]);
1070 :     }
1071 :     return @roles;
1072 : overbeek 1.2 }
1073 :     return undef;
1074 :     }
1075 :    
1076 : overbeek 1.4 sub get_color_of {
1077 :     my($self,$peg) = @_;
1078 :    
1079 :     return $self->{Colors}->{$peg};
1080 :     }
1081 :    
1082 : parrello 1.12 =head3 active_genomes
1083 :    
1084 : parrello 1.19 my $activeHash = UnvSubsys::active_genomes(\@row_subsets, $active_subsetR, $focus, \%genomeH, \@genomes_info);
1085 : parrello 1.12
1086 :     Return a hash containing the active genomes for this subsystem display. The
1087 :     keys of the hash will be the row indices of the genomes to be highlighted on the
1088 :     display. Each genome ID will map to 1. Thus, if C<< $activeHash->{3} >>
1089 :     tests TRUE, the fourth row should be highlighted.
1090 :    
1091 :     The rules for determining the active genomes are as follows. If I<$active_subsetR> is
1092 :     specified, it is presumed to be the ID of the subset containing the active genomes.
1093 :     If it is not specified and I<$focus> is specified, then I<$focus> is presumed to be the
1094 :     ID of the genome currently in focus, and the active genomes will be the ones in the
1095 :     smallest row subset containing the genome in focus. If neither I<$active_subsetR> nor
1096 :     I<$focus> are specified, then all genomes are active.
1097 :    
1098 :     =over 4
1099 :    
1100 :     =item row_subsets
1101 :    
1102 :     Reference to a list of 2-tuples. Each tuple consists of a row subset ID followed by
1103 :     a reference to a list of the row indices for the rows in the identified subset.
1104 :    
1105 :     =item active_subsetR (optional)
1106 :    
1107 :     ID of the active subset (if any).
1108 :    
1109 :     =item focus
1110 :    
1111 :     ID of the genome currently in focus (if any). If there is no active subset, then
1112 :     the smallest subset containing this genome will be made active.
1113 :    
1114 :     =item genomeH
1115 :    
1116 :     Reference to a hash of genome IDs to row indices. The keys of this hash are
1117 :     the genomes in this subsystem, which also form the subsystem spreadsheet's
1118 :     rows.
1119 :    
1120 :     =item genomes_info
1121 :    
1122 :     Reference to a list of 2-tuples. The first element of each 2-tuple is the
1123 :     ID of a genome; the second is the variant code for the subsystem variant
1124 :     used by the genome. The tuples are ordered by row index, so that the ID
1125 :     and variant code of the genome in a particular row can be located by indexing
1126 :     into this parameter using the subsystem spreadsheet row number.
1127 :    
1128 :     =item RETURN
1129 :    
1130 :     Returns a reference to a hash that maps the row indices of the active genomes
1131 :     to 1. This hash can be used to quickly determine whether or not a particular
1132 :     row is to be highlighted.
1133 :    
1134 :     =back
1135 :    
1136 :     =cut
1137 :    
1138 : overbeek 1.7 sub active_genomes {
1139 : parrello 1.12 # Get the parameters.
1140 :     my($fig, $row_subsets, $active_subsetR, $focus, $genomeH, $genomes_info) = @_;
1141 : overbeek 1.7 my($i,@bestL);
1142 : parrello 1.12 # Declare the return variable.
1143 : overbeek 1.7 my $active_genomes = {};
1144 : parrello 1.12 # Check for an active subset.
1145 : overbeek 1.7 if ($active_subsetR)
1146 :     {
1147 : parrello 1.12 # Search for the active subset in the row subset array.
1148 :     for ($i=0; ($i < @$row_subsets) && ($row_subsets->[$i]->[0] ne $active_subsetR); $i++) {}
1149 :     if ($i < @$row_subsets)
1150 :     {
1151 :     # Here we found the active subset, so we extract its list of row indices.
1152 :     @bestL = @{$row_subsets->[$i]->[1]};
1153 :     }
1154 :     else {
1155 :     # Here we have the ID of the active subset. First, we search for that ID
1156 :     # in the row subset list.
1157 :     for ($i=0; ($i < @$row_subsets) && ($row_subsets->[$i]->[0] ne $active_subsetR); $i++) {}
1158 :     if ($i < @$row_subsets)
1159 :     {
1160 :     # Here we found the named subset, so we return its member list.
1161 :     @bestL = @{$row_subsets->[$i]->[1]};
1162 :     }
1163 :     else
1164 :     {
1165 :     # Here the active subset does not exist. We punt by extracting a
1166 :     # list of all the row indices in the spreadsheet.
1167 :     $active_subsetR = 'All';
1168 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1169 :     }
1170 :     }
1171 : overbeek 1.7 }
1172 : parrello 1.12 elsif ($focus)
1173 : overbeek 1.7 {
1174 : parrello 1.12 # Here we don't have an active row subset, but a particular genome is in
1175 :     # focus. We'll look for the smallest row subset containing the genome
1176 :     # in focus. First, we need to prime the loop. "$bestN" will be the ID
1177 :     # of the best subset found so far; "@bestL" is where we stash the list
1178 :     # of IDs in the subset. Our initial selection, then, will be the
1179 :     # fake "All" subset, which contains the entire collection of rows.
1180 : overbeek 1.16
1181 : parrello 1.12 if (! $fig->is_complete($focus))
1182 :     {
1183 :     # Here the gnome in focus is incomplete, so it won't be anywhere
1184 :     # in our list. We default to making everything active.
1185 :     $active_subsetR = 'All';
1186 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1187 :     } else {
1188 :     my $bestN = "All";
1189 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1190 :     # Next, we get the row index for the genome in focus.
1191 :     my $focusIndex = $genomeH->{$focus};
1192 :     # Now we loop through all the row subsets.
1193 :     my $tuple;
1194 :     foreach $tuple (@$row_subsets)
1195 :     {
1196 :     # Extract the subset ID and its list of row indices. The latter is
1197 :     # in "$genomeIs".
1198 :     my($id,$genomeIs) = @$tuple;
1199 :     # Search for the index of the focus row in the current subset's list
1200 :     # of row indices.
1201 :     for ($i=0; ($i < @$genomeIs) && ($genomeIs->[$i] != $focusIndex); $i++) {}
1202 :     # Now either $i will be the index of the focus row in the subset, or
1203 :     # it is going to be equal to the number of rows in the subset.
1204 :     if ($i < @$genomeIs)
1205 :     {
1206 :     # We've found the focus row in this subset. Select it as the new
1207 :     # best subset if it's smaller than the last one we found.
1208 :     if (@$genomeIs < @bestL)
1209 :     {
1210 :     $bestN = $id;
1211 :     @bestL = @$genomeIs;
1212 :     }
1213 :     }
1214 :     }
1215 :     # Save the best subset found as the active one.
1216 :     $active_subsetR = $bestN;
1217 :     }
1218 :     } else {
1219 :     # Here we have nothing: no active subset, and no focus row. We make
1220 :     # all rows active.
1221 :     $active_subsetR = 'All';
1222 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1223 : overbeek 1.7 }
1224 : parrello 1.12 # "@bestL" now contains a list of the row indices for the active genomes.
1225 :     # We convert it to a hash and return it.
1226 : overbeek 1.7 my %active_genomes = map { $_ => 1 } @bestL;
1227 :     return \%active_genomes;
1228 :     }
1229 :    
1230 : olson 1.13 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3