[Bio] / FigKernelPackages / UnvSubsys.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/UnvSubsys.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.16 - (view) (download) (as text)

1 : olson 1.15 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : overbeek 1.1 package UnvSubsys;
19 :    
20 : overbeek 1.2 use Subsystem;
21 : overbeek 1.1 use Carp;
22 :     use FIG;
23 : parrello 1.12 use SFXlate;
24 : overbeek 1.1 use Data::Dumper;
25 :     use strict;
26 : parrello 1.12 use Tracer;
27 : overbeek 1.1
28 : parrello 1.12 =head1 Universal Subsystem Object
29 :    
30 :     =head2 Introduction
31 :    
32 :     The universal subsystem object provides methods used to display useful information
33 :     about a subsystem. Its intent is to support both Sprout and SEED.
34 :    
35 :     The soul of a subsystem is its spreadsheet. The columns of the spreadsheet are
36 :     the subsystem roles. The rows are the genomes that contain the subsystem. PEGs
37 :     are stored in the spreadsheet's cells. Each genome can be identified by genome
38 :     ID or row index. Each role can be identified by role name or column index.
39 :    
40 :     It is worth noting that the term I<subset> has two meanings-- a subset of roles
41 :     or a subset of genomes. A subset of roles is called a I<column subset>. A
42 :     subset of genomes is called a I<row subset>.
43 :    
44 :     The object created contains a great deal of information, so it's worthwhile to
45 :     stop for a moment and discuss it. The object will have the following members.
46 :    
47 :     =over 4
48 :    
49 :     =item Roles
50 :    
51 :     A list of 3-tuples, each consisting of a role name, the abbreviated role name,
52 :     and a list of URLs for the reactions. Each role is a column in the
53 :     subsystem spreadsheet. Indexing into the list by column index yields the
54 :     ID, abbreviation, and reactions for the role in that column.
55 :    
56 :     =item RoleIndex
57 :    
58 :     A hash mapping each role to its column index in the spreadsheet.
59 :    
60 :     =item RoleSubsets
61 :    
62 :     A list of 2-tuples, each consisting of a subset name followed by a list of the
63 :     column indices for the roles in the subset.
64 :    
65 :     =item Genomes
66 :    
67 :     A list of 2-tuples, each containing the genome ID for the relevant row and the
68 :     variant code for that genome. Subsystems can have several variations, and the
69 :     variant code indicates which variant of the subsystem that the genome uses.
70 :     There is one genome for each row in the spreadsheet. Indexing into the list
71 :     yields the ID of the genome for that row and its variant code.
72 :    
73 :     =item GenomeIndex
74 :    
75 :     A hash mapping each genome ID to its row index in the spreadsheet.
76 :    
77 :     =item PegHash
78 :    
79 :     A hash of hashes containing the spreadsheet cells. If C<$pegHash> is the hash
80 :     of hashes, C<$row> is a genome index, and C<$col> is a role index, then
81 :    
82 :     $pegHash->{$row}->{$col}
83 :    
84 :     returns a reference to a list of the IDs for the PEGs in the relevant
85 :     spreadsheet cell.
86 :    
87 :     =item ColorHash
88 :    
89 :     A hash mapping each PEG ID to the color that should be used to represent that
90 :     PEG in the display.
91 :    
92 :     =item AliasHash
93 :    
94 :     A hash mapping each PEG ID to a list of its aliases.
95 :    
96 :     =item ReactionHash
97 :    
98 :     A hash mapping each role to the list of reactions it catalyzes.
99 :    
100 :     =back
101 :    
102 :     =head2 Public Methods
103 :    
104 :     =head3 new
105 :    
106 :     C<< my $usub = UnvSubsys->new($ssa, $fig, $active_subsetR, $focus, $show_clusters, $aliases, \@peg_colors); >>
107 :    
108 :     Construct a new universal subsystem object for a specified subsystem.
109 :    
110 :     =over 4
111 :    
112 :     =item ssa
113 :    
114 :     Name of the subsystem.
115 :    
116 :     =item fig
117 :    
118 :     Data access object, either a FIG object or an SFXlate object.
119 :    
120 :     =item active_subsetR
121 :    
122 :     Name of the active row subset. A row subset names a group of genomes.
123 :    
124 :     =item focus
125 :    
126 :     ID of the genome currently in focus.
127 :    
128 :     =item show_clusters
129 :    
130 :     TRUE if clusters should be painted by color, else FALSE.
131 :    
132 :     =item aliases
133 :    
134 :     TRUE if PEG aliases should be shown, else FALSE.
135 :    
136 :     =item peg_colors (optional)
137 : overbeek 1.1
138 : parrello 1.12 Reference to a list of 2-tuples, each 2-tuple consisting of a PEG ID and the color
139 :     to be assigned to the PEG.
140 : overbeek 1.1
141 : parrello 1.12 =back
142 : overbeek 1.1
143 : parrello 1.12 =cut
144 : overbeek 1.1
145 : parrello 1.12 sub new {
146 :     # Get the parameters.
147 :     my($class, $ssa, $fig, $active_subsetR, $focus, $show_clusters, $aliases, $peg_colors) = @_;
148 :     # Fix the subsystem name. Spaces are replaced by underscores to avoid naming problems
149 :     # in the seed's file system.
150 :     $ssa =~ s/ /_/g;
151 :     # Get the FIG object. At this point in time, we're only getting data from the SEED. On
152 :     # a future pass through the code, we'll get data from a SEED or Sprout.
153 :     if ((ref($fig) eq "FIG") || (ref($fig) eq 'SFXlate')) {
154 :     # Create a subsystem object. The "get_subsystem" method is provided by both FIG
155 :     # and SFXlate; however, the object returned in each case is different: FIG
156 :     # returns a "Subsystem" object; SFXlate returns a "SproutSubsys" object.
157 :     my $subsystem = $fig->get_subsystem($ssa);
158 :     # Get the key subsystem data. Note that CRs in the notes are converted to LFs
159 :     # in case the notes come from a Mac.
160 : parrello 1.5 my $curator = $subsystem->get_curator;
161 :     my $notes = $subsystem->get_notes;
162 : parrello 1.12 $notes =~ s/\r/\n/g;
163 : parrello 1.5 my @roles = $subsystem->get_roles;
164 :     my $reactions = $subsystem->get_reactions;
165 :     my @genomes = $subsystem->get_genomes;
166 : overbeek 1.1 my @col_subsets = $subsystem->get_subset_namesC;
167 : parrello 1.14 my @diagrams = $subsystem->get_diagrams();
168 : parrello 1.12 # Create the data structures for the role list and the role index.
169 : parrello 1.5 my $role_info = [];
170 :     my $roleH = {};
171 : parrello 1.12 # Loop through the roles to create the role list. The $i index will move
172 :     # through the columns of the spreadsheet.
173 : parrello 1.5 my($i,$j,$subset,$peg);
174 :     for ($i=0; ($i < @roles); $i++)
175 :     {
176 : parrello 1.12 # Get the ID of the role in the current column.
177 : parrello 1.5 my $role = $roles[$i];
178 : parrello 1.12 # Extract its abbreviation.
179 :     my $abbrev = $subsystem->get_role_abbr($i);
180 :     # Get the reactions. Note that its possible no reactions were found. If there were
181 :     # reactions found, however, we convert from reaction IDs to a comma-delimited
182 :     # list of HTML code full of hyperlinks.
183 : parrello 1.5 my $react = $reactions ? join(",", map { &HTML::reaction_link($_) } @{$reactions->{$role}}) : [];
184 : parrello 1.12 # Form the role name, abbreviation, and reaction list into a 3-tuple and push
185 :     # them onto the main role list.
186 : parrello 1.5 push(@$role_info,[$role,$abbrev,$react]);
187 : parrello 1.12 # Set the role hash so that we can get back the column index for a given
188 :     # role name.
189 : parrello 1.5 $roleH->{$role} = $i;
190 :     }
191 : parrello 1.12 # Get the column subsets. A column subset is a list of role IDs, so we need to
192 :     # convert it to a set of column indices. We ignore the special "All" subset
193 :     # that contains everything.
194 : parrello 1.5 my $subset_info = [];
195 :     foreach $subset (@col_subsets)
196 :     {
197 :     if ($subset ne 'All')
198 :     {
199 :     push(@$subset_info,[$subset,[map { $roleH->{$_} } $subsystem->get_subsetC_roles($subset)]]);
200 :     }
201 :     }
202 : parrello 1.12 # Now we create the genome directories. For each genome we need to be able to
203 :     # hash from the genomeID to the specified role index and we must be able to
204 :     # get the genome's variant code. (The variant code indicates which subsystem
205 :     # variant the genome uses.
206 : parrello 1.5 my $genomes_info = [];
207 :     my $genomeH = {};
208 :     for ($i=0; ($i < @genomes); $i++)
209 :     {
210 : parrello 1.12 # Get the genome ID for this row.
211 : parrello 1.5 my $genome = $genomes[$i];
212 : parrello 1.12 # Get its variant code.
213 :     my $variant = $subsystem->get_variant_code($i);
214 :     # Form them into a 2-tuple and add the result to the genome list.
215 : parrello 1.5 push(@$genomes_info,[$genome,$variant]);
216 : parrello 1.12 # Set up the hash to get from the genome ID to the row index.
217 : parrello 1.5 $genomeH->{$genome} = $i;
218 :     }
219 : parrello 1.12
220 :     # Next we gather the data from the actual spreadsheet cells. For an SFXlate
221 :     # object, this is the most expensive part of the operation, since it requires
222 :     # a database call for each cell.
223 : parrello 1.5 my $pegH = {};
224 : parrello 1.12 # $i is the row index, which cycles through genomes.
225 : parrello 1.5 for ($i=0; ($i < @genomes); $i++)
226 :     {
227 : parrello 1.12 # $j is the column index, which cycles through roles.
228 : parrello 1.5 for ($j=0; ($j < @roles); $j++)
229 :     {
230 : parrello 1.12 my @pegs = $subsystem->get_pegs_from_cell($i,$j);
231 : parrello 1.5 $pegH->{$i}->{$j} = [@pegs];
232 :     }
233 :     }
234 : overbeek 1.7
235 : parrello 1.12 # Get the row subsets. Row subsets are determined by the genome
236 :     # taxonomy, so these are different from the row subsets stored
237 :     # in the subsystem object.
238 :     my $row_subsets = &row_subsets($fig, $genomeH);
239 :     # Here we try to get a list of the active genomes. The caller
240 :     # gives us hints in the form of the "$focus" and "$active_subsetR"
241 :     # parameters. If, for example, we are using this object to generate
242 :     # a subsystem web page, the focus information would steer us to
243 :     # whatever the user wants to look at on the page.
244 : olson 1.13 my $active_genomes = &active_genomes($fig, $row_subsets,$active_subsetR,$focus,
245 : parrello 1.12 $genomeH,$genomes_info);
246 :    
247 :     # Now we generate a table of colors for the various PEGs. If the
248 :     # caller gave us a map of peg IDs to colors, we use that. Otherwise,
249 :     # we allow the option of painting the PEGs by cluster number. (The
250 :     # caller indicates this by setting the "show_clusters" flag in the
251 :     # parameter list.)
252 :     my $colorsH;
253 :     if ($peg_colors)
254 :     {
255 :     # Here the caller gave us a list of peg colors. The list contains
256 :     # 2-tuples, each consisting of a PEG ID followed by the
257 :     # color value. The loop below extracts the pairs and stuffs them
258 :     # into the color hash.
259 :     $colorsH = {};
260 :     foreach $_ (@$peg_colors)
261 :     {
262 :     my($peg,$color) = @$_;
263 :     $colorsH->{$peg} = $color;
264 :     }
265 :     }
266 :     elsif ($show_clusters)
267 :     {
268 :     # Here the user wants us to base the colors on the genome clustering
269 :     # information.
270 :     $colorsH = &set_colors($fig,$subsystem,$pegH,$active_genomes);
271 :     }
272 :     else
273 :     {
274 :     # Here the user is not interested in coloring the PEGs.
275 :     $colorsH = {};
276 :     }
277 :     # If the user wants to see aliases, compute the alias hash. Aliases
278 :     # will only be computed for PEGs belonging to active (highlighted)
279 :     # genomes, and there is a maximum of one alias per PEG.
280 : overbeek 1.7 my $aliasesH = $aliases ? &set_aliases($fig,$pegH,$active_genomes) : {};
281 : parrello 1.12 # Create and bless the UnvSubsys object.
282 : parrello 1.5 my $self = { Roles => $role_info,
283 :     RoleIndex => $roleH,
284 :     RoleSubsets => $subset_info,
285 :     Genomes => $genomes_info,
286 :     GenomeIndex => $genomeH,
287 : parrello 1.12 GenomeSubsets => $row_subsets,
288 : parrello 1.5 PegHash => $pegH,
289 :     Colors => $colorsH,
290 :     Aliases => $aliasesH,
291 :     Curator => $curator,
292 :     Notes => $notes,
293 : parrello 1.14 Reactions => $reactions,
294 :     Diagrams => \@diagrams,
295 : parrello 1.5 };
296 :     bless($self, $class);
297 : parrello 1.12 # Return the object.
298 : parrello 1.5 return $self;
299 : overbeek 1.1 }
300 :     else
301 :     {
302 : parrello 1.12 # Here the FIG-like object was not recognized, so we return an
303 :     # undefined value.
304 : parrello 1.5 return undef;
305 : overbeek 1.1 }
306 :     }
307 :    
308 : parrello 1.12 =head3 get_subset_namesR
309 :    
310 :     C<< my @names = $unvsub->get_subset_namesR(); >>
311 :    
312 :     Return the names of the genome (row) subsets.
313 :    
314 :     =cut
315 :    
316 : overbeek 1.6 sub get_subset_namesR {
317 :     my($self) = @_;
318 :    
319 :     return map { $_->[0] } @{$self->{GenomeSubsets}};
320 :     }
321 :    
322 : parrello 1.12 =head3 get_subsetR
323 :    
324 :     C<< my @genomes = $unvsub->get_subsetR($set); >>
325 :    
326 :     Return a list of the genome IDs covered by a row subset.
327 :    
328 :     =over 4
329 :    
330 :     =item set
331 :    
332 :     Name of the row subset whose genomes are desired.
333 :    
334 :     =item RETURN
335 :    
336 :     Returns a list of the IDs for the genomes found in the specified row
337 :     set.
338 :    
339 :     =back
340 :    
341 :     =cut
342 :    
343 : overbeek 1.6 sub get_subsetR {
344 : parrello 1.12 # Get the parameters.
345 : overbeek 1.6 my($self,$set) = @_;
346 :     my($i);
347 : parrello 1.12 # Get the list of row subsets.
348 : overbeek 1.6 my $sets = $self->{GenomeSubsets};
349 : parrello 1.12 # Find the row subset with the specified name. The row subset list is a
350 :     # list of 2-tuples, and the first element of each tuple is the set
351 :     # name.
352 : overbeek 1.6 for ($i=0; ($i < @$sets) && ($sets->[$i]->[0] ne $set); $i++) {}
353 :     if ($i < @$sets)
354 :     {
355 : parrello 1.12 # Here we found the named subset. The subset tuple's second element is
356 :     # the list of row indices. We map these to genome IDs before returning
357 :     # them.
358 :     return map { $self->{Genomes}->[$_]->[0] } @{$sets->[$i]->[1]}
359 : overbeek 1.6 }
360 : parrello 1.12 # Here we subset was not found, so we return the undefined value.
361 : overbeek 1.6 return undef;
362 :     }
363 :    
364 : parrello 1.12 =head3 get_subsetR
365 :    
366 :     C<< my @pairs = $unvsub->get_subsetsR(); >>
367 :    
368 :     Return a list of all the row subsets. The subsets are returned in the form
369 :     of 2-tuples, each consisting of a subset name followed by a reference to a
370 :     list of genome IDs. The genome IDs correspond to the rows in the subset.
371 :    
372 :     =cut
373 :    
374 : overbeek 1.6 sub get_subsetsR {
375 : parrello 1.12 # Get the parameters.
376 : overbeek 1.6 my($self) = @_;
377 : parrello 1.12 # Extract the list of genome subsets. This list is in the form of
378 :     # 2-tuples, but the rows are identified by row index, not genome ID.
379 : overbeek 1.6 my $sets = $self->{GenomeSubsets};
380 : parrello 1.12 # Create the return list.
381 : overbeek 1.6 my @pairs = ();
382 : parrello 1.12 # Loop through the subsets.
383 : overbeek 1.6 my $pair;
384 :     foreach $pair (@$sets)
385 :     {
386 : parrello 1.12 # Convert this subset's member list from row indices to genome IDs
387 :     # and stash the result in the return list.
388 :     my($id,$members) = @$pair;
389 :     push(@pairs,[$id,[map { $self->{Genomes}->[$_]->[0] } @$members]]);
390 : overbeek 1.6 }
391 : parrello 1.12 # Return the list constructed.
392 : overbeek 1.6 return @pairs;
393 :     }
394 :    
395 : parrello 1.12 =head3 row_subsets
396 :    
397 :     C<< my $subsetList = UnvSubsys::row_subsets($fig, \%genomeH); >>
398 :    
399 :     This method computes the taxonomic row subsets for a subsystem. It takes
400 :     as input a hash that maps genome IDs to column indices and a FIG object.
401 :     The FIG object provides a list of taxonomic groups of 10 or more complete
402 :     genomes. From the list, we extract subsets which have more than 10
403 :     genomes in the list of subsystem genomes. If no such subsets exist,
404 :     we extract subsets which have at least 1 genome from the list of
405 :     subsystem genomes. The subsets are returned as 2-tuples, the first
406 :     element being the subset ID and the second being a list of row indices
407 :     for the genomes in the subset.
408 :    
409 :     =over 4
410 :    
411 :     =item fig
412 :    
413 :     A FIG-like object for accessing the data store.
414 :    
415 :     =item genomeH
416 :    
417 :     Reference to a hash that maps each genome ID to its row index in the
418 :     subsystem spreadsheet.
419 :    
420 :     =item RETURN
421 :    
422 :     Returns a reference to a list of 2-tuples. Each 2-tuple consists of a
423 :     subset ID followed by a list of the row indices for the genomes in the
424 :     subset.
425 :    
426 :     =back
427 :    
428 :     =cut
429 :    
430 : overbeek 1.6 sub row_subsets {
431 : parrello 1.12 my ($fig, $genomeH) = @_;
432 : overbeek 1.6
433 : parrello 1.12 # We need a real FIG object, since SFXlate does not yet support
434 :     # taxonomy trees. The "FIG" method does this for us.
435 :     $fig = $fig->FIG();
436 :     # Initialize the return value.
437 : overbeek 1.6 my $subsets = [];
438 : parrello 1.12 # Get a list of taxonomic groups. This will come back as a list of
439 :     # 2-tuples.
440 : overbeek 1.11 my $taxonomic_groups = $fig->taxonomic_groups_of_complete(5);
441 : parrello 1.12 # Loop through the 2-tuples. We're looking for subsets which
442 :     # contain at least one genome on the subsystem's spreadsheet.
443 : overbeek 1.6 my($pair,$id,$members);
444 :     foreach $pair (@$taxonomic_groups)
445 :     {
446 :     ($id,$members) = @$pair;
447 : parrello 1.12 # Extract the genomes in the member list that participate in this
448 :     # subsystem. To do this, we convert each genome ID to its row
449 :     # index. If no row index exists, the GREP condition discards the
450 :     # member.
451 :     my @mem = grep { defined($_) } map { $genomeH->{$_} } @$members;
452 :     # If there are enough members, save the subset.
453 :     if (@mem > 0)
454 :     {
455 :     push(@$subsets,[$id,[@mem]]);
456 :     }
457 : overbeek 1.6 }
458 : parrello 1.12 # Return the list of row subsets.
459 : overbeek 1.6 return $subsets;
460 :     }
461 :    
462 : parrello 1.12 =head3 set_aliases
463 :    
464 :     C<< my $aliasHash = UnvSubsys::set_aliases($fig, $pegH, $active_genomes); >>
465 :    
466 :     Return a hash mapping PEG IDs to aliases.
467 :    
468 :     =over 4
469 :    
470 :     =item fig
471 :    
472 :     FIG-like object that can be used to access the data store.
473 :    
474 :     =item pegH
475 :    
476 :     Reference to the spreadsheet hash table. Given a row index I<$row> and a
477 :     column index I<$col>,
478 :    
479 :     $pegH->{$row}->{$col}
480 :    
481 :     will return a reference to a list of PEGs in the specified spreadsheet cell.
482 :    
483 :     =item active_genomes
484 :    
485 :     Reference to a hash whose keys correspond to the spreadsheet row indices
486 :     of genomes that should be highlighted.
487 :    
488 :     =item RETURN
489 :    
490 :     Returns a hash that takes as input a PEG ID and returns an alias. Only PEGs
491 :     for active genomes will be processed.
492 :    
493 :     =back
494 :    
495 :     =cut
496 :    
497 : overbeek 1.4 sub set_aliases {
498 : parrello 1.12 # Get the parameters.
499 : overbeek 1.7 my($fig,$pegH,$active_genomes) = @_;
500 : overbeek 1.4 my($genomeI,$roleI,$pegs,$peg,$roleH);
501 :    
502 : parrello 1.12 # Create the return hash.
503 : overbeek 1.4 my $aliasesH = {};
504 :    
505 : parrello 1.12 # Loop through each row that corresponds to an active genome.
506 :     # The active genome list contains row indices, and there is
507 :     # one genome per row. Note that the genome ID is never used,
508 :     # only the row index.
509 : overbeek 1.7 foreach $genomeI (grep { $active_genomes->{$_} } keys(%$pegH))
510 : overbeek 1.4 {
511 : parrello 1.12 # Get the role hash for the specified genome. The role hash
512 :     # maps column indices (representing roles) to lists of PEGs.
513 : parrello 1.5 $roleH = $pegH->{$genomeI};
514 : parrello 1.12 # Loop through the role (column) indices.
515 : parrello 1.5 foreach $roleI (keys(%$roleH))
516 :     {
517 : parrello 1.12 # Get the PEG list for this row/column combination.
518 : parrello 1.5 $pegs = $roleH->{$roleI};
519 : parrello 1.12 # Only proceed if data was found in the cell.
520 :     if (defined $pegs) {
521 :     # Loop through the pegs in the cell.
522 :     foreach $peg (@$pegs)
523 : parrello 1.5 {
524 : parrello 1.12 # If we do not already have an alias for this PEG,
525 :     # compute one.
526 :     if (! $aliasesH->{$peg})
527 :     {
528 :     $aliasesH->{$peg} = scalar &ext_id($fig,$peg);
529 :     }
530 : parrello 1.5 }
531 :     }
532 :     }
533 : overbeek 1.4 }
534 : parrello 1.12 # Return the hash we built.
535 : overbeek 1.4 return $aliasesH;
536 :     }
537 :    
538 : parrello 1.12 =head3 set_colors
539 :    
540 :     C<< my $colorHash = UnvSubsys::set_colors($fig, $sub, \%pegH, \%active_genomes); >>
541 :    
542 :     Return a hash that maps each PEG in the subsystem spreadsheet to a display
543 :     color. Not all PEGs need to be mapped. Those that do not have a color
544 :     assigned will generally be displayed in white.
545 :    
546 :     =over 4
547 :    
548 :     =item fig
549 :    
550 :     FIG-like object that can be used to access the data store.
551 :    
552 :     =item sub
553 :    
554 :     Subsystem object for the current subsystem.
555 :    
556 :     =item pegH
557 :    
558 :     Reference to the spreadsheet hash table. Given a row index I<$row> and a
559 :     column index I<$col>,
560 :    
561 :     $pegH->{$row}->{$col}
562 :    
563 :     will return a reference to a list of PEGs in the specified spreadsheet cell.
564 :    
565 :     =item active_genomes
566 :    
567 :     Reference to a hash whose keys correspond to the spreadsheet row indices
568 :     of genomes that should be highlighted.
569 :    
570 :     =item RETURN
571 :    
572 :     Returns a hash that takes as input a PEG ID and returns a color. These colors
573 :     are used when displaying the PEGs in the subsystem spreadsheet. Only PEGs
574 :     for active genomes will be colored.
575 :    
576 :     =back
577 :    
578 :     =cut
579 :    
580 : overbeek 1.4 sub set_colors {
581 : parrello 1.12 # Get the parameters.
582 :     my($fig,$sub,$pegH,$active_genomes) = @_;
583 :    
584 :     my($genomeI,$roleI,$pegs,$peg,$roleH,%pegs_in_genome);
585 :     # Create the return hash.
586 : overbeek 1.4 my $colorsH = {};
587 : parrello 1.12 # Loop through the active genomes. The keys of "%$pegH" are the row indices
588 :     # for rows that have at least one occupied cell in the spreadsheet. The
589 :     # Grep then reduces this list to those rows that are highlighted.
590 : overbeek 1.7 foreach $genomeI (grep { $active_genomes->{$_} } keys(%$pegH))
591 : overbeek 1.4 {
592 : parrello 1.12 # We will use the following hash to compile a list of all the PEGs
593 :     # in the spreadsheet for the current genome, that is, the genome
594 :     # represented by row index "$genomeI".
595 : parrello 1.5 undef %pegs_in_genome;
596 : parrello 1.12 # Get the hash for the current row. This hash maps column indices to
597 :     # lists of pegs.
598 : parrello 1.5 $roleH = $pegH->{$genomeI};
599 : parrello 1.12 # Loop through the column indices for the specified row.
600 : parrello 1.5 foreach $roleI (keys(%$roleH))
601 :     {
602 : parrello 1.12 # Here we can finally get a list of the pegs in this spreadsheet
603 :     # cell. We loop through them, marking them in the "%pegs_in_genome"
604 :     # hash.
605 : parrello 1.5 $pegs = $roleH->{$roleI};
606 :     foreach $peg (@$pegs)
607 :     {
608 :     $pegs_in_genome{$peg} = 1;
609 :     }
610 :     }
611 : parrello 1.12 # Extract the "%pegs_in_genome" hash keys. This gives us a duplicate-free
612 :     # list of all the pegs for the current spreadsheet role.
613 : parrello 1.5 my @pegs = keys(%pegs_in_genome);
614 :     my($tuple,$peg,$color);
615 : parrello 1.12 # Get a hash that maps the PEG IDs to colors.
616 :     my $colors_for_one_genome = &set_colors_for_genome($fig,$sub, \@pegs);
617 :     # Loop through the hash we got back and assign the colors from that
618 :     # hash to the master hash we're returning to the caller.
619 : parrello 1.5 while (($peg,$color) = each %$colors_for_one_genome)
620 :     {
621 :     $colorsH->{$peg} = $colors_for_one_genome->{$peg};
622 :     }
623 : overbeek 1.4 }
624 : parrello 1.12 # Return the color hash.
625 : overbeek 1.4 return $colorsH;
626 :     }
627 :    
628 : parrello 1.12 =head3 set_colors_for_genome
629 :    
630 :     C<< my $colorHash = UnvSubsys::set_colors_for_genome($fig, $sub, \@pegs); >>
631 :    
632 :     Return a reference to a hash mapping the specified pegs to colors. PEGs that
633 :     are physically close to each other will be painted the same color.
634 :    
635 :     =over 4
636 :    
637 :     =item fig
638 :    
639 :     A fig-like object that can be used to access the data store.
640 :    
641 :     =item sub
642 :    
643 :     Subsystem object for the relevant subsystem.
644 : overbeek 1.4
645 : parrello 1.12 =item pegs
646 : overbeek 1.4
647 : parrello 1.12 Reference to a list of PEG IDs. All of the peg IDs should be for the
648 :     same genome.
649 : overbeek 1.4
650 : parrello 1.12 =item RETURN
651 : overbeek 1.4
652 : parrello 1.12 Returns a reference to a hash that maps each PEG ID to a color.
653 : overbeek 1.4
654 : parrello 1.12 =back
655 : overbeek 1.4
656 : parrello 1.12 =cut
657 : overbeek 1.4
658 : parrello 1.12 sub set_colors_for_genome {
659 :     # Get the parameters.
660 :     my($fig, $sub, $pegs) = @_;
661 :     # Default all the PEGs to white.
662 :     my %color_of = map { $_ => '#FFFFFF' } @$pegs;
663 :     # Divide the pegs into clusters.
664 :     my @clusters = $fig->compute_clusters($pegs, $sub);
665 :     # Get a list of useful colors.
666 :     my @colors = &cool_colors();
667 :     # If we have too many clusters, chop off the big ones at the end. These
668 :     # are least likely to be important.
669 :     if (@clusters > @colors) { splice(@clusters, 0, (@clusters - @colors)) }
670 :     # Loop through the clusters.
671 :     for my $cluster (@clusters) {
672 :     # Get the color for this cluster.
673 :     my $color = shift @colors;
674 :     # Loop through this cluster, putting this color into the color_of
675 :     # entries for each PEG.
676 :     for my $peg (@$cluster) {
677 :     $color_of{$peg} = $color;
678 : parrello 1.5 }
679 : overbeek 1.4 }
680 : parrello 1.12 # Return the color map.
681 :     return \%color_of;
682 : overbeek 1.4 }
683 :    
684 : parrello 1.12 =head3 cool_colors
685 :    
686 :     C<< my @colorList = UnvSubsys::cool_colors(); >>
687 :    
688 :     Return a list of web-safe colors.
689 :    
690 :     =cut
691 :    
692 : overbeek 1.4 sub cool_colors {
693 :     # just an array of "websafe" colors or whatever colors we want to use. Feel free to remove bad colors (hence the lines not being equal length!)
694 :     return (
695 :     '#C0C0C0', '#FF40C0', '#FF8040', '#FF0080', '#FFC040', '#40C0FF', '#40FFC0', '#C08080', '#C0FF00', '#00FF80', '#00C040',
696 :     "#6B8E23", "#483D8B", "#2E8B57", "#008000", "#006400", "#800000", "#00FF00", "#7FFFD4",
697 :     "#87CEEB", "#A9A9A9", "#90EE90", "#D2B48C", "#8DBC8F", "#D2691E", "#87CEFA", "#E9967A", "#FFE4C4", "#FFB6C1",
698 :     "#E0FFFF", "#FFA07A", "#DB7093", "#9370DB", "#008B8B", "#FFDEAD", "#DA70D6", "#DCDCDC", "#FF00FF", "#6A5ACD",
699 :     "#00FA9A", "#228B22", "#1E90FF", "#FA8072", "#CD853F", "#DC143C", "#FF6347", "#98FB98", "#4682B4",
700 :     "#D3D3D3", "#7B68EE", "#2F4F4F", "#FF7F50", "#FF69B4", "#BC8F8F", "#A0522D", "#DEB887", "#00DED1",
701 :     "#6495ED", "#800080", "#FFD700", "#F5DEB3", "#66CDAA", "#FF4500", "#4B0082", "#CD5C5C",
702 :     "#EE82EE", "#7CFC00", "#FFFF00", "#191970", "#FFFFE0", "#DDA0DD", "#00BFFF", "#DAA520", "#008080",
703 :     "#00FF7F", "#9400D3", "#BA55D3", "#D8BFD8", "#8B4513", "#3CB371", "#00008B", "#5F9EA0",
704 :     "#4169E1", "#20B2AA", "#8A2BE2", "#ADFF2F", "#556B2F",
705 :     "#F0FFFF", "#B0E0E6", "#FF1493", "#B8860B", "#FF0000", "#F08080", "#7FFF00", "#8B0000",
706 :     "#40E0D0", "#0000CD", "#48D1CC", "#8B008B", "#696969", "#AFEEEE", "#FF8C00", "#EEE8AA", "#A52A2A",
707 :     "#FFE4B5", "#B0C4DE", "#FAF0E6", "#9ACD32", "#B22222", "#FAFAD2", "#808080", "#0000FF",
708 :     "#000080", "#32CD32", "#FFFACD", "#9932CC", "#FFA500", "#F0E68C", "#E6E6FA", "#F4A460", "#C71585",
709 :     "#BDB76B", "#00FFFF", "#FFDAB9", "#ADD8E6", "#778899",
710 :     );
711 :     }
712 :    
713 : parrello 1.12 =head3 ext_id
714 :    
715 :     C<< my $externalID = UnvSubsys::ext_id($fig, $peg); >>
716 :    
717 :     or
718 :    
719 :     C<< my @externalIDs = UnvSubsys::ext_id($fig, $peg); >>
720 :    
721 :     Return a list of non-FIG IDs for the specified feature. In a scalar context, return
722 :     a single non-FIG ID for the specified feature.
723 :    
724 :     This method returns IDs that are all of the same type, that is, all UniProt IDs, or
725 :     all KEGG IDs, and so forth. To do this, it checks the feature's alias list for IDs
726 :     of a given type. If it finds at least one, then all IDs of that type are returned.
727 :     Highest priority is given to the UniProt IDs, then SP IDs, GI IDs, and finally
728 :     KEGG IDs.
729 :    
730 :     =over 4
731 :    
732 :     =item fig
733 :    
734 :     A FIG-like object for accessing the data store.
735 :    
736 :     =item peg
737 :    
738 :     ID of the feature whose aliases are desired.
739 :    
740 :     =item RETURN
741 :    
742 :     In list context, a list of non-FIG IDs for the feature that are all of the same
743 :     type. In scalar context, the first non-FIG ID for the feature of the
744 :     highest-priority available type.
745 :    
746 :     =back
747 :    
748 :     =cut
749 :    
750 : overbeek 1.4 sub ext_id {
751 :     my($fig,$peg) = @_;
752 :    
753 :     my @tmp;
754 :     my @aliases = $fig->feature_aliases($peg);
755 :     if ((@tmp = grep { $_ =~ /^uni\|/ } @aliases) > 0)
756 :     {
757 : parrello 1.5 @aliases = @tmp;
758 : overbeek 1.4 }
759 :     elsif ((@tmp = grep { $_ =~ /^sp\|/ } @aliases) > 0)
760 :     {
761 : parrello 1.5 @aliases = @tmp;
762 : overbeek 1.4 }
763 :     elsif ((@tmp = grep { $_ =~ /^gi\|/ } @aliases) > 0)
764 :     {
765 : parrello 1.5 @aliases = @tmp;
766 : overbeek 1.4 }
767 :     elsif ((@tmp = grep { $_ =~ /^kegg\|/ } @aliases) > 0)
768 :     {
769 : parrello 1.5 @aliases = @tmp;
770 : overbeek 1.4 }
771 :     else
772 :     {
773 : parrello 1.5 @aliases = ();
774 : overbeek 1.4 }
775 :    
776 :     if (wantarray())
777 :     {
778 : parrello 1.5 return @aliases;
779 : overbeek 1.4 }
780 :     else
781 :     {
782 : parrello 1.5 return $aliases[0];
783 : overbeek 1.4 }
784 :     }
785 :    
786 : parrello 1.12 =head3 subsystem_curator
787 :    
788 :     C<< my $name = $unvsub->subsystem_curator(); >>
789 :    
790 :     Return the name of the subsystem curator. The database stores user names as
791 :     C<master:>I<name>. This method strips off the C<master:> prefix before it
792 :     passes the result back to the caller.
793 :    
794 :     =cut
795 : overbeek 1.4
796 : overbeek 1.2 sub subsystem_curator {
797 :     my($self) = @_;
798 :    
799 :     my $curator = $self->{Curator};
800 :     $curator =~ s/master://;
801 :     return $curator;
802 :     }
803 :    
804 : parrello 1.12 =head3 get_roles
805 :    
806 :     C<< my @roles = $unvsub->get_roles(); >>
807 :    
808 :     Return a list of the roles (columns) for this subsystem. The roles will be
809 :     returned in column order, so that if you access the sixth element of the
810 :     return list, you'll get the name of the role for the sixth column.
811 :    
812 :     =cut
813 :    
814 : overbeek 1.2 sub get_roles {
815 :     my($self) = @_;
816 : parrello 1.12 # The role index in this object is a list of 3-tuples. The caller only
817 :     # wants the first element of each tuple, which is the role name.
818 : overbeek 1.2 return map { $_->[0] } @{$self->{Roles}};
819 :     }
820 :    
821 : parrello 1.12 =head3 get_genome_index
822 :    
823 :     C<< my $index = $unvsub->get_genome_index($genome); >>
824 :    
825 :     Return the row index of the specified genome.
826 :    
827 :     =over 4
828 :    
829 :     =item genome
830 :    
831 :     ID of the genome whose row index is desired.
832 :    
833 :     =item RETURN
834 :    
835 :     Returns the index of the row corresponding to the specified genome, or an
836 :     undefined value if the genome is not represented in the subsystem
837 :     spreadsheet.
838 :    
839 :     =back
840 :    
841 :     =cut
842 :    
843 : overbeek 1.3 sub get_genome_index {
844 :     my($self,$genome) = @_;
845 :    
846 :     return $self->{GenomeIndex}->{$genome};
847 :     }
848 :    
849 : parrello 1.12 =head3 get_genomes
850 :    
851 :     C<< my @genomes = $unvsub->get_genomes(); >>
852 :    
853 :     Return a list of the genome IDs for the subsystem. The genomes will be
854 :     presented in row order. In other words, if you index into the sixth
855 :     element of the return list, you will retrieve the ID of the genome for
856 :     the sixth row.
857 :    
858 :     =cut
859 :    
860 : overbeek 1.3 sub get_genomes {
861 :     my($self) = @_;
862 : parrello 1.12 # The genome array is a list of 2-tuples. We extract the first
863 :     # element of each tuple, which is the genome ID.
864 : overbeek 1.3 return map { $_->[0] } @{$self->{Genomes}};
865 :     }
866 :    
867 : parrello 1.12 =head3 get_variant_code
868 :    
869 :     C<< my $code = $unvsub->get_variant_code($genome); >>
870 :    
871 :     Return the variant code for a genome. Each subsystem has several variations.
872 :     The variant code indicates which variation of a subsystem is used by a
873 :     particular genome.
874 :    
875 :     Genome data is stored in a list of 2-tuples. The first element is the genome
876 :     ID; the second is the variant code.
877 :    
878 :     =over 4
879 :    
880 :     =item genome
881 :    
882 :     ID or row index of the genome whose variant code is desired.
883 :    
884 :     =item RETURN
885 :    
886 :     Returns the variant code for the specified genome.
887 :    
888 :     =back
889 :    
890 :     =cut
891 :    
892 : overbeek 1.3 sub get_variant_code {
893 :     my($self,$genome) = @_;
894 : parrello 1.12 # Check to see if we have a row index.
895 : overbeek 1.3 if ($genome =~ /^\d+$/)
896 :     {
897 : parrello 1.12 # Here we have a row index, so use it to find the genome's variant
898 :     # code.
899 : parrello 1.5 return $self->{Genomes}->[$genome]->[1];
900 : overbeek 1.3 }
901 :     else
902 :     {
903 : parrello 1.12 # Here we have a genome ID, so we need to convert it to a row index.
904 : parrello 1.5 my $genomeI = $self->{GenomeIndex}->{$genome};
905 :     return $self->{Genomes}->[$genomeI]->[1];
906 : overbeek 1.3 }
907 :     }
908 :    
909 : parrello 1.12 =head3 get_pegs_from_cell
910 :    
911 :     C<< my @pegs = $unvsub->get_pegs_from_cell($genome, $role); >>
912 :    
913 :     Return a list of the features in a specified spreadsheet cell. The cell is specified
914 :     by genome ID and role ID.
915 :    
916 :     =over 4
917 :    
918 :     =item genome
919 :    
920 :     ID of the genome relevant to the cell.
921 :    
922 :     =item role
923 :    
924 :     ID of the role relevant to the cell.
925 :    
926 :     =item RETURN
927 :    
928 :     Returns a list of the features in the cell, or an empty list if the cell is empty.
929 :    
930 :     =back
931 :    
932 :     =cut
933 :    
934 : overbeek 1.3 sub get_pegs_from_cell {
935 :     my($self,$genome,$role) = @_;
936 : parrello 1.12 # Convert the genome and role IDs to row and column indices.
937 : overbeek 1.3 my $genomeI = $self->{GenomeIndex}->{$genome};
938 :     my $roleI = $self->{RoleIndex}->{$role};
939 : parrello 1.12 # Get the pegs from the cell and return them.
940 : overbeek 1.3 my $pegs = $self->{PegHash}->{$genomeI}->{$roleI};
941 :     return $pegs ? @$pegs : ();
942 :     }
943 :    
944 : parrello 1.14 =head3 get_diagrams
945 :    
946 :     C<< my @list = $sub->get_diagrams(); >>
947 :    
948 :     Return a list of the diagrams associated with this subsystem. Each diagram
949 :     is represented in the return list as a 4-tuple C<[diagram_id, diagram_name,
950 :     page_link, img_link]> where
951 :    
952 :     =over 4
953 :    
954 :     =item diagram_id
955 :    
956 :     ID code for this diagram.
957 :    
958 :     =item diagram_name
959 :    
960 :     Displayable name of the diagram.
961 :    
962 :     =item page_link
963 :    
964 :     URL of an HTML page containing information about the diagram.
965 :    
966 :     =item img_link
967 :    
968 :     URL of an HTML page containing an image for the diagram.
969 :    
970 :     =back
971 :    
972 :     Note that the URLs are in fact for CGI scripts with parameters that point them
973 :     to the correct place. Though Sprout has diagram information in it, it has
974 :     no relationship to the diagrams displayed in SEED, so the work is done entirely
975 :     on the SEED side.
976 :    
977 :     =cut
978 :    
979 :     sub get_diagrams {
980 :     # Get the parameters.
981 :     my ($self) = @_;
982 :     # Return the diagram list.
983 :     return @{$self->{Diagrams}};
984 :     }
985 :    
986 : overbeek 1.3 sub get_notes {
987 :     my($self) = @_;
988 :    
989 :     return $self->{Notes};
990 :     }
991 :    
992 : overbeek 1.2 sub get_role_index {
993 :     my($self,$role) = @_;
994 :    
995 :     return $self->{RoleIndex}->{$role};
996 :     }
997 :    
998 :     sub get_role_abbr {
999 :     my($self,$roleI) = @_;
1000 :    
1001 :     if ($roleI !~ /^\d+$/)
1002 :     {
1003 : parrello 1.5 $roleI = $self->{RoleIndex}->{$roleI};
1004 : overbeek 1.2 }
1005 :     my $roles = $self->{Roles};
1006 :     return $roles->[$roleI]->[1];
1007 :     }
1008 :    
1009 :     sub get_reactions {
1010 :     my($self) = @_;
1011 :    
1012 :     return $self->{Reactions};
1013 :     }
1014 :    
1015 :     sub get_subset_namesC {
1016 :     my($self) = @_;
1017 :    
1018 :     return map { $_->[0] } @{$self->{RoleSubsets}};
1019 :     }
1020 :    
1021 :     sub get_subsetC_roles {
1022 :     my($self,$subset) = @_;
1023 :     my($i,$j);
1024 :    
1025 :     my $subset_info = $self->{RoleSubsets};
1026 :     for ($i=0; ($i < @$subset_info) && ($subset_info->[$i]->[0] ne $subset); $i++) {}
1027 :     if ($i < @$subset_info)
1028 :     {
1029 : parrello 1.5 my @roles = ();
1030 :     foreach $j (@{$subset_info->[$i]->[1]})
1031 :     {
1032 :     push(@roles,$self->{Roles}->[$j]->[0]);
1033 :     }
1034 :     return @roles;
1035 : overbeek 1.2 }
1036 :     return undef;
1037 :     }
1038 :    
1039 : overbeek 1.4 sub get_color_of {
1040 :     my($self,$peg) = @_;
1041 :    
1042 :     return $self->{Colors}->{$peg};
1043 :     }
1044 :    
1045 : parrello 1.12 =head3 active_genomes
1046 :    
1047 :     C<< my $activeHash = UnvSubsys::active_genomes(\@row_subsets, $active_subsetR, $focus, \%genomeH, \@genomes_info); >>
1048 :    
1049 :     Return a hash containing the active genomes for this subsystem display. The
1050 :     keys of the hash will be the row indices of the genomes to be highlighted on the
1051 :     display. Each genome ID will map to 1. Thus, if C<< $activeHash->{3} >>
1052 :     tests TRUE, the fourth row should be highlighted.
1053 :    
1054 :     The rules for determining the active genomes are as follows. If I<$active_subsetR> is
1055 :     specified, it is presumed to be the ID of the subset containing the active genomes.
1056 :     If it is not specified and I<$focus> is specified, then I<$focus> is presumed to be the
1057 :     ID of the genome currently in focus, and the active genomes will be the ones in the
1058 :     smallest row subset containing the genome in focus. If neither I<$active_subsetR> nor
1059 :     I<$focus> are specified, then all genomes are active.
1060 :    
1061 :     =over 4
1062 :    
1063 :     =item row_subsets
1064 :    
1065 :     Reference to a list of 2-tuples. Each tuple consists of a row subset ID followed by
1066 :     a reference to a list of the row indices for the rows in the identified subset.
1067 :    
1068 :     =item active_subsetR (optional)
1069 :    
1070 :     ID of the active subset (if any).
1071 :    
1072 :     =item focus
1073 :    
1074 :     ID of the genome currently in focus (if any). If there is no active subset, then
1075 :     the smallest subset containing this genome will be made active.
1076 :    
1077 :     =item genomeH
1078 :    
1079 :     Reference to a hash of genome IDs to row indices. The keys of this hash are
1080 :     the genomes in this subsystem, which also form the subsystem spreadsheet's
1081 :     rows.
1082 :    
1083 :     =item genomes_info
1084 :    
1085 :     Reference to a list of 2-tuples. The first element of each 2-tuple is the
1086 :     ID of a genome; the second is the variant code for the subsystem variant
1087 :     used by the genome. The tuples are ordered by row index, so that the ID
1088 :     and variant code of the genome in a particular row can be located by indexing
1089 :     into this parameter using the subsystem spreadsheet row number.
1090 :    
1091 :     =item RETURN
1092 :    
1093 :     Returns a reference to a hash that maps the row indices of the active genomes
1094 :     to 1. This hash can be used to quickly determine whether or not a particular
1095 :     row is to be highlighted.
1096 :    
1097 :     =back
1098 :    
1099 :     =cut
1100 :    
1101 : overbeek 1.7 sub active_genomes {
1102 : parrello 1.12 # Get the parameters.
1103 :     my($fig, $row_subsets, $active_subsetR, $focus, $genomeH, $genomes_info) = @_;
1104 : overbeek 1.7 my($i,@bestL);
1105 : parrello 1.12 # Declare the return variable.
1106 : overbeek 1.7 my $active_genomes = {};
1107 : parrello 1.12 # Check for an active subset.
1108 : overbeek 1.7 if ($active_subsetR)
1109 :     {
1110 : parrello 1.12 # Search for the active subset in the row subset array.
1111 :     for ($i=0; ($i < @$row_subsets) && ($row_subsets->[$i]->[0] ne $active_subsetR); $i++) {}
1112 :     if ($i < @$row_subsets)
1113 :     {
1114 :     # Here we found the active subset, so we extract its list of row indices.
1115 :     @bestL = @{$row_subsets->[$i]->[1]};
1116 :     }
1117 :     else {
1118 :     # Here we have the ID of the active subset. First, we search for that ID
1119 :     # in the row subset list.
1120 :     for ($i=0; ($i < @$row_subsets) && ($row_subsets->[$i]->[0] ne $active_subsetR); $i++) {}
1121 :     if ($i < @$row_subsets)
1122 :     {
1123 :     # Here we found the named subset, so we return its member list.
1124 :     @bestL = @{$row_subsets->[$i]->[1]};
1125 :     }
1126 :     else
1127 :     {
1128 :     # Here the active subset does not exist. We punt by extracting a
1129 :     # list of all the row indices in the spreadsheet.
1130 :     $active_subsetR = 'All';
1131 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1132 :     }
1133 :     }
1134 : overbeek 1.7 }
1135 : parrello 1.12 elsif ($focus)
1136 : overbeek 1.7 {
1137 : parrello 1.12 # Here we don't have an active row subset, but a particular genome is in
1138 :     # focus. We'll look for the smallest row subset containing the genome
1139 :     # in focus. First, we need to prime the loop. "$bestN" will be the ID
1140 :     # of the best subset found so far; "@bestL" is where we stash the list
1141 :     # of IDs in the subset. Our initial selection, then, will be the
1142 :     # fake "All" subset, which contains the entire collection of rows.
1143 : overbeek 1.16
1144 : parrello 1.12 if (! $fig->is_complete($focus))
1145 :     {
1146 :     # Here the gnome in focus is incomplete, so it won't be anywhere
1147 :     # in our list. We default to making everything active.
1148 :     $active_subsetR = 'All';
1149 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1150 :     } else {
1151 :     my $bestN = "All";
1152 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1153 :     # Next, we get the row index for the genome in focus.
1154 :     my $focusIndex = $genomeH->{$focus};
1155 :     # Now we loop through all the row subsets.
1156 :     my $tuple;
1157 :     foreach $tuple (@$row_subsets)
1158 :     {
1159 :     # Extract the subset ID and its list of row indices. The latter is
1160 :     # in "$genomeIs".
1161 :     my($id,$genomeIs) = @$tuple;
1162 :     # Search for the index of the focus row in the current subset's list
1163 :     # of row indices.
1164 :     for ($i=0; ($i < @$genomeIs) && ($genomeIs->[$i] != $focusIndex); $i++) {}
1165 :     # Now either $i will be the index of the focus row in the subset, or
1166 :     # it is going to be equal to the number of rows in the subset.
1167 :     if ($i < @$genomeIs)
1168 :     {
1169 :     # We've found the focus row in this subset. Select it as the new
1170 :     # best subset if it's smaller than the last one we found.
1171 :     if (@$genomeIs < @bestL)
1172 :     {
1173 :     $bestN = $id;
1174 :     @bestL = @$genomeIs;
1175 :     }
1176 :     }
1177 :     }
1178 :     # Save the best subset found as the active one.
1179 :     $active_subsetR = $bestN;
1180 :     }
1181 :     } else {
1182 :     # Here we have nothing: no active subset, and no focus row. We make
1183 :     # all rows active.
1184 :     $active_subsetR = 'All';
1185 :     @bestL = map { $genomeH->{$_} } keys(%$genomeH);
1186 : overbeek 1.7 }
1187 : parrello 1.12 # "@bestL" now contains a list of the row indices for the active genomes.
1188 :     # We convert it to a hash and return it.
1189 : overbeek 1.7 my %active_genomes = map { $_ => 1 } @bestL;
1190 :     return \%active_genomes;
1191 :     }
1192 :    
1193 : olson 1.13 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3