[Bio] / FigKernelPackages / SS.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/SS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :     use strict;
3 :    
4 :     #
5 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
6 :     # for Interpretations of Genomes. All Rights Reserved.
7 :     #
8 :     # This file is part of the SEED Toolkit.
9 :     #
10 :     # The SEED Toolkit is free software. You can redistribute
11 :     # it and/or modify it under the terms of the SEED Toolkit
12 :     # Public License.
13 :     #
14 :     # You should have received a copy of the SEED Toolkit Public License
15 :     # along with this program; if not write to the University of Chicago
16 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
17 :     # Genomes at veronika@thefig.info or download a copy from
18 :     # http://www.theseed.org/LICENSE.TXT.
19 :     #
20 :     package SS;
21 :    
22 :     use strict;
23 :     use ERDB;
24 :     use Tracer;
25 :     use SeedUtils;
26 :     use ServerThing;
27 :    
28 :     =head1 Subsystem Server Function Object
29 :    
30 :     This file contains the functions and utilities used by the Subsystem Server
31 :     (B<subsystem_server_sapling.cgi>). The L</Primary Methods> represent function
32 :     calls direct to the server. These all have a signature similar to the following.
33 :    
34 :     my $document = $ssObject->function_name($args);
35 :    
36 :     where C<$ssObject> is an object created by this module,
37 :     C<$args> is a parameter structure, and C<function_name> is the Subsystem
38 :     Server function name. The output is a structure, generally a hash reference, but
39 :     sometimes a string or a list reference.
40 :    
41 :     =head2 Special Methods
42 :    
43 :     =head3 new
44 :    
45 :     my $ssObject = SS->new();
46 :    
47 :     Create a new Subsystem Server function object. The server function object
48 :     contains a pointer to a [[SaplingPm]] object, and is used to invoke the
49 :     server functions.
50 :    
51 :     =cut
52 :    
53 :     sub new {
54 :     my ($class) = @_;
55 :     # Create the sapling object.
56 :     my $sap = ERDB::GetDatabase('Sapling');
57 :     # Create the server object.
58 :     my $retVal = { db => $sap };
59 :     # Bless and return it.
60 :     bless $retVal, $class;
61 :     return $retVal;
62 :     }
63 :    
64 :    
65 :     =head2 Primary Methods
66 :    
67 :     =head3 is_in_subsystem
68 :    
69 :     my $document = $ssObject->is_in_subsystem($args);
70 :    
71 :     Return the subsystem and role for each specified feature.
72 :    
73 :     =over 4
74 :    
75 :     =item args
76 :    
77 :     Reference to either (1) a hash with a key of C<-ids> whose value is a list
78 :     of FIG feature IDs or (2) a list of FIG feature IDs.
79 :    
80 :     =item RETURN
81 :    
82 :     Returns a reference to a list of 3-tuples. Each 3-tuple consists of a subsystem
83 :     ID, a role ID, and the ID of a feature from the input list.
84 :    
85 :     =back
86 :    
87 :     =cut
88 :    
89 :     sub is_in_subsystem {
90 :     # Get the parameters.
91 :     my ($self, $args) = @_;
92 :     # Get the sapling database.
93 :     my $sapling = $self->{db};
94 :     # Declare the return variable.
95 :     my $retVal;
96 :     # Convert a list to a hash.
97 :     if (ref $args ne 'HASH') {
98 :     $args = { -ids => $args };
99 :     }
100 :     # Get the fig IDs from the parameters.
101 :     my $ids = ServerThing::GetIdList(-ids => $args);
102 :     foreach my $fid (@$ids) {
103 :     my @resultRows = $sapling->GetAll("Subsystem Includes Role IsRoleOf MachineRole Contains Feature",
104 :     'Feature(id) = ?', [$fid],
105 :     [qw(Subsystem(id) Role(id) Feature(id))]);
106 :     push (@$retVal, \@resultRows);
107 :     }
108 :     # Return the result.
109 :     return $retVal;
110 :     }
111 :    
112 :     =head3 is_in_subsystem_with
113 :    
114 :     my $document = $ssObject->is_in_subsystem_with($args);
115 :    
116 :     Given a list of features, return the other features in the same subsystem
117 :     with it. For each other feature returned, its role, functional
118 :     assignment, subsystem variant, and subsystem ID will be returned as well.
119 :    
120 :     =over 4
121 :    
122 :     =item args
123 :    
124 :     Reference to either (1) a hash with a key of C<-ids> whose value is a list
125 :     of FIG feature IDs or (2) a list of FIG feature IDs.
126 :    
127 :     =item RETURN
128 :    
129 :     Returns a reference to a list of lists. Each sub-list contains 6-tuples relating
130 :     to a single incoming feature ID. Each 6-tuple consists of a subsystem
131 :     ID, a variant ID, the incoming feature ID, the other feature ID, the other feature's
132 :     functional assignment, and the other feature's role in the subsystem.
133 :    
134 :     =back
135 :    
136 :     =cut
137 :    
138 :     sub is_in_subsystem_with {
139 :     # Get the parameters.
140 :     my ($self, $args) = @_;
141 :     # Get the sapling database.
142 :     my $sapling = $self->{db};
143 :     # Declare the return variable.
144 :     my $retVal;
145 :     # Convert a list to a hash.
146 :     if (ref $args ne 'HASH') {
147 :     $args = { -ids => $args };
148 :     }
149 :     # Get the fig IDs from the parameters.
150 :     my $ids = ServerThing::GetIdList(-ids => $args);
151 :     foreach my $fid (@$ids) {
152 :     my @resultRows = $sapling->GetAll("Feature IsContainedIn MachineRole IsRoleFor MolecularMachine Implements Variant IsDescribedBy Subsystem AND MolecularMachine IsMachineOf MachineRole2 Contains Feature2 AND MachineRole2 HasRole Role",
153 :     'Feature(id) = ? ',
154 :     [$fid], [qw(Subsystem(id)
155 :     Variant(id)
156 :     Feature(id)
157 :     Feature2(id)
158 :     Feature2(function)
159 :     Role(id))]);
160 :     push (@$retVal, \@resultRows);
161 :     }
162 :     # Return the result.
163 :     return $retVal;
164 :     }
165 :    
166 :     =head3 all_subsystems
167 :    
168 :     my $document = $ssObject->all_subsystems($args);
169 :    
170 :     Return a list of all subsystems in the system. For each subsystem, this
171 :     method will return the ID, curator, and roles.
172 :    
173 :     =over 4
174 :    
175 :     =item args
176 :    
177 :     This function has no parameters.
178 :    
179 :     =item RETURN
180 :    
181 :     Returns a list of 3-tuples. Each 3-tuple will consist of a subsystem ID, a curator
182 :     name, and a role ID.
183 :    
184 :     =back
185 :    
186 :     =cut
187 :    
188 :     sub all_subsystems {
189 :     # Get the parameters.
190 :     my ($self, $args) = @_;
191 :     # Get the spaling database.
192 :     my $sapling = $self->{db};
193 :     # Read the subsystem data from the database.
194 :     my @retVal = $sapling->GetAll("Subsystem Includes Role",
195 :     'ORDER BY Subsystem(id)', [],
196 :     [qw(Subsystem(id) Subsystem(curator)
197 :     Role(id))]);
198 :     # Return the result.
199 :     return \@retVal;
200 :     }
201 :    
202 :     =head3 subsystem_spreadsheet
203 :    
204 :     my $document = $ssObject->subsystem_spreadsheet($args);
205 :    
206 :     This method takes a list of subsystem IDs, and for each one returns a
207 :     list of the features in the subsystem. For each feature, it will include
208 :     the feature's functional assignment, the subsystem name and variant
209 :     (spreadsheet row), and its role (spreadsheet column).
210 :    
211 :     =over 4
212 :    
213 :     =item args
214 :    
215 :     Reference to a hash that either maps C<-ids> to a list of subsystem IDs, or a
216 :     reference to a list of subsystem IDs.
217 :    
218 :     =item RETURN
219 :    
220 :     Returns a list of 5-tuples. Each tuple contains a subsystem ID, a variant ID, a
221 :     feature ID, the feature's functional assignment, and the feature's role in the
222 :     subsystem.
223 :    
224 :     =back
225 :    
226 :     =cut
227 :    
228 :     sub subsystem_spreadsheet {
229 :     # Get the parameters.
230 :     my ($self, $args) = @_;
231 :     # Get the sapling database.
232 :     my $sapling = $self->{db};
233 :     # Declare the return variable.
234 :     my $retVal;
235 :     # Convert a list to a hash.
236 :     if (ref $args ne 'HASH') {
237 :     $args = { -ids => $args };
238 :     }
239 :     # Get the list of subsystem IDs.
240 :     my $ids = ServerThing::GetIdList(-ids => $args);
241 :     # Loop through the subsystem IDs.
242 :     foreach my $subsysName (@$ids) {
243 :     # Normalize the subsystem ID.
244 :     my $subsysID = $sapling->SubsystemID($subsysName);
245 :     # Get the subsystem's spreadsheet data.
246 :     my @resultRows = $sapling->GetAll("Subsystem Describes Variant IsImplementedBy MolecularMachine IsMachineOf MachineRole Contains Feature AND MachineRole HasRole Role",
247 :     'Subsystem(id) = ? ORDER BY Feature(id)',
248 :     [$subsysID], [qw(Subsystem(id)
249 :     Variant(id)
250 :     Feature(id)
251 :     Feature(function)
252 :     Role(id))]);
253 :     push (@$retVal, \@resultRows);
254 :     }
255 :     # Return the result.
256 :     return $retVal;
257 :     }
258 :    
259 :     =head3 pegs_in_subsystem
260 :    
261 :     my $document = $ssObject->pegs_in_subsystem($args);
262 :    
263 :     This method takes a list of genomes and a list of subsystems and returns
264 :     a list of the roles represented in each genome/subsystem pair.
265 :    
266 :     =over 4
267 :    
268 :     =item args
269 :    
270 :     Either (1) a reference to a hash with the keys C<-genomes> and C<-subsystems>,
271 :     where C<-genomes> maps to a list of genome IDs and C<-subsystems> maps to a
272 :     list of subsystem IDs, or (2) a reference to a list of lists, where the first
273 :     is a list of genome IDs and the second is a list of subsystem IDs.
274 :    
275 :     =item RETURN
276 :    
277 :     Returns a list of 2-tuples. Each tuple consists of a subsystem ID and a second
278 :     2-tuple that contains a role ID and a reference to a list of the feature IDs for
279 :     that role.
280 :    
281 :     =back
282 :    
283 :     =cut
284 :    
285 :     sub pegs_in_subsystem {
286 :     # Get the parameters.
287 :     my ($self, $args) = @_;
288 :     # Get the sapling database.
289 :     my $sapling = $self->{db};
290 :     # Get the sapling subsystem object.
291 :     require SaplingSubsys;
292 :     # Declare the return variable.
293 :     my $retVal;
294 :     # Convert a list to a hash.
295 :     if (ref $args ne 'HASH') {
296 :     $args = { -genomes => $args->[0], -subsystems => $args->[1] };
297 :     }
298 :     # Get the list of genome IDs.
299 :     my $genomes = ServerThing::GetIdList(-genomes => $args);
300 :     # Get the list of subsystem IDs.
301 :     my $subs = ServerThing::GetIdList(-subsystems => $args);
302 :     # Loop through the subsystems.
303 :     foreach my $sub (@{$subs}) {
304 :     # Normalize the subsystem ID.
305 :     my $subID = $sapling->SubsystemID($sub);
306 :     # Get the subsystem spreadsheet in memory.
307 :     my $ss = SaplingSubsys->new($subID, $sapling);
308 :     # Loop through the genomes.
309 :     foreach my $g (@{$genomes}) {
310 :     my @roles = $ss->get_roles_for_genome($g, 1);
311 :     foreach my $role (@roles) {
312 :     push (@$retVal, [$sub, $role]);
313 :     }
314 :     }
315 :     }
316 :     # Return the result.
317 :     return $retVal;
318 :     }
319 :    
320 :     =head3 pegs_implementing_roles
321 :    
322 :     my $document = $ssObject->pegs_implementing_roles($args);
323 :    
324 :     Given a subsystem and a list of roles, return a list of the subsystem's
325 :     features for each role.
326 :    
327 :     =over 4
328 :    
329 :     =item args
330 :    
331 :     Reference to either (1) a hash that maps C<-subsystem> to a subsystem ID and
332 :     C<-roles> to a list of roles or (2) a 2-tuple containing a subsystem ID followed
333 :     by a reference to a list of roles in that subsystem.
334 :    
335 :     =item RETURN
336 :    
337 :     Returns a list of 2-tuples. Each tuple consists of a role and a reference to a
338 :     list of the features in that role.
339 :    
340 :     =back
341 :    
342 :     =cut
343 :    
344 :     sub pegs_implementing_roles {
345 :     # Get the parameters.
346 :     my ($self, $args) = @_;
347 :     # Get the sapling database.
348 :     my $sapling = $self->{db};
349 :     # Get the sapling subsystem object.
350 :     require SaplingSubsys;
351 :     # Declare the return variable.
352 :     my $retVal;
353 :     # Convert a list to a hash.
354 :     if (ref $args ne 'HASH') {
355 :     $args = { -subsystem => $args->[0], -roles => $args->[1] };
356 :     }
357 :     # Get the subsystem ID.
358 :     my $subsystem = $args->{-subsystem};
359 :     # If there is no subsystem ID, it's an error.
360 :     if (! defined $subsystem) {
361 :     Confess("Subsystem ID not specified.");
362 :     } else {
363 :     # Normalize the subsystem ID.
364 :     my $subsystemID = $sapling->SubsystemID($subsystem);
365 :     # Get the list of roles.
366 :     my $roles = ServerThing::GetIdList(-roles => $args);
367 :     my $ss = SaplingSubsys->new($subsystemID, $sapling);
368 :     foreach my $role (@$roles) {
369 :     my @pegs = $ss->pegs_for_role($role);
370 :     push (@$retVal, [$role, \@pegs]);
371 :     }
372 :     }
373 :     # Return the result.
374 :     return $retVal;
375 :     }
376 :    
377 :    
378 :     =head3 metabolic_reconstruction
379 :    
380 :     my $document = $ssObject->metabolic_reconstruction($args);
381 :    
382 : parrello 1.2 This method will find for each subsystem, the subsystem variant that contains a
383 :     maximal subset of the roles in an incoming list, and output the ID of the
384 :     variant and a list of the roles in it.
385 : parrello 1.1
386 :     =over 4
387 :    
388 :     =item args
389 :    
390 :     Reference to (1) a list of role descriptors or (2) a hash mapping the key C<-roles>
391 :     to a list of role descriptors. A role descriptor is a 2-tuple consisting of the
392 :     role ID followed by an arbitrary ID of the caller's choosing.
393 :    
394 :     =item RETURN
395 :    
396 :     Returns a list of tuples, each containing a variant ID, a role ID, and optionally a
397 :     caller-provided ID for the role.
398 :    
399 :     =back
400 :    
401 :     =cut
402 :    
403 :     sub metabolic_reconstruction {
404 :     # Get the parameters.
405 :     my ($self, $args) = @_;
406 :     # Get the sapling database.
407 :     my $sapling = $self->{db};
408 :     # Declare the return variable.
409 :     my $retVal = [];
410 :     # Convert a list to a hash.
411 :     if (ref $args ne 'HASH') {
412 :     $args = { -roles => $args };
413 :     }
414 : parrello 1.2 # This counter will be used to generate user IDs for roles without them.
415 :     my $next = 1000;
416 : parrello 1.1 # Get the list of roles.
417 :     my $id_roles = ServerThing::GetIdList(-roles => $args);
418 : overbeek 1.3 my @id_roles1 = map { (ref $_ ? $_ : [$_, "FR" . ++$next]) } @$id_roles;
419 :    
420 :     my @id_roles = ();
421 :     foreach my $tuple (@id_roles1)
422 :     {
423 :     my($function,$id) = @$tuple;
424 :     foreach my $role (split(/(; )|( [\]\@] )/,$function))
425 :     {
426 :     push(@id_roles,[$role,$id]);
427 :     }
428 :     }
429 :    
430 : parrello 1.1 my %big;
431 :     my $id_display = 1;
432 :     map {push(@{$big{$_->[0]}}, $_->[1])} @id_roles;
433 :     my @resultRows = $sapling->GetAll("Subsystem Includes Role",
434 :     'ORDER BY Subsystem(id), Includes(sequence)', [],
435 :     [qw(Subsystem(id) Role(id) Includes(abbreviation))]);
436 :     my %ss_roles;
437 :     foreach my $row (@resultRows) {
438 :     my ($sub, $role, $abbr) = @$row;
439 :     $ss_roles{$sub}->{$role} = $abbr;
440 :     }
441 :     foreach my $sub (keys %ss_roles) {
442 :     my $roles = $ss_roles{$sub};
443 :    
444 :     my @abbr = map{$roles->{$_}} grep { $big{$_}} keys %$roles;
445 :     my $set = join(" ", @abbr);
446 :     if (@abbr > 0) {
447 :     my ($variant, $size) = $self->get_max_subset($sub, $set);
448 :     if ($variant) {
449 :     foreach my $role (keys %$roles) {
450 :     if ($id_display) {
451 :     foreach my $id (@{$big{$role}}) {
452 :     push (@$retVal, [$variant, $role, $id]);
453 :     }
454 :     } else {
455 :     push (@$retVal, [$variant, $role]);
456 :     }
457 :     }
458 :     }
459 :     }
460 :     }
461 :     # Return the result.
462 :     return $retVal;
463 :     }
464 :    
465 :     =head2 Internal Utility Methods
466 :    
467 :     =head3 get_max_subset
468 :    
469 :     my ($max_variant, $max_size) = $ssObject->get_max_subset($sub, $setA);
470 :    
471 :     Given a subsystem ID and a role rule, return the ID of the variant for
472 :     the subsystem that matches the most roles in the rule and the number of
473 :     roles matched.
474 :    
475 :     =over 4
476 :    
477 :     =item sub
478 :    
479 :     Name (ID) of the subsystem whose variants are to be examined.
480 :    
481 :     =item setA
482 :    
483 :     A space-delimited list of role abbreviations, lexically ordered. This provides
484 :     a unique specification of the roles in the set.
485 :    
486 :     =item RETURN
487 :    
488 :     Returns a 2-element list consisting of the ID of the variant found and the number
489 :     of roles matched.
490 :    
491 :     =back
492 :    
493 :     =cut
494 :    
495 :     sub get_max_subset {
496 :     my ($self, $sub, $setA) = @_;
497 :     my $sapling = $self->{db};
498 :     my $max_size = 0;
499 :     my $max_set;
500 :     my $max_variant;
501 :     my %set_hash;
502 :     my $qh = $sapling->Get("Subsystem Describes Variant", 'Subsystem(id) = ? AND Variant(type) = ?', [$sub, 'normal']);
503 :     while (my $resultRow = $qh->Fetch()) {
504 :     my @variantRoleRule = $resultRow->Value('Variant(role-rule)');
505 :     my ($variantCode) = $resultRow->Value('Variant(code)');
506 :     my $variantId = $sub.":".$variantCode;
507 :     foreach my $setB (@variantRoleRule) {
508 :     my $size = is_A_a_superset_of_B($setA, $setB);
509 :     if ($size && $size > $max_size) {
510 :     $max_size = $size;
511 :     $max_set = $setB;
512 :     $max_variant = $variantId;
513 :     }
514 :     }
515 :     }
516 :     #if ($max_size) {
517 :     #print STDERR "Success $max_variant, $max_set\n";
518 :     #}
519 :     return($max_variant, $max_size);
520 :     }
521 :    
522 :    
523 :     =head3 is_A_a_superset_of_B
524 :    
525 :     my $size = SS::is_A_a_superset_of_B($a, $b);
526 :    
527 :     This method takes as input two role rules, and returns 0 if the first
528 :     role rule is NOT a superset of the second; otherwise, it returns the size
529 :     of the second rule. A role rule is a space-delimited list of role
530 :     abbreviations in lexical order. This provides a unique identifier for a
531 :     set of roles in a subsystem.
532 :    
533 :     =over 4
534 :    
535 :     =item a
536 :    
537 :     First role rule.
538 :    
539 :     =item b
540 :    
541 :     Second role rule.
542 :    
543 :     =item RETURN
544 :    
545 :     Returns 0 if the first rule is NOT a superset of the second and the size of the
546 :     second rule if it is. As a result, if the first rule IS a superset, this method
547 :     will evaluate to TRUE, and to FALSE otherwise.
548 :    
549 :     =back
550 :    
551 :     =cut
552 :    
553 :     sub is_A_a_superset_of_B {
554 :     my ($a, $b) = @_;
555 :     my @a = split(" ", $a);
556 :     my @b = split(" ", $b);
557 :     if (@b > @a) {
558 :     return(0);
559 :     }
560 :     my %given;
561 :     map { $given{$_} = 1} @a;
562 :     map { if (! $given{$_}) {return 0}} split(" ", $b);
563 :     my $l = scalar(@b);
564 :     return scalar(@b);
565 :     }
566 :    
567 :    
568 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3