[Bio] / FigKernelPackages / SFXlate.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/SFXlate.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.119 - (view) (download) (as text)

1 : olson 1.53 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 : parrello 1.59 #
7 : olson 1.53 # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 : parrello 1.59 # Public License.
10 : olson 1.53 #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : parrello 1.1 package SFXlate;
19 :    
20 :     use strict;
21 :    
22 : overbeek 1.33 use Carp;
23 : olson 1.41
24 :     #
25 :     # Conditonally import sprout stuff.
26 :     #
27 :     BEGIN {
28 :     eval {
29 : parrello 1.47
30 : parrello 1.43 require Sprout;
31 :     import Sprout;
32 :     require SproutSubsys;
33 :     import SproutSubsys;
34 : parrello 1.87 require CustomAttributes;
35 :     import CustomAttributes;
36 : parrello 1.93 require RemoteCustomAttributes;
37 :     import RemoteCustomAttributes;
38 : olson 1.41 };
39 :     }
40 :    
41 : olson 1.2 use Data::Dumper;
42 : parrello 1.1 use FIG_Config;
43 :     use SeedDas;
44 : parrello 1.6 use Tracer;
45 : parrello 1.24 use FIG;
46 : parrello 1.80 use FIGRules;
47 : parrello 1.90 use BasicLocation;
48 : parrello 1.103 use FullLocation;
49 : parrello 1.1
50 : parrello 1.10 =head1 Sprout/FIG API Shim
51 :    
52 :     =head2 Introduction
53 :    
54 :     This object accepts calls using the standard FIG method signatures and
55 :     translates them into Sprout calls. The end result is that an SFXlate
56 : parrello 1.24 object can be used in place of a standard FIG object in legacy code.
57 : parrello 1.10 provided that no unsupported functions are used.
58 :    
59 :     =cut
60 :    
61 : parrello 1.20 #: Constructor SFXlate->new();
62 : parrello 1.10
63 : parrello 1.24 =head2 Constructors
64 : parrello 1.10
65 :     =head3 new
66 :    
67 : parrello 1.98 my $sfxlate = SFXlate->new($fig, $sproutDB, $sproutData);
68 : parrello 1.10
69 : parrello 1.13 This is the constructor for a full-service SFXlate object.
70 : parrello 1.10
71 :     =over 4
72 :    
73 :     =item fig
74 :    
75 : parrello 1.13 This is a legacy parameter that is effectively ignored.
76 : parrello 1.10
77 :     =item sproutDB
78 :    
79 :     Name of the sprout database. If undefined, the B<sproutDB> value from
80 :     the FIG configuration module will be used.
81 :    
82 :     =item sproutData
83 :    
84 :     Name of the directory containing the sprout data files. If undefined, the
85 :     B<sproutData> value from the FIG configuration module will be used.
86 :    
87 : parrello 1.114 =item sproutDBD
88 :    
89 :     Name of the DBD file defining the sprout database.
90 :    
91 : parrello 1.10 =back
92 :    
93 :     =cut
94 : parrello 1.20
95 : parrello 1.10 sub new {
96 : parrello 1.114 my($class, $fig, $sproutDB, $sproutData, $sproutDBD) = @_;
97 :     my $sprout = SFXlate->new_sprout_only($sproutDB, $sproutData, $sproutDBD);
98 : parrello 1.10 my $self = {
99 : overbeek 1.36 sprout => $sprout,
100 : parrello 1.92 fig => $fig,
101 :     ca => undef,
102 : parrello 1.10 };
103 :     return bless $self, $class;
104 :     }
105 :    
106 :     =head3 new_sprout_only
107 :    
108 : parrello 1.98 my $sprout = SFXlate->new_sprout_only($sproutDB, $sproutData, $xmlFile, $noOpen);
109 : parrello 1.1
110 : parrello 1.10 This is a special constructor that returns a pure Sprout object using
111 : parrello 1.61 the FIG configuration defaults. Note that the Sprout database has a completely separate
112 :     set of configuration parameters from the SEED database. Thus, C<$FIG_Config::sproutUser>
113 :     is the user name for Sprout while C<$FIG_Config::user> is the user name for SEED.
114 : parrello 1.10
115 :     =over 4
116 :    
117 :     =item sproutDB
118 :    
119 :     Name of the sprout database. If undefined, the B<sproutDB> value from
120 :     the FIG configuration module will be used.
121 :    
122 :     =item sproutData
123 :    
124 :     Name of the directory containing the sprout data files. If undefined, the
125 :     B<sproutData> value from the FIG configuration module will be used.
126 :    
127 : parrello 1.22 =item xmlFile
128 :    
129 :     Name of the XML file containing the database definition. If undefined,
130 :     the file C<SproutDBD.xml> will be used, either from the main FIG
131 :     directory or the Sprout data directory.
132 :    
133 : parrello 1.42 =item noOpen
134 :    
135 :     If TRUE, the database will not be opened. The default is FALSE.
136 :    
137 : parrello 1.10 =back
138 :    
139 :     =cut
140 :    
141 :     sub new_sprout_only {
142 : parrello 1.42 my($class, $sproutDB, $sproutData, $xmlFile, $noOpen) = @_;
143 : parrello 1.10 $sproutDB = $FIG_Config::sproutDB if !defined $sproutDB;
144 :     $sproutData = $FIG_Config::sproutData if !defined $sproutData;
145 : parrello 1.6 Trace("Using sproutDB=$sproutDB sproutData=$sproutData") if T(1);
146 : parrello 1.42 my $openFlag = ($noOpen ? 1 : 0);
147 : parrello 1.22 if (! $xmlFile) {
148 : parrello 1.77 # Compute the DBD directory.
149 :     my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
150 :     $FIG_Config::fig );
151 :     $xmlFile = "$dbd_dir/SproutDBD.xml";
152 : parrello 1.22 }
153 : parrello 1.118 my $sprout = new Sprout(dbName => $sproutDB, dbd => $xmlFile,
154 :     options => { port => $FIG_Config::sproutPort,
155 :     dbType => $FIG_Config::sproutDbms,
156 :     dataDir => $sproutData,
157 :     userData => "$FIG_Config::sproutUser/$FIG_Config::sproutPass",
158 :     noDBOpen => $openFlag,
159 :     sock => $FIG_Config::sproutSock,
160 :     host => ($FIG_Config::sprout_host ||
161 :     $FIG_Config::dbhost),
162 : parrello 1.1 });
163 : parrello 1.10 return $sprout;
164 :     }
165 :    
166 : parrello 1.90 =head3 old_sprout_only
167 :    
168 : parrello 1.98 my $sprout = SFXlate->old_sprout_only();
169 : parrello 1.90
170 :     Open a Sprout object for the old NMPDR database. This simply calls L</new_sprout_only>
171 :     with the correct parameters required to access the old database.
172 :    
173 :     =cut
174 :    
175 :     sub old_sprout_only {
176 :     # Get the parameters.
177 :     my ($class) = @_;
178 :     # Create and return the sprout object.
179 :     return new_sprout_only($class, $FIG_Config::oldSproutDB, undef, $FIG_Config::oldSproutDBD);
180 :     }
181 :    
182 : parrello 1.103 =head3 all_usable_subsystems
183 :    
184 :     my @subs = $fig->all_usable_subsystems();
185 :    
186 :     Return a list of the available subsystems. The FIG version of this method
187 :     skips subsystems that are experimental, deleted, or cluster-based;
188 :     however, these subsystems are filtered out before the data is loaded into
189 :     Sprout, so we simply return the complete list.
190 :    
191 :     =cut
192 :    
193 :     sub all_usable_subsystems {
194 :     # Get the parameters.
195 :     my ($self) = @_;
196 :     # Declare the return variable.
197 :     my @retVal = $self->{sprout}->GetFlat(['Subsystem'], "", [], 'Subsystem(id)');
198 :     # Return the result.
199 :     return @retVal;
200 :     }
201 :    
202 : parrello 1.10 =head3 new_sprout
203 :    
204 : parrello 1.98 my $sfxlate = SFXlate->new_sprout($fig, $sprout);
205 : parrello 1.10
206 :     This constructor creates an SFXlate object from pre-built FIG and
207 :     Sprout objects. Use this constructor if you have a pre-built Sprout
208 :     object and wish to pass it in to a module that is expecting a FIG
209 :     object.
210 :    
211 :     =over 4
212 :    
213 :     =item fig
214 : parrello 1.1
215 : parrello 1.13 This is a legacy parameter that is effectively ignored.
216 : parrello 1.10
217 :     =item sprout
218 :    
219 :     Sprout object to be used for Sprout calls.
220 :    
221 :     =back
222 :    
223 :     =cut
224 :    
225 :     sub new_sprout {
226 :     my($class, $fig, $sprout) = @_;
227 : parrello 1.1 my $self = {
228 : parrello 1.92 sprout => $sprout,
229 :     ca => undef,
230 : parrello 1.1 };
231 :    
232 :     return bless $self, $class;
233 :     }
234 :    
235 : parrello 1.23 =head3 get_db
236 :    
237 : parrello 1.98 my $dbObject = SFXLate::get_db($modName);
238 : parrello 1.23
239 :     This method returns an ERDB object for the specified database. The database name
240 : parrello 1.87 should be C<Sprout> for the Sprout database and C<CustomAttribute> for the custom
241 :     attributes database. This method needs to be made more robust; right now it's the
242 : parrello 1.23 minimum needed for creating database test pages.
243 :    
244 :     =over 4
245 :    
246 :     =item modName
247 :    
248 :     Name of the database to be loaded into the ERDB object.
249 :    
250 :     =item RETURN
251 :    
252 :     Returns an ERDB object for the default database with the specified name.
253 :    
254 :     =back
255 :    
256 :     =cut
257 :    
258 :     sub get_db {
259 :     # Get the parameters.
260 :     my ($modName) = @_;
261 :     # Declare the return variable.
262 :     my $retVal;
263 :     # Process according to the module name passed in.
264 :     if ($modName eq "Sprout") {
265 :     Trace("Selecting Sprout module.") if T(ERDB => 2);
266 :     # Create a sprout object for the default database.
267 : parrello 1.63 $retVal = SFXlate->new_sprout_only();
268 : parrello 1.87 } elsif ($modName eq "CustomAttributes") {
269 :     Trace("Selecting Custom Attributes.") if T(ERDB => 2);
270 :     $retVal = CustomAttributes->new();
271 : parrello 1.23 } else {
272 :     Confess("Invalid module name \"$modName\" specified in get_db.");
273 :     }
274 :     return $retVal;
275 :     }
276 :    
277 : parrello 1.24 =head2 Public Methods
278 :    
279 : parrello 1.118 =head3 minmax
280 :    
281 :     my ($lo, $hi) = SFXlate::minmax($a, $b);
282 :    
283 :     Return a list containing the incoming pair of arguments sorted
284 :     numerically.
285 :    
286 :     =over 4
287 :    
288 :     =item a
289 :    
290 :     First parameter.
291 :    
292 :     =item b
293 :    
294 :     Second parameter.
295 :    
296 :     =item RETURN
297 :    
298 :     Returns the incoming pair of arguments in numeric order.
299 :    
300 :     =back
301 :    
302 :     =cut
303 :    
304 :     sub minmax {
305 :     # Get the parameters.
306 :     my ($a, $b) = @_;
307 :     # Declare the return variables.
308 :     my ($lo, $hi) = ($a <= $b ? ($a, $b) : ($b, $a));
309 :     # Return the results.
310 :     return ($lo, $hi);
311 :     }
312 :    
313 :     =head3 location_overlap
314 :    
315 :     my $overlap = $fig->location_overlap($loc1, $loc2);
316 :    
317 :     or
318 :    
319 :     my $overlap = SFXlate::location_overlap($loc1, $loc2);
320 :    
321 :     Return the number of overlapping nucleotides between 2 locations.
322 :    
323 :     =over 4
324 :    
325 :     =item loc1
326 :    
327 :     A location string for the first location.
328 :    
329 :     =item loc2
330 :    
331 :     A location string for the second location.
332 :    
333 :     =item RETURN
334 :    
335 :     Returns the number of overlapping base pairs, or zero if there is no
336 :     overlap.
337 :    
338 :     =back
339 :    
340 :     =cut
341 :    
342 :     sub location_overlap {
343 :     # Get the parameters.
344 :     shift if UNIVERSAL::isa($_[0], __PACKAGE__);
345 :     my ($loc1, $loc2) = @_;
346 :     # Convert the locations to location objects.
347 :     my $loc1Object = BasicLocation->new($loc1);
348 :     my $loc2Object = BasicLocation->new($loc2);
349 :     # Compute the overlap.
350 :     my $retVal = $loc1Object->Overlap($loc2Object->Left, $loc2Object->Right);
351 :     # Return the result.
352 :     return $retVal;
353 :     }
354 :    
355 :    
356 : parrello 1.24 =head3 to_alias
357 :    
358 : parrello 1.98 my @aliases = $sfxlate->to_alias($featureID, $type);
359 : parrello 1.24
360 :     or
361 :    
362 : parrello 1.98 my $alias = $sfxlate->to_alias($featureID, %type);
363 : parrello 1.24
364 :     Return all of a feature's aliases of the specified type. This is a fairly simplistic
365 :     method: the alias's type is the string preceding the vertical bar (e.g. C<uni|12345> is
366 :     a UniProt alias).
367 :    
368 :     =over 4
369 :    
370 :     =item featureID
371 :    
372 :     ID of the feature whose aliases are desired.
373 :    
374 :     =item type
375 :    
376 :     Type of aliases that are desired.
377 :    
378 :     =item RETURN
379 :    
380 :     In a list context, returns a list of the aliases of the specified type for the specified
381 :     feature ID. In a scalar context, returns the first alias of the specified type.
382 :    
383 :     =back
384 :    
385 :     =cut
386 :     #: Return Type @;
387 :     sub to_alias {
388 :     # Get the parameters.
389 :     my ($self, $featureID, $type) = @_;
390 :     # Get the desired aliases.
391 :     my @retVal = grep { $_ =~ /^$type\|/ } $self->{sprout}->FeatureAliases($featureID);
392 :     # Return the result.
393 :     return @retVal;
394 :     }
395 :    
396 : parrello 1.98 =head3 genome_list
397 :    
398 :     my $genomeData = $fig->genome_list();
399 :    
400 :     Return a reference to a list of all the genomes. For each genome there will be a
401 :     list reference that contains the genome ID, the genome name, and the genome
402 :     domain.
403 :    
404 :     =cut
405 :    
406 :     sub genome_list {
407 :     # Get the parameters.
408 :     my ($self) = @_;
409 :     # Declare the return variable.
410 :     my @retVal = ();
411 :     # Get the genome data.
412 :     my @genomeData = $self->{sprout}->GetAll([qw(Genome)], "", [],
413 :     ['Genome(id)',
414 :     'Genome(genus)',
415 :     'Genome(species)',
416 :     'Genome(unique-characterization)',
417 :     'Genome(taxonomy)']);
418 :     # Reformat it. We combine the genus, species, and unique-characterization to form the name,
419 :     # and strip off the first taxonomy word to get the domain.
420 :     for my $genomeDatum (@genomeData) {
421 :     my ($id, $genus, $species, $strain, $taxonomy) = @{$genomeDatum};
422 :     my ($domain) = split(/\s*;\s+/, $taxonomy, 2);
423 :     push @retVal, [$id, "$genus $species $strain", $domain];
424 :     }
425 :     # Return the result.
426 :     return \@retVal;
427 :     }
428 :    
429 : parrello 1.43 =head3 all_compounds
430 :    
431 : parrello 1.98 my @compounds = $sfx->all_compounds();
432 : parrello 1.43
433 :     Return a list containing all of the KEGG compounds.
434 :    
435 :     =cut
436 :    
437 :     sub all_compounds {
438 :     # Get the parameters.
439 :     my ($self) = @_;
440 :     # Get all the compound IDs.
441 : parrello 1.46 my @retVal = $self->{sprout}->GetFlat(['Compound'], "", [], 'Compound(id)');
442 : parrello 1.43 # Return them to the caller.
443 :     return @retVal;
444 :     }
445 :    
446 :     =head3 names_of_compound
447 :    
448 : parrello 1.98 my @names = $sfx->names_of_compound($cid);
449 : parrello 1.43
450 :     Returns a list containing all of the names assigned to the specified KEGG compound. The list
451 : parrello 1.51 will be ordered with the primary name first.
452 : parrello 1.43
453 :     =over 4
454 :    
455 :     =item cid
456 :    
457 :     ID of the desired compound.
458 :    
459 :     =item RETURN
460 :    
461 :     Returns a list of names for the specified compound.
462 :    
463 :     =back
464 :    
465 :     =cut
466 :    
467 :     sub names_of_compound {
468 :     # Get the parameters.
469 :     my($self, $cid) = @_;
470 : parrello 1.114 # Get the specified compound's list of names.
471 :     my @names = $self->{sprout}->GetFlat('HasCompoundName',
472 :     "HasCompoundName(from-link) = ?",
473 :     [$cid], 'to-link');
474 :     # Put the main label at the front.
475 :     my ($label) = $self->{sprout}->GetEntityValues(Compound => $cid, ['label']);
476 : parrello 1.51 my @retVal = grep { $_ ne $label } @names;
477 :     unshift @retVal, $label;
478 :     # Return the result to the caller.
479 : parrello 1.50 return @retVal;
480 : parrello 1.43 }
481 :    
482 :     =head3 comp2react
483 :    
484 : parrello 1.98 my @rids = $sfx->comp2react($cid);
485 : parrello 1.43
486 :     Return a list containing all of the reaction IDs for reactions that take the
487 :     specified compound as either a substrate or a product.
488 :    
489 :     =cut
490 :    
491 :     sub comp2react {
492 :     # Get the parameters.
493 :     my ($self, $cid) = @_;
494 :     # Get a list of the reactions connected to this compound.
495 :     my @retVal = $self->{sprout}->GetFlat(['IsAComponentOf'],
496 :     'IsAComponentOf(from-link) = ?',
497 :     [$cid], 'IsAComponentOf(to-link)');
498 :     return @retVal;
499 :     }
500 :    
501 :     =head3 valid_reaction_id
502 :    
503 : parrello 1.98 my $flag = $sfx->valid_reaction_id($rid);
504 : parrello 1.43
505 :     Returns true iff the specified ID is a valid reaction ID.
506 :    
507 :     This will become important as we include non-KEGG reactions
508 :    
509 :     =over 4
510 :    
511 :     =item rid
512 :    
513 :     Reaction ID to test.
514 :    
515 :     =item RETURN
516 :    
517 :     Returns TRUE if the reaction ID is in the data store, else FALSE.
518 :    
519 :     =back
520 :    
521 :     =cut
522 :    
523 :     sub valid_reaction_id {
524 :     # Get the parameters.
525 :     my ($self, $rid) = @_;
526 :     # Check to see if a reaction with the specified ID exists.
527 :     my $retVal = $self->{sprout}->Exists('Reaction', $rid);
528 :     return $retVal;
529 :     }
530 :    
531 :     =head3 cas
532 :    
533 : parrello 1.98 my $cas = $sfx->cas($cid);
534 : parrello 1.43
535 :     Return the Chemical Abstract Service (CAS) ID for the compound, if known.
536 :    
537 :     =over 4
538 :    
539 :     =item cid
540 :    
541 :     ID of the compound whose CAS ID is desired.
542 :    
543 :     =item RETURN
544 :    
545 :     Returns the CAS ID of the specified compound, or an empty string if the CAS ID
546 :     is not known or does not exist.
547 :    
548 :     =back
549 :    
550 :     =cut
551 :    
552 :     sub cas {
553 :     # Get the parameters.
554 :     my ($self, $cid) = @_;
555 :     # Ask for the CAS ID.
556 :     my ($retVal) = $self->{sprout}->GetFlat(['Compound'], 'Compound(id) = ?',
557 :     [$cid], 'Compound(cas-id)');
558 :     # If we didn't find a CAS ID, return an empty string.
559 :     if (! $retVal) {
560 :     $retVal = "";
561 :     }
562 :     return $retVal;
563 :     }
564 :    
565 :     =head3 cas_to_cid
566 :    
567 : parrello 1.98 my $cid = $sfx->cas_to_cid($cas);
568 : parrello 1.43
569 :     Return the compound id (cid), given the Chemical Abstract Service (CAS) ID.
570 :    
571 :     =over 4
572 :    
573 :     =item cas
574 :    
575 :     CAS ID of the desired compound.
576 :    
577 :     =item RETURN
578 :    
579 :     Returns the ID of the compound corresponding to the specified CAS ID, or an empty
580 :     string if the CAS ID is not in the data store.
581 :    
582 :     =back
583 :    
584 :     =cut
585 :    
586 :     sub cas_to_cid {
587 :     # Get the parameters.
588 :     my ($self, $cas) = @_;
589 :     # Look for the compound.
590 : parrello 1.46 my @retVal = $self->{sprout}->GetFlat(['Compound'], "Compound(cas-id) = ?",
591 : parrello 1.43 [$cas], 'Compound(id)');
592 : parrello 1.46 # Return an empty string if the compound was not found or there were too
593 :     # many.
594 :     my $retVal = (@retVal != 1 ? "" : $retVal[0]);
595 : parrello 1.43 return $retVal;
596 :     }
597 :    
598 :     =head3 all_reactions
599 :    
600 : parrello 1.98 my @rids = $sfx->all_reactions();
601 : parrello 1.43
602 :     Return a list containing all of the KEGG reaction IDs.
603 :    
604 :     =cut
605 :    
606 :     sub all_reactions {
607 :     # Get the parameters.
608 :     my ($self) = @_;
609 :     # Get a list of reaction IDs.
610 :     my @retVal = $self->{sprout}->GetFlat(['Reaction'], "", [], 'Reaction(id)');
611 :     return @retVal;
612 :     }
613 :    
614 : parrello 1.57 =head3 delete_genomes
615 :    
616 : parrello 1.98 my $stats = $sfx->delete_genomes(\@genomes);
617 : parrello 1.57
618 :     Delete the specified genomes from the database.
619 :    
620 :     =over 4
621 :    
622 :     =item genomes
623 :    
624 :     Reference to a list of the IDs of the genomes to be deleted.
625 :    
626 :     =item RETURN
627 :    
628 :     Returns a statistics object detailing the number of rows deleted from each table.
629 :    
630 :     =back
631 :    
632 :     =cut
633 :     #: Return Type $%;
634 :     sub delete_genomes {
635 :     # Get the parameters.
636 :     my ($self, $genomes) = @_;
637 :     # Create the statistics object.
638 :     my $retVal = Stats->new('genomeIDs');
639 :     # Loop through the genome IDs, deleting them individually.
640 :     for my $genomeID (@{$genomes}) {
641 :     # Delete the genome.
642 :     my $stats = $self->{sprout}->DeleteGenome($genomeID);
643 :     # Accumulate the statistics.
644 :     $retVal->Accumulate($stats);
645 :     # Denote we've handled another genome.
646 :     $retVal->Add('genomeIDs');
647 :     }
648 :     # Return the result.
649 :     return $retVal;
650 :     }
651 :    
652 : parrello 1.102 =head3 feature_aliases_bulk
653 :    
654 :     my $aliasHash = $fig->feature_aliases_bulk(\@fids);
655 :    
656 :     Return a reference to a hash mapping feature IDs to aliases. The aliases
657 :     are retrieved using a single query, rather than one at a time.
658 :     The FIG version of this method has an additional parameter to suppress
659 :     the check for deleted features, but this is not necessary in the Sprout
660 :     database, because deleted features are truly deleted instead of being
661 :     marked inactive.
662 :    
663 :     =over 4
664 :    
665 :     =item fids
666 :    
667 :     Reference to a list of feature IDs. The aliases returned will all belong to
668 :     the specified features.
669 :    
670 :     =item RETURN
671 :    
672 :     Returns a reference to a hash mapping each feature ID to a list of its aliases.
673 :    
674 :     =back
675 :    
676 :     =cut
677 :    
678 :     sub feature_aliases_bulk {
679 :     # Get the parameters.
680 :     my ($self, $fids) = @_;
681 :     # Create a filter for the IDs in the list.
682 :     my @filterMarks = ();
683 :     my @filterParms = ();
684 :     for my $fid (@{$fids}) {
685 :     push @filterMarks, '?';
686 :     push @filterParms, $fid;
687 :     }
688 :     my $filterString = "IsAliasOf(to-link) IN (" . join(", ", @filterMarks) . ")";
689 :     # Get the aliases.
690 :     my @rows = $self->{sprout}->GetAll(['IsAliasOf'], $filterString, \@filterParms, [qw(IsAliasOf(to-link) IsAliasOf(from-link))]);
691 :     # Form them into a hash.
692 :     my %retVal = ();
693 :     for my $row (@rows) {
694 :     push @{$retVal{$row->[0]}}, $row->[1];
695 :     }
696 :     # Return the result.
697 :     return \%retVal;
698 :     }
699 :    
700 : parrello 1.44 =head3 reversible
701 :    
702 : parrello 1.98 my $flag = $sfx->reversible($rid);
703 : parrello 1.44
704 :     Return TRUE if the specified reaction is reversible. A reversible reaction has no main
705 :     direction. The connector is symbolized by C<< <=> >> instead of C<< => >>.
706 :    
707 :     =over 4
708 :    
709 :     =item rid
710 :    
711 :     ID of the ralevant reaction.
712 :    
713 :     =item RETURN
714 :    
715 :     Returns TRUE if the specified reaction is reversible, else FALSE.
716 :    
717 :     =back
718 :    
719 :     =cut
720 :    
721 :     sub reversible {
722 :     # Get the parameters.
723 :     my ($self, $rid) = @_;
724 :     # Assume a reversible reaction unless we prove otherwise.
725 :     my $retVal = 1;
726 :     # Look for the reaction's reversibility flag.
727 :     my ($reversible) = $self->{sprout}->GetFlat(['Reaction'], "Reaction(id) = ?", [$rid],
728 :     'Reaction(rev)');
729 :     if (defined $reversible) {
730 :     $retVal = $reversible;
731 :     }
732 :     # Return the result.
733 :     return $retVal;
734 :     }
735 :    
736 : parrello 1.98 =head3 contig_lengths
737 :    
738 :     my $contigHash = $fig->contig_lengths($genomeID);
739 :    
740 :     Return a hash reference that maps each contig ID to a length that
741 :     indicates the number of base pairs.
742 :    
743 :     =over 4
744 :    
745 :     =item genomeID
746 :    
747 :     ID of the genome whose contigs are desired.
748 :    
749 :     =item RETURN
750 :    
751 :     Returns a hash that maps the IDs of each of the genome's contigs to their lengths in base pairs.
752 :    
753 :     =back
754 :    
755 :     =cut
756 :    
757 :     sub contig_lengths {
758 :     # Get the parameters.
759 :     my ($self, $genomeID) = @_;
760 :     # Declare the return variable.
761 :     my $retVal = {};
762 :     # Get a list of the contigs for this genome.
763 :     my @contigData = $self->{sprout}->GetAll([qw(HasContig IsMadeUpOf)], "HasContig(from-link) = ?", [$genomeID],
764 :     [qw(HasContig(to-link) IsMadeUpOf(start-position) IsMadeUpOf(len))]);
765 :     # Loop through the contigs.
766 :     for my $contigRow (@contigData) {
767 :     # Break out the data items in the row.
768 :     my ($contigID, $start, $len) = @{$contigRow};
769 :     Trace("Contig $contigID has start $start and length $len.") if T(3);
770 :     # Compute the position after the end of this sequence. If it's the last sequence,
771 :     # that will be the contig length.
772 :     my $end = $start + $len;
773 :     # If this is the best value so far, save it.
774 :     if (! exists $retVal->{$contigID} || $end > $retVal->{$contigID}) {
775 :     $retVal->{$contigID} = $end;
776 :     }
777 :     }
778 :     # Return the result.
779 :     return $retVal;
780 :     }
781 :    
782 :    
783 : parrello 1.44 =head3 reaction2comp
784 :    
785 : parrello 1.98 my @tuples = $fig->reaction2comp($rid, $which);
786 : parrello 1.44
787 :     Return the substrates or products for a reaction. In any event (i.e.,
788 :     whether you ask for substrates or products), you get back a list of
789 :     3-tuples. Each 3-tuple will contain
790 :    
791 :     [$cid,$stoich,$main]
792 :    
793 :     Stoichiometry indicates how many copies of the compound participate in
794 :     the reaction. It is normally numeric, but can be things like "n" or "(n+1)".
795 :     $main is 1 iff the compound is considered "main" or "connectable".
796 :    
797 :     =over 4
798 :    
799 :     =item rid
800 :    
801 :     ID of the raction whose compounds are desired.
802 :    
803 :     =item which
804 :    
805 :     TRUE if the products (right side) should be returned, FALSE if the substrates
806 :     (left side) should be returned.
807 :    
808 :     =item RETURN
809 :    
810 :     Returns a list of 3-tuples. Each tuple contains the ID of a compound, its
811 :     stoichiometry, and a flag that is TRUE if the compound is one of the main
812 :     participants in the reaction.
813 :    
814 :     =back
815 :    
816 :     =cut
817 :    
818 :     sub reaction2comp {
819 :     # Get the parameters.
820 :     my ($self, $rid, $which) = @_;
821 :     # Convert the which flag into a set number.
822 :     my $setN = ($which ? 1 : 0);
823 :     # Get the reaction's compounds from the database. We sort them by component ID for
824 :     # compatability with the SEED method.
825 :     my @retVal = $self->{sprout}->GetAll(['IsAComponentOf'],
826 :     "IsAComponentOf(to-link) = ? AND IsAComponentOf(product) = ? ORDER BY IsAComponentOf(from-link)",
827 :     [$rid, $setN], ['IsAComponentOf(from-link)',
828 : parrello 1.46 'IsAComponentOf(stoichiometry)',
829 : parrello 1.44 'IsAComponentOf(main)']);
830 :     # Return the result.
831 :     return @retVal;
832 :     }
833 :    
834 :     =head3 catalyzed_by
835 :    
836 : parrello 1.98 my @ecs = $sfx->catalyzed_by($rid);
837 : parrello 1.44
838 :     Return the ECs (roles) that are reputed to catalyze the reaction. Note that we are currently
839 :     just returning the ECs that KEGG gives.
840 :    
841 :     =over 4
842 :    
843 :     =item rid
844 :    
845 :     ID of the reaction whose catalyzing roles are desired.
846 :    
847 :     =item RETURN
848 :    
849 :     Returns the EC codes of the roles that catalyze the reaction.
850 :    
851 :     =back
852 :    
853 :     =cut
854 :    
855 :     sub catalyzed_by {
856 :     # Get the parameters.
857 :     my($self, $rid) = @_;
858 :     # Get the roles.
859 :     my @retVal = $self->{sprout}->GetFlat(['Catalyzes', 'Role'], "Catalyzes(to-link) = ?", [$rid],
860 :     'Role(EC)');
861 :     # Return them to the caller.
862 :     return @retVal;
863 :     }
864 :    
865 : parrello 1.46 =head3 catalyzes
866 :    
867 : parrello 1.98 my @ecs = $fig->catalyzes($role);
868 : parrello 1.46
869 :     Returns the reaction IDs of the reactions catalyzed by the specified role (normally an EC).
870 :    
871 :     =over 4
872 :    
873 :     =item role
874 :    
875 :     ID or EC number of the role whose reactions are desired.
876 :    
877 :     =item RETURN
878 :    
879 :     Returns a list containing the IDs of the reactions catalyzed by the role.
880 :    
881 :     =back
882 :    
883 :     =cut
884 :    
885 :     sub catalyzes {
886 :     # Get the parameters.
887 :     my ($self, $role) = @_;
888 :     # Check the type of role specifier.
889 :     my $key = (FIG::is_ec($role) ? "Role(EC)" : "Role(id)");
890 :     # Look for a list of reactions corresponding to the role.
891 :     my @retVal = $self->{sprout}->GetFlat(['Role', 'Catalyzes'], "$key = ?",
892 :     [$role], 'Catalyzes(to-link)');
893 :     return @retVal;
894 :     }
895 :    
896 :     =head3 displayable_reaction
897 :    
898 : parrello 1.98 my $displayString = $fig->displayable_reaction($rid);
899 : parrello 1.46
900 :     Return the displayable version of a reaction. This is built on the fly from
901 :     the B<IsAComponentOf> relationship.
902 :    
903 :     =cut
904 :    
905 :     sub displayable_reaction {
906 :     # Get the parameters.
907 :     my($self, $rid) = @_;
908 :     # Declare the return variable.
909 :     my $retVal = "";
910 :     # Get the reaction's connector type.
911 :     my ($connector) = $self->{sprout}->GetEntityValues('Reaction', $rid, ['Reaction(rev)']);
912 :     if (! defined $connector) {
913 :     # Here the reaction does not exist. We return the ID unmodified.
914 :     $retVal = $rid;
915 :     } else {
916 :     # Determine the connector style. TRUE means the reaction is
917 :     # reversible.
918 :     $connector = ($connector ? "<=>" : "=>");
919 :     # The reaction display consists of the substrate compounds, the
920 :     # connector, and then the product compounds. First, we need the
921 :     # data.
922 :     my @compounds = $self->{sprout}->GetAll(['IsAComponentOf', 'Compound'],
923 :     "IsAComponentOf(to-link) = ? ORDER BY IsAComponentOf(product), IsAComponentOf(loc), IsAComponentOf(main) DESC",
924 :     [$rid], ['IsAComponentOf(product)',
925 :     'IsAComponentOf(stoichiometry)',
926 :     'Compound(label)']);
927 :     # Each compound is prefixed by an operator. The first substrate
928 :     # has nothing in front of it. The first product has a connector
929 :     # in front of it. Everything else is preceded by a plus sign.
930 :     # We use a saved mode indicator to detect the changes, and a
931 :     # simple array to hold the two types of special operators.
932 :     my $thisMode = -1;
933 :     my @op = ("", " $connector ");
934 :     # Loop through the compounds.
935 :     for my $compoundData (@compounds) {
936 :     # Split out the compound data.
937 :     my ($modeFlag, $stoich, $name) = @{$compoundData};
938 :     # Determine the stoichiometry prefix. If it's 1, there is
939 :     # no prefix. Otherwise, we use the stoichiometry value
940 :     # followed by a space. In other words, two DNAs is "2 DNA",
941 :     # but one DNA is simply "DNA".
942 :     my $prefix = ($stoich == 1 ? "" : "$stoich ");
943 :     # Determine the operator to go in front of this compound.
944 :     if ($thisMode eq $modeFlag) {
945 :     $retVal .= " + ";
946 :     } else {
947 :     # Here the mode has changed, so we pull the appropriate
948 :     # operator out of the @op array. Note that we don't
949 :     # surround it by spaces the way we do with the plus.
950 :     # The empty string is supposed to be empty, and the
951 :     # connector has already had the spaces put in.
952 :     $retVal .= $op[$modeFlag];
953 :     $thisMode = $modeFlag;
954 :     }
955 :     # Add this compound.
956 :     $retVal .= $prefix . $name;
957 :     }
958 :     }
959 :     # Return the result string.
960 :     return $retVal;
961 :     }
962 :    
963 :     =head3 all_maps
964 :    
965 : parrello 1.98 my @maps = $fig->all_maps();
966 : parrello 1.46
967 :     Return all of the KEGG maps in the data store. KEGG maps in Sprout are
968 :     represented by the B<Diagram> entity.
969 :    
970 :     =cut
971 :    
972 :     sub all_maps {
973 :     # Get the parameters.
974 :     my ($self) = @_;
975 :     # Get all of the diagram IDs.
976 :     my @retVal = $self->{sprout}->GetFlat(['Diagram'], "", [], 'Diagram(id)');
977 :     # Return the list of IDs.
978 :     return @retVal;
979 :     }
980 :    
981 :     =head3 ec_to_maps
982 :    
983 : parrello 1.98 my @maps = $fig->ec_to_maps($ec);
984 : parrello 1.46
985 :     Return the set of maps that contain a specific functional role. The role can be
986 :     specified by an EC number or a full-blown role ID. Maps in Sprout are stored as
987 :     diagrams.
988 :    
989 :     =over 4
990 :    
991 :     =item ec
992 :    
993 :     The EC number or role ID of the role whose maps are desired.
994 :    
995 :     =item RETURN
996 :    
997 :     Returns a list of the IDs for the maps that contain the specified role.
998 :    
999 :     =back
1000 :    
1001 :     =cut
1002 :    
1003 :     sub ec_to_maps {
1004 :     # Get the parameters.
1005 :     my ($self, $ec) = @_;
1006 : parrello 1.94 # Declare the return value.
1007 :     my @retVal;
1008 :     # Get the Sprout object.
1009 :     my $sprout = $self->{sprout};
1010 : parrello 1.46 # Determine whether this is an EC number or a role.
1011 :     if (FIG::is_ec($ec)) {
1012 : parrello 1.94 # Here we have an EC number. We determine the roles using the IsIdentifiedByEC
1013 :     # relationship.
1014 :     @retVal = $sprout->GetFlat([qw(IsIdentifiedByEC RoleOccursIn)], "IsIdentifiedByEC(to-link) = ?",
1015 :     [$ec], 'RoleOccursIn(to-link)');
1016 :     } else {
1017 :     # Here we have a role ID, which cuts out a step.
1018 :     @retVal = $sprout->GetFlat(['RoleOccursIn'], "RoleOccursIn(from-link) = ?", [$ec],
1019 :     'RoleOccursIn(to-link)');
1020 : parrello 1.46 }
1021 : parrello 1.48 if (T(ECLink => 4)) {
1022 :     my $count = @retVal;
1023 :     Trace("$count maps returned for $ec.");
1024 :     }
1025 : parrello 1.46 return @retVal;
1026 :     }
1027 :    
1028 :     =head3 role_to_maps
1029 :    
1030 :     This is an alternate name for L</ec_to_maps>.
1031 :    
1032 :     =cut
1033 :    
1034 :     sub role_to_maps {
1035 :     my ($self, $role) = @_;
1036 :     return $self->ec_to_maps($role);
1037 :     }
1038 :    
1039 : parrello 1.99 =head3 in_pch_pin_with_and_evidence
1040 :    
1041 :     my @list = $fig->in_pch_pin_with_and_evidence($peg);
1042 :    
1043 :     Return a list of the features pinned to the specified feature. Each
1044 :     element of the list will be a 2-tuple, the first element being the ID of
1045 :     a pinned feature and the second a flag that is TRUE if the feature has
1046 :     physically close homologs in diverse organisms and 0 otherwise.
1047 :    
1048 :     =over 4
1049 :    
1050 :     =item peg
1051 :    
1052 :     ID of the relevant feature.
1053 :    
1054 :     =item RETURN
1055 :    
1056 :     Returns a list of 2-tuples, each consisting of a pinned feature ID and an indicator of
1057 :     whether or not the pin is conserved in any diverse organisms.
1058 :    
1059 :     =back
1060 :    
1061 :     =cut
1062 :    
1063 :     sub in_pch_pin_with_and_evidence {
1064 :     # Get the parameters.
1065 :     my ($self, $peg) = @_;
1066 :     # Ask the coupling server for the data.
1067 :     my @retVal = FIGRules::NetCouplingData('in_pch_pin_with_and_evidence', id1 => $peg);
1068 :     # Return the result.
1069 :     return @retVal;
1070 :     }
1071 :    
1072 :    
1073 : parrello 1.46 =head3 map_to_ecs
1074 :    
1075 : parrello 1.98 my @ecs = $fig->map_to_ecs($map);
1076 : parrello 1.46
1077 :     Return the set of functional roles (usually ECs) that are contained in the functionality
1078 :     depicted by a map.
1079 :    
1080 :     This method only returns EC numbers. If we need roles that don't have EC numbers, it
1081 :     will need to be changed.
1082 :    
1083 :     =over 4
1084 :    
1085 :     =item map
1086 :    
1087 :     ID of the KEGG map whose roles are desired.
1088 :    
1089 :     =item RETURN
1090 :    
1091 :     Returns a list of EC numbers for the roles in the specified map.
1092 :    
1093 :     =back
1094 :    
1095 :     =cut
1096 :    
1097 :     sub map_to_ecs {
1098 :     # Get the parameters.
1099 :     my ($self, $map) = @_;
1100 :     # Get all the EC numbers for the roles in the specified map.
1101 :     my @retVal = $self->{sprout}->GetFlat(['Role', 'RoleOccursIn'], "RoleOccursIn(to-link) = ?",
1102 :     [$map], 'Role(EC)');
1103 :     return @retVal;
1104 :     }
1105 :    
1106 :     =head3 map_name
1107 :    
1108 : parrello 1.98 my $name = $fig->map_name($map);
1109 : parrello 1.46
1110 :     Return the descriptive name covering the functionality depicted by the specified map.
1111 :    
1112 :     =over 4
1113 :    
1114 :     =item map
1115 :    
1116 :     ID of the map whose description is desired.
1117 :    
1118 :     =item RETURN
1119 :    
1120 :     Returns the descriptive name of the map, or an empty string if no description is available.
1121 :    
1122 :     =back
1123 :    
1124 :     =cut
1125 :    
1126 :     sub map_name {
1127 :     # Get the parameters.
1128 :     my ($self, $map) = @_;
1129 :     # Ask for the map name.
1130 :     my ($retVal) = $self->{sprout}->GetFlat(['Diagram'], "Diagram(id) = ?", [$map],
1131 :     'Diagram(name)');
1132 :     # If the map was not found, return an empty string.
1133 :     if (! defined $retVal) {
1134 :     $retVal = "";
1135 :     }
1136 :     # Return the result.
1137 :     return $retVal;
1138 :     }
1139 : parrello 1.44
1140 : parrello 1.24 =head3 abbrev
1141 :    
1142 : parrello 1.98 my $abbreviated_name = $sfxlate->abbrev($genome_name);
1143 : parrello 1.24
1144 :     Abbreviate a genome name to 10 characters or less.
1145 :    
1146 :     For alignments and such, it is very useful to be able to produce an abbreviation of genus/species.
1147 :     That's what this does. Note that multiple genus/species might reduce to the same abbreviation, so
1148 :     be careful (disambiguate them, if you must).
1149 :    
1150 :     The abbreviation is formed from the first three letters of the species name followed by the
1151 :     first three letters of the genus name followed by the first three letters of the species name and
1152 :     then the next four nonblank characters.
1153 :    
1154 :     =over 4
1155 :    
1156 :     =item genome_name
1157 :    
1158 :     The name to abbreviate.
1159 :    
1160 :     =item RETURN
1161 :    
1162 :     An abbreviated version of the specified name.
1163 :    
1164 :     =back
1165 :    
1166 :     =cut
1167 :    
1168 :     sub abbrev {
1169 :     my ($self, $genome_name) = @_;
1170 :     return FIG::abbrev($genome_name);
1171 :     }
1172 :    
1173 :     =head3 add_attribute
1174 :    
1175 : parrello 1.98 $sfxlate->add_attribute($peg, $key, $value, $url);
1176 : parrello 1.24
1177 :     Add a new attribute value (Property) to a feature. In the SEED system, attributes can
1178 :     be added to almost any object. In Sprout, they can only be added to features. In
1179 :     Sprout, attributes are implemented using I<properties>. A property represents a key/value
1180 :     pair. If the particular key/value pair coming in is not already in the database, a new
1181 :     B<Property> record is created to hold it.
1182 :    
1183 :     =over 4
1184 :    
1185 :     =item peg
1186 :    
1187 :     ID of the feature to which the attribute is to be replied.
1188 :    
1189 :     =item key
1190 :    
1191 :     Name of the attribute (key).
1192 :    
1193 :     =item value
1194 :    
1195 :     Value of the attribute.
1196 :    
1197 :     =item url
1198 :    
1199 :     URL or text citation from which the property was obtained.
1200 :    
1201 :     =back
1202 :    
1203 :     =cut
1204 :    
1205 :     sub add_attribute {
1206 :     my($self, $peg, $key, $value, $url) = @_;
1207 :     $self->{sprout}->AddProperty($peg, $key, $value, $url);
1208 :     }
1209 :    
1210 : parrello 1.10 =head3 get_system_name
1211 :    
1212 : parrello 1.98 my $name = $sfxlate->get_system_name;
1213 : parrello 1.10
1214 :     Returns C<sprout>, indicating that this is object is using the Sprout
1215 :     database. The same method on a FIG object will return C<seed>.
1216 :    
1217 :     =cut
1218 : parrello 1.20 #: Return Type $;
1219 : parrello 1.10 sub get_system_name {
1220 : olson 1.7 return "sprout";
1221 :     }
1222 :    
1223 : parrello 1.10 =head3 beg_of
1224 :    
1225 : parrello 1.98 my $offset = $sfxlate->beg_of($loc);
1226 : parrello 1.10
1227 :     Return the beginning offset of the specified list of locations.
1228 :    
1229 :     =over 4
1230 :    
1231 :     =item loc
1232 :    
1233 :     Space-delimited list of locations in the Sprout format. In other words,
1234 :     each location must be of the form I<Contig>C<_>I<beg>C<+>I<len> or
1235 :     I<Contig>C<_>I<beg>C<->I<len>, where I<Contig> is a contig ID, I<beg>
1236 :     is the starting offset of the location, and I<len> is the length of
1237 :     the location.
1238 :    
1239 :     =item RETURN
1240 :    
1241 :     Returns a number indicating the starting offset of the first location if it
1242 :     is for a forward gene, or the starting offset of the last location
1243 :     if it is for a backward gene.
1244 : parrello 1.1
1245 : parrello 1.10 =back
1246 : parrello 1.1
1247 : parrello 1.10 =cut
1248 : parrello 1.13 #: Return Type $;
1249 : parrello 1.10 sub beg_of {
1250 : parrello 1.1 my($self, $loc) = @_;
1251 :    
1252 :     #
1253 :     # Loc is a space-separated list of spans.
1254 :     #
1255 :    
1256 :     my @spans = split(/\s+/, $loc);
1257 :    
1258 :     my $first = $spans[0];
1259 :     my $last = $spans[$#spans];
1260 :    
1261 : parrello 1.10 if ($first =~ /_(\d+)\+\d+$/) {
1262 : parrello 1.24 return $1;
1263 : parrello 1.10 } elsif ($last =~ /_(\d+)-\d+$/) {
1264 : parrello 1.24 return $1;
1265 : parrello 1.10 } else {
1266 : parrello 1.105 Confess("Bad beg_of for loc='$loc' spans='@spans'");
1267 : parrello 1.1 }
1268 :     }
1269 :    
1270 : parrello 1.10 =head3 end_of
1271 :    
1272 : parrello 1.98 my $offset = $sfxlate->end_of($loc);
1273 : parrello 1.10
1274 :     Return the ending offset of the specified list of locations.
1275 :    
1276 :     =over 4
1277 :    
1278 :     =item loc
1279 :    
1280 :     Space-delimited list of locations in the Sprout format. In other words,
1281 :     each location must be of the form I<Contig>C<_>I<beg>C<+>I<len> or
1282 :     I<Contig>C<_>I<beg>C<->I<len>, where I<Contig> is a contig ID, I<beg>
1283 :     is the starting offset of the location, and I<len> is the length of
1284 :     the location.
1285 :    
1286 :     =item RETURN
1287 :    
1288 :     Returns a number indicating the ending offset of the first location if it
1289 :     is for a backward gene, or the ending offset of the last location
1290 :     if it is for a forward gene.
1291 :    
1292 :     =back
1293 :    
1294 :     =cut
1295 : parrello 1.13 #: Return Type $;
1296 : parrello 1.10 sub end_of {
1297 : parrello 1.1 my($self, $loc) = @_;
1298 :    
1299 :     #
1300 :     # Loc is a space-separated list of spans.
1301 :     #
1302 :    
1303 :     my @spans = split(/\s+/, $loc);
1304 :    
1305 :     my $first = $spans[0];
1306 :     my $last = $spans[$#spans];
1307 :    
1308 : parrello 1.10 if ($first =~ /_(\d+)-(\d+)$/) {
1309 : parrello 1.24 return $1 - $2;
1310 : parrello 1.10 } elsif ($last =~ /_(\d+)\+(\d+)$/) {
1311 : parrello 1.24 return $1 + $2;
1312 : parrello 1.10 } else {
1313 : parrello 1.105 Confess("Bad end_of for \"$loc\".");
1314 : parrello 1.1 }
1315 :     }
1316 :    
1317 : parrello 1.10 =head3 contig_of
1318 :    
1319 : parrello 1.98 my $contigID = $sfxlate->contig_of($loc);
1320 : parrello 1.10
1321 :     Return the contig ID from a location string. The location must be of the
1322 :     form I<Contig>C<_>I<beg>C<+>I<len> or I<Contig>C<_>I<beg>C<->I<len>,
1323 :     where I<Contig> is a contig ID, I<beg> is the starting offset of the
1324 :     location, and I<len> is the length of the location.
1325 :    
1326 :     =cut
1327 : parrello 1.13 #: Return Type $;
1328 : parrello 1.10 sub contig_of {
1329 : parrello 1.1 my($self, $loc) = @_;
1330 :    
1331 : parrello 1.10 if ($loc =~ /^(\S+)_\d+[+-]\d+/) {
1332 : parrello 1.24 return $1;
1333 : parrello 1.10 } else {
1334 : parrello 1.105 Confess("Bad contig_of for \"$loc\".");
1335 : parrello 1.1 }
1336 :     }
1337 :    
1338 : parrello 1.10 =head3 feature_aliases
1339 :    
1340 : parrello 1.98 my @aliasList = $sfxlate->feature_aliases($feature);
1341 : parrello 1.10
1342 :     Return a list of the aliases for the specified feature.
1343 :    
1344 :     =over 4
1345 :    
1346 :     =item feature
1347 :    
1348 :     ID of the feature whose aliases are desired.
1349 :    
1350 :     =item RETURN
1351 :    
1352 :     Returns a list of the alias names for the specified feature.
1353 :    
1354 :     =back
1355 :    
1356 :     =cut
1357 : parrello 1.13 #: Return Type @;
1358 : parrello 1.10 sub feature_aliases {
1359 : parrello 1.1 my($self, $feature) = @_;
1360 :    
1361 :     return $self->{sprout}->FeatureAliases($feature);
1362 :     }
1363 :    
1364 : parrello 1.102
1365 :     =head3 feature_aliases_in_tbl
1366 :    
1367 :     my @aliasList = $sfxlate->feature_aliases($feature);
1368 :    
1369 :     Return a list of the aliases for the specified feature. This method is identical
1370 :     to L</feature_aliases>. The FIG method with this name gets a smaller set of
1371 :     aliases than the ones returned by its B<feature_aliases> method; however, the
1372 :     aliases omitted are ones we don't have in the database.
1373 :    
1374 :     =over 4
1375 :    
1376 :     =item feature
1377 :    
1378 :     ID of the feature whose aliases are desired.
1379 :    
1380 :     =item RETURN
1381 :    
1382 :     Returns a list of the alias names for the specified feature.
1383 :    
1384 :     =back
1385 :    
1386 :     =cut
1387 :     #: Return Type @;
1388 :     sub feature_aliases_in_tbl {
1389 :     my($self, $feature) = @_;
1390 :     return $self->{sprout}->FeatureAliases($feature);
1391 :     }
1392 :    
1393 : parrello 1.10 =head3 feature_location
1394 :    
1395 : parrello 1.98 my @locations = $sfxlate->feature_location($feature);
1396 : parrello 1.10
1397 :     Return a list of the location descriptors for the specified feature.
1398 :    
1399 :     =over 4
1400 :    
1401 :     =item feature
1402 :    
1403 :     ID of the feature whose locations are desired.
1404 :    
1405 :     =item RETURN
1406 :    
1407 :     Returns a list of the descriptors for the specified feature's
1408 :     locations, in transcription order. In a scalar context, returns
1409 : parrello 1.35 the locations as a single comma-delimited string.
1410 : parrello 1.10
1411 :     =back
1412 :    
1413 :     =cut
1414 : parrello 1.13 #: Return Type @;
1415 :     #: Return Type $;
1416 : parrello 1.10 sub feature_location {
1417 : parrello 1.1 my($self, $feature) = @_;
1418 :     return $self->{sprout}->FeatureLocation($feature);
1419 :     }
1420 :    
1421 : parrello 1.10 =head3 ftype
1422 :    
1423 : parrello 1.98 my $ftype = $sfxlate->ftype($feature);
1424 : parrello 1.10
1425 :     Return the type (peg, rna, etc.) of the specified feature.
1426 :    
1427 :     =over 4
1428 :    
1429 :     =item feature
1430 :    
1431 :     ID of the feature whose type is desired.
1432 :    
1433 :     =item RETURN
1434 :    
1435 :     Returns the type of the specified feature.
1436 :    
1437 :     =back
1438 : parrello 1.1
1439 : parrello 1.10 =cut
1440 : parrello 1.13 #: Return Type $;
1441 : parrello 1.10 sub ftype {
1442 : parrello 1.1 my($self, $feature) = @_;
1443 :    
1444 :     return $self->{sprout}->FType($feature);
1445 :     }
1446 :    
1447 : parrello 1.10 =head3 contig_ln
1448 :    
1449 : parrello 1.98 my $length = $sfxlate->contig_ln($genome, $contig);
1450 : parrello 1.10
1451 :     Return the length of the specified contig.
1452 :    
1453 :     =over 4
1454 :    
1455 :     =item genome
1456 :    
1457 :     ID of the genome to which the contig belongs
1458 :    
1459 :     =item contig
1460 :    
1461 :     ID of the contig whose length is desired
1462 :    
1463 :     =back
1464 :    
1465 :     =cut
1466 : parrello 1.13 #: Return Type $;
1467 : parrello 1.10 sub contig_ln {
1468 : parrello 1.1 my($self, $genome, $contig) = @_;
1469 : parrello 1.40 return $self->{sprout}->ContigLength($contig);
1470 :     }
1471 :    
1472 :     =head3 compute_clusters
1473 :    
1474 : parrello 1.98 my @clusterList = $fig->compute_clusters(\@pegList, $subsystem, $distance);
1475 : parrello 1.40
1476 :     Partition a list of PEGs into sections that are clustered close together on
1477 :     the genome. The PEGs must be from a single subsystem row that was recently
1478 :     retrieved using the C<get_pegs_from_cell> method on the subsystem object
1479 :     passed in. If this is not the case, the method will still work, but the
1480 :     PEGs could be mapped into the incorrect clusters.
1481 :    
1482 :     =over 4
1483 :    
1484 :     =item pegList
1485 :    
1486 :     Reference to a list of PEG IDs.
1487 :    
1488 :     =item subsystem
1489 :    
1490 :     Subsystem object for the relevant subsystem.
1491 :    
1492 :     =item distance (optional)
1493 :    
1494 :     The maximum distance between PEGs that makes them considered close. This
1495 :     parameter is not used, but is required for compatability with SEED.
1496 :    
1497 :     =item RETURN
1498 :    
1499 :     Returns a list of lists. Each sub-list is a cluster of PEGs.
1500 :    
1501 :     =back
1502 : parrello 1.1
1503 : parrello 1.40 =cut
1504 :    
1505 :     sub compute_clusters {
1506 :     # Get the parameters.
1507 :     my ($self, $pegList, $subsystem, $distance) = @_;
1508 :     # Compute the result.
1509 :     my $retVal = $self->{sprout}->ClusterPEGs($subsystem, $pegList);
1510 :     # Return it to the caller.
1511 :     return @{$retVal};
1512 : parrello 1.1 }
1513 :    
1514 : parrello 1.24 =head3 maps_to_id
1515 :    
1516 : parrello 1.98 my $peg = $sfxlate->maps_to_id($id);
1517 : parrello 1.24
1518 : parrello 1.67 The "major synonym" of a feature is one that is selected (and represents the
1519 :     longest version of a set of essentially identical sequences). This routine
1520 :     returns the "major synonym".
1521 : parrello 1.24
1522 :     =over 4
1523 :    
1524 :     =item id
1525 :    
1526 : parrello 1.67 Feature ID whose major synonym is desired.
1527 : parrello 1.24
1528 :     =item RETURN
1529 :    
1530 : parrello 1.67 Returns the major synonym corresponding to the named feature, or the
1531 :     feature itself if there is no synonym for the named feature.
1532 : parrello 1.24
1533 :     =back
1534 :    
1535 :     =cut
1536 :     #: Return Type $;
1537 :     sub maps_to_id {
1538 :     my ($self, $id) = @_;
1539 : parrello 1.67 return $self->{sprout}->GetSynonymGroup($id);
1540 : parrello 1.24 }
1541 : parrello 1.10
1542 :     =head3 cgi_url
1543 :    
1544 : parrello 1.98 my $url = $sfxlate->cgi_url;
1545 : parrello 1.10
1546 :     Return the URL of the directory containing the CGI scripts.
1547 :    
1548 :     =cut
1549 : parrello 1.13 #: Return Type $;
1550 : parrello 1.10 sub cgi_url {
1551 : parrello 1.1 my($self) = @_;
1552 :    
1553 : parrello 1.10 return FIG::cgi_url();
1554 : parrello 1.1 }
1555 :    
1556 : parrello 1.10 =head3 function_of
1557 :    
1558 : parrello 1.98 my $function = $sfxlate->function_of($id, $user);
1559 : parrello 1.10
1560 :     or
1561 :    
1562 : parrello 1.98 my @functions = $sfxlate->function_of($id);
1563 : parrello 1.10
1564 :     In a scalar context, returns the most recently-determined functional
1565 :     assignment of a specified feature by a particular user. In a list
1566 :     context, returns a list of 2-tuples, each consisting of a user ID
1567 :     followed by a functional assighment by that user. In this case,
1568 :     the list contains all the functional assignments for the feature.
1569 :    
1570 :     =over 4
1571 :    
1572 :     =item id
1573 :    
1574 :     ID of the relevant feature.
1575 :    
1576 :     =item user
1577 :    
1578 :     ID of the user whose assignment is desired (scalar context only)
1579 :    
1580 :     =item RETURN
1581 :    
1582 :     Returns the most recent functional assignment by the given user in scalar
1583 :     context, and a list of functional assignments in list context.
1584 :    
1585 :     =back
1586 :    
1587 :     =cut
1588 : parrello 1.13 #: Return Type $;
1589 : parrello 1.15 #: Return Type @@;
1590 : parrello 1.10 sub function_of {
1591 : parrello 1.24 if (wantarray()) {
1592 :     my ($self, $id) = @_;
1593 : parrello 1.45 my %mapping = $self->{sprout}->AllFunctionsOf($id);
1594 :     my @retVal = ();
1595 :     for my $user (sort keys %mapping) {
1596 :     push @retVal, [$user, $mapping{$user}];
1597 :     }
1598 : parrello 1.24 return @retVal;
1599 :     } else {
1600 :     my($self, $id, $user) = @_;
1601 :     return $self->{sprout}->FunctionOf($id, $user);
1602 :     }
1603 : parrello 1.1 }
1604 :    
1605 : parrello 1.101
1606 :    
1607 : parrello 1.10 =head3 bbh_list
1608 :    
1609 : parrello 1.98 my $bbhHash = $sfxlate->bbh_list($genome, \@features);
1610 : parrello 1.10
1611 :     Return a hash mapping the features in a specified list to their bidirectional best hits
1612 :     on a specified target genome.
1613 :    
1614 :     =over 4
1615 :    
1616 :     =item genomeID
1617 :    
1618 :     ID of the genome from which the best hits should be taken.
1619 :    
1620 :     =item featureList
1621 :    
1622 :     List of the features whose best hits are desired.
1623 :    
1624 :     =item RETURN
1625 :    
1626 :     Returns a reference to a hash that maps the IDs of the incoming features to the IDs of
1627 :     their best hits.
1628 :    
1629 :     =back
1630 :    
1631 :     =cut
1632 : parrello 1.13 #: Return Type $%;
1633 : parrello 1.10 sub bbh_list {
1634 : parrello 1.1 my($self, $genome, $features) = @_;
1635 :    
1636 :     return $self->{sprout}->BBHList($genome, $features);
1637 :     }
1638 :    
1639 : parrello 1.101 =head3 sprout
1640 :    
1641 :     my $sprout = $fig->sprout();
1642 :    
1643 :     Return the embedded Sprout object.
1644 :    
1645 :     =cut
1646 :    
1647 :     sub sprout {
1648 :     # Get the parameters.
1649 :     my ($self) = @_;
1650 :     # Return the result.
1651 :     return $self->{sprout};
1652 :     }
1653 :    
1654 : parrello 1.10 =head3 init_das
1655 :    
1656 : parrello 1.98 my $das = $sfxlate->init_das($url, $dsn);
1657 : parrello 1.10
1658 :     Create a DAS object for use by the GBrowse facility.
1659 :    
1660 :     =over 4
1661 :    
1662 :     =item url
1663 :    
1664 :     URL of the DAS script.
1665 :    
1666 :     =item dsn
1667 :    
1668 :     ID of the relevant Genome. The separating dot may be replaced by an
1669 :     underscore
1670 :    
1671 :     =item RETURN
1672 :    
1673 :     Returns a DAS object for browsing the specified genome using Sprout data.
1674 :     If this installation is not configured for GBrowse, returns an undefined
1675 :     value.
1676 :    
1677 :     =back
1678 :    
1679 :     =cut
1680 : parrello 1.24 #: Return Type %;
1681 : parrello 1.10 sub init_das {
1682 : parrello 1.1 my($self, $url, $dsn) = @_;
1683 :    
1684 : parrello 1.91 my $das_data_dir = "$FIG_Config::var/DAS";
1685 : parrello 1.1
1686 : parrello 1.10 if (-d $das_data_dir) {
1687 :     return new SeedDas($self,$das_data_dir, $url, $dsn);
1688 :     } else {
1689 :     return undef;
1690 : parrello 1.1 }
1691 :     }
1692 :    
1693 : parrello 1.10 =head3 genomes
1694 :    
1695 : parrello 1.98 my @genomeList = $sfxlate->genomes;
1696 : parrello 1.10
1697 :     Return a list of the IDs of all the genomes in the system.
1698 :    
1699 :     =cut
1700 : parrello 1.13 #: Return Type @;
1701 : parrello 1.10 sub genomes {
1702 : parrello 1.1 my($self) = @_;
1703 :    
1704 :     return $self->{sprout}->Genomes();
1705 :     }
1706 :    
1707 : parrello 1.10 =head3 genus_species
1708 :    
1709 : parrello 1.98 my $infoString = $sfxlate->genus_species($genome);
1710 : parrello 1.10
1711 :     Return the genus, species, and unique characterization for the specified
1712 :     genome.
1713 :    
1714 :     =over 4
1715 :    
1716 :     =item genome
1717 :    
1718 :     ID of the genome whose identifying information is desired.
1719 : parrello 1.1
1720 : parrello 1.10 =item RETURN
1721 :    
1722 :     Returns the genus and species of the genome, with the unique characterization
1723 :     (if any). If the genome does not exist, returns an undefined value.
1724 :    
1725 :     =back
1726 :    
1727 :     =cut
1728 : parrello 1.13 #: Return Type $;
1729 : parrello 1.10 sub genus_species {
1730 :     my($self, $genome) = @_;
1731 :    
1732 :     return $self->{sprout}->GenusSpecies($genome);
1733 : parrello 1.1 }
1734 :    
1735 : parrello 1.60 =head3 genome_counts
1736 :    
1737 : parrello 1.98 my ($arch, $bact, $euk, $vir, $env, $unk) = $fig->genome_counts($complete);
1738 : parrello 1.60
1739 :     Count the number of genomes in each domain. If I<$complete> is TRUE, only complete
1740 :     genomes will be included in the counts.
1741 :    
1742 :     =over 4
1743 :    
1744 :     =item complete
1745 :    
1746 :     TRUE if only complete genomes are to be counted, FALSE if all genomes are to be
1747 :     counted
1748 :    
1749 :     =item RETURN
1750 :    
1751 :     A six-element list containing the number of genomes in each of six categories--
1752 :     Archaea, Bacteria, Eukaryota, Viral, Environmental, and Unknown, respectively.
1753 :    
1754 :     =back
1755 :    
1756 :     =cut
1757 :    
1758 :     sub genome_counts {
1759 :     # Get the parameters.
1760 :     my ($self, $complete) = @_;
1761 :     # Get the Sprout object.
1762 :     my $sprout = $self->{sprout};
1763 : parrello 1.62 # Get the counts.
1764 :     my @counts = $sprout->GenomeCounts($complete);
1765 : parrello 1.60 # Return the counts.
1766 : parrello 1.62 return @counts;
1767 : parrello 1.60 }
1768 :    
1769 : parrello 1.10 =head3 dna_seq
1770 :    
1771 : parrello 1.98 my $sequence = $sfxlate->dna_seq($genome, @locations);
1772 : parrello 1.10
1773 :     Return the sequence represented by a list of locations. The locations
1774 :     should be in the standard sprout form I<contigID>C<_>I<begin>I<dir>I<end>.
1775 :    
1776 :     =over 4
1777 :    
1778 :     =item genome
1779 :    
1780 :     ID of the relevant genome.
1781 :    
1782 :     =item location1, location2, ... locationN
1783 :    
1784 :     List of locations to be included in the DNA sequence.
1785 :    
1786 :     =item RETURN
1787 :    
1788 :     Returns a string specifying the DNA nucleotides in the specified locations.
1789 :    
1790 :     =back
1791 :    
1792 :     =cut
1793 : parrello 1.13 #: Return Type $;
1794 : parrello 1.10 sub dna_seq {
1795 : parrello 1.1 my($self, $genome, @locations) = @_;
1796 :     return $self->{sprout}->DNASeq(\@locations);
1797 :     }
1798 :    
1799 : parrello 1.10 =head3 all_contigs
1800 :    
1801 : parrello 1.98 my @contigs = $sfxlate->all_contigs($genome);
1802 : parrello 1.10
1803 :     Return a list of the contigs that make up the specified genome.
1804 :    
1805 :     =over 4
1806 :    
1807 :     =item genome
1808 :    
1809 :     ID of the genome whose contigs are desired.
1810 :    
1811 :     =item RETURN
1812 :    
1813 :     Returns a list of the IDs for the contigs in the genome.
1814 :    
1815 :     =back
1816 :    
1817 :     =cut
1818 : parrello 1.13 #: Return Type @;
1819 : parrello 1.10 sub all_contigs {
1820 : parrello 1.1 my($self, $genome) = @_;
1821 :    
1822 :     return $self->{sprout}->AllContigs($genome);
1823 :     }
1824 :    
1825 : parrello 1.75 =head3 contigs_of
1826 :    
1827 : parrello 1.98 my @contig_ids = $fig->contigs_of($genome);
1828 : parrello 1.75
1829 :     Returns a list of all of the contigs occurring in the designated genome.
1830 :     This is in fact just an alternate way of calling L</all_contigs>.
1831 :    
1832 :     =over 4
1833 :    
1834 :     =item genome
1835 :    
1836 :     ID of the genome whose contigs are desired.
1837 :    
1838 :     =item RETURN
1839 :    
1840 :     Returns a list of the IDs for the contigs occurring in the specified genome.
1841 :    
1842 :     =back
1843 :    
1844 :     =cut
1845 :    
1846 :     sub contigs_of {
1847 :     my($self,$genome) = @_;
1848 :     return $self->all_contigs($genome);
1849 :     }
1850 :    
1851 : parrello 1.98 =head3 get_genome_subsystem_count
1852 :    
1853 :     my $num_subsytems = $fig->get_genome_subsystem_count($genomeID);
1854 :    
1855 :     Return the number of subsystems in which a genome participates.
1856 :    
1857 :     =over 4
1858 :    
1859 :     =item genomeID
1860 :    
1861 :     ID of the relevant genome.
1862 :    
1863 :     =item RETURN
1864 :    
1865 :     Returns the number of subsystems that have this genome in them.
1866 :    
1867 :     =back
1868 :    
1869 :     =cut
1870 :    
1871 :     sub get_genome_subsystem_count{
1872 :     # Get the parameters.
1873 :     my ($self, $genomeID) = @_;
1874 :     # Get the count of subsystems connected to this genome.
1875 :     my $retVal = $self->{sprout}->GetCount([qw(ParticipatesIn)], "ParticipatesIn(from-link) = ?", [$genomeID]);
1876 :     # Return the result.
1877 :     return $retVal;
1878 :     }
1879 :    
1880 :     =head3 all_subsystem_classifications
1881 :    
1882 :     my @classifications = $fig->all_subsystem_classifications();
1883 :    
1884 :     Return a list of all the subsystem classifications. Each element in the
1885 :     list will contain a main subsystem class and a basic subsystem class.
1886 :     The resulting list enables us to determine easily what the three-level
1887 :     subsystem tree would look like.
1888 :    
1889 :     =cut
1890 :    
1891 :     sub all_subsystem_classifications {
1892 :     # Get the parameters.
1893 :     my ($self) = @_;
1894 :     # Get a list of all the subsystem classifications. Each element of this
1895 :     # list will be a sub-list containing the two classifications as a string
1896 :     # with a splitter in it.
1897 :     my @classList = $self->{sprout}->GetFlat(['Subsystem'], '', [], 'Subsystem(classification)');
1898 :     # Merge out duplicates using a hash.
1899 :     my %classHash = map { $_ => 1 } @classList;
1900 :     # Convert the hash into the desired return list. Each key is the two class names
1901 :     # separated by a splitter string. We use "split" to make this a two-element list reference.
1902 :     my @retVal = map { [ split(/$FIG_Config::splitter/, $_)] } keys %classHash;
1903 :     # Return the result.
1904 :     return @retVal;
1905 :     }
1906 :    
1907 : parrello 1.10 =head3 genes_in_region
1908 :    
1909 : parrello 1.98 my @features = $sfxlate->genes_in_region($genome, $contig, $start, $end);
1910 : parrello 1.10
1911 :     Return a list of the features that overlap the specified region in a contig.
1912 :    
1913 :     =over 4
1914 :    
1915 :     =item genome
1916 :    
1917 :     ID of the genome containing the contig in question.
1918 :    
1919 :     =item contig
1920 :    
1921 :     ID of the contig containing the desired region.
1922 :    
1923 :     =item start
1924 :    
1925 :     Offset of the first nucleotide in the region.
1926 :    
1927 :     =item end
1928 :    
1929 :     Offset of the last nucleotide in the region.
1930 :    
1931 :     =item RETURN
1932 :    
1933 :     Returns a list of the IDs of the features that overlap the specified
1934 :     region.
1935 :    
1936 :     =back
1937 :    
1938 :     =cut
1939 : parrello 1.13 #: Return Type @;
1940 : parrello 1.10 sub genes_in_region {
1941 : parrello 1.1 my($self, $genome, $contig, $start, $end) = @_;
1942 : olson 1.4 my @results = $self->{sprout}->GenesInRegion($contig, $start, $end);
1943 :     return @results;
1944 : parrello 1.1 }
1945 :    
1946 : parrello 1.10 =head3 get_attribute
1947 :    
1948 : parrello 1.98 my @values = $sfxlate->get_attribute($feature, $attr);
1949 : parrello 1.10
1950 :     Return a list of the values for the named attribute of a specified
1951 :     feature.
1952 :    
1953 :     =over 4
1954 :    
1955 :     =item feature
1956 :    
1957 :     ID of the feature whose attribute values are desired.
1958 :    
1959 :     =item attr
1960 :    
1961 :     Name of the attribute whose values are desired.
1962 :    
1963 :     =item RETURN
1964 :    
1965 :     Returns a list of values for the named attribute. In most cases the list
1966 :     will be a singleton, however some attributes (such as C<alias>) can have
1967 :     multiple values in the list.
1968 :    
1969 :     =back
1970 :    
1971 :     =cut
1972 : parrello 1.13 #: Return Type @;
1973 : parrello 1.10 sub get_attribute {
1974 : parrello 1.1 my($self, $feature, $attr) = @_;
1975 :    
1976 :     my $obj = $self->{sprout}->GetEntity("Feature", $feature);
1977 :     $obj or return undef;
1978 :    
1979 :     return $obj->Value("Feature($attr)");
1980 :     }
1981 :    
1982 : parrello 1.10 =head3 in_cluster_with
1983 :    
1984 : parrello 1.98 my @features = $sfxlate->in_cluster_with($peg);
1985 : parrello 1.10
1986 :     Return a list of the features functionally coupled with the specified
1987 :     feature. A feature is considered functionally coupled if it tends to reside
1988 :     on the same chromosome as the specified feature.
1989 :    
1990 :     =over 4
1991 :    
1992 :     =item peg
1993 :    
1994 :     ID of the features whose functional couplings are desired.
1995 :    
1996 :     =item RETURN
1997 :    
1998 :     Returns a list of the IDs for the functionally-couplped features.
1999 :    
2000 :     =back
2001 :    
2002 :     =cut
2003 : parrello 1.13 #: Return Type @;
2004 : parrello 1.10 sub in_cluster_with {
2005 : parrello 1.1 my($self, $peg) = @_;
2006 : parrello 1.24 my %coupleData = $self->{sprout}->CoupledFeatures($peg);
2007 : parrello 1.11 return sort { FIG::by_fig_id($a,$b); } keys %coupleData;
2008 : parrello 1.1 }
2009 :    
2010 : parrello 1.10 =head3 add_annotation
2011 :    
2012 : parrello 1.98 my $ok = $sfxlate->add_annotation($featureID, $user, $text);
2013 : parrello 1.10
2014 :     Add an annotation to a feature. This method inserts new data into the
2015 :     Sprout database.
2016 :    
2017 :     =over 4
2018 :    
2019 :     =item featureID
2020 :    
2021 :     ID of the feature to annotate.'
2022 :    
2023 :     =item user
2024 :    
2025 :     ID of the user making the annotation.
2026 :    
2027 :     =item text
2028 :    
2029 :     Text of the annotation.
2030 :    
2031 :     =item RETURN
2032 :    
2033 :     Returns 1 if successful, 0 if an error occurred.
2034 :    
2035 :     =back
2036 :    
2037 :     =cut
2038 : parrello 1.13 #: Return Type $;
2039 : parrello 1.1 sub add_annotation {
2040 : parrello 1.24 my ($self, $featureID, $user, $text) = @_;
2041 : parrello 1.32 Trace("Adding annotation in SFXlate.") if T(Bruce => 4);
2042 : parrello 1.24 my $timestamp = time;
2043 :     my $retVal = $self->{sprout}->Annotate($featureID, $timestamp, $user, $text);
2044 :     return $retVal;
2045 : parrello 1.1 }
2046 :    
2047 : parrello 1.10 =head3 boundaries_of
2048 :    
2049 : parrello 1.98 my ($contig, $beg, $end) = $sfxlate->boundaries_of($locations);
2050 : parrello 1.10
2051 :     Examine a list of locations and return a location that encompasses the
2052 :     entire set. The location returned will be a list specifying the relevant
2053 :     contig and the beginning and ending offsets.
2054 :    
2055 :     =over 4
2056 :    
2057 :     =item locations
2058 :    
2059 :     A reference to a list of the desired locations or a string containing a
2060 :     comma-delimited list of the locations.
2061 :    
2062 :     =item RETURN
2063 :    
2064 :     Returns a three-element list consisting of the contig, beginning offset,
2065 :     and ending offset of a region containing all of the specified locations.
2066 :    
2067 :     =back
2068 :    
2069 :     =cut
2070 : parrello 1.13 #: Return Type @;
2071 : parrello 1.1 sub boundaries_of {
2072 : parrello 1.24 my ($self, $locations) = @_;
2073 :     if (ref($locations) ne "ARRAY") {
2074 : parrello 1.10 $locations = [ split /\s*,\s*/, $locations ];
2075 : parrello 1.24 }
2076 : parrello 1.6 Trace("Boundaries of [" . join(", ", @{$locations})) if T(4);
2077 : parrello 1.68 my ($contig,$beg,$end) = $self->{sprout}->GetBoundaries(@{$locations});
2078 : parrello 1.6 Trace("Boundaries are ($beg, $end) in $contig.") if T(3);
2079 : parrello 1.24 return ($contig,$beg,$end);
2080 : parrello 1.1 }
2081 :    
2082 : parrello 1.10 =head3 coupling_and_evidence
2083 :    
2084 : parrello 1.98 my @couplings = $sfxlate->coupling_and_evidence($feature_id);
2085 : parrello 1.10
2086 :     Return a list of the features functionally coupled to the specified feature
2087 :     along with their scores. Note that the FIG version of this method has
2088 :     four additional parameters. If provided, these parameters are simply
2089 :     ignored.
2090 :    
2091 :     =over 4
2092 :    
2093 : parrello 1.28 =item peg1
2094 : parrello 1.10
2095 :     ID of the feature whose couplings and evidence are desired.
2096 :    
2097 :     =item RETURN
2098 :    
2099 : parrello 1.28 Returns a list of 3-tuples. Each 3-tuple consists of a coupling score
2100 :     followed by the ID of a coupled feature and a list of the evidence for
2101 :     the coupling to that feature. The evidence format is the same as that
2102 :     for L</coupling_evidence>.
2103 : parrello 1.10
2104 :     =back
2105 : parrello 1.1
2106 : parrello 1.10 =cut
2107 : parrello 1.13 #: Return Type @@;
2108 : parrello 1.1 sub coupling_and_evidence {
2109 : overbeek 1.27 my ($self,$peg1) = @_;
2110 :     my %featureHash = $self->{sprout}->CoupledFeatures($peg1);
2111 : parrello 1.24 my @retVal = ();
2112 : parrello 1.38 for my $peg2 (keys %featureHash) {
2113 : parrello 1.40 # Only proceed if this is not the bogus reflexive coupling.
2114 :     if ($peg2 ne $peg1) {
2115 :     my $sc = $featureHash{$peg2};
2116 : parrello 1.46 my @ev = map { [$_->[0], $_->[1]] } $self->coupling_evidence($peg1,$peg2);
2117 :     push @retVal, [$sc,$peg2,\@ev];
2118 : parrello 1.40 }
2119 : parrello 1.24 }
2120 :     return @retVal;
2121 : parrello 1.1 }
2122 :    
2123 : parrello 1.28 =head3 coupling_evidence
2124 :    
2125 : parrello 1.98 my @evidence = $sfxlate->coupling_evidence($peg1, $peg2);
2126 : parrello 1.28
2127 :     Return the evidence for a functional coupling between two features. A coupling
2128 :     is considered functional if the specified pegs are frequently found together.
2129 :     The evidence for the coupling is therefore based on finding other genomes in
2130 :     which similar pegs are clustered on the same chromosome.
2131 :    
2132 :     =over 4
2133 :    
2134 :     =item peg1
2135 :    
2136 :     ID of the first feature of interest.
2137 :    
2138 :     =item peg2
2139 :    
2140 :     ID of the second feature of interest.
2141 :    
2142 :     =item RETURN
2143 :    
2144 :     Returns a list of 3-tuples. Each tuple consists of a feature similar to the feature
2145 :     of interest, a feature similar to the functionally coupled feature, and a flag
2146 :     that is TRUE for a representative piece of evidence and FALSE otherwise.
2147 :    
2148 :     =back
2149 :    
2150 :     =cut
2151 :     #: Return Type @@;
2152 : overbeek 1.27 sub coupling_evidence {
2153 :     my($self,$peg1,$peg2) = @_;
2154 :     return $self->{sprout}->CouplingEvidence($peg1,$peg2);
2155 :     }
2156 :    
2157 : parrello 1.10 =head3 is_deleted_fid
2158 :    
2159 : parrello 1.98 my $flag = $sfxlate->is_deleted_fid($fid);
2160 : parrello 1.10
2161 :     Return TRUE if the specified feature does B<not> exist, FALSE if it does
2162 :     exist.
2163 :    
2164 :     =over 4
2165 :    
2166 :     =item fid
2167 :    
2168 :     ID of the feature whose existence is to be tested.
2169 :    
2170 :     =item RETURN
2171 :    
2172 :     Returns TRUE if the feature does not exist, else FALSE. Note that if TRUE
2173 :     is returned, there is no guarantee that the feature ever existed, only that
2174 :     it does not exist now.
2175 :    
2176 :     =back
2177 :    
2178 :     =cut
2179 : parrello 1.13 #: Return Type $;
2180 : parrello 1.1 sub is_deleted_fid {
2181 : parrello 1.24 my ($self, $fid) = @_;
2182 :     my $exists = $self->{sprout}->Exists("Feature", $fid);
2183 :     return !$exists;
2184 : parrello 1.1 }
2185 :    
2186 : parrello 1.10 =head3 close_enough
2187 :    
2188 : parrello 1.98 my $flag = $sfxlate->close_enough($locs1, $locs2, $bound);
2189 : parrello 1.10
2190 :     Return TRUE if the specified locations are within the specified
2191 :     distance. The locations must be on the same contig, and the midpoints
2192 :     must be within the specified bound.
2193 :    
2194 : parrello 1.13 =over 4
2195 :    
2196 : parrello 1.10 =item locs1, locs2
2197 :    
2198 :     Locations to compare. Each location is a 3-tuple consisting of a contig ID,
2199 :     a starting offset, and an ending offset. Note that the 3-tuple represents
2200 :     a SEED-style, not a Sprout-style location.
2201 :    
2202 :     =item bound
2203 :    
2204 :     Maximum distance between the midpoints of the location.
2205 :    
2206 :     =item RETURN
2207 :    
2208 :     Returns TRUE if the two locations are close enough, else FALSE.
2209 :    
2210 :     =back
2211 :    
2212 :     =cut
2213 : parrello 1.13 #: Return Type $;
2214 : parrello 1.1 sub close_enough {
2215 : parrello 1.10 my ($self, $locs1, $locs2, $bound) = @_;
2216 :     return FIG::close_enough($locs1, $locs2, $bound);
2217 : parrello 1.1 }
2218 :    
2219 : parrello 1.10 =head3 taxonomy_of
2220 :    
2221 : parrello 1.98 my $taxonomy = $sfxlate->taxonomy_of($genome);
2222 : parrello 1.10
2223 :     Return the taxonomy of the specified genome.
2224 :    
2225 :     =over 4
2226 :    
2227 :     =item genome
2228 :    
2229 :     ID of the genome whose taxonomy is desired.
2230 :    
2231 :     =item RETURN
2232 :    
2233 :     Returns a complete taxonomy for the organism, with entiries separated
2234 :     by semi-colons.
2235 :    
2236 :     =back
2237 :    
2238 :     =cut
2239 : parrello 1.13 #: Return Type $;
2240 : parrello 1.1 sub taxonomy_of {
2241 : parrello 1.24 my ($self, $genome) = @_;
2242 :     my @retVal = $self->{sprout}->Taxonomy($genome);
2243 :     return join "; ", @retVal;
2244 : parrello 1.1 }
2245 :    
2246 : parrello 1.10 =head3 crude_estimate_of_distance
2247 :    
2248 : parrello 1.98 my $distance = $sfxlate->crude_estimate_of_distance($genome1, $genome2);
2249 : parrello 1.10
2250 :     Returns a crude taxonomic distance between the two genomes. The distance
2251 :     will be 0 for genomes with identical taxonomies and 1 for genomes from
2252 :     different domains.
2253 :    
2254 :     =cut
2255 : parrello 1.13 #: Return Type $;
2256 : parrello 1.1 sub crude_estimate_of_distance {
2257 : parrello 1.24 my ($self, $genome1, $genome2) = @_;
2258 :     return $self->{sprout}->CrudeDistance($genome1, $genome2);
2259 : parrello 1.1 }
2260 :    
2261 : parrello 1.10 =head3 ec_name
2262 :    
2263 : parrello 1.98 my $name = $sfxlate->ec_name($ec);
2264 : parrello 1.10
2265 :     Return the name of the role identified by the specified EC number.
2266 :    
2267 :     =over 4
2268 :    
2269 :     =item ec
2270 :    
2271 :     EC number of the role whose name is desired.
2272 :    
2273 :     =item RETURN
2274 :    
2275 :     Returns the name of the role identified by the specified EC number.
2276 :    
2277 : parrello 1.13 =back
2278 :    
2279 : parrello 1.10 =cut
2280 : parrello 1.13 #: Return Type $;
2281 : parrello 1.1 sub ec_name {
2282 : parrello 1.24 my ($self, $ec) = @_;
2283 :     return $self->{sprout}->RoleName($ec);
2284 :     }
2285 :    
2286 :     =head3 coupled_to
2287 :    
2288 : parrello 1.98 my @coupled_to = $fig->coupled_to($peg);
2289 : parrello 1.24
2290 :     Return a list of functionally coupled PEGs.
2291 :    
2292 :     =over 4
2293 :    
2294 :     =item peg
2295 :    
2296 :     ID of the protein encoding group whose functionally-coupled proteins are desired.
2297 :    
2298 :     =item RETURN
2299 :    
2300 :     Returns a list of 2-tuples, each consisting of the ID of a coupled PEG and a score. If
2301 :     there are no PEGs functionally coupled to the incoming PEG, it will return an empty
2302 :     list. If the PEG data is not present, it will return C<undef>.
2303 :    
2304 :     =back
2305 :    
2306 :     =cut
2307 :     #: Return Type @@;
2308 :     sub coupled_to {
2309 :     my ($self, $peg) = @_;
2310 :     my %couplets = $self->{sprout}->CoupledFeatures($peg);
2311 : parrello 1.95 Trace(scalar(keys %couplets) . " couplings returned from Sprout.") if T(coupling => 3);
2312 : parrello 1.24 my @retVal = ();
2313 :     for my $otherPeg (sort keys %couplets) {
2314 :     push @retVal, [$otherPeg, $couplets{$otherPeg}];
2315 :     }
2316 :     return @retVal;
2317 : parrello 1.1 }
2318 :    
2319 : overbeek 1.56 # Bruce will have to add abstract coupling data later
2320 :    
2321 :     sub abstract_coupled_to {
2322 :     my ($self, $peg) = @_;
2323 :     return undef;
2324 :     }
2325 :    
2326 : parrello 1.102 =head3 coupled_to_batch
2327 :    
2328 :     my @couplings = $fig->coupled_to_batch(@pegs);
2329 :    
2330 :     Return the functional couplings of one or more features. This method essentially
2331 :     returns the result one would get from calling L</coupled_to> for each individual
2332 :     feature, but saves some overhead because it only queries the coupling server
2333 :     once.
2334 :    
2335 :     =over 4
2336 :    
2337 :     =item pegs
2338 :    
2339 :     A list of the relevant feature IDs.
2340 :    
2341 :     =item RETURN
2342 :    
2343 :     Returns a list of 3-tuples. Each tuple consists of a feature from the input list, a coupled-to
2344 :     feature, and the coupling score.
2345 :    
2346 :     =back
2347 :    
2348 :     =cut
2349 :    
2350 :     sub coupled_to_batch {
2351 :     # Get the parameters.
2352 :     my ($self, @pegs) = @_;
2353 :     # Query the coupling server.
2354 :     my @retVal = FIGRules::NetCouplingData('coupled_to_batch', id1 => \@pegs);
2355 :     # Return the result.
2356 :     return @retVal;
2357 :     }
2358 :    
2359 :     =head3 genome_info
2360 :    
2361 :     my $info = $fig->genome_info();
2362 :    
2363 :     Return an array reference of information from the genome table.
2364 :    
2365 :     =over 4
2366 :    
2367 :     =item RETURN
2368 :    
2369 :     Returns a reference to a list of lists, one list per genome. Each genome's list entry
2370 :     contains the genome ID, organism name, number of base pairs, taxonomic domain,
2371 :     number of PEGs, number of RNAs, and the complete flag, in that order.
2372 :    
2373 :     =back
2374 :    
2375 :     =cut
2376 :    
2377 :     sub genome_info {
2378 :     my ($self) = @_;
2379 :     # Get the desired data from the genome table.
2380 :     my @gData = $self->{sprout}->GetAll(['Genome'], "", [], ['Genome(id)', 'Genome(taxonomy)', 'Genome(dna-size)',
2381 :     'Genome(taxonomy)', 'Genome(pegs)', 'Genome(rnas)',
2382 :     'Genome(complete)']);
2383 :     # We need to convert the taxonomy information to the domain and species name, so we'll
2384 :     # loop through the genomes and reconstruct the list.
2385 :     my @retVal = ();
2386 :     for my $gDatum (@gData) {
2387 :     my @taxa = split /\s*;\s*/, $gDatum->[1];
2388 :     # The domain is always first.
2389 :     my $domain = $taxa[0];
2390 :     # The name is the last three pieces joined together.
2391 :     my $gname = join(" ", @taxa[-3, -2, -1]);
2392 :     # Put the result together.
2393 :     push @retVal, [ $gDatum->[0], $gname, $gDatum->[2], $domain, $gDatum->[4], $gDatum->[5],
2394 :     $gDatum->[6] ]
2395 :     }
2396 :     # Return the result.
2397 :     return \@retVal;
2398 :     }
2399 :    
2400 :    
2401 : parrello 1.10 =head3 feature_annotations
2402 :    
2403 : parrello 1.98 my @descriptors = $sfxlate->feature_annotations($feature, $rawFlag);
2404 : parrello 1.10
2405 :     Return the annotations of a feature.
2406 :    
2407 :     =over 4
2408 :    
2409 :     =item feature
2410 :    
2411 :     ID of the feature whose annotations are desired.
2412 :    
2413 : parrello 1.40 =item rawFlag (optional)
2414 :    
2415 :     If TRUE, the time will be returned as a raw number; otherwise, the time will
2416 :     be returned in human-readable form.
2417 :    
2418 : parrello 1.10 =item RETURN
2419 :    
2420 : parrello 1.13 Returns a list of annotation descriptors. Each descriptor is a 4-tuple with
2421 : parrello 1.10 the following elements.
2422 :    
2423 :     * B<featureID> ID of the relevant feature.
2424 :    
2425 : parrello 1.40 * B<timeStamp> time the annotation was made.
2426 : parrello 1.10
2427 :     * B<user> ID of the user who made the annotation
2428 :    
2429 :     * B<text> text of the annotation.
2430 :    
2431 :     =back
2432 :    
2433 :     =cut
2434 : parrello 1.13 #: Return Type @@;
2435 : parrello 1.1 sub feature_annotations {
2436 : parrello 1.40 my ($self, $feature, $rawFlag) = @_;
2437 :     my @annotations = $self->{sprout}->FeatureAnnotations($feature, $rawFlag);
2438 : parrello 1.13 # Sprout hands back hashes. We need to convert them to tuples.
2439 :     my @retVal = ();
2440 :     for my $tupleHash (@annotations) {
2441 :     push @retVal, [$tupleHash->{featureID}, $tupleHash->{timeStamp}, $tupleHash->{user}, $tupleHash->{text}];
2442 :     }
2443 :     return @retVal;
2444 : parrello 1.1 }
2445 :    
2446 : parrello 1.10 =head3 possibly_truncated
2447 :    
2448 : parrello 1.98 my $flag = $sfxlate->possibly_truncated($fid);
2449 : parrello 1.10
2450 :     Returns TRUE if the indicated feature occurs near the end of a contig,
2451 :     else FALSE. This method calls the FIG.pm method passing in the SFXlate
2452 :     object as the first parameter. As a result, when the FIG method tries to
2453 :     call another FIG method, it will call the corresponding method on this
2454 :     object instead.
2455 :    
2456 :     =over 4
2457 :    
2458 :     =item fid
2459 :    
2460 :     ID of the relevant feature.
2461 :    
2462 :     =item RETURN
2463 :    
2464 :     Returns TRUE if the feature's location is near the end of the containing
2465 :     contig, else FALSE.
2466 :    
2467 :     =back
2468 :    
2469 :     =cut
2470 : parrello 1.13 #: Return Type $;
2471 : parrello 1.10 sub possibly_truncated {
2472 :     my ($self, $fid) = @_;
2473 :     return FIG::possibly_truncated($self, $fid);
2474 :     }
2475 :    
2476 :     =head3 near_end
2477 :    
2478 : parrello 1.98 my $flag = $sfxlate->near_end($genome, $contig, $x);
2479 : parrello 1.10
2480 : parrello 1.40 Return TRUE if the offset I<$x> is near either end of the specified contig,
2481 : parrello 1.10 else FALSE.
2482 :    
2483 :     =over 4
2484 :    
2485 :     =item genome
2486 :    
2487 :     ID of the relevant genome.
2488 :    
2489 :     =item contig
2490 :    
2491 :     ID of the relevant contig.
2492 :    
2493 :     =item x
2494 :    
2495 :     Offset to check for its proximity to either end of the contig.
2496 :    
2497 :     =item RETURN
2498 :    
2499 :     Returns TRUE if the specified offset is within 300 positions of either end
2500 :     of the contig, else FALSE.
2501 :    
2502 :     =back
2503 :    
2504 :     =cut
2505 : parrello 1.13 #: Return Type $;
2506 : parrello 1.10 sub near_end {
2507 :     my ($self, $genome, $contig, $x) = @_;
2508 :     return FIG::near_end($self, $genome, $contig, $x);
2509 :     }
2510 :    
2511 :     =head3 genome_of
2512 :    
2513 : parrello 1.98 my $genomeID = $sprout->genome_of($fid);
2514 : parrello 1.10
2515 :     Return the ID of the genome containing the specified feature.
2516 :    
2517 :     =over 4
2518 :    
2519 :     =item fid
2520 :    
2521 :     ID of the feature whose genome is desired.
2522 :    
2523 :     =item RETURN
2524 :    
2525 :     Returns the ID of the genome that contains the feature.
2526 :    
2527 : parrello 1.13 =back
2528 :    
2529 : parrello 1.10 =cut
2530 : parrello 1.13 #: Return Type $;
2531 : parrello 1.10 sub genome_of {
2532 :     my ($self, $fid) = @_;
2533 : parrello 1.100 my $retVal;
2534 :     # FIG.pm returns undefined if the feature ID is undefined. We need to do the same.
2535 :     if (defined $fid) {
2536 :     $retVal = $self->{sprout}->GenomeOf($fid);
2537 :     }
2538 :     return $retVal;
2539 : parrello 1.10 }
2540 :    
2541 : parrello 1.24 =head3 get_attributes
2542 : parrello 1.10
2543 : parrello 1.98 my @properties = $sfxlate->get_attributes(@parms);
2544 : parrello 1.10
2545 : parrello 1.92 Return a list of n-tuples from the attribute server.
2546 : parrello 1.105
2547 : parrello 1.10
2548 :     =cut
2549 : parrello 1.13 #: Return Type @@;
2550 : parrello 1.24 sub get_attributes {
2551 : parrello 1.93 my ($self, @parms) = @_;
2552 : parrello 1.98 my $ca = $self->attribute_object();
2553 : parrello 1.105 my @retVal;
2554 : parrello 1.111 if ($ca) {
2555 : parrello 1.105 push @retVal, $ca->GetAttributes(@parms);
2556 :     }
2557 :     return @retVal;
2558 : parrello 1.98 }
2559 :    
2560 :     # Deprecated feature_attributes call. Use get_attributes instead.
2561 :     sub feature_attributes {
2562 :     return get_attributes(@_);
2563 :     }
2564 :    
2565 :     =head3 attribute_object
2566 :    
2567 :     my $ca = $fig->attribute_object();
2568 :    
2569 :     Return the attribute object for this Sprout instance. If an attribute
2570 :     object is already attached, it will be returned immediately; otherwise,
2571 :     one will be created.
2572 :    
2573 :     =cut
2574 :    
2575 :     sub attribute_object {
2576 :     # Get the parameters.
2577 :     my ($self) = @_;
2578 :     # See if we have an attribute object already cached.
2579 : parrello 1.92 if (! defined $self->{ca}) {
2580 : parrello 1.98 # We don't, so determine the type of object we need.
2581 : parrello 1.92 if ($FIG_Config::attrURL) {
2582 : parrello 1.98 # Here it's a remote attribute object.
2583 : parrello 1.92 Trace("Remote attribute server $FIG_Config::attrURL chosen.") if T(3);
2584 :     $self->{ca} = RemoteCustomAttributes->new($FIG_Config::attrURL);
2585 : parrello 1.111 } elsif (! $FIG_Config::attrHost) {
2586 :     # Here attributes are disabled.
2587 :     $self->{ca} = "";
2588 : parrello 1.92 } else {
2589 : parrello 1.98 # Here it's a local database.
2590 : parrello 1.92 Trace("Local attribute database $FIG_Config::attrDbName chosen.") if T(3);
2591 :     my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));
2592 : parrello 1.105 # Insure we recover from errors.
2593 :     eval {
2594 :     $self->{ca} = CustomAttributes->new(user => $user);
2595 :     };
2596 :     if ($@) {
2597 :     Trace("Attribute connection error: $@") if T(0);
2598 :     }
2599 : parrello 1.92 }
2600 :     }
2601 : parrello 1.98 # Return the cached object.
2602 :     return $self->{ca};
2603 : parrello 1.24 }
2604 :    
2605 : parrello 1.98 =head3 get_peg_keys
2606 :    
2607 :     my @list = $fig->get_peg_keys();
2608 :    
2609 :     Return a list of the attribute keys that only apply to pegs. This list is
2610 :     essentially all of the keys in the C<peg> group.
2611 :    
2612 :     =cut
2613 :    
2614 :     sub get_peg_keys {
2615 :     # Get the parameters.
2616 :     my ($self) = @_;
2617 :     # Get the attribute object.
2618 :     my $ca = $self->attribute_object();
2619 : parrello 1.111 # Compute the list of peg keys. Note we may not have a valid attribute
2620 :     # object, in which case we return nothing.
2621 :     my @retVal;
2622 :     if ($ca) {
2623 :     @retVal = $ca->GetAttributeKeys('peg');
2624 :     }
2625 : parrello 1.98 # Return the result.
2626 :     return @retVal;
2627 : parrello 1.1 }
2628 :    
2629 : parrello 1.24 =head3 get_translation
2630 :    
2631 : parrello 1.98 my $translation = $sfxlate->get_translation($feature);
2632 : parrello 1.24
2633 :     Return the protein sequence for the specified feature.
2634 :    
2635 :     =over 4
2636 :    
2637 :     =item feature
2638 :    
2639 :     ID of the feature whose protein sequence is desired.
2640 :    
2641 :     =item RETURN
2642 :    
2643 :     Returns the protein sequence for the specified feature.
2644 :    
2645 :     =back
2646 :    
2647 :     =cut
2648 :     #: Return Type $;
2649 : parrello 1.1 sub get_translation {
2650 : parrello 1.24 my ($self, $feature) = @_;
2651 :     return $self->{sprout}->FeatureTranslation($feature);
2652 : parrello 1.1 }
2653 :    
2654 : parrello 1.116 =head3 subsystems_for_peg_complete
2655 : parrello 1.106
2656 : parrello 1.116 my @list = $fig->subsystems_for_peg_complete($peg);
2657 : parrello 1.106
2658 :     Return information about the subsystems in which the specified feature
2659 :     participate.
2660 :    
2661 :     =over 4
2662 :    
2663 : parrello 1.115 =item peg
2664 : parrello 1.106
2665 : parrello 1.115 ID of the relevant feature or a reference to a list of the relevant features.
2666 : parrello 1.106
2667 :     =item RETURN
2668 :    
2669 :     Returns a list of 4-tuples. Each 4-tuple will contain a subsystem name, the role the indicated
2670 :     feature plays in the subsystem, the variant code, and a flag that is TRUE if the role is auxiliary.
2671 :    
2672 :     =back
2673 :    
2674 :     =cut
2675 :    
2676 :     sub subsystems_for_peg_complete {
2677 :     # Get the parameters.
2678 :     my ($self, $peg) = @_;
2679 : parrello 1.115 # Insure that we're dealing with a list of pegs.
2680 :     my $pegs = (ref($peg) eq 'ARRAY' ? $peg : [$peg]);
2681 :     # Get the sprout object.
2682 :     my $sprout = $self->{sprout};
2683 : parrello 1.106 # Declare the return variable.
2684 : parrello 1.115 my @retVal;
2685 :     # Loop through the PEGs.
2686 :     for my $fid (@$pegs) {
2687 :     # Get the genome ID.
2688 :     my $genomeID = $self->genome_of($fid);
2689 :     # Get the desired 4-tuples.
2690 :     push @retVal, $sprout->GetAll([qw(ContainsFeature IsRoleOf HasSSCell ParticipatesIn OccursInSubsystem)],
2691 :     "ContainsFeature(to-link) = ? AND ParticipatesIn(from-link) = ? AND " .
2692 :     "OccursInSubsystem(from-link) = IsRoleOf(from-link)",
2693 :     [$fid, $genomeID],
2694 :     [qw(HasSSCell(from-link) IsRoleOf(from-link) ParticipatesIn(variant-code)
2695 :     OccursInSubsystem(auxiliary))]);
2696 :     }
2697 : parrello 1.106 # Return the result.
2698 :     return @retVal;
2699 :     }
2700 :    
2701 : parrello 1.116 =head3 subsystems_for_pegs_complete
2702 :    
2703 :     my %pegHash = $fig->subsystems_for_pegs_complete(\@pegs, $aux_flag);
2704 :    
2705 :     Return a hash that maps the incoming pegs to the list of subsystems,
2706 :     roles and variants that the pegs appear in. Each peg is mapped to an
2707 :     array of 3-tuples. Each 3-tuple contains a subsystem name, a role name,
2708 :     and a variant code. Thus, for each peg we will get a list of the
2709 :     subsystems in which it appears along with the relevant role and variant.
2710 :    
2711 :     =over 4
2712 :    
2713 :     =item pegs
2714 :    
2715 :     Reference to a list of feature IDs.
2716 :    
2717 :     =item aux_flag
2718 :    
2719 :     TRUE if auxiliary roles are to be included in the results, else FALSE.
2720 :    
2721 :     =item RETURN
2722 :    
2723 :     Returns a hash that maps each incoming feature ID to a list of its
2724 :     subsystem roles. Each role is represented by a 3-tuple consisting of
2725 :     the subsystem name, the role name, and the relevant variant code.
2726 :    
2727 :     =back
2728 :    
2729 :     =cut
2730 :    
2731 :     sub subsystems_for_pegs_complete {
2732 :     # Get the parameters.
2733 :     my ($self, $pegs, $aux_flag) = @_;
2734 :     # Get the sprout object.
2735 :     my $sprout = $self->{sprout};
2736 :     # We need to compute the filter string for this query.
2737 :     my $filter = "ContainsFeature(to-link) = ? AND ParticipatesIn(from-link) = ? AND " .
2738 :     "OccursInSubsystem(from-link) = IsRoleOf(from-link)";
2739 :     # If we do NOT want auxiliary roles, we add an extra filter condition.
2740 :     if (! $aux_flag) {
2741 :     $filter .= " AND OccursInSubsystem(auxiliary) = 0";
2742 :     }
2743 :     # Declare the return variable.
2744 :     my %retVal;
2745 :     # Loop through the PEGs.
2746 :     for my $fid (@$pegs) {
2747 :     # Get the genome ID.
2748 :     my $genomeID = $self->genome_of($fid);
2749 :     # Get the desired 3-tuples.
2750 :     my @list = $sprout->GetAll([qw(ContainsFeature IsRoleOf HasSSCell
2751 :     ParticipatesIn OccursInSubsystem)],
2752 :     $filter, [$fid, $genomeID],
2753 :     [qw(HasSSCell(from-link) IsRoleOf(from-link)
2754 :     ParticipatesIn(variant-code))]);
2755 :     # Store the list in the return hash.
2756 :     $retVal{$fid} = \@list;
2757 :     }
2758 :    
2759 :     # Return the result.
2760 :     return %retVal;
2761 :     }
2762 :    
2763 : parrello 1.98 =head3 subsystems_for_pegs
2764 :    
2765 :     my @list = $fig->subsystems_for_pegs(\@pegs, $noaux);
2766 :    
2767 :     Return information about the subsystems in which the features in a list
2768 :     participate. For each incoming feature, this method will return a list of
2769 :     2-tuples, the first element being the subsystem name and the second being
2770 :     the feature's role in that subsystem.
2771 :    
2772 :     =over 4
2773 :    
2774 :     =item pegs
2775 :    
2776 :     Reference to a list of features whose subsystem information is desired.
2777 :    
2778 :     =item noaux (optional)
2779 :    
2780 :     If TRUE, subsystems in which a feature has an auxiliary role will be omitted
2781 : parrello 1.106 from the results.
2782 : parrello 1.98
2783 :     =item RETURN
2784 :    
2785 :     Returns a list that contains feature IDs followed by list references. For example,
2786 :     if three features were specified in the parameters, the list would have six
2787 :     elements. The first element will be a feature ID, the second will be a reference
2788 :     to a list of 2-tuples, the third will be another feature ID, the fourth will be
2789 :     a reference to another list of 2-tuples, and so on. Each 2-tuple contains the
2790 :     name of a subsystem followed by a role. The 2-tuples contain the subsystem information
2791 :     for the preceding feature.
2792 :    
2793 :     =back
2794 :    
2795 :     =cut
2796 :    
2797 :     sub subsystems_for_pegs {
2798 :     # Get the parameters.
2799 :     my ($self, $pegs, $noaux) = @_;
2800 : parrello 1.107 # If we're filtering out auxiliary roles, create a filter clause for that.
2801 : parrello 1.111 my $auxFilter = ($noaux ? "AND OccursInSubsystem(auxiliary) = 0" : "");
2802 : parrello 1.98 # Build a hash of the result data.
2803 :     my %retVal = ();
2804 :     # Loop through the pegs.
2805 :     for my $peg (@{$pegs}) {
2806 :     # Get this peg's subsystem data. It will come back as a series of 2-tuples.
2807 : parrello 1.107 my @subPairs = $self->{sprout}->GetAll([qw(ContainsFeature IsRoleOf HasSSCell OccursInSubsystem)],
2808 :     "ContainsFeature(to-link) = ? AND " .
2809 : parrello 1.111 "OccursInSubsystem(from-link) = IsRoleOf(from-link) $auxFilter", [$peg],
2810 : parrello 1.98 [qw(HasSSCell(from-link) IsRoleOf(from-link))]);
2811 :     # Add it to the hash we're building.
2812 :     $retVal{$peg} = \@subPairs;
2813 :     }
2814 :     # Return the result. We convert the hash to a list.
2815 :     return (%retVal);
2816 :     }
2817 : parrello 1.1
2818 : parrello 1.106
2819 : parrello 1.119 =head3 families_containing_peg
2820 :    
2821 :     my @fams = $fig->families_containing_peg($fid);
2822 :    
2823 :     Return a list of the names of the FIGfams containing the specified
2824 :     feature.
2825 :    
2826 :     =over 4
2827 :    
2828 :     =item fid
2829 :    
2830 :     ID of the feature of interest.
2831 :    
2832 :     =item RETURN
2833 :    
2834 :     Returns a list of FIGfam IDs for families containing the feature. If the feature
2835 :     is not in any FIGfam, it returns an empty list. Currently, no feature can be in
2836 :     more than one FIGfam, but this is not necessarily guaranteed for the future.
2837 :    
2838 :     =back
2839 :    
2840 :     =cut
2841 :    
2842 :     sub families_containing_peg {
2843 :     # Get the parameters.
2844 :     my ($self, $fid) = @_;
2845 :     # Get a lightweight FigFams object.
2846 :     my $figfam_data = &FIG::get_figfams_data();
2847 :     my $ffs = new FFs($figfam_data, $self);
2848 :     my @retVal = $ffs->families_containing_peg($fid);
2849 :     # Return the result.
2850 :     return @retVal;
2851 :     }
2852 :    
2853 :    
2854 : parrello 1.13 =head3 merged_related_annotations
2855 :    
2856 : parrello 1.98 my @annotations = $sfxlate->merged_related_annotations(\@list);
2857 : parrello 1.13
2858 :     Returns a merged list of the annotations for the features in a list. Each annotation is
2859 :     represented by a 4-tuple of the form C<($fid, $timestamp, $userID, $annotation)>, where
2860 :     C<$fid> is the ID of a feature, C<$timestamp> is the time at which the annotation was made,
2861 :     C<$userID> is the ID of the user who made the annotation, and C<$annotation> is the annotation
2862 :     text. The list is sorted by the timestamp.
2863 :    
2864 :     =over 4
2865 :    
2866 :     =item list
2867 :    
2868 :     List of the IDs for the features whose annotations are desired.
2869 :    
2870 :     =item RETURN
2871 :    
2872 :     Returns a list of annotation descriptions sorted by the annotation time.
2873 :    
2874 :     =back
2875 :    
2876 :     =cut
2877 :     #: Return Type @@;
2878 : parrello 1.1 sub merged_related_annotations {
2879 : parrello 1.24 my ($self, $list) = @_;
2880 :     my @retVal = $self->{sprout}->MergedAnnotations($list);
2881 :     return @retVal;
2882 : parrello 1.1 }
2883 :    
2884 : parrello 1.98 =head3 run
2885 :    
2886 :     SFXlate::run($cmd);
2887 :    
2888 :     or
2889 :    
2890 :     $fig->run($cmd);
2891 :    
2892 :     =head3 run
2893 :    
2894 :     FIG::run($cmd);
2895 :    
2896 :     or
2897 :    
2898 :     $fig->run($cmd);
2899 :    
2900 :     Run a command. If the command fails, the error will be traced.
2901 :    
2902 :     =over 4
2903 :    
2904 :     =item cmd
2905 :    
2906 :     Text of the command to run. The C<system> function will be used to
2907 :     invoke the command.
2908 :    
2909 :     =back
2910 :    
2911 :     =cut
2912 :    
2913 :     sub run {
2914 :     shift if UNIVERSAL::isa($_[0],__PACKAGE__);
2915 :     my($cmd) = @_;
2916 :    
2917 :     if ($ENV{FIG_VERBOSE}) {
2918 :     my @tmp = `date`;
2919 :     chomp @tmp;
2920 :     print STDERR "$tmp[0]: running $cmd\n";
2921 :     }
2922 :     Trace("Running command: $cmd") if T(3);
2923 :     (system($cmd) == 0) || Confess("FAILED: $cmd");
2924 :     }
2925 :    
2926 :     =head3 run_gathering_output
2927 :    
2928 :     my @lines = SFXlate::run_gathering_output($cmd, @args);
2929 :    
2930 :     or
2931 :    
2932 :     my @lines = $fig->run_gathering_output($cmd, @args);
2933 :    
2934 :     or
2935 :    
2936 :     my $text = $fig->run_gathering_output($cmd, @args);
2937 :    
2938 :     Run a command, gathering the output. This is similar to the backtick
2939 :     operator, but it does not invoke the shell. Note that the argument list
2940 :     must be explicitly passed in the parameter lit.
2941 :    
2942 :     If the command fails, the error will be traced.
2943 :    
2944 :     =over 4
2945 :    
2946 :     =item cmd
2947 :    
2948 :     Name of the command to run.
2949 :    
2950 :     =item args
2951 :    
2952 :     List of the arguments to the command. Each argument must be
2953 :     passed as a separate element of this list.
2954 :    
2955 :     =item RETURN
2956 :    
2957 :     In array context, returns a list of output lines. In a scalar context,
2958 :     returns the command output as a single string.
2959 :    
2960 :     =back
2961 :    
2962 :     =cut
2963 :    
2964 :     sub run_gathering_output {
2965 :     shift if UNIVERSAL::isa($_[0],__PACKAGE__);
2966 :     my($cmd, @args) = @_;
2967 :    
2968 :     #
2969 :     # Run the command in a safe fork-with-pipe/exec.
2970 :     #
2971 :    
2972 :     my $pid = open(PROC_READ, "-|");
2973 :    
2974 :     if ($pid == 0) {
2975 :     exec { $cmd } $cmd, @args;
2976 : parrello 1.101 }
2977 :     if ($pid == 0) {
2978 :     # This next statement will only execute if the exec function fails. The goofy
2979 :     # use of the redundant IF is to avoid a compiler warning.
2980 : parrello 1.98 Confess("Could not execute $cmd @args: $!");
2981 :     }
2982 :     if (wantarray) {
2983 :     my @out;
2984 :     while (<PROC_READ>) {
2985 :     push(@out, $_);
2986 :     }
2987 :     if (!close(PROC_READ)) {
2988 :     Confess("FAILED: $cmd @args with error return $?");
2989 :     }
2990 :     return @out;
2991 :     } else {
2992 :     my $out = '';
2993 :    
2994 :     while (<PROC_READ>) {
2995 :     $out .= $_;
2996 :     }
2997 :     if (!close(PROC_READ)) {
2998 :     Confess("FAILED: $cmd @args with error return $?");
2999 :     }
3000 :     return $out;
3001 :     }
3002 :     }
3003 :    
3004 : parrello 1.15 =head3 assign_function
3005 :    
3006 : parrello 1.98 my $ok = $sfxlate->assign_function($fid,$user,$function);
3007 : parrello 1.15
3008 :     Assign a function to the specified feature. In Sprout, an assigned function
3009 :     is a special type of annotation. The Sprout methods automatically convert
3010 :     the function text into the correct structured formay.
3011 :    
3012 :     =over 4
3013 :    
3014 :     =item fid
3015 :    
3016 :     ID of the feature to receive the assignment.
3017 :    
3018 :     =item user
3019 :    
3020 :     Name of the user making the assignment.
3021 :    
3022 :     =item function
3023 :    
3024 :     Text of the functional assignment.
3025 :    
3026 :     =back
3027 :    
3028 :     =cut
3029 :     #: Return Type $;
3030 :     sub assign_function {
3031 :     my ($self, $fid, $user, $function) = @_;
3032 :     my $retVal = $self->{sprout}->AssignFunction($fid, $user, $function);
3033 :     return $retVal;
3034 :     }
3035 :    
3036 : parrello 1.24 =head3 neighborhood_of_role
3037 :    
3038 : parrello 1.98 my @roleList = $sprout->neighborhood_of_role($role);
3039 : parrello 1.24
3040 :     Returns a list of the roles that occur in the same diagram as the specified role. Because
3041 :     diagrams and roles are in a many-to-many relationship with each other, the list is
3042 :     essentially the set of roles from all of the maps that contain the incoming role. Such
3043 :     roles are considered neighbors because they are used together in cellular subsystems.
3044 :    
3045 :     =over 4
3046 :    
3047 :     =item role
3048 :    
3049 :     ID of the role whose neighbors are desired.
3050 :    
3051 :     =item RETURN
3052 :    
3053 :     Returns a list containing the IDs of the roles that are related to the incoming role.
3054 :    
3055 :     =back
3056 :    
3057 :     =cut
3058 :     #: Return Type @;
3059 : parrello 1.1 sub neighborhood_of_role {
3060 : parrello 1.24 my ($self, $role) = @_;
3061 :     my @retVal = $self->{sprout}->RoleNeighbors($role);
3062 : parrello 1.6 Trace("roles found = " . join(", ", @retVal)) if T(4);
3063 : parrello 1.24 return @retVal;
3064 : parrello 1.1 }
3065 :    
3066 : parrello 1.24 =head3 org_and_color_of
3067 :    
3068 : parrello 1.98 my ($orgName, $color) = $sfxlate->org_and_color_of($fid);
3069 : parrello 1.24
3070 :     Return the name of the organism to which the specified feature belongs.
3071 :     The organism name is generally the genus and species followed by the
3072 :     unique characterization.
3073 :    
3074 :     =over 4
3075 :    
3076 :     =item fid
3077 :    
3078 :     ID of the feature whose organism information is desired.
3079 :    
3080 :     =item RETURN
3081 :    
3082 :     Returns a 2-tuple. The first element is a string consisting of the genus,
3083 :     species, and unique characterization of the specified feature's organism.
3084 :     The second element is an HTML color code based on the domain.
3085 :    
3086 :     =back
3087 :    
3088 :     =cut
3089 :     #: Return Type @;
3090 : parrello 1.1 sub org_and_color_of {
3091 : parrello 1.24 my ($self, $fid) = @_;
3092 :     my $genome = $self->{sprout}->GenomeOf($fid);
3093 :     my @taxonomy = $self->{sprout}->Taxonomy($genome);
3094 :     my $color = FIG::domain_color($taxonomy[0]);
3095 :     my $gs = $taxonomy[$#taxonomy];
3096 :     return ($gs, $color);
3097 : parrello 1.1 }
3098 :    
3099 : parrello 1.13 =head3 org_of
3100 :    
3101 : parrello 1.98 my $orgName = $sfxlate->org_of($fid);
3102 : parrello 1.13
3103 :     Return the name of the organism to which the specified feature belongs.
3104 :     The organism name is generally the genus and species followed by the
3105 :     unique characterization.
3106 :    
3107 :     =over 4
3108 :    
3109 :     =item fid
3110 :    
3111 :     ID of the feature whose organism information is desired.
3112 :    
3113 :     =item RETURN
3114 :    
3115 :     Returns the genus, species, and unique characterization of the specified
3116 :     feature's organism. The result is returned as a single string.
3117 :    
3118 :     =back
3119 :    
3120 :     =cut
3121 :     #: Return Type $;
3122 : parrello 1.1 sub org_of {
3123 : parrello 1.24 my ($self, $fid) = @_;
3124 : parrello 1.104 my $genome = $self->{sprout}->GenomeOf($fid);
3125 :     my $retVal = $self->{sprout}->GenusSpecies($genome);
3126 : parrello 1.13 return $retVal;
3127 : parrello 1.1 }
3128 :    
3129 : parrello 1.104 =head3 to_structured_english
3130 :    
3131 :     my ($ev_code_list, $subsys_list, $english_string) = $fig->to_structured_english($fig, $peg, $escape_flag);
3132 :    
3133 :     Create a structured English description of the evidence codes for a PEG,
3134 :     in either HTML or text format. In addition to the structured text, we
3135 :     also return the subsystems and evidence codes for the PEG in list form.
3136 :    
3137 :     =over 4
3138 :    
3139 :     =item peg
3140 :    
3141 :     ID of the protein or feature whose evidence is desired.
3142 :    
3143 :     =item escape_flag
3144 :    
3145 :     TRUE if the output text should be HTML, else FALSE
3146 :    
3147 :     =item RETURN
3148 :    
3149 :     Returns a three-element list. The first element is a reference to a list of evidence codes,
3150 :     the second is a list of the subsystem containing the peg, and the third is the readable
3151 :     text description of the evidence.
3152 :    
3153 :     =back
3154 :    
3155 :     =cut
3156 :    
3157 :     sub to_structured_english {
3158 :     my ($self, $peg, $escape_flag) = @_;
3159 :     return FIGRules::to_structured_english($self, $peg, $escape_flag);
3160 :     }
3161 :    
3162 : parrello 1.102
3163 : parrello 1.19 =head3 peg_to_subsystems
3164 :    
3165 : parrello 1.98 my @subsystems = $sfxlate->peg_to_subsystems($fid);
3166 : parrello 1.19
3167 :     Return a list of the subsystems in which the specified feature participates.
3168 :     In the Sprout system, a subsystem is connected to features indirectly
3169 :     via the B<SSCell> object.
3170 :    
3171 :     =over 4
3172 :    
3173 :     =item fid
3174 :    
3175 :     ID of the feature whose subsystems are desired.
3176 :    
3177 :     =item RETURN
3178 :    
3179 :     Returns a list of the IDs of the subsystems containing the specified
3180 :     feature.
3181 :    
3182 :     =back
3183 :    
3184 :     =cut
3185 :     #: Return Type @;
3186 : parrello 1.1 sub peg_to_subsystems {
3187 : parrello 1.24 my ($self, $fid) = @_;
3188 : parrello 1.26 my @retVal = $self->{sprout}->SubsystemList($fid);
3189 : parrello 1.83 return @retVal;
3190 : parrello 1.1 }
3191 :    
3192 : parrello 1.19 =head3 hypo
3193 :    
3194 : parrello 1.98 my $flag = $sfxlate->hypo($func);
3195 : parrello 1.19
3196 :     Return TRUE if the specified functional role is hypothetical, else FALSE.
3197 :     Hypothetical functional roles are identified by key words in the text,
3198 :     such as I<hypothesis>, I<predicted>, or I<glimmer> (among others).
3199 :    
3200 :     =over 4
3201 :    
3202 :     =item func
3203 :    
3204 :     Text of the functional role whose nature is to be determined.
3205 :    
3206 :     =item RETURN
3207 :    
3208 :     Returns TRUE if the role is hypothetical, else FALSE.
3209 :    
3210 :     =back
3211 :    
3212 :     =cut
3213 :     #: Return Type $;
3214 : parrello 1.1 sub hypo {
3215 : parrello 1.24 my ($self, $func) = @_;
3216 :     return FIG::hypo($func);
3217 : parrello 1.1 }
3218 :    
3219 : parrello 1.19 =head3 related_by_func_sim
3220 :    
3221 : parrello 1.98 my @fids = $sfxlate->related_by_func_sim($fid, $user);
3222 : parrello 1.19
3223 :     Return a list of the features that have a similar function to the specified
3224 :     feature as determined by the specified user. This method looks at the
3225 :     bidirectional best hits of the incoming feature and returns a list of the
3226 :     ones who have the same functional assignment with respect to the specified
3227 :     user.
3228 :    
3229 :     =over 4
3230 :    
3231 :     =item fid
3232 :    
3233 :     ID of the feature whose similarities are desired.
3234 :    
3235 :     =item user
3236 :    
3237 :     ID of the user whose functional assignments are to be used. The functional
3238 :     assignments chosen will be the most recent ones by the specified user or
3239 :     a user trusted by the specified user. If no user is specified, only the
3240 :     user C<FIG> will be considered.
3241 :    
3242 :     =item RETURN
3243 :    
3244 :     Returns a list of the IDs of the desired features.
3245 :    
3246 : parrello 1.26 =back
3247 : parrello 1.19
3248 :     =cut
3249 :     #: Return Type @;
3250 : parrello 1.1 sub related_by_func_sim {
3251 : parrello 1.24 my ($self, $fid, $user) = @_;
3252 :     my $function = $self->{sprout}->FunctionOf($fid, $user);
3253 :     my @retVal = ();
3254 :     if (! $self->hypo($function)) {
3255 :     push @retVal, $self->{sprout}->RelatedFeatures($fid, $function, $user);
3256 :     }
3257 :     return @retVal;
3258 : parrello 1.1 }
3259 :    
3260 : parrello 1.19 =head3 sort_fids_by_taxonomy
3261 :    
3262 : parrello 1.98 my @sortedFids = $sfxlate->sort_fids_by_taxonomy(@fidList);
3263 : parrello 1.19
3264 :     Sort the specified list of features according to the taxonomy of the
3265 :     feature's genome. The intent is to group features belonging to similar
3266 :     organisms.
3267 :    
3268 :     =over 4
3269 :    
3270 :     =item fidList
3271 :    
3272 :     List of feature IDs.
3273 :    
3274 :     =item RETURN
3275 :    
3276 :     Returns a list containing the same feature IDs ordered by their respective
3277 :     taxonomies.
3278 :    
3279 :     =back
3280 :    
3281 :     =cut
3282 :     #: Return Type @;
3283 : parrello 1.1 sub sort_fids_by_taxonomy {
3284 : parrello 1.12 my ($self, @fidList) = @_;
3285 :     return $self->{sprout}->TaxonomySort(\@fidList);
3286 : parrello 1.1 }
3287 :    
3288 : parrello 1.19 =head3 translatable
3289 :    
3290 : parrello 1.98 my $flag = $sfxlate->translatable($fid);
3291 : parrello 1.19
3292 :     Return TRUE if the specified feature has a translation, else FALSE.
3293 :    
3294 :     =over 4
3295 :    
3296 :     =item fid
3297 :    
3298 :     ID of the feature whose translatability is to be determined.
3299 :    
3300 :     =item RETURN
3301 :    
3302 : parrello 1.85 Returns TRUE if the feature exists in the database, else FALSE.
3303 : parrello 1.19
3304 :     =back
3305 :    
3306 :     =cut
3307 :     #: Return Type $;
3308 : parrello 1.1 sub translatable {
3309 : parrello 1.24 my ($self, $fid) = @_;
3310 :     my $feature = $self->{sprout}->GetEntity('Feature', $fid);
3311 :     my $retVal = 0;
3312 :     if ($feature) {
3313 : parrello 1.85 $retVal = 1;
3314 : parrello 1.24 }
3315 :     return $retVal;
3316 : parrello 1.1 }
3317 :    
3318 : parrello 1.19 =head3 peg_links
3319 :    
3320 : parrello 1.98 my @linkList = $sfxlate->peg_links($fid);
3321 : parrello 1.19
3322 :     List the links associated with a feature. These are generally HTML
3323 :     hyperlinks to pages with information about the feature or the feature's
3324 :     associated protein.
3325 :    
3326 :     =over 4
3327 :    
3328 :     =item fid
3329 :    
3330 :     ID of the feature whose links are desired.
3331 :    
3332 :     =item RETURN
3333 :    
3334 :     Returns a list of the HTML links stored for the specified feature, sorted
3335 :     more or less by the link's target URL.
3336 :    
3337 :     =back
3338 :    
3339 :     =cut
3340 :     #: Return Type @;
3341 : parrello 1.3 sub peg_links {
3342 : parrello 1.24 my ($self, $fid) = @_;
3343 :     my @links = $self->{sprout}->FeatureLinks($fid);
3344 :     return sort { $a =~ /\>([^\<]+)\<\/a\>/; my $l1 = $1;
3345 :     $b =~ /\>([^\<]+)\<\/a\>/; my $l2 = $1;
3346 :     $l1 cmp $l2 } @links;
3347 : parrello 1.3 }
3348 :    
3349 : parrello 1.19 =head3 get_gbrowse_feature_link
3350 :    
3351 : parrello 1.98 my $url = $sfxlate->get_gbrowse_feature_link;
3352 : olson 1.16
3353 : parrello 1.19 Compute the URL required to pull up a Gbrowse page for the the
3354 :     specified feature. In order to do this, we need to pull out
3355 :     the ID of the feature's Genome, its contig ID, and some rough
3356 :     starting and stopping offsets.
3357 :    
3358 :     =over 4
3359 :    
3360 :     =item feat
3361 :    
3362 :     ID of the feature whose Gbrowse URL is desired.
3363 :    
3364 :     =item RETURN
3365 :    
3366 :     Returns a GET-style URL for the Gbrowse CGI, with parameters
3367 :     specifying the genome ID, contig ID, starting offset, and
3368 :     stopping offset.
3369 :    
3370 :     =back
3371 :    
3372 :     =cut
3373 :     #: Return Type $;
3374 :     sub get_gbrowse_feature_link {
3375 : olson 1.18 my($self, $feat) = @_;
3376 : olson 1.16
3377 :     my $genome;
3378 :    
3379 : parrello 1.19 if ($feat =~ /fig\|(\d+\.\d+)/) {
3380 :     $genome = $1;
3381 :     } else {
3382 : olson 1.16 return undef;
3383 :     }
3384 :    
3385 :     my $gs = $self->{sprout}->GenusSpecies($genome);
3386 :     my $loc = $self->{sprout}->FeatureLocation($feat);
3387 :    
3388 : overbeek 1.33 my($start, $stop, $contig);
3389 :    
3390 :     #
3391 :     # Eval this code to catch possible badness in the database loads.
3392 :     #
3393 : parrello 1.40
3394 : overbeek 1.33 eval {
3395 :    
3396 : parrello 1.37 $start = $self->beg_of($loc);
3397 :     $stop = $self->end_of($loc);
3398 :     $contig = $self->contig_of($loc);
3399 : overbeek 1.33 };
3400 :    
3401 :     if ($@)
3402 :     {
3403 : parrello 1.37 warn "Error in getting location information for feature $feat\n$@\n";
3404 :     return undef;
3405 : overbeek 1.33 }
3406 : olson 1.16
3407 :     my $mid = int(($start + $stop) / 2);
3408 :    
3409 :     my $chunk_len = 20000;
3410 :     my $max_feature = 40000;
3411 :     #
3412 :     # Make sure large features show up.
3413 :     #
3414 :     # However, if the feature is larger than max_feature,
3415 :     # show the start.
3416 :     #
3417 :     my $feat_len = abs($stop - $start);
3418 :    
3419 : parrello 1.19 if ($feat_len > $chunk_len) {
3420 :     if ($feat_len > $max_feature) {
3421 :     $chunk_len = $max_feature;
3422 :     } else {
3423 :     $chunk_len = $feat_len + 100;
3424 :     }
3425 : olson 1.16 }
3426 :    
3427 :     my($show_start, $show_stop);
3428 : parrello 1.19 if ($chunk_len == $max_feature) {
3429 :     $show_start = $start - 300;
3430 :     } else {
3431 :     $show_start = $mid - int($chunk_len / 2);
3432 : olson 1.16 }
3433 : parrello 1.19 if ($show_start < 1) {
3434 :     $show_start = 1;
3435 : olson 1.16 }
3436 :     $show_stop = $show_start + $chunk_len - 1;
3437 :    
3438 :     my $clen = $self->{sprout}->ContigLength($contig);
3439 : parrello 1.19 if ($show_stop > $clen) {
3440 :     $show_stop = $clen;
3441 : olson 1.16 }
3442 :    
3443 :     my $seg_id = $contig;
3444 :     $seg_id =~ s/:/--/g;
3445 :     return ("/gbrowse.cgi/GB_$genome?ref=$seg_id&start=$show_start&stop=$show_stop");
3446 : parrello 1.19 }
3447 :    
3448 :    
3449 : parrello 1.23 =head3 subsystems_for_peg
3450 : parrello 1.19
3451 : parrello 1.98 my @ssList = $sfxlate->subsystems_for_peg($fid);
3452 : parrello 1.19
3453 : parrello 1.106 Return a list of the subsystems in which a specified feature participates.unlike L</peg_to_subsystems>,
3454 :     this method returns the role the feature plays in the subsystem in addition to the subsystem name.
3455 : parrello 1.19
3456 :     =over 4
3457 : olson 1.16
3458 : parrello 1.19 =item fid
3459 :    
3460 :     ID of the feature whose subsystem list is desired.
3461 :    
3462 :     =item RETURN
3463 :    
3464 :     Returns a list of 2-tuples, one per subsystem. Each tuple consists of the
3465 : parrello 1.35 subsystem ID followed by a role the input feature plays in that subsystem.
3466 : parrello 1.19
3467 : parrello 1.24 =back
3468 : parrello 1.19
3469 :     =cut
3470 :     #: Return Type @@;
3471 : parrello 1.43 sub subsystems_for_peg {
3472 : parrello 1.107 # Get the parameters.
3473 :     my ($self, $featureID) = @_;
3474 :     my $sprout = $self->{sprout};
3475 :     # Get the subsystem list.
3476 :     my @subsystems = $sprout->GetAll([qw(ContainsFeature HasSSCell IsRoleOf)],
3477 :     "ContainsFeature(to-link) = ?", [$featureID],
3478 :     [qw(HasSSCell(from-link) IsRoleOf(from-link))]);
3479 :     # Create the return value.
3480 :     return @subsystems;
3481 : parrello 1.3 }
3482 : parrello 1.19
3483 :     =head3 bbhs
3484 :    
3485 : parrello 1.98 my @bbhList = $sfxlate->bbhs($peg, $cutoff);
3486 : parrello 1.19
3487 :     Return a list of the specified feature's bidirectional best hits. All the
3488 :     hits returned will have a score lower than the specified cutoff score.
3489 :    
3490 :     =over 4
3491 :    
3492 :     =item peg
3493 :    
3494 :     ID of the feature whose BBHs are desired.
3495 :    
3496 :     =item cutoff
3497 :    
3498 : parrello 1.76 Maximum permissible score to be accepted. If omitted, 1e-10 is used.
3499 : parrello 1.19
3500 :     =item RETURN
3501 :    
3502 :     Returns a list of 2-tuples. Each tuple will consist of a feature ID followed
3503 :     by a score. The identified feature will be a bidirectional best hit of the
3504 :     incoming feature and the score is guaranteed to be no greater than the
3505 :     cutoff value.
3506 :    
3507 : parrello 1.82 =back
3508 : parrello 1.19
3509 :     =cut
3510 :     #: Return Type @@;
3511 : parrello 1.8 sub bbhs {
3512 : parrello 1.24 my ($self,$peg,$cutoff) = @_;
3513 : parrello 1.76 if (! defined $cutoff) { $cutoff = 1e-10; }
3514 : parrello 1.9 my %bbhMap = $self->{sprout}->LowBBHs($peg, $cutoff);
3515 : parrello 1.24 my @retVal = ();
3516 : parrello 1.65 for my $peg2 (keys %bbhMap) {
3517 :     Trace("Pushing BBH from $peg to $peg2.") if T(4);
3518 :     push @retVal, [$peg2, $bbhMap{$peg2}];
3519 : parrello 1.24 }
3520 :     return @retVal;
3521 : parrello 1.8 }
3522 :    
3523 : parrello 1.20 =head3 get_pathogen_groups
3524 :    
3525 : parrello 1.98 my @groups = $sfxlate->get_pathogen_groups();
3526 : parrello 1.20
3527 :     Return a list of all the pathogen groups and the genomes in them.
3528 :    
3529 :     This method is a special case of the Sprout B<GetGroups> function. It
3530 :     returns a list of 2-tuples, with each tuple containing the name of a
3531 :     group followed by a list of the genomes in the group, each genome
3532 :     being represented in the list by its ID. Note that the genome IDs are
3533 :     a full three layers deep. The code to get the first genome ID for the
3534 :     third group is C<< $groups[2]->[1]->[0] >>. The C<2> indicates we are
3535 :     asking for the third group, the C<1> means we're asking for a group member
3536 :     rather than the group name, and the C<0> means we want the first
3537 :     group member. Note that a single genome could be in more than one group,
3538 :     and that in fact many genomes are not in a group at all.
3539 : parrello 1.24
3540 : parrello 1.20 =cut
3541 :     #: Return Type @@;
3542 :     sub get_pathogen_groups {
3543 :     my ($self) = @_;
3544 :     # Sprout returns a hash of lists. Because there are no parameters,
3545 :     # all groups will be returned.
3546 :     my %groups = $self->{sprout}->GetGroups();
3547 :     # Now we convert the hash to a list of lists.
3548 :     my @retVal = ();
3549 :     for my $groupName (sort keys %groups) {
3550 :     my $groupList = [$groupName, $groups{$groupName}];
3551 :     push @retVal, $groupList;
3552 :     }
3553 :     return @retVal;
3554 :     }
3555 :    
3556 : parrello 1.69 =head3 subsystem_classification
3557 :    
3558 : parrello 1.98 my $class = $sfx->subsystem_classification($subsystemName);
3559 : parrello 1.69
3560 :     Return the classification of a subsystem.
3561 :    
3562 :     =over 4
3563 :    
3564 :     =item subsystemName
3565 :    
3566 :     Name of the subsystem whose classification is desired.
3567 :    
3568 :     =item RETURN
3569 :    
3570 : parrello 1.81 Returns a reference to a list of the classification elements of the subsystem, or an
3571 :     empty list if the subsystem is not classified.
3572 : parrello 1.69
3573 :     =back
3574 :    
3575 :     =cut
3576 :    
3577 :     sub subsystem_classification {
3578 :     # Get the parameters.
3579 :     my ($self, $subsystemName) = @_;
3580 :     # Declare the return variable.
3581 :     my $retVal;
3582 : parrello 1.81 # Try to get the subsystem classifications.
3583 : parrello 1.84 my ($classes) = $self->{sprout}->GetFlat(['Subsystem'], "Subsystem(id) = ?", [$subsystemName],
3584 :     'Subsystem(classification)');
3585 :     if (defined $classes) {
3586 : parrello 1.88 $retVal = [split($FIG_Config::splitter, $classes)];
3587 : parrello 1.84 } else {
3588 :     $retVal = [];
3589 :     }
3590 : parrello 1.69 # Return the result.
3591 : parrello 1.84 return $retVal;
3592 : parrello 1.69 }
3593 :    
3594 : parrello 1.82
3595 :     =head3 usable_subsystem
3596 :    
3597 : parrello 1.98 my $flag = $fig->usable_subsystem($sub);
3598 : parrello 1.82
3599 :     Return TRUE if the named subsystem is not experimental or deleted.
3600 :    
3601 :     =over 4
3602 :    
3603 :     =item sub
3604 :    
3605 :     Name of the subsystem to check.
3606 :    
3607 :     =item RETURN
3608 :    
3609 :     Returns TRUE if the named subsystem is usable, FALSE if it is experimental or deleted.
3610 :    
3611 :     =back
3612 :    
3613 :     =cut
3614 :    
3615 :     sub usable_subsystem {
3616 :     # Get the parameters.
3617 :     my($self, $sub) = @_;
3618 : parrello 1.106 # Declare the return value. We default to not usable.
3619 :     my $retVal = 0;
3620 :     # Get the subsystem record.
3621 :     my $subsys = $self->{sprout}->GetEntity(Subsystem => $sub);
3622 :     # Only proceed if we found it.
3623 : parrello 1.107 if (! defined $subsys) {
3624 :     Trace("Subsystem $subsys not found.") if T(3);
3625 :     } else {
3626 : parrello 1.106 # Get the subsystem's classifications.
3627 :     my @cats = $subsys->Values(['Subsystem(classification)']);
3628 :     # Look for an experimental or deleted marker.
3629 : parrello 1.107 $retVal = 1;
3630 : parrello 1.106 for my $cat (@cats) {
3631 :     if ($cat =~ /experimental/i || $cat =~ /delete/i) {
3632 : parrello 1.107 Trace("Subsystem not usable: classification is $cat.") if T(3);
3633 : parrello 1.106 $retVal = 0;
3634 :     }
3635 : parrello 1.82 }
3636 :     }
3637 :     return $retVal;
3638 :     }
3639 :    
3640 :    
3641 : parrello 1.70 =head3 is_genome
3642 :    
3643 : parrello 1.98 my $flag = $fig->is_genome($genome);
3644 : parrello 1.70
3645 :     Return TRUE if the specified genome exists, else FALSE.
3646 :    
3647 :     =over 4
3648 :    
3649 :     =item genome
3650 :    
3651 :     ID of the genome to test.
3652 :    
3653 :     =item RETURN
3654 :    
3655 :     Returns TRUE if a genome with the specified ID exists in the data store, else FALSE.
3656 :    
3657 :     =back
3658 :    
3659 :     =cut
3660 :    
3661 :     sub is_genome {
3662 :     # Get the parameters.
3663 :     my ($self, $genome) = @_;
3664 : parrello 1.100 my $retVal;
3665 :     # Only proceed if the genome ID exists. The FIG.pm method allows undefined as a parameter.
3666 :     if (defined($genome)) {
3667 :     # Test for the genome ID.
3668 :     $retVal = $self->{sprout}->Exists('Genome', $genome);
3669 :     }
3670 : parrello 1.70 # Return the test result.
3671 :     return $retVal;
3672 :     }
3673 :    
3674 : parrello 1.71 =head3 function_of_bulk
3675 :    
3676 : parrello 1.98 my $functionHash = $fig->function_of_bulk(\@fids, $no_del_check);
3677 : parrello 1.71
3678 :     Return a hash mapping the specified proteins to their master functional assignments.
3679 :    
3680 :     =over 4
3681 :    
3682 :     =item fids
3683 :    
3684 :     Reference to a list of feature IDs.
3685 :    
3686 :     =item no_del_check
3687 :    
3688 :     If TRUE, then deleted features B<will not> be removed from the list. The default
3689 :     is FALSE, which means deleted feature B<will> be removed from the list.
3690 :    
3691 :     =item RETURN
3692 :    
3693 :     REturns a reference to a hash mapping feature IDs to their main functional assignments.
3694 :    
3695 :     =back
3696 :    
3697 :     =cut
3698 :    
3699 :     sub function_of_bulk {
3700 :     # Get the parameters.
3701 :     my ($self, $fids, $no_del_check) = @_;
3702 :     # Remove any deleted features from the list according to the value of the
3703 :     # no_del_check parameter. Note we copy the list in the process so we don't
3704 :     # do any damage to the caller's data.
3705 :     my $del_check = ($no_del_check ? 0 : 1);
3706 :     my @fids = grep { $del_check || ! $self->is_deleted_fid($_) } @{$fids};
3707 :     # Declare the return variable.
3708 :     my $retVal = {};
3709 :     # Get the underlying Sprout object.
3710 :     my $sprout = $self->{sprout};
3711 :     # Loop through the features.
3712 :     for my $fid (@fids) {
3713 :     # Look for a functional assignment.
3714 :     my $assignment = $sprout->FunctionOf($fid);
3715 :     # If we found one, remember it.
3716 :     if ($assignment) {
3717 :     $retVal->{$fid} = $assignment;
3718 :     }
3719 :     }
3720 :     # Return the hash.
3721 :     return $retVal;
3722 :     }
3723 :    
3724 : parrello 1.72 =head3 uniprot_aliases_bulk
3725 :    
3726 : parrello 1.98 my $hash = $fig->uniprot_aliases_bulk(\@fids, $no_del_check);
3727 : parrello 1.72
3728 :     Return a hash mapping the specified feature IDs to lists of their uniprot
3729 :     aliases.
3730 :    
3731 :     =over 4
3732 :    
3733 :     =item fids
3734 :    
3735 :     A list of FIG feature IDs.
3736 :    
3737 :     =item no_del_check
3738 :    
3739 :     If TRUE, deleted feature IDs B<will not> be removed from the feature ID list
3740 :     before processing. The default is FALSE, which means deleted feature IDs
3741 :     B<will> be removed before processing.
3742 :    
3743 :     =item RETURN
3744 :    
3745 :     Returns a hash mapping each feature ID to a list of its uniprot aliases.
3746 :    
3747 :     =back
3748 :    
3749 :     =cut
3750 :    
3751 :     sub uniprot_aliases_bulk {
3752 :     # Get the parameters.
3753 :     my ($self, $fids, $no_del_check) = @_;
3754 :     # Remove any deleted features from the list according to the value of the
3755 :     # no_del_check parameter. Note we copy the list in the process so we don't
3756 :     # do any damage to the caller's data.
3757 :     my $del_check = ($no_del_check ? 0 : 1);
3758 :     my @fids = grep { $del_check || ! $self->is_deleted_fid($_) } @{$fids};
3759 :     # Declare the return variable.
3760 :     my $retVal = {};
3761 :     # Get the underlying Sprout object.
3762 :     my $sprout = $self->{sprout};
3763 :     # Loop through the features.
3764 :     for my $fid (@fids) {
3765 :     # Get this feature's aliases.
3766 : parrello 1.97 my @aliases = $sprout->GetFlat(['IsAliasOf'], "IsAliasOf(to-link) = ?",
3767 :     [$fid], 'IsAliasOf(from-link)');
3768 : parrello 1.72 # Put the uniprot aliases into the hash for the given feature.
3769 :     my @unis = grep { $_ =~ /^uni\|/ } @aliases;
3770 :     if (@unis) {
3771 :     $retVal->{$fid} = [ sort @unis ];
3772 :     }
3773 :     }
3774 :     # Return the result.
3775 :     return $retVal;
3776 :     }
3777 :    
3778 : parrello 1.24 =head3 by_alias
3779 :    
3780 : parrello 1.98 my @features = $sfxlate->by_alias($alias);
3781 : parrello 1.10
3782 : parrello 1.24 or
3783 :    
3784 : parrello 1.98 my $features = $sfxlate->by_alias($alias);
3785 : parrello 1.24
3786 :     Returns a list of features with the specified alias. The alias is parsed to determine
3787 :     the type of the alias. A string of digits is a GenBack ID and a string of exactly 6
3788 :     alphanumerics is a UniProt ID. A built-in FIG.pm method is used to analyze the alias
3789 :     string and attach the necessary prefix. If the result is a FIG ID then it is returned
3790 :     unmodified; otherwise, we look for an alias.
3791 :    
3792 :     =over 4
3793 :    
3794 :     =item alias
3795 :    
3796 :     Alias whose feature is desired.
3797 :    
3798 :     =item RETURN
3799 :    
3800 :     Returns the ID of the feature with the given alias. In a list context, the feature
3801 :     ID is returned as a singleton list; in a scalar context, it's returned as a string.
3802 :    
3803 :     =back
3804 :    
3805 :     =cut
3806 :     #: Return Type $;
3807 :     #: Return Type @;
3808 :     sub by_alias {
3809 :     my ($self, $alias) = @_;
3810 :     my @retVal = $self->{sprout}->FeaturesByAlias($alias);
3811 :     if (@retVal == 0) {
3812 :     return (wantarray ? () : "");
3813 :     } else {
3814 :     return (wantarray ? @retVal : $retVal[0]);
3815 :     }
3816 : parrello 1.10 }
3817 :    
3818 : parrello 1.24 =head3 genome_domain
3819 :    
3820 : parrello 1.98 my $domain = $sfxlate->genome_domain($genomeID);
3821 : parrello 1.24
3822 :     Return the domain for a specified genome: Archaea, Bacteria, Eukaryotes, Viruses, or
3823 :     Environmental Samples.
3824 :    
3825 :     =cut
3826 :     #: Return Type $;
3827 :     sub genome_domain {
3828 :     my ($self, $genome) = @_;
3829 :     my ($retVal) = $self->{sprout}->Taxonomy($genome);
3830 :     return $retVal;
3831 : parrello 1.10 }
3832 :    
3833 : parrello 1.114 =head3 find_role_in_org
3834 :    
3835 :     my @table = $fig->find_role_in_org($role, $org, $user, $sims_cutoff);
3836 :    
3837 :     Find features in a specified organism that probably have the specified
3838 :     functional role. To do this, we look for features with the specified role
3839 :     in close genomes, then find features in this genome that are similar. This
3840 :     will only work if the role in question can be found in a subsystem.
3841 :    
3842 :     =over 4
3843 :    
3844 :     =item role
3845 :    
3846 :     Text of the desired functional role.
3847 :    
3848 :     =item org
3849 :    
3850 :     Genome ID for the target organism.
3851 :    
3852 :     =item user
3853 :    
3854 :     Name of the user whose annotations are of interest. This parameter is required in
3855 :     [[FigPm]], but it is ignored here.
3856 :    
3857 :     =item sims_cutoff
3858 :    
3859 :     Cutoff value to use for similarities.
3860 :    
3861 :     =item RETURN
3862 :    
3863 :     Returns a list of 7-tuples, each containing a p-score, the ID of a feature in
3864 :     the target organism, its amino acid count, its current functional role,
3865 :     the ID of the similar feature, its amino acid count,
3866 :     and its functional role. The desired feature IDs are in the second position
3867 :     of each tuple; the remaining values are designed to make it easier to interpret
3868 :     the results.
3869 :    
3870 :     =back
3871 :    
3872 :     =cut
3873 :    
3874 :     sub find_role_in_org {
3875 :     # Get the parameters.
3876 :     my ($self, $role, $org, $user, $sims_cutoff) = @_;
3877 :     # Get the database.
3878 :     my $sprout = $self->{sprout};
3879 :     # Declare the return variable.
3880 :     my @retVal;
3881 :     # Find all features with the specified role.
3882 :     my @candidatesData = $sprout->GetAll("IsRoleOf ContainsFeature IsInGenome Genome",
3883 :     "IsRoleOf(from-link) LIKE ?",
3884 :     [$role],
3885 :     "IsInGenome(from-link) Genome(taxonomy)");
3886 :     Trace(scalar(@candidatesData) . " candidates found for role \"$role\".") if T(3);
3887 :     # Get the target genome's taxonomy.
3888 :     my @orgTaxonomy = $sprout->Taxonomy($org);
3889 :     # For each candidate, compute the taxonomic distance of its genome to ours.
3890 :     my %candidates;
3891 :     for my $candidate (@candidatesData) {
3892 :     # Get this candidate's data.
3893 :     my ($peg, $taxonomy) = @$candidate;
3894 :     # Split its taxonomy.
3895 :     my @taxonomy = split /\s*;\s*/, $taxonomy;
3896 :     # Compute the taxonomic distance.
3897 :     $candidates{$peg} = FIGRules::CrudeDistanceFormula(\@taxonomy, \@orgTaxonomy);
3898 :     }
3899 :     # Sort the pegs by distance.
3900 :     my @pegs = Tracer::SortByValue(\%candidates);
3901 :     # We process at most ten hits.
3902 :     if (scalar(@pegs) > 10) {
3903 :     splice @pegs, 10, scalar(@pegs) - 10;
3904 :     }
3905 :     Trace("Processing: " . join(", ", @pegs)) if T(3);
3906 :     # Find similarities for these hits. We retrieve a limited number.
3907 :     my @possibleSims = $self->sims(\@pegs, $FIG_Config::estimation_sim_limit, $sims_cutoff);
3908 :     # Only retain those that are in the target genome. Here we also weed out non-FIG
3909 :     # features.
3910 :     my @targetSims = grep { $_->id2 =~ /^fig\|$org/ } @possibleSims;
3911 :     Trace(scalar(@targetSims) . " target sims found out of " . scalar(@possibleSims) . " possibles.") if T(3);
3912 :     # Loop through them, building the output list.
3913 :     for my $sim (@targetSims) {
3914 :     # Get the two features relevant to this similarity.
3915 :     my $ourFeature = $sprout->GetEntity(Feature => $sim->id2);
3916 :     # Only proceed if the feature exists.
3917 :     if (defined $ourFeature) {
3918 :     # This one has to exist because we found it earlier when we were isolating
3919 :     # candidates.
3920 :     my $otherFeature = $sprout->GetEntity(Feature => $sim->id1);
3921 :     # Get both translations.
3922 :     my ($ourTran) = $ourFeature->Value('translation');
3923 :     my ($otherTran) = $ourFeature->Value('translation');
3924 :     # Build the result tuple.
3925 :     my $tuple = [$sim->psc,
3926 :     $sim->id2, length $ourTran,
3927 :     $ourFeature->PrimaryValue('assignment'),
3928 :     $sim->id1, length $otherTran,
3929 :     $otherFeature->PrimaryValue('assignment')];
3930 :     push @retVal, $tuple;
3931 :     }
3932 :     }
3933 :     # Return the result.
3934 :     return @retVal;
3935 :     }
3936 :    
3937 : parrello 1.24 =head3 is_complete
3938 :    
3939 : parrello 1.98 my $flag = $fig->is_complete($genome);
3940 : parrello 1.24
3941 :     Return TRUE if the genome with the specified ID is complete, else FALSE.
3942 :    
3943 :     =over 4
3944 :    
3945 :     =item genome
3946 :    
3947 :     ID of the relevant genome.
3948 :    
3949 :     =item RETURN
3950 : parrello 1.10
3951 : parrello 1.24 Returns TRUE if there is a complete genome in the database with the specified ID,
3952 :     else FALSE.
3953 : parrello 1.10
3954 : parrello 1.24 =back
3955 : parrello 1.10
3956 : parrello 1.24 =cut
3957 :     #: Return Type $;
3958 :     sub is_complete {
3959 :     my ($self, $genome) = @_;
3960 : parrello 1.55 # return $self->FIG()->is_complete($genome);
3961 :     return $self->{sprout}->IsComplete($genome);
3962 : parrello 1.10 }
3963 :    
3964 : parrello 1.90 =head3 all_features_detailed_fast
3965 :    
3966 : parrello 1.98 my $featureList = $fig->all_features_detailed($genomeID, $min, $max, $contig);
3967 : parrello 1.90
3968 :     Returns a list of all features in the designated genome, with various useful information
3969 :     included.
3970 :    
3971 :     Deleted features are not returned!
3972 :    
3973 :     =over 4
3974 :    
3975 :     =item genome
3976 :    
3977 :     ID of the genome whose features are desired.
3978 :    
3979 :     =item min (optional)
3980 :    
3981 :     If specified, the minimum contig location of interest. Features not entirely to the right
3982 :     of this location are ignored.
3983 :    
3984 :     =item max (optional)
3985 :    
3986 :     If specified, the maximum contig location of interest. Features not entirely to the left
3987 :     of this location are ignore.
3988 :    
3989 :     =item contig (optional)
3990 :    
3991 :     If specified, the contig of interest. Features not on this contig are ignored.
3992 :    
3993 :     =item RETURN
3994 :    
3995 : parrello 1.103 Returns a reference to a list of tuples. Each tuple consists of nine elements: (0) the feature
3996 :     ID, (1) the feature location (as a comma-delimited list of location specifiers), (2) the feature
3997 :     aliases (as a comma-delimited list of named aliases), (3) the feature type, (4) the leftmost
3998 :     index of the feature's leftmost location, (5) the rightmost index of the feature's rightmost location,
3999 :     (6) the current functional assignment, (7) the user who made the assignment, and (8) the
4000 : parrello 1.90 quality of the assignment (which is usually a space).
4001 :    
4002 :     =back
4003 :    
4004 :     =cut
4005 :    
4006 :     sub all_features_detailed_fast {
4007 :     # Get the parameters.
4008 :     my ($self, $genome, $min, $max, $contig) = @_;
4009 :     # We are going to service this request by reading feature and location records. Our biggest
4010 :     # performance hit is going to be the aliases, which have to be read one feature at a time.
4011 :     # For the rest, however, we will be getting one value per feature/location pair. Most of
4012 :     # the features will be single-location, but we have to beware of the multi-location ones
4013 :     # every step of the way. They will require a bit of read-ahead logic.
4014 :     # First, however, we create the filter clause. The genome filtering is easy, but the
4015 :     # location filtering is complicated by the fact that it's optional.
4016 :     my @possibleFilters = ("HasFeature(from-link) = ?", "IsLocatedIn(beg) > ?",
4017 :     "IsLocatedIn(beg) + IsLocatedIn(len) < ?",
4018 :     "IsLocatedIn(to-link) = ?");
4019 :     my @possibleParms = ($genome, $min, $max, $contig);
4020 :     my @actualFilters = ();
4021 :     my @actualParms = ();
4022 : parrello 1.103 Trace("All_features_detailed_fast called for $genome.") if T(3);
4023 : parrello 1.90 for (my $i = 0; $i <= $#possibleFilters; $i++) {
4024 : parrello 1.98 Trace("Checking detailed_fast parameter $i.") if T(3);
4025 : parrello 1.90 if (defined $possibleParms[$i]) {
4026 : parrello 1.98 Trace("detailed_fast parameter $i is $possibleParms[$i].") if T(3);
4027 : parrello 1.90 push @actualFilters, $possibleFilters[$i];
4028 :     push @actualParms, $possibleParms[$i];
4029 :     }
4030 :     }
4031 : parrello 1.98 my $actualFilter = join(" AND ", @actualFilters);
4032 : parrello 1.90 my $actualParmList = \@actualParms;
4033 :     # Now we use the filter to make a query.
4034 :     my $query = $self->{sprout}->Get(['HasFeature', 'Feature', 'IsLocatedIn'],
4035 : parrello 1.103 "$actualFilter ORDER BY Feature(id)",
4036 : parrello 1.90 $actualParmList);
4037 :     # With the query in hand, we declare the return variable.
4038 :     my @retVal = ();
4039 :     # The following variable will contain the ID of the feature currently being processed. That
4040 :     # feature ID is presumed to be the one in the last entry of @retVal.
4041 : parrello 1.107 my $fid = "";
4042 : parrello 1.90 # We will fill the return variable using the following loop, which runs through all the query results.
4043 : parrello 1.103 # The only tricky part is that we may have multiple results for a single feature. We process the first
4044 :     # one and ignore the rest.
4045 : parrello 1.90 while (my $object = $query->Fetch()) {
4046 :     # Find out if this is a new feature or a new location for an old feature.
4047 : parrello 1.95 my $newFid = $object->PrimaryValue('Feature(id)');
4048 : parrello 1.103 if ($newFid ne $fid) {
4049 : parrello 1.107 Trace("Processing feature $newFid.") if T(4);
4050 : parrello 1.90 # Here we have a new feature, so we need to create a new row. We start with the easy stuff.
4051 : parrello 1.103 my ($type, $assignment, $user, $quality, $locs) = $object->Values(['Feature(feature-type)',
4052 :     'Feature(assignment)',
4053 :     'Feature(assignment-maker)',
4054 :     'Feature(assignment-quality)',
4055 :     'Feature(location-string)']);
4056 : parrello 1.90 # Next, we get the aliases and convert them to a string.
4057 : parrello 1.95 my @aliases = $self->{sprout}->GetFlat(['IsAliasOf'], 'IsAliasOf(to-link) = ?', [$newFid], 'IsAliasOf(from-link)');
4058 : parrello 1.90 my $aliasList = join(",", @aliases);
4059 : parrello 1.103 # We also need a location object for the locations.
4060 :     my $locObject = FullLocation->new($self, $genome, $locs);
4061 :     # Get the boundaries.
4062 :     my ($boundingLoc) = $locObject->GetBounds();
4063 : parrello 1.90 # Now we can build the row.
4064 : parrello 1.103 my @newRow = ($newFid, $locObject->SeedString, $aliasList, $type, $boundingLoc->Left,
4065 :     $boundingLoc->Right, $assignment, $user, $quality);
4066 : parrello 1.90 # Push it into the return list.
4067 :     push @retVal, \@newRow;
4068 :     # Remember the feature ID.
4069 :     $fid = $newFid;
4070 :     }
4071 :     }
4072 :     # Return the result as a list reference.
4073 :     return \@retVal;
4074 :     }
4075 :    
4076 : parrello 1.37 =head3 all_features
4077 :    
4078 : parrello 1.98 my @featureIDs = $sfx->all_features($genomeID,$type);
4079 : parrello 1.37
4080 :     Return a list of the IDs of all the features for a specified genome.
4081 :    
4082 :     =over 4
4083 :    
4084 :     =item genomeID
4085 :    
4086 :     ID of the genome whose features are desired.
4087 :    
4088 : parrello 1.74 =item type (optional)
4089 :    
4090 :     Type of feature desired (peg, rna, etc.). If omitted, all features will be returned.
4091 :    
4092 : parrello 1.37 =item RETURN
4093 :    
4094 :     =back
4095 :    
4096 :     =cut
4097 :     #: Return Type @;
4098 :     sub all_features {
4099 :     # Get the parameters.
4100 : parrello 1.74 my ($self, $genomeID, $type) = @_;
4101 :     # Form the filter clause.
4102 :     my $filter = "HasFeature(from-link) = ?";
4103 :     my @parms = ($genomeID);
4104 :     if ($type) {
4105 :     $filter .= " AND HasFeature(type) = ?";
4106 :     push @parms, $type;
4107 :     }
4108 :     # Ask for the feature IDs.
4109 :     my @retVal = $self->{sprout}->GetFlat(['HasFeature'], $filter, \@parms, 'HasFeature(to-link)');
4110 : parrello 1.37 # Return the result.
4111 : parrello 1.74 return @retVal;
4112 : parrello 1.37 }
4113 :    
4114 : parrello 1.24 =head3 is_real_feature
4115 :    
4116 : parrello 1.98 my $flag = $sfxlate->is_real_feature($fid);
4117 : parrello 1.24
4118 :     Return TRUE if the specified feature is in the database, else FALSE.
4119 :    
4120 :     =over 4
4121 :    
4122 :     =item fid
4123 :    
4124 :     ID of the feature whose existence is in question.
4125 :    
4126 :     =item RETURN
4127 :    
4128 :     Returns TRUE if the specified feature exists in the database, else FALSE.
4129 :    
4130 :     =back
4131 :    
4132 :     =cut
4133 :     #: Return Type $;
4134 :     sub is_real_feature {
4135 :     my ($self, $fid) = @_;
4136 :     return $self->{sprout}->Exists('Feature', $fid);
4137 :     }
4138 :    
4139 : parrello 1.80 =head3 active_subsystems
4140 :    
4141 : parrello 1.98 my $ssHash = $fig->active_subsystems($genome, $allFlag);
4142 : parrello 1.80
4143 :     Get all the subsystems in which a genome is present. The return value is a hash
4144 :     which maps each subsystem name to the code for the variant used by the specified
4145 :     genome.
4146 :    
4147 :     =over 4
4148 :    
4149 :     =item genome
4150 :    
4151 :     ID of the genome whose subsystems are desired.
4152 :    
4153 :     =item allFlag (optional)
4154 :    
4155 :     If TRUE, all subsystems are returned, with unknown variants marked by a variant
4156 :     code of C<-1> and iffy variants marked by a code of C<0>. If FALSE or omitted,
4157 :     only subsystems in which the variant is definitively known are returned. The
4158 :     default is FALSE.
4159 :    
4160 : parrello 1.82 =back
4161 :    
4162 : parrello 1.80 =cut
4163 :    
4164 :     sub active_subsystems {
4165 :     # Get the parameters.
4166 :     my($self, $genome, $allFlag) = @_;
4167 :     # Build the filter from the all-flag.
4168 :     my $filter = "ParticipatesIn(from-link) = ?";
4169 :     if (! $allFlag) {
4170 :     $filter .= " AND ParticipatesIn(variant-code) >= 0";
4171 :     }
4172 :     # Get the subsystems and codes.
4173 :     my @subList = $self->{sprout}->GetAll(['ParticipatesIn'], $filter, [$genome],
4174 :     ['ParticipatesIn(to-link)', 'ParticipatesIn(variant-code)']);
4175 :     # Form them into a hash.
4176 :     my %retVal = map { $_->[0] => $_->[1] } @subList;
4177 :     # Return the results.
4178 :     return \%retVal;
4179 :     }
4180 :    
4181 : parrello 1.40 =head3 get_subsystem
4182 :    
4183 : parrello 1.98 my $subsysObject = $sfx->get_subsystem($name);
4184 : parrello 1.40
4185 :     Return a subsystem object for manipulation of the named subsystem. If the
4186 :     subsystem does not exist, an undefined value will be returned.
4187 :    
4188 :     =over 4
4189 :    
4190 :     =item name
4191 :    
4192 :     Name of the desired subsystem.
4193 :    
4194 :     =item RETURN
4195 :    
4196 :     Returns a blessed object that allows access to subsystem data.
4197 :    
4198 :     =back
4199 :    
4200 :     =cut
4201 :    
4202 :     sub get_subsystem {
4203 :     # Get the parameters.
4204 :     my ($self, $name) = @_;
4205 : parrello 1.110 # Declare the return value. If we don't change it, then the caller will know
4206 :     # the subsystem was not found.
4207 :     my $retVal;
4208 :     # Get the database.
4209 :     my $sprout = $self->{sprout};
4210 :     # Does the subsystem exist?
4211 :     if ($sprout->Exists(Subsystem => $name)) {
4212 :     # Yes, construct the subsystem object.
4213 :     $retVal = SproutSubsys->new($name, $self->{sprout});
4214 :     }
4215 : parrello 1.40 return $retVal;
4216 :     }
4217 :    
4218 : parrello 1.78 =head3 proteins_in_family
4219 : parrello 1.40
4220 : parrello 1.98 my @pegs = $fig->in_family($family);
4221 : parrello 1.78
4222 :     Return a list of the features in a specified protein family.
4223 :    
4224 :     Sprout does not support protein families, so it calls through to the SEED method.
4225 :    
4226 :     =over 4
4227 :    
4228 :     =item family
4229 :    
4230 :     Name of the protein family whose features are desired.
4231 :    
4232 :     =item RETURN
4233 :    
4234 :     Returns a list of the IDs of the features in the specified protein family.
4235 :    
4236 :     =back
4237 :    
4238 :     =cut
4239 :     #: Return Type @;
4240 :     sub proteins_in_family {
4241 :     # Get the parameters.
4242 :     my($self, $family) = @_;
4243 :     # Get the Sprout object.
4244 :     my $sprout = $self->{sprout};
4245 :     # Get a list of the PEGs for the specified family. Note that if the family
4246 :     # doesn't exist we'll simply get an empty list back.
4247 : parrello 1.79 my @retVal = $sprout->GetFlat(['IsFamilyForFeature'], "IsFamilyForFeature(from-link) = ?",
4248 :     [$family], 'IsFamilyForFeature(to-link)');
4249 : parrello 1.78 return @retVal;
4250 :     }
4251 :    
4252 :     =head3 families_for_protein
4253 :    
4254 : parrello 1.98 my @families = $fig->families_for_protein($peg);
4255 : parrello 1.78
4256 :     Return a list of all the families containing the specified protein.
4257 :    
4258 :     =over 4
4259 :    
4260 :     =item peg
4261 :    
4262 :     ID of the PEG representing the protein in question.
4263 :    
4264 :     =item RETURN
4265 :    
4266 :     Returns a list of the IDs of the families containing the protein.
4267 :    
4268 :     =back
4269 :    
4270 :     =cut
4271 :    
4272 :     sub families_for_protein {
4273 :     # Get the parameters.
4274 :     my ($self, $peg) = @_;
4275 :     # Get the Sprout object.
4276 :     my $sprout = $self->{sprout};
4277 :     # Read all the families for this protein.
4278 : parrello 1.79 my @retVal = $sprout->GetFlat(['IsFamilyForFeature'], "IsFamilyForFeature(to-link) = ?",
4279 :     [$peg], 'IsFamilyForFeature(from-link)');
4280 : parrello 1.78 return @retVal;
4281 :     }
4282 :    
4283 :    
4284 :     =head3 family_function
4285 :    
4286 : parrello 1.98 my $func = $fig->family_function($family);
4287 : parrello 1.78
4288 :     Returns the putative function of all of the pegs in a protein family. Remember, we
4289 :     are defining "protein family" as a set of homologous proteins that have the
4290 :     same function.
4291 :    
4292 :     =over 4
4293 :    
4294 :     =item family
4295 :    
4296 :     ID of the relevant protein family.
4297 :    
4298 :     =item RETURN
4299 :    
4300 :     Returns the name of the function assigned to the members of the specified family.
4301 :    
4302 :     =back
4303 :    
4304 :     =cut
4305 : parrello 1.24
4306 : parrello 1.78 sub family_function {
4307 :     # Get the parameters.
4308 :     my ($self, $family) = @_;
4309 :     # Get the Sprout object.
4310 :     my $sprout = $self->{sprout};
4311 :     # Find the family's function.
4312 :     my ($retVal) = $sprout->GetEntityValues('Family', $family, ['Family(function)']);
4313 :     # If it doesn't exist, return a null string.
4314 :     if (! defined $retVal) {
4315 :     $retVal = "";
4316 :     }
4317 :     return $retVal;
4318 :     }
4319 : parrello 1.24
4320 : parrello 1.78 =head3 sz_family
4321 : parrello 1.24
4322 : parrello 1.98 my $n = $fig->sz_family($family);
4323 : parrello 1.24
4324 : parrello 1.78 Returns the number of proteins in a family.
4325 : parrello 1.24
4326 :     =over 4
4327 :    
4328 : parrello 1.78 =item family
4329 : parrello 1.24
4330 : parrello 1.78 ID of the relevant protein family.
4331 : parrello 1.24
4332 :     =item RETURN
4333 :    
4334 : parrello 1.78 Returns the number of proteins in the specified family.
4335 : parrello 1.24
4336 :     =back
4337 :    
4338 :     =cut
4339 : parrello 1.78
4340 :     sub sz_family {
4341 : parrello 1.24 # Get the parameters.
4342 : parrello 1.78 my ($self, $family) = @_;
4343 :     # Get the Sprout object.
4344 :     my $sprout = $self->{sprout};
4345 :     # Find the family's size.
4346 :     my ($retVal) = $sprout->GetEntityValues('Family', $family, ['Family(size)']);
4347 :     # If it doesn't exist, return 0.
4348 :     if (! defined $retVal) {
4349 :     $retVal = 0;
4350 :     }
4351 : parrello 1.24 return $retVal;
4352 :     }
4353 :    
4354 :     =head3 sims
4355 :    
4356 : parrello 1.98 my @sims = $fig->sims($pegs, $maxN, $maxP, $raw);
4357 : parrello 1.24
4358 :     Returns a list of similarities for $peg governed by the constraints of the parameters.
4359 :    
4360 :     =over 4
4361 :    
4362 : parrello 1.94 =item pegs
4363 : parrello 1.24
4364 : parrello 1.94 ID of the feature whose similarities are desired, or a reference to a list of feature IDs,
4365 :     all of whose similarities are desired.
4366 : parrello 1.24
4367 :     =item maxN
4368 :    
4369 :     Maximum number of similarities to be performed.
4370 :    
4371 : parrello 1.66 =item maxP
4372 :    
4373 :     Maximum allowable similarity score.
4374 :    
4375 :     =item raw
4376 :