[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.46, Thu Oct 20 11:52:36 2005 UTC revision 1.98, Tue Apr 10 06:13:33 2007 UTC
# Line 1  Line 1 
1  package Sprout;  package Sprout;
2    
3        require Exporter;
4        use ERDB;
5        @ISA = qw(Exporter ERDB);
6      use Data::Dumper;      use Data::Dumper;
7      use strict;      use strict;
     use Carp;  
8      use DBKernel;      use DBKernel;
9      use XML::Simple;      use XML::Simple;
10      use DBQuery;      use DBQuery;
11      use DBObject;      use ERDBObject;
     use ERDB;  
12      use Tracer;      use Tracer;
13      use FIGRules;      use FIGRules;
14        use FidCheck;
15      use Stats;      use Stats;
16      use POSIX qw(strftime);      use POSIX qw(strftime);
17        use BasicLocation;
18    
19  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
20    
# Line 32  Line 34 
34  query tasks. For example, L</genomes> lists the IDs of all the genomes in the database and  query tasks. For example, L</genomes> lists the IDs of all the genomes in the database and
35  L</dna_seq> returns the DNA sequence for a specified genome location.  L</dna_seq> returns the DNA sequence for a specified genome location.
36    
37    The Sprout object is a subclass of the ERDB object and inherits all its properties and methods.
38    
39  =cut  =cut
40    
41  #: Constructor SFXlate->new_sprout_only();  #: Constructor SFXlate->new_sprout_only();
# Line 62  Line 66 
66    
67  * B<xmlFileName> name of the XML file containing the database definition (default C<SproutDBD.xml>)  * B<xmlFileName> name of the XML file containing the database definition (default C<SproutDBD.xml>)
68    
69  * B<userData> user name and password, delimited by a slash (default C<root/>)  * B<userData> user name and password, delimited by a slash (default same as SEED)
70    
71  * B<port> connection port (default C<0>)  * B<port> connection port (default C<0>)
72    
73    * B<sock> connection socket (default same as SEED)
74    
75  * B<maxSegmentLength> maximum number of residues per feature segment, (default C<4500>)  * B<maxSegmentLength> maximum number of residues per feature segment, (default C<4500>)
76    
77  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)
# Line 85  Line 91 
91  sub new {  sub new {
92      # Get the parameters.      # Get the parameters.
93      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
94        # Compute the DBD directory.
95        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
96                                                      $FIG_Config::fig );
97      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
98      # the incoming data.      # the incoming data.
99      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
# Line 92  Line 101 
101                                                          # database type                                                          # database type
102                         dataDir      => $FIG_Config::sproutData,                         dataDir      => $FIG_Config::sproutData,
103                                                          # data file directory                                                          # data file directory
104                         xmlFileName  => "$FIG_Config::sproutData/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
105                                                          # database definition file name                                                          # database definition file name
106                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
107                                                          # user name and password                                                          # user name and password
108                         port         => $FIG_Config::dbport,                         port         => $FIG_Config::dbport,
109                                                          # database connection port                                                          # database connection port
110                           sock         => $FIG_Config::dbsock,
111                           host         => $FIG_Config::dbhost,
112                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
113                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
114                         noDBOpen     => 0,               # 1 to suppress the database open                         noDBOpen     => 0,               # 1 to suppress the database open
# Line 111  Line 122 
122      my $dbh;      my $dbh;
123      if (! $optionTable->{noDBOpen}) {      if (! $optionTable->{noDBOpen}) {
124          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
125                                  $password, $optionTable->{port});                                  $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
126      }      }
127      # Create the ERDB object.      # Create the ERDB object.
128      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
129      my $erdb = ERDB->new($dbh, $xmlFileName);      my $retVal = ERDB::new($class, $dbh, $xmlFileName);
130      # Create this object.      # Add the option table and XML file name.
131      my $self = { _erdb => $erdb, _options => $optionTable, _xmlName => $xmlFileName };      $retVal->{_options} = $optionTable;
132      # Bless and return it.      $retVal->{_xmlName} = $xmlFileName;
133      bless $self;      # Set up space for the group file data.
134      return $self;      $retVal->{groupHash} = undef;
135        # Return it.
136        return $retVal;
137  }  }
138    
139  =head3 MaxSegment  =head3 MaxSegment
# Line 155  Line 168 
168      return $self->{_options}->{maxSequenceLength};      return $self->{_options}->{maxSequenceLength};
169  }  }
170    
 =head3 Get  
   
 C<< my $query = $sprout->Get(\@objectNames, $filterClause, \@parameterList); >>  
   
 This method allows a general query against the Sprout data using a specified filter clause.  
   
 The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  
 field name represented in the form B<I<objectName>(I<fieldName>)>. For example, the  
 following call requests all B<Genome> objects for the genus specified in the variable  
 $genus.  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", [$genus]); >>  
   
 The WHERE clause contains a single question mark, so there is a single additional  
 parameter representing the parameter value. It would also be possible to code  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  
   
 however, this version of the call would generate a syntax error if there were any quote  
 characters inside the variable C<$genus>.  
   
 The use of the strange parenthesized notation for field names enables us to distinguish  
 hyphens contained within field names from minus signs that participate in the computation  
 of the WHERE clause. All of the methods that manipulate fields will use this same notation.  
   
 It is possible to specify multiple entity and relationship names in order to retrieve more than  
 one object's data at the same time, which allows highly complex joined queries. For example,  
   
 C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", [$genus]); >>  
   
 This query returns all the genomes for a particular genus and allows access to the  
 sources from which they came. The join clauses to go from Genome to Source are generated  
 automatically.  
   
 Finally, the filter clause can contain sort information. To do this, simply put an C<ORDER BY>  
 clause at the end of the filter. Field references in the ORDER BY section follow the same rules  
 as they do in the filter itself; in other words, each one must be of the form B<I<objectName>(I<fieldName>)>.  
 For example, the following filter string gets all genomes for a particular genus and sorts  
 them by species name.  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ? ORDER BY Genome(species)", [$genus]); >>  
   
 It is also permissible to specify I<only> an ORDER BY clause. For example, the following invocation gets  
 all genomes ordered by genus and species.  
   
 C<< $query = $sprout->Get(['Genome'], "ORDER BY Genome(genus), Genome(species)"); >>  
   
 Odd things may happen if one of the ORDER BY fields is in a secondary relation. So, for example, an  
 attempt to order B<Feature>s by alias may (depending on the underlying database engine used) cause  
 a single feature to appear more than once.  
   
 If multiple names are specified, then the query processor will automatically determine a  
 join path between the entities and relationships. The algorithm used is very simplistic.  
 In particular, you can't specify any entity or relationship more than once, and if a  
 relationship is recursive, the path is determined by the order in which the entity  
 and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  
 which relates B<People> objects to other B<People> objects. If the join path is  
 coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  
 the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  
   
 =over 4  
   
 =item objectNames  
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item RETURN  
   
 Returns a B<DBQuery> that can be used to iterate through all of the results.  
   
 =back  
   
 =cut  
   
 sub Get {  
     # Get the parameters.  
     my ($self, $objectNames, $filterClause, $parameterList) = @_;  
     # We differ from the ERDB Get method in that the parameter list is passed in as a list reference  
     # rather than a list of parameters. The next step is to convert the parameters from a reference  
     # to a real list. We can only do this if the parameters have been specified.  
     my @parameters;  
     if ($parameterList) { @parameters = @{$parameterList}; }  
     return $self->{_erdb}->Get($objectNames, $filterClause, @parameters);  
 }  
   
 =head3 GetEntity  
   
 C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >>  
   
 Return an object describing the entity instance with a specified ID.  
   
 =over 4  
   
 =item entityType  
   
 Entity type name.  
   
 =item ID  
   
 ID of the desired entity.  
   
 =item RETURN  
   
 Returns a B<DBObject> representing the desired entity instance, or an undefined value if no  
 instance is found with the specified key.  
   
 =back  
   
 =cut  
   
 sub GetEntity {  
     # Get the parameters.  
     my ($self, $entityType, $ID) = @_;  
     # Call the ERDB method.  
     return $self->{_erdb}->GetEntity($entityType, $ID);  
 }  
   
 =head3 GetEntityValues  
   
 C<< my @values = GetEntityValues($entityType, $ID, \@fields); >>  
   
 Return a list of values from a specified entity instance.  
   
 =over 4  
   
 =item entityType  
   
 Entity type name.  
   
 =item ID  
   
 ID of the desired entity.  
   
 =item fields  
   
 List of field names, each of the form I<objectName>C<(>I<fieldName>C<)>.  
   
 =item RETURN  
   
 Returns a flattened list of the values of the specified fields for the specified entity.  
   
 =back  
   
 =cut  
 #: Return Type @;  
 sub GetEntityValues {  
     # Get the parameters.  
     my ($self, $entityType, $ID, $fields) = @_;  
     # Call the ERDB method.  
     return $self->{_erdb}->GetEntityValues($entityType, $ID, $fields);  
 }  
   
 =head3 ShowMetaData  
   
 C<< $sprout->ShowMetaData($fileName); >>  
   
 This method outputs a description of the database to an HTML file in the data directory.  
   
 =over 4  
   
 =item fileName  
   
 Fully-qualified name to give to the output file.  
   
 =back  
   
 =cut  
   
 sub ShowMetaData {  
     # Get the parameters.  
     my ($self, $fileName) = @_;  
     # Compute the file name.  
     my $options = $self->{_options};  
     # Call the show method on the underlying ERDB object.  
     $self->{_erdb}->ShowMetaData($fileName);  
 }  
   
171  =head3 Load  =head3 Load
172    
173  C<< $sprout->Load($rebuild); >>;  C<< $sprout->Load($rebuild); >>;
# Line 379  Line 202 
202  sub Load {  sub Load {
203      # Get the parameters.      # Get the parameters.
204      my ($self, $rebuild) = @_;      my ($self, $rebuild) = @_;
     # Get the database object.  
     my $erdb = $self->{_erdb};  
205      # Load the tables from the data directory.      # Load the tables from the data directory.
206      my $retVal = $erdb->LoadTables($self->{_options}->{dataDir}, $rebuild);      my $retVal = $self->LoadTables($self->{_options}->{dataDir}, $rebuild);
207      # Return the statistics.      # Return the statistics.
208      return $retVal;      return $retVal;
209  }  }
# Line 422  Line 243 
243  sub LoadUpdate {  sub LoadUpdate {
244      # Get the parameters.      # Get the parameters.
245      my ($self, $truncateFlag, $tableList) = @_;      my ($self, $truncateFlag, $tableList) = @_;
     # Get the database object.  
     my $erdb = $self->{_erdb};  
246      # Declare the return value.      # Declare the return value.
247      my $retVal = Stats->new();      my $retVal = Stats->new();
248      # Get the data directory.      # Get the data directory.
# Line 437  Line 256 
256              Trace("No load file found for $tableName in $dataDir.") if T(0);              Trace("No load file found for $tableName in $dataDir.") if T(0);
257          } else {          } else {
258              # Attempt to load this table.              # Attempt to load this table.
259              my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);              my $result = $self->LoadTable($fileName, $tableName, $truncateFlag);
260              # Accumulate the resulting statistics.              # Accumulate the resulting statistics.
261              $retVal->Accumulate($result);              $retVal->Accumulate($result);
262          }          }
# Line 446  Line 265 
265      return $retVal;      return $retVal;
266  }  }
267    
268    =head3 GenomeCounts
269    
270    C<< my ($arch, $bact, $euk, $vir, $env, $unk) = $sprout->GenomeCounts($complete); >>
271    
272    Count the number of genomes in each domain. If I<$complete> is TRUE, only complete
273    genomes will be included in the counts.
274    
275    =over 4
276    
277    =item complete
278    
279    TRUE if only complete genomes are to be counted, FALSE if all genomes are to be
280    counted
281    
282    =item RETURN
283    
284    A six-element list containing the number of genomes in each of six categories--
285    Archaea, Bacteria, Eukaryota, Viral, Environmental, and Unknown, respectively.
286    
287    =back
288    
289    =cut
290    
291    sub GenomeCounts {
292        # Get the parameters.
293        my ($self, $complete) = @_;
294        # Set the filter based on the completeness flag.
295        my $filter = ($complete ? "Genome(complete) = 1" : "");
296        # Get all the genomes and the related taxonomy information.
297        my @genomes = $self->GetAll(['Genome'], $filter, [], ['Genome(id)', 'Genome(taxonomy)']);
298        # Clear the counters.
299        my ($arch, $bact, $euk, $vir, $env, $unk) = (0, 0, 0, 0, 0, 0);
300        # Loop through, counting the domains.
301        for my $genome (@genomes) {
302            if    ($genome->[1] =~ /^archaea/i)  { ++$arch }
303            elsif ($genome->[1] =~ /^bacter/i)   { ++$bact }
304            elsif ($genome->[1] =~ /^eukar/i)    { ++$euk }
305            elsif ($genome->[1] =~ /^vir/i)      { ++$vir }
306            elsif ($genome->[1] =~ /^env/i)      { ++$env }
307            else  { ++$unk }
308        }
309        # Return the counts.
310        return ($arch, $bact, $euk, $vir, $env, $unk);
311    }
312    
313    =head3 ContigCount
314    
315    C<< my $count = $sprout->ContigCount($genomeID); >>
316    
317    Return the number of contigs for the specified genome ID.
318    
319    =over 4
320    
321    =item genomeID
322    
323    ID of the genome whose contig count is desired.
324    
325    =item RETURN
326    
327    Returns the number of contigs for the specified genome.
328    
329    =back
330    
331    =cut
332    
333    sub ContigCount {
334        # Get the parameters.
335        my ($self, $genomeID) = @_;
336        # Get the contig count.
337        my $retVal = $self->GetCount(['Contig', 'HasContig'], "HasContig(from-link) = ?", [$genomeID]);
338        # Return the result.
339        return $retVal;
340    }
341    
342    =head3 GeneMenu
343    
344    C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected); >>
345    
346    Return an HTML select menu of genomes. Each genome will be an option in the menu,
347    and will be displayed by name with the ID and a contig count attached. The selection
348    value will be the genome ID. The genomes will be sorted by genus/species name.
349    
350    =over 4
351    
352    =item attributes
353    
354    Reference to a hash mapping attributes to values for the SELECT tag generated.
355    
356    =item filterString
357    
358    A filter string for use in selecting the genomes. The filter string must conform
359    to the rules for the C<< ERDB->Get >> method.
360    
361    =item params
362    
363    Reference to a list of values to be substituted in for the parameter marks in
364    the filter string.
365    
366    =item selected (optional)
367    
368    ID of the genome to be initially selected.
369    
370    =item fast (optional)
371    
372    If specified and TRUE, the contig counts will be omitted to improve performance.
373    
374    =item RETURN
375    
376    Returns an HTML select menu with the specified genomes as selectable options.
377    
378    =back
379    
380    =cut
381    
382    sub GeneMenu {
383        # Get the parameters.
384        my ($self, $attributes, $filterString, $params, $selected, $fast) = @_;
385        my $slowMode = ! $fast;
386        # Default to nothing selected. This prevents an execution warning if "$selected"
387        # is undefined.
388        $selected = "" unless defined $selected;
389        Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3);
390        # Start the menu.
391        my $retVal = "<select " .
392            join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .
393            ">\n";
394        # Get the genomes.
395        my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
396                                                                         'Genome(genus)',
397                                                                         'Genome(species)',
398                                                                         'Genome(unique-characterization)']);
399        # Sort them by name.
400        my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
401        # Loop through the genomes, creating the option tags.
402        for my $genomeData (@sorted) {
403            # Get the data for this genome.
404            my ($genomeID, $genus, $species, $strain) = @{$genomeData};
405            # Get the contig count.
406            my $contigInfo = "";
407            if ($slowMode) {
408                my $count = $self->ContigCount($genomeID);
409                my $counting = ($count == 1 ? "contig" : "contigs");
410                $contigInfo = "[$count $counting]";
411            }
412            # Find out if we're selected.
413            my $selectOption = ($selected eq $genomeID ? " selected" : "");
414            # Build the option tag.
415            $retVal .= "<option value=\"$genomeID\"$selectOption>$genus $species $strain ($genomeID)$contigInfo</option>\n";
416        }
417        # Close the SELECT tag.
418        $retVal .= "</select>\n";
419        # Return the result.
420        return $retVal;
421    }
422    
423  =head3 Build  =head3 Build
424    
425  C<< $sprout->Build(); >>  C<< $sprout->Build(); >>
# Line 460  Line 434 
434      # Get the parameters.      # Get the parameters.
435      my ($self) = @_;      my ($self) = @_;
436      # Create the tables.      # Create the tables.
437      $self->{_erdb}->CreateTables;      $self->CreateTables();
438  }  }
439    
440  =head3 Genomes  =head3 Genomes
# Line 680  Line 654 
654      return ($contigID, $start, $dir, $len);      return ($contigID, $start, $dir, $len);
655  }  }
656    
657    
658    
659  =head3 PointLocation  =head3 PointLocation
660    
661  C<< my $found = Sprout::PointLocation($location, $point); >>  C<< my $found = Sprout::PointLocation($location, $point); >>
# Line 740  Line 716 
716  should be of the form returned by L</featureLocation> when in a list context. In other words,  should be of the form returned by L</featureLocation> when in a list context. In other words,
717  each location is of the form I<contigID>C<_>I<begin>I<dir>I<end>.  each location is of the form I<contigID>C<_>I<begin>I<dir>I<end>.
718    
719    For example, the following would return the DNA sequence for contig C<83333.1:NC_000913>
720    between positions 1401 and 1532, inclusive.
721    
722        my $sequence = $sprout->DNASeq('83333.1:NC_000913_1401_1532');
723    
724  =over 4  =over 4
725    
726  =item locationList  =item locationList
727    
728  List of location specifiers, each in the form I<contigID>C<_>I<begin>I<dir>I<end> (see  List of location specifiers, each in the form I<contigID>C<_>I<begin>I<dir>I<len> or
729  L</FeatureLocation> for more about this format).  I<contigID>C<_>I<begin>C<_>I<end> (see L</FeatureLocation> for more about this format).
730    
731  =item RETURN  =item RETURN
732    
# Line 841  Line 822 
822      return @retVal;      return @retVal;
823  }  }
824    
825    =head3 GenomeLength
826    
827    C<< my $length = $sprout->GenomeLength($genomeID); >>
828    
829    Return the length of the specified genome in base pairs.
830    
831    =over 4
832    
833    =item genomeID
834    
835    ID of the genome whose base pair count is desired.
836    
837    =item RETURN
838    
839    Returns the number of base pairs in all the contigs of the specified
840    genome.
841    
842    =back
843    
844    =cut
845    
846    sub GenomeLength {
847        # Get the parameters.
848        my ($self, $genomeID) = @_;
849        # Declare the return variable.
850        my $retVal = 0;
851        # Get the genome's contig sequence lengths.
852        my @lens = $self->GetFlat(['HasContig', 'IsMadeUpOf'], 'HasContig(from-link) = ?',
853                           [$genomeID], 'IsMadeUpOf(len)');
854        # Sum the lengths.
855        map { $retVal += $_ } @lens;
856        # Return the result.
857        return $retVal;
858    }
859    
860    =head3 FeatureCount
861    
862    C<< my $count = $sprout->FeatureCount($genomeID, $type); >>
863    
864    Return the number of features of the specified type in the specified genome.
865    
866    =over 4
867    
868    =item genomeID
869    
870    ID of the genome whose feature count is desired.
871    
872    =item type
873    
874    Type of feature to count (eg. C<peg>, C<rna>, etc.).
875    
876    =item RETURN
877    
878    Returns the number of features of the specified type for the specified genome.
879    
880    =back
881    
882    =cut
883    
884    sub FeatureCount {
885        # Get the parameters.
886        my ($self, $genomeID, $type) = @_;
887        # Compute the count.
888        my $retVal = $self->GetCount(['HasFeature', 'Feature'],
889                                    "HasFeature(from-link) = ? AND Feature(feature-type) = ?",
890                                    [$genomeID, $type]);
891        # Return the result.
892        return $retVal;
893    }
894    
895    =head3 GenomeAssignments
896    
897    C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >>
898    
899    Return a list of a genome's assigned features. The return hash will contain each
900    assigned feature of the genome mapped to the text of its most recent functional
901    assignment.
902    
903    =over 4
904    
905    =item genomeID
906    
907    ID of the genome whose functional assignments are desired.
908    
909    =item RETURN
910    
911    Returns a reference to a hash which maps each feature to its most recent
912    functional assignment.
913    
914    =back
915    
916    =cut
917    
918    sub GenomeAssignments {
919        # Get the parameters.
920        my ($self, $genomeID) = @_;
921        # Declare the return variable.
922        my $retVal = {};
923        # Query the genome's features.
924        my $query = $self->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ?",
925                               [$genomeID]);
926        # Loop through the features.
927        while (my $data = $query->Fetch) {
928            # Get the feature ID and assignment.
929            my ($fid, $assignment) = $data->Values(['Feature(id)', 'Feature(assignment)']);
930            if ($assignment) {
931                $retVal->{$fid} = $assignment;
932            }
933        }
934        # Return the result.
935        return $retVal;
936    }
937    
938  =head3 ContigLength  =head3 ContigLength
939    
940  C<< my $length = $sprout->ContigLength($contigID); >>  C<< my $length = $sprout->ContigLength($contigID); >>
# Line 1163  Line 1257 
1257      # Get the parameters.      # Get the parameters.
1258      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1259      # Get all of the feature's annotations.      # Get all of the feature's annotations.
1260      my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation'],      my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1261                              "IsTargetOfAnnotation(from-link) = ?",                              "IsTargetOfAnnotation(from-link) = ?",
1262                              [$featureID], ['Annotation(time)', 'Annotation(annotation)']);                              [$featureID], ['Annotation(time)', 'Annotation(annotation)',
1263                                               'MadeAnnotation(from-link)']);
1264      # Declare the return hash.      # Declare the return hash.
1265      my %retVal;      my %retVal;
1266      # Now we sort the assignments by timestamp in reverse.      # Now we sort the assignments by timestamp in reverse.
# Line 1173  Line 1268 
1268      # Loop until we run out of annotations.      # Loop until we run out of annotations.
1269      for my $annotation (@sortedQuery) {      for my $annotation (@sortedQuery) {
1270          # Get the annotation fields.          # Get the annotation fields.
1271          my ($timeStamp, $text) = @{$annotation};          my ($timeStamp, $text, $user) = @{$annotation};
1272          # Check to see if this is a functional assignment.          # Check to see if this is a functional assignment.
1273          my ($user, $function) = _ParseAssignment($text);          my ($actualUser, $function) = _ParseAssignment($user, $text);
1274          if ($user && ! exists $retVal{$user}) {          if ($actualUser && ! exists $retVal{$actualUser}) {
1275              # Here it is a functional assignment and there has been no              # Here it is a functional assignment and there has been no
1276              # previous assignment for this user, so we stuff it in the              # previous assignment for this user, so we stuff it in the
1277              # return hash.              # return hash.
1278              $retVal{$user} = $function;              $retVal{$actualUser} = $function;
1279          }          }
1280      }      }
1281      # Return the hash of assignments found.      # Return the hash of assignments found.
# Line 1194  Line 1289 
1289  Return the most recently-determined functional assignment of a particular feature.  Return the most recently-determined functional assignment of a particular feature.
1290    
1291  The functional assignment is handled differently depending on the type of feature. If  The functional assignment is handled differently depending on the type of feature. If
1292  the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional  the feature is identified by a FIG ID (begins with the string C<fig|>), then the functional
1293  assignment is a type of annotation. The format of an assignment is described in  assignment is taken from the B<Feature> or C<Annotation> table, depending.
 L</ParseAssignment>. Its worth noting that we cannot filter on the content of the  
 annotation itself because it's a text field; however, this is not a big problem because  
 most features only have a small number of annotations.  
1294    
1295  Each user has an associated list of trusted users. The assignment returned will be the most  Each user has an associated list of trusted users. The assignment returned will be the most
1296  recent one by at least one of the trusted users. If no trusted user list is available, then  recent one by at least one of the trusted users. If no trusted user list is available, then
# Line 1217  Line 1309 
1309    
1310  =item userID (optional)  =item userID (optional)
1311    
1312  ID of the user whose function determination is desired. If omitted, only the latest  ID of the user whose function determination is desired. If omitted, the primary
1313  C<FIG> assignment will be returned.  functional assignment in the B<Feature> table will be returned.
1314    
1315  =item RETURN  =item RETURN
1316    
# Line 1235  Line 1327 
1327      my $retVal;      my $retVal;
1328      # Determine the ID type.      # Determine the ID type.
1329      if ($featureID =~ m/^fig\|/) {      if ($featureID =~ m/^fig\|/) {
1330          # Here we have a FIG feature ID. We must build the list of trusted          # Here we have a FIG feature ID.
1331          # users.          if (!$userID) {
1332                # Use the primary assignment.
1333                ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(assignment)']);
1334            } else {
1335                # We must build the list of trusted users.
1336          my %trusteeTable = ();          my %trusteeTable = ();
1337          # Check the user ID.          # Check the user ID.
1338          if (!$userID) {          if (!$userID) {
# Line 1258  Line 1354 
1354              }              }
1355          }          }
1356          # Build a query for all of the feature's annotations, sorted by date.          # Build a query for all of the feature's annotations, sorted by date.
1357          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation'],              my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1358                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",
1359                                 [$featureID]);                                 [$featureID]);
1360          my $timeSelected = 0;          my $timeSelected = 0;
1361          # Loop until we run out of annotations.          # Loop until we run out of annotations.
1362          while (my $annotation = $query->Fetch()) {          while (my $annotation = $query->Fetch()) {
1363              # Get the annotation text.              # Get the annotation text.
1364              my ($text, $time) = $annotation->Values(['Annotation(annotation)','Annotation(time)']);                  my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)',
1365                                                             'Annotation(time)', 'MadeAnnotation(from-link)']);
1366              # Check to see if this is a functional assignment for a trusted user.              # Check to see if this is a functional assignment for a trusted user.
1367              my ($user, $function) = _ParseAssignment($text);                  my ($actualUser, $function) = _ParseAssignment($user, $text);
1368              if ($user) {                  Trace("Assignment user is $actualUser, text is $function.") if T(4);
1369                    if ($actualUser) {
1370                  # Here it is a functional assignment. Check the time and the user                  # Here it is a functional assignment. Check the time and the user
1371                  # name. The time must be recent and the user must be trusted.                  # name. The time must be recent and the user must be trusted.
1372                  if ((exists $trusteeTable{$user}) && ($time > $timeSelected)) {                      if ((exists $trusteeTable{$actualUser}) && ($time > $timeSelected)) {
1373                      $retVal = $function;                      $retVal = $function;
1374                      $timeSelected = $time;                      $timeSelected = $time;
1375                  }                  }
1376              }              }
1377          }          }
1378            }
1379      } else {      } else {
1380          # Here we have a non-FIG feature ID. In this case the user ID does not          # Here we have a non-FIG feature ID. In this case the user ID does not
1381          # matter. We simply get the information from the External Alias Function          # matter. We simply get the information from the External Alias Function
# Line 1330  Line 1429 
1429          # users.          # users.
1430          my %trusteeTable = ();          my %trusteeTable = ();
1431          # Build a query for all of the feature's annotations, sorted by date.          # Build a query for all of the feature's annotations, sorted by date.
1432          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation'],          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1433                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",
1434                                 [$featureID]);                                 [$featureID]);
1435          my $timeSelected = 0;          my $timeSelected = 0;
1436          # Loop until we run out of annotations.          # Loop until we run out of annotations.
1437          while (my $annotation = $query->Fetch()) {          while (my $annotation = $query->Fetch()) {
1438              # Get the annotation text.              # Get the annotation text.
1439              my ($text, $time) = $annotation->Values(['Annotation(annotation)','Annotation(time)']);              my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)',
1440                                                                'Annotation(time)',
1441                                                                'MadeAnnotation(user)']);
1442              # Check to see if this is a functional assignment for a trusted user.              # Check to see if this is a functional assignment for a trusted user.
1443              my ($user, $function) = _ParseAssignment($text);              my ($actualUser, $function) = _ParseAssignment($user, $text);
1444              if ($user) {              if ($actualUser) {
1445                  # Here it is a functional assignment.                  # Here it is a functional assignment.
1446                  push @retVal, [$user, $function];                  push @retVal, [$actualUser, $function];
1447              }              }
1448          }          }
1449      } else {      } else {
1450          # Here we have a non-FIG feature ID. In this case the user ID does not          # Here we have a non-FIG feature ID. In this case the user ID does not
1451          # matter. We simply get the information from the External Alias Function          # matter. We simply get the information from the External Alias Function
1452          # table.          # table.
1453          push @retVal, $self->GetEntityValues('ExternalAliasFunc', $featureID, ['ExternalAliasFunc(func)']);          my @assignments = $self->GetEntityValues('ExternalAliasFunc', $featureID,
1454                                                     ['ExternalAliasFunc(func)']);
1455            push @retVal, map { ['master', $_] } @assignments;
1456      }      }
1457      # Return the assignments found.      # Return the assignments found.
1458      return @retVal;      return @retVal;
# Line 1388  Line 1491 
1491      my %retVal = ();      my %retVal = ();
1492      # Loop through the incoming features.      # Loop through the incoming features.
1493      for my $featureID (@{$featureList}) {      for my $featureID (@{$featureList}) {
1494          # Create a query to get the feature's best hit.          # Ask the server for the feature's best hit.
1495          my $query = $self->Get(['IsBidirectionalBestHitOf'],          my @bbhData = FIGRules::BBHData($featureID);
                                "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?",  
                                [$featureID, $genomeID]);  
1496          # Peel off the BBHs found.          # Peel off the BBHs found.
1497          my @found = ();          my @found = ();
1498          while (my $bbh = $query->Fetch) {          for my $bbh (@bbhData) {
1499              push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)');              my $fid = $bbh->[0];
1500                my $bbGenome = $self->GenomeOf($fid);
1501                if ($bbGenome eq $genomeID) {
1502                    push @found, $fid;
1503                }
1504          }          }
1505          $retVal{$featureID} = \@found;          $retVal{$featureID} = \@found;
1506      }      }
# Line 1409  Line 1514 
1514    
1515  Return a list of the similarities to the specified feature.  Return a list of the similarities to the specified feature.
1516    
1517  Sprout does not support real similarities, so this method just returns the bidirectional  This method just returns the bidirectional best hits for performance reasons.
 best hits.  
1518    
1519  =over 4  =over 4
1520    
# Line 1430  Line 1534 
1534      # Get the parameters.      # Get the parameters.
1535      my ($self, $featureID, $count) = @_;      my ($self, $featureID, $count) = @_;
1536      # Ask for the best hits.      # Ask for the best hits.
1537      my @lists = $self->GetAll(['IsBidirectionalBestHitOf'],      my @lists = FIGRules::BBHData($featureID);
                               "IsBidirectionalBestHitOf(from-link) = ? ORDER BY IsBidirectionalBestHitOf(score) DESC",  
                               [$featureID], ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(score)'],  
                               $count);  
1538      # Create the return value.      # Create the return value.
1539      my %retVal = ();      my %retVal = ();
1540      for my $tuple (@lists) {      for my $tuple (@lists) {
# Line 1443  Line 1544 
1544      return %retVal;      return %retVal;
1545  }  }
1546    
   
   
1547  =head3 IsComplete  =head3 IsComplete
1548    
1549  C<< my $flag = $sprout->IsComplete($genomeID); >>  C<< my $flag = $sprout->IsComplete($genomeID); >>
# Line 1475  Line 1574 
1574      my $genomeData = $self->GetEntity('Genome', $genomeID);      my $genomeData = $self->GetEntity('Genome', $genomeID);
1575      if ($genomeData) {      if ($genomeData) {
1576          # The genome exists, so get the completeness flag.          # The genome exists, so get the completeness flag.
1577          ($retVal) = $genomeData->Value('complete');          ($retVal) = $genomeData->Value('Genome(complete)');
1578      }      }
1579      # Return the result.      # Return the result.
1580      return $retVal;      return $retVal;
# Line 1515  Line 1614 
1614    
1615  C<< my $genomeID = $sprout->GenomeOf($featureID); >>  C<< my $genomeID = $sprout->GenomeOf($featureID); >>
1616    
1617  Return the genome that contains a specified feature.  Return the genome that contains a specified feature or contig.
1618    
1619  =over 4  =over 4
1620    
1621  =item featureID  =item featureID
1622    
1623  ID of the feature whose genome is desired.  ID of the feature or contig whose genome is desired.
1624    
1625  =item RETURN  =item RETURN
1626    
1627  Returns the ID of the genome for the specified feature. If the feature is not found, returns  Returns the ID of the genome for the specified feature or contig. If the feature or contig is not
1628  an undefined value.  found, returns an undefined value.
1629    
1630  =back  =back
1631    
# Line 1535  Line 1634 
1634  sub GenomeOf {  sub GenomeOf {
1635      # Get the parameters.      # Get the parameters.
1636      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1637      # Create a query to find the genome associated with the feature.      # Create a query to find the genome associated with the incoming ID.
1638      my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ?", [$featureID]);      my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ? OR HasContig(to-link) = ?",
1639                               [$featureID, $featureID]);
1640      # Declare the return value.      # Declare the return value.
1641      my $retVal;      my $retVal;
1642      # Get the genome ID.      # Get the genome ID.
# Line 1571  Line 1671 
1671  sub CoupledFeatures {  sub CoupledFeatures {
1672      # Get the parameters.      # Get the parameters.
1673      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1674        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1675      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1676      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1677                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1583  Line 1684 
1684          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1685          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1686                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1687          # The coupling ID contains the two feature IDs separated by a space. We use          Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1688          # this information to find the ID of the other feature.          # Get the other feature that participates in the coupling.
1689          my ($fid1, $fid2) = split / /, $couplingID;          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1690          my $otherFeatureID = ($featureID eq $fid1 ? $fid2 : $fid1);                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1691                                               [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1692            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1693          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1694          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1695          $found = 1;          $found = 1;
# Line 1719  Line 1822 
1822      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1823      # Declare the return values. We'll start with the coupling ID and undefine the      # Declare the return values. We'll start with the coupling ID and undefine the
1824      # flag and score until we have more information.      # flag and score until we have more information.
1825      my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1826      # Find the coupling data.      # Find the coupling data.
1827      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1828                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
# Line 1740  Line 1843 
1843      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
1844  }  }
1845    
1846  =head3 CouplingID  =head3 GetSynonymGroup
1847    
1848  C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>  C<< my $id = $sprout->GetSynonymGroup($fid); >>
1849    
1850    Return the synonym group name for the specified feature.
1851    
1852    =over 4
1853    
1854    =item fid
1855    
1856    ID of the feature whose synonym group is desired.
1857    
1858    =item RETURN
1859    
1860    The name of the synonym group to which the feature belongs. If the feature does
1861    not belong to a synonym group, the feature ID itself is returned.
1862    
1863    =back
1864    
1865    =cut
1866    
1867    sub GetSynonymGroup {
1868        # Get the parameters.
1869        my ($self, $fid) = @_;
1870        # Declare the return variable.
1871        my $retVal;
1872        # Find the synonym group.
1873        my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
1874                                       [$fid], 'IsSynonymGroupFor(from-link)');
1875        # Check to see if we found anything.
1876        if (@groups) {
1877            $retVal = $groups[0];
1878        } else {
1879            $retVal = $fid;
1880        }
1881        # Return the result.
1882        return $retVal;
1883    }
1884    
1885    =head3 GetBoundaries
1886    
1887    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1888    
1889    Determine the begin and end boundaries for the locations in a list. All of the
1890    locations must belong to the same contig and have mostly the same direction in
1891    order for this method to produce a meaningful result. The resulting
1892    begin/end pair will contain all of the bases in any of the locations.
1893    
1894    =over 4
1895    
1896    =item locList
1897    
1898    List of locations to process.
1899    
1900    =item RETURN
1901    
1902    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1903    and the ending boundary. The beginning boundary will be left of the
1904    end for mostly-forward locations and right of the end for mostly-backward
1905    locations.
1906    
1907    =back
1908    
1909    =cut
1910    
1911    sub GetBoundaries {
1912        # Get the parameters.
1913        my ($self, @locList) = @_;
1914        # Set up the counters used to determine the most popular direction.
1915        my %counts = ( '+' => 0, '-' => 0 );
1916        # Get the last location and parse it.
1917        my $locObject = BasicLocation->new(pop @locList);
1918        # Prime the loop with its data.
1919        my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right);
1920        # Count its direction.
1921        $counts{$locObject->Dir}++;
1922        # Loop through the remaining locations. Note that in most situations, this loop
1923        # will not iterate at all, because most of the time we will be dealing with a
1924        # singleton list.
1925        for my $loc (@locList) {
1926            # Create a location object.
1927            my $locObject = BasicLocation->new($loc);
1928            # Count the direction.
1929            $counts{$locObject->Dir}++;
1930            # Get the left end and the right end.
1931            my $left = $locObject->Left;
1932            my $right = $locObject->Right;
1933            # Merge them into the return variables.
1934            if ($left < $beg) {
1935                $beg = $left;
1936            }
1937            if ($right > $end) {
1938                $end = $right;
1939            }
1940        }
1941        # If the most common direction is reverse, flip the begin and end markers.
1942        if ($counts{'-'} > $counts{'+'}) {
1943            ($beg, $end) = ($end, $beg);
1944        }
1945        # Return the result.
1946        return ($contig, $beg, $end);
1947    }
1948    
1949    =head3 CouplingID
1950    
1951    C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1952    
1953  Return the coupling ID for a pair of feature IDs.  Return the coupling ID for a pair of feature IDs.
1954    
# Line 1775  Line 1981 
1981  =cut  =cut
1982  #: Return Type $;  #: Return Type $;
1983  sub CouplingID {  sub CouplingID {
1984      return join " ", sort @_;      my ($self, @pegs) = @_;
1985  }      return $self->DigestKey(join " ", sort @pegs);
   
 =head3 GetEntityTypes  
   
 C<< my @entityList = $sprout->GetEntityTypes(); >>  
   
 Return the list of supported entity types.  
   
 =cut  
 #: Return Type @;  
 sub GetEntityTypes {  
     # Get the parameters.  
     my ($self) = @_;  
     # Get the underlying database object.  
     my $erdb = $self->{_erdb};  
     # Get its entity type list.  
     my @retVal = $erdb->GetEntityTypes();  
1986  }  }
1987    
1988  =head3 ReadFasta  =head3 ReadFasta
# Line 1940  Line 2130 
2130      # Get the data directory name.      # Get the data directory name.
2131      my $outputDirectory = $self->{_options}->{dataDir};      my $outputDirectory = $self->{_options}->{dataDir};
2132      # Dump the relations.      # Dump the relations.
2133      $self->{_erdb}->DumpRelations($outputDirectory);      $self->DumpRelations($outputDirectory);
2134  }  }
2135    
2136  =head3 XMLFileName  =head3 XMLFileName
# Line 1992  Line 2182 
2182      # Get the parameters.      # Get the parameters.
2183      my ($self, $objectType, $fieldHash) = @_;      my ($self, $objectType, $fieldHash) = @_;
2184      # Call the underlying method.      # Call the underlying method.
2185      $self->{_erdb}->InsertObject($objectType, $fieldHash);      $self->InsertObject($objectType, $fieldHash);
2186  }  }
2187    
2188  =head3 Annotate  =head3 Annotate
# Line 2151  Line 2341 
2341      return @retVal;      return @retVal;
2342  }  }
2343    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
     # Get the parameters.  
     my ($self, $entityName, $entityID) = @_;  
     # Check for the entity instance.  
     Trace("Checking existence of $entityName with ID=$entityID.") if T(4);  
     my $testInstance = $self->GetEntity($entityName, $entityID);  
     # Return an existence indicator.  
     my $retVal = ($testInstance ? 1 : 0);  
     return $retVal;  
 }  
   
2344  =head3 FeatureTranslation  =head3 FeatureTranslation
2345    
2346  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 2520  Line 2675 
2675      return $retVal;      return $retVal;
2676  }  }
2677    
2678    =head3 PropertyID
2679    
2680    C<< my $id = $sprout->PropertyID($propName, $propValue); >>
2681    
2682    Return the ID of the specified property name and value pair, if the
2683    pair exists.
2684    
2685    =over 4
2686    
2687    =item propName
2688    
2689    Name of the desired property.
2690    
2691    =item propValue
2692    
2693    Value expected for the desired property.
2694    
2695    =item RETURN
2696    
2697    Returns the ID of the name/value pair, or C<undef> if the pair does not exist.
2698    
2699    =back
2700    
2701    =cut
2702    
2703    sub PropertyID {
2704        # Get the parameters.
2705        my ($self, $propName, $propValue) = @_;
2706        # Try to find the ID.
2707        my ($retVal) = $self->GetFlat(['Property'],
2708                                      "Property(property-name) = ? AND Property(property-value) = ?",
2709                                      [$propName, $propValue], 'Property(id)');
2710        # Return the result.
2711        return $retVal;
2712    }
2713    
2714  =head3 MergedAnnotations  =head3 MergedAnnotations
2715    
2716  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>
# Line 2717  Line 2908 
2908      # Get the parameters.      # Get the parameters.
2909      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
2910      # Get the list of names.      # Get the list of names.
2911      my @retVal = $self->GetFlat(['ContainsFeature', 'HasSSCell'], "ContainsFeature(to-link) = ?",      my @retVal = $self->GetFlat(['HasRoleInSubsystem'], "HasRoleInSubsystem(from-link) = ?",
2912                                  [$featureID], 'HasSSCell(from-link)');                                  [$featureID], 'HasRoleInSubsystem(to-link)');
2913        # Return the result, sorted.
2914        return sort @retVal;
2915    }
2916    
2917    =head3 GenomeSubsystemData
2918    
2919    C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >>
2920    
2921    Return a hash mapping genome features to their subsystem roles.
2922    
2923    =over 4
2924    
2925    =item genomeID
2926    
2927    ID of the genome whose subsystem feature map is desired.
2928    
2929    =item RETURN
2930    
2931    Returns a hash mapping each feature of the genome to a list of 2-tuples. Eacb
2932    2-tuple contains a subsystem name followed by a role ID.
2933    
2934    =back
2935    
2936    =cut
2937    
2938    sub GenomeSubsystemData {
2939        # Get the parameters.
2940        my ($self, $genomeID) = @_;
2941        # Declare the return variable.
2942        my %retVal = ();
2943        # Get a list of the genome features that participate in subsystems. For each
2944        # feature we get its spreadsheet cells and the corresponding roles.
2945        my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],
2946                                 "HasFeature(from-link) = ?", [$genomeID],
2947                                 ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);
2948        # Now we get a list of the spreadsheet cells and their associated subsystems. Subsystems
2949        # with an unknown variant code (-1) are skipped. Note the genome ID is at both ends of the
2950        # list. We use it at the beginning to get all the spreadsheet cells for the genome and
2951        # again at the end to filter out participation in subsystems with a negative variant code.
2952        my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],
2953                                     "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",
2954                                     [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);
2955        # Now "@roleData" lists the spreadsheet cell and role for each of the genome's features.
2956        # "@cellData" lists the subsystem name for each of the genome's spreadsheet cells. We
2957        # link these two lists together to create the result. First, we want a hash mapping
2958        # spreadsheet cells to subsystem names.
2959        my %subHash = map { $_->[0] => $_->[1] } @cellData;
2960        # We loop through @cellData to build the hash.
2961        for my $roleEntry (@roleData) {
2962            # Get the data for this feature and cell.
2963            my ($fid, $cellID, $role) = @{$roleEntry};
2964            # Check for a subsystem name.
2965            my $subsys = $subHash{$cellID};
2966            if ($subsys) {
2967                # Insure this feature has an entry in the return hash.
2968                if (! exists $retVal{$fid}) { $retVal{$fid} = []; }
2969                # Merge in this new data.
2970                push @{$retVal{$fid}}, [$subsys, $role];
2971            }
2972        }
2973      # Return the result.      # Return the result.
2974      return @retVal;      return %retVal;
2975  }  }
2976    
2977  =head3 RelatedFeatures  =head3 RelatedFeatures
# Line 2758  Line 3009 
3009      # Get the parameters.      # Get the parameters.
3010      my ($self, $featureID, $function, $userID) = @_;      my ($self, $featureID, $function, $userID) = @_;
3011      # Get a list of the features that are BBHs of the incoming feature.      # Get a list of the features that are BBHs of the incoming feature.
3012      my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'],      my @bbhFeatures = map { $_->[0] } FIGRules::BBHData($featureID);
                                      "IsBidirectionalBestHitOf(from-link) = ?", [$featureID],  
                                      'IsBidirectionalBestHitOf(to-link)');  
3013      # Now we loop through the features, pulling out the ones that have the correct      # Now we loop through the features, pulling out the ones that have the correct
3014      # functional assignment.      # functional assignment.
3015      my @retVal = ();      my @retVal = ();
# Line 2824  Line 3073 
3073      return @retVal;      return @retVal;
3074  }  }
3075    
 =head3 GetAll  
   
 C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>  
   
 Return a list of values taken from the objects returned by a query. The first three  
 parameters correspond to the parameters of the L</Get> method. The final parameter is  
 a list of the fields desired from each record found by the query. The field name  
 syntax is the standard syntax used for fields in the B<ERDB> system--  
 B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity  
 or relationship and I<fieldName> is the name of the field.  
   
 The list returned will be a list of lists. Each element of the list will contain  
 the values returned for the fields specified in the fourth parameter. If one of the  
 fields specified returns multiple values, they are flattened in with the rest. For  
 example, the following call will return a list of the features in a particular  
 spreadsheet cell, and each feature will be represented by a list containing the  
 feature ID followed by all of its aliases.  
   
 C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>  
   
 =over 4  
   
 =item objectNames  
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item fields  
   
 List of the fields to be returned in each element of the list returned.  
   
 =item count  
   
 Maximum number of records to return. If omitted or 0, all available records will be returned.  
   
 =item RETURN  
   
 Returns a list of list references. Each element of the return list contains the values for the  
 fields specified in the B<fields> parameter.  
   
 =back  
   
 =cut  
 #: Return Type @@;  
 sub GetAll {  
     # Get the parameters.  
     my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_;  
     # Call the ERDB method.  
     my @retVal = $self->{_erdb}->GetAll($objectNames, $filterClause, $parameterList,  
                                         $fields, $count);  
     # Return the resulting list.  
     return @retVal;  
 }  
   
 =head3 GetFlat  
   
 C<< my @list = $sprout->GetFlat(\@objectNames, $filterClause, $parameterList, $field); >>  
   
 This is a variation of L</GetAll> that asks for only a single field per record and  
 returns a single flattened list.  
   
 =over 4  
   
 =item objectNames  
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item field  
   
 Name of the field to be used to get the elements of the list returned.  
   
 =item RETURN  
   
 Returns a list of values.  
   
 =back  
   
 =cut  
 #: Return Type @;  
 sub GetFlat {  
     # Get the parameters.  
     my ($self, $objectNames, $filterClause, $parameterList, $field) = @_;  
     # Construct the query.  
     my $query = $self->Get($objectNames, $filterClause, $parameterList);  
     # Create the result list.  
     my @retVal = ();  
     # Loop through the records, adding the field values found to the result list.  
     while (my $row = $query->Fetch()) {  
         push @retVal, $row->Value($field);  
     }  
     # Return the list created.  
     return @retVal;  
 }  
   
3076  =head3 Protein  =head3 Protein
3077    
3078  C<< my $protein = Sprout::Protein($sequence, $table); >>  C<< my $protein = Sprout::Protein($sequence, $table); >>
# Line 3015  Line 3145 
3145      # Loop through the input triples.      # Loop through the input triples.
3146      my $n = length $sequence;      my $n = length $sequence;
3147      for (my $i = 0; $i < $n; $i += 3) {      for (my $i = 0; $i < $n; $i += 3) {
3148          # Get the current triple from the sequence.          # Get the current triple from the sequence. Note we convert to
3149          my $triple = substr($sequence, $i, 3);          # upper case to insure a match.
3150            my $triple = uc substr($sequence, $i, 3);
3151          # Translate it using the table.          # Translate it using the table.
3152          my $protein = "X";          my $protein = "X";
3153          if (exists $table->{$triple}) { $protein = $table->{$triple}; }          if (exists $table->{$triple}) { $protein = $table->{$triple}; }
# Line 3044  Line 3175 
3175      # Create the return list, priming it with the name of the data directory.      # Create the return list, priming it with the name of the data directory.
3176      my @retVal = ($self->{_options}->{dataDir});      my @retVal = ($self->{_options}->{dataDir});
3177      # Concatenate the table names.      # Concatenate the table names.
3178      push @retVal, $self->{_erdb}->GetTableNames();      push @retVal, $self->GetTableNames();
3179      # Return the result.      # Return the result.
3180      return @retVal;      return @retVal;
3181  }  }
3182    
3183    =head3 BBHMatrix
3184    
3185    C<< my %bbhMap = $sprout->BBHMatrix($genomeID, $cutoff, @targets); >>
3186    
3187    Find all the bidirectional best hits for the features of a genome in a
3188    specified list of target genomes. The return value will be a hash mapping
3189    features in the original genome to their bidirectional best hits in the
3190    target genomes.
3191    
3192    =over 4
3193    
3194    =item genomeID
3195    
3196    ID of the genome whose features are to be examined for bidirectional best hits.
3197    
3198    =item cutoff
3199    
3200    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3201    
3202    =item targets
3203    
3204    List of target genomes. Only pairs originating in the original
3205    genome and landing in one of the target genomes will be returned.
3206    
3207    =item RETURN
3208    
3209    Returns a hash mapping each feature in the original genome to a hash mapping its
3210    BBH pegs in the target genomes to their scores.
3211    
3212    =back
3213    
3214    =cut
3215    
3216    sub BBHMatrix {
3217        # Get the parameters.
3218        my ($self, $genomeID, $cutoff, @targets) = @_;
3219        # Declare the return variable.
3220        my %retVal = ();
3221        # Ask for the BBHs.
3222        my @bbhList = FIGRules::BatchBBHs("fig|$genomeID.%", $cutoff, @targets);
3223        # We now have a set of 4-tuples that we need to convert into a hash of hashes.
3224        for my $bbhData (@bbhList) {
3225            my ($peg1, $peg2, $score) = @{$bbhData};
3226            if (! exists $retVal{$peg1}) {
3227                $retVal{$peg1} = { $peg2 => $score };
3228            } else {
3229                $retVal{$peg1}->{$peg2} = $score;
3230            }
3231        }
3232        # Return the result.
3233        return %retVal;
3234    }
3235    
3236    
3237    =head3 SimMatrix
3238    
3239    C<< my %simMap = $sprout->SimMatrix($genomeID, $cutoff, @targets); >>
3240    
3241    Find all the similarities for the features of a genome in a
3242    specified list of target genomes. The return value will be a hash mapping
3243    features in the original genome to their similarites in the
3244    target genomes.
3245    
3246    =over 4
3247    
3248    =item genomeID
3249    
3250    ID of the genome whose features are to be examined for similarities.
3251    
3252    =item cutoff
3253    
3254    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3255    
3256    =item targets
3257    
3258    List of target genomes. Only pairs originating in the original
3259    genome and landing in one of the target genomes will be returned.
3260    
3261    =item RETURN
3262    
3263    Returns a hash mapping each feature in the original genome to a hash mapping its
3264    similar pegs in the target genomes to their scores.
3265    
3266    =back
3267    
3268    =cut
3269    
3270    sub SimMatrix {
3271        # Get the parameters.
3272        my ($self, $genomeID, $cutoff, @targets) = @_;
3273        # Declare the return variable.
3274        my %retVal = ();
3275        # Get the list of features in the source organism.
3276        my @fids = $self->FeaturesOf($genomeID);
3277        # Ask for the sims. We only want similarities to fig features.
3278        my $simList = FIGRules::GetNetworkSims($self, \@fids, {}, 1000, $cutoff, "fig");
3279        if (! defined $simList) {
3280            Confess("Unable to retrieve similarities from server.");
3281        } else {
3282            Trace("Processing sims.") if T(3);
3283            # We now have a set of sims that we need to convert into a hash of hashes. First, we
3284            # Create a hash for the target genomes.
3285            my %targetHash = map { $_ => 1 } @targets;
3286            for my $simData (@{$simList}) {
3287                # Get the PEGs and the score.
3288                my ($peg1, $peg2, $score) = ($simData->id1, $simData->id2, $simData->psc);
3289                # Insure the second ID is in the target list.
3290                my ($genome2) = FIGRules::ParseFeatureID($peg2);
3291                if (exists $targetHash{$genome2}) {
3292                    # Here it is. Now we need to add it to the return hash. How we do that depends
3293                    # on whether or not $peg1 is new to us.
3294                    if (! exists $retVal{$peg1}) {
3295                        $retVal{$peg1} = { $peg2 => $score };
3296                    } else {
3297                        $retVal{$peg1}->{$peg2} = $score;
3298                    }
3299                }
3300            }
3301        }
3302        # Return the result.
3303        return %retVal;
3304    }
3305    
3306    
3307  =head3 LowBBHs  =head3 LowBBHs
3308    
3309  C<< my %bbhMap = $sprout->GoodBBHs($featureID, $cutoff); >>  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>
3310    
3311  Return the bidirectional best hits of a feature whose score is no greater than a  Return the bidirectional best hits of a feature whose score is no greater than a
3312  specified cutoff value. A higher cutoff value will allow inclusion of hits with  specified cutoff value. A higher cutoff value will allow inclusion of hits with
# Line 3080  Line 3335 
3335      my ($self, $featureID, $cutoff) = @_;      my ($self, $featureID, $cutoff) = @_;
3336      # Create the return hash.      # Create the return hash.
3337      my %retVal = ();      my %retVal = ();
3338      # Create a query to get the desired BBHs.      # Query for the desired BBHs.
3339      my @bbhList = $self->GetAll(['IsBidirectionalBestHitOf'],      my @bbhList = FIGRules::BBHData($featureID, $cutoff);
                                 'IsBidirectionalBestHitOf(sc) <= ? AND IsBidirectionalBestHitOf(from-link) = ?',  
                                 [$cutoff, $featureID],  
                                 ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(sc)']);  
3340      # Form the results into the return hash.      # Form the results into the return hash.
3341      for my $pair (@bbhList) {      for my $pair (@bbhList) {
3342          $retVal{$pair->[0]} = $pair->[1];          my $fid = $pair->[0];
3343            if ($self->Exists('Feature', $fid)) {
3344                $retVal{$fid} = $pair->[1];
3345            }
3346      }      }
3347      # Return the result.      # Return the result.
3348      return %retVal;      return %retVal;
3349  }  }
3350    
3351    =head3 Sims
3352    
3353    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3354    
3355    Get a list of similarities for a specified feature. Similarity information is not kept in the
3356    Sprout database; rather, they are retrieved from a network server. The similarities are
3357    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3358    so that its elements can be accessed by name.
3359    
3360    Similarities can be either raw or expanded. The raw similarities are basic
3361    hits between features with similar DNA. Expanding a raw similarity drags in any
3362    features considered substantially identical. So, for example, if features B<A1>,
3363    B<A2>, and B<A3> are all substantially identical to B<A>, then a raw similarity
3364    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3365    
3366    =over 4
3367    
3368    =item fid
3369    
3370    ID of the feature whose similarities are desired.
3371    
3372    =item maxN
3373    
3374    Maximum number of similarities to return.
3375    
3376    =item maxP
3377    
3378    Minumum allowable similarity score.
3379    
3380    =item select
3381    
3382    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3383    means only similarities to FIG features are returned; C<all> means all expanded
3384    similarities are returned; and C<figx> means similarities are expanded until the
3385    number of FIG features equals the maximum.
3386    
3387    =item max_expand
3388    
3389    The maximum number of features to expand.
3390    
3391    =item filters
3392    
3393    Reference to a hash containing filter information, or a subroutine that can be
3394    used to filter the sims.
3395    
3396    =item RETURN
3397    
3398    Returns a reference to a list of similarity objects, or C<undef> if an error
3399    occurred.
3400    
3401    =back
3402    
3403    =cut
3404    
3405    sub Sims {
3406        # Get the parameters.
3407        my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
3408        # Create the shim object to test for deleted FIDs.
3409        my $shim = FidCheck->new($self);
3410        # Ask the network for sims.
3411        my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters);
3412        # Return the result.
3413        return $retVal;
3414    }
3415    
3416    =head3 IsAllGenomes
3417    
3418    C<< my $flag = $sprout->IsAllGenomes(\@list, \@checkList); >>
3419    
3420    Return TRUE if all genomes in the second list are represented in the first list at
3421    least one. Otherwise, return FALSE. If the second list is omitted, the first list is
3422    compared to a list of all the genomes.
3423    
3424    =over 4
3425    
3426    =item list
3427    
3428    Reference to the list to be compared to the second list.
3429    
3430    =item checkList (optional)
3431    
3432    Reference to the comparison target list. Every genome ID in this list must occur at
3433    least once in the first list. If this parameter is omitted, a list of all the genomes
3434    is used.
3435    
3436    =item RETURN
3437    
3438    Returns TRUE if every item in the second list appears at least once in the
3439    first list, else FALSE.
3440    
3441    =back
3442    
3443    =cut
3444    
3445    sub IsAllGenomes {
3446        # Get the parameters.
3447        my ($self, $list, $checkList) = @_;
3448        # Supply the checklist if it was omitted.
3449        $checkList = [$self->Genomes()] if ! defined($checkList);
3450        # Create a hash of the original list.
3451        my %testList = map { $_ => 1 } @{$list};
3452        # Declare the return variable. We assume that the representation
3453        # is complete and stop at the first failure.
3454        my $retVal = 1;
3455        my $n = scalar @{$checkList};
3456        for (my $i = 0; $retVal && $i < $n; $i++) {
3457            if (! $testList{$checkList->[$i]}) {
3458                $retVal = 0;
3459            }
3460        }
3461        # Return the result.
3462        return $retVal;
3463    }
3464    
3465  =head3 GetGroups  =head3 GetGroups
3466    
3467  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>
# Line 3114  Line 3483 
3483          # Here we have a group list. Loop through them individually,          # Here we have a group list. Loop through them individually,
3484          # getting a list of the relevant genomes.          # getting a list of the relevant genomes.
3485          for my $group (@{$groupList}) {          for my $group (@{$groupList}) {
3486              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(group-name) = ?",              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(primary-group) = ?",
3487                  [$group], "Genome(id)");                  [$group], "Genome(id)");
3488              $retVal{$group} = \@genomeIDs;              $retVal{$group} = \@genomeIDs;
3489          }          }
# Line 3122  Line 3491 
3491          # Here we need all of the groups. In this case, we run through all          # Here we need all of the groups. In this case, we run through all
3492          # of the genome records, putting each one found into the appropriate          # of the genome records, putting each one found into the appropriate
3493          # group. Note that we use a filter clause to insure that only genomes          # group. Note that we use a filter clause to insure that only genomes
3494          # in groups are included in the return set.          # in real NMPDR groups are included in the return set.
3495          my @genomes = $self->GetAll(['Genome'], "Genome(group-name) > ' '", [],          my @genomes = $self->GetAll(['Genome'], "Genome(primary-group) <> ?",
3496                                      ['Genome(id)', 'Genome(group-name)']);                                      [$FIG_Config::otherGroup], ['Genome(id)', 'Genome(primary-group)']);
3497          # Loop through the genomes found.          # Loop through the genomes found.
3498          for my $genome (@genomes) {          for my $genome (@genomes) {
3499              # Pop this genome's ID off the current list.              # Pop this genome's ID off the current list.
# Line 3214  Line 3583 
3583      return $retVal;      return $retVal;
3584  }  }
3585    
3586  =head2 Internal Utility Methods  =head3 DeleteGenome
3587    
3588  =head3 ParseAssignment  C<< my $stats = $sprout->DeleteGenome($genomeID, $testFlag); >>
3589    
3590  Parse annotation text to determine whether or not it is a functional assignment. If it is,  Delete a genome from the database.
 the user, function text, and assigning user will be returned as a 3-element list. If it  
 isn't, an empty list will be returned.  
3591    
3592  A functional assignment is always of the form  =over 4
3593    
3594      I<XXXX>C<\nset >I<YYYY>C< function to\n>I<ZZZZZ>  =item genomeID
3595    
3596  where I<XXXX> is the B<assigning user>, I<YYYY> is the B<user>, and I<ZZZZ> is the  ID of the genome to delete
 actual functional role. In most cases, the user and the assigning user will be the  
 same, but that is not always the case.  
3597    
3598  This is a static method.  =item testFlag
3599    
3600    If TRUE, then the DELETE statements will be traced, but no deletions will occur.
3601    
3602    =item RETURN
3603    
3604    Returns a statistics object describing the rows deleted.
3605    
3606    =back
3607    
3608    =cut
3609    #: Return Type $%;
3610    sub DeleteGenome {
3611        # Get the parameters.
3612        my ($self, $genomeID, $testFlag) = @_;
3613        # Perform the delete for the genome's features.
3614        my $retVal = $self->Delete('Feature', "fig|$genomeID.%", testMode => $testFlag);
3615        # Perform the delete for the primary genome data.
3616        my $stats = $self->Delete('Genome', $genomeID, testMode => $testFlag);
3617        $retVal->Accumulate($stats);
3618        # Return the result.
3619        return $retVal;
3620    }
3621    
3622    =head3 Fix
3623    
3624    C<< my %fixedHash = Sprout::Fix(%groupHash); >>
3625    
3626    Prepare a genome group hash (like that returned by L</GetGroups> for processing.
3627    Groups with the same primary name will be combined. The primary name is the
3628    first capitalized word in the group name.
3629    
3630  =over 4  =over 4
3631    
3632  =item text  =item groupHash
3633    
3634  Text of the annotation.  Hash to be fixed up.
3635    
3636  =item RETURN  =item RETURN
3637    
3638  Returns an empty list if the annotation is not a functional assignment; otherwise, returns  Returns a fixed-up version of the hash.
 a two-element list containing the user name and the function text.  
3639    
3640  =back  =back
3641    
3642  =cut  =cut
3643    
3644  sub _ParseAssignment {  sub Fix {
3645      # Get the parameters.      # Get the parameters.
3646      my ($text) = @_;      my (%groupHash) = @_;
3647      # Declare the return value.      # Create the result hash.
3648      my @retVal = ();      my %retVal = ();
3649      # Check to see if this is a functional assignment.      # Copy over the genomes.
3650      my ($type, $function) = split(/\n/, $text);      for my $groupID (keys %groupHash) {
3651      if ($type =~ m/^set ([^ ]+) function to$/i) {          # Make a safety copy of the group ID.
3652          # Here it is, so we return the user name (which is in $1), the functional role text,          my $realGroupID = $groupID;
3653          # and the assigning user.          # Yank the primary name.
3654          @retVal = ($1, $function);          if ($groupID =~ /([A-Z]\w+)/) {
3655                $realGroupID = $1;
3656      }      }
3657      # Return the result list.          # Append this group's genomes into the result hash.
3658      return @retVal;          Tracer::AddToListMap(\%retVal, $realGroupID, @{$groupHash{$groupID}});
3659        }
3660        # Return the result hash.
3661        return %retVal;
3662  }  }
3663    
3664  =head3 FriendlyTimestamp  =head3 GroupPageName
3665    
3666  Convert a time number to a user-friendly time stamp for display.  C<< my $name = $sprout->GroupPageName($group); >>
3667    
3668  This is a static method.  Return the name of the page for the specified NMPDR group.
3669    
3670  =over 4  =over 4
3671    
3672  =item timeValue  =item group
3673    
3674  Numeric time value.  Name of the relevant group.
3675    
3676  =item RETURN  =item RETURN
3677    
3678  Returns a string containing the same time in user-readable format.  Returns the relative page name (e.g. C<../content/campy.php>). If the group file is not in
3679    memory it will be read in.
3680    
3681  =back  =back
3682    
3683  =cut  =cut
3684    
3685  sub FriendlyTimestamp {  sub GroupPageName {
3686      my ($timeValue) = @_;      # Get the parameters.
3687      my $retVal = localtime($timeValue);      my ($self, $group) = @_;
3688        # Declare the return variable.
3689        my $retVal;
3690        # Check for the group file data.
3691        if (! defined $self->{groupHash}) {
3692            # Read the group file.
3693            my %groupData = Sprout::ReadGroupFile($self->{_options}->{dataDir} . "/groups.tbl");
3694            # Store it in our object.
3695            $self->{groupHash} = \%groupData;
3696        }
3697        # Compute the real group name.
3698        my $realGroup = $group;
3699        if ($group =~ /([A-Z]\w+)/) {
3700            $realGroup = $1;
3701        }
3702        # Return the page name.
3703        $retVal = "../content/" . $self->{groupHash}->{$realGroup}->[1];
3704        # Return the result.
3705      return $retVal;      return $retVal;
3706  }  }
3707    
3708    =head3 ReadGroupFile
3709    
3710    C<< my %groupData = Sprout::ReadGroupFile($groupFileName); >>
3711    
3712    Read in the data from the specified group file. The group file contains information
3713    about each of the NMPDR groups.
3714    
3715    =over 4
3716    
3717    =item name
3718    
3719    Name of the group.
3720    
3721    =item page
3722    
3723    Name of the group's page on the web site (e.g. C<campy.php> for
3724    Campylobacter)
3725    
3726    =item genus
3727    
3728    Genus of the group
3729    
3730    =item species
3731    
3732    Species of the group, or an empty string if the group is for an entire
3733    genus. If the group contains more than one species, the species names
3734    should be separated by commas.
3735    
3736    =back
3737    
3738    The parameters to this method are as follows
3739    
3740    =over 4
3741    
3742    =item groupFile
3743    
3744    Name of the file containing the group data.
3745    
3746    =item RETURN
3747    
3748    Returns a hash keyed on group name. The value of each hash
3749    
3750    =back
3751    
3752    =cut
3753    
3754    sub ReadGroupFile {
3755        # Get the parameters.
3756        my ($groupFileName) = @_;
3757        # Declare the return variable.
3758        my %retVal;
3759        # Read the group file.
3760        my @groupLines = Tracer::GetFile($groupFileName);
3761        for my $groupLine (@groupLines) {
3762            my ($name, $page, $genus, $species) = split(/\t/, $groupLine);
3763            $retVal{$name} = [$page, $genus, $species];
3764        }
3765        # Return the result.
3766        return %retVal;
3767    }
3768    
3769  =head3 AddProperty  =head3 AddProperty
3770    
3771  C<< my  = $sprout->AddProperty($featureID, $key, $value, $url); >>  C<< my  = $sprout->AddProperty($featureID, $key, $value, $url); >>
# Line 3333  Line 3810 
3810      if (@properties) {      if (@properties) {
3811          # Here the property is already in the database. We save its ID.          # Here the property is already in the database. We save its ID.
3812          $propID = $properties[0];          $propID = $properties[0];
3813        } else {
3814          # Here the property value does not exist. We need to generate an ID. It will be set          # Here the property value does not exist. We need to generate an ID. It will be set
3815          # to a number one greater than the maximum value in the database. This call to          # to a number one greater than the maximum value in the database. This call to
3816          # GetAll will stop after one record.          # GetAll will stop after one record.
# Line 3346  Line 3824 
3824      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3825  }  }
3826    
3827    =head2 Virtual Methods
3828    
3829    =head3 CleanKeywords
3830    
3831    C<< my $cleanedString = $sprout->CleanKeywords($searchExpression); >>
3832    
3833    Clean up a search expression or keyword list. This involves converting the periods
3834    in EC numbers to underscores, converting non-leading minus signs to underscores,
3835    a vertical bar or colon to an apostrophe, and forcing lower case for all alphabetic
3836    characters. In addition, any extra spaces are removed.
3837    
3838    =over 4
3839    
3840    =item searchExpression
3841    
3842    Search expression or keyword list to clean. Note that a search expression may
3843    contain boolean operators which need to be preserved. This includes leading
3844    minus signs.
3845    
3846    =item RETURN
3847    
3848    Cleaned expression or keyword list.
3849    
3850    =back
3851    
3852    =cut
3853    
3854    sub CleanKeywords {
3855        # Get the parameters.
3856        my ($self, $searchExpression) = @_;
3857        # Perform the standard cleanup.
3858        my $retVal = $self->ERDB::CleanKeywords($searchExpression);
3859        # Fix the periods in EC and TC numbers.
3860        $retVal =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;
3861        # Fix non-trailing periods.
3862        $retVal =~ s/\.(\w)/_$1/g;
3863        # Fix non-leading minus signs.
3864        $retVal =~ s/(\w)[\-]/$1_/g;
3865        # Fix the vertical bars and colons
3866        $retVal =~ s/(\w)[|:](\w)/$1'$2/g;
3867        # Return the result.
3868        return $retVal;
3869    }
3870    
3871    =head2 Internal Utility Methods
3872    
3873    =head3 ParseAssignment
3874    
3875    Parse annotation text to determine whether or not it is a functional assignment. If it is,
3876    the user, function text, and assigning user will be returned as a 3-element list. If it
3877    isn't, an empty list will be returned.
3878    
3879    A functional assignment is always of the form
3880    
3881        C<set >I<YYYY>C< function to\n>I<ZZZZZ>
3882    
3883    where I<YYYY> is the B<user>, and I<ZZZZ> is the actual functional role. In most cases,
3884    the user and the assigning user (from MadeAnnotation) will be the same, but that is
3885    not always the case.
3886    
3887    In addition, the functional role may contain extra data that is stripped, such as
3888    terminating spaces or a comment separated from the rest of the text by a tab.
3889    
3890    This is a static method.
3891    
3892    =over 4
3893    
3894    =item user
3895    
3896    Name of the assigning user.
3897    
3898    =item text
3899    
3900    Text of the annotation.
3901    
3902    =item RETURN
3903    
3904    Returns an empty list if the annotation is not a functional assignment; otherwise, returns
3905    a two-element list containing the user name and the function text.
3906    
3907    =back
3908    
3909    =cut
3910    
3911    sub _ParseAssignment {
3912        # Get the parameters.
3913        my ($user, $text) = @_;
3914        # Declare the return value.
3915        my @retVal = ();
3916        # Check to see if this is a functional assignment.
3917        my ($type, $function) = split(/\n/, $text);
3918        if ($type =~ m/^set function to$/i) {
3919            # Here we have an assignment without a user, so we use the incoming user ID.
3920            @retVal = ($user, $function);
3921        } elsif ($type =~ m/^set (\S+) function to$/i) {
3922            # Here we have an assignment with a user that is passed back to the caller.
3923            @retVal = ($1, $function);
3924        }
3925        # If we have an assignment, we need to clean the function text. There may be
3926        # extra junk at the end added as a note from the user.
3927        if (defined( $retVal[1] )) {
3928            $retVal[1] =~ s/(\t\S)?\s*$//;
3929        }
3930        # Return the result list.
3931        return @retVal;
3932    }
3933    
3934    =head3 FriendlyTimestamp
3935    
3936    Convert a time number to a user-friendly time stamp for display.
3937    
3938    This is a static method.
3939    
3940    =over 4
3941    
3942    =item timeValue
3943    
3944    Numeric time value.
3945    
3946    =item RETURN
3947    
3948    Returns a string containing the same time in user-readable format.
3949    
3950    =back
3951    
3952    =cut
3953    
3954    sub FriendlyTimestamp {
3955        my ($timeValue) = @_;
3956        my $retVal = localtime($timeValue);
3957        return $retVal;
3958    }
3959    
3960    
3961  1;  1;

Legend:
Removed from v.1.46  
changed lines
  Added in v.1.98

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3