[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Tue Jan 25 01:36:09 2005 UTC revision 1.98, Tue Apr 10 06:13:33 2007 UTC
# Line 1  Line 1 
1  package Sprout;  package Sprout;
2    
3        require Exporter;
4        use ERDB;
5        @ISA = qw(Exporter ERDB);
6          use Data::Dumper;          use Data::Dumper;
7          use strict;          use strict;
         use Carp;  
8          use DBKernel;          use DBKernel;
9          use XML::Simple;          use XML::Simple;
10          use DBQuery;          use DBQuery;
11          use DBObject;      use ERDBObject;
         use ERDB;  
12          use Tracer;          use Tracer;
13          use FIGRules;          use FIGRules;
14        use FidCheck;
15          use Stats;          use Stats;
16      use POSIX qw(strftime);      use POSIX qw(strftime);
17        use BasicLocation;
18    
19  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
20    
# Line 32  Line 34 
34  query tasks. For example, L</genomes> lists the IDs of all the genomes in the database and  query tasks. For example, L</genomes> lists the IDs of all the genomes in the database and
35  L</dna_seq> returns the DNA sequence for a specified genome location.  L</dna_seq> returns the DNA sequence for a specified genome location.
36    
37    The Sprout object is a subclass of the ERDB object and inherits all its properties and methods.
38    
39  =cut  =cut
40    
41  #: Constructor SFXlate->new_sprout_only();  #: Constructor SFXlate->new_sprout_only();
42    
   
43  =head2 Public Methods  =head2 Public Methods
44    
45  =head3 new  =head3 new
# Line 63  Line 66 
66    
67  * B<xmlFileName> name of the XML file containing the database definition (default C<SproutDBD.xml>)  * B<xmlFileName> name of the XML file containing the database definition (default C<SproutDBD.xml>)
68    
69  * B<userData> user name and password, delimited by a slash (default C<root/>)  * B<userData> user name and password, delimited by a slash (default same as SEED)
70    
71  * B<port> connection port (default C<0>)  * B<port> connection port (default C<0>)
72    
73    * B<sock> connection socket (default same as SEED)
74    
75  * B<maxSegmentLength> maximum number of residues per feature segment, (default C<4500>)  * B<maxSegmentLength> maximum number of residues per feature segment, (default C<4500>)
76    
77  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)
78    
79    * B<noDBOpen> suppresses the connection to the database if TRUE, else FALSE
80    
81  =back  =back
82    
83  For example, the following constructor call specifies a database named I<Sprout> and a user name of  For example, the following constructor call specifies a database named I<Sprout> and a user name of
# Line 84  Line 91 
91  sub new {  sub new {
92          # Get the parameters.          # Get the parameters.
93          my ($class, $dbName, $options) = @_;          my ($class, $dbName, $options) = @_;
94        # Compute the DBD directory.
95        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
96                                                      $FIG_Config::fig );
97          # Compute the options. We do this by starting with a table of defaults and overwriting with          # Compute the options. We do this by starting with a table of defaults and overwriting with
98          # the incoming data.          # the incoming data.
99          my $optionTable = Tracer::GetOptions({          my $optionTable = Tracer::GetOptions({
100                                             dbType               => 'mysql',                     # database type                         dbType       => $FIG_Config::dbms,
101                                             dataDir              => 'Data',                      # data file directory                                                          # database type
102                                             xmlFileName  => 'SproutDBD.xml', # database definition file name                         dataDir      => $FIG_Config::sproutData,
103                                             userData             => 'root/',                     # user name and password                                                          # data file directory
104                                             port                 => 0,                           # database connection port                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
105                                                            # database definition file name
106                           userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
107                                                            # user name and password
108                           port         => $FIG_Config::dbport,
109                                                            # database connection port
110                           sock         => $FIG_Config::dbsock,
111                           host         => $FIG_Config::dbhost,
112                                             maxSegmentLength => 4500,            # maximum feature segment length                                             maxSegmentLength => 4500,            # maximum feature segment length
113                                             maxSequenceLength => 8000,           # maximum contig sequence length                                             maxSequenceLength => 8000,           # maximum contig sequence length
114                           noDBOpen     => 0,               # 1 to suppress the database open
115                                            }, $options);                                            }, $options);
116          # Get the data directory.          # Get the data directory.
117          my $dataDir = $optionTable->{dataDir};          my $dataDir = $optionTable->{dataDir};
# Line 101  Line 119 
119          $optionTable->{userData} =~ m!([^/]*)/(.*)$!;          $optionTable->{userData} =~ m!([^/]*)/(.*)$!;
120          my ($userName, $password) = ($1, $2);          my ($userName, $password) = ($1, $2);
121          # Connect to the database.          # Connect to the database.
122          my $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, $password, $optionTable->{port});      my $dbh;
123        if (! $optionTable->{noDBOpen}) {
124            $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
125                                    $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
126        }
127          # Create the ERDB object.          # Create the ERDB object.
128          my $xmlFileName = "$optionTable->{xmlFileName}";          my $xmlFileName = "$optionTable->{xmlFileName}";
129          my $erdb = ERDB->new($dbh, $xmlFileName);      my $retVal = ERDB::new($class, $dbh, $xmlFileName);
130          # Create this object.      # Add the option table and XML file name.
131          my $self = { _erdb => $erdb, _options => $optionTable, _xmlName => $xmlFileName };      $retVal->{_options} = $optionTable;
132          # Bless and return it.      $retVal->{_xmlName} = $xmlFileName;
133          bless $self;      # Set up space for the group file data.
134          return $self;      $retVal->{groupHash} = undef;
135        # Return it.
136        return $retVal;
137  }  }
138    
139  =head3 MaxSegment  =head3 MaxSegment
# Line 125  Line 149 
149  =cut  =cut
150  #: Return Type $;  #: Return Type $;
151  sub MaxSegment {  sub MaxSegment {
152          my $self = shift @_;      my ($self) = @_;
153          return $self->{_options}->{maxSegmentLength};          return $self->{_options}->{maxSegmentLength};
154  }  }
155    
# Line 140  Line 164 
164  =cut  =cut
165  #: Return Type $;  #: Return Type $;
166  sub MaxSequence {  sub MaxSequence {
167          my $self = shift @_;      my ($self) = @_;
168          return $self->{_options}->{maxSequenceLength};          return $self->{_options}->{maxSequenceLength};
169  }  }
170    
171  =head3 Get  =head3 Load
   
 C<< my $query = $sprout->Get(\@objectNames, $filterClause, \@parameterList); >>  
   
 This method allows a general query against the Sprout data using a specified filter clause.  
   
 The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  
 field name represented in the form B<I<objectName>(I<fieldName>)>. For example, the  
 following call requests all B<Genome> objects for the genus specified in the variable  
 $genus.  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", [$genus]); >>  
   
 The WHERE clause contains a single question mark, so there is a single additional  
 parameter representing the parameter value. It would also be possible to code  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  
   
 however, this version of the call would generate a syntax error if there were any quote  
 characters inside the variable C<$genus>.  
   
 The use of the strange parenthesized notation for field names enables us to distinguish  
 hyphens contained within field names from minus signs that participate in the computation  
 of the WHERE clause. All of the methods that manipulate fields will use this same notation.  
   
 It is possible to specify multiple entity and relationship names in order to retrieve more than  
 one object's data at the same time, which allows highly complex joined queries. For example,  
   
 C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", [$genus]); >>  
   
 This query returns all the genomes for a particular genus and allows access to the  
 sources from which they came. The join clauses to go from Genome to Source are generated  
 automatically.  
   
 Finally, the filter clause can contain sort information. To do this, simply put an C<ORDER BY>  
 clause at the end of the filter. Field references in the ORDER BY section follow the same rules  
 as they do in the filter itself; in other words, each one must be of the form B<I<objectName>(I<fieldName>)>.  
 For example, the following filter string gets all genomes for a particular genus and sorts  
 them by species name.  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ? ORDER BY Genome(species)", [$genus]); >>  
172    
173  It is also permissible to specify I<only> an ORDER BY clause. For example, the following invocation gets  C<< $sprout->Load($rebuild); >>;
 all genomes ordered by genus and species.  
174    
175  C<< $query = $sprout->Get(['Genome'], "ORDER BY Genome(genus), Genome(species)"); >>  Load the database from files in the data directory, optionally re-creating the tables.
176    
177  Odd things may happen if one of the ORDER BY fields is in a secondary relation. So, for example, an  This method always deletes the data from the database before loading, even if the tables are not
178  attempt to order B<Feature>s by alias may (depending on the underlying database engine used) cause  re-created. The data is loaded into the relations from files in the data directory either having the
179  a single feature to appear more than once.  same name as the target relation with no extension or with an extension of C<.dtx>. Files without an
180    extension are used in preference to the files with an extension.
181    
182  If multiple names are specified, then the query processor will automatically determine a  The files are loaded based on the presumption that each line of the file is a record in the
183  join path between the entities and relationships. The algorithm used is very simplistic.  relation, and the individual fields are delimited by tabs. Tab and new-line characters inside
184  In particular, you can't specify any entity or relationship more than once, and if a  fields must be represented by the escape sequences C<\t> and C<\n>, respectively. The fields must
185  relationship is recursive, the path is determined by the order in which the entity  be presented in the order given in the relation tables produced by the L</ShowMetaData> method.
 and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  
 which relates B<People> objects to other B<People> objects. If the join path is  
 coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  
 the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  
186    
187  =over 4  =over 4
188    
189  =item objectNames  =item rebuild
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
190    
191  List of the parameters to be substituted in for the parameters marks in the filter clause.  TRUE if the data tables need to be created or re-created, else FALSE
192    
193  =item RETURN  =item RETURN
194    
195  Returns a B<DBQuery> that can be used to iterate through all of the results.  Returns a statistical object containing the number of records read, the number of duplicates found,
196    the number of errors, and a list of the error messages.
197    
198  =back  =back
199    
200  =cut  =cut
201    #: Return Type %;
202  sub Get {  sub Load {
203          # Get the parameters.          # Get the parameters.
204          my $self = shift @_;      my ($self, $rebuild) = @_;
205          my ($objectNames, $filterClause, $parameterList) = @_;      # Load the tables from the data directory.
206          # We differ from the ERDB Get method in that the parameter list is passed in as a list reference      my $retVal = $self->LoadTables($self->{_options}->{dataDir}, $rebuild);
207          # rather than a list of parameters. The next step is to convert the parameters from a reference      # Return the statistics.
208          # to a real list. We can only do this if the parameters have been specified.      return $retVal;
         my @parameters;  
         if ($parameterList) { @parameters = @{$parameterList}; }  
         return $self->{_erdb}->Get($objectNames, $filterClause, @parameters);  
209  }  }
210    
211  =head3 GetEntity  =head3 LoadUpdate
212    
213  C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >>  C<< my $stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >>
214    
215  Return an object describing the entity instance with a specified ID.  Load updates to one or more database tables. This method enables the client to make changes to one
216    or two tables without reloading the whole database. For each table, there must be a corresponding
217    file in the data directory, either with the same name as the table, or with a C<.dtx> suffix. So,
218    for example, to make updates to the B<FeatureTranslation> relation, there must be a
219    C<FeatureTranslation.dtx> file in the data directory. Unlike a full load, files without an extension
220    are not examined. This allows update files to co-exist with files from an original load.
221    
222  =over 4  =over 4
223    
224  =item entityType  =item truncateFlag
225    
226  Entity type name.  TRUE if the tables should be rebuilt before loading, else FALSE. A value of TRUE therefore causes
227    current data and schema of the tables to be replaced, while a value of FALSE means the new data
228    is added to the existing data in the various relations.
229    
230  =item ID  =item tableList
231    
232  ID of the desired entity.  List of the tables to be updated.
233    
234  =item RETURN  =item RETURN
235    
236  Returns a B<DBObject> representing the desired entity instance, or an undefined value if no  Returns a statistical object containing the number of records read, the number of duplicates found,
237  instance is found with the specified key.  the number of errors encountered, and a list of error messages.
238    
239  =back  =back
240    
241  =cut  =cut
242    #: Return Type $%;
243  sub GetEntity {  sub LoadUpdate {
244          # Get the parameters.          # Get the parameters.
245          my $self = shift @_;      my ($self, $truncateFlag, $tableList) = @_;
246          my ($entityType, $ID) = @_;      # Declare the return value.
247          # Create a query.      my $retVal = Stats->new();
248          my $query = $self->Get([$entityType], "$entityType(id) = ?", [$ID]);      # Get the data directory.
249          # Get the first (and only) object.      my $optionTable = $self->{_options};
250          my $retVal = $query->Fetch();      my $dataDir = $optionTable->{dataDir};
251          # Return the result.      # Loop through the incoming table names.
252        for my $tableName (@{$tableList}) {
253            # Find the table's file.
254            my $fileName = LoadFileName($dataDir, $tableName);
255            if (! $fileName) {
256                Trace("No load file found for $tableName in $dataDir.") if T(0);
257            } else {
258                # Attempt to load this table.
259                my $result = $self->LoadTable($fileName, $tableName, $truncateFlag);
260                # Accumulate the resulting statistics.
261                $retVal->Accumulate($result);
262            }
263        }
264        # Return the statistics.
265          return $retVal;          return $retVal;
266  }  }
267    
268  =head3 GetEntityValues  =head3 GenomeCounts
269    
270  C<< my @values = GetEntityValues($entityType, $ID, \@fields); >>  C<< my ($arch, $bact, $euk, $vir, $env, $unk) = $sprout->GenomeCounts($complete); >>
271    
272  Return a list of values from a specified entity instance.  Count the number of genomes in each domain. If I<$complete> is TRUE, only complete
273    genomes will be included in the counts.
274    
275  =over 4  =over 4
276    
277  =item entityType  =item complete
278    
279  Entity type name.  TRUE if only complete genomes are to be counted, FALSE if all genomes are to be
280    counted
 =item ID  
   
 ID of the desired entity.  
   
 =item fields  
   
 List of field names, each of the form I<objectName>C<(>I<fieldName>C<)>.  
281    
282  =item RETURN  =item RETURN
283    
284  Returns a flattened list of the values of the specified fields for the specified entity.  A six-element list containing the number of genomes in each of six categories--
285    Archaea, Bacteria, Eukaryota, Viral, Environmental, and Unknown, respectively.
286    
287  =back  =back
288    
289  =cut  =cut
290  #: Return Type @;  
291  sub GetEntityValues {  sub GenomeCounts {
292          # Get the parameters.          # Get the parameters.
293          my $self = shift @_;      my ($self, $complete) = @_;
294          my ($entityType, $ID, $fields) = @_;      # Set the filter based on the completeness flag.
295          # Get the specified entity.      my $filter = ($complete ? "Genome(complete) = 1" : "");
296          my $entity = $self->GetEntity($entityType, $ID);      # Get all the genomes and the related taxonomy information.
297          # Declare the return list.      my @genomes = $self->GetAll(['Genome'], $filter, [], ['Genome(id)', 'Genome(taxonomy)']);
298          my @retVal = ();      # Clear the counters.
299          # If we found the entity, push the values into the return list.      my ($arch, $bact, $euk, $vir, $env, $unk) = (0, 0, 0, 0, 0, 0);
300          if ($entity) {      # Loop through, counting the domains.
301                  push @retVal, $entity->Values($fields);      for my $genome (@genomes) {
302            if    ($genome->[1] =~ /^archaea/i)  { ++$arch }
303            elsif ($genome->[1] =~ /^bacter/i)   { ++$bact }
304            elsif ($genome->[1] =~ /^eukar/i)    { ++$euk }
305            elsif ($genome->[1] =~ /^vir/i)      { ++$vir }
306            elsif ($genome->[1] =~ /^env/i)      { ++$env }
307            else  { ++$unk }
308          }          }
309          # Return the result.      # Return the counts.
310          return @retVal;      return ($arch, $bact, $euk, $vir, $env, $unk);
311  }  }
312    
313  =head3 ShowMetaData  =head3 ContigCount
314    
315  C<< $sprout->ShowMetaData($fileName); >>  C<< my $count = $sprout->ContigCount($genomeID); >>
316    
317  This method outputs a description of the database to an HTML file in the data directory.  Return the number of contigs for the specified genome ID.
318    
319  =over 4  =over 4
320    
321  =item fileName  =item genomeID
322    
323    ID of the genome whose contig count is desired.
324    
325  Fully-qualified name to give to the output file.  =item RETURN
326    
327    Returns the number of contigs for the specified genome.
328    
329  =back  =back
330    
331  =cut  =cut
332    
333  sub ShowMetaData {  sub ContigCount {
334          # Get the parameters.          # Get the parameters.
335          my $self = shift @_;      my ($self, $genomeID) = @_;
336          my ($fileName) = @_;      # Get the contig count.
337          # Compute the file name.      my $retVal = $self->GetCount(['Contig', 'HasContig'], "HasContig(from-link) = ?", [$genomeID]);
338          my $options = $self->{_options};      # Return the result.
339          # Call the show method on the underlying ERDB object.      return $retVal;
         $self->{_erdb}->ShowMetaData($fileName);  
340  }  }
341    
342  =head3 Load  =head3 GeneMenu
   
 C<< $sprout->Load($rebuild); >>;  
   
 Load the database from files in the data directory, optionally re-creating the tables.  
343    
344  This method always deletes the data from the database before loading, even if the tables are not  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected); >>
 re-created. The data is loaded into the relations from files in the data directory either having the  
 same name as the target relation with no extension or with an extension of C<.dtx>. Files without an  
 extension are used in preference to the files with an extension.  
345    
346  The files are loaded based on the presumption that each line of the file is a record in the  Return an HTML select menu of genomes. Each genome will be an option in the menu,
347  relation, and the individual fields are delimited by tabs. Tab and new-line characters inside  and will be displayed by name with the ID and a contig count attached. The selection
348  fields must be represented by the escape sequences C<\t> and C<\n>, respectively. The fields must  value will be the genome ID. The genomes will be sorted by genus/species name.
 be presented in the order given in the relation tables produced by the L</ShowMetaData> method.  
349    
350  =over 4  =over 4
351    
352  =item rebuild  =item attributes
   
 TRUE if the data tables need to be created or re-created, else FALSE  
   
 =item RETURN  
   
 Returns a statistical object containing the number of records read, the number of duplicates found,  
 the number of errors, and a list of the error messages.  
   
 =back  
353    
354  =cut  Reference to a hash mapping attributes to values for the SELECT tag generated.
 #: Return Type %;  
 sub Load {  
         # Get the parameters.  
         my $self = shift @_;  
         my ($rebuild) = @_;  
         # Get the database object.  
         my $erdb = $self->{_erdb};  
         # Load the tables from the data directory.  
         my $retVal = $erdb->LoadTables($self->{_options}->{dataDir}, $rebuild);  
         # Return the statistics.  
         return $retVal;  
 }  
355    
356  =head3 LoadUpdate  =item filterString
357    
358  C<< my %stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >>  A filter string for use in selecting the genomes. The filter string must conform
359    to the rules for the C<< ERDB->Get >> method.
360    
361  Load updates to one or more database tables. This method enables the client to make changes to one  =item params
 or two tables without reloading the whole database. For each table, there must be a corresponding  
 file in the data directory, either with the same name as the table, or with a C<.dtx> suffix. So,  
 for example, to make updates to the B<FeatureTranslation> relation, there must be a  
 C<FeatureTranslation.dtx> file in the data directory. Unlike a full load, files without an extension  
 are not examined. This allows update files to co-exist with files from an original load.  
362    
363  =over 4  Reference to a list of values to be substituted in for the parameter marks in
364    the filter string.
365    
366  =item truncateFlag  =item selected (optional)
367    
368  TRUE if the tables should be rebuilt before loading, else FALSE. A value of TRUE therefore causes  ID of the genome to be initially selected.
 current data and schema of the tables to be replaced, while a value of FALSE means the new data  
 is added to the existing data in the various relations.  
369    
370  =item tableList  =item fast (optional)
371    
372  List of the tables to be updated.  If specified and TRUE, the contig counts will be omitted to improve performance.
373    
374  =item RETURN  =item RETURN
375    
376  Returns a statistical object containing the number of records read, the number of duplicates found,  Returns an HTML select menu with the specified genomes as selectable options.
 the number of errors encountered, and a list of error messages.  
377    
378  =back  =back
379    
380  =cut  =cut
381  #: Return Type %;  
382  sub LoadUpdate {  sub GeneMenu {
383          # Get the parameters.          # Get the parameters.
384          my $self = shift @_;      my ($self, $attributes, $filterString, $params, $selected, $fast) = @_;
385          my ($truncateFlag, $tableList) = @_;      my $slowMode = ! $fast;
386          # Get the database object.      # Default to nothing selected. This prevents an execution warning if "$selected"
387          my $erdb = $self->{_erdb};      # is undefined.
388          # Declare the return value.      $selected = "" unless defined $selected;
389          my $retVal = Stats->new();      Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3);
390          # Get the data directory.      # Start the menu.
391          my $optionTable = $self->{_options};      my $retVal = "<select " .
392          my $dataDir = $optionTable->{dataDir};          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .
393          # Loop through the incoming table names.          ">\n";
394          for my $tableName (@{$tableList}) {      # Get the genomes.
395                  # Find the table's file.      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
396                  my $fileName = "$dataDir/$tableName";                                                                       'Genome(genus)',
397                  if (! -e $fileName) {                                                                       'Genome(species)',
398                          $fileName = "$fileName.dtx";                                                                       'Genome(unique-characterization)']);
399                  }      # Sort them by name.
400                  # Attempt to load this table.      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
401                  my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);      # Loop through the genomes, creating the option tags.
402                  # Accumulate the resulting statistics.      for my $genomeData (@sorted) {
403                  $retVal->Accumulate($result);          # Get the data for this genome.
404            my ($genomeID, $genus, $species, $strain) = @{$genomeData};
405            # Get the contig count.
406            my $contigInfo = "";
407            if ($slowMode) {
408                my $count = $self->ContigCount($genomeID);
409                my $counting = ($count == 1 ? "contig" : "contigs");
410                $contigInfo = "[$count $counting]";
411            }
412            # Find out if we're selected.
413            my $selectOption = ($selected eq $genomeID ? " selected" : "");
414            # Build the option tag.
415            $retVal .= "<option value=\"$genomeID\"$selectOption>$genus $species $strain ($genomeID)$contigInfo</option>\n";
416          }          }
417          # Return the statistics.      # Close the SELECT tag.
418        $retVal .= "</select>\n";
419        # Return the result.
420          return $retVal;          return $retVal;
421  }  }
422    
# Line 464  Line 432 
432  #: Return Type ;  #: Return Type ;
433  sub Build {  sub Build {
434          # Get the parameters.          # Get the parameters.
435          my $self = shift @_;      my ($self) = @_;
436          # Create the tables.          # Create the tables.
437          $self->{_erdb}->CreateTables;      $self->CreateTables();
438  }  }
439    
440  =head3 Genomes  =head3 Genomes
# Line 479  Line 447 
447  #: Return Type @;  #: Return Type @;
448  sub Genomes {  sub Genomes {
449          # Get the parameters.          # Get the parameters.
450          my $self = shift @_;      my ($self) = @_;
451          # Get all the genomes.          # Get all the genomes.
452          my @retVal = $self->GetFlat(['Genome'], "", [], 'Genome(id)');          my @retVal = $self->GetFlat(['Genome'], "", [], 'Genome(id)');
453          # Return the list of IDs.          # Return the list of IDs.
# Line 509  Line 477 
477  #: Return Type $;  #: Return Type $;
478  sub GenusSpecies {  sub GenusSpecies {
479          # Get the parameters.          # Get the parameters.
480          my $self = shift @_;      my ($self, $genomeID) = @_;
         my ($genomeID) = @_;  
481          # Get the data for the specified genome.          # Get the data for the specified genome.
482          my @values = $self->GetEntityValues('Genome', $genomeID, ['Genome(genus)', 'Genome(species)',          my @values = $self->GetEntityValues('Genome', $genomeID, ['Genome(genus)', 'Genome(species)',
483                                                                                                                            'Genome(unique-characterization)']);                                                                                                                            'Genome(unique-characterization)']);
# Line 546  Line 513 
513  #: Return Type @;  #: Return Type @;
514  sub FeaturesOf {  sub FeaturesOf {
515          # Get the parameters.          # Get the parameters.
516          my $self = shift @_;      my ($self, $genomeID,$ftype) = @_;
         my ($genomeID,$ftype) = @_;  
517          # Get the features we want.          # Get the features we want.
518          my @features;          my @features;
519          if (!$ftype) {          if (!$ftype) {
# Line 591  Line 557 
557  =item RETURN  =item RETURN
558    
559  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
560  context and as a space-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context.
561    
562  =back  =back
563    
# Line 600  Line 566 
566  #: Return Type $;  #: Return Type $;
567  sub FeatureLocation {  sub FeatureLocation {
568          # Get the parameters.          # Get the parameters.
569          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
570          # Create a query for the feature locations.          # Create a query for the feature locations.
571          my $query = $self->Get(['IsLocatedIn'], "IsLocatedIn(from-link) = ? ORDER BY IsLocatedIn(locN)",          my $query = $self->Get(['IsLocatedIn'], "IsLocatedIn(from-link) = ? ORDER BY IsLocatedIn(locN)",
572                                                     [$featureID]);                                                     [$featureID]);
# Line 619  Line 584 
584                  if ($prevContig eq $contigID && $dir eq $prevDir) {                  if ($prevContig eq $contigID && $dir eq $prevDir) {
585                          # Here the new segment is in the same direction on the same contig. Insure the                          # Here the new segment is in the same direction on the same contig. Insure the
586                          # new segment's beginning is next to the old segment's end.                          # new segment's beginning is next to the old segment's end.
587                          if (($dir eq "-" && $beg == $prevBeg - $prevLen) ||              if ($dir eq "-" && $beg + $len == $prevBeg) {
588                                  ($dir eq "+" && $beg == $prevBeg + $prevLen)) {                  # Here we're merging two backward blocks, so we keep the new begin point
589                                  # Here we need to merge two segments. Adjust the beginning and length values                  # and adjust the length.
590                                  # to include both segments.                  $len += $prevLen;
591                    # Pop the old segment off. The new one will replace it later.
592                    pop @retVal;
593                } elsif ($dir eq "+" && $beg == $prevBeg + $prevLen) {
594                    # Here we need to merge two forward blocks. Adjust the beginning and
595                    # length values to include both segments.
596                                  $beg = $prevBeg;                                  $beg = $prevBeg;
597                                  $len += $prevLen;                                  $len += $prevLen;
598                                  # Pop the old segment off. The new one will replace it later.                                  # Pop the old segment off. The new one will replace it later.
# Line 631  Line 601 
601                  }                  }
602                  # Remember this specifier for the adjacent-segment test the next time through.                  # Remember this specifier for the adjacent-segment test the next time through.
603                  ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);                  ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);
604            # Compute the initial base pair.
605            my $start = ($dir eq "+" ? $beg : $beg + $len - 1);
606                  # Add the specifier to the list.                  # Add the specifier to the list.
607                  push @retVal, "${contigID}_$beg$dir$len";          push @retVal, "${contigID}_$start$dir$len";
608          }          }
609          # Return the list in the format indicated by the context.          # Return the list in the format indicated by the context.
610          return (wantarray ? @retVal : join(' ', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
611  }  }
612    
613  =head3 ParseLocation  =head3 ParseLocation
# Line 661  Line 633 
633  =cut  =cut
634  #: Return Type @;  #: Return Type @;
635  sub ParseLocation {  sub ParseLocation {
636          # Get the parameter.      # Get the parameter. Note that if we're called as an instance method, we ignore
637        # the first parameter.
638        shift if UNIVERSAL::isa($_[0],__PACKAGE__);
639          my ($location) = @_;          my ($location) = @_;
640          # Parse it into segments.          # Parse it into segments.
641          $location =~ /^(.*)_(\d*)([+-_])(\d*)$/;      $location =~ /^(.+)_(\d+)([+\-_])(\d+)$/;
642          my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4);          my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4);
643          # If the direction is an underscore, convert it to a + or -.          # If the direction is an underscore, convert it to a + or -.
644          if ($dir eq "_") {          if ($dir eq "_") {
# Line 680  Line 654 
654          return ($contigID, $start, $dir, $len);          return ($contigID, $start, $dir, $len);
655  }  }
656    
657    
658    
659    =head3 PointLocation
660    
661    C<< my $found = Sprout::PointLocation($location, $point); >>
662    
663    Return the offset into the specified location of the specified point on the contig. If
664    the specified point is before the location, a negative value will be returned. If it is
665    beyond the location, an undefined value will be returned. It is assumed that the offset
666    is for the location's contig. The location can either be new-style (using a C<+> or C<->
667    and a length) or old-style (using C<_> and start and end positions.
668    
669    =over 4
670    
671    =item location
672    
673    A location specifier (see L</FeatureLocation> for a description).
674    
675    =item point
676    
677    The offset into the contig of the point in which we're interested.
678    
679    =item RETURN
680    
681    Returns the offset inside the specified location of the specified point, a negative
682    number if the point is before the location, or an undefined value if the point is past
683    the location. If the length of the location is 0, this method will B<always> denote
684    that it is outside the location. The offset will always be relative to the left-most
685    position in the location.
686    
687    =back
688    
689    =cut
690    #: Return Type $;
691    sub PointLocation {
692        # Get the parameter. Note that if we're called as an instance method, we ignore
693        # the first parameter.
694        shift if UNIVERSAL::isa($_[0],__PACKAGE__);
695        my ($location, $point) = @_;
696        # Parse out the location elements. Note that this works on both old-style and new-style
697        # locations.
698        my ($contigID, $start, $dir, $len) = ParseLocation($location);
699        # Declare the return variable.
700        my $retVal;
701        # Compute the offset. The computation is dependent on the direction of the location.
702        my $offset = (($dir == '+') ? $point - $start : $point - ($start - $len + 1));
703        # Return the offset if it's valid.
704        if ($offset < $len) {
705            $retVal = $offset;
706        }
707        # Return the offset found.
708        return $retVal;
709    }
710    
711  =head3 DNASeq  =head3 DNASeq
712    
713  C<< my $sequence = $sprout->DNASeq(\@locationList); >>  C<< my $sequence = $sprout->DNASeq(\@locationList); >>
# Line 688  Line 716 
716  should be of the form returned by L</featureLocation> when in a list context. In other words,  should be of the form returned by L</featureLocation> when in a list context. In other words,
717  each location is of the form I<contigID>C<_>I<begin>I<dir>I<end>.  each location is of the form I<contigID>C<_>I<begin>I<dir>I<end>.
718    
719    For example, the following would return the DNA sequence for contig C<83333.1:NC_000913>
720    between positions 1401 and 1532, inclusive.
721    
722        my $sequence = $sprout->DNASeq('83333.1:NC_000913_1401_1532');
723    
724  =over 4  =over 4
725    
726  =item locationList  =item locationList
727    
728  List of location specifiers, each in the form I<contigID>C<_>I<begin>I<dir>I<end> (see  List of location specifiers, each in the form I<contigID>C<_>I<begin>I<dir>I<len> or
729  L</FeatureLocation> for more about this format).  I<contigID>C<_>I<begin>C<_>I<end> (see L</FeatureLocation> for more about this format).
730    
731  =item RETURN  =item RETURN
732    
# Line 705  Line 738 
738  #: Return Type $;  #: Return Type $;
739  sub DNASeq {  sub DNASeq {
740          # Get the parameters.          # Get the parameters.
741          my $self = shift @_;      my ($self, $locationList) = @_;
         my ($locationList) = @_;  
742          # Create the return string.          # Create the return string.
743          my $retVal = "";          my $retVal = "";
744          # Loop through the locations.          # Loop through the locations.
# Line 721  Line 753 
753                  # the start point is the ending. Note that in the latter case we must reverse the DNA string                  # the start point is the ending. Note that in the latter case we must reverse the DNA string
754                  # before putting it in the return value.                  # before putting it in the return value.
755                  my ($start, $stop);                  my ($start, $stop);
756            Trace("Parse of \"$location\" is $beg$dir$len.") if T(SDNA => 4);
757                  if ($dir eq "+") {                  if ($dir eq "+") {
758                          $start = $beg;                          $start = $beg;
759                          $stop = $beg + $len - 1;                          $stop = $beg + $len - 1;
760                  } else {                  } else {
761                          $start = $beg + $len + 1;              $start = $beg - $len + 1;
762                          $stop = $beg;                          $stop = $beg;
763                  }                  }
764            Trace("Looking for sequences containing $start through $stop.") if T(SDNA => 4);
765                  my $query = $self->Get(['IsMadeUpOf','Sequence'],                  my $query = $self->Get(['IsMadeUpOf','Sequence'],
766                          "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .                          "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .
767                          " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",                          " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",
# Line 739  Line 773 
773                                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',                                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',
774                                                                     'IsMadeUpOf(len)']);                                                                     'IsMadeUpOf(len)']);
775                          my $stopPosition = $startPosition + $sequenceLength;                          my $stopPosition = $startPosition + $sequenceLength;
776                Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4);
777                          # Figure out the start point and length of the relevant section.                          # Figure out the start point and length of the relevant section.
778                          my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);                          my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);
779                          my $len = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1;              my $len1 = ($stopPosition < $stop ? $stopPosition : $stop) + 1 - $startPosition - $pos1;
780                Trace("Position is $pos1 for length $len1.") if T(SDNA => 4);
781                          # Add the relevant data to the location data.                          # Add the relevant data to the location data.
782                          $locationDNA .= substr($sequenceData, $pos1, $len);              $locationDNA .= substr($sequenceData, $pos1, $len1);
783                  }                  }
784                  # Add this location's data to the return string. Note that we may need to reverse it.                  # Add this location's data to the return string. Note that we may need to reverse it.
785                  if ($dir eq '+') {                  if ($dir eq '+') {
786                          $retVal .= $locationDNA;                          $retVal .= $locationDNA;
787                  } else {                  } else {
788                          $locationDNA = join('', reverse split //, $locationDNA);              $retVal .= FIG::reverse_comp($locationDNA);
                         $retVal .= $locationDNA;  
789                  }                  }
790          }          }
791          # Return the result.          # Return the result.
# Line 779  Line 814 
814  #: Return Type @;  #: Return Type @;
815  sub AllContigs {  sub AllContigs {
816          # Get the parameters.          # Get the parameters.
817          my $self = shift @_;      my ($self, $genomeID) = @_;
         my ($genomeID) = @_;  
818          # Ask for the genome's Contigs.          # Ask for the genome's Contigs.
819          my @retVal = $self->GetFlat(['HasContig'], "HasContig(from-link) = ?", [$genomeID],          my @retVal = $self->GetFlat(['HasContig'], "HasContig(from-link) = ?", [$genomeID],
820                                                                  'HasContig(to-link)');                                                                  'HasContig(to-link)');
# Line 788  Line 822 
822          return @retVal;          return @retVal;
823  }  }
824    
825  =head3 ContigLength  =head3 GenomeLength
826    
827  C<< my $length = $sprout->ContigLength($contigID); >>  C<< my $length = $sprout->GenomeLength($genomeID); >>
828    
829  Compute the length of a contig.  Return the length of the specified genome in base pairs.
830    
831  =over 4  =over 4
832    
833  =item contigID  =item genomeID
834    
835  ID of the contig whose length is desired.  ID of the genome whose base pair count is desired.
836    
837  =item RETURN  =item RETURN
838    
839  Returns the number of positions in the contig.  Returns the number of base pairs in all the contigs of the specified
840    genome.
841    
842  =back  =back
843    
844  =cut  =cut
845  #: Return Type $;  
846  sub ContigLength {  sub GenomeLength {
847          # Get the parameters.          # Get the parameters.
848          my $self = shift @_;      my ($self, $genomeID) = @_;
849          my ($contigID) = @_;      # Declare the return variable.
         # Get the contig's last sequence.  
         my $query = $self->Get(['IsMadeUpOf'],  
                 "IsMadeUpOf(from-link) = ? ORDER BY IsMadeUpOf(start-position) DESC",  
                 [$contigID]);  
         my $sequence = $query->Fetch();  
         # Declare the return value.  
850          my $retVal = 0;          my $retVal = 0;
851          # Set it from the sequence data, if any.      # Get the genome's contig sequence lengths.
852          if ($sequence) {      my @lens = $self->GetFlat(['HasContig', 'IsMadeUpOf'], 'HasContig(from-link) = ?',
853                  my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']);                         [$genomeID], 'IsMadeUpOf(len)');
854                  $retVal = $start + $len;      # Sum the lengths.
855          }      map { $retVal += $_ } @lens;
856          # Return the result.          # Return the result.
857          return $retVal;          return $retVal;
858  }  }
859    
860  =head3 GenesInRegion  =head3 FeatureCount
861    
862  C<< my (\@featureIDList, $beg, $end) = $sprout->GenesInRegion($contigID, $start, $stop); >>  C<< my $count = $sprout->FeatureCount($genomeID, $type); >>
863    
864  List the features which overlap a specified region in a contig.  Return the number of features of the specified type in the specified genome.
865    
866  =over 4  =over 4
867    
868  =item contigID  =item genomeID
869    
870  ID of the contig containing the region of interest.  ID of the genome whose feature count is desired.
871    
872  =item start  =item type
873    
874  Offset of the first residue in the region of interest.  Type of feature to count (eg. C<peg>, C<rna>, etc.).
875    
876  =item stop  =item RETURN
877    
878  Offset of the last residue in the region of interest.  Returns the number of features of the specified type for the specified genome.
879    
880    =back
881    
882    =cut
883    
884    sub FeatureCount {
885        # Get the parameters.
886        my ($self, $genomeID, $type) = @_;
887        # Compute the count.
888        my $retVal = $self->GetCount(['HasFeature', 'Feature'],
889                                    "HasFeature(from-link) = ? AND Feature(feature-type) = ?",
890                                    [$genomeID, $type]);
891        # Return the result.
892        return $retVal;
893    }
894    
895    =head3 GenomeAssignments
896    
897    C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >>
898    
899    Return a list of a genome's assigned features. The return hash will contain each
900    assigned feature of the genome mapped to the text of its most recent functional
901    assignment.
902    
903    =over 4
904    
905    =item genomeID
906    
907    ID of the genome whose functional assignments are desired.
908    
909  =item RETURN  =item RETURN
910    
911  Returns a three-element list. The first element is a list of feature IDs for the features that  Returns a reference to a hash which maps each feature to its most recent
912  overlap the region of interest. The second and third elements are the minimum and maximum  functional assignment.
 locations of the features provided on the specified contig. These may extend outside  
 the start and stop values.  
913    
914  =back  =back
915    
916  =cut  =cut
917  #: Return Type @;  
918  sub GenesInRegion {  sub GenomeAssignments {
919          # Get the parameters.          # Get the parameters.
920          my $self = shift @_;      my ($self, $genomeID) = @_;
921          my ($contigID, $start, $stop) = @_;      # Declare the return variable.
922          # Get the maximum segment length.      my $retVal = {};
923          my $maximumSegmentLength = $self->MaxSegment;      # Query the genome's features.
924          # Create a hash to receive the feature list. We use a hash so that we can eliminate      my $query = $self->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ?",
925          # duplicates easily.                             [$genomeID]);
926          my %featuresFound = ();      # Loop through the features.
927          # Prime the values we'll use for the returned beginning and end.      while (my $data = $query->Fetch) {
928          my ($min, $max) = ($self->ContigLength($contigID), 0);          # Get the feature ID and assignment.
929          # Create a table of parameters for each query. Each query looks for features travelling in          my ($fid, $assignment) = $data->Values(['Feature(id)', 'Feature(assignment)']);
930          # a particular direction. The query parameters include the contig ID, the feature direction,          if ($assignment) {
931          # the lowest possible start position, and the highest possible start position. This works              $retVal->{$fid} = $assignment;
932            }
933        }
934        # Return the result.
935        return $retVal;
936    }
937    
938    =head3 ContigLength
939    
940    C<< my $length = $sprout->ContigLength($contigID); >>
941    
942    Compute the length of a contig.
943    
944    =over 4
945    
946    =item contigID
947    
948    ID of the contig whose length is desired.
949    
950    =item RETURN
951    
952    Returns the number of positions in the contig.
953    
954    =back
955    
956    =cut
957    #: Return Type $;
958    sub ContigLength {
959        # Get the parameters.
960        my ($self, $contigID) = @_;
961        # Get the contig's last sequence.
962        my $query = $self->Get(['IsMadeUpOf'],
963            "IsMadeUpOf(from-link) = ? ORDER BY IsMadeUpOf(start-position) DESC",
964            [$contigID]);
965        my $sequence = $query->Fetch();
966        # Declare the return value.
967        my $retVal = 0;
968        # Set it from the sequence data, if any.
969        if ($sequence) {
970            my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']);
971            $retVal = $start + $len - 1;
972        }
973        # Return the result.
974        return $retVal;
975    }
976    
977    =head3 ClusterPEGs
978    
979    C<< my $clusteredList = $sprout->ClusterPEGs($sub, \@pegs); >>
980    
981    Cluster the PEGs in a list according to the cluster coding scheme of the specified
982    subsystem. In order for this to work properly, the subsystem object must have
983    been used recently to retrieve the PEGs using the B<get_pegs_from_cell> method.
984    This causes the cluster numbers to be pulled into the subsystem's color hash.
985    If a PEG is not found in the color hash, it will not appear in the output
986    sequence.
987    
988    =over 4
989    
990    =item sub
991    
992    Sprout subsystem object for the relevant subsystem, from the L</get_subsystem>
993    method.
994    
995    =item pegs
996    
997    Reference to the list of PEGs to be clustered.
998    
999    =item RETURN
1000    
1001    Returns a list of the PEGs, grouped into smaller lists by cluster number.
1002    
1003    =back
1004    
1005    =cut
1006    #: Return Type $@@;
1007    sub ClusterPEGs {
1008        # Get the parameters.
1009        my ($self, $sub, $pegs) = @_;
1010        # Declare the return variable.
1011        my $retVal = [];
1012        # Loop through the PEGs, creating arrays for each cluster.
1013        for my $pegID (@{$pegs}) {
1014            my $clusterNumber = $sub->get_cluster_number($pegID);
1015            # Only proceed if the PEG is in a cluster.
1016            if ($clusterNumber >= 0) {
1017                # Push this PEG onto the sub-list for the specified cluster number.
1018                push @{$retVal->[$clusterNumber]}, $pegID;
1019            }
1020        }
1021        # Return the result.
1022        return $retVal;
1023    }
1024    
1025    =head3 GenesInRegion
1026    
1027    C<< my (\@featureIDList, $beg, $end) = $sprout->GenesInRegion($contigID, $start, $stop); >>
1028    
1029    List the features which overlap a specified region in a contig.
1030    
1031    =over 4
1032    
1033    =item contigID
1034    
1035    ID of the contig containing the region of interest.
1036    
1037    =item start
1038    
1039    Offset of the first residue in the region of interest.
1040    
1041    =item stop
1042    
1043    Offset of the last residue in the region of interest.
1044    
1045    =item RETURN
1046    
1047    Returns a three-element list. The first element is a list of feature IDs for the features that
1048    overlap the region of interest. The second and third elements are the minimum and maximum
1049    locations of the features provided on the specified contig. These may extend outside
1050    the start and stop values. The first element (that is, the list of features) is sorted
1051    roughly by location.
1052    
1053    =back
1054    
1055    =cut
1056    #: Return Type @@;
1057    sub GenesInRegion {
1058        # Get the parameters.
1059        my ($self, $contigID, $start, $stop) = @_;
1060        # Get the maximum segment length.
1061        my $maximumSegmentLength = $self->MaxSegment;
1062        # Create a hash to receive the feature list. We use a hash so that we can eliminate
1063        # duplicates easily. The hash key will be the feature ID. The value will be a two-element
1064        # containing the minimum and maximum offsets. We will use the offsets to sort the results
1065        # when we're building the result set.
1066        my %featuresFound = ();
1067        # Prime the values we'll use for the returned beginning and end.
1068        my @initialMinMax = ($self->ContigLength($contigID), 0);
1069        my ($min, $max) = @initialMinMax;
1070        # Create a table of parameters for each query. Each query looks for features travelling in
1071        # a particular direction. The query parameters include the contig ID, the feature direction,
1072        # the lowest possible start position, and the highest possible start position. This works
1073          # because each feature segment length must be no greater than the maximum segment length.          # because each feature segment length must be no greater than the maximum segment length.
1074          my %queryParms = (forward => [$contigID, '+', $start - $maximumSegmentLength + 1, $stop],          my %queryParms = (forward => [$contigID, '+', $start - $maximumSegmentLength + 1, $stop],
1075                                            reverse => [$contigID, '-', $start, $stop + $maximumSegmentLength - 1]);                                            reverse => [$contigID, '-', $start, $stop + $maximumSegmentLength - 1]);
# Line 899  Line 1096 
1096                                          $found = 1;                                          $found = 1;
1097                                  }                                  }
1098                          } elsif ($dir eq '-') {                          } elsif ($dir eq '-') {
1099                                  $end = $beg - $len;                  # Note we switch things around so that the beginning is to the left of the
1100                                  if ($end <= $stop) {                  # ending.
1101                    ($beg, $end) = ($beg - $len, $beg);
1102                    if ($beg <= $stop) {
1103                                          # Denote we found a useful feature.                                          # Denote we found a useful feature.
1104                                          $found = 1;                                          $found = 1;
1105                                  }                                  }
1106                          }                          }
1107                          if ($found) {                          if ($found) {
1108                                  # Here we need to record the feature and update the minimum and maximum.                  # Here we need to record the feature and update the minima and maxima. First,
1109                                  $featuresFound{$featureID} = 1;                  # get the current entry for the specified feature.
1110                                  if ($beg < $min) { $min = $beg; }                  my ($loc1, $loc2) = (exists $featuresFound{$featureID} ? @{$featuresFound{$featureID}} :
1111                                  if ($end < $min) { $min = $end; }                                       @initialMinMax);
1112                                  if ($beg > $max) { $max = $beg; }                  # Merge the current segment's begin and end into the feature begin and end and the
1113                                  if ($end > $max) { $max = $end; }                  # global min and max.
1114                    if ($beg < $loc1) {
1115                        $loc1 = $beg;
1116                        $min = $beg if $beg < $min;
1117                    }
1118                    if ($end > $loc2) {
1119                        $loc2 = $end;
1120                        $max = $end if $end > $max;
1121                    }
1122                    # Store the entry back into the hash table.
1123                    $featuresFound{$featureID} = [$loc1, $loc2];
1124                          }                          }
1125                  }                  }
1126          }          }
1127          # Compute a list of the IDs for the features found.      # Now we must compute the list of the IDs for the features found. We start with a list
1128          my @list = (sort (keys %featuresFound));      # of midpoints / feature ID pairs. (It's not really a midpoint, it's twice the midpoint,
1129        # but the result of the sort will be the same.)
1130        my @list = map { [$featuresFound{$_}->[0] + $featuresFound{$_}->[1], $_] } keys %featuresFound;
1131        # Now we sort by midpoint and yank out the feature IDs.
1132        my @retVal = map { $_->[1] } sort { $a->[0] <=> $b->[0] } @list;
1133          # Return it along with the min and max.          # Return it along with the min and max.
1134          return (\@list, $min, $max);      return (\@retVal, $min, $max);
1135  }  }
1136    
1137  =head3 FType  =head3 FType
# Line 944  Line 1157 
1157  #: Return Type $;  #: Return Type $;
1158  sub FType {  sub FType {
1159          # Get the parameters.          # Get the parameters.
1160          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
1161          # Get the specified feature's type.          # Get the specified feature's type.
1162          my ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(feature-type)']);          my ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(feature-type)']);
1163          # Return the result.          # Return the result.
# Line 954  Line 1166 
1166    
1167  =head3 FeatureAnnotations  =head3 FeatureAnnotations
1168    
1169  C<< my @descriptors = $sprout->FeatureAnnotations($featureID); >>  C<< my @descriptors = $sprout->FeatureAnnotations($featureID, $rawFlag); >>
1170    
1171  Return the annotations of a feature.  Return the annotations of a feature.
1172    
# Line 964  Line 1176 
1176    
1177  ID of the feature whose annotations are desired.  ID of the feature whose annotations are desired.
1178    
1179    =item rawFlag
1180    
1181    If TRUE, the annotation timestamps will be returned in raw form; otherwise, they
1182    will be returned in human-readable form.
1183    
1184  =item RETURN  =item RETURN
1185    
1186  Returns a list of annotation descriptors. Each descriptor is a hash with the following fields.  Returns a list of annotation descriptors. Each descriptor is a hash with the following fields.
1187    
1188  * B<featureID> ID of the relevant feature.  * B<featureID> ID of the relevant feature.
1189    
1190  * B<timeStamp> time the annotation was made, in user-friendly format.  * B<timeStamp> time the annotation was made.
1191    
1192  * B<user> ID of the user who made the annotation  * B<user> ID of the user who made the annotation
1193    
# Line 982  Line 1199 
1199  #: Return Type @%;  #: Return Type @%;
1200  sub FeatureAnnotations {  sub FeatureAnnotations {
1201          # Get the parameters.          # Get the parameters.
1202          my $self = shift @_;      my ($self, $featureID, $rawFlag) = @_;
         my ($featureID) = @_;  
1203          # Create a query to get the feature's annotations and the associated users.          # Create a query to get the feature's annotations and the associated users.
1204          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1205                                                     "IsTargetOfAnnotation(from-link) = ?", [$featureID]);                                                     "IsTargetOfAnnotation(from-link) = ?", [$featureID]);
# Line 996  Line 1212 
1212                          $annotation->Values(['IsTargetOfAnnotation(from-link)',                          $annotation->Values(['IsTargetOfAnnotation(from-link)',
1213                                                                   'Annotation(time)', 'MadeAnnotation(from-link)',                                                                   'Annotation(time)', 'MadeAnnotation(from-link)',
1214                                                                   'Annotation(annotation)']);                                                                   'Annotation(annotation)']);
1215            # Convert the time, if necessary.
1216            if (! $rawFlag) {
1217                $timeStamp = FriendlyTimestamp($timeStamp);
1218            }
1219                  # Assemble them into a hash.                  # Assemble them into a hash.
1220          my $annotationHash = { featureID => $featureID,          my $annotationHash = { featureID => $featureID,
1221                                 timeStamp => FriendlyTimestamp($timeStamp),                                 timeStamp => $timeStamp,
1222                                                             user => $user, text => $text };                                                             user => $user, text => $text };
1223                  # Add it to the return list.                  # Add it to the return list.
1224                  push @retVal, $annotationHash;                  push @retVal, $annotationHash;
# Line 1012  Line 1232 
1232  C<< my %functions = $sprout->AllFunctionsOf($featureID); >>  C<< my %functions = $sprout->AllFunctionsOf($featureID); >>
1233    
1234  Return all of the functional assignments for a particular feature. The data is returned as a  Return all of the functional assignments for a particular feature. The data is returned as a
1235  hash of functional assignments to user IDs. A functional assignment is a type of annotation.  hash of functional assignments to user IDs. A functional assignment is a type of annotation,
1236  It has the format "XXXX\nset XXXX function to\nYYYYY". In this instance, XXXX is the user ID  Functional assignments are described in the L</ParseAssignment> function. Its worth noting that
1237  and YYYYY is the functional assignment text. Its worth noting that we cannot filter on the content  we cannot filter on the content of the annotation itself because it's a text field; however,
1238  of the annotation itself because it's a text field; however, this is not a big problem because most  this is not a big problem because most features only have a small number of annotations.
1239  features only have a small number of annotations.  Finally, if a single user has multiple functional assignments, we will only keep the most
1240    recent one.
1241    
1242  =over 4  =over 4
1243    
# Line 1026  Line 1247 
1247    
1248  =item RETURN  =item RETURN
1249    
1250  Returns a hash mapping the functional assignment IDs to user IDs.  Returns a hash mapping the user IDs to functional assignment IDs.
1251    
1252  =back  =back
1253    
# Line 1034  Line 1255 
1255  #: Return Type %;  #: Return Type %;
1256  sub AllFunctionsOf {  sub AllFunctionsOf {
1257          # Get the parameters.          # Get the parameters.
1258          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
1259          # Get all of the feature's annotations.          # Get all of the feature's annotations.
1260          my @query = $self->GetFlat(['IsTargetOfAnnotation', 'Annotation'],      my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1261                                                      "IsTargetOfAnnotation(from-link) = ?",                                                      "IsTargetOfAnnotation(from-link) = ?",
1262                                                          [$featureID], 'Annotation(annotation)');                              [$featureID], ['Annotation(time)', 'Annotation(annotation)',
1263                                               'MadeAnnotation(from-link)']);
1264          # Declare the return hash.          # Declare the return hash.
1265          my %retVal;          my %retVal;
1266        # Now we sort the assignments by timestamp in reverse.
1267        my @sortedQuery = sort { -($a->[0] <=> $b->[0]) } @query;
1268          # Loop until we run out of annotations.          # Loop until we run out of annotations.
1269          for my $text (@query) {      for my $annotation (@sortedQuery) {
1270            # Get the annotation fields.
1271            my ($timeStamp, $text, $user) = @{$annotation};
1272                  # Check to see if this is a functional assignment.                  # Check to see if this is a functional assignment.
1273                  my ($user, $function) = ParseAssignment($text);          my ($actualUser, $function) = _ParseAssignment($user, $text);
1274                  if ($user) {          if ($actualUser && ! exists $retVal{$actualUser}) {
1275                          # Here it is, so stuff it in the return hash.              # Here it is a functional assignment and there has been no
1276                          $retVal{$function} = $user;              # previous assignment for this user, so we stuff it in the
1277                # return hash.
1278                $retVal{$actualUser} = $function;
1279                  }                  }
1280          }          }
1281          # Return the hash of assignments found.          # Return the hash of assignments found.
# Line 1062  Line 1289 
1289  Return the most recently-determined functional assignment of a particular feature.  Return the most recently-determined functional assignment of a particular feature.
1290    
1291  The functional assignment is handled differently depending on the type of feature. If  The functional assignment is handled differently depending on the type of feature. If
1292  the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional  the feature is identified by a FIG ID (begins with the string C<fig|>), then the functional
1293  assignment is a type of annotation. It has the format "XXXX\nset XXXX function to\nYYYYY". In this  assignment is taken from the B<Feature> or C<Annotation> table, depending.
 instance, XXXX is the user ID and YYYYY is the functional assignment text. Its worth noting that  
 we cannot filter on the content of the annotation itself because it's a text field; however, this  
 is not a big problem because most features only have a small number of annotations.  
1294    
1295  Each user has an associated list of trusted users. The assignment returned will be the most  Each user has an associated list of trusted users. The assignment returned will be the most
1296  recent one by at least one of the trusted users. If no trusted user list is available, then  recent one by at least one of the trusted users. If no trusted user list is available, then
# Line 1085  Line 1309 
1309    
1310  =item userID (optional)  =item userID (optional)
1311    
1312  ID of the user whose function determination is desired. If omitted, only the latest  ID of the user whose function determination is desired. If omitted, the primary
1313  C<FIG> assignment will be returned.  functional assignment in the B<Feature> table will be returned.
1314    
1315  =item RETURN  =item RETURN
1316    
# Line 1098  Line 1322 
1322  #: Return Type $;  #: Return Type $;
1323  sub FunctionOf {  sub FunctionOf {
1324          # Get the parameters.          # Get the parameters.
1325          my $self = shift @_;      my ($self, $featureID, $userID) = @_;
         my ($featureID, $userID) = @_;  
1326      # Declare the return value.      # Declare the return value.
1327      my $retVal;      my $retVal;
1328      # Determine the ID type.      # Determine the ID type.
1329      if ($featureID =~ m/^fig\|/) {      if ($featureID =~ m/^fig\|/) {
1330          # Here we have a FIG feature ID. We must build the list of trusted          # Here we have a FIG feature ID.
1331          # users.          if (!$userID) {
1332                # Use the primary assignment.
1333                ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(assignment)']);
1334            } else {
1335                # We must build the list of trusted users.
1336          my %trusteeTable = ();          my %trusteeTable = ();
1337          # Check the user ID.          # Check the user ID.
1338          if (!$userID) {          if (!$userID) {
# Line 1127  Line 1354 
1354              }              }
1355          }          }
1356          # Build a query for all of the feature's annotations, sorted by date.          # Build a query for all of the feature's annotations, sorted by date.
1357          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation'],              my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1358                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",
1359                                 [$featureID]);                                 [$featureID]);
1360          my $timeSelected = 0;          my $timeSelected = 0;
1361          # Loop until we run out of annotations.          # Loop until we run out of annotations.
1362          while (my $annotation = $query->Fetch()) {          while (my $annotation = $query->Fetch()) {
1363              # Get the annotation text.              # Get the annotation text.
1364              my ($text, $time) = $annotation->Values(['Annotation(annotation)','Annotation(time)']);                  my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)',
1365                                                             'Annotation(time)', 'MadeAnnotation(from-link)']);
1366              # Check to see if this is a functional assignment for a trusted user.              # Check to see if this is a functional assignment for a trusted user.
1367              my ($user, $type, $function) = split(/\n/, $text);                  my ($actualUser, $function) = _ParseAssignment($user, $text);
1368              if ($type =~ m/^set $user function to$/i) {                  Trace("Assignment user is $actualUser, text is $function.") if T(4);
1369                    if ($actualUser) {
1370                  # Here it is a functional assignment. Check the time and the user                  # Here it is a functional assignment. Check the time and the user
1371                  # name. The time must be recent and the user must be trusted.                  # name. The time must be recent and the user must be trusted.
1372                  if ((exists $trusteeTable{$user}) && ($time > $timeSelected)) {                      if ((exists $trusteeTable{$actualUser}) && ($time > $timeSelected)) {
1373                      $retVal = $function;                      $retVal = $function;
1374                      $timeSelected = $time;                      $timeSelected = $time;
1375                  }                  }
1376              }              }
1377          }          }
1378            }
1379      } else {      } else {
1380          # Here we have a non-FIG feature ID. In this case the user ID does not          # Here we have a non-FIG feature ID. In this case the user ID does not
1381          # matter. We simply get the information from the External Alias Function          # matter. We simply get the information from the External Alias Function
# Line 1156  Line 1386 
1386          return $retVal;          return $retVal;
1387  }  }
1388    
1389    =head3 FunctionsOf
1390    
1391    C<< my @functionList = $sprout->FunctionOf($featureID, $userID); >>
1392    
1393    Return the functional assignments of a particular feature.
1394    
1395    The functional assignment is handled differently depending on the type of feature. If
1396    the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional
1397    assignment is a type of annotation. The format of an assignment is described in
1398    L</ParseAssignment>. Its worth noting that we cannot filter on the content of the
1399    annotation itself because it's a text field; however, this is not a big problem because
1400    most features only have a small number of annotations.
1401    
1402    If the feature is B<not> identified by a FIG ID, then the functional assignment
1403    information is taken from the B<ExternalAliasFunc> table. If the table does
1404    not contain an entry for the feature, an empty list is returned.
1405    
1406    =over 4
1407    
1408    =item featureID
1409    
1410    ID of the feature whose functional assignments are desired.
1411    
1412    =item RETURN
1413    
1414    Returns a list of 2-tuples, each consisting of a user ID and the text of an assignment by
1415    that user.
1416    
1417    =back
1418    
1419    =cut
1420    #: Return Type @@;
1421    sub FunctionsOf {
1422        # Get the parameters.
1423        my ($self, $featureID) = @_;
1424        # Declare the return value.
1425        my @retVal = ();
1426        # Determine the ID type.
1427        if ($featureID =~ m/^fig\|/) {
1428            # Here we have a FIG feature ID. We must build the list of trusted
1429            # users.
1430            my %trusteeTable = ();
1431            # Build a query for all of the feature's annotations, sorted by date.
1432            my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1433                                   "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",
1434                                   [$featureID]);
1435            my $timeSelected = 0;
1436            # Loop until we run out of annotations.
1437            while (my $annotation = $query->Fetch()) {
1438                # Get the annotation text.
1439                my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)',
1440                                                                'Annotation(time)',
1441                                                                'MadeAnnotation(user)']);
1442                # Check to see if this is a functional assignment for a trusted user.
1443                my ($actualUser, $function) = _ParseAssignment($user, $text);
1444                if ($actualUser) {
1445                    # Here it is a functional assignment.
1446                    push @retVal, [$actualUser, $function];
1447                }
1448            }
1449        } else {
1450            # Here we have a non-FIG feature ID. In this case the user ID does not
1451            # matter. We simply get the information from the External Alias Function
1452            # table.
1453            my @assignments = $self->GetEntityValues('ExternalAliasFunc', $featureID,
1454                                                     ['ExternalAliasFunc(func)']);
1455            push @retVal, map { ['master', $_] } @assignments;
1456        }
1457        # Return the assignments found.
1458        return @retVal;
1459    }
1460    
1461  =head3 BBHList  =head3 BBHList
1462    
1463  C<< my $bbhHash = $sprout->BBHList($genomeID, \@featureList); >>  C<< my $bbhHash = $sprout->BBHList($genomeID, \@featureList); >>
# Line 1175  Line 1477 
1477    
1478  =item RETURN  =item RETURN
1479    
1480  Returns a reference to a hash that maps the IDs of the incoming features to the IDs of  Returns a reference to a hash that maps the IDs of the incoming features to the best hits
1481  their best hits.  on the target genome.
1482    
1483  =back  =back
1484    
# Line 1184  Line 1486 
1486  #: Return Type %;  #: Return Type %;
1487  sub BBHList {  sub BBHList {
1488          # Get the parameters.          # Get the parameters.
1489          my $self = shift @_;      my ($self, $genomeID, $featureList) = @_;
         my ($genomeID, $featureList) = @_;  
1490          # Create the return structure.          # Create the return structure.
1491          my %retVal = ();          my %retVal = ();
1492          # Loop through the incoming features.          # Loop through the incoming features.
1493          for my $featureID (@{$featureList}) {          for my $featureID (@{$featureList}) {
1494                  # Create a query to get the feature's best hit.          # Ask the server for the feature's best hit.
1495                  my $query = $self->Get(['IsBidirectionalBestHitOf'],          my @bbhData = FIGRules::BBHData($featureID);
1496                                                             "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?",          # Peel off the BBHs found.
1497                                                             [$featureID, $genomeID]);          my @found = ();
1498                  # Look for the best hit.          for my $bbh (@bbhData) {
1499                  my $bbh = $query->Fetch;              my $fid = $bbh->[0];
1500                  if ($bbh) {              my $bbGenome = $self->GenomeOf($fid);
1501                          my ($targetFeature) = $bbh->Value('IsBidirectionalBestHitOf(to-link)');              if ($bbGenome eq $genomeID) {
1502                          $retVal{$featureID} = $targetFeature;                  push @found, $fid;
1503                }
1504                  }                  }
1505            $retVal{$featureID} = \@found;
1506          }          }
1507          # Return the mapping.          # Return the mapping.
1508          return \%retVal;          return \%retVal;
1509  }  }
1510    
1511    =head3 SimList
1512    
1513    C<< my %similarities = $sprout->SimList($featureID, $count); >>
1514    
1515    Return a list of the similarities to the specified feature.
1516    
1517    This method just returns the bidirectional best hits for performance reasons.
1518    
1519    =over 4
1520    
1521    =item featureID
1522    
1523    ID of the feature whose similarities are desired.
1524    
1525    =item count
1526    
1527    Maximum number of similar features to be returned, or C<0> to return them all.
1528    
1529    =back
1530    
1531    =cut
1532    #: Return Type %;
1533    sub SimList {
1534        # Get the parameters.
1535        my ($self, $featureID, $count) = @_;
1536        # Ask for the best hits.
1537        my @lists = FIGRules::BBHData($featureID);
1538        # Create the return value.
1539        my %retVal = ();
1540        for my $tuple (@lists) {
1541            $retVal{$tuple->[0]} = $tuple->[1];
1542        }
1543        # Return the result.
1544        return %retVal;
1545    }
1546    
1547    =head3 IsComplete
1548    
1549    C<< my $flag = $sprout->IsComplete($genomeID); >>
1550    
1551    Return TRUE if the specified genome is complete, else FALSE.
1552    
1553    =over 4
1554    
1555    =item genomeID
1556    
1557    ID of the genome whose completeness status is desired.
1558    
1559    =item RETURN
1560    
1561    Returns TRUE if the genome is complete, FALSE if it is incomplete, and C<undef> if it is
1562    not found.
1563    
1564    =back
1565    
1566    =cut
1567    #: Return Type $;
1568    sub IsComplete {
1569        # Get the parameters.
1570        my ($self, $genomeID) = @_;
1571        # Declare the return variable.
1572        my $retVal;
1573        # Get the genome's data.
1574        my $genomeData = $self->GetEntity('Genome', $genomeID);
1575        if ($genomeData) {
1576            # The genome exists, so get the completeness flag.
1577            ($retVal) = $genomeData->Value('Genome(complete)');
1578        }
1579        # Return the result.
1580        return $retVal;
1581    }
1582    
1583  =head3 FeatureAliases  =head3 FeatureAliases
1584    
1585  C<< my @aliasList = $sprout->FeatureAliases($featureID); >>  C<< my @aliasList = $sprout->FeatureAliases($featureID); >>
# Line 1228  Line 1603 
1603  #: Return Type @;  #: Return Type @;
1604  sub FeatureAliases {  sub FeatureAliases {
1605          # Get the parameters.          # Get the parameters.
1606          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
1607          # Get the desired feature's aliases          # Get the desired feature's aliases
1608          my @retVal = $self->GetEntityValues('Feature', $featureID, ['Feature(alias)']);          my @retVal = $self->GetEntityValues('Feature', $featureID, ['Feature(alias)']);
1609          # Return the result.          # Return the result.
# Line 1240  Line 1614 
1614    
1615  C<< my $genomeID = $sprout->GenomeOf($featureID); >>  C<< my $genomeID = $sprout->GenomeOf($featureID); >>
1616    
1617  Return the genome that contains a specified feature.  Return the genome that contains a specified feature or contig.
1618    
1619  =over 4  =over 4
1620    
1621  =item featureID  =item featureID
1622    
1623  ID of the feature whose genome is desired.  ID of the feature or contig whose genome is desired.
1624    
1625  =item RETURN  =item RETURN
1626    
1627  Returns the ID of the genome for the specified feature. If the feature is not found, returns  Returns the ID of the genome for the specified feature or contig. If the feature or contig is not
1628  an undefined value.  found, returns an undefined value.
1629    
1630  =back  =back
1631    
# Line 1259  Line 1633 
1633  #: Return Type $;  #: Return Type $;
1634  sub GenomeOf {  sub GenomeOf {
1635          # Get the parameters.          # Get the parameters.
1636          my $self = shift @_;      my ($self, $featureID) = @_;
1637          my ($featureID) = @_;      # Create a query to find the genome associated with the incoming ID.
1638          # Create a query to find the genome associated with the feature.      my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ? OR HasContig(to-link) = ?",
1639          my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ?", [$featureID]);                             [$featureID, $featureID]);
1640          # Declare the return value.          # Declare the return value.
1641          my $retVal;          my $retVal;
1642          # Get the genome ID.          # Get the genome ID.
# Line 1296  Line 1670 
1670  #: Return Type %;  #: Return Type %;
1671  sub CoupledFeatures {  sub CoupledFeatures {
1672          # Get the parameters.          # Get the parameters.
1673          my $self = shift @_;      my ($self, $featureID) = @_;
1674          my ($featureID) = @_;      Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1675          # Create a query to retrieve the functionally-coupled features. Note that we depend on the      # Create a query to retrieve the functionally-coupled features.
1676          # fact that the functional coupling is physically paired. If (A,B) is in the database, then      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1677          # (B,A) will also be found.                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
         my $query = $self->Get(['IsClusteredOnChromosomeWith'],  
                                                    "IsClusteredOnChromosomeWith(from-link) = ?", [$featureID]);  
1678          # This value will be set to TRUE if we find at least one coupled feature.          # This value will be set to TRUE if we find at least one coupled feature.
1679          my $found = 0;          my $found = 0;
1680          # Create the return hash.          # Create the return hash.
1681          my %retVal = ();          my %retVal = ();
1682          # Retrieve the relationship records and store them in the hash.          # Retrieve the relationship records and store them in the hash.
1683          while (my $clustering = $query->Fetch()) {          while (my $clustering = $query->Fetch()) {
1684                  my ($otherFeatureID, $score) = $clustering->Values(['IsClusteredOnChromosomeWith(to-link)',          # Get the ID and score of the coupling.
1685                                                                      'IsClusteredOnChromosomeWith(score)']);          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1686                                                            'Coupling(score)']);
1687            Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1688            # Get the other feature that participates in the coupling.
1689            my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1690                                               "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1691                                               [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1692            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1693            # Attach the other feature's score to its ID.
1694                  $retVal{$otherFeatureID} = $score;                  $retVal{$otherFeatureID} = $score;
1695                  $found = 1;                  $found = 1;
1696          }          }
# Line 1323  Line 1703 
1703          return %retVal;          return %retVal;
1704  }  }
1705    
1706  =head3 GetEntityTypes  =head3 CouplingEvidence
1707    
1708    C<< my @evidence = $sprout->CouplingEvidence($peg1, $peg2); >>
1709    
1710    Return the evidence for a functional coupling.
1711    
1712    A pair of features is considered evidence of a coupling between two other
1713    features if they occur close together on a contig and both are similar to
1714    the coupled features. So, if B<A1> and B<A2> are close together on a contig,
1715    B<B1> and B<B2> are considered evidence for the coupling if (1) B<B1> and
1716    B<B2> are close together, (2) B<B1> is similar to B<A1>, and (3) B<B2> is
1717    similar to B<A2>.
1718    
1719    The score of a coupling is determined by the number of pieces of evidence
1720    that are considered I<representative>. If several evidence items belong to
1721    a group of genomes that are close to each other, only one of those items
1722    is considered representative. The other evidence items are presumed to be
1723    there because of the relationship between the genomes rather than because
1724    the two proteins generated by the features have a related functionality.
1725    
1726    Each evidence item is returned as a three-tuple in the form C<[>I<$peg1a>C<,>
1727    I<$peg2a>C<,> I<$rep>C<]>, where I<$peg1a> is similar to I<$peg1>, I<$peg2a>
1728    is similar to I<$peg2>, and I<$rep> is TRUE if the evidence is representative
1729    and FALSE otherwise.
1730    
1731    =over 4
1732    
1733    =item peg1
1734    
1735    ID of the feature of interest.
1736    
1737  C<< my @entityList = $sprout->GetEntityTypes(); >>  =item peg2
1738    
1739  Return the list of supported entity types.  ID of a feature functionally coupled to the feature of interest.
1740    
1741    =item RETURN
1742    
1743    Returns a list of 3-tuples. Each tuple consists of a feature similar to the feature
1744    of interest, a feature similar to the functionally coupled feature, and a flag
1745    that is TRUE for a representative piece of evidence and FALSE otherwise.
1746    
1747    =back
1748    
1749  =cut  =cut
1750  #: Return Type @;  #: Return Type @@;
1751  sub GetEntityTypes {  sub CouplingEvidence {
1752          # Get the parameters.          # Get the parameters.
1753          my $self = shift @_;      my ($self, $peg1, $peg2) = @_;
1754          # Get the underlying database object.      # Declare the return variable.
1755          my $erdb = $self->{_erdb};      my @retVal = ();
1756          # Get its entity type list.      # Our first task is to find out the nature of the coupling: whether or not
1757          my @retVal = $erdb->GetEntityTypes();      # it exists, its score, and whether the features are stored in the same
1758        # order as the ones coming in.
1759        my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);
1760        # Only proceed if a coupling exists.
1761        if ($couplingID) {
1762            # Determine the ordering to place on the evidence items. If we're
1763            # inverted, we want to see feature 2 before feature 1 (descending); otherwise,
1764            # we want feature 1 before feature 2 (normal).
1765            Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4);
1766            my $ordering = ($inverted ? "DESC" : "");
1767            # Get the coupling evidence.
1768            my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],
1769                                              "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering",
1770                                              [$couplingID],
1771                                              ['PCH(used)', 'UsesAsEvidence(to-link)']);
1772            # Loop through the evidence items. Each piece of evidence is represented by two
1773            # positions in the evidence list, one for each feature on the other side of the
1774            # evidence link. If at some point we want to generalize to couplings with
1775            # more than two positions, this section of code will need to be re-done.
1776            while (@evidenceList > 0) {
1777                my $peg1Data = shift @evidenceList;
1778                my $peg2Data = shift @evidenceList;
1779                Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4);
1780                push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];
1781            }
1782            Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4);
1783        }
1784        # Return the result.
1785        return @retVal;
1786  }  }
1787    
1788  =head3 ReadFasta  =head3 GetCoupling
1789    
1790  C<< my %sequenceData = Sprout::ReadFasta($fileName, $prefix); >>  C<< my ($couplingID, $inverted, $score) = $sprout->GetCoupling($peg1, $peg2); >>
1791    
1792  Read sequence data from a FASTA-format file. Each sequence in a FASTA file is represented by  Return the coupling (if any) for the specified pair of PEGs. If a coupling
1793  one or more lines of data. The first line begins with a > character and contains an ID.  exists, we return the coupling ID along with an indicator of whether the
1794  The remaining lines contain the sequence data in order.  coupling is stored as C<(>I<$peg1>C<, >I<$peg2>C<)> or C<(>I<$peg2>C<, >I<$peg1>C<)>.
1795    In the second case, we say the coupling is I<inverted>. The importance of an
1796    inverted coupling is that the PEGs in the evidence will appear in reverse order.
1797    
1798  =over 4  =over 4
1799    
1800  =item fileName  =item peg1
1801    
1802  Name of the FASTA file.  ID of the feature of interest.
1803    
1804  =item prefix (optional)  =item peg2
1805    
1806  Prefix to be put in front of each ID found.  ID of the potentially coupled feature.
1807    
1808  =item RETURN  =item RETURN
1809    
1810  Returns a hash that maps each ID to its sequence.  Returns a three-element list. The first element contains the database ID of
1811    the coupling. The second element is FALSE if the coupling is stored in the
1812    database in the caller specified order and TRUE if it is stored in the
1813    inverted order. The third element is the coupling's score. If the coupling
1814    does not exist, all three list elements will be C<undef>.
1815    
1816  =back  =back
1817    
1818  =cut  =cut
1819  #: Return Type %;  #: Return Type $%@;
1820  sub ReadFasta {  sub GetCoupling {
1821          # Get the parameters.          # Get the parameters.
1822          my ($fileName, $prefix) = @_;      my ($self, $peg1, $peg2) = @_;
1823          # Create the return hash.      # Declare the return values. We'll start with the coupling ID and undefine the
1824          my %retVal = ();      # flag and score until we have more information.
1825          # Open the file for input.      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1826          open FASTAFILE, '<', $fileName;      # Find the coupling data.
1827          # Declare the ID variable and clear the sequence accumulator.      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1828          my $sequence = "";                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
1829                                     [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);
1830        # Check to see if we found anything.
1831        if (!@pegs) {
1832            Trace("No coupling found.") if T(Coupling => 4);
1833            # No coupling, so undefine the return value.
1834            $retVal = undef;
1835        } else {
1836            # We have a coupling! Get the score and check for inversion.
1837            $score = $pegs[0]->[1];
1838            my $firstFound = $pegs[0]->[0];
1839            $inverted = ($firstFound ne $peg1);
1840            Trace("Coupling score is $score. First peg is $firstFound, peg 1 is $peg1.") if T(Coupling => 4);
1841        }
1842        # Return the result.
1843        return ($retVal, $inverted, $score);
1844    }
1845    
1846    =head3 GetSynonymGroup
1847    
1848    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1849    
1850    Return the synonym group name for the specified feature.
1851    
1852    =over 4
1853    
1854    =item fid
1855    
1856    ID of the feature whose synonym group is desired.
1857    
1858    =item RETURN
1859    
1860    The name of the synonym group to which the feature belongs. If the feature does
1861    not belong to a synonym group, the feature ID itself is returned.
1862    
1863    =back
1864    
1865    =cut
1866    
1867    sub GetSynonymGroup {
1868        # Get the parameters.
1869        my ($self, $fid) = @_;
1870        # Declare the return variable.
1871        my $retVal;
1872        # Find the synonym group.
1873        my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
1874                                       [$fid], 'IsSynonymGroupFor(from-link)');
1875        # Check to see if we found anything.
1876        if (@groups) {
1877            $retVal = $groups[0];
1878        } else {
1879            $retVal = $fid;
1880        }
1881        # Return the result.
1882        return $retVal;
1883    }
1884    
1885    =head3 GetBoundaries
1886    
1887    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1888    
1889    Determine the begin and end boundaries for the locations in a list. All of the
1890    locations must belong to the same contig and have mostly the same direction in
1891    order for this method to produce a meaningful result. The resulting
1892    begin/end pair will contain all of the bases in any of the locations.
1893    
1894    =over 4
1895    
1896    =item locList
1897    
1898    List of locations to process.
1899    
1900    =item RETURN
1901    
1902    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1903    and the ending boundary. The beginning boundary will be left of the
1904    end for mostly-forward locations and right of the end for mostly-backward
1905    locations.
1906    
1907    =back
1908    
1909    =cut
1910    
1911    sub GetBoundaries {
1912        # Get the parameters.
1913        my ($self, @locList) = @_;
1914        # Set up the counters used to determine the most popular direction.
1915        my %counts = ( '+' => 0, '-' => 0 );
1916        # Get the last location and parse it.
1917        my $locObject = BasicLocation->new(pop @locList);
1918        # Prime the loop with its data.
1919        my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right);
1920        # Count its direction.
1921        $counts{$locObject->Dir}++;
1922        # Loop through the remaining locations. Note that in most situations, this loop
1923        # will not iterate at all, because most of the time we will be dealing with a
1924        # singleton list.
1925        for my $loc (@locList) {
1926            # Create a location object.
1927            my $locObject = BasicLocation->new($loc);
1928            # Count the direction.
1929            $counts{$locObject->Dir}++;
1930            # Get the left end and the right end.
1931            my $left = $locObject->Left;
1932            my $right = $locObject->Right;
1933            # Merge them into the return variables.
1934            if ($left < $beg) {
1935                $beg = $left;
1936            }
1937            if ($right > $end) {
1938                $end = $right;
1939            }
1940        }
1941        # If the most common direction is reverse, flip the begin and end markers.
1942        if ($counts{'-'} > $counts{'+'}) {
1943            ($beg, $end) = ($end, $beg);
1944        }
1945        # Return the result.
1946        return ($contig, $beg, $end);
1947    }
1948    
1949    =head3 CouplingID
1950    
1951    C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1952    
1953    Return the coupling ID for a pair of feature IDs.
1954    
1955    The coupling ID is currently computed by joining the feature IDs in
1956    sorted order with a space. Client modules (that is, modules which
1957    use Sprout) should not, however, count on this always being the
1958    case. This method provides a way for abstracting the concept of a
1959    coupling ID. All that we know for sure about it is that it can be
1960    generated easily from the feature IDs and the order of the IDs
1961    in the parameter list does not matter (i.e. C<CouplingID("a1", "b1")>
1962    will have the same value as C<CouplingID("b1", "a1")>.
1963    
1964    =over 4
1965    
1966    =item peg1
1967    
1968    First feature of interest.
1969    
1970    =item peg2
1971    
1972    Second feature of interest.
1973    
1974    =item RETURN
1975    
1976    Returns the ID that would be used to represent a functional coupling of
1977    the two specified PEGs.
1978    
1979    =back
1980    
1981    =cut
1982    #: Return Type $;
1983    sub CouplingID {
1984        my ($self, @pegs) = @_;
1985        return $self->DigestKey(join " ", sort @pegs);
1986    }
1987    
1988    =head3 ReadFasta
1989    
1990    C<< my %sequenceData = Sprout::ReadFasta($fileName, $prefix); >>
1991    
1992    Read sequence data from a FASTA-format file. Each sequence in a FASTA file is represented by
1993    one or more lines of data. The first line begins with a > character and contains an ID.
1994    The remaining lines contain the sequence data in order.
1995    
1996    =over 4
1997    
1998    =item fileName
1999    
2000    Name of the FASTA file.
2001    
2002    =item prefix (optional)
2003    
2004    Prefix to be put in front of each ID found.
2005    
2006    =item RETURN
2007    
2008    Returns a hash that maps each ID to its sequence.
2009    
2010    =back
2011    
2012    =cut
2013    #: Return Type %;
2014    sub ReadFasta {
2015        # Get the parameters.
2016        my ($fileName, $prefix) = @_;
2017        # Create the return hash.
2018        my %retVal = ();
2019        # Open the file for input.
2020        open FASTAFILE, '<', $fileName;
2021        # Declare the ID variable and clear the sequence accumulator.
2022        my $sequence = "";
2023          my $id = "";          my $id = "";
2024          # Loop through the file.          # Loop through the file.
2025          while (<FASTAFILE>) {          while (<FASTAFILE>) {
# Line 1384  Line 2029 
2029                  if ($line =~ m/^>\s*(.+?)(\s|\n)/) {                  if ($line =~ m/^>\s*(.+?)(\s|\n)/) {
2030                          # Here we have a new header. Store the current sequence if we have one.                          # Here we have a new header. Store the current sequence if we have one.
2031                          if ($id) {                          if ($id) {
2032                                  $retVal{$id} = $sequence;                  $retVal{$id} = lc $sequence;
2033                          }                          }
2034                          # Clear the sequence accumulator and save the new ID.                          # Clear the sequence accumulator and save the new ID.
2035                          ($id, $sequence) = ("$prefix$1", "");                          ($id, $sequence) = ("$prefix$1", "");
2036                  } else {                  } else {
2037                          # Here we have a data line, so we add it to the sequence accumulator.                          # Here we have a data line, so we add it to the sequence accumulator.
2038                          # First, we get the actual data out.              # First, we get the actual data out. Note that we normalize to lower
2039                # case.
2040                          $line =~ /^\s*(.*?)(\s|\n)/;                          $line =~ /^\s*(.*?)(\s|\n)/;
2041                          $sequence .= $1;                          $sequence .= $1;
2042                  }                  }
2043          }          }
2044          # Flush out the last sequence (if any).          # Flush out the last sequence (if any).
2045          if ($sequence) {          if ($sequence) {
2046                  $retVal {$id} = $sequence;          $retVal{$id} = lc $sequence;
2047          }          }
2048        # Close the file.
2049        close FASTAFILE;
2050          # Return the hash constructed from the file.          # Return the hash constructed from the file.
2051          return %retVal;          return %retVal;
2052  }  }
# Line 1409  Line 2057 
2057    
2058  Insure that a list of feature locations is in the Sprout format. The Sprout feature location  Insure that a list of feature locations is in the Sprout format. The Sprout feature location
2059  format is I<contig>_I<beg*len> where I<*> is C<+> for a forward gene and C<-> for a backward  format is I<contig>_I<beg*len> where I<*> is C<+> for a forward gene and C<-> for a backward
2060  gene. The old format is I<contig>_I<beg>_I<end>.  gene. The old format is I<contig>_I<beg>_I<end>. If a feature is in the new format already,
2061    it will not be changed; otherwise, it will be converted. This method can also be used to
2062    perform the reverse task-- insuring that all the locations are in the old format.
2063    
2064  =over 4  =over 4
2065    
# Line 1436  Line 2086 
2086  #: Return Type @;  #: Return Type @;
2087  sub FormatLocations {  sub FormatLocations {
2088          # Get the parameters.          # Get the parameters.
2089          my $self = shift @_;      my ($self, $prefix, $locations, $oldFormat) = @_;
         my ($prefix, $locations, $oldFormat) = @_;  
2090          # Create the return list.          # Create the return list.
2091          my @retVal = ();          my @retVal = ();
2092          # Check to see if any locations were passed in.          # Check to see if any locations were passed in.
2093          if ($locations eq '') {          if ($locations eq '') {
2094              confess "No locations specified.";          Confess("No locations specified.");
2095          } else {          } else {
2096                  # Loop through the locations, converting them to the new format.                  # Loop through the locations, converting them to the new format.
2097                  for my $location (@{$locations}) {                  for my $location (@{$locations}) {
# Line 1477  Line 2126 
2126    
2127  sub DumpData {  sub DumpData {
2128          # Get the parameters.          # Get the parameters.
2129          my $self = shift @_;      my ($self) = @_;
2130          # Get the data directory name.          # Get the data directory name.
2131          my $outputDirectory = $self->{_options}->{dataDir};          my $outputDirectory = $self->{_options}->{dataDir};
2132          # Dump the relations.          # Dump the relations.
2133          $self->{_erdb}->DumpRelations($outputDirectory);      $self->DumpRelations($outputDirectory);
2134  }  }
2135    
2136  =head3 XMLFileName  =head3 XMLFileName
# Line 1493  Line 2142 
2142  =cut  =cut
2143  #: Return Type $;  #: Return Type $;
2144  sub XMLFileName {  sub XMLFileName {
2145          my $self = shift @_;      my ($self) = @_;
2146          return $self->{_xmlName};          return $self->{_xmlName};
2147  }  }
2148    
# Line 1513  Line 2162 
2162  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
2163  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
2164    
2165  C<< $sprout->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $sprout->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
2166    
2167  =over 4  =over 4
2168    
# Line 1531  Line 2180 
2180  #: Return Type ;  #: Return Type ;
2181  sub Insert {  sub Insert {
2182          # Get the parameters.          # Get the parameters.
2183          my $self = shift @_;      my ($self, $objectType, $fieldHash) = @_;
         my ($objectType, $fieldHash) = @_;  
2184          # Call the underlying method.          # Call the underlying method.
2185          $self->{_erdb}->InsertObject($objectType, $fieldHash);      $self->InsertObject($objectType, $fieldHash);
2186  }  }
2187    
2188  =head3 Annotate  =head3 Annotate
# Line 1573  Line 2221 
2221  #: Return Type $;  #: Return Type $;
2222  sub Annotate {  sub Annotate {
2223          # Get the parameters.          # Get the parameters.
2224          my $self = shift @_;      my ($self, $fid, $timestamp, $user, $text) = @_;
         my ($fid, $timestamp, $user, $text) = @_;  
2225          # Create the annotation ID.          # Create the annotation ID.
2226          my $aid = "$fid:$timestamp";          my $aid = "$fid:$timestamp";
2227          # Insert the Annotation object.          # Insert the Annotation object.
# Line 1594  Line 2241 
2241    
2242  =head3 AssignFunction  =head3 AssignFunction
2243    
2244  C<< my $ok = $sprout->AssignFunction($featureID, $user, $function); >>  C<< my $ok = $sprout->AssignFunction($featureID, $user, $function, $assigningUser); >>
2245    
2246  This method assigns a function to a feature. Functions are a special type of annotation. The general  This method assigns a function to a feature. Functions are a special type of annotation. The general
2247  format is "XXXX\nset XXXX function to\nYYYYY" where XXXX is the feature type and YYYY is the functional  format is described in L</ParseAssignment>.
 assignment text.  
2248    
2249  =over 4  =over 4
2250    
# Line 1608  Line 2254 
2254    
2255  =item user  =item user
2256    
2257  Name of the user making the assignment. This is frequently a group name, like C<kegg> or C<fig>.  Name of the user group making the assignment, such as C<kegg> or C<fig>.
2258    
2259  =item function  =item function
2260    
2261  Text of the function being assigned.  Text of the function being assigned.
2262    
2263    =item assigningUser (optional)
2264    
2265    Name of the individual user making the assignment. If omitted, defaults to the user group.
2266    
2267  =item RETURN  =item RETURN
2268    
2269  Returns 1 if successful, 0 if an error occurred.  Returns 1 if successful, 0 if an error occurred.
# Line 1624  Line 2274 
2274  #: Return Type $;  #: Return Type $;
2275  sub AssignFunction {  sub AssignFunction {
2276          # Get the parameters.          # Get the parameters.
2277          my $self = shift @_;      my ($self, $featureID, $user, $function, $assigningUser) = @_;
2278          my ($featureID, $user, $function) = @_;      # Default the assigning user.
2279        if (! $assigningUser) {
2280            $assigningUser = $user;
2281        }
2282          # Create an annotation string from the parameters.          # Create an annotation string from the parameters.
2283          my $annotationText = "$user\nset $user function to\n$function";      my $annotationText = "$assigningUser\nset $user function to\n$function";
2284          # Get the current time.          # Get the current time.
2285          my $now = time;          my $now = time;
2286          # Declare the return variable.          # Declare the return variable.
# Line 1672  Line 2325 
2325  #: Return Type @;  #: Return Type @;
2326  sub FeaturesByAlias {  sub FeaturesByAlias {
2327          # Get the parameters.          # Get the parameters.
2328          my $self = shift @_;      my ($self, $alias) = @_;
         my ($alias) = @_;  
2329          # Declare the return variable.          # Declare the return variable.
2330          my @retVal = ();          my @retVal = ();
2331          # Parse the alias.          # Parse the alias.
# Line 1689  Line 2341 
2341          return @retVal;          return @retVal;
2342  }  }
2343    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
         # Get the parameters.  
         my $self = shift @_;  
         my ($entityName, $entityID) = @_;  
         # Check for the entity instance.  
         my $testInstance = $self->GetEntity($entityName, $entityID);  
         # Return an existence indicator.  
         my $retVal = ($testInstance ? 1 : 0);  
         return $retVal;  
 }  
   
2344  =head3 FeatureTranslation  =head3 FeatureTranslation
2345    
2346  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 1746  Line 2363 
2363  #: Return Type $;  #: Return Type $;
2364  sub FeatureTranslation {  sub FeatureTranslation {
2365          # Get the parameters.          # Get the parameters.
2366          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
2367          # Get the specified feature's translation.          # Get the specified feature's translation.
2368          my ($retVal) = $self->GetEntityValues("Feature", $featureID, ['Feature(translation)']);          my ($retVal) = $self->GetEntityValues("Feature", $featureID, ['Feature(translation)']);
2369          return $retVal;          return $retVal;
# Line 1779  Line 2395 
2395  #: Return Type @;  #: Return Type @;
2396  sub Taxonomy {  sub Taxonomy {
2397          # Get the parameters.          # Get the parameters.
2398          my $self = shift @_;      my ($self, $genome) = @_;
         my ($genome) = @_;  
2399          # Find the specified genome's taxonomy string.          # Find the specified genome's taxonomy string.
2400          my ($list) = $self->GetEntityValues('Genome', $genome, ['Genome(taxonomy)']);          my ($list) = $self->GetEntityValues('Genome', $genome, ['Genome(taxonomy)']);
2401          # Declare the return variable.          # Declare the return variable.
# Line 1823  Line 2438 
2438  #: Return Type $;  #: Return Type $;
2439  sub CrudeDistance {  sub CrudeDistance {
2440          # Get the parameters.          # Get the parameters.
2441          my $self = shift @_;      my ($self, $genome1, $genome2) = @_;
         my ($genome1, $genome2) = @_;  
2442          # Insure that the distance is commutative by sorting the genome IDs.          # Insure that the distance is commutative by sorting the genome IDs.
2443          my ($genomeA, $genomeB);          my ($genomeA, $genomeB);
2444          if ($genome2 < $genome2) {          if ($genome2 < $genome2) {
# Line 1871  Line 2485 
2485  #: Return Type $;  #: Return Type $;
2486  sub RoleName {  sub RoleName {
2487          # Get the parameters.          # Get the parameters.
2488          my $self = shift @_;      my ($self, $roleID) = @_;
         my ($roleID) = @_;  
2489          # Get the specified role's name.          # Get the specified role's name.
2490          my ($retVal) = $self->GetEntityValues('Role', $roleID, ['Role(name)']);          my ($retVal) = $self->GetEntityValues('Role', $roleID, ['Role(name)']);
2491          # Use the ID if the role has no name.          # Use the ID if the role has no name.
# Line 1905  Line 2518 
2518  #: Return Type @;  #: Return Type @;
2519  sub RoleDiagrams {  sub RoleDiagrams {
2520          # Get the parameters.          # Get the parameters.
2521          my $self = shift @_;      my ($self, $roleID) = @_;
         my ($roleID) = @_;  
2522          # Query for the diagrams.          # Query for the diagrams.
2523          my @retVal = $self->GetFlat(['RoleOccursIn'], "RoleOccursIn(from-link) = ?", [$roleID],          my @retVal = $self->GetFlat(['RoleOccursIn'], "RoleOccursIn(from-link) = ?", [$roleID],
2524                                                                  'RoleOccursIn(to-link)');                                                                  'RoleOccursIn(to-link)');
# Line 1914  Line 2526 
2526          return @retVal;          return @retVal;
2527  }  }
2528    
2529    =head3 GetProperties
2530    
2531    C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >>
2532    
2533    Return a list of the properties with the specified characteristics.
2534    
2535    Properties are arbitrary key-value pairs associated with a feature. (At some point they
2536    will also be associated with genomes.) A property value is represented by a 4-tuple of
2537    the form B<($fid, $key, $value, $url)>. These exactly correspond to the parameter
2538    
2539    =over 4
2540    
2541    =item fid
2542    
2543    ID of the feature possessing the property.
2544    
2545    =item key
2546    
2547    Name or key of the property.
2548    
2549    =item value
2550    
2551    Value of the property.
2552    
2553    =item url
2554    
2555    URL of the document that indicated the property should have this particular value, or an
2556    empty string if no such document exists.
2557    
2558    =back
2559    
2560    The parameters act as a filter for the desired data. Any non-null parameter will
2561    automatically match all the tuples returned. So, specifying just the I<$fid> will
2562    return all the properties of the specified feature; similarly, specifying the I<$key>
2563    and I<$value> parameters will return all the features having the specified property
2564    value.
2565    
2566    A single property key can have many values, representing different ideas about the
2567    feature in question. For example, one paper may declare that a feature C<fig|83333.1.peg.10> is
2568    virulent, and another may declare that it is not virulent. A query about the virulence of
2569    C<fig|83333.1.peg.10> would be coded as
2570    
2571        my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', '');
2572    
2573    Here the I<$value> and I<$url> fields are left blank, indicating that those fields are
2574    not to be filtered. The tuples returned would be
2575    
2576        ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf')
2577        ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf')
2578    
2579    =cut
2580    #: Return Type @@;
2581    sub GetProperties {
2582        # Get the parameters.
2583        my ($self, @parms) = @_;
2584        # Declare the return variable.
2585        my @retVal = ();
2586        # Now we need to create a WHERE clause that will get us the data we want. First,
2587        # we create a list of the columns containing the data for each parameter.
2588        my @colNames = ('HasProperty(from-link)', 'Property(property-name)',
2589                        'Property(property-value)', 'HasProperty(evidence)');
2590        # Now we build the WHERE clause and the list of parameter values.
2591        my @where = ();
2592        my @values = ();
2593        for (my $i = 0; $i <= $#colNames; $i++) {
2594            my $parm = $parms[$i];
2595            if (defined $parm && ($parm ne '')) {
2596                push @where, "$colNames[$i] = ?";
2597                push @values, $parm;
2598            }
2599        }
2600        # Format the WHERE clause.
2601        my $filter = (@values > 0 ? (join " AND ", @where) : undef);
2602        # Ask for all the propertie values with the desired characteristics.
2603        my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values);
2604        while (my $valueObject = $query->Fetch()) {
2605            my @tuple = $valueObject->Values(\@colNames);
2606            push @retVal, \@tuple;
2607        }
2608        # Return the result.
2609        return @retVal;
2610    }
2611    
2612  =head3 FeatureProperties  =head3 FeatureProperties
2613    
2614  C<< my @properties = $sprout->FeatureProperties($featureID); >>  C<< my @properties = $sprout->FeatureProperties($featureID); >>
# Line 1943  Line 2638 
2638  #: Return Type @@;  #: Return Type @@;
2639  sub FeatureProperties {  sub FeatureProperties {
2640          # Get the parameters.          # Get the parameters.
2641          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
2642          # Get the properties.          # Get the properties.
2643          my @retVal = $self->GetAll(['HasProperty', 'Property'], "HasProperty(from-link) = ?", [$featureID],          my @retVal = $self->GetAll(['HasProperty', 'Property'], "HasProperty(from-link) = ?", [$featureID],
2644                                                          ['Property(property-name)', 'Property(property-value)',                                                          ['Property(property-name)', 'Property(property-value)',
# Line 1975  Line 2669 
2669  #: Return Type $;  #: Return Type $;
2670  sub DiagramName {  sub DiagramName {
2671          # Get the parameters.          # Get the parameters.
2672          my $self = shift @_;      my ($self, $diagramID) = @_;
         my ($diagramID) = @_;  
2673          # Get the specified diagram's name and return it.          # Get the specified diagram's name and return it.
2674          my ($retVal) = $self->GetEntityValues('Diagram', $diagramID, ['Diagram(name)']);          my ($retVal) = $self->GetEntityValues('Diagram', $diagramID, ['Diagram(name)']);
2675          return $retVal;          return $retVal;
2676  }  }
2677    
2678    =head3 PropertyID
2679    
2680    C<< my $id = $sprout->PropertyID($propName, $propValue); >>
2681    
2682    Return the ID of the specified property name and value pair, if the
2683    pair exists.
2684    
2685    =over 4
2686    
2687    =item propName
2688    
2689    Name of the desired property.
2690    
2691    =item propValue
2692    
2693    Value expected for the desired property.
2694    
2695    =item RETURN
2696    
2697    Returns the ID of the name/value pair, or C<undef> if the pair does not exist.
2698    
2699    =back
2700    
2701    =cut
2702    
2703    sub PropertyID {
2704        # Get the parameters.
2705        my ($self, $propName, $propValue) = @_;
2706        # Try to find the ID.
2707        my ($retVal) = $self->GetFlat(['Property'],
2708                                      "Property(property-name) = ? AND Property(property-value) = ?",
2709                                      [$propName, $propValue], 'Property(id)');
2710        # Return the result.
2711        return $retVal;
2712    }
2713    
2714  =head3 MergedAnnotations  =head3 MergedAnnotations
2715    
2716  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>
# Line 2008  Line 2737 
2737  #: Return Type @;  #: Return Type @;
2738  sub MergedAnnotations {  sub MergedAnnotations {
2739          # Get the parameters.          # Get the parameters.
2740          my $self = shift @_;      my ($self, $list) = @_;
         my ($list) = @_;  
2741          # Create a list to hold the annotation tuples found.          # Create a list to hold the annotation tuples found.
2742          my @tuples = ();          my @tuples = ();
2743          # Loop through the features in the input list.          # Loop through the features in the input list.
# Line 2057  Line 2785 
2785  #: Return Type @;  #: Return Type @;
2786  sub RoleNeighbors {  sub RoleNeighbors {
2787          # Get the parameters.          # Get the parameters.
2788          my $self = shift @_;      my ($self, $roleID) = @_;
         my ($roleID) = @_;  
2789          # Get all the diagrams containing this role.          # Get all the diagrams containing this role.
2790          my @diagrams = $self->GetFlat(['RoleOccursIn'], "RoleOccursIn(from-link) = ?", [$roleID],          my @diagrams = $self->GetFlat(['RoleOccursIn'], "RoleOccursIn(from-link) = ?", [$roleID],
2791                                                                    'RoleOccursIn(to-link)');                                                                    'RoleOccursIn(to-link)');
# Line 2100  Line 2827 
2827  #: Return Type @;  #: Return Type @;
2828  sub FeatureLinks {  sub FeatureLinks {
2829          # Get the parameters.          # Get the parameters.
2830          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
2831          # Get the feature's links.          # Get the feature's links.
2832          my @retVal = $self->GetEntityValues('Feature', $featureID, ['Feature(link)']);          my @retVal = $self->GetEntityValues('Feature', $featureID, ['Feature(link)']);
2833          # Return the feature's links.          # Return the feature's links.
# Line 2113  Line 2839 
2839  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>
2840    
2841  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped
2842  to the role the feature performs.  to the roles the feature performs.
2843    
2844  =over 4  =over 4
2845    
# Line 2123  Line 2849 
2849    
2850  =item RETURN  =item RETURN
2851    
2852  Returns a hash mapping all the feature's subsystems to the feature's role.  Returns a hash mapping all the feature's subsystems to a list of the feature's roles.
2853    
2854  =back  =back
2855    
2856  =cut  =cut
2857  #: Return Type %;  #: Return Type %@;
2858  sub SubsystemsOf {  sub SubsystemsOf {
2859          # Get the parameters.          # Get the parameters.
2860          my $self = shift @_;      my ($self, $featureID) = @_;
2861          my ($featureID) = @_;      # Get the subsystem list.
         # Use the SSCell to connect features to subsystems.  
2862          my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'],          my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'],
2863                                                                          "ContainsFeature(to-link) = ?", [$featureID],                                                                          "ContainsFeature(to-link) = ?", [$featureID],
2864                                                                          ['HasSSCell(from-link)', 'IsRoleOf(from-link)']);                                                                          ['HasSSCell(from-link)', 'IsRoleOf(from-link)']);
2865          # Create the return value.          # Create the return value.
2866          my %retVal = ();          my %retVal = ();
2867        # Build a hash to weed out duplicates. Sometimes the same PEG and role appears
2868        # in two spreadsheet cells.
2869        my %dupHash = ();
2870          # Loop through the results, adding them to the hash.          # Loop through the results, adding them to the hash.
2871          for my $record (@subsystems) {          for my $record (@subsystems) {
2872                  $retVal{$record->[0]} = $record->[1];          # Get this subsystem and role.
2873            my ($subsys, $role) = @{$record};
2874            # Insure it's the first time for both.
2875            my $dupKey = "$subsys\n$role";
2876            if (! exists $dupHash{"$subsys\n$role"}) {
2877                $dupHash{$dupKey} = 1;
2878                push @{$retVal{$subsys}}, $role;
2879            }
2880          }          }
2881          # Return the hash.          # Return the hash.
2882          return %retVal;          return %retVal;
2883  }  }
2884    
2885    =head3 SubsystemList
2886    
2887    C<< my @subsystems = $sprout->SubsystemList($featureID); >>
2888    
2889    Return a list containing the names of the subsystems in which the specified
2890    feature participates. Unlike L</SubsystemsOf>, this method only returns the
2891    subsystem names, not the roles.
2892    
2893    =over 4
2894    
2895    =item featureID
2896    
2897    ID of the feature whose subsystem names are desired.
2898    
2899    =item RETURN
2900    
2901    Returns a list of the names of the subsystems in which the feature participates.
2902    
2903    =back
2904    
2905    =cut
2906    #: Return Type @;
2907    sub SubsystemList {
2908        # Get the parameters.
2909        my ($self, $featureID) = @_;
2910        # Get the list of names.
2911        my @retVal = $self->GetFlat(['HasRoleInSubsystem'], "HasRoleInSubsystem(from-link) = ?",
2912                                    [$featureID], 'HasRoleInSubsystem(to-link)');
2913        # Return the result, sorted.
2914        return sort @retVal;
2915    }
2916    
2917    =head3 GenomeSubsystemData
2918    
2919    C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >>
2920    
2921    Return a hash mapping genome features to their subsystem roles.
2922    
2923    =over 4
2924    
2925    =item genomeID
2926    
2927    ID of the genome whose subsystem feature map is desired.
2928    
2929    =item RETURN
2930    
2931    Returns a hash mapping each feature of the genome to a list of 2-tuples. Eacb
2932    2-tuple contains a subsystem name followed by a role ID.
2933    
2934    =back
2935    
2936    =cut
2937    
2938    sub GenomeSubsystemData {
2939        # Get the parameters.
2940        my ($self, $genomeID) = @_;
2941        # Declare the return variable.
2942        my %retVal = ();
2943        # Get a list of the genome features that participate in subsystems. For each
2944        # feature we get its spreadsheet cells and the corresponding roles.
2945        my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],
2946                                 "HasFeature(from-link) = ?", [$genomeID],
2947                                 ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);
2948        # Now we get a list of the spreadsheet cells and their associated subsystems. Subsystems
2949        # with an unknown variant code (-1) are skipped. Note the genome ID is at both ends of the
2950        # list. We use it at the beginning to get all the spreadsheet cells for the genome and
2951        # again at the end to filter out participation in subsystems with a negative variant code.
2952        my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],
2953                                     "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",
2954                                     [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);
2955        # Now "@roleData" lists the spreadsheet cell and role for each of the genome's features.
2956        # "@cellData" lists the subsystem name for each of the genome's spreadsheet cells. We
2957        # link these two lists together to create the result. First, we want a hash mapping
2958        # spreadsheet cells to subsystem names.
2959        my %subHash = map { $_->[0] => $_->[1] } @cellData;
2960        # We loop through @cellData to build the hash.
2961        for my $roleEntry (@roleData) {
2962            # Get the data for this feature and cell.
2963            my ($fid, $cellID, $role) = @{$roleEntry};
2964            # Check for a subsystem name.
2965            my $subsys = $subHash{$cellID};
2966            if ($subsys) {
2967                # Insure this feature has an entry in the return hash.
2968                if (! exists $retVal{$fid}) { $retVal{$fid} = []; }
2969                # Merge in this new data.
2970                push @{$retVal{$fid}}, [$subsys, $role];
2971            }
2972        }
2973        # Return the result.
2974        return %retVal;
2975    }
2976    
2977  =head3 RelatedFeatures  =head3 RelatedFeatures
2978    
2979  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 2180  Line 3007 
3007  #: Return Type @;  #: Return Type @;
3008  sub RelatedFeatures {  sub RelatedFeatures {
3009          # Get the parameters.          # Get the parameters.
3010          my $self = shift @_;      my ($self, $featureID, $function, $userID) = @_;
         my ($featureID, $function, $userID) = @_;  
3011          # Get a list of the features that are BBHs of the incoming feature.          # Get a list of the features that are BBHs of the incoming feature.
3012          my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'],      my @bbhFeatures = map { $_->[0] } FIGRules::BBHData($featureID);
                                                                          "IsBidirectionalBestHitOf(from-link) = ?", [$featureID],  
                                                                          'IsBidirectionalBestHitOf(to-link)');  
3013          # Now we loop through the features, pulling out the ones that have the correct          # Now we loop through the features, pulling out the ones that have the correct
3014          # functional assignment.          # functional assignment.
3015          my @retVal = ();          my @retVal = ();
# Line 2229  Line 3053 
3053  #: Return Type @;  #: Return Type @;
3054  sub TaxonomySort {  sub TaxonomySort {
3055          # Get the parameters.          # Get the parameters.
3056          my $self = shift @_;      my ($self, $featureIDs) = @_;
         my ($featureIDs) = @_;  
3057          # Create the working hash table.          # Create the working hash table.
3058          my %hashBuffer = ();          my %hashBuffer = ();
3059          # Loop through the features.          # Loop through the features.
# Line 2239  Line 3062 
3062                  my ($taxonomy) = $self->GetFlat(['IsLocatedIn', 'HasContig', 'Genome'], "IsLocatedIn(from-link) = ?",                  my ($taxonomy) = $self->GetFlat(['IsLocatedIn', 'HasContig', 'Genome'], "IsLocatedIn(from-link) = ?",
3063                                                                                  [$fid], 'Genome(taxonomy)');                                                                                  [$fid], 'Genome(taxonomy)');
3064                  # Add this feature to the hash buffer.                  # Add this feature to the hash buffer.
3065                  if (exists $hashBuffer{$taxonomy}) {          Tracer::AddToListMap(\%hashBuffer, $taxonomy, $fid);
                         push @{$hashBuffer{$taxonomy}}, $fid;  
                 } else {  
                         $hashBuffer{$taxonomy} = [$fid];  
                 }  
3066          }          }
3067          # Sort the keys and get the elements.          # Sort the keys and get the elements.
3068          my @retVal = ();          my @retVal = ();
# Line 2254  Line 3073 
3073          return @retVal;          return @retVal;
3074  }  }
3075    
3076  =head3 GetAll  =head3 Protein
   
 C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>  
   
 Return a list of values taken from the objects returned by a query. The first three  
 parameters correspond to the parameters of the L</Get> method. The final parameter is  
 a list of the fields desired from each record found by the query. The field name  
 syntax is the standard syntax used for fields in the B<ERDB> system--  
 B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity  
 or relationship and I<fieldName> is the name of the field.  
3077    
3078  The list returned will be a list of lists. Each element of the list will contain  C<< my $protein = Sprout::Protein($sequence, $table); >>
 the values returned for the fields specified in the fourth parameter. If one of the  
 fields specified returns multiple values, they are flattened in with the rest. For  
 example, the following call will return a list of the features in a particular  
 spreadsheet cell, and each feature will be represented by a list containing the  
 feature ID followed by all of its aliases.  
3079    
3080  C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>  Translate a DNA sequence into a protein sequence.
3081    
3082  =over 4  =over 4
3083    
3084  =item objectNames  =item sequence
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item fields  
3085    
3086  List of the fields to be returned in each element of the list returned.  DNA sequence to translate.
3087    
3088  =item count  =item table (optional)
3089    
3090  Maximum number of records to return. If omitted or 0, all available records will be returned.  Reference to a Hash that translates DNA triples to proteins. A triple that does not
3091    appear in the hash will be translated automatically to C<X>.
3092    
3093  =item RETURN  =item RETURN
3094    
3095  Returns a list of list references. Each element of the return list contains the values for the  Returns the protein sequence that would be created by the DNA sequence.
 fields specified in the B<fields> parameter.  
   
 =back  
   
 =cut  
 #: Return Type @@;  
 sub GetAll {  
         # Get the parameters.  
         my $self = shift @_;  
         my ($objectNames, $filterClause, $parameterList, $fields, $count) = @_;  
         # Create the query.  
         my $query = $self->Get($objectNames, $filterClause, $parameterList);  
         # Set up a counter of the number of records read.  
         my $fetched = 0;  
         # Insure the counter has a value.  
         if (!defined $count) {  
                 $count = 0;  
         }  
         # Loop through the records returned, extracting the fields. Note that if the  
         # counter is non-zero, we stop when the number of records read hits the count.  
         my @retVal = ();  
         while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {  
                 my @rowData = $row->Values($fields);  
                 push @retVal, \@rowData;  
                 $fetched++;  
         }  
         # Return the resulting list.  
         return @retVal;  
 }  
   
 =head3 GetFlat  
   
 C<< my @list = $sprout->GetFlat(\@objectNames, $filterClause, $parameterList, $field); >>  
   
 This is a variation of L</GetAll> that asks for only a single field per record and  
 returns a single flattened list.  
   
 =over 4  
   
 =item objectNames  
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item field  
   
 Name of the field to be used to get the elements of the list returned.  
   
 =item RETURN  
   
 Returns a list of values.  
   
 =back  
   
 =cut  
 #: Return Type @;  
 sub GetFlat {  
         # Get the parameters.  
         my $self = shift @_;  
         my ($objectNames, $filterClause, $parameterList, $field) = @_;  
         # Construct the query.  
         my $query = $self->Get($objectNames, $filterClause, $parameterList);  
         # Create the result list.  
         my @retVal = ();  
         # Loop through the records, adding the field values found to the result list.  
         while (my $row = $query->Fetch()) {  
                 push @retVal, $row->Value($field);  
         }  
         # Return the list created.  
         return @retVal;  
 }  
   
 =head3 Protein  
   
 C<< my $protein = Sprout::Protein($sequence, $table); >>  
   
 Translate a DNA sequence into a protein sequence.  
   
 =over 4  
   
 =item sequence  
   
 DNA sequence to translate.  
   
 =item table (optional)  
   
 Reference to a Hash that translates DNA triples to proteins. A triple that does not  
 appear in the hash will be translated automatically to C<X>.  
   
 =item RETURN  
   
 Returns the protein sequence that would be created by the DNA sequence.  
3096    
3097  =back  =back
3098    
# Line 2460  Line 3145 
3145          # Loop through the input triples.          # Loop through the input triples.
3146          my $n = length $sequence;          my $n = length $sequence;
3147          for (my $i = 0; $i < $n; $i += 3) {          for (my $i = 0; $i < $n; $i += 3) {
3148                  # Get the current triple from the sequence.          # Get the current triple from the sequence. Note we convert to
3149                  my $triple = substr($sequence, $i, 3);          # upper case to insure a match.
3150            my $triple = uc substr($sequence, $i, 3);
3151                  # Translate it using the table.                  # Translate it using the table.
3152                  my $protein = "X";                  my $protein = "X";
3153                  if (exists $table->{$triple}) { $protein = $table->{$triple}; }                  if (exists $table->{$triple}) { $protein = $table->{$triple}; }
# Line 2485  Line 3171 
3171  #: Return Type @;  #: Return Type @;
3172  sub LoadInfo {  sub LoadInfo {
3173          # Get the parameters.          # Get the parameters.
3174          my $self = shift @_;      my ($self) = @_;
3175          # Create the return list, priming it with the name of the data directory.          # Create the return list, priming it with the name of the data directory.
3176          my @retVal = ($self->{_options}->{dataDir});          my @retVal = ($self->{_options}->{dataDir});
3177          # Concatenate the table names.          # Concatenate the table names.
3178          push @retVal, $self->{_erdb}->GetTableNames();      push @retVal, $self->GetTableNames();
3179          # Return the result.          # Return the result.
3180          return @retVal;          return @retVal;
3181  }  }
3182    
3183    =head3 BBHMatrix
3184    
3185    C<< my %bbhMap = $sprout->BBHMatrix($genomeID, $cutoff, @targets); >>
3186    
3187    Find all the bidirectional best hits for the features of a genome in a
3188    specified list of target genomes. The return value will be a hash mapping
3189    features in the original genome to their bidirectional best hits in the
3190    target genomes.
3191    
3192    =over 4
3193    
3194    =item genomeID
3195    
3196    ID of the genome whose features are to be examined for bidirectional best hits.
3197    
3198    =item cutoff
3199    
3200    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3201    
3202    =item targets
3203    
3204    List of target genomes. Only pairs originating in the original
3205    genome and landing in one of the target genomes will be returned.
3206    
3207    =item RETURN
3208    
3209    Returns a hash mapping each feature in the original genome to a hash mapping its
3210    BBH pegs in the target genomes to their scores.
3211    
3212    =back
3213    
3214    =cut
3215    
3216    sub BBHMatrix {
3217        # Get the parameters.
3218        my ($self, $genomeID, $cutoff, @targets) = @_;
3219        # Declare the return variable.
3220        my %retVal = ();
3221        # Ask for the BBHs.
3222        my @bbhList = FIGRules::BatchBBHs("fig|$genomeID.%", $cutoff, @targets);
3223        # We now have a set of 4-tuples that we need to convert into a hash of hashes.
3224        for my $bbhData (@bbhList) {
3225            my ($peg1, $peg2, $score) = @{$bbhData};
3226            if (! exists $retVal{$peg1}) {
3227                $retVal{$peg1} = { $peg2 => $score };
3228            } else {
3229                $retVal{$peg1}->{$peg2} = $score;
3230            }
3231        }
3232        # Return the result.
3233        return %retVal;
3234    }
3235    
3236    
3237    =head3 SimMatrix
3238    
3239    C<< my %simMap = $sprout->SimMatrix($genomeID, $cutoff, @targets); >>
3240    
3241    Find all the similarities for the features of a genome in a
3242    specified list of target genomes. The return value will be a hash mapping
3243    features in the original genome to their similarites in the
3244    target genomes.
3245    
3246    =over 4
3247    
3248    =item genomeID
3249    
3250    ID of the genome whose features are to be examined for similarities.
3251    
3252    =item cutoff
3253    
3254    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3255    
3256    =item targets
3257    
3258    List of target genomes. Only pairs originating in the original
3259    genome and landing in one of the target genomes will be returned.
3260    
3261    =item RETURN
3262    
3263    Returns a hash mapping each feature in the original genome to a hash mapping its
3264    similar pegs in the target genomes to their scores.
3265    
3266    =back
3267    
3268    =cut
3269    
3270    sub SimMatrix {
3271        # Get the parameters.
3272        my ($self, $genomeID, $cutoff, @targets) = @_;
3273        # Declare the return variable.
3274        my %retVal = ();
3275        # Get the list of features in the source organism.
3276        my @fids = $self->FeaturesOf($genomeID);
3277        # Ask for the sims. We only want similarities to fig features.
3278        my $simList = FIGRules::GetNetworkSims($self, \@fids, {}, 1000, $cutoff, "fig");
3279        if (! defined $simList) {
3280            Confess("Unable to retrieve similarities from server.");
3281        } else {
3282            Trace("Processing sims.") if T(3);
3283            # We now have a set of sims that we need to convert into a hash of hashes. First, we
3284            # Create a hash for the target genomes.
3285            my %targetHash = map { $_ => 1 } @targets;
3286            for my $simData (@{$simList}) {
3287                # Get the PEGs and the score.
3288                my ($peg1, $peg2, $score) = ($simData->id1, $simData->id2, $simData->psc);
3289                # Insure the second ID is in the target list.
3290                my ($genome2) = FIGRules::ParseFeatureID($peg2);
3291                if (exists $targetHash{$genome2}) {
3292                    # Here it is. Now we need to add it to the return hash. How we do that depends
3293                    # on whether or not $peg1 is new to us.
3294                    if (! exists $retVal{$peg1}) {
3295                        $retVal{$peg1} = { $peg2 => $score };
3296                    } else {
3297                        $retVal{$peg1}->{$peg2} = $score;
3298                    }
3299                }
3300            }
3301        }
3302        # Return the result.
3303        return %retVal;
3304    }
3305    
3306    
3307  =head3 LowBBHs  =head3 LowBBHs
3308    
3309  C<< my %bbhMap = $sprout->GoodBBHs($featureID, $cutoff); >>  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>
3310    
3311  Return the bidirectional best hits of a feature whose score is no greater than a  Return the bidirectional best hits of a feature whose score is no greater than a
3312  specified cutoff value. A higher cutoff value will allow inclusion of hits with  specified cutoff value. A higher cutoff value will allow inclusion of hits with
# Line 2522  Line 3332 
3332  #: Return Type %;  #: Return Type %;
3333  sub LowBBHs {  sub LowBBHs {
3334          # Get the parsameters.          # Get the parsameters.
3335          my $self = shift @_;      my ($self, $featureID, $cutoff) = @_;
         my ($featureID, $cutoff) = @_;  
3336          # Create the return hash.          # Create the return hash.
3337          my %retVal = ();          my %retVal = ();
3338          # Create a query to get the desired BBHs.      # Query for the desired BBHs.
3339          my @bbhList = $self->GetAll(['IsBidirectionalBestHitOf'],      my @bbhList = FIGRules::BBHData($featureID, $cutoff);
                                                                 'IsBidirectionalBestHitOf(sc) <= ? AND IsBidirectionalBestHitOf(from-link) = ?',  
                                                                 [$cutoff, $featureID],  
                                                                 ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(sc)']);  
3340          # Form the results into the return hash.          # Form the results into the return hash.
3341          for my $pair (@bbhList) {          for my $pair (@bbhList) {
3342                  $retVal{$pair->[0]} = $pair->[1];          my $fid = $pair->[0];
3343            if ($self->Exists('Feature', $fid)) {
3344                $retVal{$fid} = $pair->[1];
3345            }
3346        }
3347        # Return the result.
3348        return %retVal;
3349    }
3350    
3351    =head3 Sims
3352    
3353    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3354    
3355    Get a list of similarities for a specified feature. Similarity information is not kept in the
3356    Sprout database; rather, they are retrieved from a network server. The similarities are
3357    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3358    so that its elements can be accessed by name.
3359    
3360    Similarities can be either raw or expanded. The raw similarities are basic
3361    hits between features with similar DNA. Expanding a raw similarity drags in any
3362    features considered substantially identical. So, for example, if features B<A1>,
3363    B<A2>, and B<A3> are all substantially identical to B<A>, then a raw similarity
3364    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3365    
3366    =over 4
3367    
3368    =item fid
3369    
3370    ID of the feature whose similarities are desired.
3371    
3372    =item maxN
3373    
3374    Maximum number of similarities to return.
3375    
3376    =item maxP
3377    
3378    Minumum allowable similarity score.
3379    
3380    =item select
3381    
3382    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3383    means only similarities to FIG features are returned; C<all> means all expanded
3384    similarities are returned; and C<figx> means similarities are expanded until the
3385    number of FIG features equals the maximum.
3386    
3387    =item max_expand
3388    
3389    The maximum number of features to expand.
3390    
3391    =item filters
3392    
3393    Reference to a hash containing filter information, or a subroutine that can be
3394    used to filter the sims.
3395    
3396    =item RETURN
3397    
3398    Returns a reference to a list of similarity objects, or C<undef> if an error
3399    occurred.
3400    
3401    =back
3402    
3403    =cut
3404    
3405    sub Sims {
3406        # Get the parameters.
3407        my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
3408        # Create the shim object to test for deleted FIDs.
3409        my $shim = FidCheck->new($self);
3410        # Ask the network for sims.
3411        my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters);
3412        # Return the result.
3413        return $retVal;
3414    }
3415    
3416    =head3 IsAllGenomes
3417    
3418    C<< my $flag = $sprout->IsAllGenomes(\@list, \@checkList); >>
3419    
3420    Return TRUE if all genomes in the second list are represented in the first list at
3421    least one. Otherwise, return FALSE. If the second list is omitted, the first list is
3422    compared to a list of all the genomes.
3423    
3424    =over 4
3425    
3426    =item list
3427    
3428    Reference to the list to be compared to the second list.
3429    
3430    =item checkList (optional)
3431    
3432    Reference to the comparison target list. Every genome ID in this list must occur at
3433    least once in the first list. If this parameter is omitted, a list of all the genomes
3434    is used.
3435    
3436    =item RETURN
3437    
3438    Returns TRUE if every item in the second list appears at least once in the
3439    first list, else FALSE.
3440    
3441    =back
3442    
3443    =cut
3444    
3445    sub IsAllGenomes {
3446        # Get the parameters.
3447        my ($self, $list, $checkList) = @_;
3448        # Supply the checklist if it was omitted.
3449        $checkList = [$self->Genomes()] if ! defined($checkList);
3450        # Create a hash of the original list.
3451        my %testList = map { $_ => 1 } @{$list};
3452        # Declare the return variable. We assume that the representation
3453        # is complete and stop at the first failure.
3454        my $retVal = 1;
3455        my $n = scalar @{$checkList};
3456        for (my $i = 0; $retVal && $i < $n; $i++) {
3457            if (! $testList{$checkList->[$i]}) {
3458                $retVal = 0;
3459            }
3460        }
3461        # Return the result.
3462        return $retVal;
3463    }
3464    
3465    =head3 GetGroups
3466    
3467    C<< my %groups = $sprout->GetGroups(\@groupList); >>
3468    
3469    Return a hash mapping each group to the IDs of the genomes in the group.
3470    A list of groups may be specified, in which case only those groups will be
3471    shown. Alternatively, if no parameter is supplied, all groups will be
3472    included. Genomes that are not in any group are omitted.
3473    
3474    =cut
3475    #: Return Type %@;
3476    sub GetGroups {
3477        # Get the parameters.
3478        my ($self, $groupList) = @_;
3479        # Declare the return value.
3480        my %retVal = ();
3481        # Determine whether we are getting all the groups or just some.
3482        if (defined $groupList) {
3483            # Here we have a group list. Loop through them individually,
3484            # getting a list of the relevant genomes.
3485            for my $group (@{$groupList}) {
3486                my @genomeIDs = $self->GetFlat(['Genome'], "Genome(primary-group) = ?",
3487                    [$group], "Genome(id)");
3488                $retVal{$group} = \@genomeIDs;
3489            }
3490        } else {
3491            # Here we need all of the groups. In this case, we run through all
3492            # of the genome records, putting each one found into the appropriate
3493            # group. Note that we use a filter clause to insure that only genomes
3494            # in real NMPDR groups are included in the return set.
3495            my @genomes = $self->GetAll(['Genome'], "Genome(primary-group) <> ?",
3496                                        [$FIG_Config::otherGroup], ['Genome(id)', 'Genome(primary-group)']);
3497            # Loop through the genomes found.
3498            for my $genome (@genomes) {
3499                # Pop this genome's ID off the current list.
3500                my @groups = @{$genome};
3501                my $genomeID = shift @groups;
3502                # Loop through the groups, adding the genome ID to each group's
3503                # list.
3504                for my $group (@groups) {
3505                    Tracer::AddToListMap(\%retVal, $group, $genomeID);
3506                }
3507            }
3508        }
3509        # Return the hash we just built.
3510        return %retVal;
3511    }
3512    
3513    =head3 MyGenomes
3514    
3515    C<< my @genomes = Sprout::MyGenomes($dataDir); >>
3516    
3517    Return a list of the genomes to be included in the Sprout.
3518    
3519    This method is provided for use during the Sprout load. It presumes the Genome load file has
3520    already been created. (It will be in the Sprout data directory and called either C<Genome>
3521    or C<Genome.dtx>.) Essentially, it reads in the Genome load file and strips out the genome
3522    IDs.
3523    
3524    =over 4
3525    
3526    =item dataDir
3527    
3528    Directory containing the Sprout load files.
3529    
3530    =back
3531    
3532    =cut
3533    #: Return Type @;
3534    sub MyGenomes {
3535        # Get the parameters.
3536        my ($dataDir) = @_;
3537        # Compute the genome file name.
3538        my $genomeFileName = LoadFileName($dataDir, "Genome");
3539        # Extract the genome IDs from the files.
3540        my @retVal = map { $_ =~ /^(\S+)/; $1 } Tracer::GetFile($genomeFileName);
3541        # Return the result.
3542        return @retVal;
3543    }
3544    
3545    =head3 LoadFileName
3546    
3547    C<< my $fileName = Sprout::LoadFileName($dataDir, $tableName); >>
3548    
3549    Return the name of the load file for the specified table in the specified data
3550    directory.
3551    
3552    =over 4
3553    
3554    =item dataDir
3555    
3556    Directory containing the Sprout load files.
3557    
3558    =item tableName
3559    
3560    Name of the table whose load file is desired.
3561    
3562    =item RETURN
3563    
3564    Returns the name of the file containing the load data for the specified table, or
3565    C<undef> if no load file is present.
3566    
3567    =back
3568    
3569    =cut
3570    #: Return Type $;
3571    sub LoadFileName {
3572        # Get the parameters.
3573        my ($dataDir, $tableName) = @_;
3574        # Declare the return variable.
3575        my $retVal;
3576        # Check for the various file names.
3577        if (-e "$dataDir/$tableName") {
3578            $retVal = "$dataDir/$tableName";
3579        } elsif (-e "$dataDir/$tableName.dtx") {
3580            $retVal = "$dataDir/$tableName.dtx";
3581        }
3582        # Return the result.
3583        return $retVal;
3584    }
3585    
3586    =head3 DeleteGenome
3587    
3588    C<< my $stats = $sprout->DeleteGenome($genomeID, $testFlag); >>
3589    
3590    Delete a genome from the database.
3591    
3592    =over 4
3593    
3594    =item genomeID
3595    
3596    ID of the genome to delete
3597    
3598    =item testFlag
3599    
3600    If TRUE, then the DELETE statements will be traced, but no deletions will occur.
3601    
3602    =item RETURN
3603    
3604    Returns a statistics object describing the rows deleted.
3605    
3606    =back
3607    
3608    =cut
3609    #: Return Type $%;
3610    sub DeleteGenome {
3611        # Get the parameters.
3612        my ($self, $genomeID, $testFlag) = @_;
3613        # Perform the delete for the genome's features.
3614        my $retVal = $self->Delete('Feature', "fig|$genomeID.%", testMode => $testFlag);
3615        # Perform the delete for the primary genome data.
3616        my $stats = $self->Delete('Genome', $genomeID, testMode => $testFlag);
3617        $retVal->Accumulate($stats);
3618        # Return the result.
3619        return $retVal;
3620    }
3621    
3622    =head3 Fix
3623    
3624    C<< my %fixedHash = Sprout::Fix(%groupHash); >>
3625    
3626    Prepare a genome group hash (like that returned by L</GetGroups> for processing.
3627    Groups with the same primary name will be combined. The primary name is the
3628    first capitalized word in the group name.
3629    
3630    =over 4
3631    
3632    =item groupHash
3633    
3634    Hash to be fixed up.
3635    
3636    =item RETURN
3637    
3638    Returns a fixed-up version of the hash.
3639    
3640    =back
3641    
3642    =cut
3643    
3644    sub Fix {
3645        # Get the parameters.
3646        my (%groupHash) = @_;
3647        # Create the result hash.
3648        my %retVal = ();
3649        # Copy over the genomes.
3650        for my $groupID (keys %groupHash) {
3651            # Make a safety copy of the group ID.
3652            my $realGroupID = $groupID;
3653            # Yank the primary name.
3654            if ($groupID =~ /([A-Z]\w+)/) {
3655                $realGroupID = $1;
3656            }
3657            # Append this group's genomes into the result hash.
3658            Tracer::AddToListMap(\%retVal, $realGroupID, @{$groupHash{$groupID}});
3659        }
3660        # Return the result hash.
3661        return %retVal;
3662    }
3663    
3664    =head3 GroupPageName
3665    
3666    C<< my $name = $sprout->GroupPageName($group); >>
3667    
3668    Return the name of the page for the specified NMPDR group.
3669    
3670    =over 4
3671    
3672    =item group
3673    
3674    Name of the relevant group.
3675    
3676    =item RETURN
3677    
3678    Returns the relative page name (e.g. C<../content/campy.php>). If the group file is not in
3679    memory it will be read in.
3680    
3681    =back
3682    
3683    =cut
3684    
3685    sub GroupPageName {
3686        # Get the parameters.
3687        my ($self, $group) = @_;
3688        # Declare the return variable.
3689        my $retVal;
3690        # Check for the group file data.
3691        if (! defined $self->{groupHash}) {
3692            # Read the group file.
3693            my %groupData = Sprout::ReadGroupFile($self->{_options}->{dataDir} . "/groups.tbl");
3694            # Store it in our object.
3695            $self->{groupHash} = \%groupData;
3696        }
3697        # Compute the real group name.
3698        my $realGroup = $group;
3699        if ($group =~ /([A-Z]\w+)/) {
3700            $realGroup = $1;
3701        }
3702        # Return the page name.
3703        $retVal = "../content/" . $self->{groupHash}->{$realGroup}->[1];
3704        # Return the result.
3705        return $retVal;
3706    }
3707    
3708    =head3 ReadGroupFile
3709    
3710    C<< my %groupData = Sprout::ReadGroupFile($groupFileName); >>
3711    
3712    Read in the data from the specified group file. The group file contains information
3713    about each of the NMPDR groups.
3714    
3715    =over 4
3716    
3717    =item name
3718    
3719    Name of the group.
3720    
3721    =item page
3722    
3723    Name of the group's page on the web site (e.g. C<campy.php> for
3724    Campylobacter)
3725    
3726    =item genus
3727    
3728    Genus of the group
3729    
3730    =item species
3731    
3732    Species of the group, or an empty string if the group is for an entire
3733    genus. If the group contains more than one species, the species names
3734    should be separated by commas.
3735    
3736    =back
3737    
3738    The parameters to this method are as follows
3739    
3740    =over 4
3741    
3742    =item groupFile
3743    
3744    Name of the file containing the group data.
3745    
3746    =item RETURN
3747    
3748    Returns a hash keyed on group name. The value of each hash
3749    
3750    =back
3751    
3752    =cut
3753    
3754    sub ReadGroupFile {
3755        # Get the parameters.
3756        my ($groupFileName) = @_;
3757        # Declare the return variable.
3758        my %retVal;
3759        # Read the group file.
3760        my @groupLines = Tracer::GetFile($groupFileName);
3761        for my $groupLine (@groupLines) {
3762            my ($name, $page, $genus, $species) = split(/\t/, $groupLine);
3763            $retVal{$name} = [$page, $genus, $species];
3764          }          }
3765          # Return the result.          # Return the result.
3766          return %retVal;          return %retVal;
3767  }  }
3768    
3769    =head3 AddProperty
3770    
3771    C<< my  = $sprout->AddProperty($featureID, $key, $value, $url); >>
3772    
3773    Add a new attribute value (Property) to a feature. In the SEED system, attributes can
3774    be added to almost any object. In Sprout, they can only be added to features. In
3775    Sprout, attributes are implemented using I<properties>. A property represents a key/value
3776    pair. If the particular key/value pair coming in is not already in the database, a new
3777    B<Property> record is created to hold it.
3778    
3779    =over 4
3780    
3781    =item peg
3782    
3783    ID of the feature to which the attribute is to be replied.
3784    
3785    =item key
3786    
3787    Name of the attribute (key).
3788    
3789    =item value
3790    
3791    Value of the attribute.
3792    
3793    =item url
3794    
3795    URL or text citation from which the property was obtained.
3796    
3797    =back
3798    
3799    =cut
3800    #: Return Type ;
3801    sub AddProperty {
3802        # Get the parameters.
3803        my ($self, $featureID, $key, $value, $url) = @_;
3804        # Declare the variable to hold the desired property ID.
3805        my $propID;
3806        # Attempt to find a property record for this key/value pair.
3807        my @properties = $self->GetFlat(['Property'],
3808                                       "Property(property-name) = ? AND Property(property-value) = ?",
3809                                       [$key, $value], 'Property(id)');
3810        if (@properties) {
3811            # Here the property is already in the database. We save its ID.
3812            $propID = $properties[0];
3813        } else {
3814            # Here the property value does not exist. We need to generate an ID. It will be set
3815            # to a number one greater than the maximum value in the database. This call to
3816            # GetAll will stop after one record.
3817            my @maxProperty = $self->GetAll(['Property'], "ORDER BY Property(id) DESC", [], ['Property(id)'],
3818                                            1);
3819            $propID = $maxProperty[0]->[0] + 1;
3820            # Insert the new property value.
3821            $self->Insert('Property', { 'property-name' => $key, 'property-value' => $value, id => $propID });
3822        }
3823        # Now we connect the incoming feature to the property.
3824        $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3825    }
3826    
3827    =head2 Virtual Methods
3828    
3829    =head3 CleanKeywords
3830    
3831    C<< my $cleanedString = $sprout->CleanKeywords($searchExpression); >>
3832    
3833    Clean up a search expression or keyword list. This involves converting the periods
3834    in EC numbers to underscores, converting non-leading minus signs to underscores,
3835    a vertical bar or colon to an apostrophe, and forcing lower case for all alphabetic
3836    characters. In addition, any extra spaces are removed.
3837    
3838    =over 4
3839    
3840    =item searchExpression
3841    
3842    Search expression or keyword list to clean. Note that a search expression may
3843    contain boolean operators which need to be preserved. This includes leading
3844    minus signs.
3845    
3846    =item RETURN
3847    
3848    Cleaned expression or keyword list.
3849    
3850    =back
3851    
3852    =cut
3853    
3854    sub CleanKeywords {
3855        # Get the parameters.
3856        my ($self, $searchExpression) = @_;
3857        # Perform the standard cleanup.
3858        my $retVal = $self->ERDB::CleanKeywords($searchExpression);
3859        # Fix the periods in EC and TC numbers.
3860        $retVal =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;
3861        # Fix non-trailing periods.
3862        $retVal =~ s/\.(\w)/_$1/g;
3863        # Fix non-leading minus signs.
3864        $retVal =~ s/(\w)[\-]/$1_/g;
3865        # Fix the vertical bars and colons
3866        $retVal =~ s/(\w)[|:](\w)/$1'$2/g;
3867        # Return the result.
3868        return $retVal;
3869    }
3870    
3871  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3872    
3873  =head3 ParseAssignment  =head3 ParseAssignment
3874    
3875  Parse annotation text to determine whether or not it is a functional assignment. If it is,  Parse annotation text to determine whether or not it is a functional assignment. If it is,
3876  the user and function text will be returned as a 2-element list. If it isn't, an empty list  the user, function text, and assigning user will be returned as a 3-element list. If it
3877  will be returned.  isn't, an empty list will be returned.
3878    
3879    A functional assignment is always of the form
3880    
3881        C<set >I<YYYY>C< function to\n>I<ZZZZZ>
3882    
3883    where I<YYYY> is the B<user>, and I<ZZZZ> is the actual functional role. In most cases,
3884    the user and the assigning user (from MadeAnnotation) will be the same, but that is
3885    not always the case.
3886    
3887    In addition, the functional role may contain extra data that is stripped, such as
3888    terminating spaces or a comment separated from the rest of the text by a tab.
3889    
3890  This is a static method.  This is a static method.
3891    
3892  =over 4  =over 4
3893    
3894    =item user
3895    
3896    Name of the assigning user.
3897    
3898  =item text  =item text
3899    
3900  Text of the annotation.  Text of the annotation.
# Line 2564  Line 3908 
3908    
3909  =cut  =cut
3910    
3911  sub ParseAssignment {  sub _ParseAssignment {
3912          # Get the parameters.          # Get the parameters.
3913          my ($text) = @_;      my ($user, $text) = @_;
3914          # Declare the return value.          # Declare the return value.
3915          my @retVal = ();          my @retVal = ();
3916          # Check to see if this is a functional assignment.          # Check to see if this is a functional assignment.
3917          my ($user, $type, $function) = split(/\n/, $text);      my ($type, $function) = split(/\n/, $text);
3918          if ($type =~ m/^set $user function to$/i) {      if ($type =~ m/^set function to$/i) {
3919                  # Here it is, so we return the user name and function text.          # Here we have an assignment without a user, so we use the incoming user ID.
3920                  @retVal = ($user, $function);                  @retVal = ($user, $function);
3921        } elsif ($type =~ m/^set (\S+) function to$/i) {
3922            # Here we have an assignment with a user that is passed back to the caller.
3923            @retVal = ($1, $function);
3924        }
3925        # If we have an assignment, we need to clean the function text. There may be
3926        # extra junk at the end added as a note from the user.
3927        if (defined( $retVal[1] )) {
3928            $retVal[1] =~ s/(\t\S)?\s*$//;
3929          }          }
3930          # Return the result list.          # Return the result list.
3931          return @retVal;          return @retVal;
# Line 2601  Line 3953 
3953    
3954  sub FriendlyTimestamp {  sub FriendlyTimestamp {
3955      my ($timeValue) = @_;      my ($timeValue) = @_;
3956      my $retVal = strftime("%a %b %e %H:%M:%S %Y", localtime($timeValue));      my $retVal = localtime($timeValue);
3957      return $retVal;      return $retVal;
3958  }  }
3959    
3960    
3961  1;  1;

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.98

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3