[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Tue Jan 25 03:02:07 2005 UTC revision 1.70, Fri Jun 23 19:08:58 2006 UTC
# Line 1  Line 1 
1  package Sprout;  package Sprout;
2    
3        require Exporter;
4        use ERDB;
5        @ISA = qw(Exporter ERDB);
6          use Data::Dumper;          use Data::Dumper;
7          use strict;          use strict;
8          use Carp;          use Carp;
# Line 7  Line 10 
10          use XML::Simple;          use XML::Simple;
11          use DBQuery;          use DBQuery;
12          use DBObject;          use DBObject;
         use ERDB;  
13          use Tracer;          use Tracer;
14          use FIGRules;          use FIGRules;
15          use Stats;          use Stats;
# Line 32  Line 34 
34  query tasks. For example, L</genomes> lists the IDs of all the genomes in the database and  query tasks. For example, L</genomes> lists the IDs of all the genomes in the database and
35  L</dna_seq> returns the DNA sequence for a specified genome location.  L</dna_seq> returns the DNA sequence for a specified genome location.
36    
37    The Sprout object is a subclass of the ERDB object and inherits all its properties and methods.
38    
39  =cut  =cut
40    
41  #: Constructor SFXlate->new_sprout_only();  #: Constructor SFXlate->new_sprout_only();
42    
   
43  =head2 Public Methods  =head2 Public Methods
44    
45  =head3 new  =head3 new
# Line 63  Line 66 
66    
67  * B<xmlFileName> name of the XML file containing the database definition (default C<SproutDBD.xml>)  * B<xmlFileName> name of the XML file containing the database definition (default C<SproutDBD.xml>)
68    
69  * B<userData> user name and password, delimited by a slash (default C<root/>)  * B<userData> user name and password, delimited by a slash (default same as SEED)
70    
71  * B<port> connection port (default C<0>)  * B<port> connection port (default C<0>)
72    
73    * B<sock> connection socket (default same as SEED)
74    
75  * B<maxSegmentLength> maximum number of residues per feature segment, (default C<4500>)  * B<maxSegmentLength> maximum number of residues per feature segment, (default C<4500>)
76    
77  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)
78    
79    * B<noDBOpen> suppresses the connection to the database if TRUE, else FALSE
80    
81  =back  =back
82    
83  For example, the following constructor call specifies a database named I<Sprout> and a user name of  For example, the following constructor call specifies a database named I<Sprout> and a user name of
# Line 87  Line 94 
94          # Compute the options. We do this by starting with a table of defaults and overwriting with          # Compute the options. We do this by starting with a table of defaults and overwriting with
95          # the incoming data.          # the incoming data.
96          my $optionTable = Tracer::GetOptions({          my $optionTable = Tracer::GetOptions({
97                                             dbType               => 'mysql',                     # database type                         dbType       => $FIG_Config::dbms,
98                                             dataDir              => 'Data',                      # data file directory                                                          # database type
99                                             xmlFileName  => 'SproutDBD.xml', # database definition file name                         dataDir      => $FIG_Config::sproutData,
100                                             userData             => 'root/',                     # user name and password                                                          # data file directory
101                                             port                 => 0,                           # database connection port                         xmlFileName  => "$FIG_Config::fig/SproutDBD.xml",
102                                                            # database definition file name
103                           userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
104                                                            # user name and password
105                           port         => $FIG_Config::dbport,
106                                                            # database connection port
107                           sock         => $FIG_Config::dbsock,
108                                             maxSegmentLength => 4500,            # maximum feature segment length                                             maxSegmentLength => 4500,            # maximum feature segment length
109                                             maxSequenceLength => 8000,           # maximum contig sequence length                                             maxSequenceLength => 8000,           # maximum contig sequence length
110                           noDBOpen     => 0,               # 1 to suppress the database open
111                                            }, $options);                                            }, $options);
112          # Get the data directory.          # Get the data directory.
113          my $dataDir = $optionTable->{dataDir};          my $dataDir = $optionTable->{dataDir};
# Line 101  Line 115 
115          $optionTable->{userData} =~ m!([^/]*)/(.*)$!;          $optionTable->{userData} =~ m!([^/]*)/(.*)$!;
116          my ($userName, $password) = ($1, $2);          my ($userName, $password) = ($1, $2);
117          # Connect to the database.          # Connect to the database.
118          my $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, $password, $optionTable->{port});      my $dbh;
119        if (! $optionTable->{noDBOpen}) {
120            $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
121                                    $password, $optionTable->{port}, undef, $optionTable->{sock});
122        }
123          # Create the ERDB object.          # Create the ERDB object.
124          my $xmlFileName = "$optionTable->{xmlFileName}";          my $xmlFileName = "$optionTable->{xmlFileName}";
125          my $erdb = ERDB->new($dbh, $xmlFileName);      my $retVal = ERDB::new($class, $dbh, $xmlFileName);
126          # Create this object.      # Add the option table and XML file name.
127          my $self = { _erdb => $erdb, _options => $optionTable, _xmlName => $xmlFileName };      $retVal->{_options} = $optionTable;
128          # Bless and return it.      $retVal->{_xmlName} = $xmlFileName;
129          bless $self;      # Return it.
130          return $self;      return $retVal;
131  }  }
132    
133  =head3 MaxSegment  =head3 MaxSegment
# Line 125  Line 143 
143  =cut  =cut
144  #: Return Type $;  #: Return Type $;
145  sub MaxSegment {  sub MaxSegment {
146          my $self = shift @_;      my ($self) = @_;
147          return $self->{_options}->{maxSegmentLength};          return $self->{_options}->{maxSegmentLength};
148  }  }
149    
# Line 140  Line 158 
158  =cut  =cut
159  #: Return Type $;  #: Return Type $;
160  sub MaxSequence {  sub MaxSequence {
161          my $self = shift @_;      my ($self) = @_;
162          return $self->{_options}->{maxSequenceLength};          return $self->{_options}->{maxSequenceLength};
163  }  }
164    
165  =head3 Get  =head3 Load
   
 C<< my $query = $sprout->Get(\@objectNames, $filterClause, \@parameterList); >>  
   
 This method allows a general query against the Sprout data using a specified filter clause.  
   
 The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  
 field name represented in the form B<I<objectName>(I<fieldName>)>. For example, the  
 following call requests all B<Genome> objects for the genus specified in the variable  
 $genus.  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", [$genus]); >>  
   
 The WHERE clause contains a single question mark, so there is a single additional  
 parameter representing the parameter value. It would also be possible to code  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  
   
 however, this version of the call would generate a syntax error if there were any quote  
 characters inside the variable C<$genus>.  
   
 The use of the strange parenthesized notation for field names enables us to distinguish  
 hyphens contained within field names from minus signs that participate in the computation  
 of the WHERE clause. All of the methods that manipulate fields will use this same notation.  
   
 It is possible to specify multiple entity and relationship names in order to retrieve more than  
 one object's data at the same time, which allows highly complex joined queries. For example,  
   
 C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", [$genus]); >>  
   
 This query returns all the genomes for a particular genus and allows access to the  
 sources from which they came. The join clauses to go from Genome to Source are generated  
 automatically.  
   
 Finally, the filter clause can contain sort information. To do this, simply put an C<ORDER BY>  
 clause at the end of the filter. Field references in the ORDER BY section follow the same rules  
 as they do in the filter itself; in other words, each one must be of the form B<I<objectName>(I<fieldName>)>.  
 For example, the following filter string gets all genomes for a particular genus and sorts  
 them by species name.  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ? ORDER BY Genome(species)", [$genus]); >>  
166    
167  It is also permissible to specify I<only> an ORDER BY clause. For example, the following invocation gets  C<< $sprout->Load($rebuild); >>;
 all genomes ordered by genus and species.  
168    
169  C<< $query = $sprout->Get(['Genome'], "ORDER BY Genome(genus), Genome(species)"); >>  Load the database from files in the data directory, optionally re-creating the tables.
170    
171  Odd things may happen if one of the ORDER BY fields is in a secondary relation. So, for example, an  This method always deletes the data from the database before loading, even if the tables are not
172  attempt to order B<Feature>s by alias may (depending on the underlying database engine used) cause  re-created. The data is loaded into the relations from files in the data directory either having the
173  a single feature to appear more than once.  same name as the target relation with no extension or with an extension of C<.dtx>. Files without an
174    extension are used in preference to the files with an extension.
175    
176  If multiple names are specified, then the query processor will automatically determine a  The files are loaded based on the presumption that each line of the file is a record in the
177  join path between the entities and relationships. The algorithm used is very simplistic.  relation, and the individual fields are delimited by tabs. Tab and new-line characters inside
178  In particular, you can't specify any entity or relationship more than once, and if a  fields must be represented by the escape sequences C<\t> and C<\n>, respectively. The fields must
179  relationship is recursive, the path is determined by the order in which the entity  be presented in the order given in the relation tables produced by the L</ShowMetaData> method.
 and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  
 which relates B<People> objects to other B<People> objects. If the join path is  
 coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  
 the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  
180    
181  =over 4  =over 4
182    
183  =item objectNames  =item rebuild
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
184    
185  List of the parameters to be substituted in for the parameters marks in the filter clause.  TRUE if the data tables need to be created or re-created, else FALSE
186    
187  =item RETURN  =item RETURN
188    
189  Returns a B<DBQuery> that can be used to iterate through all of the results.  Returns a statistical object containing the number of records read, the number of duplicates found,
190    the number of errors, and a list of the error messages.
191    
192  =back  =back
193    
194  =cut  =cut
195    #: Return Type %;
196  sub Get {  sub Load {
197          # Get the parameters.          # Get the parameters.
198          my $self = shift @_;      my ($self, $rebuild) = @_;
199          my ($objectNames, $filterClause, $parameterList) = @_;      # Load the tables from the data directory.
200          # We differ from the ERDB Get method in that the parameter list is passed in as a list reference      my $retVal = $self->LoadTables($self->{_options}->{dataDir}, $rebuild);
201          # rather than a list of parameters. The next step is to convert the parameters from a reference      # Return the statistics.
202          # to a real list. We can only do this if the parameters have been specified.      return $retVal;
         my @parameters;  
         if ($parameterList) { @parameters = @{$parameterList}; }  
         return $self->{_erdb}->Get($objectNames, $filterClause, @parameters);  
203  }  }
204    
205  =head3 GetEntity  =head3 LoadUpdate
206    
207  C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >>  C<< my $stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >>
208    
209  Return an object describing the entity instance with a specified ID.  Load updates to one or more database tables. This method enables the client to make changes to one
210    or two tables without reloading the whole database. For each table, there must be a corresponding
211    file in the data directory, either with the same name as the table, or with a C<.dtx> suffix. So,
212    for example, to make updates to the B<FeatureTranslation> relation, there must be a
213    C<FeatureTranslation.dtx> file in the data directory. Unlike a full load, files without an extension
214    are not examined. This allows update files to co-exist with files from an original load.
215    
216  =over 4  =over 4
217    
218  =item entityType  =item truncateFlag
219    
220  Entity type name.  TRUE if the tables should be rebuilt before loading, else FALSE. A value of TRUE therefore causes
221    current data and schema of the tables to be replaced, while a value of FALSE means the new data
222    is added to the existing data in the various relations.
223    
224  =item ID  =item tableList
225    
226  ID of the desired entity.  List of the tables to be updated.
227    
228  =item RETURN  =item RETURN
229    
230  Returns a B<DBObject> representing the desired entity instance, or an undefined value if no  Returns a statistical object containing the number of records read, the number of duplicates found,
231  instance is found with the specified key.  the number of errors encountered, and a list of error messages.
232    
233  =back  =back
234    
235  =cut  =cut
236    #: Return Type $%;
237  sub GetEntity {  sub LoadUpdate {
238          # Get the parameters.          # Get the parameters.
239          my $self = shift @_;      my ($self, $truncateFlag, $tableList) = @_;
240          my ($entityType, $ID) = @_;      # Declare the return value.
241          # Create a query.      my $retVal = Stats->new();
242          my $query = $self->Get([$entityType], "$entityType(id) = ?", [$ID]);      # Get the data directory.
243          # Get the first (and only) object.      my $optionTable = $self->{_options};
244          my $retVal = $query->Fetch();      my $dataDir = $optionTable->{dataDir};
245          # Return the result.      # Loop through the incoming table names.
246        for my $tableName (@{$tableList}) {
247            # Find the table's file.
248            my $fileName = LoadFileName($dataDir, $tableName);
249            if (! $fileName) {
250                Trace("No load file found for $tableName in $dataDir.") if T(0);
251            } else {
252                # Attempt to load this table.
253                my $result = $self->LoadTable($fileName, $tableName, $truncateFlag);
254                # Accumulate the resulting statistics.
255                $retVal->Accumulate($result);
256            }
257        }
258        # Return the statistics.
259          return $retVal;          return $retVal;
260  }  }
261    
262  =head3 GetEntityValues  =head3 GenomeCounts
263    
264  C<< my @values = GetEntityValues($entityType, $ID, \@fields); >>  C<< my ($arch, $bact, $euk, $vir, $env, $unk) = $sprout->GenomeCounts($complete); >>
265    
266  Return a list of values from a specified entity instance.  Count the number of genomes in each domain. If I<$complete> is TRUE, only complete
267    genomes will be included in the counts.
268    
269  =over 4  =over 4
270    
271  =item entityType  =item complete
   
 Entity type name.  
   
 =item ID  
   
 ID of the desired entity.  
272    
273  =item fields  TRUE if only complete genomes are to be counted, FALSE if all genomes are to be
274    counted
 List of field names, each of the form I<objectName>C<(>I<fieldName>C<)>.  
275    
276  =item RETURN  =item RETURN
277    
278  Returns a flattened list of the values of the specified fields for the specified entity.  A six-element list containing the number of genomes in each of six categories--
279    Archaea, Bacteria, Eukaryota, Viral, Environmental, and Unknown, respectively.
280    
281  =back  =back
282    
283  =cut  =cut
 #: Return Type @;  
 sub GetEntityValues {  
         # Get the parameters.  
         my $self = shift @_;  
         my ($entityType, $ID, $fields) = @_;  
         # Get the specified entity.  
         my $entity = $self->GetEntity($entityType, $ID);  
         # Declare the return list.  
         my @retVal = ();  
         # If we found the entity, push the values into the return list.  
         if ($entity) {  
                 push @retVal, $entity->Values($fields);  
         }  
         # Return the result.  
         return @retVal;  
 }  
   
 =head3 ShowMetaData  
   
 C<< $sprout->ShowMetaData($fileName); >>  
   
 This method outputs a description of the database to an HTML file in the data directory.  
   
 =over 4  
   
 =item fileName  
284    
285  Fully-qualified name to give to the output file.  sub GenomeCounts {
   
 =back  
   
 =cut  
   
 sub ShowMetaData {  
286          # Get the parameters.          # Get the parameters.
287          my $self = shift @_;      my ($self, $complete) = @_;
288          my ($fileName) = @_;      # Set the filter based on the completeness flag.
289          # Compute the file name.      my $filter = ($complete ? "Genome(complete) = 1" : "");
290          my $options = $self->{_options};      # Get all the genomes and the related taxonomy information.
291          # Call the show method on the underlying ERDB object.      my @genomes = $self->GetAll(['Genome'], $filter, [], ['Genome(id)', 'Genome(taxonomy)']);
292          $self->{_erdb}->ShowMetaData($fileName);      # Clear the counters.
293        my ($arch, $bact, $euk, $vir, $env, $unk) = (0, 0, 0, 0, 0, 0);
294        # Loop through, counting the domains.
295        for my $genome (@genomes) {
296            if    ($genome->[1] =~ /^archaea/i)  { ++$arch }
297            elsif ($genome->[1] =~ /^bacter/i)   { ++$bact }
298            elsif ($genome->[1] =~ /^eukar/i)    { ++$euk }
299            elsif ($genome->[1] =~ /^vir/i)      { ++$vir }
300            elsif ($genome->[1] =~ /^env/i)      { ++$env }
301            else  { ++$unk }
302        }
303        # Return the counts.
304        return ($arch, $bact, $euk, $vir, $env, $unk);
305  }  }
306    
307  =head3 Load  =head3 ContigCount
   
 C<< $sprout->Load($rebuild); >>;  
   
 Load the database from files in the data directory, optionally re-creating the tables.  
308    
309  This method always deletes the data from the database before loading, even if the tables are not  C<< my $count = $sprout->ContigCount($genomeID); >>
 re-created. The data is loaded into the relations from files in the data directory either having the  
 same name as the target relation with no extension or with an extension of C<.dtx>. Files without an  
 extension are used in preference to the files with an extension.  
310    
311  The files are loaded based on the presumption that each line of the file is a record in the  Return the number of contigs for the specified genome ID.
 relation, and the individual fields are delimited by tabs. Tab and new-line characters inside  
 fields must be represented by the escape sequences C<\t> and C<\n>, respectively. The fields must  
 be presented in the order given in the relation tables produced by the L</ShowMetaData> method.  
312    
313  =over 4  =over 4
314    
315  =item rebuild  =item genomeID
316    
317  TRUE if the data tables need to be created or re-created, else FALSE  ID of the genome whose contig count is desired.
318    
319  =item RETURN  =item RETURN
320    
321  Returns a statistical object containing the number of records read, the number of duplicates found,  Returns the number of contigs for the specified genome.
 the number of errors, and a list of the error messages.  
322    
323  =back  =back
324    
325  =cut  =cut
326  #: Return Type %;  
327  sub Load {  sub ContigCount {
328          # Get the parameters.          # Get the parameters.
329          my $self = shift @_;      my ($self, $genomeID) = @_;
330          my ($rebuild) = @_;      # Get the contig count.
331          # Get the database object.      my $retVal = $self->GetCount(['Contig', 'HasContig'], "HasContig(from-link) = ?", [$genomeID]);
332          my $erdb = $self->{_erdb};      # Return the result.
         # Load the tables from the data directory.  
         my $retVal = $erdb->LoadTables($self->{_options}->{dataDir}, $rebuild);  
         # Return the statistics.  
333          return $retVal;          return $retVal;
334  }  }
335    
336  =head3 LoadUpdate  =head3 GeneMenu
337    
338  C<< my %stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >>  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params); >>
339    
340  Load updates to one or more database tables. This method enables the client to make changes to one  Return an HTML select menu of genomes. Each genome will be an option in the menu,
341  or two tables without reloading the whole database. For each table, there must be a corresponding  and will be displayed by name with the ID and a contig count attached. The selection
342  file in the data directory, either with the same name as the table, or with a C<.dtx> suffix. So,  value will be the genome ID. The genomes will be sorted by genus/species name.
 for example, to make updates to the B<FeatureTranslation> relation, there must be a  
 C<FeatureTranslation.dtx> file in the data directory. Unlike a full load, files without an extension  
 are not examined. This allows update files to co-exist with files from an original load.  
343    
344  =over 4  =over 4
345    
346  =item truncateFlag  =item attributes
347    
348  TRUE if the tables should be rebuilt before loading, else FALSE. A value of TRUE therefore causes  Reference to a hash mapping attributes to values for the SELECT tag generated.
 current data and schema of the tables to be replaced, while a value of FALSE means the new data  
 is added to the existing data in the various relations.  
349    
350  =item tableList  =item filterString
351    
352  List of the tables to be updated.  A filter string for use in selecting the genomes. The filter string must conform
353    to the rules for the C<< ERDB->Get >> method.
354    
355    =item params
356    
357    Reference to a list of values to be substituted in for the parameter marks in
358    the filter string.
359    
360  =item RETURN  =item RETURN
361    
362  Returns a statistical object containing the number of records read, the number of duplicates found,  Returns an HTML select menu with the specified genomes as selectable options.
 the number of errors encountered, and a list of error messages.  
363    
364  =back  =back
365    
366  =cut  =cut
367  #: Return Type %;  
368  sub LoadUpdate {  sub GeneMenu {
369          # Get the parameters.          # Get the parameters.
370          my $self = shift @_;      my ($self, $attributes, $filterString, $params) = @_;
371          my ($truncateFlag, $tableList) = @_;      # Start the menu.
372          # Get the database object.      my $retVal = "<select " .
373          my $erdb = $self->{_erdb};          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .
374          # Declare the return value.          ">\n";
375          my $retVal = Stats->new();      # Get the genomes.
376          # Get the data directory.      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
377          my $optionTable = $self->{_options};                                                                       'Genome(genus)',
378          my $dataDir = $optionTable->{dataDir};                                                                       'Genome(species)',
379          # Loop through the incoming table names.                                                                       'Genome(unique-characterization)']);
380          for my $tableName (@{$tableList}) {      # Sort them by name.
381                  # Find the table's file.      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
382                  my $fileName = "$dataDir/$tableName";      # Loop through the genomes, creating the option tags.
383                  if (! -e $fileName) {      for my $genomeData (@sorted) {
384                          $fileName = "$fileName.dtx";          # Get the data for this genome.
385                  }          my ($genomeID, $genus, $species, $strain) = @{$genomeData};
386                  # Attempt to load this table.          # Get the contig count.
387                  my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);          my $count = $self->ContigCount($genomeID);
388                  # Accumulate the resulting statistics.          my $counting = ($count == 1 ? "contig" : "contigs");
389                  $retVal->Accumulate($result);          # Build the option tag.
390            $retVal .= "<option value=\"$genomeID\">$genus $species $strain ($genomeID) [$count $counting]</option>\n";
391            Trace("Option tag built for $genomeID: $genus $species $strain.") if T(3);
392          }          }
393          # Return the statistics.      # Close the SELECT tag.
394        $retVal .= "</select>\n";
395        # Return the result.
396          return $retVal;          return $retVal;
397  }  }
   
398  =head3 Build  =head3 Build
399    
400  C<< $sprout->Build(); >>  C<< $sprout->Build(); >>
# Line 464  Line 407 
407  #: Return Type ;  #: Return Type ;
408  sub Build {  sub Build {
409          # Get the parameters.          # Get the parameters.
410          my $self = shift @_;      my ($self) = @_;
411          # Create the tables.          # Create the tables.
412          $self->{_erdb}->CreateTables;      $self->CreateTables();
413  }  }
414    
415  =head3 Genomes  =head3 Genomes
# Line 479  Line 422 
422  #: Return Type @;  #: Return Type @;
423  sub Genomes {  sub Genomes {
424          # Get the parameters.          # Get the parameters.
425          my $self = shift @_;      my ($self) = @_;
426          # Get all the genomes.          # Get all the genomes.
427          my @retVal = $self->GetFlat(['Genome'], "", [], 'Genome(id)');          my @retVal = $self->GetFlat(['Genome'], "", [], 'Genome(id)');
428          # Return the list of IDs.          # Return the list of IDs.
# Line 509  Line 452 
452  #: Return Type $;  #: Return Type $;
453  sub GenusSpecies {  sub GenusSpecies {
454          # Get the parameters.          # Get the parameters.
455          my $self = shift @_;      my ($self, $genomeID) = @_;
         my ($genomeID) = @_;  
456          # Get the data for the specified genome.          # Get the data for the specified genome.
457          my @values = $self->GetEntityValues('Genome', $genomeID, ['Genome(genus)', 'Genome(species)',          my @values = $self->GetEntityValues('Genome', $genomeID, ['Genome(genus)', 'Genome(species)',
458                                                                                                                            'Genome(unique-characterization)']);                                                                                                                            'Genome(unique-characterization)']);
# Line 546  Line 488 
488  #: Return Type @;  #: Return Type @;
489  sub FeaturesOf {  sub FeaturesOf {
490          # Get the parameters.          # Get the parameters.
491          my $self = shift @_;      my ($self, $genomeID,$ftype) = @_;
         my ($genomeID,$ftype) = @_;  
492          # Get the features we want.          # Get the features we want.
493          my @features;          my @features;
494          if (!$ftype) {          if (!$ftype) {
# Line 591  Line 532 
532  =item RETURN  =item RETURN
533    
534  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
535  context and as a space-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context.
536    
537  =back  =back
538    
# Line 600  Line 541 
541  #: Return Type $;  #: Return Type $;
542  sub FeatureLocation {  sub FeatureLocation {
543          # Get the parameters.          # Get the parameters.
544          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
545          # Create a query for the feature locations.          # Create a query for the feature locations.
546          my $query = $self->Get(['IsLocatedIn'], "IsLocatedIn(from-link) = ? ORDER BY IsLocatedIn(locN)",          my $query = $self->Get(['IsLocatedIn'], "IsLocatedIn(from-link) = ? ORDER BY IsLocatedIn(locN)",
547                                                     [$featureID]);                                                     [$featureID]);
# Line 619  Line 559 
559                  if ($prevContig eq $contigID && $dir eq $prevDir) {                  if ($prevContig eq $contigID && $dir eq $prevDir) {
560                          # Here the new segment is in the same direction on the same contig. Insure the                          # Here the new segment is in the same direction on the same contig. Insure the
561                          # new segment's beginning is next to the old segment's end.                          # new segment's beginning is next to the old segment's end.
562                          if (($dir eq "-" && $beg == $prevBeg - $prevLen) ||              if ($dir eq "-" && $beg + $len == $prevBeg) {
563                                  ($dir eq "+" && $beg == $prevBeg + $prevLen)) {                  # Here we're merging two backward blocks, so we keep the new begin point
564                                  # Here we need to merge two segments. Adjust the beginning and length values                  # and adjust the length.
565                                  # to include both segments.                  $len += $prevLen;
566                    # Pop the old segment off. The new one will replace it later.
567                    pop @retVal;
568                } elsif ($dir eq "+" && $beg == $prevBeg + $prevLen) {
569                    # Here we need to merge two forward blocks. Adjust the beginning and
570                    # length values to include both segments.
571                                  $beg = $prevBeg;                                  $beg = $prevBeg;
572                                  $len += $prevLen;                                  $len += $prevLen;
573                                  # Pop the old segment off. The new one will replace it later.                                  # Pop the old segment off. The new one will replace it later.
# Line 631  Line 576 
576                  }                  }
577                  # Remember this specifier for the adjacent-segment test the next time through.                  # Remember this specifier for the adjacent-segment test the next time through.
578                  ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);                  ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);
579            # Compute the initial base pair.
580            my $start = ($dir eq "+" ? $beg : $beg + $len - 1);
581                  # Add the specifier to the list.                  # Add the specifier to the list.
582                  push @retVal, "${contigID}_$beg$dir$len";          push @retVal, "${contigID}_$start$dir$len";
583          }          }
584          # Return the list in the format indicated by the context.          # Return the list in the format indicated by the context.
585          return (wantarray ? @retVal : join(' ', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
586  }  }
587    
588  =head3 ParseLocation  =head3 ParseLocation
# Line 661  Line 608 
608  =cut  =cut
609  #: Return Type @;  #: Return Type @;
610  sub ParseLocation {  sub ParseLocation {
611          # Get the parameter.      # Get the parameter. Note that if we're called as an instance method, we ignore
612        # the first parameter.
613        shift if UNIVERSAL::isa($_[0],__PACKAGE__);
614          my ($location) = @_;          my ($location) = @_;
615          # Parse it into segments.          # Parse it into segments.
616          $location =~ /^(.*)_(\d*)([+-_])(\d*)$/;      $location =~ /^(.+)_(\d+)([+\-_])(\d+)$/;
617          my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4);          my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4);
618          # If the direction is an underscore, convert it to a + or -.          # If the direction is an underscore, convert it to a + or -.
619          if ($dir eq "_") {          if ($dir eq "_") {
# Line 680  Line 629 
629          return ($contigID, $start, $dir, $len);          return ($contigID, $start, $dir, $len);
630  }  }
631    
632    =head3 PointLocation
633    
634    C<< my $found = Sprout::PointLocation($location, $point); >>
635    
636    Return the offset into the specified location of the specified point on the contig. If
637    the specified point is before the location, a negative value will be returned. If it is
638    beyond the location, an undefined value will be returned. It is assumed that the offset
639    is for the location's contig. The location can either be new-style (using a C<+> or C<->
640    and a length) or old-style (using C<_> and start and end positions.
641    
642    =over 4
643    
644    =item location
645    
646    A location specifier (see L</FeatureLocation> for a description).
647    
648    =item point
649    
650    The offset into the contig of the point in which we're interested.
651    
652    =item RETURN
653    
654    Returns the offset inside the specified location of the specified point, a negative
655    number if the point is before the location, or an undefined value if the point is past
656    the location. If the length of the location is 0, this method will B<always> denote
657    that it is outside the location. The offset will always be relative to the left-most
658    position in the location.
659    
660    =back
661    
662    =cut
663    #: Return Type $;
664    sub PointLocation {
665        # Get the parameter. Note that if we're called as an instance method, we ignore
666        # the first parameter.
667        shift if UNIVERSAL::isa($_[0],__PACKAGE__);
668        my ($location, $point) = @_;
669        # Parse out the location elements. Note that this works on both old-style and new-style
670        # locations.
671        my ($contigID, $start, $dir, $len) = ParseLocation($location);
672        # Declare the return variable.
673        my $retVal;
674        # Compute the offset. The computation is dependent on the direction of the location.
675        my $offset = (($dir == '+') ? $point - $start : $point - ($start - $len + 1));
676        # Return the offset if it's valid.
677        if ($offset < $len) {
678            $retVal = $offset;
679        }
680        # Return the offset found.
681        return $retVal;
682    }
683    
684  =head3 DNASeq  =head3 DNASeq
685    
686  C<< my $sequence = $sprout->DNASeq(\@locationList); >>  C<< my $sequence = $sprout->DNASeq(\@locationList); >>
# Line 688  Line 689 
689  should be of the form returned by L</featureLocation> when in a list context. In other words,  should be of the form returned by L</featureLocation> when in a list context. In other words,
690  each location is of the form I<contigID>C<_>I<begin>I<dir>I<end>.  each location is of the form I<contigID>C<_>I<begin>I<dir>I<end>.
691    
692    For example, the following would return the DNA sequence for contig C<83333.1:NC_000913>
693    between positions 1401 and 1532, inclusive.
694    
695        my $sequence = $sprout->DNASeq('83333.1:NC_000913_1401_1532');
696    
697  =over 4  =over 4
698    
699  =item locationList  =item locationList
700    
701  List of location specifiers, each in the form I<contigID>C<_>I<begin>I<dir>I<end> (see  List of location specifiers, each in the form I<contigID>C<_>I<begin>I<dir>I<len> or
702  L</FeatureLocation> for more about this format).  I<contigID>C<_>I<begin>C<_>I<end> (see L</FeatureLocation> for more about this format).
703    
704  =item RETURN  =item RETURN
705    
# Line 705  Line 711 
711  #: Return Type $;  #: Return Type $;
712  sub DNASeq {  sub DNASeq {
713          # Get the parameters.          # Get the parameters.
714          my $self = shift @_;      my ($self, $locationList) = @_;
         my ($locationList) = @_;  
715          # Create the return string.          # Create the return string.
716          my $retVal = "";          my $retVal = "";
717          # Loop through the locations.          # Loop through the locations.
# Line 721  Line 726 
726                  # the start point is the ending. Note that in the latter case we must reverse the DNA string                  # the start point is the ending. Note that in the latter case we must reverse the DNA string
727                  # before putting it in the return value.                  # before putting it in the return value.
728                  my ($start, $stop);                  my ($start, $stop);
729            Trace("Parse of \"$location\" is $beg$dir$len.") if T(SDNA => 4);
730                  if ($dir eq "+") {                  if ($dir eq "+") {
731                          $start = $beg;                          $start = $beg;
732                          $stop = $beg + $len - 1;                          $stop = $beg + $len - 1;
733                  } else {                  } else {
734                          $start = $beg + $len + 1;              $start = $beg - $len + 1;
735                          $stop = $beg;                          $stop = $beg;
736                  }                  }
737            Trace("Looking for sequences containing $start through $stop.") if T(SDNA => 4);
738                  my $query = $self->Get(['IsMadeUpOf','Sequence'],                  my $query = $self->Get(['IsMadeUpOf','Sequence'],
739                          "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .                          "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .
740                          " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",                          " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",
# Line 739  Line 746 
746                                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',                                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',
747                                                                     'IsMadeUpOf(len)']);                                                                     'IsMadeUpOf(len)']);
748                          my $stopPosition = $startPosition + $sequenceLength;                          my $stopPosition = $startPosition + $sequenceLength;
749                Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4);
750                          # Figure out the start point and length of the relevant section.                          # Figure out the start point and length of the relevant section.
751                          my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);                          my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);
752                          my $len = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1;              my $len1 = ($stopPosition < $stop ? $stopPosition : $stop) + 1 - $startPosition - $pos1;
753                Trace("Position is $pos1 for length $len1.") if T(SDNA => 4);
754                          # Add the relevant data to the location data.                          # Add the relevant data to the location data.
755                          $locationDNA .= substr($sequenceData, $pos1, $len);              $locationDNA .= substr($sequenceData, $pos1, $len1);
756                  }                  }
757                  # Add this location's data to the return string. Note that we may need to reverse it.                  # Add this location's data to the return string. Note that we may need to reverse it.
758                  if ($dir eq '+') {                  if ($dir eq '+') {
759                          $retVal .= $locationDNA;                          $retVal .= $locationDNA;
760                  } else {                  } else {
761                          $locationDNA = join('', reverse split //, $locationDNA);              $retVal .= FIG::reverse_comp($locationDNA);
                         $retVal .= $locationDNA;  
762                  }                  }
763          }          }
764          # Return the result.          # Return the result.
# Line 779  Line 787 
787  #: Return Type @;  #: Return Type @;
788  sub AllContigs {  sub AllContigs {
789          # Get the parameters.          # Get the parameters.
790          my $self = shift @_;      my ($self, $genomeID) = @_;
         my ($genomeID) = @_;  
791          # Ask for the genome's Contigs.          # Ask for the genome's Contigs.
792          my @retVal = $self->GetFlat(['HasContig'], "HasContig(from-link) = ?", [$genomeID],          my @retVal = $self->GetFlat(['HasContig'], "HasContig(from-link) = ?", [$genomeID],
793                                                                  'HasContig(to-link)');                                                                  'HasContig(to-link)');
# Line 788  Line 795 
795          return @retVal;          return @retVal;
796  }  }
797    
798    =head3 GenomeLength
799    
800    C<< my $length = $sprout->GenomeLength($genomeID); >>
801    
802    Return the length of the specified genome in base pairs.
803    
804    =over 4
805    
806    =item genomeID
807    
808    ID of the genome whose base pair count is desired.
809    
810    =item RETURN
811    
812    Returns the number of base pairs in all the contigs of the specified
813    genome.
814    
815    =back
816    
817    =cut
818    
819    sub GenomeLength {
820        # Get the parameters.
821        my ($self, $genomeID) = @_;
822        # Declare the return variable.
823        my $retVal = 0;
824        # Get the genome's contig sequence lengths.
825        my @lens = $self->GetFlat(['HasContig', 'IsMadeUpOf'], 'HasContig(from-link) = ?',
826                           [$genomeID], 'IsMadeUpOf(len)');
827        # Sum the lengths.
828        map { $retVal += $_ } @lens;
829        # Return the result.
830        return $retVal;
831    }
832    
833    =head3 FeatureCount
834    
835    C<< my $count = $sprout->FeatureCount($genomeID, $type); >>
836    
837    Return the number of features of the specified type in the specified genome.
838    
839    =over 4
840    
841    =item genomeID
842    
843    ID of the genome whose feature count is desired.
844    
845    =item type
846    
847    Type of feature to count (eg. C<peg>, C<rna>, etc.).
848    
849    =item RETURN
850    
851    Returns the number of features of the specified type for the specified genome.
852    
853    =back
854    
855    =cut
856    
857    sub FeatureCount {
858        # Get the parameters.
859        my ($self, $genomeID, $type) = @_;
860        # Compute the count.
861        my $retVal = $self->GetCount(['HasFeature', 'Feature'],
862                                    "HasFeature(from-link) = ? AND Feature(feature-type) = ?",
863                                    [$genomeID, $type]);
864        # Return the result.
865        return $retVal;
866    }
867    
868    =head3 GenomeAssignments
869    
870    C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >>
871    
872    Return a list of a genome's assigned features. The return hash will contain each
873    assigned feature of the genome mapped to the text of its most recent functional
874    assignment.
875    
876    =over 4
877    
878    =item genomeID
879    
880    ID of the genome whose functional assignments are desired.
881    
882    =item RETURN
883    
884    Returns a reference to a hash which maps each feature to its most recent
885    functional assignment.
886    
887    =back
888    
889    =cut
890    
891    sub GenomeAssignments {
892        # Get the parameters.
893        my ($self, $genomeID) = @_;
894        # Declare the return variable.
895        my $retVal = {};
896        # Query the genome's features and annotations. We'll put the oldest annotations
897        # first so that the last assignment to go into the hash will be the correct one.
898        my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'],
899                               "HasFeature(from-link) = ? ORDER BY Annotation(time)",
900                               [$genomeID]);
901        # Loop through the annotations.
902        while (my $data = $query->Fetch) {
903            # Get the feature ID and annotation text.
904            my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',
905                                                    'Annotation(annotation)']);
906            # Check to see if this is an assignment. Note that the user really
907            # doesn't matter to us, other than we use it to determine whether or
908            # not this is an assignment.
909            my ($user, $assignment) = _ParseAssignment('fig', $annotation);
910            if ($user) {
911                # Here it's an assignment. We put it in the return hash, overwriting
912                # any older assignment that might be present.
913                $retVal->{$fid} = $assignment;
914            }
915        }
916        # Return the result.
917        return $retVal;
918    }
919    
920  =head3 ContigLength  =head3 ContigLength
921    
922  C<< my $length = $sprout->ContigLength($contigID); >>  C<< my $length = $sprout->ContigLength($contigID); >>
# Line 810  Line 939 
939  #: Return Type $;  #: Return Type $;
940  sub ContigLength {  sub ContigLength {
941          # Get the parameters.          # Get the parameters.
942          my $self = shift @_;      my ($self, $contigID) = @_;
         my ($contigID) = @_;  
943          # Get the contig's last sequence.          # Get the contig's last sequence.
944          my $query = $self->Get(['IsMadeUpOf'],          my $query = $self->Get(['IsMadeUpOf'],
945                  "IsMadeUpOf(from-link) = ? ORDER BY IsMadeUpOf(start-position) DESC",                  "IsMadeUpOf(from-link) = ? ORDER BY IsMadeUpOf(start-position) DESC",
# Line 822  Line 950 
950          # Set it from the sequence data, if any.          # Set it from the sequence data, if any.
951          if ($sequence) {          if ($sequence) {
952                  my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']);                  my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']);
953                  $retVal = $start + $len;          $retVal = $start + $len - 1;
954        }
955        # Return the result.
956        return $retVal;
957    }
958    
959    =head3 ClusterPEGs
960    
961    C<< my $clusteredList = $sprout->ClusterPEGs($sub, \@pegs); >>
962    
963    Cluster the PEGs in a list according to the cluster coding scheme of the specified
964    subsystem. In order for this to work properly, the subsystem object must have
965    been used recently to retrieve the PEGs using the B<get_pegs_from_cell> method.
966    This causes the cluster numbers to be pulled into the subsystem's color hash.
967    If a PEG is not found in the color hash, it will not appear in the output
968    sequence.
969    
970    =over 4
971    
972    =item sub
973    
974    Sprout subsystem object for the relevant subsystem, from the L</get_subsystem>
975    method.
976    
977    =item pegs
978    
979    Reference to the list of PEGs to be clustered.
980    
981    =item RETURN
982    
983    Returns a list of the PEGs, grouped into smaller lists by cluster number.
984    
985    =back
986    
987    =cut
988    #: Return Type $@@;
989    sub ClusterPEGs {
990        # Get the parameters.
991        my ($self, $sub, $pegs) = @_;
992        # Declare the return variable.
993        my $retVal = [];
994        # Loop through the PEGs, creating arrays for each cluster.
995        for my $pegID (@{$pegs}) {
996            my $clusterNumber = $sub->get_cluster_number($pegID);
997            # Only proceed if the PEG is in a cluster.
998            if ($clusterNumber >= 0) {
999                # Push this PEG onto the sub-list for the specified cluster number.
1000                push @{$retVal->[$clusterNumber]}, $pegID;
1001            }
1002          }          }
1003          # Return the result.          # Return the result.
1004          return $retVal;          return $retVal;
# Line 853  Line 1029 
1029  Returns a three-element list. The first element is a list of feature IDs for the features that  Returns a three-element list. The first element is a list of feature IDs for the features that
1030  overlap the region of interest. The second and third elements are the minimum and maximum  overlap the region of interest. The second and third elements are the minimum and maximum
1031  locations of the features provided on the specified contig. These may extend outside  locations of the features provided on the specified contig. These may extend outside
1032  the start and stop values.  the start and stop values. The first element (that is, the list of features) is sorted
1033    roughly by location.
1034    
1035  =back  =back
1036    
1037  =cut  =cut
1038  #: Return Type @;  #: Return Type @@;
1039  sub GenesInRegion {  sub GenesInRegion {
1040          # Get the parameters.          # Get the parameters.
1041          my $self = shift @_;      my ($self, $contigID, $start, $stop) = @_;
         my ($contigID, $start, $stop) = @_;  
1042          # Get the maximum segment length.          # Get the maximum segment length.
1043          my $maximumSegmentLength = $self->MaxSegment;          my $maximumSegmentLength = $self->MaxSegment;
1044          # Create a hash to receive the feature list. We use a hash so that we can eliminate          # Create a hash to receive the feature list. We use a hash so that we can eliminate
1045          # duplicates easily.      # duplicates easily. The hash key will be the feature ID. The value will be a two-element
1046        # containing the minimum and maximum offsets. We will use the offsets to sort the results
1047        # when we're building the result set.
1048          my %featuresFound = ();          my %featuresFound = ();
1049          # Prime the values we'll use for the returned beginning and end.          # Prime the values we'll use for the returned beginning and end.
1050          my ($min, $max) = ($self->ContigLength($contigID), 0);      my @initialMinMax = ($self->ContigLength($contigID), 0);
1051        my ($min, $max) = @initialMinMax;
1052          # Create a table of parameters for each query. Each query looks for features travelling in          # Create a table of parameters for each query. Each query looks for features travelling in
1053          # a particular direction. The query parameters include the contig ID, the feature direction,          # a particular direction. The query parameters include the contig ID, the feature direction,
1054          # the lowest possible start position, and the highest possible start position. This works          # the lowest possible start position, and the highest possible start position. This works
# Line 899  Line 1078 
1078                                          $found = 1;                                          $found = 1;
1079                                  }                                  }
1080                          } elsif ($dir eq '-') {                          } elsif ($dir eq '-') {
1081                                  $end = $beg - $len;                  # Note we switch things around so that the beginning is to the left of the
1082                                  if ($end <= $stop) {                  # ending.
1083                    ($beg, $end) = ($beg - $len, $beg);
1084                    if ($beg <= $stop) {
1085                                          # Denote we found a useful feature.                                          # Denote we found a useful feature.
1086                                          $found = 1;                                          $found = 1;
1087                                  }                                  }
1088                          }                          }
1089                          if ($found) {                          if ($found) {
1090                                  # Here we need to record the feature and update the minimum and maximum.                  # Here we need to record the feature and update the minima and maxima. First,
1091                                  $featuresFound{$featureID} = 1;                  # get the current entry for the specified feature.
1092                                  if ($beg < $min) { $min = $beg; }                  my ($loc1, $loc2) = (exists $featuresFound{$featureID} ? @{$featuresFound{$featureID}} :
1093                                  if ($end < $min) { $min = $end; }                                       @initialMinMax);
1094                                  if ($beg > $max) { $max = $beg; }                  # Merge the current segment's begin and end into the feature begin and end and the
1095                                  if ($end > $max) { $max = $end; }                  # global min and max.
1096                    if ($beg < $loc1) {
1097                        $loc1 = $beg;
1098                        $min = $beg if $beg < $min;
1099                    }
1100                    if ($end > $loc2) {
1101                        $loc2 = $end;
1102                        $max = $end if $end > $max;
1103                    }
1104                    # Store the entry back into the hash table.
1105                    $featuresFound{$featureID} = [$loc1, $loc2];
1106                          }                          }
1107                  }                  }
1108          }          }
1109          # Compute a list of the IDs for the features found.      # Now we must compute the list of the IDs for the features found. We start with a list
1110          my @list = (sort (keys %featuresFound));      # of midpoints / feature ID pairs. (It's not really a midpoint, it's twice the midpoint,
1111        # but the result of the sort will be the same.)
1112        my @list = map { [$featuresFound{$_}->[0] + $featuresFound{$_}->[1], $_] } keys %featuresFound;
1113        # Now we sort by midpoint and yank out the feature IDs.
1114        my @retVal = map { $_->[1] } sort { $a->[0] <=> $b->[0] } @list;
1115          # Return it along with the min and max.          # Return it along with the min and max.
1116          return (\@list, $min, $max);      return (\@retVal, $min, $max);
1117  }  }
1118    
1119  =head3 FType  =head3 FType
# Line 944  Line 1139 
1139  #: Return Type $;  #: Return Type $;
1140  sub FType {  sub FType {
1141          # Get the parameters.          # Get the parameters.
1142          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
1143          # Get the specified feature's type.          # Get the specified feature's type.
1144          my ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(feature-type)']);          my ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(feature-type)']);
1145          # Return the result.          # Return the result.
# Line 954  Line 1148 
1148    
1149  =head3 FeatureAnnotations  =head3 FeatureAnnotations
1150    
1151  C<< my @descriptors = $sprout->FeatureAnnotations($featureID); >>  C<< my @descriptors = $sprout->FeatureAnnotations($featureID, $rawFlag); >>
1152    
1153  Return the annotations of a feature.  Return the annotations of a feature.
1154    
# Line 964  Line 1158 
1158    
1159  ID of the feature whose annotations are desired.  ID of the feature whose annotations are desired.
1160    
1161    =item rawFlag
1162    
1163    If TRUE, the annotation timestamps will be returned in raw form; otherwise, they
1164    will be returned in human-readable form.
1165    
1166  =item RETURN  =item RETURN
1167    
1168  Returns a list of annotation descriptors. Each descriptor is a hash with the following fields.  Returns a list of annotation descriptors. Each descriptor is a hash with the following fields.
1169    
1170  * B<featureID> ID of the relevant feature.  * B<featureID> ID of the relevant feature.
1171    
1172  * B<timeStamp> time the annotation was made, in user-friendly format.  * B<timeStamp> time the annotation was made.
1173    
1174  * B<user> ID of the user who made the annotation  * B<user> ID of the user who made the annotation
1175    
# Line 982  Line 1181 
1181  #: Return Type @%;  #: Return Type @%;
1182  sub FeatureAnnotations {  sub FeatureAnnotations {
1183          # Get the parameters.          # Get the parameters.
1184          my $self = shift @_;      my ($self, $featureID, $rawFlag) = @_;
         my ($featureID) = @_;  
1185          # Create a query to get the feature's annotations and the associated users.          # Create a query to get the feature's annotations and the associated users.
1186          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1187                                                     "IsTargetOfAnnotation(from-link) = ?", [$featureID]);                                                     "IsTargetOfAnnotation(from-link) = ?", [$featureID]);
# Line 996  Line 1194 
1194                          $annotation->Values(['IsTargetOfAnnotation(from-link)',                          $annotation->Values(['IsTargetOfAnnotation(from-link)',
1195                                                                   'Annotation(time)', 'MadeAnnotation(from-link)',                                                                   'Annotation(time)', 'MadeAnnotation(from-link)',
1196                                                                   'Annotation(annotation)']);                                                                   'Annotation(annotation)']);
1197            # Convert the time, if necessary.
1198            if (! $rawFlag) {
1199                $timeStamp = FriendlyTimestamp($timeStamp);
1200            }
1201                  # Assemble them into a hash.                  # Assemble them into a hash.
1202          my $annotationHash = { featureID => $featureID,          my $annotationHash = { featureID => $featureID,
1203                                 timeStamp => FriendlyTimestamp($timeStamp),                                 timeStamp => $timeStamp,
1204                                                             user => $user, text => $text };                                                             user => $user, text => $text };
1205                  # Add it to the return list.                  # Add it to the return list.
1206                  push @retVal, $annotationHash;                  push @retVal, $annotationHash;
# Line 1012  Line 1214 
1214  C<< my %functions = $sprout->AllFunctionsOf($featureID); >>  C<< my %functions = $sprout->AllFunctionsOf($featureID); >>
1215    
1216  Return all of the functional assignments for a particular feature. The data is returned as a  Return all of the functional assignments for a particular feature. The data is returned as a
1217  hash of functional assignments to user IDs. A functional assignment is a type of annotation.  hash of functional assignments to user IDs. A functional assignment is a type of annotation,
1218  It has the format "XXXX\nset XXXX function to\nYYYYY". In this instance, XXXX is the user ID  Functional assignments are described in the L</ParseAssignment> function. Its worth noting that
1219  and YYYYY is the functional assignment text. Its worth noting that we cannot filter on the content  we cannot filter on the content of the annotation itself because it's a text field; however,
1220  of the annotation itself because it's a text field; however, this is not a big problem because most  this is not a big problem because most features only have a small number of annotations.
1221  features only have a small number of annotations. Finally, if a single user has multiple  Finally, if a single user has multiple functional assignments, we will only keep the most
1222  functional assignments, we will only keep the most recent one.  recent one.
1223    
1224  =over 4  =over 4
1225    
# Line 1027  Line 1229 
1229    
1230  =item RETURN  =item RETURN
1231    
1232  Returns a hash mapping the functional assignment IDs to user IDs.  Returns a hash mapping the user IDs to functional assignment IDs.
1233    
1234  =back  =back
1235    
# Line 1035  Line 1237 
1237  #: Return Type %;  #: Return Type %;
1238  sub AllFunctionsOf {  sub AllFunctionsOf {
1239          # Get the parameters.          # Get the parameters.
1240          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
1241          # Get all of the feature's annotations.          # Get all of the feature's annotations.
1242      my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation'],      my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1243                                                      "IsTargetOfAnnotation(from-link) = ?",                                                      "IsTargetOfAnnotation(from-link) = ?",
1244                              [$featureID], ['Annotation(time)', 'Annotation(annotation)']);                              [$featureID], ['Annotation(time)', 'Annotation(annotation)',
1245                                               'MadeAnnotation(from-link)']);
1246          # Declare the return hash.          # Declare the return hash.
1247          my %retVal;          my %retVal;
     # Declare a hash for insuring we only make one assignment per user.  
     my %timeHash = ();  
1248      # Now we sort the assignments by timestamp in reverse.      # Now we sort the assignments by timestamp in reverse.
1249      my @sortedQuery = sort { -($a->[0] <=> $b->[0]) } @query;      my @sortedQuery = sort { -($a->[0] <=> $b->[0]) } @query;
1250          # Loop until we run out of annotations.          # Loop until we run out of annotations.
1251      for my $annotation (@sortedQuery) {      for my $annotation (@sortedQuery) {
1252          # Get the annotation fields.          # Get the annotation fields.
1253          my ($timeStamp, $text) = @{$annotation};          my ($timeStamp, $text, $user) = @{$annotation};
1254                  # Check to see if this is a functional assignment.                  # Check to see if this is a functional assignment.
1255                  my ($user, $function) = ParseAssignment($text);          my ($actualUser, $function) = _ParseAssignment($user, $text);
1256          if ($user && ! exists $timeHash{$user}) {          if ($actualUser && ! exists $retVal{$actualUser}) {
1257              # Here it is a functional assignment and there has been no              # Here it is a functional assignment and there has been no
1258              # previous assignment for this user, so we stuff it in the              # previous assignment for this user, so we stuff it in the
1259              # return hash.              # return hash.
1260                          $retVal{$function} = $user;              $retVal{$actualUser} = $function;
             # Insure we don't assign to this user again.  
             $timeHash{$user} = 1;  
1261                  }                  }
1262          }          }
1263          # Return the hash of assignments found.          # Return the hash of assignments found.
# Line 1074  Line 1272 
1272    
1273  The functional assignment is handled differently depending on the type of feature. If  The functional assignment is handled differently depending on the type of feature. If
1274  the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional  the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional
1275  assignment is a type of annotation. It has the format "XXXX\nset XXXX function to\nYYYYY". In this  assignment is a type of annotation. The format of an assignment is described in
1276  instance, XXXX is the user ID and YYYYY is the functional assignment text. Its worth noting that  L</ParseAssignment>. Its worth noting that we cannot filter on the content of the
1277  we cannot filter on the content of the annotation itself because it's a text field; however, this  annotation itself because it's a text field; however, this is not a big problem because
1278  is not a big problem because most features only have a small number of annotations.  most features only have a small number of annotations.
1279    
1280  Each user has an associated list of trusted users. The assignment returned will be the most  Each user has an associated list of trusted users. The assignment returned will be the most
1281  recent one by at least one of the trusted users. If no trusted user list is available, then  recent one by at least one of the trusted users. If no trusted user list is available, then
# Line 1109  Line 1307 
1307  #: Return Type $;  #: Return Type $;
1308  sub FunctionOf {  sub FunctionOf {
1309          # Get the parameters.          # Get the parameters.
1310          my $self = shift @_;      my ($self, $featureID, $userID) = @_;
         my ($featureID, $userID) = @_;  
1311      # Declare the return value.      # Declare the return value.
1312      my $retVal;      my $retVal;
1313      # Determine the ID type.      # Determine the ID type.
# Line 1138  Line 1335 
1335              }              }
1336          }          }
1337          # Build a query for all of the feature's annotations, sorted by date.          # Build a query for all of the feature's annotations, sorted by date.
1338          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation'],          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1339                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",
1340                                 [$featureID]);                                 [$featureID]);
1341          my $timeSelected = 0;          my $timeSelected = 0;
1342          # Loop until we run out of annotations.          # Loop until we run out of annotations.
1343          while (my $annotation = $query->Fetch()) {          while (my $annotation = $query->Fetch()) {
1344              # Get the annotation text.              # Get the annotation text.
1345              my ($text, $time) = $annotation->Values(['Annotation(annotation)','Annotation(time)']);              my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)',
1346                                                         'Annotation(time)', 'MadeAnnotation(from-link)']);
1347              # Check to see if this is a functional assignment for a trusted user.              # Check to see if this is a functional assignment for a trusted user.
1348              my ($user, $type, $function) = split(/\n/, $text);              my ($actualUser, $function) = _ParseAssignment($user, $text);
1349              if ($type =~ m/^set $user function to$/i) {              Trace("Assignment user is $actualUser, text is $function.") if T(4);
1350                if ($actualUser) {
1351                  # Here it is a functional assignment. Check the time and the user                  # Here it is a functional assignment. Check the time and the user
1352                  # name. The time must be recent and the user must be trusted.                  # name. The time must be recent and the user must be trusted.
1353                  if ((exists $trusteeTable{$user}) && ($time > $timeSelected)) {                  if ((exists $trusteeTable{$actualUser}) && ($time > $timeSelected)) {
1354                      $retVal = $function;                      $retVal = $function;
1355                      $timeSelected = $time;                      $timeSelected = $time;
1356                  }                  }
# Line 1167  Line 1366 
1366          return $retVal;          return $retVal;
1367  }  }
1368    
1369    =head3 FunctionsOf
1370    
1371    C<< my @functionList = $sprout->FunctionOf($featureID, $userID); >>
1372    
1373    Return the functional assignments of a particular feature.
1374    
1375    The functional assignment is handled differently depending on the type of feature. If
1376    the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional
1377    assignment is a type of annotation. The format of an assignment is described in
1378    L</ParseAssignment>. Its worth noting that we cannot filter on the content of the
1379    annotation itself because it's a text field; however, this is not a big problem because
1380    most features only have a small number of annotations.
1381    
1382    If the feature is B<not> identified by a FIG ID, then the functional assignment
1383    information is taken from the B<ExternalAliasFunc> table. If the table does
1384    not contain an entry for the feature, an empty list is returned.
1385    
1386    =over 4
1387    
1388    =item featureID
1389    
1390    ID of the feature whose functional assignments are desired.
1391    
1392    =item RETURN
1393    
1394    Returns a list of 2-tuples, each consisting of a user ID and the text of an assignment by
1395    that user.
1396    
1397    =back
1398    
1399    =cut
1400    #: Return Type @@;
1401    sub FunctionsOf {
1402        # Get the parameters.
1403        my ($self, $featureID) = @_;
1404        # Declare the return value.
1405        my @retVal = ();
1406        # Determine the ID type.
1407        if ($featureID =~ m/^fig\|/) {
1408            # Here we have a FIG feature ID. We must build the list of trusted
1409            # users.
1410            my %trusteeTable = ();
1411            # Build a query for all of the feature's annotations, sorted by date.
1412            my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1413                                   "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",
1414                                   [$featureID]);
1415            my $timeSelected = 0;
1416            # Loop until we run out of annotations.
1417            while (my $annotation = $query->Fetch()) {
1418                # Get the annotation text.
1419                my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)',
1420                                                                'Annotation(time)',
1421                                                                'MadeAnnotation(user)']);
1422                # Check to see if this is a functional assignment for a trusted user.
1423                my ($actualUser, $function) = _ParseAssignment($user, $text);
1424                if ($actualUser) {
1425                    # Here it is a functional assignment.
1426                    push @retVal, [$actualUser, $function];
1427                }
1428            }
1429        } else {
1430            # Here we have a non-FIG feature ID. In this case the user ID does not
1431            # matter. We simply get the information from the External Alias Function
1432            # table.
1433            my @assignments = $self->GetEntityValues('ExternalAliasFunc', $featureID,
1434                                                     ['ExternalAliasFunc(func)']);
1435            push @retVal, map { ['master', $_] } @assignments;
1436        }
1437        # Return the assignments found.
1438        return @retVal;
1439    }
1440    
1441  =head3 BBHList  =head3 BBHList
1442    
1443  C<< my $bbhHash = $sprout->BBHList($genomeID, \@featureList); >>  C<< my $bbhHash = $sprout->BBHList($genomeID, \@featureList); >>
# Line 1186  Line 1457 
1457    
1458  =item RETURN  =item RETURN
1459    
1460  Returns a reference to a hash that maps the IDs of the incoming features to the IDs of  Returns a reference to a hash that maps the IDs of the incoming features to the best hits
1461  their best hits.  on the target genome.
1462    
1463  =back  =back
1464    
# Line 1195  Line 1466 
1466  #: Return Type %;  #: Return Type %;
1467  sub BBHList {  sub BBHList {
1468          # Get the parameters.          # Get the parameters.
1469          my $self = shift @_;      my ($self, $genomeID, $featureList) = @_;
         my ($genomeID, $featureList) = @_;  
1470          # Create the return structure.          # Create the return structure.
1471          my %retVal = ();          my %retVal = ();
1472          # Loop through the incoming features.          # Loop through the incoming features.
# Line 1205  Line 1475 
1475                  my $query = $self->Get(['IsBidirectionalBestHitOf'],                  my $query = $self->Get(['IsBidirectionalBestHitOf'],
1476                                                             "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?",                                                             "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?",
1477                                                             [$featureID, $genomeID]);                                                             [$featureID, $genomeID]);
1478                  # Look for the best hit.          # Peel off the BBHs found.
1479                  my $bbh = $query->Fetch;          my @found = ();
1480                  if ($bbh) {          while (my $bbh = $query->Fetch) {
1481                          my ($targetFeature) = $bbh->Value('IsBidirectionalBestHitOf(to-link)');              push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)');
                         $retVal{$featureID} = $targetFeature;  
1482                  }                  }
1483            $retVal{$featureID} = \@found;
1484          }          }
1485          # Return the mapping.          # Return the mapping.
1486          return \%retVal;          return \%retVal;
1487  }  }
1488    
1489    =head3 SimList
1490    
1491    C<< my %similarities = $sprout->SimList($featureID, $count); >>
1492    
1493    Return a list of the similarities to the specified feature.
1494    
1495    Sprout does not support real similarities, so this method just returns the bidirectional
1496    best hits.
1497    
1498    =over 4
1499    
1500    =item featureID
1501    
1502    ID of the feature whose similarities are desired.
1503    
1504    =item count
1505    
1506    Maximum number of similar features to be returned, or C<0> to return them all.
1507    
1508    =back
1509    
1510    =cut
1511    #: Return Type %;
1512    sub SimList {
1513        # Get the parameters.
1514        my ($self, $featureID, $count) = @_;
1515        # Ask for the best hits.
1516        my @lists = $self->GetAll(['IsBidirectionalBestHitOf'],
1517                                  "IsBidirectionalBestHitOf(from-link) = ? ORDER BY IsBidirectionalBestHitOf(score) DESC",
1518                                  [$featureID], ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(score)'],
1519                                  $count);
1520        # Create the return value.
1521        my %retVal = ();
1522        for my $tuple (@lists) {
1523            $retVal{$tuple->[0]} = $tuple->[1];
1524        }
1525        # Return the result.
1526        return %retVal;
1527    }
1528    
1529    
1530    
1531    =head3 IsComplete
1532    
1533    C<< my $flag = $sprout->IsComplete($genomeID); >>
1534    
1535    Return TRUE if the specified genome is complete, else FALSE.
1536    
1537    =over 4
1538    
1539    =item genomeID
1540    
1541    ID of the genome whose completeness status is desired.
1542    
1543    =item RETURN
1544    
1545    Returns TRUE if the genome is complete, FALSE if it is incomplete, and C<undef> if it is
1546    not found.
1547    
1548    =back
1549    
1550    =cut
1551    #: Return Type $;
1552    sub IsComplete {
1553        # Get the parameters.
1554        my ($self, $genomeID) = @_;
1555        # Declare the return variable.
1556        my $retVal;
1557        # Get the genome's data.
1558        my $genomeData = $self->GetEntity('Genome', $genomeID);
1559        if ($genomeData) {
1560            # The genome exists, so get the completeness flag.
1561            ($retVal) = $genomeData->Value('Genome(complete)');
1562        }
1563        # Return the result.
1564        return $retVal;
1565    }
1566    
1567  =head3 FeatureAliases  =head3 FeatureAliases
1568    
1569  C<< my @aliasList = $sprout->FeatureAliases($featureID); >>  C<< my @aliasList = $sprout->FeatureAliases($featureID); >>
# Line 1239  Line 1587 
1587  #: Return Type @;  #: Return Type @;
1588  sub FeatureAliases {  sub FeatureAliases {
1589          # Get the parameters.          # Get the parameters.
1590          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
1591          # Get the desired feature's aliases          # Get the desired feature's aliases
1592          my @retVal = $self->GetEntityValues('Feature', $featureID, ['Feature(alias)']);          my @retVal = $self->GetEntityValues('Feature', $featureID, ['Feature(alias)']);
1593          # Return the result.          # Return the result.
# Line 1251  Line 1598 
1598    
1599  C<< my $genomeID = $sprout->GenomeOf($featureID); >>  C<< my $genomeID = $sprout->GenomeOf($featureID); >>
1600    
1601  Return the genome that contains a specified feature.  Return the genome that contains a specified feature or contig.
1602    
1603  =over 4  =over 4
1604    
1605  =item featureID  =item featureID
1606    
1607  ID of the feature whose genome is desired.  ID of the feature or contig whose genome is desired.
1608    
1609  =item RETURN  =item RETURN
1610    
1611  Returns the ID of the genome for the specified feature. If the feature is not found, returns  Returns the ID of the genome for the specified feature or contig. If the feature or contig is not
1612  an undefined value.  found, returns an undefined value.
1613    
1614  =back  =back
1615    
# Line 1270  Line 1617 
1617  #: Return Type $;  #: Return Type $;
1618  sub GenomeOf {  sub GenomeOf {
1619          # Get the parameters.          # Get the parameters.
1620          my $self = shift @_;      my ($self, $featureID) = @_;
1621          my ($featureID) = @_;      # Create a query to find the genome associated with the incoming ID.
1622          # Create a query to find the genome associated with the feature.      my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ? OR HasContig(to-link) = ?",
1623          my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ?", [$featureID]);                             [$featureID, $featureID]);
1624          # Declare the return value.          # Declare the return value.
1625          my $retVal;          my $retVal;
1626          # Get the genome ID.          # Get the genome ID.
# Line 1297  Line 1644 
1644    
1645  ID of the feature whose functionally-coupled brethren are desired.  ID of the feature whose functionally-coupled brethren are desired.
1646    
1647  =item RETURN  =item RETURN
1648    
1649    A hash mapping the functionally-coupled feature IDs to the coupling score.
1650    
1651    =back
1652    
1653    =cut
1654    #: Return Type %;
1655    sub CoupledFeatures {
1656        # Get the parameters.
1657        my ($self, $featureID) = @_;
1658        # Create a query to retrieve the functionally-coupled features.
1659        my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1660                               "ParticipatesInCoupling(from-link) = ?", [$featureID]);
1661        # This value will be set to TRUE if we find at least one coupled feature.
1662        my $found = 0;
1663        # Create the return hash.
1664        my %retVal = ();
1665        # Retrieve the relationship records and store them in the hash.
1666        while (my $clustering = $query->Fetch()) {
1667            # Get the ID and score of the coupling.
1668            my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1669                                                            'Coupling(score)']);
1670            # Get the other feature that participates in the coupling.
1671            my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1672                                               "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1673                                               [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1674            # Attach the other feature's score to its ID.
1675            $retVal{$otherFeatureID} = $score;
1676            $found = 1;
1677        }
1678        # Functional coupling is reflexive. If we found at least one coupled feature, we must add
1679        # the incoming feature as well.
1680        if ($found) {
1681            $retVal{$featureID} = 9999;
1682        }
1683        # Return the hash.
1684        return %retVal;
1685    }
1686    
1687    =head3 CouplingEvidence
1688    
1689    C<< my @evidence = $sprout->CouplingEvidence($peg1, $peg2); >>
1690    
1691    Return the evidence for a functional coupling.
1692    
1693    A pair of features is considered evidence of a coupling between two other
1694    features if they occur close together on a contig and both are similar to
1695    the coupled features. So, if B<A1> and B<A2> are close together on a contig,
1696    B<B1> and B<B2> are considered evidence for the coupling if (1) B<B1> and
1697    B<B2> are close together, (2) B<B1> is similar to B<A1>, and (3) B<B2> is
1698    similar to B<A2>.
1699    
1700    The score of a coupling is determined by the number of pieces of evidence
1701    that are considered I<representative>. If several evidence items belong to
1702    a group of genomes that are close to each other, only one of those items
1703    is considered representative. The other evidence items are presumed to be
1704    there because of the relationship between the genomes rather than because
1705    the two proteins generated by the features have a related functionality.
1706    
1707    Each evidence item is returned as a three-tuple in the form C<[>I<$peg1a>C<,>
1708    I<$peg2a>C<,> I<$rep>C<]>, where I<$peg1a> is similar to I<$peg1>, I<$peg2a>
1709    is similar to I<$peg2>, and I<$rep> is TRUE if the evidence is representative
1710    and FALSE otherwise.
1711    
1712    =over 4
1713    
1714    =item peg1
1715    
1716    ID of the feature of interest.
1717    
1718    =item peg2
1719    
1720    ID of a feature functionally coupled to the feature of interest.
1721    
1722    =item RETURN
1723    
1724    Returns a list of 3-tuples. Each tuple consists of a feature similar to the feature
1725    of interest, a feature similar to the functionally coupled feature, and a flag
1726    that is TRUE for a representative piece of evidence and FALSE otherwise.
1727    
1728    =back
1729    
1730    =cut
1731    #: Return Type @@;
1732    sub CouplingEvidence {
1733        # Get the parameters.
1734        my ($self, $peg1, $peg2) = @_;
1735        # Declare the return variable.
1736        my @retVal = ();
1737        # Our first task is to find out the nature of the coupling: whether or not
1738        # it exists, its score, and whether the features are stored in the same
1739        # order as the ones coming in.
1740        my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);
1741        # Only proceed if a coupling exists.
1742        if ($couplingID) {
1743            # Determine the ordering to place on the evidence items. If we're
1744            # inverted, we want to see feature 2 before feature 1 (descending); otherwise,
1745            # we want feature 1 before feature 2 (normal).
1746            Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4);
1747            my $ordering = ($inverted ? "DESC" : "");
1748            # Get the coupling evidence.
1749            my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],
1750                                              "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering",
1751                                              [$couplingID],
1752                                              ['PCH(used)', 'UsesAsEvidence(to-link)']);
1753            # Loop through the evidence items. Each piece of evidence is represented by two
1754            # positions in the evidence list, one for each feature on the other side of the
1755            # evidence link. If at some point we want to generalize to couplings with
1756            # more than two positions, this section of code will need to be re-done.
1757            while (@evidenceList > 0) {
1758                my $peg1Data = shift @evidenceList;
1759                my $peg2Data = shift @evidenceList;
1760                Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4);
1761                push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];
1762            }
1763            Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4);
1764        }
1765        # Return the result.
1766        return @retVal;
1767    }
1768    
1769    =head3 GetCoupling
1770    
1771    C<< my ($couplingID, $inverted, $score) = $sprout->GetCoupling($peg1, $peg2); >>
1772    
1773    Return the coupling (if any) for the specified pair of PEGs. If a coupling
1774    exists, we return the coupling ID along with an indicator of whether the
1775    coupling is stored as C<(>I<$peg1>C<, >I<$peg2>C<)> or C<(>I<$peg2>C<, >I<$peg1>C<)>.
1776    In the second case, we say the coupling is I<inverted>. The importance of an
1777    inverted coupling is that the PEGs in the evidence will appear in reverse order.
1778    
1779    =over 4
1780    
1781    =item peg1
1782    
1783    ID of the feature of interest.
1784    
1785    =item peg2
1786    
1787    ID of the potentially coupled feature.
1788    
1789    =item RETURN
1790    
1791    Returns a three-element list. The first element contains the database ID of
1792    the coupling. The second element is FALSE if the coupling is stored in the
1793    database in the caller specified order and TRUE if it is stored in the
1794    inverted order. The third element is the coupling's score. If the coupling
1795    does not exist, all three list elements will be C<undef>.
1796    
1797    =back
1798    
1799    =cut
1800    #: Return Type $%@;
1801    sub GetCoupling {
1802        # Get the parameters.
1803        my ($self, $peg1, $peg2) = @_;
1804        # Declare the return values. We'll start with the coupling ID and undefine the
1805        # flag and score until we have more information.
1806        my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);
1807        # Find the coupling data.
1808        my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1809                                     "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
1810                                     [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);
1811        # Check to see if we found anything.
1812        if (!@pegs) {
1813            Trace("No coupling found.") if T(Coupling => 4);
1814            # No coupling, so undefine the return value.
1815            $retVal = undef;
1816        } else {
1817            # We have a coupling! Get the score and check for inversion.
1818            $score = $pegs[0]->[1];
1819            my $firstFound = $pegs[0]->[0];
1820            $inverted = ($firstFound ne $peg1);
1821            Trace("Coupling score is $score. First peg is $firstFound, peg 1 is $peg1.") if T(Coupling => 4);
1822        }
1823        # Return the result.
1824        return ($retVal, $inverted, $score);
1825    }
1826    
1827    =head3 CouplingID
1828    
1829    C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>
1830    
1831    Return the coupling ID for a pair of feature IDs.
1832    
1833    The coupling ID is currently computed by joining the feature IDs in
1834    sorted order with a space. Client modules (that is, modules which
1835    use Sprout) should not, however, count on this always being the
1836    case. This method provides a way for abstracting the concept of a
1837    coupling ID. All that we know for sure about it is that it can be
1838    generated easily from the feature IDs and the order of the IDs
1839    in the parameter list does not matter (i.e. C<CouplingID("a1", "b1")>
1840    will have the same value as C<CouplingID("b1", "a1")>.
1841    
1842    =over 4
1843    
1844    =item peg1
1845    
1846  A hash mapping the functionally-coupled feature IDs to the coupling score.  First feature of interest.
1847    
1848  =back  =item peg2
1849    
1850  =cut  Second feature of interest.
 #: Return Type %;  
 sub CoupledFeatures {  
         # Get the parameters.  
         my $self = shift @_;  
         my ($featureID) = @_;  
         # Create a query to retrieve the functionally-coupled features. Note that we depend on the  
         # fact that the functional coupling is physically paired. If (A,B) is in the database, then  
         # (B,A) will also be found.  
         my $query = $self->Get(['IsClusteredOnChromosomeWith'],  
                                                    "IsClusteredOnChromosomeWith(from-link) = ?", [$featureID]);  
         # This value will be set to TRUE if we find at least one coupled feature.  
         my $found = 0;  
         # Create the return hash.  
         my %retVal = ();  
         # Retrieve the relationship records and store them in the hash.  
         while (my $clustering = $query->Fetch()) {  
                 my ($otherFeatureID, $score) = $clustering->Values(['IsClusteredOnChromosomeWith(to-link)',  
                                                                     'IsClusteredOnChromosomeWith(score)']);  
                 $retVal{$otherFeatureID} = $score;  
                 $found = 1;  
         }  
         # Functional coupling is reflexive. If we found at least one coupled feature, we must add  
         # the incoming feature as well.  
         if ($found) {  
                 $retVal{$featureID} = 9999;  
     }  
         # Return the hash.  
         return %retVal;  
 }  
1851    
1852  =head3 GetEntityTypes  =item RETURN
1853    
1854  C<< my @entityList = $sprout->GetEntityTypes(); >>  Returns the ID that would be used to represent a functional coupling of
1855    the two specified PEGs.
1856    
1857  Return the list of supported entity types.  =back
1858    
1859  =cut  =cut
1860  #: Return Type @;  #: Return Type $;
1861  sub GetEntityTypes {  sub CouplingID {
1862          # Get the parameters.      return join " ", sort @_;
         my $self = shift @_;  
         # Get the underlying database object.  
         my $erdb = $self->{_erdb};  
         # Get its entity type list.  
         my @retVal = $erdb->GetEntityTypes();  
1863  }  }
1864    
1865  =head3 ReadFasta  =head3 ReadFasta
# Line 1395  Line 1906 
1906                  if ($line =~ m/^>\s*(.+?)(\s|\n)/) {                  if ($line =~ m/^>\s*(.+?)(\s|\n)/) {
1907                          # Here we have a new header. Store the current sequence if we have one.                          # Here we have a new header. Store the current sequence if we have one.
1908                          if ($id) {                          if ($id) {
1909                                  $retVal{$id} = $sequence;                  $retVal{$id} = lc $sequence;
1910                          }                          }
1911                          # Clear the sequence accumulator and save the new ID.                          # Clear the sequence accumulator and save the new ID.
1912                          ($id, $sequence) = ("$prefix$1", "");                          ($id, $sequence) = ("$prefix$1", "");
1913                  } else {                  } else {
1914                          # Here we have a data line, so we add it to the sequence accumulator.                          # Here we have a data line, so we add it to the sequence accumulator.
1915                          # First, we get the actual data out.              # First, we get the actual data out. Note that we normalize to lower
1916                # case.
1917                          $line =~ /^\s*(.*?)(\s|\n)/;                          $line =~ /^\s*(.*?)(\s|\n)/;
1918                          $sequence .= $1;                          $sequence .= $1;
1919                  }                  }
1920          }          }
1921          # Flush out the last sequence (if any).          # Flush out the last sequence (if any).
1922          if ($sequence) {          if ($sequence) {
1923                  $retVal {$id} = $sequence;          $retVal{$id} = lc $sequence;
1924          }          }
1925        # Close the file.
1926        close FASTAFILE;
1927          # Return the hash constructed from the file.          # Return the hash constructed from the file.
1928          return %retVal;          return %retVal;
1929  }  }
# Line 1420  Line 1934 
1934    
1935  Insure that a list of feature locations is in the Sprout format. The Sprout feature location  Insure that a list of feature locations is in the Sprout format. The Sprout feature location
1936  format is I<contig>_I<beg*len> where I<*> is C<+> for a forward gene and C<-> for a backward  format is I<contig>_I<beg*len> where I<*> is C<+> for a forward gene and C<-> for a backward
1937  gene. The old format is I<contig>_I<beg>_I<end>.  gene. The old format is I<contig>_I<beg>_I<end>. If a feature is in the new format already,
1938    it will not be changed; otherwise, it will be converted. This method can also be used to
1939    perform the reverse task-- insuring that all the locations are in the old format.
1940    
1941  =over 4  =over 4
1942    
# Line 1447  Line 1963 
1963  #: Return Type @;  #: Return Type @;
1964  sub FormatLocations {  sub FormatLocations {
1965          # Get the parameters.          # Get the parameters.
1966          my $self = shift @_;      my ($self, $prefix, $locations, $oldFormat) = @_;
         my ($prefix, $locations, $oldFormat) = @_;  
1967          # Create the return list.          # Create the return list.
1968          my @retVal = ();          my @retVal = ();
1969          # Check to see if any locations were passed in.          # Check to see if any locations were passed in.
1970          if ($locations eq '') {          if ($locations eq '') {
1971              confess "No locations specified.";          Confess("No locations specified.");
1972          } else {          } else {
1973                  # Loop through the locations, converting them to the new format.                  # Loop through the locations, converting them to the new format.
1974                  for my $location (@{$locations}) {                  for my $location (@{$locations}) {
# Line 1488  Line 2003 
2003    
2004  sub DumpData {  sub DumpData {
2005          # Get the parameters.          # Get the parameters.
2006          my $self = shift @_;      my ($self) = @_;
2007          # Get the data directory name.          # Get the data directory name.
2008          my $outputDirectory = $self->{_options}->{dataDir};          my $outputDirectory = $self->{_options}->{dataDir};
2009          # Dump the relations.          # Dump the relations.
2010          $self->{_erdb}->DumpRelations($outputDirectory);      $self->DumpRelations($outputDirectory);
2011  }  }
2012    
2013  =head3 XMLFileName  =head3 XMLFileName
# Line 1504  Line 2019 
2019  =cut  =cut
2020  #: Return Type $;  #: Return Type $;
2021  sub XMLFileName {  sub XMLFileName {
2022          my $self = shift @_;      my ($self) = @_;
2023          return $self->{_xmlName};          return $self->{_xmlName};
2024  }  }
2025    
# Line 1524  Line 2039 
2039  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
2040  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
2041    
2042  C<< $sprout->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $sprout->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
2043    
2044  =over 4  =over 4
2045    
# Line 1542  Line 2057 
2057  #: Return Type ;  #: Return Type ;
2058  sub Insert {  sub Insert {
2059          # Get the parameters.          # Get the parameters.
2060          my $self = shift @_;      my ($self, $objectType, $fieldHash) = @_;
         my ($objectType, $fieldHash) = @_;  
2061          # Call the underlying method.          # Call the underlying method.
2062          $self->{_erdb}->InsertObject($objectType, $fieldHash);      $self->InsertObject($objectType, $fieldHash);
2063  }  }
2064    
2065  =head3 Annotate  =head3 Annotate
# Line 1584  Line 2098 
2098  #: Return Type $;  #: Return Type $;
2099  sub Annotate {  sub Annotate {
2100          # Get the parameters.          # Get the parameters.
2101          my $self = shift @_;      my ($self, $fid, $timestamp, $user, $text) = @_;
         my ($fid, $timestamp, $user, $text) = @_;  
2102          # Create the annotation ID.          # Create the annotation ID.
2103          my $aid = "$fid:$timestamp";          my $aid = "$fid:$timestamp";
2104          # Insert the Annotation object.          # Insert the Annotation object.
# Line 1605  Line 2118 
2118    
2119  =head3 AssignFunction  =head3 AssignFunction
2120    
2121  C<< my $ok = $sprout->AssignFunction($featureID, $user, $function); >>  C<< my $ok = $sprout->AssignFunction($featureID, $user, $function, $assigningUser); >>
2122    
2123  This method assigns a function to a feature. Functions are a special type of annotation. The general  This method assigns a function to a feature. Functions are a special type of annotation. The general
2124  format is "XXXX\nset XXXX function to\nYYYYY" where XXXX is the feature type and YYYY is the functional  format is described in L</ParseAssignment>.
 assignment text.  
2125    
2126  =over 4  =over 4
2127    
# Line 1619  Line 2131 
2131    
2132  =item user  =item user
2133    
2134  Name of the user making the assignment. This is frequently a group name, like C<kegg> or C<fig>.  Name of the user group making the assignment, such as C<kegg> or C<fig>.
2135    
2136  =item function  =item function
2137    
2138  Text of the function being assigned.  Text of the function being assigned.
2139    
2140    =item assigningUser (optional)
2141    
2142    Name of the individual user making the assignment. If omitted, defaults to the user group.
2143    
2144  =item RETURN  =item RETURN
2145    
2146  Returns 1 if successful, 0 if an error occurred.  Returns 1 if successful, 0 if an error occurred.
# Line 1635  Line 2151 
2151  #: Return Type $;  #: Return Type $;
2152  sub AssignFunction {  sub AssignFunction {
2153          # Get the parameters.          # Get the parameters.
2154          my $self = shift @_;      my ($self, $featureID, $user, $function, $assigningUser) = @_;
2155          my ($featureID, $user, $function) = @_;      # Default the assigning user.
2156        if (! $assigningUser) {
2157            $assigningUser = $user;
2158        }
2159          # Create an annotation string from the parameters.          # Create an annotation string from the parameters.
2160          my $annotationText = "$user\nset $user function to\n$function";      my $annotationText = "$assigningUser\nset $user function to\n$function";
2161          # Get the current time.          # Get the current time.
2162          my $now = time;          my $now = time;
2163          # Declare the return variable.          # Declare the return variable.
# Line 1683  Line 2202 
2202  #: Return Type @;  #: Return Type @;
2203  sub FeaturesByAlias {  sub FeaturesByAlias {
2204          # Get the parameters.          # Get the parameters.
2205          my $self = shift @_;      my ($self, $alias) = @_;
         my ($alias) = @_;  
2206          # Declare the return variable.          # Declare the return variable.
2207          my @retVal = ();          my @retVal = ();
2208          # Parse the alias.          # Parse the alias.
# Line 1726  Line 2244 
2244  #: Return Type $;  #: Return Type $;
2245  sub Exists {  sub Exists {
2246          # Get the parameters.          # Get the parameters.
2247          my $self = shift @_;      my ($self, $entityName, $entityID) = @_;
         my ($entityName, $entityID) = @_;  
2248          # Check for the entity instance.          # Check for the entity instance.
2249        Trace("Checking existence of $entityName with ID=$entityID.") if T(4);
2250          my $testInstance = $self->GetEntity($entityName, $entityID);          my $testInstance = $self->GetEntity($entityName, $entityID);
2251          # Return an existence indicator.          # Return an existence indicator.
2252          my $retVal = ($testInstance ? 1 : 0);          my $retVal = ($testInstance ? 1 : 0);
# Line 1757  Line 2275 
2275  #: Return Type $;  #: Return Type $;
2276  sub FeatureTranslation {  sub FeatureTranslation {
2277          # Get the parameters.          # Get the parameters.
2278          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
2279          # Get the specified feature's translation.          # Get the specified feature's translation.
2280          my ($retVal) = $self->GetEntityValues("Feature", $featureID, ['Feature(translation)']);          my ($retVal) = $self->GetEntityValues("Feature", $featureID, ['Feature(translation)']);
2281          return $retVal;          return $retVal;
# Line 1790  Line 2307 
2307  #: Return Type @;  #: Return Type @;
2308  sub Taxonomy {  sub Taxonomy {
2309          # Get the parameters.          # Get the parameters.
2310          my $self = shift @_;      my ($self, $genome) = @_;
         my ($genome) = @_;  
2311          # Find the specified genome's taxonomy string.          # Find the specified genome's taxonomy string.
2312          my ($list) = $self->GetEntityValues('Genome', $genome, ['Genome(taxonomy)']);          my ($list) = $self->GetEntityValues('Genome', $genome, ['Genome(taxonomy)']);
2313          # Declare the return variable.          # Declare the return variable.
# Line 1834  Line 2350 
2350  #: Return Type $;  #: Return Type $;
2351  sub CrudeDistance {  sub CrudeDistance {
2352          # Get the parameters.          # Get the parameters.
2353          my $self = shift @_;      my ($self, $genome1, $genome2) = @_;
         my ($genome1, $genome2) = @_;  
2354          # Insure that the distance is commutative by sorting the genome IDs.          # Insure that the distance is commutative by sorting the genome IDs.
2355          my ($genomeA, $genomeB);          my ($genomeA, $genomeB);
2356          if ($genome2 < $genome2) {          if ($genome2 < $genome2) {
# Line 1882  Line 2397 
2397  #: Return Type $;  #: Return Type $;
2398  sub RoleName {  sub RoleName {
2399          # Get the parameters.          # Get the parameters.
2400          my $self = shift @_;      my ($self, $roleID) = @_;
         my ($roleID) = @_;  
2401          # Get the specified role's name.          # Get the specified role's name.
2402          my ($retVal) = $self->GetEntityValues('Role', $roleID, ['Role(name)']);          my ($retVal) = $self->GetEntityValues('Role', $roleID, ['Role(name)']);
2403          # Use the ID if the role has no name.          # Use the ID if the role has no name.
# Line 1916  Line 2430 
2430  #: Return Type @;  #: Return Type @;
2431  sub RoleDiagrams {  sub RoleDiagrams {
2432          # Get the parameters.          # Get the parameters.
2433          my $self = shift @_;      my ($self, $roleID) = @_;
         my ($roleID) = @_;  
2434          # Query for the diagrams.          # Query for the diagrams.
2435          my @retVal = $self->GetFlat(['RoleOccursIn'], "RoleOccursIn(from-link) = ?", [$roleID],          my @retVal = $self->GetFlat(['RoleOccursIn'], "RoleOccursIn(from-link) = ?", [$roleID],
2436                                                                  'RoleOccursIn(to-link)');                                                                  'RoleOccursIn(to-link)');
# Line 1925  Line 2438 
2438          return @retVal;          return @retVal;
2439  }  }
2440    
2441    =head3 GetProperties
2442    
2443    C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >>
2444    
2445    Return a list of the properties with the specified characteristics.
2446    
2447    Properties are arbitrary key-value pairs associated with a feature. (At some point they
2448    will also be associated with genomes.) A property value is represented by a 4-tuple of
2449    the form B<($fid, $key, $value, $url)>. These exactly correspond to the parameter
2450    
2451    =over 4
2452    
2453    =item fid
2454    
2455    ID of the feature possessing the property.
2456    
2457    =item key
2458    
2459    Name or key of the property.
2460    
2461    =item value
2462    
2463    Value of the property.
2464    
2465    =item url
2466    
2467    URL of the document that indicated the property should have this particular value, or an
2468    empty string if no such document exists.
2469    
2470    =back
2471    
2472    The parameters act as a filter for the desired data. Any non-null parameter will
2473    automatically match all the tuples returned. So, specifying just the I<$fid> will
2474    return all the properties of the specified feature; similarly, specifying the I<$key>
2475    and I<$value> parameters will return all the features having the specified property
2476    value.
2477    
2478    A single property key can have many values, representing different ideas about the
2479    feature in question. For example, one paper may declare that a feature C<fig|83333.1.peg.10> is
2480    virulent, and another may declare that it is not virulent. A query about the virulence of
2481    C<fig|83333.1.peg.10> would be coded as
2482    
2483        my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', '');
2484    
2485    Here the I<$value> and I<$url> fields are left blank, indicating that those fields are
2486    not to be filtered. The tuples returned would be
2487    
2488        ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf')
2489        ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf')
2490    
2491    =cut
2492    #: Return Type @@;
2493    sub GetProperties {
2494        # Get the parameters.
2495        my ($self, @parms) = @_;
2496        # Declare the return variable.
2497        my @retVal = ();
2498        # Now we need to create a WHERE clause that will get us the data we want. First,
2499        # we create a list of the columns containing the data for each parameter.
2500        my @colNames = ('HasProperty(from-link)', 'Property(property-name)',
2501                        'Property(property-value)', 'HasProperty(evidence)');
2502        # Now we build the WHERE clause and the list of parameter values.
2503        my @where = ();
2504        my @values = ();
2505        for (my $i = 0; $i <= $#colNames; $i++) {
2506            my $parm = $parms[$i];
2507            if (defined $parm && ($parm ne '')) {
2508                push @where, "$colNames[$i] = ?";
2509                push @values, $parm;
2510            }
2511        }
2512        # Format the WHERE clause.
2513        my $filter = (@values > 0 ? (join " AND ", @where) : undef);
2514        # Ask for all the propertie values with the desired characteristics.
2515        my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values);
2516        while (my $valueObject = $query->Fetch()) {
2517            my @tuple = $valueObject->Values(\@colNames);
2518            push @retVal, \@tuple;
2519        }
2520        # Return the result.
2521        return @retVal;
2522    }
2523    
2524  =head3 FeatureProperties  =head3 FeatureProperties
2525    
2526  C<< my @properties = $sprout->FeatureProperties($featureID); >>  C<< my @properties = $sprout->FeatureProperties($featureID); >>
# Line 1954  Line 2550 
2550  #: Return Type @@;  #: Return Type @@;
2551  sub FeatureProperties {  sub FeatureProperties {
2552          # Get the parameters.          # Get the parameters.
2553          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
2554          # Get the properties.          # Get the properties.
2555          my @retVal = $self->GetAll(['HasProperty', 'Property'], "HasProperty(from-link) = ?", [$featureID],          my @retVal = $self->GetAll(['HasProperty', 'Property'], "HasProperty(from-link) = ?", [$featureID],
2556                                                          ['Property(property-name)', 'Property(property-value)',                                                          ['Property(property-name)', 'Property(property-value)',
# Line 1986  Line 2581 
2581  #: Return Type $;  #: Return Type $;
2582  sub DiagramName {  sub DiagramName {
2583          # Get the parameters.          # Get the parameters.
2584          my $self = shift @_;      my ($self, $diagramID) = @_;
         my ($diagramID) = @_;  
2585          # Get the specified diagram's name and return it.          # Get the specified diagram's name and return it.
2586          my ($retVal) = $self->GetEntityValues('Diagram', $diagramID, ['Diagram(name)']);          my ($retVal) = $self->GetEntityValues('Diagram', $diagramID, ['Diagram(name)']);
2587          return $retVal;          return $retVal;
# Line 2019  Line 2613 
2613  #: Return Type @;  #: Return Type @;
2614  sub MergedAnnotations {  sub MergedAnnotations {
2615          # Get the parameters.          # Get the parameters.
2616          my $self = shift @_;      my ($self, $list) = @_;
         my ($list) = @_;  
2617          # Create a list to hold the annotation tuples found.          # Create a list to hold the annotation tuples found.
2618          my @tuples = ();          my @tuples = ();
2619          # Loop through the features in the input list.          # Loop through the features in the input list.
# Line 2068  Line 2661 
2661  #: Return Type @;  #: Return Type @;
2662  sub RoleNeighbors {  sub RoleNeighbors {
2663          # Get the parameters.          # Get the parameters.
2664          my $self = shift @_;      my ($self, $roleID) = @_;
         my ($roleID) = @_;  
2665          # Get all the diagrams containing this role.          # Get all the diagrams containing this role.
2666          my @diagrams = $self->GetFlat(['RoleOccursIn'], "RoleOccursIn(from-link) = ?", [$roleID],          my @diagrams = $self->GetFlat(['RoleOccursIn'], "RoleOccursIn(from-link) = ?", [$roleID],
2667                                                                    'RoleOccursIn(to-link)');                                                                    'RoleOccursIn(to-link)');
# Line 2111  Line 2703 
2703  #: Return Type @;  #: Return Type @;
2704  sub FeatureLinks {  sub FeatureLinks {
2705          # Get the parameters.          # Get the parameters.
2706          my $self = shift @_;      my ($self, $featureID) = @_;
         my ($featureID) = @_;  
2707          # Get the feature's links.          # Get the feature's links.
2708          my @retVal = $self->GetEntityValues('Feature', $featureID, ['Feature(link)']);          my @retVal = $self->GetEntityValues('Feature', $featureID, ['Feature(link)']);
2709          # Return the feature's links.          # Return the feature's links.
# Line 2124  Line 2715 
2715  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>
2716    
2717  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped
2718  to the role the feature performs.  to the roles the feature performs.
2719    
2720  =over 4  =over 4
2721    
# Line 2134  Line 2725 
2725    
2726  =item RETURN  =item RETURN
2727    
2728  Returns a hash mapping all the feature's subsystems to the feature's role.  Returns a hash mapping all the feature's subsystems to a list of the feature's roles.
2729    
2730  =back  =back
2731    
2732  =cut  =cut
2733  #: Return Type %;  #: Return Type %@;
2734  sub SubsystemsOf {  sub SubsystemsOf {
2735          # Get the parameters.          # Get the parameters.
2736          my $self = shift @_;      my ($self, $featureID) = @_;
2737          my ($featureID) = @_;      # Get the subsystem list.
         # Use the SSCell to connect features to subsystems.  
2738          my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'],          my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'],
2739                                                                          "ContainsFeature(to-link) = ?", [$featureID],                                                                          "ContainsFeature(to-link) = ?", [$featureID],
2740                                                                          ['HasSSCell(from-link)', 'IsRoleOf(from-link)']);                                                                          ['HasSSCell(from-link)', 'IsRoleOf(from-link)']);
2741          # Create the return value.          # Create the return value.
2742          my %retVal = ();          my %retVal = ();
2743        # Build a hash to weed out duplicates. Sometimes the same PEG and role appears
2744        # in two spreadsheet cells.
2745        my %dupHash = ();
2746          # Loop through the results, adding them to the hash.          # Loop through the results, adding them to the hash.
2747          for my $record (@subsystems) {          for my $record (@subsystems) {
2748                  $retVal{$record->[0]} = $record->[1];          # Get this subsystem and role.
2749            my ($subsys, $role) = @{$record};
2750            # Insure it's the first time for both.
2751            my $dupKey = "$subsys\n$role";
2752            if (! exists $dupHash{"$subsys\n$role"}) {
2753                $dupHash{$dupKey} = 1;
2754                push @{$retVal{$subsys}}, $role;
2755            }
2756          }          }
2757          # Return the hash.          # Return the hash.
2758          return %retVal;          return %retVal;
2759  }  }
2760    
2761    =head3 SubsystemList
2762    
2763    C<< my @subsystems = $sprout->SubsystemList($featureID); >>
2764    
2765    Return a list containing the names of the subsystems in which the specified
2766    feature participates. Unlike L</SubsystemsOf>, this method only returns the
2767    subsystem names, not the roles.
2768    
2769    =over 4
2770    
2771    =item featureID
2772    
2773    ID of the feature whose subsystem names are desired.
2774    
2775    =item RETURN
2776    
2777    Returns a list of the names of the subsystems in which the feature participates.
2778    
2779    =back
2780    
2781    =cut
2782    #: Return Type @;
2783    sub SubsystemList {
2784        # Get the parameters.
2785        my ($self, $featureID) = @_;
2786        # Get the list of names.
2787        my @retVal = $self->GetFlat(['ContainsFeature', 'HasSSCell'], "ContainsFeature(to-link) = ?",
2788                                    [$featureID], 'HasSSCell(from-link)');
2789        # Return the result.
2790        return @retVal;
2791    }
2792    
2793    
2794    
2795  =head3 RelatedFeatures  =head3 RelatedFeatures
2796    
2797  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 2191  Line 2825 
2825  #: Return Type @;  #: Return Type @;
2826  sub RelatedFeatures {  sub RelatedFeatures {
2827          # Get the parameters.          # Get the parameters.
2828          my $self = shift @_;      my ($self, $featureID, $function, $userID) = @_;
         my ($featureID, $function, $userID) = @_;  
2829          # Get a list of the features that are BBHs of the incoming feature.          # Get a list of the features that are BBHs of the incoming feature.
2830          my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'],          my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'],
2831                                                                           "IsBidirectionalBestHitOf(from-link) = ?", [$featureID],                                                                           "IsBidirectionalBestHitOf(from-link) = ?", [$featureID],
# Line 2240  Line 2873 
2873  #: Return Type @;  #: Return Type @;
2874  sub TaxonomySort {  sub TaxonomySort {
2875          # Get the parameters.          # Get the parameters.
2876          my $self = shift @_;      my ($self, $featureIDs) = @_;
         my ($featureIDs) = @_;  
2877          # Create the working hash table.          # Create the working hash table.
2878          my %hashBuffer = ();          my %hashBuffer = ();
2879          # Loop through the features.          # Loop through the features.
# Line 2250  Line 2882 
2882                  my ($taxonomy) = $self->GetFlat(['IsLocatedIn', 'HasContig', 'Genome'], "IsLocatedIn(from-link) = ?",                  my ($taxonomy) = $self->GetFlat(['IsLocatedIn', 'HasContig', 'Genome'], "IsLocatedIn(from-link) = ?",
2883                                                                                  [$fid], 'Genome(taxonomy)');                                                                                  [$fid], 'Genome(taxonomy)');
2884                  # Add this feature to the hash buffer.                  # Add this feature to the hash buffer.
2885                  if (exists $hashBuffer{$taxonomy}) {          Tracer::AddToListMap(\%hashBuffer, $taxonomy, $fid);
                         push @{$hashBuffer{$taxonomy}}, $fid;  
                 } else {  
                         $hashBuffer{$taxonomy} = [$fid];  
                 }  
2886          }          }
2887          # Sort the keys and get the elements.          # Sort the keys and get the elements.
2888          my @retVal = ();          my @retVal = ();
# Line 2265  Line 2893 
2893          return @retVal;          return @retVal;
2894  }  }
2895    
 =head3 GetAll  
   
 C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>  
   
 Return a list of values taken from the objects returned by a query. The first three  
 parameters correspond to the parameters of the L</Get> method. The final parameter is  
 a list of the fields desired from each record found by the query. The field name  
 syntax is the standard syntax used for fields in the B<ERDB> system--  
 B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity  
 or relationship and I<fieldName> is the name of the field.  
   
 The list returned will be a list of lists. Each element of the list will contain  
 the values returned for the fields specified in the fourth parameter. If one of the  
 fields specified returns multiple values, they are flattened in with the rest. For  
 example, the following call will return a list of the features in a particular  
 spreadsheet cell, and each feature will be represented by a list containing the  
 feature ID followed by all of its aliases.  
   
 C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>  
   
 =over 4  
   
 =item objectNames  
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item fields  
   
 List of the fields to be returned in each element of the list returned.  
   
 =item count  
   
 Maximum number of records to return. If omitted or 0, all available records will be returned.  
   
 =item RETURN  
   
 Returns a list of list references. Each element of the return list contains the values for the  
 fields specified in the B<fields> parameter.  
   
 =back  
   
 =cut  
 #: Return Type @@;  
 sub GetAll {  
         # Get the parameters.  
         my $self = shift @_;  
         my ($objectNames, $filterClause, $parameterList, $fields, $count) = @_;  
         # Create the query.  
         my $query = $self->Get($objectNames, $filterClause, $parameterList);  
         # Set up a counter of the number of records read.  
         my $fetched = 0;  
         # Insure the counter has a value.  
         if (!defined $count) {  
                 $count = 0;  
         }  
         # Loop through the records returned, extracting the fields. Note that if the  
         # counter is non-zero, we stop when the number of records read hits the count.  
         my @retVal = ();  
         while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {  
                 my @rowData = $row->Values($fields);  
                 push @retVal, \@rowData;  
                 $fetched++;  
         }  
         # Return the resulting list.  
         return @retVal;  
 }  
   
 =head3 GetFlat  
   
 C<< my @list = $sprout->GetFlat(\@objectNames, $filterClause, $parameterList, $field); >>  
   
 This is a variation of L</GetAll> that asks for only a single field per record and  
 returns a single flattened list.  
   
 =over 4  
   
 =item objectNames  
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item field  
   
 Name of the field to be used to get the elements of the list returned.  
   
 =item RETURN  
   
 Returns a list of values.  
   
 =back  
   
 =cut  
 #: Return Type @;  
 sub GetFlat {  
         # Get the parameters.  
         my $self = shift @_;  
         my ($objectNames, $filterClause, $parameterList, $field) = @_;  
         # Construct the query.  
         my $query = $self->Get($objectNames, $filterClause, $parameterList);  
         # Create the result list.  
         my @retVal = ();  
         # Loop through the records, adding the field values found to the result list.  
         while (my $row = $query->Fetch()) {  
                 push @retVal, $row->Value($field);  
         }  
         # Return the list created.  
         return @retVal;  
 }  
   
2896  =head3 Protein  =head3 Protein
2897    
2898  C<< my $protein = Sprout::Protein($sequence, $table); >>  C<< my $protein = Sprout::Protein($sequence, $table); >>
# Line 2496  Line 2990 
2990  #: Return Type @;  #: Return Type @;
2991  sub LoadInfo {  sub LoadInfo {
2992          # Get the parameters.          # Get the parameters.
2993          my $self = shift @_;      my ($self) = @_;
2994          # Create the return list, priming it with the name of the data directory.          # Create the return list, priming it with the name of the data directory.
2995          my @retVal = ($self->{_options}->{dataDir});          my @retVal = ($self->{_options}->{dataDir});
2996          # Concatenate the table names.          # Concatenate the table names.
2997          push @retVal, $self->{_erdb}->GetTableNames();      push @retVal, $self->GetTableNames();
2998          # Return the result.          # Return the result.
2999          return @retVal;          return @retVal;
3000  }  }
3001    
3002  =head3 LowBBHs  =head3 LowBBHs
3003    
3004  C<< my %bbhMap = $sprout->GoodBBHs($featureID, $cutoff); >>  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>
3005    
3006  Return the bidirectional best hits of a feature whose score is no greater than a  Return the bidirectional best hits of a feature whose score is no greater than a
3007  specified cutoff value. A higher cutoff value will allow inclusion of hits with  specified cutoff value. A higher cutoff value will allow inclusion of hits with
# Line 2533  Line 3027 
3027  #: Return Type %;  #: Return Type %;
3028  sub LowBBHs {  sub LowBBHs {
3029          # Get the parsameters.          # Get the parsameters.
3030          my $self = shift @_;      my ($self, $featureID, $cutoff) = @_;
         my ($featureID, $cutoff) = @_;  
3031          # Create the return hash.          # Create the return hash.
3032          my %retVal = ();          my %retVal = ();
3033          # Create a query to get the desired BBHs.          # Create a query to get the desired BBHs.
# Line 2550  Line 3043 
3043          return %retVal;          return %retVal;
3044  }  }
3045    
3046    =head3 GetGroups
3047    
3048    C<< my %groups = $sprout->GetGroups(\@groupList); >>
3049    
3050    Return a hash mapping each group to the IDs of the genomes in the group.
3051    A list of groups may be specified, in which case only those groups will be
3052    shown. Alternatively, if no parameter is supplied, all groups will be
3053    included. Genomes that are not in any group are omitted.
3054    
3055    =cut
3056    #: Return Type %@;
3057    sub GetGroups {
3058        # Get the parameters.
3059        my ($self, $groupList) = @_;
3060        # Declare the return value.
3061        my %retVal = ();
3062        # Determine whether we are getting all the groups or just some.
3063        if (defined $groupList) {
3064            # Here we have a group list. Loop through them individually,
3065            # getting a list of the relevant genomes.
3066            for my $group (@{$groupList}) {
3067                my @genomeIDs = $self->GetFlat(['Genome'], "Genome(group-name) = ?",
3068                    [$group], "Genome(id)");
3069                $retVal{$group} = \@genomeIDs;
3070            }
3071        } else {
3072            # Here we need all of the groups. In this case, we run through all
3073            # of the genome records, putting each one found into the appropriate
3074            # group. Note that we use a filter clause to insure that only genomes
3075            # in groups are included in the return set.
3076            my @genomes = $self->GetAll(['Genome'], "Genome(group-name) > ' '", [],
3077                                        ['Genome(id)', 'Genome(group-name)']);
3078            # Loop through the genomes found.
3079            for my $genome (@genomes) {
3080                # Pop this genome's ID off the current list.
3081                my @groups = @{$genome};
3082                my $genomeID = shift @groups;
3083                # Loop through the groups, adding the genome ID to each group's
3084                # list.
3085                for my $group (@groups) {
3086                    Tracer::AddToListMap(\%retVal, $group, $genomeID);
3087                }
3088            }
3089        }
3090        # Return the hash we just built.
3091        return %retVal;
3092    }
3093    
3094    =head3 MyGenomes
3095    
3096    C<< my @genomes = Sprout::MyGenomes($dataDir); >>
3097    
3098    Return a list of the genomes to be included in the Sprout.
3099    
3100    This method is provided for use during the Sprout load. It presumes the Genome load file has
3101    already been created. (It will be in the Sprout data directory and called either C<Genome>
3102    or C<Genome.dtx>.) Essentially, it reads in the Genome load file and strips out the genome
3103    IDs.
3104    
3105    =over 4
3106    
3107    =item dataDir
3108    
3109    Directory containing the Sprout load files.
3110    
3111    =back
3112    
3113    =cut
3114    #: Return Type @;
3115    sub MyGenomes {
3116        # Get the parameters.
3117        my ($dataDir) = @_;
3118        # Compute the genome file name.
3119        my $genomeFileName = LoadFileName($dataDir, "Genome");
3120        # Extract the genome IDs from the files.
3121        my @retVal = map { $_ =~ /^(\S+)/; $1 } Tracer::GetFile($genomeFileName);
3122        # Return the result.
3123        return @retVal;
3124    }
3125    
3126    =head3 LoadFileName
3127    
3128    C<< my $fileName = Sprout::LoadFileName($dataDir, $tableName); >>
3129    
3130    Return the name of the load file for the specified table in the specified data
3131    directory.
3132    
3133    =over 4
3134    
3135    =item dataDir
3136    
3137    Directory containing the Sprout load files.
3138    
3139    =item tableName
3140    
3141    Name of the table whose load file is desired.
3142    
3143    =item RETURN
3144    
3145    Returns the name of the file containing the load data for the specified table, or
3146    C<undef> if no load file is present.
3147    
3148    =back
3149    
3150    =cut
3151    #: Return Type $;
3152    sub LoadFileName {
3153        # Get the parameters.
3154        my ($dataDir, $tableName) = @_;
3155        # Declare the return variable.
3156        my $retVal;
3157        # Check for the various file names.
3158        if (-e "$dataDir/$tableName") {
3159            $retVal = "$dataDir/$tableName";
3160        } elsif (-e "$dataDir/$tableName.dtx") {
3161            $retVal = "$dataDir/$tableName.dtx";
3162        }
3163        # Return the result.
3164        return $retVal;
3165    }
3166    
3167    =head3 DeleteGenome
3168    
3169    C<< my $stats = $sprout->DeleteGenome($genomeID, $testFlag); >>
3170    
3171    Delete a genome from the database.
3172    
3173    =over 4
3174    
3175    =item genomeID
3176    
3177    ID of the genome to delete
3178    
3179    =item testFlag
3180    
3181    If TRUE, then the DELETE statements will be traced, but no deletions will occur.
3182    
3183    =item RETURN
3184    
3185    Returns a statistics object describing the rows deleted.
3186    
3187    =back
3188    
3189    =cut
3190    #: Return Type $%;
3191    sub DeleteGenome {
3192        # Get the parameters.
3193        my ($self, $genomeID, $testFlag) = @_;
3194        # Perform the delete for the genome's features.
3195        my $retVal = $self->Delete('Feature', "fig|$genomeID.%", $testFlag);
3196        # Perform the delete for the primary genome data.
3197        my $stats = $self->Delete('Genome', $genomeID, $testFlag);
3198        $retVal->Accumulate($stats);
3199        # Return the result.
3200        return $retVal;
3201    }
3202    
3203  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3204    
3205  =head3 ParseAssignment  =head3 ParseAssignment
3206    
3207  Parse annotation text to determine whether or not it is a functional assignment. If it is,  Parse annotation text to determine whether or not it is a functional assignment. If it is,
3208  the user and function text will be returned as a 2-element list. If it isn't, an empty list  the user, function text, and assigning user will be returned as a 3-element list. If it
3209  will be returned.  isn't, an empty list will be returned.
3210    
3211    A functional assignment is always of the form
3212    
3213        C<set >I<YYYY>C< function to\n>I<ZZZZZ>
3214    
3215    where I<YYYY> is the B<user>, and I<ZZZZ> is the actual functional role. In most cases,
3216    the user and the assigning user (from MadeAnnotation) will be the same, but that is
3217    not always the case.
3218    
3219    In addition, the functional role may contain extra data that is stripped, such as
3220    terminating spaces or a comment separated from the rest of the text by a tab.
3221    
3222  This is a static method.  This is a static method.
3223    
3224  =over 4  =over 4
3225    
3226    =item user
3227    
3228    Name of the assigning user.
3229    
3230  =item text  =item text
3231    
3232  Text of the annotation.  Text of the annotation.
# Line 2575  Line 3240 
3240    
3241  =cut  =cut
3242    
3243  sub ParseAssignment {  sub _ParseAssignment {
3244          # Get the parameters.          # Get the parameters.
3245          my ($text) = @_;      my ($user, $text) = @_;
3246          # Declare the return value.          # Declare the return value.
3247          my @retVal = ();          my @retVal = ();
3248          # Check to see if this is a functional assignment.          # Check to see if this is a functional assignment.
3249          my ($user, $type, $function) = split(/\n/, $text);      my ($type, $function) = split(/\n/, $text);
3250          if ($type =~ m/^set $user function to$/i) {      if ($type =~ m/^set function to$/i) {
3251                  # Here it is, so we return the user name and function text.          # Here we have an assignment without a user, so we use the incoming user ID.
3252                  @retVal = ($user, $function);                  @retVal = ($user, $function);
3253        } elsif ($type =~ m/^set (\S+) function to$/i) {
3254            # Here we have an assignment with a user that is passed back to the caller.
3255            @retVal = ($1, $function);
3256        }
3257        # If we have an assignment, we need to clean the function text. There may be
3258        # extra junk at the end added as a note from the user.
3259        if (@retVal) {
3260            $retVal[1] =~ s/(\t\S)?\s*$//;
3261          }          }
3262          # Return the result list.          # Return the result list.
3263          return @retVal;          return @retVal;
# Line 2612  Line 3285 
3285    
3286  sub FriendlyTimestamp {  sub FriendlyTimestamp {
3287      my ($timeValue) = @_;      my ($timeValue) = @_;
3288      my $retVal = strftime("%a %b %e %H:%M:%S %Y", localtime($timeValue));      my $retVal = localtime($timeValue);
3289      return $retVal;      return $retVal;
3290  }  }
3291    
3292    =head3 AddProperty
3293    
3294    C<< my  = $sprout->AddProperty($featureID, $key, $value, $url); >>
3295    
3296    Add a new attribute value (Property) to a feature. In the SEED system, attributes can
3297    be added to almost any object. In Sprout, they can only be added to features. In
3298    Sprout, attributes are implemented using I<properties>. A property represents a key/value
3299    pair. If the particular key/value pair coming in is not already in the database, a new
3300    B<Property> record is created to hold it.
3301    
3302    =over 4
3303    
3304    =item peg
3305    
3306    ID of the feature to which the attribute is to be replied.
3307    
3308    =item key
3309    
3310    Name of the attribute (key).
3311    
3312    =item value
3313    
3314    Value of the attribute.
3315    
3316    =item url
3317    
3318    URL or text citation from which the property was obtained.
3319    
3320    =back
3321    
3322    =cut
3323    #: Return Type ;
3324    sub AddProperty {
3325        # Get the parameters.
3326        my ($self, $featureID, $key, $value, $url) = @_;
3327        # Declare the variable to hold the desired property ID.
3328        my $propID;
3329        # Attempt to find a property record for this key/value pair.
3330        my @properties = $self->GetFlat(['Property'],
3331                                       "Property(property-name) = ? AND Property(property-value) = ?",
3332                                       [$key, $value], 'Property(id)');
3333        if (@properties) {
3334            # Here the property is already in the database. We save its ID.
3335            $propID = $properties[0];
3336            # Here the property value does not exist. We need to generate an ID. It will be set
3337            # to a number one greater than the maximum value in the database. This call to
3338            # GetAll will stop after one record.
3339            my @maxProperty = $self->GetAll(['Property'], "ORDER BY Property(id) DESC", [], ['Property(id)'],
3340                                            1);
3341            $propID = $maxProperty[0]->[0] + 1;
3342            # Insert the new property value.
3343            $self->Insert('Property', { 'property-name' => $key, 'property-value' => $value, id => $propID });
3344        }
3345        # Now we connect the incoming feature to the property.
3346        $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3347    }
3348    
3349    
3350  1;  1;

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.70

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3