[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.10, Fri Feb 25 18:41:45 2005 UTC revision 1.98, Tue Apr 10 06:13:33 2007 UTC
# Line 1  Line 1 
1  package Sprout;  package Sprout;
2    
3        require Exporter;
4        use ERDB;
5        @ISA = qw(Exporter ERDB);
6          use Data::Dumper;          use Data::Dumper;
7          use strict;          use strict;
         use Carp;  
8          use DBKernel;          use DBKernel;
9          use XML::Simple;          use XML::Simple;
10          use DBQuery;          use DBQuery;
11          use DBObject;      use ERDBObject;
         use ERDB;  
12          use Tracer;          use Tracer;
13          use FIGRules;          use FIGRules;
14        use FidCheck;
15          use Stats;          use Stats;
16      use POSIX qw(strftime);      use POSIX qw(strftime);
17        use BasicLocation;
18    
19  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
20    
# Line 32  Line 34 
34  query tasks. For example, L</genomes> lists the IDs of all the genomes in the database and  query tasks. For example, L</genomes> lists the IDs of all the genomes in the database and
35  L</dna_seq> returns the DNA sequence for a specified genome location.  L</dna_seq> returns the DNA sequence for a specified genome location.
36    
37    The Sprout object is a subclass of the ERDB object and inherits all its properties and methods.
38    
39  =cut  =cut
40    
41  #: Constructor SFXlate->new_sprout_only();  #: Constructor SFXlate->new_sprout_only();
# Line 62  Line 66 
66    
67  * B<xmlFileName> name of the XML file containing the database definition (default C<SproutDBD.xml>)  * B<xmlFileName> name of the XML file containing the database definition (default C<SproutDBD.xml>)
68    
69  * B<userData> user name and password, delimited by a slash (default C<root/>)  * B<userData> user name and password, delimited by a slash (default same as SEED)
70    
71  * B<port> connection port (default C<0>)  * B<port> connection port (default C<0>)
72    
73    * B<sock> connection socket (default same as SEED)
74    
75  * B<maxSegmentLength> maximum number of residues per feature segment, (default C<4500>)  * B<maxSegmentLength> maximum number of residues per feature segment, (default C<4500>)
76    
77  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)
78    
79    * B<noDBOpen> suppresses the connection to the database if TRUE, else FALSE
80    
81  =back  =back
82    
83  For example, the following constructor call specifies a database named I<Sprout> and a user name of  For example, the following constructor call specifies a database named I<Sprout> and a user name of
# Line 83  Line 91 
91  sub new {  sub new {
92          # Get the parameters.          # Get the parameters.
93          my ($class, $dbName, $options) = @_;          my ($class, $dbName, $options) = @_;
94        # Compute the DBD directory.
95        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
96                                                      $FIG_Config::fig );
97          # Compute the options. We do this by starting with a table of defaults and overwriting with          # Compute the options. We do this by starting with a table of defaults and overwriting with
98          # the incoming data.          # the incoming data.
99          my $optionTable = Tracer::GetOptions({          my $optionTable = Tracer::GetOptions({
100                                             dbType               => 'mysql',                     # database type                         dbType       => $FIG_Config::dbms,
101                                             dataDir              => 'Data',                      # data file directory                                                          # database type
102                                             xmlFileName  => 'SproutDBD.xml', # database definition file name                         dataDir      => $FIG_Config::sproutData,
103                                             userData             => 'root/',                     # user name and password                                                          # data file directory
104                                             port                 => 0,                           # database connection port                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
105                                                            # database definition file name
106                           userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
107                                                            # user name and password
108                           port         => $FIG_Config::dbport,
109                                                            # database connection port
110                           sock         => $FIG_Config::dbsock,
111                           host         => $FIG_Config::dbhost,
112                                             maxSegmentLength => 4500,            # maximum feature segment length                                             maxSegmentLength => 4500,            # maximum feature segment length
113                                             maxSequenceLength => 8000,           # maximum contig sequence length                                             maxSequenceLength => 8000,           # maximum contig sequence length
114                           noDBOpen     => 0,               # 1 to suppress the database open
115                                            }, $options);                                            }, $options);
116          # Get the data directory.          # Get the data directory.
117          my $dataDir = $optionTable->{dataDir};          my $dataDir = $optionTable->{dataDir};
# Line 100  Line 119 
119          $optionTable->{userData} =~ m!([^/]*)/(.*)$!;          $optionTable->{userData} =~ m!([^/]*)/(.*)$!;
120          my ($userName, $password) = ($1, $2);          my ($userName, $password) = ($1, $2);
121          # Connect to the database.          # Connect to the database.
122          my $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, $password, $optionTable->{port});      my $dbh;
123        if (! $optionTable->{noDBOpen}) {
124            $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
125                                    $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
126        }
127          # Create the ERDB object.          # Create the ERDB object.
128          my $xmlFileName = "$optionTable->{xmlFileName}";          my $xmlFileName = "$optionTable->{xmlFileName}";
129          my $erdb = ERDB->new($dbh, $xmlFileName);      my $retVal = ERDB::new($class, $dbh, $xmlFileName);
130          # Create this object.      # Add the option table and XML file name.
131          my $self = { _erdb => $erdb, _options => $optionTable, _xmlName => $xmlFileName };      $retVal->{_options} = $optionTable;
132          # Bless and return it.      $retVal->{_xmlName} = $xmlFileName;
133          bless $self;      # Set up space for the group file data.
134          return $self;      $retVal->{groupHash} = undef;
135        # Return it.
136        return $retVal;
137  }  }
138    
139  =head3 MaxSegment  =head3 MaxSegment
# Line 143  Line 168 
168          return $self->{_options}->{maxSequenceLength};          return $self->{_options}->{maxSequenceLength};
169  }  }
170    
171  =head3 Get  =head3 Load
   
 C<< my $query = $sprout->Get(\@objectNames, $filterClause, \@parameterList); >>  
   
 This method allows a general query against the Sprout data using a specified filter clause.  
   
 The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each  
 field name represented in the form B<I<objectName>(I<fieldName>)>. For example, the  
 following call requests all B<Genome> objects for the genus specified in the variable  
 $genus.  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", [$genus]); >>  
   
 The WHERE clause contains a single question mark, so there is a single additional  
 parameter representing the parameter value. It would also be possible to code  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >>  
   
 however, this version of the call would generate a syntax error if there were any quote  
 characters inside the variable C<$genus>.  
   
 The use of the strange parenthesized notation for field names enables us to distinguish  
 hyphens contained within field names from minus signs that participate in the computation  
 of the WHERE clause. All of the methods that manipulate fields will use this same notation.  
   
 It is possible to specify multiple entity and relationship names in order to retrieve more than  
 one object's data at the same time, which allows highly complex joined queries. For example,  
   
 C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", [$genus]); >>  
   
 This query returns all the genomes for a particular genus and allows access to the  
 sources from which they came. The join clauses to go from Genome to Source are generated  
 automatically.  
   
 Finally, the filter clause can contain sort information. To do this, simply put an C<ORDER BY>  
 clause at the end of the filter. Field references in the ORDER BY section follow the same rules  
 as they do in the filter itself; in other words, each one must be of the form B<I<objectName>(I<fieldName>)>.  
 For example, the following filter string gets all genomes for a particular genus and sorts  
 them by species name.  
   
 C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ? ORDER BY Genome(species)", [$genus]); >>  
172    
173  It is also permissible to specify I<only> an ORDER BY clause. For example, the following invocation gets  C<< $sprout->Load($rebuild); >>;
 all genomes ordered by genus and species.  
174    
175  C<< $query = $sprout->Get(['Genome'], "ORDER BY Genome(genus), Genome(species)"); >>  Load the database from files in the data directory, optionally re-creating the tables.
176    
177  Odd things may happen if one of the ORDER BY fields is in a secondary relation. So, for example, an  This method always deletes the data from the database before loading, even if the tables are not
178  attempt to order B<Feature>s by alias may (depending on the underlying database engine used) cause  re-created. The data is loaded into the relations from files in the data directory either having the
179  a single feature to appear more than once.  same name as the target relation with no extension or with an extension of C<.dtx>. Files without an
180    extension are used in preference to the files with an extension.
181    
182  If multiple names are specified, then the query processor will automatically determine a  The files are loaded based on the presumption that each line of the file is a record in the
183  join path between the entities and relationships. The algorithm used is very simplistic.  relation, and the individual fields are delimited by tabs. Tab and new-line characters inside
184  In particular, you can't specify any entity or relationship more than once, and if a  fields must be represented by the escape sequences C<\t> and C<\n>, respectively. The fields must
185  relationship is recursive, the path is determined by the order in which the entity  be presented in the order given in the relation tables produced by the L</ShowMetaData> method.
 and the relationship appear. For example, consider a recursive relationship B<IsParentOf>  
 which relates B<People> objects to other B<People> objects. If the join path is  
 coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however,  
 the join path is C<['IsParentOf', 'People']>, then the people returned will be children.  
186    
187  =over 4  =over 4
188    
189  =item objectNames  =item rebuild
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
190    
191  List of the parameters to be substituted in for the parameters marks in the filter clause.  TRUE if the data tables need to be created or re-created, else FALSE
192    
193  =item RETURN  =item RETURN
194    
195  Returns a B<DBQuery> that can be used to iterate through all of the results.  Returns a statistical object containing the number of records read, the number of duplicates found,
196    the number of errors, and a list of the error messages.
197    
198  =back  =back
199    
200  =cut  =cut
201    #: Return Type %;
202  sub Get {  sub Load {
203          # Get the parameters.          # Get the parameters.
204          my ($self, $objectNames, $filterClause, $parameterList) = @_;      my ($self, $rebuild) = @_;
205          # We differ from the ERDB Get method in that the parameter list is passed in as a list reference      # Load the tables from the data directory.
206          # rather than a list of parameters. The next step is to convert the parameters from a reference      my $retVal = $self->LoadTables($self->{_options}->{dataDir}, $rebuild);
207          # to a real list. We can only do this if the parameters have been specified.      # Return the statistics.
208          my @parameters;      return $retVal;
         if ($parameterList) { @parameters = @{$parameterList}; }  
         return $self->{_erdb}->Get($objectNames, $filterClause, @parameters);  
209  }  }
210    
211  =head3 GetEntity  =head3 LoadUpdate
212    
213  C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >>  C<< my $stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >>
214    
215  Return an object describing the entity instance with a specified ID.  Load updates to one or more database tables. This method enables the client to make changes to one
216    or two tables without reloading the whole database. For each table, there must be a corresponding
217    file in the data directory, either with the same name as the table, or with a C<.dtx> suffix. So,
218    for example, to make updates to the B<FeatureTranslation> relation, there must be a
219    C<FeatureTranslation.dtx> file in the data directory. Unlike a full load, files without an extension
220    are not examined. This allows update files to co-exist with files from an original load.
221    
222  =over 4  =over 4
223    
224  =item entityType  =item truncateFlag
225    
226  Entity type name.  TRUE if the tables should be rebuilt before loading, else FALSE. A value of TRUE therefore causes
227    current data and schema of the tables to be replaced, while a value of FALSE means the new data
228    is added to the existing data in the various relations.
229    
230  =item ID  =item tableList
231    
232  ID of the desired entity.  List of the tables to be updated.
233    
234  =item RETURN  =item RETURN
235    
236  Returns a B<DBObject> representing the desired entity instance, or an undefined value if no  Returns a statistical object containing the number of records read, the number of duplicates found,
237  instance is found with the specified key.  the number of errors encountered, and a list of error messages.
238    
239  =back  =back
240    
241  =cut  =cut
242    #: Return Type $%;
243  sub GetEntity {  sub LoadUpdate {
244          # Get the parameters.          # Get the parameters.
245          my ($self, $entityType, $ID) = @_;      my ($self, $truncateFlag, $tableList) = @_;
246          # Create a query.      # Declare the return value.
247          my $query = $self->Get([$entityType], "$entityType(id) = ?", [$ID]);      my $retVal = Stats->new();
248          # Get the first (and only) object.      # Get the data directory.
249          my $retVal = $query->Fetch();      my $optionTable = $self->{_options};
250          # Return the result.      my $dataDir = $optionTable->{dataDir};
251        # Loop through the incoming table names.
252        for my $tableName (@{$tableList}) {
253            # Find the table's file.
254            my $fileName = LoadFileName($dataDir, $tableName);
255            if (! $fileName) {
256                Trace("No load file found for $tableName in $dataDir.") if T(0);
257            } else {
258                # Attempt to load this table.
259                my $result = $self->LoadTable($fileName, $tableName, $truncateFlag);
260                # Accumulate the resulting statistics.
261                $retVal->Accumulate($result);
262            }
263        }
264        # Return the statistics.
265          return $retVal;          return $retVal;
266  }  }
267    
268  =head3 GetEntityValues  =head3 GenomeCounts
269    
270  C<< my @values = GetEntityValues($entityType, $ID, \@fields); >>  C<< my ($arch, $bact, $euk, $vir, $env, $unk) = $sprout->GenomeCounts($complete); >>
271    
272  Return a list of values from a specified entity instance.  Count the number of genomes in each domain. If I<$complete> is TRUE, only complete
273    genomes will be included in the counts.
274    
275  =over 4  =over 4
276    
277  =item entityType  =item complete
   
 Entity type name.  
   
 =item ID  
278    
279  ID of the desired entity.  TRUE if only complete genomes are to be counted, FALSE if all genomes are to be
280    counted
 =item fields  
   
 List of field names, each of the form I<objectName>C<(>I<fieldName>C<)>.  
281    
282  =item RETURN  =item RETURN
283    
284  Returns a flattened list of the values of the specified fields for the specified entity.  A six-element list containing the number of genomes in each of six categories--
285    Archaea, Bacteria, Eukaryota, Viral, Environmental, and Unknown, respectively.
286    
287  =back  =back
288    
289  =cut  =cut
290  #: Return Type @;  
291  sub GetEntityValues {  sub GenomeCounts {
292          # Get the parameters.          # Get the parameters.
293          my ($self, $entityType, $ID, $fields) = @_;      my ($self, $complete) = @_;
294          # Get the specified entity.      # Set the filter based on the completeness flag.
295          my $entity = $self->GetEntity($entityType, $ID);      my $filter = ($complete ? "Genome(complete) = 1" : "");
296          # Declare the return list.      # Get all the genomes and the related taxonomy information.
297          my @retVal = ();      my @genomes = $self->GetAll(['Genome'], $filter, [], ['Genome(id)', 'Genome(taxonomy)']);
298          # If we found the entity, push the values into the return list.      # Clear the counters.
299          if ($entity) {      my ($arch, $bact, $euk, $vir, $env, $unk) = (0, 0, 0, 0, 0, 0);
300                  push @retVal, $entity->Values($fields);      # Loop through, counting the domains.
301        for my $genome (@genomes) {
302            if    ($genome->[1] =~ /^archaea/i)  { ++$arch }
303            elsif ($genome->[1] =~ /^bacter/i)   { ++$bact }
304            elsif ($genome->[1] =~ /^eukar/i)    { ++$euk }
305            elsif ($genome->[1] =~ /^vir/i)      { ++$vir }
306            elsif ($genome->[1] =~ /^env/i)      { ++$env }
307            else  { ++$unk }
308          }          }
309          # Return the result.      # Return the counts.
310          return @retVal;      return ($arch, $bact, $euk, $vir, $env, $unk);
311  }  }
312    
313  =head3 ShowMetaData  =head3 ContigCount
314    
315  C<< $sprout->ShowMetaData($fileName); >>  C<< my $count = $sprout->ContigCount($genomeID); >>
316    
317  This method outputs a description of the database to an HTML file in the data directory.  Return the number of contigs for the specified genome ID.
318    
319  =over 4  =over 4
320    
321  =item fileName  =item genomeID
322    
323    ID of the genome whose contig count is desired.
324    
325    =item RETURN
326    
327  Fully-qualified name to give to the output file.  Returns the number of contigs for the specified genome.
328    
329  =back  =back
330    
331  =cut  =cut
332    
333  sub ShowMetaData {  sub ContigCount {
334          # Get the parameters.          # Get the parameters.
335          my ($self, $fileName) = @_;      my ($self, $genomeID) = @_;
336          # Compute the file name.      # Get the contig count.
337          my $options = $self->{_options};      my $retVal = $self->GetCount(['Contig', 'HasContig'], "HasContig(from-link) = ?", [$genomeID]);
338          # Call the show method on the underlying ERDB object.      # Return the result.
339          $self->{_erdb}->ShowMetaData($fileName);      return $retVal;
340  }  }
341    
342  =head3 Load  =head3 GeneMenu
   
 C<< $sprout->Load($rebuild); >>;  
   
 Load the database from files in the data directory, optionally re-creating the tables.  
343    
344  This method always deletes the data from the database before loading, even if the tables are not  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected); >>
 re-created. The data is loaded into the relations from files in the data directory either having the  
 same name as the target relation with no extension or with an extension of C<.dtx>. Files without an  
 extension are used in preference to the files with an extension.  
345    
346  The files are loaded based on the presumption that each line of the file is a record in the  Return an HTML select menu of genomes. Each genome will be an option in the menu,
347  relation, and the individual fields are delimited by tabs. Tab and new-line characters inside  and will be displayed by name with the ID and a contig count attached. The selection
348  fields must be represented by the escape sequences C<\t> and C<\n>, respectively. The fields must  value will be the genome ID. The genomes will be sorted by genus/species name.
 be presented in the order given in the relation tables produced by the L</ShowMetaData> method.  
349    
350  =over 4  =over 4
351    
352  =item rebuild  =item attributes
   
 TRUE if the data tables need to be created or re-created, else FALSE  
   
 =item RETURN  
   
 Returns a statistical object containing the number of records read, the number of duplicates found,  
 the number of errors, and a list of the error messages.  
   
 =back  
353    
354  =cut  Reference to a hash mapping attributes to values for the SELECT tag generated.
 #: Return Type %;  
 sub Load {  
         # Get the parameters.  
         my ($self, $rebuild) = @_;  
         # Get the database object.  
         my $erdb = $self->{_erdb};  
         # Load the tables from the data directory.  
         my $retVal = $erdb->LoadTables($self->{_options}->{dataDir}, $rebuild);  
         # Return the statistics.  
         return $retVal;  
 }  
355    
356  =head3 LoadUpdate  =item filterString
357    
358  C<< my %stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >>  A filter string for use in selecting the genomes. The filter string must conform
359    to the rules for the C<< ERDB->Get >> method.
360    
361  Load updates to one or more database tables. This method enables the client to make changes to one  =item params
 or two tables without reloading the whole database. For each table, there must be a corresponding  
 file in the data directory, either with the same name as the table, or with a C<.dtx> suffix. So,  
 for example, to make updates to the B<FeatureTranslation> relation, there must be a  
 C<FeatureTranslation.dtx> file in the data directory. Unlike a full load, files without an extension  
 are not examined. This allows update files to co-exist with files from an original load.  
362    
363  =over 4  Reference to a list of values to be substituted in for the parameter marks in
364    the filter string.
365    
366  =item truncateFlag  =item selected (optional)
367    
368  TRUE if the tables should be rebuilt before loading, else FALSE. A value of TRUE therefore causes  ID of the genome to be initially selected.
 current data and schema of the tables to be replaced, while a value of FALSE means the new data  
 is added to the existing data in the various relations.  
369    
370  =item tableList  =item fast (optional)
371    
372  List of the tables to be updated.  If specified and TRUE, the contig counts will be omitted to improve performance.
373    
374  =item RETURN  =item RETURN
375    
376  Returns a statistical object containing the number of records read, the number of duplicates found,  Returns an HTML select menu with the specified genomes as selectable options.
 the number of errors encountered, and a list of error messages.  
377    
378  =back  =back
379    
380  =cut  =cut
381  #: Return Type $%;  
382  sub LoadUpdate {  sub GeneMenu {
383          # Get the parameters.          # Get the parameters.
384          my ($self, $truncateFlag, $tableList) = @_;      my ($self, $attributes, $filterString, $params, $selected, $fast) = @_;
385          # Get the database object.      my $slowMode = ! $fast;
386          my $erdb = $self->{_erdb};      # Default to nothing selected. This prevents an execution warning if "$selected"
387          # Declare the return value.      # is undefined.
388          my $retVal = Stats->new();      $selected = "" unless defined $selected;
389          # Get the data directory.      Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3);
390          my $optionTable = $self->{_options};      # Start the menu.
391          my $dataDir = $optionTable->{dataDir};      my $retVal = "<select " .
392          # Loop through the incoming table names.          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .
393          for my $tableName (@{$tableList}) {          ">\n";
394                  # Find the table's file.      # Get the genomes.
395                  my $fileName = "$dataDir/$tableName";      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
396                  if (! -e $fileName) {                                                                       'Genome(genus)',
397                          $fileName = "$fileName.dtx";                                                                       'Genome(species)',
398                  }                                                                       'Genome(unique-characterization)']);
399                  # Attempt to load this table.      # Sort them by name.
400                  my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
401                  # Accumulate the resulting statistics.      # Loop through the genomes, creating the option tags.
402                  $retVal->Accumulate($result);      for my $genomeData (@sorted) {
403            # Get the data for this genome.
404            my ($genomeID, $genus, $species, $strain) = @{$genomeData};
405            # Get the contig count.
406            my $contigInfo = "";
407            if ($slowMode) {
408                my $count = $self->ContigCount($genomeID);
409                my $counting = ($count == 1 ? "contig" : "contigs");
410                $contigInfo = "[$count $counting]";
411            }
412            # Find out if we're selected.
413            my $selectOption = ($selected eq $genomeID ? " selected" : "");
414            # Build the option tag.
415            $retVal .= "<option value=\"$genomeID\"$selectOption>$genus $species $strain ($genomeID)$contigInfo</option>\n";
416          }          }
417          # Return the statistics.      # Close the SELECT tag.
418        $retVal .= "</select>\n";
419        # Return the result.
420          return $retVal;          return $retVal;
421  }  }
422    
# Line 459  Line 434 
434          # Get the parameters.          # Get the parameters.
435          my ($self) = @_;          my ($self) = @_;
436          # Create the tables.          # Create the tables.
437          $self->{_erdb}->CreateTables;      $self->CreateTables();
438  }  }
439    
440  =head3 Genomes  =head3 Genomes
# Line 582  Line 557 
557  =item RETURN  =item RETURN
558    
559  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
560  context and as a space-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context.
561    
562  =back  =back
563    
# Line 609  Line 584 
584                  if ($prevContig eq $contigID && $dir eq $prevDir) {                  if ($prevContig eq $contigID && $dir eq $prevDir) {
585                          # Here the new segment is in the same direction on the same contig. Insure the                          # Here the new segment is in the same direction on the same contig. Insure the
586                          # new segment's beginning is next to the old segment's end.                          # new segment's beginning is next to the old segment's end.
587                          if (($dir eq "-" && $beg == $prevBeg - $prevLen) ||              if ($dir eq "-" && $beg + $len == $prevBeg) {
588                                  ($dir eq "+" && $beg == $prevBeg + $prevLen)) {                  # Here we're merging two backward blocks, so we keep the new begin point
589                                  # Here we need to merge two segments. Adjust the beginning and length values                  # and adjust the length.
590                                  # to include both segments.                  $len += $prevLen;
591                    # Pop the old segment off. The new one will replace it later.
592                    pop @retVal;
593                } elsif ($dir eq "+" && $beg == $prevBeg + $prevLen) {
594                    # Here we need to merge two forward blocks. Adjust the beginning and
595                    # length values to include both segments.
596                                  $beg = $prevBeg;                                  $beg = $prevBeg;
597                                  $len += $prevLen;                                  $len += $prevLen;
598                                  # Pop the old segment off. The new one will replace it later.                                  # Pop the old segment off. The new one will replace it later.
# Line 621  Line 601 
601                  }                  }
602                  # Remember this specifier for the adjacent-segment test the next time through.                  # Remember this specifier for the adjacent-segment test the next time through.
603                  ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);                  ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);
604            # Compute the initial base pair.
605            my $start = ($dir eq "+" ? $beg : $beg + $len - 1);
606                  # Add the specifier to the list.                  # Add the specifier to the list.
607                  push @retVal, "${contigID}_$beg$dir$len";          push @retVal, "${contigID}_$start$dir$len";
608          }          }
609          # Return the list in the format indicated by the context.          # Return the list in the format indicated by the context.
610          return (wantarray ? @retVal : join(' ', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
611  }  }
612    
613  =head3 ParseLocation  =head3 ParseLocation
# Line 656  Line 638 
638      shift if UNIVERSAL::isa($_[0],__PACKAGE__);      shift if UNIVERSAL::isa($_[0],__PACKAGE__);
639          my ($location) = @_;          my ($location) = @_;
640          # Parse it into segments.          # Parse it into segments.
641          $location =~ /^(.*)_(\d*)([+-_])(\d*)$/;      $location =~ /^(.+)_(\d+)([+\-_])(\d+)$/;
642          my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4);          my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4);
643          # If the direction is an underscore, convert it to a + or -.          # If the direction is an underscore, convert it to a + or -.
644          if ($dir eq "_") {          if ($dir eq "_") {
# Line 672  Line 654 
654          return ($contigID, $start, $dir, $len);          return ($contigID, $start, $dir, $len);
655  }  }
656    
657    
658    
659  =head3 PointLocation  =head3 PointLocation
660    
661  C<< my $found = Sprout::PointLocation($location, $point); >>  C<< my $found = Sprout::PointLocation($location, $point); >>
# Line 732  Line 716 
716  should be of the form returned by L</featureLocation> when in a list context. In other words,  should be of the form returned by L</featureLocation> when in a list context. In other words,
717  each location is of the form I<contigID>C<_>I<begin>I<dir>I<end>.  each location is of the form I<contigID>C<_>I<begin>I<dir>I<end>.
718    
719    For example, the following would return the DNA sequence for contig C<83333.1:NC_000913>
720    between positions 1401 and 1532, inclusive.
721    
722        my $sequence = $sprout->DNASeq('83333.1:NC_000913_1401_1532');
723    
724  =over 4  =over 4
725    
726  =item locationList  =item locationList
727    
728  List of location specifiers, each in the form I<contigID>C<_>I<begin>I<dir>I<end> (see  List of location specifiers, each in the form I<contigID>C<_>I<begin>I<dir>I<len> or
729  L</FeatureLocation> for more about this format).  I<contigID>C<_>I<begin>C<_>I<end> (see L</FeatureLocation> for more about this format).
730    
731  =item RETURN  =item RETURN
732    
# Line 764  Line 753 
753                  # the start point is the ending. Note that in the latter case we must reverse the DNA string                  # the start point is the ending. Note that in the latter case we must reverse the DNA string
754                  # before putting it in the return value.                  # before putting it in the return value.
755                  my ($start, $stop);                  my ($start, $stop);
756            Trace("Parse of \"$location\" is $beg$dir$len.") if T(SDNA => 4);
757                  if ($dir eq "+") {                  if ($dir eq "+") {
758                          $start = $beg;                          $start = $beg;
759                          $stop = $beg + $len - 1;                          $stop = $beg + $len - 1;
760                  } else {                  } else {
761                          $start = $beg + $len + 1;              $start = $beg - $len + 1;
762                          $stop = $beg;                          $stop = $beg;
763                  }                  }
764            Trace("Looking for sequences containing $start through $stop.") if T(SDNA => 4);
765                  my $query = $self->Get(['IsMadeUpOf','Sequence'],                  my $query = $self->Get(['IsMadeUpOf','Sequence'],
766                          "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .                          "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .
767                          " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",                          " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",
# Line 782  Line 773 
773                                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',                                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',
774                                                                     'IsMadeUpOf(len)']);                                                                     'IsMadeUpOf(len)']);
775                          my $stopPosition = $startPosition + $sequenceLength;                          my $stopPosition = $startPosition + $sequenceLength;
776                Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4);
777                          # Figure out the start point and length of the relevant section.                          # Figure out the start point and length of the relevant section.
778                          my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);                          my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);
779                          my $len = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1;              my $len1 = ($stopPosition < $stop ? $stopPosition : $stop) + 1 - $startPosition - $pos1;
780                Trace("Position is $pos1 for length $len1.") if T(SDNA => 4);
781                          # Add the relevant data to the location data.                          # Add the relevant data to the location data.
782                          $locationDNA .= substr($sequenceData, $pos1, $len);              $locationDNA .= substr($sequenceData, $pos1, $len1);
783                  }                  }
784                  # Add this location's data to the return string. Note that we may need to reverse it.                  # Add this location's data to the return string. Note that we may need to reverse it.
785                  if ($dir eq '+') {                  if ($dir eq '+') {
786                          $retVal .= $locationDNA;                          $retVal .= $locationDNA;
787                  } else {                  } else {
788                          $locationDNA = join('', reverse split //, $locationDNA);              $retVal .= FIG::reverse_comp($locationDNA);
                         $retVal .= $locationDNA;  
789                  }                  }
790          }          }
791          # Return the result.          # Return the result.
# Line 830  Line 822 
822          return @retVal;          return @retVal;
823  }  }
824    
825    =head3 GenomeLength
826    
827    C<< my $length = $sprout->GenomeLength($genomeID); >>
828    
829    Return the length of the specified genome in base pairs.
830    
831    =over 4
832    
833    =item genomeID
834    
835    ID of the genome whose base pair count is desired.
836    
837    =item RETURN
838    
839    Returns the number of base pairs in all the contigs of the specified
840    genome.
841    
842    =back
843    
844    =cut
845    
846    sub GenomeLength {
847        # Get the parameters.
848        my ($self, $genomeID) = @_;
849        # Declare the return variable.
850        my $retVal = 0;
851        # Get the genome's contig sequence lengths.
852        my @lens = $self->GetFlat(['HasContig', 'IsMadeUpOf'], 'HasContig(from-link) = ?',
853                           [$genomeID], 'IsMadeUpOf(len)');
854        # Sum the lengths.
855        map { $retVal += $_ } @lens;
856        # Return the result.
857        return $retVal;
858    }
859    
860    =head3 FeatureCount
861    
862    C<< my $count = $sprout->FeatureCount($genomeID, $type); >>
863    
864    Return the number of features of the specified type in the specified genome.
865    
866    =over 4
867    
868    =item genomeID
869    
870    ID of the genome whose feature count is desired.
871    
872    =item type
873    
874    Type of feature to count (eg. C<peg>, C<rna>, etc.).
875    
876    =item RETURN
877    
878    Returns the number of features of the specified type for the specified genome.
879    
880    =back
881    
882    =cut
883    
884    sub FeatureCount {
885        # Get the parameters.
886        my ($self, $genomeID, $type) = @_;
887        # Compute the count.
888        my $retVal = $self->GetCount(['HasFeature', 'Feature'],
889                                    "HasFeature(from-link) = ? AND Feature(feature-type) = ?",
890                                    [$genomeID, $type]);
891        # Return the result.
892        return $retVal;
893    }
894    
895    =head3 GenomeAssignments
896    
897    C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >>
898    
899    Return a list of a genome's assigned features. The return hash will contain each
900    assigned feature of the genome mapped to the text of its most recent functional
901    assignment.
902    
903    =over 4
904    
905    =item genomeID
906    
907    ID of the genome whose functional assignments are desired.
908    
909    =item RETURN
910    
911    Returns a reference to a hash which maps each feature to its most recent
912    functional assignment.
913    
914    =back
915    
916    =cut
917    
918    sub GenomeAssignments {
919        # Get the parameters.
920        my ($self, $genomeID) = @_;
921        # Declare the return variable.
922        my $retVal = {};
923        # Query the genome's features.
924        my $query = $self->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ?",
925                               [$genomeID]);
926        # Loop through the features.
927        while (my $data = $query->Fetch) {
928            # Get the feature ID and assignment.
929            my ($fid, $assignment) = $data->Values(['Feature(id)', 'Feature(assignment)']);
930            if ($assignment) {
931                $retVal->{$fid} = $assignment;
932            }
933        }
934        # Return the result.
935        return $retVal;
936    }
937    
938  =head3 ContigLength  =head3 ContigLength
939    
940  C<< my $length = $sprout->ContigLength($contigID); >>  C<< my $length = $sprout->ContigLength($contigID); >>
# Line 863  Line 968 
968          # Set it from the sequence data, if any.          # Set it from the sequence data, if any.
969          if ($sequence) {          if ($sequence) {
970                  my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']);                  my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']);
971                  $retVal = $start + $len;          $retVal = $start + $len - 1;
972          }          }
973          # Return the result.          # Return the result.
974          return $retVal;          return $retVal;
975  }  }
976    
977  =head3 GenesInRegion  =head3 ClusterPEGs
978    
979  C<< my (\@featureIDList, $beg, $end) = $sprout->GenesInRegion($contigID, $start, $stop); >>  C<< my $clusteredList = $sprout->ClusterPEGs($sub, \@pegs); >>
980    
981  List the features which overlap a specified region in a contig.  Cluster the PEGs in a list according to the cluster coding scheme of the specified
982    subsystem. In order for this to work properly, the subsystem object must have
983    been used recently to retrieve the PEGs using the B<get_pegs_from_cell> method.
984    This causes the cluster numbers to be pulled into the subsystem's color hash.
985    If a PEG is not found in the color hash, it will not appear in the output
986    sequence.
987    
988  =over 4  =over 4
989    
990  =item contigID  =item sub
   
 ID of the contig containing the region of interest.  
   
 =item start  
991    
992  Offset of the first residue in the region of interest.  Sprout subsystem object for the relevant subsystem, from the L</get_subsystem>
993    method.
994    
995  =item stop  =item pegs
996    
997  Offset of the last residue in the region of interest.  Reference to the list of PEGs to be clustered.
998    
999    =item RETURN
1000    
1001    Returns a list of the PEGs, grouped into smaller lists by cluster number.
1002    
1003    =back
1004    
1005    =cut
1006    #: Return Type $@@;
1007    sub ClusterPEGs {
1008        # Get the parameters.
1009        my ($self, $sub, $pegs) = @_;
1010        # Declare the return variable.
1011        my $retVal = [];
1012        # Loop through the PEGs, creating arrays for each cluster.
1013        for my $pegID (@{$pegs}) {
1014            my $clusterNumber = $sub->get_cluster_number($pegID);
1015            # Only proceed if the PEG is in a cluster.
1016            if ($clusterNumber >= 0) {
1017                # Push this PEG onto the sub-list for the specified cluster number.
1018                push @{$retVal->[$clusterNumber]}, $pegID;
1019            }
1020        }
1021        # Return the result.
1022        return $retVal;
1023    }
1024    
1025    =head3 GenesInRegion
1026    
1027    C<< my (\@featureIDList, $beg, $end) = $sprout->GenesInRegion($contigID, $start, $stop); >>
1028    
1029    List the features which overlap a specified region in a contig.
1030    
1031    =over 4
1032    
1033    =item contigID
1034    
1035    ID of the contig containing the region of interest.
1036    
1037    =item start
1038    
1039    Offset of the first residue in the region of interest.
1040    
1041    =item stop
1042    
1043    Offset of the last residue in the region of interest.
1044    
1045  =item RETURN  =item RETURN
1046    
# Line 1013  Line 1166 
1166    
1167  =head3 FeatureAnnotations  =head3 FeatureAnnotations
1168    
1169  C<< my @descriptors = $sprout->FeatureAnnotations($featureID); >>  C<< my @descriptors = $sprout->FeatureAnnotations($featureID, $rawFlag); >>
1170    
1171  Return the annotations of a feature.  Return the annotations of a feature.
1172    
# Line 1023  Line 1176 
1176    
1177  ID of the feature whose annotations are desired.  ID of the feature whose annotations are desired.
1178    
1179    =item rawFlag
1180    
1181    If TRUE, the annotation timestamps will be returned in raw form; otherwise, they
1182    will be returned in human-readable form.
1183    
1184  =item RETURN  =item RETURN
1185    
1186  Returns a list of annotation descriptors. Each descriptor is a hash with the following fields.  Returns a list of annotation descriptors. Each descriptor is a hash with the following fields.
1187    
1188  * B<featureID> ID of the relevant feature.  * B<featureID> ID of the relevant feature.
1189    
1190  * B<timeStamp> time the annotation was made, in user-friendly format.  * B<timeStamp> time the annotation was made.
1191    
1192  * B<user> ID of the user who made the annotation  * B<user> ID of the user who made the annotation
1193    
# Line 1041  Line 1199 
1199  #: Return Type @%;  #: Return Type @%;
1200  sub FeatureAnnotations {  sub FeatureAnnotations {
1201          # Get the parameters.          # Get the parameters.
1202          my ($self, $featureID) = @_;      my ($self, $featureID, $rawFlag) = @_;
1203          # Create a query to get the feature's annotations and the associated users.          # Create a query to get the feature's annotations and the associated users.
1204          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1205                                                     "IsTargetOfAnnotation(from-link) = ?", [$featureID]);                                                     "IsTargetOfAnnotation(from-link) = ?", [$featureID]);
# Line 1054  Line 1212 
1212                          $annotation->Values(['IsTargetOfAnnotation(from-link)',                          $annotation->Values(['IsTargetOfAnnotation(from-link)',
1213                                                                   'Annotation(time)', 'MadeAnnotation(from-link)',                                                                   'Annotation(time)', 'MadeAnnotation(from-link)',
1214                                                                   'Annotation(annotation)']);                                                                   'Annotation(annotation)']);
1215            # Convert the time, if necessary.
1216            if (! $rawFlag) {
1217                $timeStamp = FriendlyTimestamp($timeStamp);
1218            }
1219                  # Assemble them into a hash.                  # Assemble them into a hash.
1220          my $annotationHash = { featureID => $featureID,          my $annotationHash = { featureID => $featureID,
1221                                 timeStamp => FriendlyTimestamp($timeStamp),                                 timeStamp => $timeStamp,
1222                                                             user => $user, text => $text };                                                             user => $user, text => $text };
1223                  # Add it to the return list.                  # Add it to the return list.
1224                  push @retVal, $annotationHash;                  push @retVal, $annotationHash;
# Line 1070  Line 1232 
1232  C<< my %functions = $sprout->AllFunctionsOf($featureID); >>  C<< my %functions = $sprout->AllFunctionsOf($featureID); >>
1233    
1234  Return all of the functional assignments for a particular feature. The data is returned as a  Return all of the functional assignments for a particular feature. The data is returned as a
1235  hash of functional assignments to user IDs. A functional assignment is a type of annotation.  hash of functional assignments to user IDs. A functional assignment is a type of annotation,
1236  It has the format "XXXX\nset XXXX function to\nYYYYY". In this instance, XXXX is the user ID  Functional assignments are described in the L</ParseAssignment> function. Its worth noting that
1237  and YYYYY is the functional assignment text. Its worth noting that we cannot filter on the content  we cannot filter on the content of the annotation itself because it's a text field; however,
1238  of the annotation itself because it's a text field; however, this is not a big problem because most  this is not a big problem because most features only have a small number of annotations.
1239  features only have a small number of annotations. Finally, if a single user has multiple  Finally, if a single user has multiple functional assignments, we will only keep the most
1240  functional assignments, we will only keep the most recent one.  recent one.
1241    
1242  =over 4  =over 4
1243    
# Line 1085  Line 1247 
1247    
1248  =item RETURN  =item RETURN
1249    
1250  Returns a hash mapping the functional assignment IDs to user IDs.  Returns a hash mapping the user IDs to functional assignment IDs.
1251    
1252  =back  =back
1253    
# Line 1095  Line 1257 
1257          # Get the parameters.          # Get the parameters.
1258          my ($self, $featureID) = @_;          my ($self, $featureID) = @_;
1259          # Get all of the feature's annotations.          # Get all of the feature's annotations.
1260      my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation'],      my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1261                                                      "IsTargetOfAnnotation(from-link) = ?",                                                      "IsTargetOfAnnotation(from-link) = ?",
1262                              [$featureID], ['Annotation(time)', 'Annotation(annotation)']);                              [$featureID], ['Annotation(time)', 'Annotation(annotation)',
1263                                               'MadeAnnotation(from-link)']);
1264          # Declare the return hash.          # Declare the return hash.
1265          my %retVal;          my %retVal;
     # Declare a hash for insuring we only make one assignment per user.  
     my %timeHash = ();  
1266      # Now we sort the assignments by timestamp in reverse.      # Now we sort the assignments by timestamp in reverse.
1267      my @sortedQuery = sort { -($a->[0] <=> $b->[0]) } @query;      my @sortedQuery = sort { -($a->[0] <=> $b->[0]) } @query;
1268          # Loop until we run out of annotations.          # Loop until we run out of annotations.
1269      for my $annotation (@sortedQuery) {      for my $annotation (@sortedQuery) {
1270          # Get the annotation fields.          # Get the annotation fields.
1271          my ($timeStamp, $text) = @{$annotation};          my ($timeStamp, $text, $user) = @{$annotation};
1272                  # Check to see if this is a functional assignment.                  # Check to see if this is a functional assignment.
1273                  my ($user, $function) = ParseAssignment($text);          my ($actualUser, $function) = _ParseAssignment($user, $text);
1274          if ($user && ! exists $timeHash{$user}) {          if ($actualUser && ! exists $retVal{$actualUser}) {
1275              # Here it is a functional assignment and there has been no              # Here it is a functional assignment and there has been no
1276              # previous assignment for this user, so we stuff it in the              # previous assignment for this user, so we stuff it in the
1277              # return hash.              # return hash.
1278                          $retVal{$function} = $user;              $retVal{$actualUser} = $function;
             # Insure we don't assign to this user again.  
             $timeHash{$user} = 1;  
1279                  }                  }
1280          }          }
1281          # Return the hash of assignments found.          # Return the hash of assignments found.
# Line 1130  Line 1289 
1289  Return the most recently-determined functional assignment of a particular feature.  Return the most recently-determined functional assignment of a particular feature.
1290    
1291  The functional assignment is handled differently depending on the type of feature. If  The functional assignment is handled differently depending on the type of feature. If
1292  the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional  the feature is identified by a FIG ID (begins with the string C<fig|>), then the functional
1293  assignment is a type of annotation. It has the format "XXXX\nset XXXX function to\nYYYYY". In this  assignment is taken from the B<Feature> or C<Annotation> table, depending.
 instance, XXXX is the user ID and YYYYY is the functional assignment text. Its worth noting that  
 we cannot filter on the content of the annotation itself because it's a text field; however, this  
 is not a big problem because most features only have a small number of annotations.  
1294    
1295  Each user has an associated list of trusted users. The assignment returned will be the most  Each user has an associated list of trusted users. The assignment returned will be the most
1296  recent one by at least one of the trusted users. If no trusted user list is available, then  recent one by at least one of the trusted users. If no trusted user list is available, then
# Line 1153  Line 1309 
1309    
1310  =item userID (optional)  =item userID (optional)
1311    
1312  ID of the user whose function determination is desired. If omitted, only the latest  ID of the user whose function determination is desired. If omitted, the primary
1313  C<FIG> assignment will be returned.  functional assignment in the B<Feature> table will be returned.
1314    
1315  =item RETURN  =item RETURN
1316    
# Line 1171  Line 1327 
1327      my $retVal;      my $retVal;
1328      # Determine the ID type.      # Determine the ID type.
1329      if ($featureID =~ m/^fig\|/) {      if ($featureID =~ m/^fig\|/) {
1330          # Here we have a FIG feature ID. We must build the list of trusted          # Here we have a FIG feature ID.
1331          # users.          if (!$userID) {
1332                # Use the primary assignment.
1333                ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(assignment)']);
1334            } else {
1335                # We must build the list of trusted users.
1336          my %trusteeTable = ();          my %trusteeTable = ();
1337          # Check the user ID.          # Check the user ID.
1338          if (!$userID) {          if (!$userID) {
# Line 1194  Line 1354 
1354              }              }
1355          }          }
1356          # Build a query for all of the feature's annotations, sorted by date.          # Build a query for all of the feature's annotations, sorted by date.
1357          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation'],              my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1358                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",
1359                                 [$featureID]);                                 [$featureID]);
1360          my $timeSelected = 0;          my $timeSelected = 0;
1361          # Loop until we run out of annotations.          # Loop until we run out of annotations.
1362          while (my $annotation = $query->Fetch()) {          while (my $annotation = $query->Fetch()) {
1363              # Get the annotation text.              # Get the annotation text.
1364              my ($text, $time) = $annotation->Values(['Annotation(annotation)','Annotation(time)']);                  my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)',
1365                                                             'Annotation(time)', 'MadeAnnotation(from-link)']);
1366              # Check to see if this is a functional assignment for a trusted user.              # Check to see if this is a functional assignment for a trusted user.
1367              my ($user, $type, $function) = split(/\n/, $text);                  my ($actualUser, $function) = _ParseAssignment($user, $text);
1368              if ($type =~ m/^set $user function to$/i) {                  Trace("Assignment user is $actualUser, text is $function.") if T(4);
1369                    if ($actualUser) {
1370                  # Here it is a functional assignment. Check the time and the user                  # Here it is a functional assignment. Check the time and the user
1371                  # name. The time must be recent and the user must be trusted.                  # name. The time must be recent and the user must be trusted.
1372                  if ((exists $trusteeTable{$user}) && ($time > $timeSelected)) {                      if ((exists $trusteeTable{$actualUser}) && ($time > $timeSelected)) {
1373                      $retVal = $function;                      $retVal = $function;
1374                      $timeSelected = $time;                      $timeSelected = $time;
1375                  }                  }
1376              }              }
1377          }          }
1378            }
1379      } else {      } else {
1380          # Here we have a non-FIG feature ID. In this case the user ID does not          # Here we have a non-FIG feature ID. In this case the user ID does not
1381          # matter. We simply get the information from the External Alias Function          # matter. We simply get the information from the External Alias Function
# Line 1223  Line 1386 
1386          return $retVal;          return $retVal;
1387  }  }
1388    
1389    =head3 FunctionsOf
1390    
1391    C<< my @functionList = $sprout->FunctionOf($featureID, $userID); >>
1392    
1393    Return the functional assignments of a particular feature.
1394    
1395    The functional assignment is handled differently depending on the type of feature. If
1396    the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional
1397    assignment is a type of annotation. The format of an assignment is described in
1398    L</ParseAssignment>. Its worth noting that we cannot filter on the content of the
1399    annotation itself because it's a text field; however, this is not a big problem because
1400    most features only have a small number of annotations.
1401    
1402    If the feature is B<not> identified by a FIG ID, then the functional assignment
1403    information is taken from the B<ExternalAliasFunc> table. If the table does
1404    not contain an entry for the feature, an empty list is returned.
1405    
1406    =over 4
1407    
1408    =item featureID
1409    
1410    ID of the feature whose functional assignments are desired.
1411    
1412    =item RETURN
1413    
1414    Returns a list of 2-tuples, each consisting of a user ID and the text of an assignment by
1415    that user.
1416    
1417    =back
1418    
1419    =cut
1420    #: Return Type @@;
1421    sub FunctionsOf {
1422        # Get the parameters.
1423        my ($self, $featureID) = @_;
1424        # Declare the return value.
1425        my @retVal = ();
1426        # Determine the ID type.
1427        if ($featureID =~ m/^fig\|/) {
1428            # Here we have a FIG feature ID. We must build the list of trusted
1429            # users.
1430            my %trusteeTable = ();
1431            # Build a query for all of the feature's annotations, sorted by date.
1432            my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1433                                   "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",
1434                                   [$featureID]);
1435            my $timeSelected = 0;
1436            # Loop until we run out of annotations.
1437            while (my $annotation = $query->Fetch()) {
1438                # Get the annotation text.
1439                my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)',
1440                                                                'Annotation(time)',
1441                                                                'MadeAnnotation(user)']);
1442                # Check to see if this is a functional assignment for a trusted user.
1443                my ($actualUser, $function) = _ParseAssignment($user, $text);
1444                if ($actualUser) {
1445                    # Here it is a functional assignment.
1446                    push @retVal, [$actualUser, $function];
1447                }
1448            }
1449        } else {
1450            # Here we have a non-FIG feature ID. In this case the user ID does not
1451            # matter. We simply get the information from the External Alias Function
1452            # table.
1453            my @assignments = $self->GetEntityValues('ExternalAliasFunc', $featureID,
1454                                                     ['ExternalAliasFunc(func)']);
1455            push @retVal, map { ['master', $_] } @assignments;
1456        }
1457        # Return the assignments found.
1458        return @retVal;
1459    }
1460    
1461  =head3 BBHList  =head3 BBHList
1462    
1463  C<< my $bbhHash = $sprout->BBHList($genomeID, \@featureList); >>  C<< my $bbhHash = $sprout->BBHList($genomeID, \@featureList); >>
# Line 1242  Line 1477 
1477    
1478  =item RETURN  =item RETURN
1479    
1480  Returns a reference to a hash that maps the IDs of the incoming features to the IDs of  Returns a reference to a hash that maps the IDs of the incoming features to the best hits
1481  their best hits.  on the target genome.
1482    
1483  =back  =back
1484    
# Line 1256  Line 1491 
1491          my %retVal = ();          my %retVal = ();
1492          # Loop through the incoming features.          # Loop through the incoming features.
1493          for my $featureID (@{$featureList}) {          for my $featureID (@{$featureList}) {
1494                  # Create a query to get the feature's best hit.          # Ask the server for the feature's best hit.
1495                  my $query = $self->Get(['IsBidirectionalBestHitOf'],          my @bbhData = FIGRules::BBHData($featureID);
1496                                                             "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?",          # Peel off the BBHs found.
1497                                                             [$featureID, $genomeID]);          my @found = ();
1498                  # Look for the best hit.          for my $bbh (@bbhData) {
1499                  my $bbh = $query->Fetch;              my $fid = $bbh->[0];
1500                  if ($bbh) {              my $bbGenome = $self->GenomeOf($fid);
1501                          my ($targetFeature) = $bbh->Value('IsBidirectionalBestHitOf(to-link)');              if ($bbGenome eq $genomeID) {
1502                          $retVal{$featureID} = $targetFeature;                  push @found, $fid;
1503                }
1504                  }                  }
1505            $retVal{$featureID} = \@found;
1506          }          }
1507          # Return the mapping.          # Return the mapping.
1508          return \%retVal;          return \%retVal;
1509  }  }
1510    
1511    =head3 SimList
1512    
1513    C<< my %similarities = $sprout->SimList($featureID, $count); >>
1514    
1515    Return a list of the similarities to the specified feature.
1516    
1517    This method just returns the bidirectional best hits for performance reasons.
1518    
1519    =over 4
1520    
1521    =item featureID
1522    
1523    ID of the feature whose similarities are desired.
1524    
1525    =item count
1526    
1527    Maximum number of similar features to be returned, or C<0> to return them all.
1528    
1529    =back
1530    
1531    =cut
1532    #: Return Type %;
1533    sub SimList {
1534        # Get the parameters.
1535        my ($self, $featureID, $count) = @_;
1536        # Ask for the best hits.
1537        my @lists = FIGRules::BBHData($featureID);
1538        # Create the return value.
1539        my %retVal = ();
1540        for my $tuple (@lists) {
1541            $retVal{$tuple->[0]} = $tuple->[1];
1542        }
1543        # Return the result.
1544        return %retVal;
1545    }
1546    
1547    =head3 IsComplete
1548    
1549    C<< my $flag = $sprout->IsComplete($genomeID); >>
1550    
1551    Return TRUE if the specified genome is complete, else FALSE.
1552    
1553    =over 4
1554    
1555    =item genomeID
1556    
1557    ID of the genome whose completeness status is desired.
1558    
1559    =item RETURN
1560    
1561    Returns TRUE if the genome is complete, FALSE if it is incomplete, and C<undef> if it is
1562    not found.
1563    
1564    =back
1565    
1566    =cut
1567    #: Return Type $;
1568    sub IsComplete {
1569        # Get the parameters.
1570        my ($self, $genomeID) = @_;
1571        # Declare the return variable.
1572        my $retVal;
1573        # Get the genome's data.
1574        my $genomeData = $self->GetEntity('Genome', $genomeID);
1575        if ($genomeData) {
1576            # The genome exists, so get the completeness flag.
1577            ($retVal) = $genomeData->Value('Genome(complete)');
1578        }
1579        # Return the result.
1580        return $retVal;
1581    }
1582    
1583  =head3 FeatureAliases  =head3 FeatureAliases
1584    
1585  C<< my @aliasList = $sprout->FeatureAliases($featureID); >>  C<< my @aliasList = $sprout->FeatureAliases($featureID); >>
# Line 1305  Line 1614 
1614    
1615  C<< my $genomeID = $sprout->GenomeOf($featureID); >>  C<< my $genomeID = $sprout->GenomeOf($featureID); >>
1616    
1617  Return the genome that contains a specified feature.  Return the genome that contains a specified feature or contig.
1618    
1619  =over 4  =over 4
1620    
1621  =item featureID  =item featureID
1622    
1623  ID of the feature whose genome is desired.  ID of the feature or contig whose genome is desired.
1624    
1625  =item RETURN  =item RETURN
1626    
1627  Returns the ID of the genome for the specified feature. If the feature is not found, returns  Returns the ID of the genome for the specified feature or contig. If the feature or contig is not
1628  an undefined value.  found, returns an undefined value.
1629    
1630  =back  =back
1631    
# Line 1325  Line 1634 
1634  sub GenomeOf {  sub GenomeOf {
1635          # Get the parameters.          # Get the parameters.
1636          my ($self, $featureID) = @_;          my ($self, $featureID) = @_;
1637          # Create a query to find the genome associated with the feature.      # Create a query to find the genome associated with the incoming ID.
1638          my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ?", [$featureID]);      my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ? OR HasContig(to-link) = ?",
1639                               [$featureID, $featureID]);
1640          # Declare the return value.          # Declare the return value.
1641          my $retVal;          my $retVal;
1642          # Get the genome ID.          # Get the genome ID.
# Line 1361  Line 1671 
1671  sub CoupledFeatures {  sub CoupledFeatures {
1672          # Get the parameters.          # Get the parameters.
1673          my ($self, $featureID) = @_;          my ($self, $featureID) = @_;
1674          # Create a query to retrieve the functionally-coupled features. Note that we depend on the      Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1675          # fact that the functional coupling is physically paired. If (A,B) is in the database, then      # Create a query to retrieve the functionally-coupled features.
1676          # (B,A) will also be found.      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1677          my $query = $self->Get(['IsClusteredOnChromosomeWith'],                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
                                                    "IsClusteredOnChromosomeWith(from-link) = ?", [$featureID]);  
1678          # This value will be set to TRUE if we find at least one coupled feature.          # This value will be set to TRUE if we find at least one coupled feature.
1679          my $found = 0;          my $found = 0;
1680          # Create the return hash.          # Create the return hash.
1681          my %retVal = ();          my %retVal = ();
1682          # Retrieve the relationship records and store them in the hash.          # Retrieve the relationship records and store them in the hash.
1683          while (my $clustering = $query->Fetch()) {          while (my $clustering = $query->Fetch()) {
1684                  my ($otherFeatureID, $score) = $clustering->Values(['IsClusteredOnChromosomeWith(to-link)',          # Get the ID and score of the coupling.
1685                                                                      'IsClusteredOnChromosomeWith(score)']);          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1686                                                            'Coupling(score)']);
1687            Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1688            # Get the other feature that participates in the coupling.
1689            my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1690                                               "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1691                                               [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1692            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1693            # Attach the other feature's score to its ID.
1694                  $retVal{$otherFeatureID} = $score;                  $retVal{$otherFeatureID} = $score;
1695                  $found = 1;                  $found = 1;
1696          }          }
# Line 1386  Line 1703 
1703          return %retVal;          return %retVal;
1704  }  }
1705    
1706  =head3 GetEntityTypes  =head3 CouplingEvidence
1707    
1708  C<< my @entityList = $sprout->GetEntityTypes(); >>  C<< my @evidence = $sprout->CouplingEvidence($peg1, $peg2); >>
1709    
1710  Return the list of supported entity types.  Return the evidence for a functional coupling.
1711    
1712    A pair of features is considered evidence of a coupling between two other
1713    features if they occur close together on a contig and both are similar to
1714    the coupled features. So, if B<A1> and B<A2> are close together on a contig,
1715    B<B1> and B<B2> are considered evidence for the coupling if (1) B<B1> and
1716    B<B2> are close together, (2) B<B1> is similar to B<A1>, and (3) B<B2> is
1717    similar to B<A2>.
1718    
1719    The score of a coupling is determined by the number of pieces of evidence
1720    that are considered I<representative>. If several evidence items belong to
1721    a group of genomes that are close to each other, only one of those items
1722    is considered representative. The other evidence items are presumed to be
1723    there because of the relationship between the genomes rather than because
1724    the two proteins generated by the features have a related functionality.
1725    
1726    Each evidence item is returned as a three-tuple in the form C<[>I<$peg1a>C<,>
1727    I<$peg2a>C<,> I<$rep>C<]>, where I<$peg1a> is similar to I<$peg1>, I<$peg2a>
1728    is similar to I<$peg2>, and I<$rep> is TRUE if the evidence is representative
1729    and FALSE otherwise.
1730    
1731    =over 4
1732    
1733    =item peg1
1734    
1735    ID of the feature of interest.
1736    
1737    =item peg2
1738    
1739    ID of a feature functionally coupled to the feature of interest.
1740    
1741    =item RETURN
1742    
1743    Returns a list of 3-tuples. Each tuple consists of a feature similar to the feature
1744    of interest, a feature similar to the functionally coupled feature, and a flag
1745    that is TRUE for a representative piece of evidence and FALSE otherwise.
1746    
1747    =back
1748    
1749  =cut  =cut
1750  #: Return Type @;  #: Return Type @@;
1751  sub GetEntityTypes {  sub CouplingEvidence {
1752          # Get the parameters.          # Get the parameters.
1753          my ($self) = @_;      my ($self, $peg1, $peg2) = @_;
1754          # Get the underlying database object.      # Declare the return variable.
1755          my $erdb = $self->{_erdb};      my @retVal = ();
1756          # Get its entity type list.      # Our first task is to find out the nature of the coupling: whether or not
1757          my @retVal = $erdb->GetEntityTypes();      # it exists, its score, and whether the features are stored in the same
1758        # order as the ones coming in.
1759        my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);
1760        # Only proceed if a coupling exists.
1761        if ($couplingID) {
1762            # Determine the ordering to place on the evidence items. If we're
1763            # inverted, we want to see feature 2 before feature 1 (descending); otherwise,
1764            # we want feature 1 before feature 2 (normal).
1765            Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4);
1766            my $ordering = ($inverted ? "DESC" : "");
1767            # Get the coupling evidence.
1768            my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],
1769                                              "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering",
1770                                              [$couplingID],
1771                                              ['PCH(used)', 'UsesAsEvidence(to-link)']);
1772            # Loop through the evidence items. Each piece of evidence is represented by two
1773            # positions in the evidence list, one for each feature on the other side of the
1774            # evidence link. If at some point we want to generalize to couplings with
1775            # more than two positions, this section of code will need to be re-done.
1776            while (@evidenceList > 0) {
1777                my $peg1Data = shift @evidenceList;
1778                my $peg2Data = shift @evidenceList;
1779                Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4);
1780                push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];
1781            }
1782            Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4);
1783        }
1784        # Return the result.
1785        return @retVal;
1786    }
1787    
1788    =head3 GetCoupling
1789    
1790    C<< my ($couplingID, $inverted, $score) = $sprout->GetCoupling($peg1, $peg2); >>
1791    
1792    Return the coupling (if any) for the specified pair of PEGs. If a coupling
1793    exists, we return the coupling ID along with an indicator of whether the
1794    coupling is stored as C<(>I<$peg1>C<, >I<$peg2>C<)> or C<(>I<$peg2>C<, >I<$peg1>C<)>.
1795    In the second case, we say the coupling is I<inverted>. The importance of an
1796    inverted coupling is that the PEGs in the evidence will appear in reverse order.
1797    
1798    =over 4
1799    
1800    =item peg1
1801    
1802    ID of the feature of interest.
1803    
1804    =item peg2
1805    
1806    ID of the potentially coupled feature.
1807    
1808    =item RETURN
1809    
1810    Returns a three-element list. The first element contains the database ID of
1811    the coupling. The second element is FALSE if the coupling is stored in the
1812    database in the caller specified order and TRUE if it is stored in the
1813    inverted order. The third element is the coupling's score. If the coupling
1814    does not exist, all three list elements will be C<undef>.
1815    
1816    =back
1817    
1818    =cut
1819    #: Return Type $%@;
1820    sub GetCoupling {
1821        # Get the parameters.
1822        my ($self, $peg1, $peg2) = @_;
1823        # Declare the return values. We'll start with the coupling ID and undefine the
1824        # flag and score until we have more information.
1825        my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1826        # Find the coupling data.
1827        my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1828                                     "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
1829                                     [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);
1830        # Check to see if we found anything.
1831        if (!@pegs) {
1832            Trace("No coupling found.") if T(Coupling => 4);
1833            # No coupling, so undefine the return value.
1834            $retVal = undef;
1835        } else {
1836            # We have a coupling! Get the score and check for inversion.
1837            $score = $pegs[0]->[1];
1838            my $firstFound = $pegs[0]->[0];
1839            $inverted = ($firstFound ne $peg1);
1840            Trace("Coupling score is $score. First peg is $firstFound, peg 1 is $peg1.") if T(Coupling => 4);
1841        }
1842        # Return the result.
1843        return ($retVal, $inverted, $score);
1844    }
1845    
1846    =head3 GetSynonymGroup
1847    
1848    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1849    
1850    Return the synonym group name for the specified feature.
1851    
1852    =over 4
1853    
1854    =item fid
1855    
1856    ID of the feature whose synonym group is desired.
1857    
1858    =item RETURN
1859    
1860    The name of the synonym group to which the feature belongs. If the feature does
1861    not belong to a synonym group, the feature ID itself is returned.
1862    
1863    =back
1864    
1865    =cut
1866    
1867    sub GetSynonymGroup {
1868        # Get the parameters.
1869        my ($self, $fid) = @_;
1870        # Declare the return variable.
1871        my $retVal;
1872        # Find the synonym group.
1873        my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
1874                                       [$fid], 'IsSynonymGroupFor(from-link)');
1875        # Check to see if we found anything.
1876        if (@groups) {
1877            $retVal = $groups[0];
1878        } else {
1879            $retVal = $fid;
1880        }
1881        # Return the result.
1882        return $retVal;
1883    }
1884    
1885    =head3 GetBoundaries
1886    
1887    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1888    
1889    Determine the begin and end boundaries for the locations in a list. All of the
1890    locations must belong to the same contig and have mostly the same direction in
1891    order for this method to produce a meaningful result. The resulting
1892    begin/end pair will contain all of the bases in any of the locations.
1893    
1894    =over 4
1895    
1896    =item locList
1897    
1898    List of locations to process.
1899    
1900    =item RETURN
1901    
1902    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1903    and the ending boundary. The beginning boundary will be left of the
1904    end for mostly-forward locations and right of the end for mostly-backward
1905    locations.
1906    
1907    =back
1908    
1909    =cut
1910    
1911    sub GetBoundaries {
1912        # Get the parameters.
1913        my ($self, @locList) = @_;
1914        # Set up the counters used to determine the most popular direction.
1915        my %counts = ( '+' => 0, '-' => 0 );
1916        # Get the last location and parse it.
1917        my $locObject = BasicLocation->new(pop @locList);
1918        # Prime the loop with its data.
1919        my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right);
1920        # Count its direction.
1921        $counts{$locObject->Dir}++;
1922        # Loop through the remaining locations. Note that in most situations, this loop
1923        # will not iterate at all, because most of the time we will be dealing with a
1924        # singleton list.
1925        for my $loc (@locList) {
1926            # Create a location object.
1927            my $locObject = BasicLocation->new($loc);
1928            # Count the direction.
1929            $counts{$locObject->Dir}++;
1930            # Get the left end and the right end.
1931            my $left = $locObject->Left;
1932            my $right = $locObject->Right;
1933            # Merge them into the return variables.
1934            if ($left < $beg) {
1935                $beg = $left;
1936            }
1937            if ($right > $end) {
1938                $end = $right;
1939            }
1940        }
1941        # If the most common direction is reverse, flip the begin and end markers.
1942        if ($counts{'-'} > $counts{'+'}) {
1943            ($beg, $end) = ($end, $beg);
1944        }
1945        # Return the result.
1946        return ($contig, $beg, $end);
1947    }
1948    
1949    =head3 CouplingID
1950    
1951    C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1952    
1953    Return the coupling ID for a pair of feature IDs.
1954    
1955    The coupling ID is currently computed by joining the feature IDs in
1956    sorted order with a space. Client modules (that is, modules which
1957    use Sprout) should not, however, count on this always being the
1958    case. This method provides a way for abstracting the concept of a
1959    coupling ID. All that we know for sure about it is that it can be
1960    generated easily from the feature IDs and the order of the IDs
1961    in the parameter list does not matter (i.e. C<CouplingID("a1", "b1")>
1962    will have the same value as C<CouplingID("b1", "a1")>.
1963    
1964    =over 4
1965    
1966    =item peg1
1967    
1968    First feature of interest.
1969    
1970    =item peg2
1971    
1972    Second feature of interest.
1973    
1974    =item RETURN
1975    
1976    Returns the ID that would be used to represent a functional coupling of
1977    the two specified PEGs.
1978    
1979    =back
1980    
1981    =cut
1982    #: Return Type $;
1983    sub CouplingID {
1984        my ($self, @pegs) = @_;
1985        return $self->DigestKey(join " ", sort @pegs);
1986  }  }
1987    
1988  =head3 ReadFasta  =head3 ReadFasta
# Line 1447  Line 2029 
2029                  if ($line =~ m/^>\s*(.+?)(\s|\n)/) {                  if ($line =~ m/^>\s*(.+?)(\s|\n)/) {
2030                          # Here we have a new header. Store the current sequence if we have one.                          # Here we have a new header. Store the current sequence if we have one.
2031                          if ($id) {                          if ($id) {
2032                                  $retVal{$id} = $sequence;                  $retVal{$id} = lc $sequence;
2033                          }                          }
2034                          # Clear the sequence accumulator and save the new ID.                          # Clear the sequence accumulator and save the new ID.
2035                          ($id, $sequence) = ("$prefix$1", "");                          ($id, $sequence) = ("$prefix$1", "");
2036                  } else {                  } else {
2037                          # Here we have a data line, so we add it to the sequence accumulator.                          # Here we have a data line, so we add it to the sequence accumulator.
2038                          # First, we get the actual data out.              # First, we get the actual data out. Note that we normalize to lower
2039                # case.
2040                          $line =~ /^\s*(.*?)(\s|\n)/;                          $line =~ /^\s*(.*?)(\s|\n)/;
2041                          $sequence .= $1;                          $sequence .= $1;
2042                  }                  }
2043          }          }
2044          # Flush out the last sequence (if any).          # Flush out the last sequence (if any).
2045          if ($sequence) {          if ($sequence) {
2046                  $retVal {$id} = $sequence;          $retVal{$id} = lc $sequence;
2047          }          }
2048        # Close the file.
2049        close FASTAFILE;
2050          # Return the hash constructed from the file.          # Return the hash constructed from the file.
2051          return %retVal;          return %retVal;
2052  }  }
# Line 1545  Line 2130 
2130          # Get the data directory name.          # Get the data directory name.
2131          my $outputDirectory = $self->{_options}->{dataDir};          my $outputDirectory = $self->{_options}->{dataDir};
2132          # Dump the relations.          # Dump the relations.
2133          $self->{_erdb}->DumpRelations($outputDirectory);      $self->DumpRelations($outputDirectory);
2134  }  }
2135    
2136  =head3 XMLFileName  =head3 XMLFileName
# Line 1577  Line 2162 
2162  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and  The next statement inserts a C<HasProperty> relationship between feature C<fig|158879.1.peg.1> and
2163  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.  property C<4> with an evidence URL of C<http://seedu.uchicago.edu/query.cgi?article_id=142>.
2164    
2165  C<< $sprout->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence = 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>  C<< $sprout->InsertObject('HasProperty', { 'from-link' => 'fig|158879.1.peg.1', 'to-link' => 4, evidence => 'http://seedu.uchicago.edu/query.cgi?article_id=142'}); >>
2166    
2167  =over 4  =over 4
2168    
# Line 1597  Line 2182 
2182          # Get the parameters.          # Get the parameters.
2183          my ($self, $objectType, $fieldHash) = @_;          my ($self, $objectType, $fieldHash) = @_;
2184          # Call the underlying method.          # Call the underlying method.
2185          $self->{_erdb}->InsertObject($objectType, $fieldHash);      $self->InsertObject($objectType, $fieldHash);
2186  }  }
2187    
2188  =head3 Annotate  =head3 Annotate
# Line 1656  Line 2241 
2241    
2242  =head3 AssignFunction  =head3 AssignFunction
2243    
2244  C<< my $ok = $sprout->AssignFunction($featureID, $user, $function); >>  C<< my $ok = $sprout->AssignFunction($featureID, $user, $function, $assigningUser); >>
2245    
2246  This method assigns a function to a feature. Functions are a special type of annotation. The general  This method assigns a function to a feature. Functions are a special type of annotation. The general
2247  format is "XXXX\nset XXXX function to\nYYYYY" where XXXX is the feature type and YYYY is the functional  format is described in L</ParseAssignment>.
 assignment text.  
2248    
2249  =over 4  =over 4
2250    
# Line 1670  Line 2254 
2254    
2255  =item user  =item user
2256    
2257  Name of the user making the assignment. This is frequently a group name, like C<kegg> or C<fig>.  Name of the user group making the assignment, such as C<kegg> or C<fig>.
2258    
2259  =item function  =item function
2260    
2261  Text of the function being assigned.  Text of the function being assigned.
2262    
2263    =item assigningUser (optional)
2264    
2265    Name of the individual user making the assignment. If omitted, defaults to the user group.
2266    
2267  =item RETURN  =item RETURN
2268    
2269  Returns 1 if successful, 0 if an error occurred.  Returns 1 if successful, 0 if an error occurred.
# Line 1686  Line 2274 
2274  #: Return Type $;  #: Return Type $;
2275  sub AssignFunction {  sub AssignFunction {
2276          # Get the parameters.          # Get the parameters.
2277          my ($self, $featureID, $user, $function) = @_;      my ($self, $featureID, $user, $function, $assigningUser) = @_;
2278        # Default the assigning user.
2279        if (! $assigningUser) {
2280            $assigningUser = $user;
2281        }
2282          # Create an annotation string from the parameters.          # Create an annotation string from the parameters.
2283          my $annotationText = "$user\nset $user function to\n$function";      my $annotationText = "$assigningUser\nset $user function to\n$function";
2284          # Get the current time.          # Get the current time.
2285          my $now = time;          my $now = time;
2286          # Declare the return variable.          # Declare the return variable.
# Line 1749  Line 2341 
2341          return @retVal;          return @retVal;
2342  }  }
2343    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
         # Get the parameters.  
         my ($self, $entityName, $entityID) = @_;  
         # Check for the entity instance.  
         my $testInstance = $self->GetEntity($entityName, $entityID);  
         # Return an existence indicator.  
         my $retVal = ($testInstance ? 1 : 0);  
         return $retVal;  
 }  
   
2344  =head3 FeatureTranslation  =head3 FeatureTranslation
2345    
2346  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 1968  Line 2526 
2526          return @retVal;          return @retVal;
2527  }  }
2528    
2529    =head3 GetProperties
2530    
2531    C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >>
2532    
2533    Return a list of the properties with the specified characteristics.
2534    
2535    Properties are arbitrary key-value pairs associated with a feature. (At some point they
2536    will also be associated with genomes.) A property value is represented by a 4-tuple of
2537    the form B<($fid, $key, $value, $url)>. These exactly correspond to the parameter
2538    
2539    =over 4
2540    
2541    =item fid
2542    
2543    ID of the feature possessing the property.
2544    
2545    =item key
2546    
2547    Name or key of the property.
2548    
2549    =item value
2550    
2551    Value of the property.
2552    
2553    =item url
2554    
2555    URL of the document that indicated the property should have this particular value, or an
2556    empty string if no such document exists.
2557    
2558    =back
2559    
2560    The parameters act as a filter for the desired data. Any non-null parameter will
2561    automatically match all the tuples returned. So, specifying just the I<$fid> will
2562    return all the properties of the specified feature; similarly, specifying the I<$key>
2563    and I<$value> parameters will return all the features having the specified property
2564    value.
2565    
2566    A single property key can have many values, representing different ideas about the
2567    feature in question. For example, one paper may declare that a feature C<fig|83333.1.peg.10> is
2568    virulent, and another may declare that it is not virulent. A query about the virulence of
2569    C<fig|83333.1.peg.10> would be coded as
2570    
2571        my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', '');
2572    
2573    Here the I<$value> and I<$url> fields are left blank, indicating that those fields are
2574    not to be filtered. The tuples returned would be
2575    
2576        ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf')
2577        ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf')
2578    
2579    =cut
2580    #: Return Type @@;
2581    sub GetProperties {
2582        # Get the parameters.
2583        my ($self, @parms) = @_;
2584        # Declare the return variable.
2585        my @retVal = ();
2586        # Now we need to create a WHERE clause that will get us the data we want. First,
2587        # we create a list of the columns containing the data for each parameter.
2588        my @colNames = ('HasProperty(from-link)', 'Property(property-name)',
2589                        'Property(property-value)', 'HasProperty(evidence)');
2590        # Now we build the WHERE clause and the list of parameter values.
2591        my @where = ();
2592        my @values = ();
2593        for (my $i = 0; $i <= $#colNames; $i++) {
2594            my $parm = $parms[$i];
2595            if (defined $parm && ($parm ne '')) {
2596                push @where, "$colNames[$i] = ?";
2597                push @values, $parm;
2598            }
2599        }
2600        # Format the WHERE clause.
2601        my $filter = (@values > 0 ? (join " AND ", @where) : undef);
2602        # Ask for all the propertie values with the desired characteristics.
2603        my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values);
2604        while (my $valueObject = $query->Fetch()) {
2605            my @tuple = $valueObject->Values(\@colNames);
2606            push @retVal, \@tuple;
2607        }
2608        # Return the result.
2609        return @retVal;
2610    }
2611    
2612  =head3 FeatureProperties  =head3 FeatureProperties
2613    
2614  C<< my @properties = $sprout->FeatureProperties($featureID); >>  C<< my @properties = $sprout->FeatureProperties($featureID); >>
# Line 2034  Line 2675 
2675          return $retVal;          return $retVal;
2676  }  }
2677    
2678    =head3 PropertyID
2679    
2680    C<< my $id = $sprout->PropertyID($propName, $propValue); >>
2681    
2682    Return the ID of the specified property name and value pair, if the
2683    pair exists.
2684    
2685    =over 4
2686    
2687    =item propName
2688    
2689    Name of the desired property.
2690    
2691    =item propValue
2692    
2693    Value expected for the desired property.
2694    
2695    =item RETURN
2696    
2697    Returns the ID of the name/value pair, or C<undef> if the pair does not exist.
2698    
2699    =back
2700    
2701    =cut
2702    
2703    sub PropertyID {
2704        # Get the parameters.
2705        my ($self, $propName, $propValue) = @_;
2706        # Try to find the ID.
2707        my ($retVal) = $self->GetFlat(['Property'],
2708                                      "Property(property-name) = ? AND Property(property-value) = ?",
2709                                      [$propName, $propValue], 'Property(id)');
2710        # Return the result.
2711        return $retVal;
2712    }
2713    
2714  =head3 MergedAnnotations  =head3 MergedAnnotations
2715    
2716  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>
# Line 2162  Line 2839 
2839  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>
2840    
2841  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped
2842  to the role the feature performs.  to the roles the feature performs.
2843    
2844  =over 4  =over 4
2845    
# Line 2172  Line 2849 
2849    
2850  =item RETURN  =item RETURN
2851    
2852  Returns a hash mapping all the feature's subsystems to the feature's role.  Returns a hash mapping all the feature's subsystems to a list of the feature's roles.
2853    
2854  =back  =back
2855    
2856  =cut  =cut
2857  #: Return Type %;  #: Return Type %@;
2858  sub SubsystemsOf {  sub SubsystemsOf {
2859          # Get the parameters.          # Get the parameters.
2860          my ($self, $featureID) = @_;          my ($self, $featureID) = @_;
2861          # Use the SSCell to connect features to subsystems.      # Get the subsystem list.
2862          my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'],          my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'],
2863                                                                          "ContainsFeature(to-link) = ?", [$featureID],                                                                          "ContainsFeature(to-link) = ?", [$featureID],
2864                                                                          ['HasSSCell(from-link)', 'IsRoleOf(from-link)']);                                                                          ['HasSSCell(from-link)', 'IsRoleOf(from-link)']);
2865          # Create the return value.          # Create the return value.
2866          my %retVal = ();          my %retVal = ();
2867        # Build a hash to weed out duplicates. Sometimes the same PEG and role appears
2868        # in two spreadsheet cells.
2869        my %dupHash = ();
2870          # Loop through the results, adding them to the hash.          # Loop through the results, adding them to the hash.
2871          for my $record (@subsystems) {          for my $record (@subsystems) {
2872                  $retVal{$record->[0]} = $record->[1];          # Get this subsystem and role.
2873            my ($subsys, $role) = @{$record};
2874            # Insure it's the first time for both.
2875            my $dupKey = "$subsys\n$role";
2876            if (! exists $dupHash{"$subsys\n$role"}) {
2877                $dupHash{$dupKey} = 1;
2878                push @{$retVal{$subsys}}, $role;
2879            }
2880          }          }
2881          # Return the hash.          # Return the hash.
2882          return %retVal;          return %retVal;
2883  }  }
2884    
2885    =head3 SubsystemList
2886    
2887    C<< my @subsystems = $sprout->SubsystemList($featureID); >>
2888    
2889    Return a list containing the names of the subsystems in which the specified
2890    feature participates. Unlike L</SubsystemsOf>, this method only returns the
2891    subsystem names, not the roles.
2892    
2893    =over 4
2894    
2895    =item featureID
2896    
2897    ID of the feature whose subsystem names are desired.
2898    
2899    =item RETURN
2900    
2901    Returns a list of the names of the subsystems in which the feature participates.
2902    
2903    =back
2904    
2905    =cut
2906    #: Return Type @;
2907    sub SubsystemList {
2908        # Get the parameters.
2909        my ($self, $featureID) = @_;
2910        # Get the list of names.
2911        my @retVal = $self->GetFlat(['HasRoleInSubsystem'], "HasRoleInSubsystem(from-link) = ?",
2912                                    [$featureID], 'HasRoleInSubsystem(to-link)');
2913        # Return the result, sorted.
2914        return sort @retVal;
2915    }
2916    
2917    =head3 GenomeSubsystemData
2918    
2919    C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >>
2920    
2921    Return a hash mapping genome features to their subsystem roles.
2922    
2923    =over 4
2924    
2925    =item genomeID
2926    
2927    ID of the genome whose subsystem feature map is desired.
2928    
2929    =item RETURN
2930    
2931    Returns a hash mapping each feature of the genome to a list of 2-tuples. Eacb
2932    2-tuple contains a subsystem name followed by a role ID.
2933    
2934    =back
2935    
2936    =cut
2937    
2938    sub GenomeSubsystemData {
2939        # Get the parameters.
2940        my ($self, $genomeID) = @_;
2941        # Declare the return variable.
2942        my %retVal = ();
2943        # Get a list of the genome features that participate in subsystems. For each
2944        # feature we get its spreadsheet cells and the corresponding roles.
2945        my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],
2946                                 "HasFeature(from-link) = ?", [$genomeID],
2947                                 ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);
2948        # Now we get a list of the spreadsheet cells and their associated subsystems. Subsystems
2949        # with an unknown variant code (-1) are skipped. Note the genome ID is at both ends of the
2950        # list. We use it at the beginning to get all the spreadsheet cells for the genome and
2951        # again at the end to filter out participation in subsystems with a negative variant code.
2952        my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],
2953                                     "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",
2954                                     [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);
2955        # Now "@roleData" lists the spreadsheet cell and role for each of the genome's features.
2956        # "@cellData" lists the subsystem name for each of the genome's spreadsheet cells. We
2957        # link these two lists together to create the result. First, we want a hash mapping
2958        # spreadsheet cells to subsystem names.
2959        my %subHash = map { $_->[0] => $_->[1] } @cellData;
2960        # We loop through @cellData to build the hash.
2961        for my $roleEntry (@roleData) {
2962            # Get the data for this feature and cell.
2963            my ($fid, $cellID, $role) = @{$roleEntry};
2964            # Check for a subsystem name.
2965            my $subsys = $subHash{$cellID};
2966            if ($subsys) {
2967                # Insure this feature has an entry in the return hash.
2968                if (! exists $retVal{$fid}) { $retVal{$fid} = []; }
2969                # Merge in this new data.
2970                push @{$retVal{$fid}}, [$subsys, $role];
2971            }
2972        }
2973        # Return the result.
2974        return %retVal;
2975    }
2976    
2977  =head3 RelatedFeatures  =head3 RelatedFeatures
2978    
2979  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 2230  Line 3009 
3009          # Get the parameters.          # Get the parameters.
3010          my ($self, $featureID, $function, $userID) = @_;          my ($self, $featureID, $function, $userID) = @_;
3011          # Get a list of the features that are BBHs of the incoming feature.          # Get a list of the features that are BBHs of the incoming feature.
3012          my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'],      my @bbhFeatures = map { $_->[0] } FIGRules::BBHData($featureID);
                                                                          "IsBidirectionalBestHitOf(from-link) = ?", [$featureID],  
                                                                          'IsBidirectionalBestHitOf(to-link)');  
3013          # Now we loop through the features, pulling out the ones that have the correct          # Now we loop through the features, pulling out the ones that have the correct
3014          # functional assignment.          # functional assignment.
3015          my @retVal = ();          my @retVal = ();
# Line 2296  Line 3073 
3073          return @retVal;          return @retVal;
3074  }  }
3075    
 =head3 GetAll  
   
 C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >>  
   
 Return a list of values taken from the objects returned by a query. The first three  
 parameters correspond to the parameters of the L</Get> method. The final parameter is  
 a list of the fields desired from each record found by the query. The field name  
 syntax is the standard syntax used for fields in the B<ERDB> system--  
 B<I<objectName>(I<fieldName>)>-- where I<objectName> is the name of the relevant entity  
 or relationship and I<fieldName> is the name of the field.  
   
 The list returned will be a list of lists. Each element of the list will contain  
 the values returned for the fields specified in the fourth parameter. If one of the  
 fields specified returns multiple values, they are flattened in with the rest. For  
 example, the following call will return a list of the features in a particular  
 spreadsheet cell, and each feature will be represented by a list containing the  
 feature ID followed by all of its aliases.  
   
 C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >>  
   
 =over 4  
   
 =item objectNames  
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item fields  
   
 List of the fields to be returned in each element of the list returned.  
   
 =item count  
   
 Maximum number of records to return. If omitted or 0, all available records will be returned.  
   
 =item RETURN  
   
 Returns a list of list references. Each element of the return list contains the values for the  
 fields specified in the B<fields> parameter.  
   
 =back  
   
 =cut  
 #: Return Type @@;  
 sub GetAll {  
         # Get the parameters.  
         my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_;  
         # Create the query.  
         my $query = $self->Get($objectNames, $filterClause, $parameterList);  
         # Set up a counter of the number of records read.  
         my $fetched = 0;  
         # Insure the counter has a value.  
         if (!defined $count) {  
                 $count = 0;  
         }  
         # Loop through the records returned, extracting the fields. Note that if the  
         # counter is non-zero, we stop when the number of records read hits the count.  
         my @retVal = ();  
         while (($count == 0 || $fetched < $count) && (my $row = $query->Fetch())) {  
                 my @rowData = $row->Values($fields);  
                 push @retVal, \@rowData;  
                 $fetched++;  
         }  
         # Return the resulting list.  
         return @retVal;  
 }  
   
 =head3 GetFlat  
   
 C<< my @list = $sprout->GetFlat(\@objectNames, $filterClause, $parameterList, $field); >>  
   
 This is a variation of L</GetAll> that asks for only a single field per record and  
 returns a single flattened list.  
   
 =over 4  
   
 =item objectNames  
   
 List containing the names of the entity and relationship objects to be retrieved.  
   
 =item filterClause  
   
 WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can  
 be parameterized with parameter markers (C<?>). Each field used must be specified in the standard form  
 B<I<objectName>(I<fieldName>)>. Any parameters specified in the filter clause should be added to the  
 parameter list as additional parameters. The fields in a filter clause can come from primary  
 entity relations, relationship relations, or secondary entity relations; however, all of the  
 entities and relationships involved must be included in the list of object names.  
   
 =item parameterList  
   
 List of the parameters to be substituted in for the parameters marks in the filter clause.  
   
 =item field  
   
 Name of the field to be used to get the elements of the list returned.  
   
 =item RETURN  
   
 Returns a list of values.  
   
 =back  
   
 =cut  
 #: Return Type @;  
 sub GetFlat {  
         # Get the parameters.  
         my ($self, $objectNames, $filterClause, $parameterList, $field) = @_;  
         # Construct the query.  
         my $query = $self->Get($objectNames, $filterClause, $parameterList);  
         # Create the result list.  
         my @retVal = ();  
         # Loop through the records, adding the field values found to the result list.  
         while (my $row = $query->Fetch()) {  
                 push @retVal, $row->Value($field);  
         }  
         # Return the list created.  
         return @retVal;  
 }  
   
3076  =head3 Protein  =head3 Protein
3077    
3078  C<< my $protein = Sprout::Protein($sequence, $table); >>  C<< my $protein = Sprout::Protein($sequence, $table); >>
# Line 2500  Line 3145 
3145          # Loop through the input triples.          # Loop through the input triples.
3146          my $n = length $sequence;          my $n = length $sequence;
3147          for (my $i = 0; $i < $n; $i += 3) {          for (my $i = 0; $i < $n; $i += 3) {
3148                  # Get the current triple from the sequence.          # Get the current triple from the sequence. Note we convert to
3149                  my $triple = substr($sequence, $i, 3);          # upper case to insure a match.
3150            my $triple = uc substr($sequence, $i, 3);
3151                  # Translate it using the table.                  # Translate it using the table.
3152                  my $protein = "X";                  my $protein = "X";
3153                  if (exists $table->{$triple}) { $protein = $table->{$triple}; }                  if (exists $table->{$triple}) { $protein = $table->{$triple}; }
# Line 2529  Line 3175 
3175          # Create the return list, priming it with the name of the data directory.          # Create the return list, priming it with the name of the data directory.
3176          my @retVal = ($self->{_options}->{dataDir});          my @retVal = ($self->{_options}->{dataDir});
3177          # Concatenate the table names.          # Concatenate the table names.
3178          push @retVal, $self->{_erdb}->GetTableNames();      push @retVal, $self->GetTableNames();
3179          # Return the result.          # Return the result.
3180          return @retVal;          return @retVal;
3181  }  }
3182    
3183    =head3 BBHMatrix
3184    
3185    C<< my %bbhMap = $sprout->BBHMatrix($genomeID, $cutoff, @targets); >>
3186    
3187    Find all the bidirectional best hits for the features of a genome in a
3188    specified list of target genomes. The return value will be a hash mapping
3189    features in the original genome to their bidirectional best hits in the
3190    target genomes.
3191    
3192    =over 4
3193    
3194    =item genomeID
3195    
3196    ID of the genome whose features are to be examined for bidirectional best hits.
3197    
3198    =item cutoff
3199    
3200    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3201    
3202    =item targets
3203    
3204    List of target genomes. Only pairs originating in the original
3205    genome and landing in one of the target genomes will be returned.
3206    
3207    =item RETURN
3208    
3209    Returns a hash mapping each feature in the original genome to a hash mapping its
3210    BBH pegs in the target genomes to their scores.
3211    
3212    =back
3213    
3214    =cut
3215    
3216    sub BBHMatrix {
3217        # Get the parameters.
3218        my ($self, $genomeID, $cutoff, @targets) = @_;
3219        # Declare the return variable.
3220        my %retVal = ();
3221        # Ask for the BBHs.
3222        my @bbhList = FIGRules::BatchBBHs("fig|$genomeID.%", $cutoff, @targets);
3223        # We now have a set of 4-tuples that we need to convert into a hash of hashes.
3224        for my $bbhData (@bbhList) {
3225            my ($peg1, $peg2, $score) = @{$bbhData};
3226            if (! exists $retVal{$peg1}) {
3227                $retVal{$peg1} = { $peg2 => $score };
3228            } else {
3229                $retVal{$peg1}->{$peg2} = $score;
3230            }
3231        }
3232        # Return the result.
3233        return %retVal;
3234    }
3235    
3236    
3237    =head3 SimMatrix
3238    
3239    C<< my %simMap = $sprout->SimMatrix($genomeID, $cutoff, @targets); >>
3240    
3241    Find all the similarities for the features of a genome in a
3242    specified list of target genomes. The return value will be a hash mapping
3243    features in the original genome to their similarites in the
3244    target genomes.
3245    
3246    =over 4
3247    
3248    =item genomeID
3249    
3250    ID of the genome whose features are to be examined for similarities.
3251    
3252    =item cutoff
3253    
3254    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3255    
3256    =item targets
3257    
3258    List of target genomes. Only pairs originating in the original
3259    genome and landing in one of the target genomes will be returned.
3260    
3261    =item RETURN
3262    
3263    Returns a hash mapping each feature in the original genome to a hash mapping its
3264    similar pegs in the target genomes to their scores.
3265    
3266    =back
3267    
3268    =cut
3269    
3270    sub SimMatrix {
3271        # Get the parameters.
3272        my ($self, $genomeID, $cutoff, @targets) = @_;
3273        # Declare the return variable.
3274        my %retVal = ();
3275        # Get the list of features in the source organism.
3276        my @fids = $self->FeaturesOf($genomeID);
3277        # Ask for the sims. We only want similarities to fig features.
3278        my $simList = FIGRules::GetNetworkSims($self, \@fids, {}, 1000, $cutoff, "fig");
3279        if (! defined $simList) {
3280            Confess("Unable to retrieve similarities from server.");
3281        } else {
3282            Trace("Processing sims.") if T(3);
3283            # We now have a set of sims that we need to convert into a hash of hashes. First, we
3284            # Create a hash for the target genomes.
3285            my %targetHash = map { $_ => 1 } @targets;
3286            for my $simData (@{$simList}) {
3287                # Get the PEGs and the score.
3288                my ($peg1, $peg2, $score) = ($simData->id1, $simData->id2, $simData->psc);
3289                # Insure the second ID is in the target list.
3290                my ($genome2) = FIGRules::ParseFeatureID($peg2);
3291                if (exists $targetHash{$genome2}) {
3292                    # Here it is. Now we need to add it to the return hash. How we do that depends
3293                    # on whether or not $peg1 is new to us.
3294                    if (! exists $retVal{$peg1}) {
3295                        $retVal{$peg1} = { $peg2 => $score };
3296                    } else {
3297                        $retVal{$peg1}->{$peg2} = $score;
3298                    }
3299                }
3300            }
3301        }
3302        # Return the result.
3303        return %retVal;
3304    }
3305    
3306    
3307  =head3 LowBBHs  =head3 LowBBHs
3308    
3309  C<< my %bbhMap = $sprout->GoodBBHs($featureID, $cutoff); >>  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>
3310    
3311  Return the bidirectional best hits of a feature whose score is no greater than a  Return the bidirectional best hits of a feature whose score is no greater than a
3312  specified cutoff value. A higher cutoff value will allow inclusion of hits with  specified cutoff value. A higher cutoff value will allow inclusion of hits with
# Line 2565  Line 3335 
3335          my ($self, $featureID, $cutoff) = @_;          my ($self, $featureID, $cutoff) = @_;
3336          # Create the return hash.          # Create the return hash.
3337          my %retVal = ();          my %retVal = ();
3338          # Create a query to get the desired BBHs.      # Query for the desired BBHs.
3339          my @bbhList = $self->GetAll(['IsBidirectionalBestHitOf'],      my @bbhList = FIGRules::BBHData($featureID, $cutoff);
                                                                 'IsBidirectionalBestHitOf(sc) <= ? AND IsBidirectionalBestHitOf(from-link) = ?',  
                                                                 [$cutoff, $featureID],  
                                                                 ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(sc)']);  
3340          # Form the results into the return hash.          # Form the results into the return hash.
3341          for my $pair (@bbhList) {          for my $pair (@bbhList) {
3342                  $retVal{$pair->[0]} = $pair->[1];          my $fid = $pair->[0];
3343            if ($self->Exists('Feature', $fid)) {
3344                $retVal{$fid} = $pair->[1];
3345            }
3346          }          }
3347          # Return the result.          # Return the result.
3348          return %retVal;          return %retVal;
3349  }  }
3350    
3351    =head3 Sims
3352    
3353    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3354    
3355    Get a list of similarities for a specified feature. Similarity information is not kept in the
3356    Sprout database; rather, they are retrieved from a network server. The similarities are
3357    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3358    so that its elements can be accessed by name.
3359    
3360    Similarities can be either raw or expanded. The raw similarities are basic
3361    hits between features with similar DNA. Expanding a raw similarity drags in any
3362    features considered substantially identical. So, for example, if features B<A1>,
3363    B<A2>, and B<A3> are all substantially identical to B<A>, then a raw similarity
3364    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3365    
3366    =over 4
3367    
3368    =item fid
3369    
3370    ID of the feature whose similarities are desired.
3371    
3372    =item maxN
3373    
3374    Maximum number of similarities to return.
3375    
3376    =item maxP
3377    
3378    Minumum allowable similarity score.
3379    
3380    =item select
3381    
3382    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3383    means only similarities to FIG features are returned; C<all> means all expanded
3384    similarities are returned; and C<figx> means similarities are expanded until the
3385    number of FIG features equals the maximum.
3386    
3387    =item max_expand
3388    
3389    The maximum number of features to expand.
3390    
3391    =item filters
3392    
3393    Reference to a hash containing filter information, or a subroutine that can be
3394    used to filter the sims.
3395    
3396    =item RETURN
3397    
3398    Returns a reference to a list of similarity objects, or C<undef> if an error
3399    occurred.
3400    
3401    =back
3402    
3403    =cut
3404    
3405    sub Sims {
3406        # Get the parameters.
3407        my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
3408        # Create the shim object to test for deleted FIDs.
3409        my $shim = FidCheck->new($self);
3410        # Ask the network for sims.
3411        my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters);
3412        # Return the result.
3413        return $retVal;
3414    }
3415    
3416    =head3 IsAllGenomes
3417    
3418    C<< my $flag = $sprout->IsAllGenomes(\@list, \@checkList); >>
3419    
3420    Return TRUE if all genomes in the second list are represented in the first list at
3421    least one. Otherwise, return FALSE. If the second list is omitted, the first list is
3422    compared to a list of all the genomes.
3423    
3424    =over 4
3425    
3426    =item list
3427    
3428    Reference to the list to be compared to the second list.
3429    
3430    =item checkList (optional)
3431    
3432    Reference to the comparison target list. Every genome ID in this list must occur at
3433    least once in the first list. If this parameter is omitted, a list of all the genomes
3434    is used.
3435    
3436    =item RETURN
3437    
3438    Returns TRUE if every item in the second list appears at least once in the
3439    first list, else FALSE.
3440    
3441    =back
3442    
3443    =cut
3444    
3445    sub IsAllGenomes {
3446        # Get the parameters.
3447        my ($self, $list, $checkList) = @_;
3448        # Supply the checklist if it was omitted.
3449        $checkList = [$self->Genomes()] if ! defined($checkList);
3450        # Create a hash of the original list.
3451        my %testList = map { $_ => 1 } @{$list};
3452        # Declare the return variable. We assume that the representation
3453        # is complete and stop at the first failure.
3454        my $retVal = 1;
3455        my $n = scalar @{$checkList};
3456        for (my $i = 0; $retVal && $i < $n; $i++) {
3457            if (! $testList{$checkList->[$i]}) {
3458                $retVal = 0;
3459            }
3460        }
3461        # Return the result.
3462        return $retVal;
3463    }
3464    
3465  =head3 GetGroups  =head3 GetGroups
3466    
3467  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>
# Line 2599  Line 3483 
3483          # Here we have a group list. Loop through them individually,          # Here we have a group list. Loop through them individually,
3484          # getting a list of the relevant genomes.          # getting a list of the relevant genomes.
3485          for my $group (@{$groupList}) {          for my $group (@{$groupList}) {
3486              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(group-name) = ?",              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(primary-group) = ?",
3487                  [$group], "Genome(id)");                  [$group], "Genome(id)");
3488              $retVal{$group} = \@genomeIDs;              $retVal{$group} = \@genomeIDs;
3489          }          }
# Line 2607  Line 3491 
3491          # Here we need all of the groups. In this case, we run through all          # Here we need all of the groups. In this case, we run through all
3492          # of the genome records, putting each one found into the appropriate          # of the genome records, putting each one found into the appropriate
3493          # group. Note that we use a filter clause to insure that only genomes          # group. Note that we use a filter clause to insure that only genomes
3494          # in groups are included in the return set.          # in real NMPDR groups are included in the return set.
3495          my @genomes = $self->GetAll(['Genome'], "Genome(group-name) > ' '", [],          my @genomes = $self->GetAll(['Genome'], "Genome(primary-group) <> ?",
3496                                      ['Genome(id)', 'Genome(group-name)']);                                      [$FIG_Config::otherGroup], ['Genome(id)', 'Genome(primary-group)']);
3497          # Loop through the genomes found.          # Loop through the genomes found.
3498          for my $genome (@genomes) {          for my $genome (@genomes) {
3499              # Pop this genome's ID off the current list.              # Pop this genome's ID off the current list.
# Line 2626  Line 3510 
3510      return %retVal;      return %retVal;
3511  }  }
3512    
3513    =head3 MyGenomes
3514    
3515    C<< my @genomes = Sprout::MyGenomes($dataDir); >>
3516    
3517    Return a list of the genomes to be included in the Sprout.
3518    
3519    This method is provided for use during the Sprout load. It presumes the Genome load file has
3520    already been created. (It will be in the Sprout data directory and called either C<Genome>
3521    or C<Genome.dtx>.) Essentially, it reads in the Genome load file and strips out the genome
3522    IDs.
3523    
3524    =over 4
3525    
3526    =item dataDir
3527    
3528    Directory containing the Sprout load files.
3529    
3530    =back
3531    
3532    =cut
3533    #: Return Type @;
3534    sub MyGenomes {
3535        # Get the parameters.
3536        my ($dataDir) = @_;
3537        # Compute the genome file name.
3538        my $genomeFileName = LoadFileName($dataDir, "Genome");
3539        # Extract the genome IDs from the files.
3540        my @retVal = map { $_ =~ /^(\S+)/; $1 } Tracer::GetFile($genomeFileName);
3541        # Return the result.
3542        return @retVal;
3543    }
3544    
3545    =head3 LoadFileName
3546    
3547    C<< my $fileName = Sprout::LoadFileName($dataDir, $tableName); >>
3548    
3549    Return the name of the load file for the specified table in the specified data
3550    directory.
3551    
3552    =over 4
3553    
3554    =item dataDir
3555    
3556    Directory containing the Sprout load files.
3557    
3558    =item tableName
3559    
3560    Name of the table whose load file is desired.
3561    
3562    =item RETURN
3563    
3564    Returns the name of the file containing the load data for the specified table, or
3565    C<undef> if no load file is present.
3566    
3567    =back
3568    
3569    =cut
3570    #: Return Type $;
3571    sub LoadFileName {
3572        # Get the parameters.
3573        my ($dataDir, $tableName) = @_;
3574        # Declare the return variable.
3575        my $retVal;
3576        # Check for the various file names.
3577        if (-e "$dataDir/$tableName") {
3578            $retVal = "$dataDir/$tableName";
3579        } elsif (-e "$dataDir/$tableName.dtx") {
3580            $retVal = "$dataDir/$tableName.dtx";
3581        }
3582        # Return the result.
3583        return $retVal;
3584    }
3585    
3586    =head3 DeleteGenome
3587    
3588    C<< my $stats = $sprout->DeleteGenome($genomeID, $testFlag); >>
3589    
3590    Delete a genome from the database.
3591    
3592    =over 4
3593    
3594    =item genomeID
3595    
3596    ID of the genome to delete
3597    
3598    =item testFlag
3599    
3600    If TRUE, then the DELETE statements will be traced, but no deletions will occur.
3601    
3602    =item RETURN
3603    
3604    Returns a statistics object describing the rows deleted.
3605    
3606    =back
3607    
3608    =cut
3609    #: Return Type $%;
3610    sub DeleteGenome {
3611        # Get the parameters.
3612        my ($self, $genomeID, $testFlag) = @_;
3613        # Perform the delete for the genome's features.
3614        my $retVal = $self->Delete('Feature', "fig|$genomeID.%", testMode => $testFlag);
3615        # Perform the delete for the primary genome data.
3616        my $stats = $self->Delete('Genome', $genomeID, testMode => $testFlag);
3617        $retVal->Accumulate($stats);
3618        # Return the result.
3619        return $retVal;
3620    }
3621    
3622    =head3 Fix
3623    
3624    C<< my %fixedHash = Sprout::Fix(%groupHash); >>
3625    
3626    Prepare a genome group hash (like that returned by L</GetGroups> for processing.
3627    Groups with the same primary name will be combined. The primary name is the
3628    first capitalized word in the group name.
3629    
3630    =over 4
3631    
3632    =item groupHash
3633    
3634    Hash to be fixed up.
3635    
3636    =item RETURN
3637    
3638    Returns a fixed-up version of the hash.
3639    
3640    =back
3641    
3642    =cut
3643    
3644    sub Fix {
3645        # Get the parameters.
3646        my (%groupHash) = @_;
3647        # Create the result hash.
3648        my %retVal = ();
3649        # Copy over the genomes.
3650        for my $groupID (keys %groupHash) {
3651            # Make a safety copy of the group ID.
3652            my $realGroupID = $groupID;
3653            # Yank the primary name.
3654            if ($groupID =~ /([A-Z]\w+)/) {
3655                $realGroupID = $1;
3656            }
3657            # Append this group's genomes into the result hash.
3658            Tracer::AddToListMap(\%retVal, $realGroupID, @{$groupHash{$groupID}});
3659        }
3660        # Return the result hash.
3661        return %retVal;
3662    }
3663    
3664    =head3 GroupPageName
3665    
3666    C<< my $name = $sprout->GroupPageName($group); >>
3667    
3668    Return the name of the page for the specified NMPDR group.
3669    
3670    =over 4
3671    
3672    =item group
3673    
3674    Name of the relevant group.
3675    
3676    =item RETURN
3677    
3678    Returns the relative page name (e.g. C<../content/campy.php>). If the group file is not in
3679    memory it will be read in.
3680    
3681    =back
3682    
3683    =cut
3684    
3685    sub GroupPageName {
3686        # Get the parameters.
3687        my ($self, $group) = @_;
3688        # Declare the return variable.
3689        my $retVal;
3690        # Check for the group file data.
3691        if (! defined $self->{groupHash}) {
3692            # Read the group file.
3693            my %groupData = Sprout::ReadGroupFile($self->{_options}->{dataDir} . "/groups.tbl");
3694            # Store it in our object.
3695            $self->{groupHash} = \%groupData;
3696        }
3697        # Compute the real group name.
3698        my $realGroup = $group;
3699        if ($group =~ /([A-Z]\w+)/) {
3700            $realGroup = $1;
3701        }
3702        # Return the page name.
3703        $retVal = "../content/" . $self->{groupHash}->{$realGroup}->[1];
3704        # Return the result.
3705        return $retVal;
3706    }
3707    
3708    =head3 ReadGroupFile
3709    
3710    C<< my %groupData = Sprout::ReadGroupFile($groupFileName); >>
3711    
3712    Read in the data from the specified group file. The group file contains information
3713    about each of the NMPDR groups.
3714    
3715    =over 4
3716    
3717    =item name
3718    
3719    Name of the group.
3720    
3721    =item page
3722    
3723    Name of the group's page on the web site (e.g. C<campy.php> for
3724    Campylobacter)
3725    
3726    =item genus
3727    
3728    Genus of the group
3729    
3730    =item species
3731    
3732    Species of the group, or an empty string if the group is for an entire
3733    genus. If the group contains more than one species, the species names
3734    should be separated by commas.
3735    
3736    =back
3737    
3738    The parameters to this method are as follows
3739    
3740    =over 4
3741    
3742    =item groupFile
3743    
3744    Name of the file containing the group data.
3745    
3746    =item RETURN
3747    
3748    Returns a hash keyed on group name. The value of each hash
3749    
3750    =back
3751    
3752    =cut
3753    
3754    sub ReadGroupFile {
3755        # Get the parameters.
3756        my ($groupFileName) = @_;
3757        # Declare the return variable.
3758        my %retVal;
3759        # Read the group file.
3760        my @groupLines = Tracer::GetFile($groupFileName);
3761        for my $groupLine (@groupLines) {
3762            my ($name, $page, $genus, $species) = split(/\t/, $groupLine);
3763            $retVal{$name} = [$page, $genus, $species];
3764        }
3765        # Return the result.
3766        return %retVal;
3767    }
3768    
3769    =head3 AddProperty
3770    
3771    C<< my  = $sprout->AddProperty($featureID, $key, $value, $url); >>
3772    
3773    Add a new attribute value (Property) to a feature. In the SEED system, attributes can
3774    be added to almost any object. In Sprout, they can only be added to features. In
3775    Sprout, attributes are implemented using I<properties>. A property represents a key/value
3776    pair. If the particular key/value pair coming in is not already in the database, a new
3777    B<Property> record is created to hold it.
3778    
3779    =over 4
3780    
3781    =item peg
3782    
3783    ID of the feature to which the attribute is to be replied.
3784    
3785    =item key
3786    
3787    Name of the attribute (key).
3788    
3789    =item value
3790    
3791    Value of the attribute.
3792    
3793    =item url
3794    
3795    URL or text citation from which the property was obtained.
3796    
3797    =back
3798    
3799    =cut
3800    #: Return Type ;
3801    sub AddProperty {
3802        # Get the parameters.
3803        my ($self, $featureID, $key, $value, $url) = @_;
3804        # Declare the variable to hold the desired property ID.
3805        my $propID;
3806        # Attempt to find a property record for this key/value pair.
3807        my @properties = $self->GetFlat(['Property'],
3808                                       "Property(property-name) = ? AND Property(property-value) = ?",
3809                                       [$key, $value], 'Property(id)');
3810        if (@properties) {
3811            # Here the property is already in the database. We save its ID.
3812            $propID = $properties[0];
3813        } else {
3814            # Here the property value does not exist. We need to generate an ID. It will be set
3815            # to a number one greater than the maximum value in the database. This call to
3816            # GetAll will stop after one record.
3817            my @maxProperty = $self->GetAll(['Property'], "ORDER BY Property(id) DESC", [], ['Property(id)'],
3818                                            1);
3819            $propID = $maxProperty[0]->[0] + 1;
3820            # Insert the new property value.
3821            $self->Insert('Property', { 'property-name' => $key, 'property-value' => $value, id => $propID });
3822        }
3823        # Now we connect the incoming feature to the property.
3824        $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3825    }
3826    
3827    =head2 Virtual Methods
3828    
3829    =head3 CleanKeywords
3830    
3831    C<< my $cleanedString = $sprout->CleanKeywords($searchExpression); >>
3832    
3833    Clean up a search expression or keyword list. This involves converting the periods
3834    in EC numbers to underscores, converting non-leading minus signs to underscores,
3835    a vertical bar or colon to an apostrophe, and forcing lower case for all alphabetic
3836    characters. In addition, any extra spaces are removed.
3837    
3838    =over 4
3839    
3840    =item searchExpression
3841    
3842    Search expression or keyword list to clean. Note that a search expression may
3843    contain boolean operators which need to be preserved. This includes leading
3844    minus signs.
3845    
3846    =item RETURN
3847    
3848    Cleaned expression or keyword list.
3849    
3850    =back
3851    
3852    =cut
3853    
3854    sub CleanKeywords {
3855        # Get the parameters.
3856        my ($self, $searchExpression) = @_;
3857        # Perform the standard cleanup.
3858        my $retVal = $self->ERDB::CleanKeywords($searchExpression);
3859        # Fix the periods in EC and TC numbers.
3860        $retVal =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;
3861        # Fix non-trailing periods.
3862        $retVal =~ s/\.(\w)/_$1/g;
3863        # Fix non-leading minus signs.
3864        $retVal =~ s/(\w)[\-]/$1_/g;
3865        # Fix the vertical bars and colons
3866        $retVal =~ s/(\w)[|:](\w)/$1'$2/g;
3867        # Return the result.
3868        return $retVal;
3869    }
3870    
3871  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3872    
3873  =head3 ParseAssignment  =head3 ParseAssignment
3874    
3875  Parse annotation text to determine whether or not it is a functional assignment. If it is,  Parse annotation text to determine whether or not it is a functional assignment. If it is,
3876  the user and function text will be returned as a 2-element list. If it isn't, an empty list  the user, function text, and assigning user will be returned as a 3-element list. If it
3877  will be returned.  isn't, an empty list will be returned.
3878    
3879    A functional assignment is always of the form
3880    
3881        C<set >I<YYYY>C< function to\n>I<ZZZZZ>
3882    
3883    where I<YYYY> is the B<user>, and I<ZZZZ> is the actual functional role. In most cases,
3884    the user and the assigning user (from MadeAnnotation) will be the same, but that is
3885    not always the case.
3886    
3887    In addition, the functional role may contain extra data that is stripped, such as
3888    terminating spaces or a comment separated from the rest of the text by a tab.
3889    
3890  This is a static method.  This is a static method.
3891    
3892  =over 4  =over 4
3893    
3894    =item user
3895    
3896    Name of the assigning user.
3897    
3898  =item text  =item text
3899    
3900  Text of the annotation.  Text of the annotation.
# Line 2651  Line 3908 
3908    
3909  =cut  =cut
3910    
3911  sub ParseAssignment {  sub _ParseAssignment {
3912          # Get the parameters.          # Get the parameters.
3913          my ($text) = @_;      my ($user, $text) = @_;
3914          # Declare the return value.          # Declare the return value.
3915          my @retVal = ();          my @retVal = ();
3916          # Check to see if this is a functional assignment.          # Check to see if this is a functional assignment.
3917          my ($user, $type, $function) = split(/\n/, $text);      my ($type, $function) = split(/\n/, $text);
3918          if ($type =~ m/^set $user function to$/i) {      if ($type =~ m/^set function to$/i) {
3919                  # Here it is, so we return the user name and function text.          # Here we have an assignment without a user, so we use the incoming user ID.
3920                  @retVal = ($user, $function);                  @retVal = ($user, $function);
3921        } elsif ($type =~ m/^set (\S+) function to$/i) {
3922            # Here we have an assignment with a user that is passed back to the caller.
3923            @retVal = ($1, $function);
3924        }
3925        # If we have an assignment, we need to clean the function text. There may be
3926        # extra junk at the end added as a note from the user.
3927        if (defined( $retVal[1] )) {
3928            $retVal[1] =~ s/(\t\S)?\s*$//;
3929          }          }
3930          # Return the result list.          # Return the result list.
3931          return @retVal;          return @retVal;
# Line 2688  Line 3953 
3953    
3954  sub FriendlyTimestamp {  sub FriendlyTimestamp {
3955      my ($timeValue) = @_;      my ($timeValue) = @_;
3956      my $retVal = strftime("%a %b %e %H:%M:%S %Y", localtime($timeValue));      my $retVal = localtime($timeValue);
3957      return $retVal;      return $retVal;
3958  }  }
3959    
3960    
3961  1;  1;

Legend:
Removed from v.1.10  
changed lines
  Added in v.1.98

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3