--- Sprout.pm 2005/06/22 21:30:45 1.15 +++ Sprout.pm 2006/09/19 00:14:04 1.85 @@ -1,5 +1,8 @@ package Sprout; + require Exporter; + use ERDB; + @ISA = qw(Exporter ERDB); use Data::Dumper; use strict; use Carp; @@ -7,12 +10,12 @@ use XML::Simple; use DBQuery; use DBObject; - use ERDB; use Tracer; use FIGRules; + use FidCheck; use Stats; use POSIX qw(strftime); - + use BasicLocation; =head1 Sprout Database Manipulation Object @@ -32,6 +35,8 @@ query tasks. For example, L lists the IDs of all the genomes in the database and L returns the DNA sequence for a specified genome location. +The Sprout object is a subclass of the ERDB object and inherits all its properties and methods. + =cut #: Constructor SFXlate->new_sprout_only(); @@ -62,14 +67,18 @@ * B name of the XML file containing the database definition (default C) -* B user name and password, delimited by a slash (default C) +* B user name and password, delimited by a slash (default same as SEED) * B connection port (default C<0>) +* B connection socket (default same as SEED) + * B maximum number of residues per feature segment, (default C<4500>) * B maximum number of residues per sequence, (default C<8000>) +* B suppresses the connection to the database if TRUE, else FALSE + =back For example, the following constructor call specifies a database named I and a user name of @@ -83,16 +92,27 @@ sub new { # Get the parameters. my ($class, $dbName, $options) = @_; + # Compute the DBD directory. + my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir : + $FIG_Config::fig ); # Compute the options. We do this by starting with a table of defaults and overwriting with # the incoming data. my $optionTable = Tracer::GetOptions({ - dbType => 'mysql', # database type - dataDir => 'Data', # data file directory - xmlFileName => 'SproutDBD.xml', # database definition file name - userData => 'root/', # user name and password - port => 0, # database connection port + dbType => $FIG_Config::dbms, + # database type + dataDir => $FIG_Config::sproutData, + # data file directory + xmlFileName => "$dbd_dir/SproutDBD.xml", + # database definition file name + userData => "$FIG_Config::dbuser/$FIG_Config::dbpass", + # user name and password + port => $FIG_Config::dbport, + # database connection port + sock => $FIG_Config::dbsock, + host => $FIG_Config::dbhost, maxSegmentLength => 4500, # maximum feature segment length maxSequenceLength => 8000, # maximum contig sequence length + noDBOpen => 0, # 1 to suppress the database open }, $options); # Get the data directory. my $dataDir = $optionTable->{dataDir}; @@ -100,15 +120,21 @@ $optionTable->{userData} =~ m!([^/]*)/(.*)$!; my ($userName, $password) = ($1, $2); # Connect to the database. - my $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, $password, $optionTable->{port}); + my $dbh; + if (! $optionTable->{noDBOpen}) { + $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, + $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock}); + } # Create the ERDB object. my $xmlFileName = "$optionTable->{xmlFileName}"; - my $erdb = ERDB->new($dbh, $xmlFileName); - # Create this object. - my $self = { _erdb => $erdb, _options => $optionTable, _xmlName => $xmlFileName }; - # Bless and return it. - bless $self; - return $self; + my $retVal = ERDB::new($class, $dbh, $xmlFileName); + # Add the option table and XML file name. + $retVal->{_options} = $optionTable; + $retVal->{_xmlName} = $xmlFileName; + # Set up space for the group file data. + $retVal->{groupHash} = undef; + # Return it. + return $retVal; } =head3 MaxSegment @@ -143,293 +169,255 @@ return $self->{_options}->{maxSequenceLength}; } -=head3 Get - -C<< my $query = $sprout->Get(\@objectNames, $filterClause, \@parameterList); >> - -This method allows a general query against the Sprout data using a specified filter clause. - -The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each -field name represented in the form B(I)>. For example, the -following call requests all B objects for the genus specified in the variable -$genus. - -C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", [$genus]); >> - -The WHERE clause contains a single question mark, so there is a single additional -parameter representing the parameter value. It would also be possible to code - -C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >> - -however, this version of the call would generate a syntax error if there were any quote -characters inside the variable C<$genus>. - -The use of the strange parenthesized notation for field names enables us to distinguish -hyphens contained within field names from minus signs that participate in the computation -of the WHERE clause. All of the methods that manipulate fields will use this same notation. - -It is possible to specify multiple entity and relationship names in order to retrieve more than -one object's data at the same time, which allows highly complex joined queries. For example, - -C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", [$genus]); >> - -This query returns all the genomes for a particular genus and allows access to the -sources from which they came. The join clauses to go from Genome to Source are generated -automatically. - -Finally, the filter clause can contain sort information. To do this, simply put an C -clause at the end of the filter. Field references in the ORDER BY section follow the same rules -as they do in the filter itself; in other words, each one must be of the form B(I)>. -For example, the following filter string gets all genomes for a particular genus and sorts -them by species name. - -C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ? ORDER BY Genome(species)", [$genus]); >> +=head3 Load -It is also permissible to specify I an ORDER BY clause. For example, the following invocation gets -all genomes ordered by genus and species. +C<< $sprout->Load($rebuild); >>; -C<< $query = $sprout->Get(['Genome'], "ORDER BY Genome(genus), Genome(species)"); >> +Load the database from files in the data directory, optionally re-creating the tables. -Odd things may happen if one of the ORDER BY fields is in a secondary relation. So, for example, an -attempt to order Bs by alias may (depending on the underlying database engine used) cause -a single feature to appear more than once. +This method always deletes the data from the database before loading, even if the tables are not +re-created. The data is loaded into the relations from files in the data directory either having the +same name as the target relation with no extension or with an extension of C<.dtx>. Files without an +extension are used in preference to the files with an extension. -If multiple names are specified, then the query processor will automatically determine a -join path between the entities and relationships. The algorithm used is very simplistic. -In particular, you can't specify any entity or relationship more than once, and if a -relationship is recursive, the path is determined by the order in which the entity -and the relationship appear. For example, consider a recursive relationship B -which relates B objects to other B objects. If the join path is -coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however, -the join path is C<['IsParentOf', 'People']>, then the people returned will be children. +The files are loaded based on the presumption that each line of the file is a record in the +relation, and the individual fields are delimited by tabs. Tab and new-line characters inside +fields must be represented by the escape sequences C<\t> and C<\n>, respectively. The fields must +be presented in the order given in the relation tables produced by the L method. =over 4 -=item objectNames - -List containing the names of the entity and relationship objects to be retrieved. - -=item filterClause - -WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can -be parameterized with parameter markers (C). Each field used must be specified in the standard form -B(I)>. Any parameters specified in the filter clause should be added to the -parameter list as additional parameters. The fields in a filter clause can come from primary -entity relations, relationship relations, or secondary entity relations; however, all of the -entities and relationships involved must be included in the list of object names. - -=item parameterList +=item rebuild -List of the parameters to be substituted in for the parameters marks in the filter clause. +TRUE if the data tables need to be created or re-created, else FALSE =item RETURN -Returns a B that can be used to iterate through all of the results. +Returns a statistical object containing the number of records read, the number of duplicates found, +the number of errors, and a list of the error messages. =back =cut - -sub Get { +#: Return Type %; +sub Load { # Get the parameters. - my ($self, $objectNames, $filterClause, $parameterList) = @_; - # We differ from the ERDB Get method in that the parameter list is passed in as a list reference - # rather than a list of parameters. The next step is to convert the parameters from a reference - # to a real list. We can only do this if the parameters have been specified. - my @parameters; - if ($parameterList) { @parameters = @{$parameterList}; } - return $self->{_erdb}->Get($objectNames, $filterClause, @parameters); + my ($self, $rebuild) = @_; + # Load the tables from the data directory. + my $retVal = $self->LoadTables($self->{_options}->{dataDir}, $rebuild); + # Return the statistics. + return $retVal; } -=head3 GetEntity +=head3 LoadUpdate -C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >> +C<< my $stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >> -Return an object describing the entity instance with a specified ID. +Load updates to one or more database tables. This method enables the client to make changes to one +or two tables without reloading the whole database. For each table, there must be a corresponding +file in the data directory, either with the same name as the table, or with a C<.dtx> suffix. So, +for example, to make updates to the B relation, there must be a +C file in the data directory. Unlike a full load, files without an extension +are not examined. This allows update files to co-exist with files from an original load. =over 4 -=item entityType +=item truncateFlag -Entity type name. +TRUE if the tables should be rebuilt before loading, else FALSE. A value of TRUE therefore causes +current data and schema of the tables to be replaced, while a value of FALSE means the new data +is added to the existing data in the various relations. -=item ID +=item tableList -ID of the desired entity. +List of the tables to be updated. =item RETURN -Returns a B representing the desired entity instance, or an undefined value if no -instance is found with the specified key. +Returns a statistical object containing the number of records read, the number of duplicates found, +the number of errors encountered, and a list of error messages. =back =cut - -sub GetEntity { +#: Return Type $%; +sub LoadUpdate { # Get the parameters. - my ($self, $entityType, $ID) = @_; - # Call the ERDB method. - return $self->{_erdb}->GetEntity($entityType, $ID); + my ($self, $truncateFlag, $tableList) = @_; + # Declare the return value. + my $retVal = Stats->new(); + # Get the data directory. + my $optionTable = $self->{_options}; + my $dataDir = $optionTable->{dataDir}; + # Loop through the incoming table names. + for my $tableName (@{$tableList}) { + # Find the table's file. + my $fileName = LoadFileName($dataDir, $tableName); + if (! $fileName) { + Trace("No load file found for $tableName in $dataDir.") if T(0); + } else { + # Attempt to load this table. + my $result = $self->LoadTable($fileName, $tableName, $truncateFlag); + # Accumulate the resulting statistics. + $retVal->Accumulate($result); + } + } + # Return the statistics. + return $retVal; } -=head3 GetEntityValues +=head3 GenomeCounts -C<< my @values = GetEntityValues($entityType, $ID, \@fields); >> +C<< my ($arch, $bact, $euk, $vir, $env, $unk) = $sprout->GenomeCounts($complete); >> -Return a list of values from a specified entity instance. +Count the number of genomes in each domain. If I<$complete> is TRUE, only complete +genomes will be included in the counts. =over 4 -=item entityType - -Entity type name. +=item complete -=item ID - -ID of the desired entity. - -=item fields - -List of field names, each of the form IC<(>IC<)>. +TRUE if only complete genomes are to be counted, FALSE if all genomes are to be +counted =item RETURN -Returns a flattened list of the values of the specified fields for the specified entity. +A six-element list containing the number of genomes in each of six categories-- +Archaea, Bacteria, Eukaryota, Viral, Environmental, and Unknown, respectively. =back =cut -#: Return Type @; -sub GetEntityValues { + +sub GenomeCounts { # Get the parameters. - my ($self, $entityType, $ID, $fields) = @_; - # Call the ERDB method. - return $self->{_erdb}->GetEntityValues($entityType, $ID, $fields); + my ($self, $complete) = @_; + # Set the filter based on the completeness flag. + my $filter = ($complete ? "Genome(complete) = 1" : ""); + # Get all the genomes and the related taxonomy information. + my @genomes = $self->GetAll(['Genome'], $filter, [], ['Genome(id)', 'Genome(taxonomy)']); + # Clear the counters. + my ($arch, $bact, $euk, $vir, $env, $unk) = (0, 0, 0, 0, 0, 0); + # Loop through, counting the domains. + for my $genome (@genomes) { + if ($genome->[1] =~ /^archaea/i) { ++$arch } + elsif ($genome->[1] =~ /^bacter/i) { ++$bact } + elsif ($genome->[1] =~ /^eukar/i) { ++$euk } + elsif ($genome->[1] =~ /^vir/i) { ++$vir } + elsif ($genome->[1] =~ /^env/i) { ++$env } + else { ++$unk } + } + # Return the counts. + return ($arch, $bact, $euk, $vir, $env, $unk); } -=head3 ShowMetaData +=head3 ContigCount -C<< $sprout->ShowMetaData($fileName); >> +C<< my $count = $sprout->ContigCount($genomeID); >> -This method outputs a description of the database to an HTML file in the data directory. +Return the number of contigs for the specified genome ID. =over 4 -=item fileName +=item genomeID -Fully-qualified name to give to the output file. +ID of the genome whose contig count is desired. + +=item RETURN + +Returns the number of contigs for the specified genome. =back =cut -sub ShowMetaData { +sub ContigCount { # Get the parameters. - my ($self, $fileName) = @_; - # Compute the file name. - my $options = $self->{_options}; - # Call the show method on the underlying ERDB object. - $self->{_erdb}->ShowMetaData($fileName); + my ($self, $genomeID) = @_; + # Get the contig count. + my $retVal = $self->GetCount(['Contig', 'HasContig'], "HasContig(from-link) = ?", [$genomeID]); + # Return the result. + return $retVal; } -=head3 Load - -C<< $sprout->Load($rebuild); >>; - -Load the database from files in the data directory, optionally re-creating the tables. +=head3 GeneMenu -This method always deletes the data from the database before loading, even if the tables are not -re-created. The data is loaded into the relations from files in the data directory either having the -same name as the target relation with no extension or with an extension of C<.dtx>. Files without an -extension are used in preference to the files with an extension. +C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected); >> -The files are loaded based on the presumption that each line of the file is a record in the -relation, and the individual fields are delimited by tabs. Tab and new-line characters inside -fields must be represented by the escape sequences C<\t> and C<\n>, respectively. The fields must -be presented in the order given in the relation tables produced by the L method. +Return an HTML select menu of genomes. Each genome will be an option in the menu, +and will be displayed by name with the ID and a contig count attached. The selection +value will be the genome ID. The genomes will be sorted by genus/species name. =over 4 -=item rebuild - -TRUE if the data tables need to be created or re-created, else FALSE - -=item RETURN - -Returns a statistical object containing the number of records read, the number of duplicates found, -the number of errors, and a list of the error messages. - -=back +=item attributes -=cut -#: Return Type %; -sub Load { - # Get the parameters. - my ($self, $rebuild) = @_; - # Get the database object. - my $erdb = $self->{_erdb}; - # Load the tables from the data directory. - my $retVal = $erdb->LoadTables($self->{_options}->{dataDir}, $rebuild); - # Return the statistics. - return $retVal; -} +Reference to a hash mapping attributes to values for the SELECT tag generated. -=head3 LoadUpdate +=item filterString -C<< my %stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >> +A filter string for use in selecting the genomes. The filter string must conform +to the rules for the C<< ERDB->Get >> method. -Load updates to one or more database tables. This method enables the client to make changes to one -or two tables without reloading the whole database. For each table, there must be a corresponding -file in the data directory, either with the same name as the table, or with a C<.dtx> suffix. So, -for example, to make updates to the B relation, there must be a -C file in the data directory. Unlike a full load, files without an extension -are not examined. This allows update files to co-exist with files from an original load. +=item params -=over 4 +Reference to a list of values to be substituted in for the parameter marks in +the filter string. -=item truncateFlag +=item selected (optional) -TRUE if the tables should be rebuilt before loading, else FALSE. A value of TRUE therefore causes -current data and schema of the tables to be replaced, while a value of FALSE means the new data -is added to the existing data in the various relations. +ID of the genome to be initially selected. -=item tableList +=item fast (optional) -List of the tables to be updated. +If specified and TRUE, the contig counts will be omitted to improve performance. =item RETURN -Returns a statistical object containing the number of records read, the number of duplicates found, -the number of errors encountered, and a list of error messages. +Returns an HTML select menu with the specified genomes as selectable options. =back =cut -#: Return Type $%; -sub LoadUpdate { + +sub GeneMenu { # Get the parameters. - my ($self, $truncateFlag, $tableList) = @_; - # Get the database object. - my $erdb = $self->{_erdb}; - # Declare the return value. - my $retVal = Stats->new(); - # Get the data directory. - my $optionTable = $self->{_options}; - my $dataDir = $optionTable->{dataDir}; - # Loop through the incoming table names. - for my $tableName (@{$tableList}) { - # Find the table's file. - my $fileName = "$dataDir/$tableName"; - if (! -e $fileName) { - $fileName = "$fileName.dtx"; - } - # Attempt to load this table. - my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag); - # Accumulate the resulting statistics. - $retVal->Accumulate($result); + my ($self, $attributes, $filterString, $params, $selected, $fast) = @_; + my $slowMode = ! $fast; + # Default to nothing selected. This prevents an execution warning if "$selected" + # is undefined. + $selected = "" unless defined $selected; + Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3); + # Start the menu. + my $retVal = "\n"; + # Return the result. return $retVal; } @@ -447,7 +435,7 @@ # Get the parameters. my ($self) = @_; # Create the tables. - $self->{_erdb}->CreateTables; + $self->CreateTables(); } =head3 Genomes @@ -570,7 +558,7 @@ =item RETURN Returns a list of the feature's contig segments. The locations are returned as a list in a list -context and as a space-delimited string in a scalar context. +context and as a comma-delimited string in a scalar context. =back @@ -597,10 +585,15 @@ if ($prevContig eq $contigID && $dir eq $prevDir) { # Here the new segment is in the same direction on the same contig. Insure the # new segment's beginning is next to the old segment's end. - if (($dir eq "-" && $beg == $prevBeg - $prevLen) || - ($dir eq "+" && $beg == $prevBeg + $prevLen)) { - # Here we need to merge two segments. Adjust the beginning and length values - # to include both segments. + if ($dir eq "-" && $beg + $len == $prevBeg) { + # Here we're merging two backward blocks, so we keep the new begin point + # and adjust the length. + $len += $prevLen; + # Pop the old segment off. The new one will replace it later. + pop @retVal; + } elsif ($dir eq "+" && $beg == $prevBeg + $prevLen) { + # Here we need to merge two forward blocks. Adjust the beginning and + # length values to include both segments. $beg = $prevBeg; $len += $prevLen; # Pop the old segment off. The new one will replace it later. @@ -609,11 +602,13 @@ } # Remember this specifier for the adjacent-segment test the next time through. ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len); + # Compute the initial base pair. + my $start = ($dir eq "+" ? $beg : $beg + $len - 1); # Add the specifier to the list. - push @retVal, "${contigID}_$beg$dir$len"; + push @retVal, "${contigID}_$start$dir$len"; } # Return the list in the format indicated by the context. - return (wantarray ? @retVal : join(' ', @retVal)); + return (wantarray ? @retVal : join(',', @retVal)); } =head3 ParseLocation @@ -644,7 +639,7 @@ shift if UNIVERSAL::isa($_[0],__PACKAGE__); my ($location) = @_; # Parse it into segments. - $location =~ /^(.*)_(\d*)([+-_])(\d*)$/; + $location =~ /^(.+)_(\d+)([+\-_])(\d+)$/; my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4); # If the direction is an underscore, convert it to a + or -. if ($dir eq "_") { @@ -660,14 +655,16 @@ return ($contigID, $start, $dir, $len); } + + =head3 PointLocation C<< my $found = Sprout::PointLocation($location, $point); >> Return the offset into the specified location of the specified point on the contig. If the specified point is before the location, a negative value will be returned. If it is -beyond the location, an undefined value will be returned. It is assumed that the offset -is for the location's contig. The location can either be new-style (using a C<+> or C<-> +beyond the location, an undefined value will be returned. It is assumed that the offset +is for the location's contig. The location can either be new-style (using a C<+> or C<-> and a length) or old-style (using C<_> and start and end positions. =over 4 @@ -720,12 +717,17 @@ should be of the form returned by L when in a list context. In other words, each location is of the form IC<_>III. +For example, the following would return the DNA sequence for contig C<83333.1:NC_000913> +between positions 1401 and 1532, inclusive. + + my $sequence = $sprout->DNASeq('83333.1:NC_000913_1401_1532'); + =over 4 =item locationList -List of location specifiers, each in the form IC<_>III (see -L for more about this format). +List of location specifiers, each in the form IC<_>III or +IC<_>IC<_>I (see L for more about this format). =item RETURN @@ -752,13 +754,15 @@ # the start point is the ending. Note that in the latter case we must reverse the DNA string # before putting it in the return value. my ($start, $stop); + Trace("Parse of \"$location\" is $beg$dir$len.") if T(SDNA => 4); if ($dir eq "+") { $start = $beg; $stop = $beg + $len - 1; } else { - $start = $beg + $len + 1; + $start = $beg - $len + 1; $stop = $beg; } + Trace("Looking for sequences containing $start through $stop.") if T(SDNA => 4); my $query = $self->Get(['IsMadeUpOf','Sequence'], "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " . " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)", @@ -770,18 +774,19 @@ $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)', 'IsMadeUpOf(len)']); my $stopPosition = $startPosition + $sequenceLength; + Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4); # Figure out the start point and length of the relevant section. my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition); - my $len = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1; + my $len1 = ($stopPosition < $stop ? $stopPosition : $stop) + 1 - $startPosition - $pos1; + Trace("Position is $pos1 for length $len1.") if T(SDNA => 4); # Add the relevant data to the location data. - $locationDNA .= substr($sequenceData, $pos1, $len); + $locationDNA .= substr($sequenceData, $pos1, $len1); } # Add this location's data to the return string. Note that we may need to reverse it. if ($dir eq '+') { $retVal .= $locationDNA; } else { - $locationDNA = join('', reverse split //, $locationDNA); - $retVal .= $locationDNA; + $retVal .= FIG::reverse_comp($locationDNA); } } # Return the result. @@ -818,6 +823,128 @@ return @retVal; } +=head3 GenomeLength + +C<< my $length = $sprout->GenomeLength($genomeID); >> + +Return the length of the specified genome in base pairs. + +=over 4 + +=item genomeID + +ID of the genome whose base pair count is desired. + +=item RETURN + +Returns the number of base pairs in all the contigs of the specified +genome. + +=back + +=cut + +sub GenomeLength { + # Get the parameters. + my ($self, $genomeID) = @_; + # Declare the return variable. + my $retVal = 0; + # Get the genome's contig sequence lengths. + my @lens = $self->GetFlat(['HasContig', 'IsMadeUpOf'], 'HasContig(from-link) = ?', + [$genomeID], 'IsMadeUpOf(len)'); + # Sum the lengths. + map { $retVal += $_ } @lens; + # Return the result. + return $retVal; +} + +=head3 FeatureCount + +C<< my $count = $sprout->FeatureCount($genomeID, $type); >> + +Return the number of features of the specified type in the specified genome. + +=over 4 + +=item genomeID + +ID of the genome whose feature count is desired. + +=item type + +Type of feature to count (eg. C, C, etc.). + +=item RETURN + +Returns the number of features of the specified type for the specified genome. + +=back + +=cut + +sub FeatureCount { + # Get the parameters. + my ($self, $genomeID, $type) = @_; + # Compute the count. + my $retVal = $self->GetCount(['HasFeature', 'Feature'], + "HasFeature(from-link) = ? AND Feature(feature-type) = ?", + [$genomeID, $type]); + # Return the result. + return $retVal; +} + +=head3 GenomeAssignments + +C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >> + +Return a list of a genome's assigned features. The return hash will contain each +assigned feature of the genome mapped to the text of its most recent functional +assignment. + +=over 4 + +=item genomeID + +ID of the genome whose functional assignments are desired. + +=item RETURN + +Returns a reference to a hash which maps each feature to its most recent +functional assignment. + +=back + +=cut + +sub GenomeAssignments { + # Get the parameters. + my ($self, $genomeID) = @_; + # Declare the return variable. + my $retVal = {}; + # Query the genome's features and annotations. We'll put the oldest annotations + # first so that the last assignment to go into the hash will be the correct one. + my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'], + "HasFeature(from-link) = ? ORDER BY Annotation(time)", + [$genomeID]); + # Loop through the annotations. + while (my $data = $query->Fetch) { + # Get the feature ID and annotation text. + my ($fid, $annotation) = $data->Values(['HasFeature(to-link)', + 'Annotation(annotation)']); + # Check to see if this is an assignment. Note that the user really + # doesn't matter to us, other than we use it to determine whether or + # not this is an assignment. + my ($user, $assignment) = _ParseAssignment('fig', $annotation); + if ($user) { + # Here it's an assignment. We put it in the return hash, overwriting + # any older assignment that might be present. + $retVal->{$fid} = $assignment; + } + } + # Return the result. + return $retVal; +} + =head3 ContigLength C<< my $length = $sprout->ContigLength($contigID); >> @@ -851,52 +978,100 @@ # Set it from the sequence data, if any. if ($sequence) { my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']); - $retVal = $start + $len; + $retVal = $start + $len - 1; } # Return the result. return $retVal; } -=head3 GenesInRegion +=head3 ClusterPEGs -C<< my (\@featureIDList, $beg, $end) = $sprout->GenesInRegion($contigID, $start, $stop); >> +C<< my $clusteredList = $sprout->ClusterPEGs($sub, \@pegs); >> -List the features which overlap a specified region in a contig. +Cluster the PEGs in a list according to the cluster coding scheme of the specified +subsystem. In order for this to work properly, the subsystem object must have +been used recently to retrieve the PEGs using the B method. +This causes the cluster numbers to be pulled into the subsystem's color hash. +If a PEG is not found in the color hash, it will not appear in the output +sequence. =over 4 -=item contigID - -ID of the contig containing the region of interest. - -=item start +=item sub -Offset of the first residue in the region of interest. +Sprout subsystem object for the relevant subsystem, from the L +method. -=item stop +=item pegs -Offset of the last residue in the region of interest. +Reference to the list of PEGs to be clustered. =item RETURN -Returns a three-element list. The first element is a list of feature IDs for the features that -overlap the region of interest. The second and third elements are the minimum and maximum -locations of the features provided on the specified contig. These may extend outside -the start and stop values. The first element (that is, the list of features) is sorted -roughly by location. +Returns a list of the PEGs, grouped into smaller lists by cluster number. =back =cut -#: Return Type @@; -sub GenesInRegion { +#: Return Type $@@; +sub ClusterPEGs { # Get the parameters. - my ($self, $contigID, $start, $stop) = @_; - # Get the maximum segment length. - my $maximumSegmentLength = $self->MaxSegment; + my ($self, $sub, $pegs) = @_; + # Declare the return variable. + my $retVal = []; + # Loop through the PEGs, creating arrays for each cluster. + for my $pegID (@{$pegs}) { + my $clusterNumber = $sub->get_cluster_number($pegID); + # Only proceed if the PEG is in a cluster. + if ($clusterNumber >= 0) { + # Push this PEG onto the sub-list for the specified cluster number. + push @{$retVal->[$clusterNumber]}, $pegID; + } + } + # Return the result. + return $retVal; +} + +=head3 GenesInRegion + +C<< my (\@featureIDList, $beg, $end) = $sprout->GenesInRegion($contigID, $start, $stop); >> + +List the features which overlap a specified region in a contig. + +=over 4 + +=item contigID + +ID of the contig containing the region of interest. + +=item start + +Offset of the first residue in the region of interest. + +=item stop + +Offset of the last residue in the region of interest. + +=item RETURN + +Returns a three-element list. The first element is a list of feature IDs for the features that +overlap the region of interest. The second and third elements are the minimum and maximum +locations of the features provided on the specified contig. These may extend outside +the start and stop values. The first element (that is, the list of features) is sorted +roughly by location. + +=back + +=cut +#: Return Type @@; +sub GenesInRegion { + # Get the parameters. + my ($self, $contigID, $start, $stop) = @_; + # Get the maximum segment length. + my $maximumSegmentLength = $self->MaxSegment; # Create a hash to receive the feature list. We use a hash so that we can eliminate # duplicates easily. The hash key will be the feature ID. The value will be a two-element - # containing the minimum and maximum offsets. We will use the offsets to sort the results + # containing the minimum and maximum offsets. We will use the offsets to sort the results # when we're building the result set. my %featuresFound = (); # Prime the values we'll use for the returned beginning and end. @@ -1001,7 +1176,7 @@ =head3 FeatureAnnotations -C<< my @descriptors = $sprout->FeatureAnnotations($featureID); >> +C<< my @descriptors = $sprout->FeatureAnnotations($featureID, $rawFlag); >> Return the annotations of a feature. @@ -1011,13 +1186,18 @@ ID of the feature whose annotations are desired. +=item rawFlag + +If TRUE, the annotation timestamps will be returned in raw form; otherwise, they +will be returned in human-readable form. + =item RETURN Returns a list of annotation descriptors. Each descriptor is a hash with the following fields. * B ID of the relevant feature. -* B time the annotation was made, in user-friendly format. +* B time the annotation was made. * B ID of the user who made the annotation @@ -1029,7 +1209,7 @@ #: Return Type @%; sub FeatureAnnotations { # Get the parameters. - my ($self, $featureID) = @_; + my ($self, $featureID, $rawFlag) = @_; # Create a query to get the feature's annotations and the associated users. my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], "IsTargetOfAnnotation(from-link) = ?", [$featureID]); @@ -1042,9 +1222,13 @@ $annotation->Values(['IsTargetOfAnnotation(from-link)', 'Annotation(time)', 'MadeAnnotation(from-link)', 'Annotation(annotation)']); + # Convert the time, if necessary. + if (! $rawFlag) { + $timeStamp = FriendlyTimestamp($timeStamp); + } # Assemble them into a hash. my $annotationHash = { featureID => $featureID, - timeStamp => FriendlyTimestamp($timeStamp), + timeStamp => $timeStamp, user => $user, text => $text }; # Add it to the return list. push @retVal, $annotationHash; @@ -1059,10 +1243,10 @@ Return all of the functional assignments for a particular feature. The data is returned as a hash of functional assignments to user IDs. A functional assignment is a type of annotation, -Functional assignments are described in the L function. Its worth noting that -we cannot filter on the content of the annotation itself because it's a text field; however, -this is not a big problem because most features only have a small number of annotations. -Finally, if a single user has multiple functional assignments, we will only keep the most +Functional assignments are described in the L function. Its worth noting that +we cannot filter on the content of the annotation itself because it's a text field; however, +this is not a big problem because most features only have a small number of annotations. +Finally, if a single user has multiple functional assignments, we will only keep the most recent one. =over 4 @@ -1073,7 +1257,7 @@ =item RETURN -Returns a hash mapping the functional assignment IDs to user IDs. +Returns a hash mapping the user IDs to functional assignment IDs. =back @@ -1083,28 +1267,25 @@ # Get the parameters. my ($self, $featureID) = @_; # Get all of the feature's annotations. - my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation'], + my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], "IsTargetOfAnnotation(from-link) = ?", - [$featureID], ['Annotation(time)', 'Annotation(annotation)']); + [$featureID], ['Annotation(time)', 'Annotation(annotation)', + 'MadeAnnotation(from-link)']); # Declare the return hash. my %retVal; - # Declare a hash for insuring we only make one assignment per user. - my %timeHash = (); # Now we sort the assignments by timestamp in reverse. my @sortedQuery = sort { -($a->[0] <=> $b->[0]) } @query; # Loop until we run out of annotations. for my $annotation (@sortedQuery) { # Get the annotation fields. - my ($timeStamp, $text) = @{$annotation}; + my ($timeStamp, $text, $user) = @{$annotation}; # Check to see if this is a functional assignment. - my ($user, $function) = _ParseAssignment($text); - if ($user && ! exists $timeHash{$user}) { + my ($actualUser, $function) = _ParseAssignment($user, $text); + if ($actualUser && ! exists $retVal{$actualUser}) { # Here it is a functional assignment and there has been no # previous assignment for this user, so we stuff it in the # return hash. - $retVal{$function} = $user; - # Insure we don't assign to this user again. - $timeHash{$user} = 1; + $retVal{$actualUser} = $function; } } # Return the hash of assignments found. @@ -1120,8 +1301,8 @@ The functional assignment is handled differently depending on the type of feature. If the feature is identified by a FIG ID (begins with the string C), then a functional assignment is a type of annotation. The format of an assignment is described in -L. Its worth noting that we cannot filter on the content of the -annotation itself because it's a text field; however, this is not a big problem because +L. Its worth noting that we cannot filter on the content of the +annotation itself because it's a text field; however, this is not a big problem because most features only have a small number of annotations. Each user has an associated list of trusted users. The assignment returned will be the most @@ -1182,20 +1363,22 @@ } } # Build a query for all of the feature's annotations, sorted by date. - my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation'], + my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC", [$featureID]); my $timeSelected = 0; # Loop until we run out of annotations. while (my $annotation = $query->Fetch()) { # Get the annotation text. - my ($text, $time) = $annotation->Values(['Annotation(annotation)','Annotation(time)']); + my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)', + 'Annotation(time)', 'MadeAnnotation(from-link)']); # Check to see if this is a functional assignment for a trusted user. - my ($user, $function) = _ParseAssignment($text); - if ($user) { + my ($actualUser, $function) = _ParseAssignment($user, $text); + Trace("Assignment user is $actualUser, text is $function.") if T(4); + if ($actualUser) { # Here it is a functional assignment. Check the time and the user # name. The time must be recent and the user must be trusted. - if ((exists $trusteeTable{$user}) && ($time > $timeSelected)) { + if ((exists $trusteeTable{$actualUser}) && ($time > $timeSelected)) { $retVal = $function; $timeSelected = $time; } @@ -1211,6 +1394,78 @@ return $retVal; } +=head3 FunctionsOf + +C<< my @functionList = $sprout->FunctionOf($featureID, $userID); >> + +Return the functional assignments of a particular feature. + +The functional assignment is handled differently depending on the type of feature. If +the feature is identified by a FIG ID (begins with the string C), then a functional +assignment is a type of annotation. The format of an assignment is described in +L. Its worth noting that we cannot filter on the content of the +annotation itself because it's a text field; however, this is not a big problem because +most features only have a small number of annotations. + +If the feature is B identified by a FIG ID, then the functional assignment +information is taken from the B table. If the table does +not contain an entry for the feature, an empty list is returned. + +=over 4 + +=item featureID + +ID of the feature whose functional assignments are desired. + +=item RETURN + +Returns a list of 2-tuples, each consisting of a user ID and the text of an assignment by +that user. + +=back + +=cut +#: Return Type @@; +sub FunctionsOf { + # Get the parameters. + my ($self, $featureID) = @_; + # Declare the return value. + my @retVal = (); + # Determine the ID type. + if ($featureID =~ m/^fig\|/) { + # Here we have a FIG feature ID. We must build the list of trusted + # users. + my %trusteeTable = (); + # Build a query for all of the feature's annotations, sorted by date. + my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], + "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC", + [$featureID]); + my $timeSelected = 0; + # Loop until we run out of annotations. + while (my $annotation = $query->Fetch()) { + # Get the annotation text. + my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)', + 'Annotation(time)', + 'MadeAnnotation(user)']); + # Check to see if this is a functional assignment for a trusted user. + my ($actualUser, $function) = _ParseAssignment($user, $text); + if ($actualUser) { + # Here it is a functional assignment. + push @retVal, [$actualUser, $function]; + } + } + } else { + # Here we have a non-FIG feature ID. In this case the user ID does not + # matter. We simply get the information from the External Alias Function + # table. + my @assignments = $self->GetEntityValues('ExternalAliasFunc', $featureID, + ['ExternalAliasFunc(func)']); + push @retVal, map { ['master', $_] } @assignments; + } + # Return the assignments found. + return @retVal; +} + =head3 BBHList C<< my $bbhHash = $sprout->BBHList($genomeID, \@featureList); >> @@ -1244,16 +1499,14 @@ my %retVal = (); # Loop through the incoming features. for my $featureID (@{$featureList}) { - # Create a query to get the feature's best hit. - my $query = $self->Get(['IsBidirectionalBestHitOf'], - "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?", - [$featureID, $genomeID]); - # Look for the best hit. - my $bbh = $query->Fetch; - if ($bbh) { - my ($targetFeature) = $bbh->Value('IsBidirectionalBestHitOf(to-link)'); - $retVal{$featureID} = $targetFeature; + # Ask the server for the feature's best hit. + my @bbhData = FIGRules::BBHData($featureID); + # Peel off the BBHs found. + my @found = (); + for my $bbh (@bbhData) { + push @found, $bbh->[0]; } + $retVal{$featureID} = \@found; } # Return the mapping. return \%retVal; @@ -1265,8 +1518,7 @@ Return a list of the similarities to the specified feature. -Sprout does not support real similarities, so this method just returns the bidirectional -best hits. +This method just returns the bidirectional best hits for performance reasons. =over 4 @@ -1286,10 +1538,7 @@ # Get the parameters. my ($self, $featureID, $count) = @_; # Ask for the best hits. - my @lists = $self->GetAll(['IsBidirectionalBestHitOf'], - "IsBidirectionalBestHitOf(from-link) = ? ORDER BY IsBidirectionalBestHitOf(score) DESC", - [$featureID], ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(score)'], - $count); + my @lists = FIGRules::BBHData($featureID); # Create the return value. my %retVal = (); for my $tuple (@lists) { @@ -1299,8 +1548,6 @@ return %retVal; } - - =head3 IsComplete C<< my $flag = $sprout->IsComplete($genomeID); >> @@ -1331,7 +1578,7 @@ my $genomeData = $self->GetEntity('Genome', $genomeID); if ($genomeData) { # The genome exists, so get the completeness flag. - ($retVal) = $genomeData->Value('complete'); + ($retVal) = $genomeData->Value('Genome(complete)'); } # Return the result. return $retVal; @@ -1371,18 +1618,18 @@ C<< my $genomeID = $sprout->GenomeOf($featureID); >> -Return the genome that contains a specified feature. +Return the genome that contains a specified feature or contig. =over 4 =item featureID -ID of the feature whose genome is desired. +ID of the feature or contig whose genome is desired. =item RETURN -Returns the ID of the genome for the specified feature. If the feature is not found, returns -an undefined value. +Returns the ID of the genome for the specified feature or contig. If the feature or contig is not +found, returns an undefined value. =back @@ -1391,8 +1638,9 @@ sub GenomeOf { # Get the parameters. my ($self, $featureID) = @_; - # Create a query to find the genome associated with the feature. - my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ?", [$featureID]); + # Create a query to find the genome associated with the incoming ID. + my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ? OR HasContig(to-link) = ?", + [$featureID, $featureID]); # Declare the return value. my $retVal; # Get the genome ID. @@ -1427,6 +1675,7 @@ sub CoupledFeatures { # Get the parameters. my ($self, $featureID) = @_; + Trace("Looking for features coupled to $featureID.") if T(coupling => 3); # Create a query to retrieve the functionally-coupled features. my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'], "ParticipatesInCoupling(from-link) = ?", [$featureID]); @@ -1439,10 +1688,12 @@ # Get the ID and score of the coupling. my ($couplingID, $score) = $clustering->Values(['Coupling(id)', 'Coupling(score)']); - # The coupling ID contains the two feature IDs separated by a space. We use - # this information to find the ID of the other feature. - my ($fid1, $fid2) = split / /, $couplingID; - my $otherFeatureID = ($featureID eq $fid1 ? $fid2 : $fid1); + Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4); + # Get the other feature that participates in the coupling. + my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'], + "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?", + [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)'); + Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4); # Attach the other feature's score to its ID. $retVal{$otherFeatureID} = $score; $found = 1; @@ -1506,19 +1757,22 @@ my ($self, $peg1, $peg2) = @_; # Declare the return variable. my @retVal = (); - # Our first task is to find out the nature of the coupling. + # Our first task is to find out the nature of the coupling: whether or not + # it exists, its score, and whether the features are stored in the same + # order as the ones coming in. my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2); # Only proceed if a coupling exists. if ($couplingID) { # Determine the ordering to place on the evidence items. If we're - # inverted, we want to see feature 2 before feature 1; otherwise, - # we want the reverse. + # inverted, we want to see feature 2 before feature 1 (descending); otherwise, + # we want feature 1 before feature 2 (normal). + Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4); my $ordering = ($inverted ? "DESC" : ""); # Get the coupling evidence. my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'], "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering", [$couplingID], - ['PCH(used)', 'UsesAsEvidence(pos)']); + ['PCH(used)', 'UsesAsEvidence(to-link)']); # Loop through the evidence items. Each piece of evidence is represented by two # positions in the evidence list, one for each feature on the other side of the # evidence link. If at some point we want to generalize to couplings with @@ -1526,10 +1780,11 @@ while (@evidenceList > 0) { my $peg1Data = shift @evidenceList; my $peg2Data = shift @evidenceList; + Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4); push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]]; } + Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4); } - # TODO: code # Return the result. return @retVal; } @@ -1571,27 +1826,133 @@ my ($self, $peg1, $peg2) = @_; # Declare the return values. We'll start with the coupling ID and undefine the # flag and score until we have more information. - my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef); + my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef); # Find the coupling data. my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'], "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)", - [$retVal], "ParticipatesInCoupling(from-link), Coupling(score)"); + [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]); # Check to see if we found anything. if (!@pegs) { + Trace("No coupling found.") if T(Coupling => 4); # No coupling, so undefine the return value. $retVal = undef; } else { # We have a coupling! Get the score and check for inversion. $score = $pegs[0]->[1]; - $inverted = ($pegs[0]->[0] eq $peg1); + my $firstFound = $pegs[0]->[0]; + $inverted = ($firstFound ne $peg1); + Trace("Coupling score is $score. First peg is $firstFound, peg 1 is $peg1.") if T(Coupling => 4); } # Return the result. return ($retVal, $inverted, $score); } +=head3 GetSynonymGroup + +C<< my $id = $sprout->GetSynonymGroup($fid); >> + +Return the synonym group name for the specified feature. + +=over 4 + +=item fid + +ID of the feature whose synonym group is desired. + +=item RETURN + +The name of the synonym group to which the feature belongs. If the feature does +not belong to a synonym group, the feature ID itself is returned. + +=back + +=cut + +sub GetSynonymGroup { + # Get the parameters. + my ($self, $fid) = @_; + # Declare the return variable. + my $retVal; + # Find the synonym group. + my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?", + [$fid], 'IsSynonymGroupFor(from-link)'); + # Check to see if we found anything. + if (@groups) { + $retVal = $groups[0]; + } else { + $retVal = $fid; + } + # Return the result. + return $retVal; +} + +=head3 GetBoundaries + +C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >> + +Determine the begin and end boundaries for the locations in a list. All of the +locations must belong to the same contig and have mostly the same direction in +order for this method to produce a meaningful result. The resulting +begin/end pair will contain all of the bases in any of the locations. + +=over 4 + +=item locList + +List of locations to process. + +=item RETURN + +Returns a 3-tuple consisting of the contig ID, the beginning boundary, +and the ending boundary. The beginning boundary will be left of the +end for mostly-forward locations and right of the end for mostly-backward +locations. + +=back + +=cut + +sub GetBoundaries { + # Get the parameters. + my ($self, @locList) = @_; + # Set up the counters used to determine the most popular direction. + my %counts = ( '+' => 0, '-' => 0 ); + # Get the last location and parse it. + my $locObject = BasicLocation->new(pop @locList); + # Prime the loop with its data. + my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right); + # Count its direction. + $counts{$locObject->Dir}++; + # Loop through the remaining locations. Note that in most situations, this loop + # will not iterate at all, because most of the time we will be dealing with a + # singleton list. + for my $loc (@locList) { + # Create a location object. + my $locObject = BasicLocation->new($loc); + # Count the direction. + $counts{$locObject->Dir}++; + # Get the left end and the right end. + my $left = $locObject->Left; + my $right = $locObject->Right; + # Merge them into the return variables. + if ($left < $beg) { + $beg = $left; + } + if ($right > $end) { + $end = $right; + } + } + # If the most common direction is reverse, flip the begin and end markers. + if ($counts{'-'} > $counts{'+'}) { + ($beg, $end) = ($end, $beg); + } + # Return the result. + return ($contig, $beg, $end); +} + =head3 CouplingID -C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >> +C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >> Return the coupling ID for a pair of feature IDs. @@ -1624,24 +1985,8 @@ =cut #: Return Type $; sub CouplingID { - return join " ", sort @_; -} - -=head3 GetEntityTypes - -C<< my @entityList = $sprout->GetEntityTypes(); >> - -Return the list of supported entity types. - -=cut -#: Return Type @; -sub GetEntityTypes { - # Get the parameters. - my ($self) = @_; - # Get the underlying database object. - my $erdb = $self->{_erdb}; - # Get its entity type list. - my @retVal = $erdb->GetEntityTypes(); + my ($self, @pegs) = @_; + return $self->DigestKey(join " ", sort @pegs); } =head3 ReadFasta @@ -1688,13 +2033,13 @@ if ($line =~ m/^>\s*(.+?)(\s|\n)/) { # Here we have a new header. Store the current sequence if we have one. if ($id) { - $retVal{$id} = uc $sequence; + $retVal{$id} = lc $sequence; } # Clear the sequence accumulator and save the new ID. ($id, $sequence) = ("$prefix$1", ""); } else { # Here we have a data line, so we add it to the sequence accumulator. - # First, we get the actual data out. Note that we normalize to upper + # First, we get the actual data out. Note that we normalize to lower # case. $line =~ /^\s*(.*?)(\s|\n)/; $sequence .= $1; @@ -1702,7 +2047,7 @@ } # Flush out the last sequence (if any). if ($sequence) { - $retVal{$id} = uc $sequence; + $retVal{$id} = lc $sequence; } # Close the file. close FASTAFILE; @@ -1717,7 +2062,7 @@ Insure that a list of feature locations is in the Sprout format. The Sprout feature location format is I_I where I<*> is C<+> for a forward gene and C<-> for a backward gene. The old format is I_I_I. If a feature is in the new format already, -it will not be changed; otherwise, it will be converted. This method can also be used to +it will not be changed; otherwise, it will be converted. This method can also be used to perform the reverse task-- insuring that all the locations are in the old format. =over 4 @@ -1789,7 +2134,7 @@ # Get the data directory name. my $outputDirectory = $self->{_options}->{dataDir}; # Dump the relations. - $self->{_erdb}->DumpRelations($outputDirectory); + $self->DumpRelations($outputDirectory); } =head3 XMLFileName @@ -1841,7 +2186,7 @@ # Get the parameters. my ($self, $objectType, $fieldHash) = @_; # Call the underlying method. - $self->{_erdb}->InsertObject($objectType, $fieldHash); + $self->InsertObject($objectType, $fieldHash); } =head3 Annotate @@ -2000,40 +2345,6 @@ return @retVal; } -=head3 Exists - -C<< my $found = $sprout->Exists($entityName, $entityID); >> - -Return TRUE if an entity exists, else FALSE. - -=over 4 - -=item entityName - -Name of the entity type (e.g. C) relevant to the existence check. - -=item entityID - -ID of the entity instance whose existence is to be checked. - -=item RETURN - -Returns TRUE if the entity instance exists, else FALSE. - -=back - -=cut -#: Return Type $; -sub Exists { - # Get the parameters. - my ($self, $entityName, $entityID) = @_; - # Check for the entity instance. - my $testInstance = $self->GetEntity($entityName, $entityID); - # Return an existence indicator. - my $retVal = ($testInstance ? 1 : 0); - return $retVal; -} - =head3 FeatureTranslation C<< my $translation = $sprout->FeatureTranslation($featureID); >> @@ -2219,6 +2530,89 @@ return @retVal; } +=head3 GetProperties + +C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >> + +Return a list of the properties with the specified characteristics. + +Properties are arbitrary key-value pairs associated with a feature. (At some point they +will also be associated with genomes.) A property value is represented by a 4-tuple of +the form B<($fid, $key, $value, $url)>. These exactly correspond to the parameter + +=over 4 + +=item fid + +ID of the feature possessing the property. + +=item key + +Name or key of the property. + +=item value + +Value of the property. + +=item url + +URL of the document that indicated the property should have this particular value, or an +empty string if no such document exists. + +=back + +The parameters act as a filter for the desired data. Any non-null parameter will +automatically match all the tuples returned. So, specifying just the I<$fid> will +return all the properties of the specified feature; similarly, specifying the I<$key> +and I<$value> parameters will return all the features having the specified property +value. + +A single property key can have many values, representing different ideas about the +feature in question. For example, one paper may declare that a feature C is +virulent, and another may declare that it is not virulent. A query about the virulence of +C would be coded as + + my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', ''); + +Here the I<$value> and I<$url> fields are left blank, indicating that those fields are +not to be filtered. The tuples returned would be + + ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf') + ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf') + +=cut +#: Return Type @@; +sub GetProperties { + # Get the parameters. + my ($self, @parms) = @_; + # Declare the return variable. + my @retVal = (); + # Now we need to create a WHERE clause that will get us the data we want. First, + # we create a list of the columns containing the data for each parameter. + my @colNames = ('HasProperty(from-link)', 'Property(property-name)', + 'Property(property-value)', 'HasProperty(evidence)'); + # Now we build the WHERE clause and the list of parameter values. + my @where = (); + my @values = (); + for (my $i = 0; $i <= $#colNames; $i++) { + my $parm = $parms[$i]; + if (defined $parm && ($parm ne '')) { + push @where, "$colNames[$i] = ?"; + push @values, $parm; + } + } + # Format the WHERE clause. + my $filter = (@values > 0 ? (join " AND ", @where) : undef); + # Ask for all the propertie values with the desired characteristics. + my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values); + while (my $valueObject = $query->Fetch()) { + my @tuple = $valueObject->Values(\@colNames); + push @retVal, \@tuple; + } + # Return the result. + return @retVal; +} + =head3 FeatureProperties C<< my @properties = $sprout->FeatureProperties($featureID); >> @@ -2413,7 +2807,7 @@ C<< my %subsystems = $sprout->SubsystemsOf($featureID); >> Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped -to the role the feature performs. +to the roles the feature performs. =over 4 @@ -2423,29 +2817,131 @@ =item RETURN -Returns a hash mapping all the feature's subsystems to the feature's role. +Returns a hash mapping all the feature's subsystems to a list of the feature's roles. =back =cut -#: Return Type %; +#: Return Type %@; sub SubsystemsOf { # Get the parameters. my ($self, $featureID) = @_; - # Use the SSCell to connect features to subsystems. + # Get the subsystem list. my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'], "ContainsFeature(to-link) = ?", [$featureID], ['HasSSCell(from-link)', 'IsRoleOf(from-link)']); # Create the return value. my %retVal = (); + # Build a hash to weed out duplicates. Sometimes the same PEG and role appears + # in two spreadsheet cells. + my %dupHash = (); # Loop through the results, adding them to the hash. for my $record (@subsystems) { - $retVal{$record->[0]} = $record->[1]; + # Get this subsystem and role. + my ($subsys, $role) = @{$record}; + # Insure it's the first time for both. + my $dupKey = "$subsys\n$role"; + if (! exists $dupHash{"$subsys\n$role"}) { + $dupHash{$dupKey} = 1; + push @{$retVal{$subsys}}, $role; + } } # Return the hash. return %retVal; } +=head3 SubsystemList + +C<< my @subsystems = $sprout->SubsystemList($featureID); >> + +Return a list containing the names of the subsystems in which the specified +feature participates. Unlike L, this method only returns the +subsystem names, not the roles. + +=over 4 + +=item featureID + +ID of the feature whose subsystem names are desired. + +=item RETURN + +Returns a list of the names of the subsystems in which the feature participates. + +=back + +=cut +#: Return Type @; +sub SubsystemList { + # Get the parameters. + my ($self, $featureID) = @_; + # Get the list of names. + my @retVal = $self->GetFlat(['ContainsFeature', 'HasSSCell'], "ContainsFeature(to-link) = ?", + [$featureID], 'HasSSCell(from-link)'); + # Return the result. + return @retVal; +} + +=head3 GenomeSubsystemData + +C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >> + +Return a hash mapping genome features to their subsystem roles. + +=over 4 + +=item genomeID + +ID of the genome whose subsystem feature map is desired. + +=item RETURN + +Returns a hash mapping each feature of the genome to a list of 2-tuples. Eacb +2-tuple contains a subsystem name followed by a role ID. + +=back + +=cut + +sub GenomeSubsystemData { + # Get the parameters. + my ($self, $genomeID) = @_; + # Declare the return variable. + my %retVal = (); + # Get a list of the genome features that participate in subsystems. For each + # feature we get its spreadsheet cells and the corresponding roles. + my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'], + "HasFeature(from-link) = ?", [$genomeID], + ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']); + # Now we get a list of the spreadsheet cells and their associated subsystems. Subsystems + # with an unknown variant code (-1) are skipped. Note the genome ID is at both ends of the + # list. We use it at the beginning to get all the spreadsheet cells for the genome and + # again at the end to filter out participation in subsystems with a negative variant code. + my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'], + "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?", + [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']); + # Now "@roleData" lists the spreadsheet cell and role for each of the genome's features. + # "@cellData" lists the subsystem name for each of the genome's spreadsheet cells. We + # link these two lists together to create the result. First, we want a hash mapping + # spreadsheet cells to subsystem names. + my %subHash = map { $_->[0] => $_->[1] } @cellData; + # We loop through @cellData to build the hash. + for my $roleEntry (@roleData) { + # Get the data for this feature and cell. + my ($fid, $cellID, $role) = @{$roleEntry}; + # Check for a subsystem name. + my $subsys = $subHash{$cellID}; + if ($subsys) { + # Insure this feature has an entry in the return hash. + if (! exists $retVal{$fid}) { $retVal{$fid} = []; } + # Merge in this new data. + push @{$retVal{$fid}}, [$subsys, $role]; + } + } + # Return the result. + return %retVal; +} + =head3 RelatedFeatures C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >> @@ -2481,9 +2977,7 @@ # Get the parameters. my ($self, $featureID, $function, $userID) = @_; # Get a list of the features that are BBHs of the incoming feature. - my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'], - "IsBidirectionalBestHitOf(from-link) = ?", [$featureID], - 'IsBidirectionalBestHitOf(to-link)'); + my @bbhFeatures = map { $_->[0] } FIGRules::BBHData($featureID); # Now we loop through the features, pulling out the ones that have the correct # functional assignment. my @retVal = (); @@ -2547,125 +3041,6 @@ return @retVal; } -=head3 GetAll - -C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >> - -Return a list of values taken from the objects returned by a query. The first three -parameters correspond to the parameters of the L method. The final parameter is -a list of the fields desired from each record found by the query. The field name -syntax is the standard syntax used for fields in the B system-- -B(I)>-- where I is the name of the relevant entity -or relationship and I is the name of the field. - -The list returned will be a list of lists. Each element of the list will contain -the values returned for the fields specified in the fourth parameter. If one of the -fields specified returns multiple values, they are flattened in with the rest. For -example, the following call will return a list of the features in a particular -spreadsheet cell, and each feature will be represented by a list containing the -feature ID followed by all of its aliases. - -C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >> - -=over 4 - -=item objectNames - -List containing the names of the entity and relationship objects to be retrieved. - -=item filterClause - -WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can -be parameterized with parameter markers (C). Each field used must be specified in the standard form -B(I)>. Any parameters specified in the filter clause should be added to the -parameter list as additional parameters. The fields in a filter clause can come from primary -entity relations, relationship relations, or secondary entity relations; however, all of the -entities and relationships involved must be included in the list of object names. - -=item parameterList - -List of the parameters to be substituted in for the parameters marks in the filter clause. - -=item fields - -List of the fields to be returned in each element of the list returned. - -=item count - -Maximum number of records to return. If omitted or 0, all available records will be returned. - -=item RETURN - -Returns a list of list references. Each element of the return list contains the values for the -fields specified in the B parameter. - -=back - -=cut -#: Return Type @@; -sub GetAll { - # Get the parameters. - my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_; - # Call the ERDB method. - my @retVal = $self->{_erdb}->GetAll($objectNames, $filterClause, $parameterList, - $fields, $count); - # Return the resulting list. - return @retVal; -} - -=head3 GetFlat - -C<< my @list = $sprout->GetFlat(\@objectNames, $filterClause, $parameterList, $field); >> - -This is a variation of L that asks for only a single field per record and -returns a single flattened list. - -=over 4 - -=item objectNames - -List containing the names of the entity and relationship objects to be retrieved. - -=item filterClause - -WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can -be parameterized with parameter markers (C). Each field used must be specified in the standard form -B(I)>. Any parameters specified in the filter clause should be added to the -parameter list as additional parameters. The fields in a filter clause can come from primary -entity relations, relationship relations, or secondary entity relations; however, all of the -entities and relationships involved must be included in the list of object names. - -=item parameterList - -List of the parameters to be substituted in for the parameters marks in the filter clause. - -=item field - -Name of the field to be used to get the elements of the list returned. - -=item RETURN - -Returns a list of values. - -=back - -=cut -#: Return Type @; -sub GetFlat { - # Get the parameters. - my ($self, $objectNames, $filterClause, $parameterList, $field) = @_; - # Construct the query. - my $query = $self->Get($objectNames, $filterClause, $parameterList); - # Create the result list. - my @retVal = (); - # Loop through the records, adding the field values found to the result list. - while (my $row = $query->Fetch()) { - push @retVal, $row->Value($field); - } - # Return the list created. - return @retVal; -} - =head3 Protein C<< my $protein = Sprout::Protein($sequence, $table); >> @@ -2767,14 +3142,14 @@ # Create the return list, priming it with the name of the data directory. my @retVal = ($self->{_options}->{dataDir}); # Concatenate the table names. - push @retVal, $self->{_erdb}->GetTableNames(); + push @retVal, $self->GetTableNames(); # Return the result. return @retVal; } =head3 LowBBHs -C<< my %bbhMap = $sprout->GoodBBHs($featureID, $cutoff); >> +C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >> Return the bidirectional best hits of a feature whose score is no greater than a specified cutoff value. A higher cutoff value will allow inclusion of hits with @@ -2803,11 +3178,8 @@ my ($self, $featureID, $cutoff) = @_; # Create the return hash. my %retVal = (); - # Create a query to get the desired BBHs. - my @bbhList = $self->GetAll(['IsBidirectionalBestHitOf'], - 'IsBidirectionalBestHitOf(sc) <= ? AND IsBidirectionalBestHitOf(from-link) = ?', - [$cutoff, $featureID], - ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(sc)']); + # Query for the desired BBHs. + my @bbhList = FIGRules::BBHData($featureID, $cutoff); # Form the results into the return hash. for my $pair (@bbhList) { $retVal{$pair->[0]} = $pair->[1]; @@ -2816,6 +3188,71 @@ return %retVal; } +=head3 Sims + +C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >> + +Get a list of similarities for a specified feature. Similarity information is not kept in the +Sprout database; rather, they are retrieved from a network server. The similarities are +returned as B objects. A Sim object is actually a list reference that has been blessed +so that its elements can be accessed by name. + +Similarities can be either raw or expanded. The raw similarities are basic +hits between features with similar DNA. Expanding a raw similarity drags in any +features considered substantially identical. So, for example, if features B, +B, and B are all substatially identical to B, then a raw similarity +B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>. + +=over 4 + +=item fid + +ID of the feature whose similarities are desired. + +=item maxN + +Maximum number of similarities to return. + +=item maxP + +Minumum allowable similarity score. + +=item select + +Selection criterion: C means only raw similarities are returned; C +means only similarities to FIG features are returned; C means all expanded +similarities are returned; and C means similarities are expanded until the +number of FIG features equals the maximum. + +=item max_expand + +The maximum number of features to expand. + +=item filters + +Reference to a hash containing filter information, or a subroutine that can be +used to filter the sims. + +=item RETURN + +Returns a reference to a list of similarity objects, or C if an error +occurred. + +=back + +=cut + +sub Sims { + # Get the parameters. + my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_; + # Create the shim object to test for deleted FIDs. + my $shim = FidCheck->new($self); + # Ask the network for sims. + my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters); + # Return the result. + return $retVal; +} + =head3 GetGroups C<< my %groups = $sprout->GetGroups(\@groupList); >> @@ -2864,26 +3301,289 @@ return %retVal; } +=head3 MyGenomes + +C<< my @genomes = Sprout::MyGenomes($dataDir); >> + +Return a list of the genomes to be included in the Sprout. + +This method is provided for use during the Sprout load. It presumes the Genome load file has +already been created. (It will be in the Sprout data directory and called either C +or C.) Essentially, it reads in the Genome load file and strips out the genome +IDs. + +=over 4 + +=item dataDir + +Directory containing the Sprout load files. + +=back + +=cut +#: Return Type @; +sub MyGenomes { + # Get the parameters. + my ($dataDir) = @_; + # Compute the genome file name. + my $genomeFileName = LoadFileName($dataDir, "Genome"); + # Extract the genome IDs from the files. + my @retVal = map { $_ =~ /^(\S+)/; $1 } Tracer::GetFile($genomeFileName); + # Return the result. + return @retVal; +} + +=head3 LoadFileName + +C<< my $fileName = Sprout::LoadFileName($dataDir, $tableName); >> + +Return the name of the load file for the specified table in the specified data +directory. + +=over 4 + +=item dataDir + +Directory containing the Sprout load files. + +=item tableName + +Name of the table whose load file is desired. + +=item RETURN + +Returns the name of the file containing the load data for the specified table, or +C if no load file is present. + +=back + +=cut +#: Return Type $; +sub LoadFileName { + # Get the parameters. + my ($dataDir, $tableName) = @_; + # Declare the return variable. + my $retVal; + # Check for the various file names. + if (-e "$dataDir/$tableName") { + $retVal = "$dataDir/$tableName"; + } elsif (-e "$dataDir/$tableName.dtx") { + $retVal = "$dataDir/$tableName.dtx"; + } + # Return the result. + return $retVal; +} + +=head3 DeleteGenome + +C<< my $stats = $sprout->DeleteGenome($genomeID, $testFlag); >> + +Delete a genome from the database. + +=over 4 + +=item genomeID + +ID of the genome to delete + +=item testFlag + +If TRUE, then the DELETE statements will be traced, but no deletions will occur. + +=item RETURN + +Returns a statistics object describing the rows deleted. + +=back + +=cut +#: Return Type $%; +sub DeleteGenome { + # Get the parameters. + my ($self, $genomeID, $testFlag) = @_; + # Perform the delete for the genome's features. + my $retVal = $self->Delete('Feature', "fig|$genomeID.%", $testFlag); + # Perform the delete for the primary genome data. + my $stats = $self->Delete('Genome', $genomeID, $testFlag); + $retVal->Accumulate($stats); + # Return the result. + return $retVal; +} + +=head3 Fix + +C<< my %fixedHash = Sprout::Fix(%groupHash); >> + +Prepare a genome group hash (like that returned by L for processing. +Groups with the same primary name will be combined. The primary name is the +first capitalized word in the group name. + +=over 4 + +=item groupHash + +Hash to be fixed up. + +=item RETURN + +Returns a fixed-up version of the hash. + +=back + +=cut + +sub Fix { + # Get the parameters. + my (%groupHash) = @_; + # Create the result hash. + my %retVal = (); + # Copy over the genomes. + for my $groupID (keys %groupHash) { + # Make a safety copy of the group ID. + my $realGroupID = $groupID; + # Yank the primary name. + if ($groupID =~ /([A-Z]\w+)/) { + $realGroupID = $1; + } + # Append this group's genomes into the result hash. + Tracer::AddToListMap(\%retVal, $realGroupID, @{$groupHash{$groupID}}); + } + # Return the result hash. + return %retVal; +} + +=head3 GroupPageName + +C<< my $name = $sprout->GroupPageName($group); >> + +Return the name of the page for the specified NMPDR group. + +=over 4 + +=item group + +Name of the relevant group. + +=item RETURN + +Returns the relative page name (e.g. C<../content/campy.php>). If the group file is not in +memory it will be read in. + +=back + +=cut + +sub GroupPageName { + # Get the parameters. + my ($self, $group) = @_; + # Declare the return variable. + my $retVal; + # Check for the group file data. + if (! defined $self->{groupHash}) { + # Read the group file. + my %groupData = Sprout::ReadGroupFile($self->{_options}->{dataDir} . "/groups.tbl"); + # Store it in our object. + $self->{groupHash} = \%groupData; + } + # Compute the real group name. + my $realGroup = $group; + if ($group =~ /([A-Z]\w+)/) { + $realGroup = $1; + } + # Return the page name. + $retVal = "../content/" . $self->{groupHash}->{$realGroup}->[1]; + # Return the result. + return $retVal; +} + +=head3 ReadGroupFile + +C<< my %groupData = Sprout::ReadGroupFile($groupFileName); >> + +Read in the data from the specified group file. The group file contains information +about each of the NMPDR groups. + +=over 4 + +=item name + +Name of the group. + +=item page + +Name of the group's page on the web site (e.g. C for +Campylobacter) + +=item genus + +Genus of the group + +=item species + +Species of the group, or an empty string if the group is for an entire +genus. If the group contains more than one species, the species names +should be separated by commas. + +=back + +The parameters to this method are as follows + +=over 4 + +=item groupFile + +Name of the file containing the group data. + +=item RETURN + +Returns a hash keyed on group name. The value of each hash + +=back + +=cut + +sub ReadGroupFile { + # Get the parameters. + my ($groupFileName) = @_; + # Declare the return variable. + my %retVal; + # Read the group file. + my @groupLines = Tracer::GetFile($groupFileName); + for my $groupLine (@groupLines) { + my ($name, $page, $genus, $species) = split(/\t/, $groupLine); + $retVal{$name} = [$page, $genus, $species]; + } + # Return the result. + return %retVal; +} + =head2 Internal Utility Methods =head3 ParseAssignment Parse annotation text to determine whether or not it is a functional assignment. If it is, -the user, function text, and assigning user will be returned as a 3-element list. If it +the user, function text, and assigning user will be returned as a 3-element list. If it isn't, an empty list will be returned. A functional assignment is always of the form - IC<\nset >IC< function to\n>I - -where I is the B, I is the B, and I is the -actual functional role. In most cases, the user and the assigning user will be the -same, but that is not always the case. + CIC< function to\n>I + +where I is the B, and I is the actual functional role. In most cases, +the user and the assigning user (from MadeAnnotation) will be the same, but that is +not always the case. + +In addition, the functional role may contain extra data that is stripped, such as +terminating spaces or a comment separated from the rest of the text by a tab. This is a static method. =over 4 +=item user + +Name of the assigning user. + =item text Text of the annotation. @@ -2899,15 +3599,22 @@ sub _ParseAssignment { # Get the parameters. - my ($text) = @_; + my ($user, $text) = @_; # Declare the return value. my @retVal = (); # Check to see if this is a functional assignment. - my ($user, $type, $function) = split(/\n/, $text); - if ($type =~ m/^set ([^ ]+) function to$/i) { - # Here it is, so we return the user name (which is in $1), the functional role text, - # and the assigning user. - @retVal = ($1, $function, $user); + my ($type, $function) = split(/\n/, $text); + if ($type =~ m/^set function to$/i) { + # Here we have an assignment without a user, so we use the incoming user ID. + @retVal = ($user, $function); + } elsif ($type =~ m/^set (\S+) function to$/i) { + # Here we have an assignment with a user that is passed back to the caller. + @retVal = ($1, $function); + } + # If we have an assignment, we need to clean the function text. There may be + # extra junk at the end added as a note from the user. + if (defined( $retVal[1] )) { + $retVal[1] =~ s/(\t\S)?\s*$//; } # Return the result list. return @retVal; @@ -2935,7 +3642,7 @@ sub FriendlyTimestamp { my ($timeValue) = @_; - my $retVal = strftime("%a %b %e %H:%M:%S %Y", localtime($timeValue)); + my $retVal = localtime($timeValue); return $retVal; } @@ -2996,4 +3703,5 @@ $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url }); } -1; \ No newline at end of file + +1;