--- Sprout.pm 2006/04/03 18:33:58 1.55 +++ Sprout.pm 2006/06/25 07:34:46 1.74 @@ -1,5 +1,8 @@ package Sprout; + require Exporter; + use ERDB; + @ISA = qw(Exporter ERDB); use Data::Dumper; use strict; use Carp; @@ -7,9 +10,9 @@ use XML::Simple; use DBQuery; use DBObject; - use ERDB; use Tracer; use FIGRules; + use FidCheck; use Stats; use POSIX qw(strftime); @@ -32,6 +35,8 @@ query tasks. For example, L lists the IDs of all the genomes in the database and L returns the DNA sequence for a specified genome location. +The Sprout object is a subclass of the ERDB object and inherits all its properties and methods. + =cut #: Constructor SFXlate->new_sprout_only(); @@ -62,10 +67,12 @@ * B name of the XML file containing the database definition (default C) -* B user name and password, delimited by a slash (default C) +* B user name and password, delimited by a slash (default same as SEED) * B connection port (default C<0>) +* B connection socket (default same as SEED) + * B maximum number of residues per feature segment, (default C<4500>) * B maximum number of residues per sequence, (default C<8000>) @@ -98,6 +105,7 @@ # user name and password port => $FIG_Config::dbport, # database connection port + sock => $FIG_Config::dbsock, maxSegmentLength => 4500, # maximum feature segment length maxSequenceLength => 8000, # maximum contig sequence length noDBOpen => 0, # 1 to suppress the database open @@ -111,16 +119,16 @@ my $dbh; if (! $optionTable->{noDBOpen}) { $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, - $password, $optionTable->{port}); + $password, $optionTable->{port}, undef, $optionTable->{sock}); } # Create the ERDB object. my $xmlFileName = "$optionTable->{xmlFileName}"; - my $erdb = ERDB->new($dbh, $xmlFileName); - # Create this object. - my $self = { _erdb => $erdb, _options => $optionTable, _xmlName => $xmlFileName }; - # Bless and return it. - bless $self; - return $self; + my $retVal = ERDB::new($class, $dbh, $xmlFileName); + # Add the option table and XML file name. + $retVal->{_options} = $optionTable; + $retVal->{_xmlName} = $xmlFileName; + # Return it. + return $retVal; } =head3 MaxSegment @@ -155,196 +163,6 @@ return $self->{_options}->{maxSequenceLength}; } -=head3 Get - -C<< my $query = $sprout->Get(\@objectNames, $filterClause, \@parameterList); >> - -This method allows a general query against the Sprout data using a specified filter clause. - -The filter is a standard WHERE/ORDER BY clause with question marks as parameter markers and each -field name represented in the form B(I)>. For example, the -following call requests all B objects for the genus specified in the variable -$genus. - -C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ?", [$genus]); >> - -The WHERE clause contains a single question mark, so there is a single additional -parameter representing the parameter value. It would also be possible to code - -C<< $query = $sprout->Get(['Genome'], "Genome(genus) = \'$genus\'"); >> - -however, this version of the call would generate a syntax error if there were any quote -characters inside the variable C<$genus>. - -The use of the strange parenthesized notation for field names enables us to distinguish -hyphens contained within field names from minus signs that participate in the computation -of the WHERE clause. All of the methods that manipulate fields will use this same notation. - -It is possible to specify multiple entity and relationship names in order to retrieve more than -one object's data at the same time, which allows highly complex joined queries. For example, - -C<< $query = $sprout->Get(['Genome', 'ComesFrom', 'Source'], "Genome(genus) = ?", [$genus]); >> - -This query returns all the genomes for a particular genus and allows access to the -sources from which they came. The join clauses to go from Genome to Source are generated -automatically. - -Finally, the filter clause can contain sort information. To do this, simply put an C -clause at the end of the filter. Field references in the ORDER BY section follow the same rules -as they do in the filter itself; in other words, each one must be of the form B(I)>. -For example, the following filter string gets all genomes for a particular genus and sorts -them by species name. - -C<< $query = $sprout->Get(['Genome'], "Genome(genus) = ? ORDER BY Genome(species)", [$genus]); >> - -It is also permissible to specify I an ORDER BY clause. For example, the following invocation gets -all genomes ordered by genus and species. - -C<< $query = $sprout->Get(['Genome'], "ORDER BY Genome(genus), Genome(species)"); >> - -Odd things may happen if one of the ORDER BY fields is in a secondary relation. So, for example, an -attempt to order Bs by alias may (depending on the underlying database engine used) cause -a single feature to appear more than once. - -If multiple names are specified, then the query processor will automatically determine a -join path between the entities and relationships. The algorithm used is very simplistic. -In particular, you can't specify any entity or relationship more than once, and if a -relationship is recursive, the path is determined by the order in which the entity -and the relationship appear. For example, consider a recursive relationship B -which relates B objects to other B objects. If the join path is -coded as C<['People', 'IsParentOf']>, then the people returned will be parents. If, however, -the join path is C<['IsParentOf', 'People']>, then the people returned will be children. - -=over 4 - -=item objectNames - -List containing the names of the entity and relationship objects to be retrieved. - -=item filterClause - -WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can -be parameterized with parameter markers (C). Each field used must be specified in the standard form -B(I)>. Any parameters specified in the filter clause should be added to the -parameter list as additional parameters. The fields in a filter clause can come from primary -entity relations, relationship relations, or secondary entity relations; however, all of the -entities and relationships involved must be included in the list of object names. - -=item parameterList - -List of the parameters to be substituted in for the parameters marks in the filter clause. - -=item RETURN - -Returns a B that can be used to iterate through all of the results. - -=back - -=cut - -sub Get { - # Get the parameters. - my ($self, $objectNames, $filterClause, $parameterList) = @_; - # We differ from the ERDB Get method in that the parameter list is passed in as a list reference - # rather than a list of parameters. The next step is to convert the parameters from a reference - # to a real list. We can only do this if the parameters have been specified. - my @parameters; - if ($parameterList) { @parameters = @{$parameterList}; } - return $self->{_erdb}->Get($objectNames, $filterClause, @parameters); -} - -=head3 GetEntity - -C<< my $entityObject = $sprout->GetEntity($entityType, $ID); >> - -Return an object describing the entity instance with a specified ID. - -=over 4 - -=item entityType - -Entity type name. - -=item ID - -ID of the desired entity. - -=item RETURN - -Returns a B representing the desired entity instance, or an undefined value if no -instance is found with the specified key. - -=back - -=cut - -sub GetEntity { - # Get the parameters. - my ($self, $entityType, $ID) = @_; - # Call the ERDB method. - return $self->{_erdb}->GetEntity($entityType, $ID); -} - -=head3 GetEntityValues - -C<< my @values = GetEntityValues($entityType, $ID, \@fields); >> - -Return a list of values from a specified entity instance. - -=over 4 - -=item entityType - -Entity type name. - -=item ID - -ID of the desired entity. - -=item fields - -List of field names, each of the form IC<(>IC<)>. - -=item RETURN - -Returns a flattened list of the values of the specified fields for the specified entity. - -=back - -=cut -#: Return Type @; -sub GetEntityValues { - # Get the parameters. - my ($self, $entityType, $ID, $fields) = @_; - # Call the ERDB method. - return $self->{_erdb}->GetEntityValues($entityType, $ID, $fields); -} - -=head3 ShowMetaData - -C<< $sprout->ShowMetaData($fileName); >> - -This method outputs a description of the database to an HTML file in the data directory. - -=over 4 - -=item fileName - -Fully-qualified name to give to the output file. - -=back - -=cut - -sub ShowMetaData { - # Get the parameters. - my ($self, $fileName) = @_; - # Compute the file name. - my $options = $self->{_options}; - # Call the show method on the underlying ERDB object. - $self->{_erdb}->ShowMetaData($fileName); -} - =head3 Load C<< $sprout->Load($rebuild); >>; @@ -379,10 +197,8 @@ sub Load { # Get the parameters. my ($self, $rebuild) = @_; - # Get the database object. - my $erdb = $self->{_erdb}; # Load the tables from the data directory. - my $retVal = $erdb->LoadTables($self->{_options}->{dataDir}, $rebuild); + my $retVal = $self->LoadTables($self->{_options}->{dataDir}, $rebuild); # Return the statistics. return $retVal; } @@ -422,8 +238,6 @@ sub LoadUpdate { # Get the parameters. my ($self, $truncateFlag, $tableList) = @_; - # Get the database object. - my $erdb = $self->{_erdb}; # Declare the return value. my $retVal = Stats->new(); # Get the data directory. @@ -437,7 +251,7 @@ Trace("No load file found for $tableName in $dataDir.") if T(0); } else { # Attempt to load this table. - my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag); + my $result = $self->LoadTable($fileName, $tableName, $truncateFlag); # Accumulate the resulting statistics. $retVal->Accumulate($result); } @@ -446,6 +260,142 @@ return $retVal; } +=head3 GenomeCounts + +C<< my ($arch, $bact, $euk, $vir, $env, $unk) = $sprout->GenomeCounts($complete); >> + +Count the number of genomes in each domain. If I<$complete> is TRUE, only complete +genomes will be included in the counts. + +=over 4 + +=item complete + +TRUE if only complete genomes are to be counted, FALSE if all genomes are to be +counted + +=item RETURN + +A six-element list containing the number of genomes in each of six categories-- +Archaea, Bacteria, Eukaryota, Viral, Environmental, and Unknown, respectively. + +=back + +=cut + +sub GenomeCounts { + # Get the parameters. + my ($self, $complete) = @_; + # Set the filter based on the completeness flag. + my $filter = ($complete ? "Genome(complete) = 1" : ""); + # Get all the genomes and the related taxonomy information. + my @genomes = $self->GetAll(['Genome'], $filter, [], ['Genome(id)', 'Genome(taxonomy)']); + # Clear the counters. + my ($arch, $bact, $euk, $vir, $env, $unk) = (0, 0, 0, 0, 0, 0); + # Loop through, counting the domains. + for my $genome (@genomes) { + if ($genome->[1] =~ /^archaea/i) { ++$arch } + elsif ($genome->[1] =~ /^bacter/i) { ++$bact } + elsif ($genome->[1] =~ /^eukar/i) { ++$euk } + elsif ($genome->[1] =~ /^vir/i) { ++$vir } + elsif ($genome->[1] =~ /^env/i) { ++$env } + else { ++$unk } + } + # Return the counts. + return ($arch, $bact, $euk, $vir, $env, $unk); +} + +=head3 ContigCount + +C<< my $count = $sprout->ContigCount($genomeID); >> + +Return the number of contigs for the specified genome ID. + +=over 4 + +=item genomeID + +ID of the genome whose contig count is desired. + +=item RETURN + +Returns the number of contigs for the specified genome. + +=back + +=cut + +sub ContigCount { + # Get the parameters. + my ($self, $genomeID) = @_; + # Get the contig count. + my $retVal = $self->GetCount(['Contig', 'HasContig'], "HasContig(from-link) = ?", [$genomeID]); + # Return the result. + return $retVal; +} + +=head3 GeneMenu + +C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params); >> + +Return an HTML select menu of genomes. Each genome will be an option in the menu, +and will be displayed by name with the ID and a contig count attached. The selection +value will be the genome ID. The genomes will be sorted by genus/species name. + +=over 4 + +=item attributes + +Reference to a hash mapping attributes to values for the SELECT tag generated. + +=item filterString + +A filter string for use in selecting the genomes. The filter string must conform +to the rules for the C<< ERDB->Get >> method. + +=item params + +Reference to a list of values to be substituted in for the parameter marks in +the filter string. + +=item RETURN + +Returns an HTML select menu with the specified genomes as selectable options. + +=back + +=cut + +sub GeneMenu { + # Get the parameters. + my ($self, $attributes, $filterString, $params) = @_; + # Start the menu. + my $retVal = "\n"; + # Return the result. + return $retVal; +} =head3 Build C<< $sprout->Build(); >> @@ -460,7 +410,7 @@ # Get the parameters. my ($self) = @_; # Create the tables. - $self->{_erdb}->CreateTables; + $self->CreateTables(); } =head3 Genomes @@ -846,6 +796,128 @@ return @retVal; } +=head3 GenomeLength + +C<< my $length = $sprout->GenomeLength($genomeID); >> + +Return the length of the specified genome in base pairs. + +=over 4 + +=item genomeID + +ID of the genome whose base pair count is desired. + +=item RETURN + +Returns the number of base pairs in all the contigs of the specified +genome. + +=back + +=cut + +sub GenomeLength { + # Get the parameters. + my ($self, $genomeID) = @_; + # Declare the return variable. + my $retVal = 0; + # Get the genome's contig sequence lengths. + my @lens = $self->GetFlat(['HasContig', 'IsMadeUpOf'], 'HasContig(from-link) = ?', + [$genomeID], 'IsMadeUpOf(len)'); + # Sum the lengths. + map { $retVal += $_ } @lens; + # Return the result. + return $retVal; +} + +=head3 FeatureCount + +C<< my $count = $sprout->FeatureCount($genomeID, $type); >> + +Return the number of features of the specified type in the specified genome. + +=over 4 + +=item genomeID + +ID of the genome whose feature count is desired. + +=item type + +Type of feature to count (eg. C, C, etc.). + +=item RETURN + +Returns the number of features of the specified type for the specified genome. + +=back + +=cut + +sub FeatureCount { + # Get the parameters. + my ($self, $genomeID, $type) = @_; + # Compute the count. + my $retVal = $self->GetCount(['HasFeature', 'Feature'], + "HasFeature(from-link) = ? AND Feature(feature-type) = ?", + [$genomeID, $type]); + # Return the result. + return $retVal; +} + +=head3 GenomeAssignments + +C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >> + +Return a list of a genome's assigned features. The return hash will contain each +assigned feature of the genome mapped to the text of its most recent functional +assignment. + +=over 4 + +=item genomeID + +ID of the genome whose functional assignments are desired. + +=item RETURN + +Returns a reference to a hash which maps each feature to its most recent +functional assignment. + +=back + +=cut + +sub GenomeAssignments { + # Get the parameters. + my ($self, $genomeID) = @_; + # Declare the return variable. + my $retVal = {}; + # Query the genome's features and annotations. We'll put the oldest annotations + # first so that the last assignment to go into the hash will be the correct one. + my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'], + "HasFeature(from-link) = ? ORDER BY Annotation(time)", + [$genomeID]); + # Loop through the annotations. + while (my $data = $query->Fetch) { + # Get the feature ID and annotation text. + my ($fid, $annotation) = $data->Values(['HasFeature(to-link)', + 'Annotation(annotation)']); + # Check to see if this is an assignment. Note that the user really + # doesn't matter to us, other than we use it to determine whether or + # not this is an assignment. + my ($user, $assignment) = _ParseAssignment('fig', $annotation); + if ($user) { + # Here it's an assignment. We put it in the return hash, overwriting + # any older assignment that might be present. + $retVal->{$fid} = $assignment; + } + } + # Return the result. + return $retVal; +} + =head3 ContigLength C<< my $length = $sprout->ContigLength($contigID); >> @@ -1527,18 +1599,18 @@ C<< my $genomeID = $sprout->GenomeOf($featureID); >> -Return the genome that contains a specified feature. +Return the genome that contains a specified feature or contig. =over 4 =item featureID -ID of the feature whose genome is desired. +ID of the feature or contig whose genome is desired. =item RETURN -Returns the ID of the genome for the specified feature. If the feature is not found, returns -an undefined value. +Returns the ID of the genome for the specified feature or contig. If the feature or contig is not +found, returns an undefined value. =back @@ -1547,8 +1619,9 @@ sub GenomeOf { # Get the parameters. my ($self, $featureID) = @_; - # Create a query to find the genome associated with the feature. - my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ?", [$featureID]); + # Create a query to find the genome associated with the incoming ID. + my $query = $self->Get(['IsLocatedIn', 'HasContig'], "IsLocatedIn(from-link) = ? OR HasContig(to-link) = ?", + [$featureID, $featureID]); # Declare the return value. my $retVal; # Get the genome ID. @@ -1595,10 +1668,10 @@ # Get the ID and score of the coupling. my ($couplingID, $score) = $clustering->Values(['Coupling(id)', 'Coupling(score)']); - # The coupling ID contains the two feature IDs separated by a space. We use - # this information to find the ID of the other feature. - my ($fid1, $fid2) = split / /, $couplingID; - my $otherFeatureID = ($featureID eq $fid1 ? $fid2 : $fid1); + # Get the other feature that participates in the coupling. + my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'], + "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?", + [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)'); # Attach the other feature's score to its ID. $retVal{$otherFeatureID} = $score; $found = 1; @@ -1731,7 +1804,7 @@ my ($self, $peg1, $peg2) = @_; # Declare the return values. We'll start with the coupling ID and undefine the # flag and score until we have more information. - my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef); + my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef); # Find the coupling data. my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'], "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)", @@ -1752,9 +1825,48 @@ return ($retVal, $inverted, $score); } +=head3 GetSynonymGroup + +C<< my $id = $sprout->GetSynonymGroup($fid); >> + +Return the synonym group name for the specified feature. + +=over 4 + +=item fid + +ID of the feature whose synonym group is desired. + +=item RETURN + +The name of the synonym group to which the feature belongs. If the feature does +not belong to a synonym group, the feature ID itself is returned. + +=back + +=cut + +sub GetSynonymGroup { + # Get the parameters. + my ($self, $fid) = @_; + # Declare the return variable. + my $retVal; + # Find the synonym group. + my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?", + [$fid], 'IsSynonymGroupFor(from-link)'); + # Check to see if we found anything. + if (@groups) { + $retVal = $groups[0]; + } else { + $retVal = $fid; + } + # Return the result. + return $retVal; +} + =head3 CouplingID -C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >> +C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >> Return the coupling ID for a pair of feature IDs. @@ -1787,24 +1899,8 @@ =cut #: Return Type $; sub CouplingID { - return join " ", sort @_; -} - -=head3 GetEntityTypes - -C<< my @entityList = $sprout->GetEntityTypes(); >> - -Return the list of supported entity types. - -=cut -#: Return Type @; -sub GetEntityTypes { - # Get the parameters. - my ($self) = @_; - # Get the underlying database object. - my $erdb = $self->{_erdb}; - # Get its entity type list. - my @retVal = $erdb->GetEntityTypes(); + my ($self, @pegs) = @_; + return $self->DigestKey(join " ", sort @pegs); } =head3 ReadFasta @@ -1952,7 +2048,7 @@ # Get the data directory name. my $outputDirectory = $self->{_options}->{dataDir}; # Dump the relations. - $self->{_erdb}->DumpRelations($outputDirectory); + $self->DumpRelations($outputDirectory); } =head3 XMLFileName @@ -2004,7 +2100,7 @@ # Get the parameters. my ($self, $objectType, $fieldHash) = @_; # Call the underlying method. - $self->{_erdb}->InsertObject($objectType, $fieldHash); + $self->InsertObject($objectType, $fieldHash); } =head3 Annotate @@ -2163,41 +2259,6 @@ return @retVal; } -=head3 Exists - -C<< my $found = $sprout->Exists($entityName, $entityID); >> - -Return TRUE if an entity exists, else FALSE. - -=over 4 - -=item entityName - -Name of the entity type (e.g. C) relevant to the existence check. - -=item entityID - -ID of the entity instance whose existence is to be checked. - -=item RETURN - -Returns TRUE if the entity instance exists, else FALSE. - -=back - -=cut -#: Return Type $; -sub Exists { - # Get the parameters. - my ($self, $entityName, $entityID) = @_; - # Check for the entity instance. - Trace("Checking existence of $entityName with ID=$entityID.") if T(4); - my $testInstance = $self->GetEntity($entityName, $entityID); - # Return an existence indicator. - my $retVal = ($testInstance ? 1 : 0); - return $retVal; -} - =head3 FeatureTranslation C<< my $translation = $sprout->FeatureTranslation($featureID); >> @@ -2836,125 +2897,6 @@ return @retVal; } -=head3 GetAll - -C<< my @list = $sprout->GetAll(\@objectNames, $filterClause, \@parameters, \@fields, $count); >> - -Return a list of values taken from the objects returned by a query. The first three -parameters correspond to the parameters of the L method. The final parameter is -a list of the fields desired from each record found by the query. The field name -syntax is the standard syntax used for fields in the B system-- -B(I)>-- where I is the name of the relevant entity -or relationship and I is the name of the field. - -The list returned will be a list of lists. Each element of the list will contain -the values returned for the fields specified in the fourth parameter. If one of the -fields specified returns multiple values, they are flattened in with the rest. For -example, the following call will return a list of the features in a particular -spreadsheet cell, and each feature will be represented by a list containing the -feature ID followed by all of its aliases. - -C<< $query = $sprout->Get(['ContainsFeature', 'Feature'], "ContainsFeature(from-link) = ?", [$ssCellID], ['Feature(id)', 'Feature(alias)']); >> - -=over 4 - -=item objectNames - -List containing the names of the entity and relationship objects to be retrieved. - -=item filterClause - -WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can -be parameterized with parameter markers (C). Each field used must be specified in the standard form -B(I)>. Any parameters specified in the filter clause should be added to the -parameter list as additional parameters. The fields in a filter clause can come from primary -entity relations, relationship relations, or secondary entity relations; however, all of the -entities and relationships involved must be included in the list of object names. - -=item parameterList - -List of the parameters to be substituted in for the parameters marks in the filter clause. - -=item fields - -List of the fields to be returned in each element of the list returned. - -=item count - -Maximum number of records to return. If omitted or 0, all available records will be returned. - -=item RETURN - -Returns a list of list references. Each element of the return list contains the values for the -fields specified in the B parameter. - -=back - -=cut -#: Return Type @@; -sub GetAll { - # Get the parameters. - my ($self, $objectNames, $filterClause, $parameterList, $fields, $count) = @_; - # Call the ERDB method. - my @retVal = $self->{_erdb}->GetAll($objectNames, $filterClause, $parameterList, - $fields, $count); - # Return the resulting list. - return @retVal; -} - -=head3 GetFlat - -C<< my @list = $sprout->GetFlat(\@objectNames, $filterClause, $parameterList, $field); >> - -This is a variation of L that asks for only a single field per record and -returns a single flattened list. - -=over 4 - -=item objectNames - -List containing the names of the entity and relationship objects to be retrieved. - -=item filterClause - -WHERE/ORDER BY clause (without the WHERE) to be used to filter and sort the query. The WHERE clause can -be parameterized with parameter markers (C). Each field used must be specified in the standard form -B(I)>. Any parameters specified in the filter clause should be added to the -parameter list as additional parameters. The fields in a filter clause can come from primary -entity relations, relationship relations, or secondary entity relations; however, all of the -entities and relationships involved must be included in the list of object names. - -=item parameterList - -List of the parameters to be substituted in for the parameters marks in the filter clause. - -=item field - -Name of the field to be used to get the elements of the list returned. - -=item RETURN - -Returns a list of values. - -=back - -=cut -#: Return Type @; -sub GetFlat { - # Get the parameters. - my ($self, $objectNames, $filterClause, $parameterList, $field) = @_; - # Construct the query. - my $query = $self->Get($objectNames, $filterClause, $parameterList); - # Create the result list. - my @retVal = (); - # Loop through the records, adding the field values found to the result list. - while (my $row = $query->Fetch()) { - push @retVal, $row->Value($field); - } - # Return the list created. - return @retVal; -} - =head3 Protein C<< my $protein = Sprout::Protein($sequence, $table); >> @@ -3056,14 +2998,14 @@ # Create the return list, priming it with the name of the data directory. my @retVal = ($self->{_options}->{dataDir}); # Concatenate the table names. - push @retVal, $self->{_erdb}->GetTableNames(); + push @retVal, $self->GetTableNames(); # Return the result. return @retVal; } =head3 LowBBHs -C<< my %bbhMap = $sprout->GoodBBHs($featureID, $cutoff); >> +C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >> Return the bidirectional best hits of a feature whose score is no greater than a specified cutoff value. A higher cutoff value will allow inclusion of hits with @@ -3105,6 +3047,71 @@ return %retVal; } +=head3 Sims + +C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >> + +Get a list of similarities for a specified feature. Similarity information is not kept in the +Sprout database; rather, they are retrieved from a network server. The similarities are +returned as B objects. A Sim object is actually a list reference that has been blessed +so that its elements can be accessed by name. + +Similarities can be either raw or expanded. The raw similarities are basic +hits between features with similar DNA. Expanding a raw similarity drags in any +features considered substantially identical. So, for example, if features B, +B, and B are all substatially identical to B, then a raw similarity +B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>. + +=over 4 + +=item fid + +ID of the feature whose similarities are desired. + +=item maxN + +Maximum number of similarities to return. + +=item maxP + +Minumum allowable similarity score. + +=item select + +Selection criterion: C means only raw similarities are returned; C +means only similarities to FIG features are returned; C means all expanded +similarities are returned; and C means similarities are expanded until the +number of FIG features equals the maximum. + +=item max_expand + +The maximum number of features to expand. + +=item filters + +Reference to a hash containing filter information, or a subroutine that can be +used to filter the sims. + +=item RETURN + +Returns a reference to a list of similarity objects, or C if an error +occurred. + +=back + +=cut + +sub Sims { + # Get the parameters. + my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_; + # Create the shim object to test for deleted FIDs. + my $shim = FidCheck->new($self); + # Ask the network for sims. + my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters); + # Return the result. + return $retVal; +} + =head3 GetGroups C<< my %groups = $sprout->GetGroups(\@groupList); >> @@ -3254,9 +3261,9 @@ # Get the parameters. my ($self, $genomeID, $testFlag) = @_; # Perform the delete for the genome's features. - my $retVal = $self->{_erdb}->Delete('Feature', "fig|$genomeID.%", $testFlag); + my $retVal = $self->Delete('Feature', "fig|$genomeID.%", $testFlag); # Perform the delete for the primary genome data. - my $stats = $self->{_erdb}->Delete('Genome', $genomeID, $testFlag); + my $stats = $self->Delete('Genome', $genomeID, $testFlag); $retVal->Accumulate($stats); # Return the result. return $retVal;