--- Sprout.pm 2005/06/28 21:34:15 1.18 +++ Sprout.pm 2005/10/12 03:12:24 1.40 @@ -70,6 +70,8 @@ * B maximum number of residues per sequence, (default C<8000>) +* B suppresses the connection to the database if TRUE, else FALSE + =back For example, the following constructor call specifies a database named I and a user name of @@ -98,6 +100,7 @@ # database connection port maxSegmentLength => 4500, # maximum feature segment length maxSequenceLength => 8000, # maximum contig sequence length + noDBOpen => 0, # 1 to suppress the database open }, $options); # Get the data directory. my $dataDir = $optionTable->{dataDir}; @@ -105,7 +108,11 @@ $optionTable->{userData} =~ m!([^/]*)/(.*)$!; my ($userName, $password) = ($1, $2); # Connect to the database. - my $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, $password, $optionTable->{port}); + my $dbh; + if (! $optionTable->{noDBOpen}) { + $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, + $password, $optionTable->{port}); + } # Create the ERDB object. my $xmlFileName = "$optionTable->{xmlFileName}"; my $erdb = ERDB->new($dbh, $xmlFileName); @@ -576,7 +583,7 @@ =item RETURN Returns a list of the feature's contig segments. The locations are returned as a list in a list -context and as a space-delimited string in a scalar context. +context and as a comma-delimited string in a scalar context. =back @@ -615,11 +622,13 @@ } # Remember this specifier for the adjacent-segment test the next time through. ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len); + # Compute the initial base pair. + my $start = ($dir eq "+" ? $beg : $beg + $len - 1); # Add the specifier to the list. - push @retVal, "${contigID}_$beg$dir$len"; + push @retVal, "${contigID}_$start$dir$len"; } # Return the list in the format indicated by the context. - return (wantarray ? @retVal : join(' ', @retVal)); + return (wantarray ? @retVal : join(',', @retVal)); } =head3 ParseLocation @@ -650,7 +659,7 @@ shift if UNIVERSAL::isa($_[0],__PACKAGE__); my ($location) = @_; # Parse it into segments. - $location =~ /^(.*)_(\d*)([+-_])(\d*)$/; + $location =~ /^(.+)_(\d+)([+\-_])(\d+)$/; my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4); # If the direction is an underscore, convert it to a + or -. if ($dir eq "_") { @@ -672,8 +681,8 @@ Return the offset into the specified location of the specified point on the contig. If the specified point is before the location, a negative value will be returned. If it is -beyond the location, an undefined value will be returned. It is assumed that the offset -is for the location's contig. The location can either be new-style (using a C<+> or C<-> +beyond the location, an undefined value will be returned. It is assumed that the offset +is for the location's contig. The location can either be new-style (using a C<+> or C<-> and a length) or old-style (using C<_> and start and end positions. =over 4 @@ -758,13 +767,15 @@ # the start point is the ending. Note that in the latter case we must reverse the DNA string # before putting it in the return value. my ($start, $stop); + Trace("Parse of \"$location\" is $beg$dir$len.") if T(SDNA => 4); if ($dir eq "+") { $start = $beg; $stop = $beg + $len - 1; } else { - $start = $beg + $len + 1; + $start = $beg - $len + 1; $stop = $beg; } + Trace("Looking for sequences containing $start through $stop.") if T(SDNA => 4); my $query = $self->Get(['IsMadeUpOf','Sequence'], "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " . " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)", @@ -776,18 +787,19 @@ $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)', 'IsMadeUpOf(len)']); my $stopPosition = $startPosition + $sequenceLength; + Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4); # Figure out the start point and length of the relevant section. my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition); - my $len = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1; + my $len1 = ($stopPosition < $stop ? $stopPosition : $stop) + 1 - $startPosition - $pos1; + Trace("Position is $pos1 for length $len1.") if T(SDNA => 4); # Add the relevant data to the location data. - $locationDNA .= substr($sequenceData, $pos1, $len); + $locationDNA .= substr($sequenceData, $pos1, $len1); } # Add this location's data to the return string. Note that we may need to reverse it. if ($dir eq '+') { $retVal .= $locationDNA; } else { - $locationDNA = join('', reverse split //, $locationDNA); - $retVal .= $locationDNA; + $retVal .= FIG::reverse_comp($locationDNA); } } # Return the result. @@ -857,7 +869,55 @@ # Set it from the sequence data, if any. if ($sequence) { my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']); - $retVal = $start + $len; + $retVal = $start + $len - 1; + } + # Return the result. + return $retVal; +} + +=head3 ClusterPEGs + +C<< my $clusteredList = $sprout->ClusterPEGs($sub, \@pegs); >> + +Cluster the PEGs in a list according to the cluster coding scheme of the specified +subsystem. In order for this to work properly, the subsystem object must have +been used recently to retrieve the PEGs using the B method. +This causes the cluster numbers to be pulled into the subsystem's color hash. +If a PEG is not found in the color hash, it will not appear in the output +sequence. + +=over 4 + +=item sub + +Sprout subsystem object for the relevant subsystem, from the L +method. + +=item pegs + +Reference to the list of PEGs to be clustered. + +=item RETURN + +Returns a list of the PEGs, grouped into smaller lists by cluster number. + +=back + +=cut +#: Return Type $@@; +sub ClusterPEGs { + # Get the parameters. + my ($self, $sub, $pegs) = @_; + # Declare the return variable. + my $retVal = []; + # Loop through the PEGs, creating arrays for each cluster. + for my $pegID (@{$pegs}) { + my $clusterNumber = $sub->get_cluster_number($pegID); + # Only proceed if the PEG is in a cluster. + if ($clusterNumber >= 0) { + # Push this PEG onto the sub-list for the specified cluster number. + push @{$retVal->[$clusterNumber]}, $pegID; + } } # Return the result. return $retVal; @@ -902,7 +962,7 @@ my $maximumSegmentLength = $self->MaxSegment; # Create a hash to receive the feature list. We use a hash so that we can eliminate # duplicates easily. The hash key will be the feature ID. The value will be a two-element - # containing the minimum and maximum offsets. We will use the offsets to sort the results + # containing the minimum and maximum offsets. We will use the offsets to sort the results # when we're building the result set. my %featuresFound = (); # Prime the values we'll use for the returned beginning and end. @@ -1007,7 +1067,7 @@ =head3 FeatureAnnotations -C<< my @descriptors = $sprout->FeatureAnnotations($featureID); >> +C<< my @descriptors = $sprout->FeatureAnnotations($featureID, $rawFlag); >> Return the annotations of a feature. @@ -1017,13 +1077,18 @@ ID of the feature whose annotations are desired. +=item rawFlag + +If TRUE, the annotation timestamps will be returned in raw form; otherwise, they +will be returned in human-readable form. + =item RETURN Returns a list of annotation descriptors. Each descriptor is a hash with the following fields. * B ID of the relevant feature. -* B time the annotation was made, in user-friendly format. +* B time the annotation was made. * B ID of the user who made the annotation @@ -1035,7 +1100,7 @@ #: Return Type @%; sub FeatureAnnotations { # Get the parameters. - my ($self, $featureID) = @_; + my ($self, $featureID, $rawFlag) = @_; # Create a query to get the feature's annotations and the associated users. my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], "IsTargetOfAnnotation(from-link) = ?", [$featureID]); @@ -1048,9 +1113,13 @@ $annotation->Values(['IsTargetOfAnnotation(from-link)', 'Annotation(time)', 'MadeAnnotation(from-link)', 'Annotation(annotation)']); + # Convert the time, if necessary. + if (! $rawFlag) { + $timeStamp = FriendlyTimestamp($timeStamp); + } # Assemble them into a hash. my $annotationHash = { featureID => $featureID, - timeStamp => FriendlyTimestamp($timeStamp), + timeStamp => $timeStamp, user => $user, text => $text }; # Add it to the return list. push @retVal, $annotationHash; @@ -1065,10 +1134,10 @@ Return all of the functional assignments for a particular feature. The data is returned as a hash of functional assignments to user IDs. A functional assignment is a type of annotation, -Functional assignments are described in the L function. Its worth noting that -we cannot filter on the content of the annotation itself because it's a text field; however, -this is not a big problem because most features only have a small number of annotations. -Finally, if a single user has multiple functional assignments, we will only keep the most +Functional assignments are described in the L function. Its worth noting that +we cannot filter on the content of the annotation itself because it's a text field; however, +this is not a big problem because most features only have a small number of annotations. +Finally, if a single user has multiple functional assignments, we will only keep the most recent one. =over 4 @@ -1126,8 +1195,8 @@ The functional assignment is handled differently depending on the type of feature. If the feature is identified by a FIG ID (begins with the string C), then a functional assignment is a type of annotation. The format of an assignment is described in -L. Its worth noting that we cannot filter on the content of the -annotation itself because it's a text field; however, this is not a big problem because +L. Its worth noting that we cannot filter on the content of the +annotation itself because it's a text field; however, this is not a big problem because most features only have a small number of annotations. Each user has an associated list of trusted users. The assignment returned will be the most @@ -1254,12 +1323,12 @@ my $query = $self->Get(['IsBidirectionalBestHitOf'], "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?", [$featureID, $genomeID]); - # Look for the best hit. - my $bbh = $query->Fetch; - if ($bbh) { - my ($targetFeature) = $bbh->Value('IsBidirectionalBestHitOf(to-link)'); - $retVal{$featureID} = $targetFeature; + # Peel off the BBHs found. + my @found = (); + while (my $bbh = $query->Fetch) { + push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)'); } + $retVal{$featureID} = \@found; } # Return the mapping. return \%retVal; @@ -1521,6 +1590,7 @@ # Determine the ordering to place on the evidence items. If we're # inverted, we want to see feature 2 before feature 1 (descending); otherwise, # we want feature 1 before feature 2 (normal). + Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4); my $ordering = ($inverted ? "DESC" : ""); # Get the coupling evidence. my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'], @@ -1534,8 +1604,10 @@ while (@evidenceList > 0) { my $peg1Data = shift @evidenceList; my $peg2Data = shift @evidenceList; + Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4); push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]]; } + Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4); } # Return the result. return @retVal; @@ -1585,12 +1657,15 @@ [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]); # Check to see if we found anything. if (!@pegs) { + Trace("No coupling found.") if T(Coupling => 4); # No coupling, so undefine the return value. $retVal = undef; } else { # We have a coupling! Get the score and check for inversion. $score = $pegs[0]->[1]; - $inverted = ($pegs[0]->[0] eq $peg1); + my $firstFound = $pegs[0]->[0]; + $inverted = ($firstFound ne $peg1); + Trace("Coupling score is $score. First peg is $firstFound, peg 1 is $peg1.") if T(Coupling => 4); } # Return the result. return ($retVal, $inverted, $score); @@ -1695,13 +1770,13 @@ if ($line =~ m/^>\s*(.+?)(\s|\n)/) { # Here we have a new header. Store the current sequence if we have one. if ($id) { - $retVal{$id} = uc $sequence; + $retVal{$id} = lc $sequence; } # Clear the sequence accumulator and save the new ID. ($id, $sequence) = ("$prefix$1", ""); } else { # Here we have a data line, so we add it to the sequence accumulator. - # First, we get the actual data out. Note that we normalize to upper + # First, we get the actual data out. Note that we normalize to lower # case. $line =~ /^\s*(.*?)(\s|\n)/; $sequence .= $1; @@ -1709,7 +1784,7 @@ } # Flush out the last sequence (if any). if ($sequence) { - $retVal{$id} = uc $sequence; + $retVal{$id} = lc $sequence; } # Close the file. close FASTAFILE; @@ -1724,7 +1799,7 @@ Insure that a list of feature locations is in the Sprout format. The Sprout feature location format is I_I where I<*> is C<+> for a forward gene and C<-> for a backward gene. The old format is I_I_I. If a feature is in the new format already, -it will not be changed; otherwise, it will be converted. This method can also be used to +it will not be changed; otherwise, it will be converted. This method can also be used to perform the reverse task-- insuring that all the locations are in the old format. =over 4 @@ -2035,6 +2110,7 @@ # Get the parameters. my ($self, $entityName, $entityID) = @_; # Check for the entity instance. + Trace("Checking existence of $entityName with ID=$entityID.") if T(4); my $testInstance = $self->GetEntity($entityName, $entityID); # Return an existence indicator. my $retVal = ($testInstance ? 1 : 0); @@ -2226,6 +2302,89 @@ return @retVal; } +=head3 GetProperties + +C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >> + +Return a list of the properties with the specified characteristics. + +Properties are arbitrary key-value pairs associated with a feature. (At some point they +will also be associated with genomes.) A property value is represented by a 4-tuple of +the form B<($fid, $key, $value, $url)>. These exactly correspond to the parameter + +=over 4 + +=item fid + +ID of the feature possessing the property. + +=item key + +Name or key of the property. + +=item value + +Value of the property. + +=item url + +URL of the document that indicated the property should have this particular value, or an +empty string if no such document exists. + +=back + +The parameters act as a filter for the desired data. Any non-null parameter will +automatically match all the tuples returned. So, specifying just the I<$fid> will +return all the properties of the specified feature; similarly, specifying the I<$key> +and I<$value> parameters will return all the features having the specified property +value. + +A single property key can have many values, representing different ideas about the +feature in question. For example, one paper may declare that a feature C is +virulent, and another may declare that it is not virulent. A query about the virulence of +C would be coded as + + my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', ''); + +Here the I<$value> and I<$url> fields are left blank, indicating that those fields are +not to be filtered. The tuples returned would be + + ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf') + ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf') + +=cut +#: Return Type @@; +sub GetProperties { + # Get the parameters. + my ($self, @parms) = @_; + # Declare the return variable. + my @retVal = (); + # Now we need to create a WHERE clause that will get us the data we want. First, + # we create a list of the columns containing the data for each parameter. + my @colNames = ('HasProperty(from-link)', 'Property(property-name)', + 'Property(property-value)', 'HasProperty(evidence)'); + # Now we build the WHERE clause and the list of parameter values. + my @where = (); + my @values = (); + for (my $i = 0; $i <= $#colNames; $i++) { + my $parm = $parms[$i]; + if (defined $parm && ($parm ne '')) { + push @where, "$colNames[$i] = ?"; + push @values, $parm; + } + } + # Format the WHERE clause. + my $filter = (@values > 0 ? (join " AND ", @where) : undef); + # Ask for all the propertie values with the desired characteristics. + my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values); + while (my $valueObject = $query->Fetch()) { + my @tuple = $valueObject->Values(\@colNames); + push @retVal, \@tuple; + } + # Return the result. + return @retVal; +} + =head3 FeatureProperties C<< my @properties = $sprout->FeatureProperties($featureID); >> @@ -2420,7 +2579,7 @@ C<< my %subsystems = $sprout->SubsystemsOf($featureID); >> Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped -to the role the feature performs. +to the roles the feature performs. =over 4 @@ -2430,12 +2589,12 @@ =item RETURN -Returns a hash mapping all the feature's subsystems to the feature's role. +Returns a hash mapping all the feature's subsystems to a list of the feature's roles. =back =cut -#: Return Type %; +#: Return Type %@; sub SubsystemsOf { # Get the parameters. my ($self, $featureID) = @_; @@ -2447,7 +2606,12 @@ my %retVal = (); # Loop through the results, adding them to the hash. for my $record (@subsystems) { - $retVal{$record->[0]} = $record->[1]; + my ($subsys, $role) = @{$record}; + if (exists $retVal{$subsys}) { + push @{$retVal{$subsys}}, $role; + } else { + $retVal{$subsys} = [$role]; + } } # Return the hash. return %retVal; @@ -2981,13 +3145,13 @@ =head3 ParseAssignment Parse annotation text to determine whether or not it is a functional assignment. If it is, -the user, function text, and assigning user will be returned as a 3-element list. If it +the user, function text, and assigning user will be returned as a 3-element list. If it isn't, an empty list will be returned. A functional assignment is always of the form IC<\nset >IC< function to\n>I - + where I is the B, I is the B, and I is the actual functional role. In most cases, the user and the assigning user will be the same, but that is not always the case. @@ -3047,7 +3211,7 @@ sub FriendlyTimestamp { my ($timeValue) = @_; - my $retVal = strftime("%a %b %e %H:%M:%S %Y", localtime($timeValue)); + my $retVal = localtime($timeValue); return $retVal; } @@ -3108,4 +3272,6 @@ $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url }); } + + 1;