--- Sprout.pm 2005/09/09 21:15:47 1.23 +++ Sprout.pm 2005/10/18 02:24:23 1.41 @@ -70,6 +70,8 @@ * B maximum number of residues per sequence, (default C<8000>) +* B suppresses the connection to the database if TRUE, else FALSE + =back For example, the following constructor call specifies a database named I and a user name of @@ -98,6 +100,7 @@ # database connection port maxSegmentLength => 4500, # maximum feature segment length maxSequenceLength => 8000, # maximum contig sequence length + noDBOpen => 0, # 1 to suppress the database open }, $options); # Get the data directory. my $dataDir = $optionTable->{dataDir}; @@ -105,7 +108,11 @@ $optionTable->{userData} =~ m!([^/]*)/(.*)$!; my ($userName, $password) = ($1, $2); # Connect to the database. - my $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, $password, $optionTable->{port}); + my $dbh; + if (! $optionTable->{noDBOpen}) { + $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, + $password, $optionTable->{port}); + } # Create the ERDB object. my $xmlFileName = "$optionTable->{xmlFileName}"; my $erdb = ERDB->new($dbh, $xmlFileName); @@ -603,10 +610,15 @@ if ($prevContig eq $contigID && $dir eq $prevDir) { # Here the new segment is in the same direction on the same contig. Insure the # new segment's beginning is next to the old segment's end. - if (($dir eq "-" && $beg == $prevBeg - $prevLen) || - ($dir eq "+" && $beg == $prevBeg + $prevLen)) { - # Here we need to merge two segments. Adjust the beginning and length values - # to include both segments. + if ($dir eq "-" && $beg + $len == $prevBeg) { + # Here we're merging two backward blocks, so we keep the new begin point + # and adjust the length. + $len += $prevLen; + # Pop the old segment off. The new one will replace it later. + pop @retVal; + } elsif ($dir eq "+" && $beg == $prevBeg + $prevLen) { + # Here we need to merge two forward blocks. Adjust the beginning and + # length values to include both segments. $beg = $prevBeg; $len += $prevLen; # Pop the old segment off. The new one will replace it later. @@ -615,8 +627,10 @@ } # Remember this specifier for the adjacent-segment test the next time through. ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len); + # Compute the initial base pair. + my $start = ($dir eq "+" ? $beg : $beg + $len - 1); # Add the specifier to the list. - push @retVal, "${contigID}_$beg$dir$len"; + push @retVal, "${contigID}_$start$dir$len"; } # Return the list in the format indicated by the context. return (wantarray ? @retVal : join(',', @retVal)); @@ -650,7 +664,7 @@ shift if UNIVERSAL::isa($_[0],__PACKAGE__); my ($location) = @_; # Parse it into segments. - $location =~ /^(.*)_(\d*)([+-_])(\d*)$/; + $location =~ /^(.+)_(\d+)([+\-_])(\d+)$/; my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4); # If the direction is an underscore, convert it to a + or -. if ($dir eq "_") { @@ -758,13 +772,15 @@ # the start point is the ending. Note that in the latter case we must reverse the DNA string # before putting it in the return value. my ($start, $stop); + Trace("Parse of \"$location\" is $beg$dir$len.") if T(SDNA => 4); if ($dir eq "+") { $start = $beg; $stop = $beg + $len - 1; } else { - $start = $beg + $len + 1; + $start = $beg - $len + 1; $stop = $beg; } + Trace("Looking for sequences containing $start through $stop.") if T(SDNA => 4); my $query = $self->Get(['IsMadeUpOf','Sequence'], "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " . " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)", @@ -776,18 +792,19 @@ $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)', 'IsMadeUpOf(len)']); my $stopPosition = $startPosition + $sequenceLength; + Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4); # Figure out the start point and length of the relevant section. my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition); - my $len = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1; + my $len1 = ($stopPosition < $stop ? $stopPosition : $stop) + 1 - $startPosition - $pos1; + Trace("Position is $pos1 for length $len1.") if T(SDNA => 4); # Add the relevant data to the location data. - $locationDNA .= substr($sequenceData, $pos1, $len); + $locationDNA .= substr($sequenceData, $pos1, $len1); } # Add this location's data to the return string. Note that we may need to reverse it. if ($dir eq '+') { $retVal .= $locationDNA; } else { - $locationDNA = join('', reverse split //, $locationDNA); - $retVal .= $locationDNA; + $retVal .= FIG::reverse_comp($locationDNA); } } # Return the result. @@ -857,7 +874,55 @@ # Set it from the sequence data, if any. if ($sequence) { my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']); - $retVal = $start + $len; + $retVal = $start + $len - 1; + } + # Return the result. + return $retVal; +} + +=head3 ClusterPEGs + +C<< my $clusteredList = $sprout->ClusterPEGs($sub, \@pegs); >> + +Cluster the PEGs in a list according to the cluster coding scheme of the specified +subsystem. In order for this to work properly, the subsystem object must have +been used recently to retrieve the PEGs using the B method. +This causes the cluster numbers to be pulled into the subsystem's color hash. +If a PEG is not found in the color hash, it will not appear in the output +sequence. + +=over 4 + +=item sub + +Sprout subsystem object for the relevant subsystem, from the L +method. + +=item pegs + +Reference to the list of PEGs to be clustered. + +=item RETURN + +Returns a list of the PEGs, grouped into smaller lists by cluster number. + +=back + +=cut +#: Return Type $@@; +sub ClusterPEGs { + # Get the parameters. + my ($self, $sub, $pegs) = @_; + # Declare the return variable. + my $retVal = []; + # Loop through the PEGs, creating arrays for each cluster. + for my $pegID (@{$pegs}) { + my $clusterNumber = $sub->get_cluster_number($pegID); + # Only proceed if the PEG is in a cluster. + if ($clusterNumber >= 0) { + # Push this PEG onto the sub-list for the specified cluster number. + push @{$retVal->[$clusterNumber]}, $pegID; + } } # Return the result. return $retVal; @@ -1007,7 +1072,7 @@ =head3 FeatureAnnotations -C<< my @descriptors = $sprout->FeatureAnnotations($featureID); >> +C<< my @descriptors = $sprout->FeatureAnnotations($featureID, $rawFlag); >> Return the annotations of a feature. @@ -1017,13 +1082,18 @@ ID of the feature whose annotations are desired. +=item rawFlag + +If TRUE, the annotation timestamps will be returned in raw form; otherwise, they +will be returned in human-readable form. + =item RETURN Returns a list of annotation descriptors. Each descriptor is a hash with the following fields. * B ID of the relevant feature. -* B time the annotation was made, in user-friendly format. +* B time the annotation was made. * B ID of the user who made the annotation @@ -1035,7 +1105,7 @@ #: Return Type @%; sub FeatureAnnotations { # Get the parameters. - my ($self, $featureID) = @_; + my ($self, $featureID, $rawFlag) = @_; # Create a query to get the feature's annotations and the associated users. my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], "IsTargetOfAnnotation(from-link) = ?", [$featureID]); @@ -1048,9 +1118,13 @@ $annotation->Values(['IsTargetOfAnnotation(from-link)', 'Annotation(time)', 'MadeAnnotation(from-link)', 'Annotation(annotation)']); + # Convert the time, if necessary. + if (! $rawFlag) { + $timeStamp = FriendlyTimestamp($timeStamp); + } # Assemble them into a hash. my $annotationHash = { featureID => $featureID, - timeStamp => FriendlyTimestamp($timeStamp), + timeStamp => $timeStamp, user => $user, text => $text }; # Add it to the return list. push @retVal, $annotationHash; @@ -1254,12 +1328,12 @@ my $query = $self->Get(['IsBidirectionalBestHitOf'], "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?", [$featureID, $genomeID]); - # Look for the best hit. - my $bbh = $query->Fetch; - if ($bbh) { - my ($targetFeature) = $bbh->Value('IsBidirectionalBestHitOf(to-link)'); - $retVal{$featureID} = $targetFeature; + # Peel off the BBHs found. + my @found = (); + while (my $bbh = $query->Fetch) { + push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)'); } + $retVal{$featureID} = \@found; } # Return the mapping. return \%retVal; @@ -1701,13 +1775,13 @@ if ($line =~ m/^>\s*(.+?)(\s|\n)/) { # Here we have a new header. Store the current sequence if we have one. if ($id) { - $retVal{$id} = uc $sequence; + $retVal{$id} = lc $sequence; } # Clear the sequence accumulator and save the new ID. ($id, $sequence) = ("$prefix$1", ""); } else { # Here we have a data line, so we add it to the sequence accumulator. - # First, we get the actual data out. Note that we normalize to upper + # First, we get the actual data out. Note that we normalize to lower # case. $line =~ /^\s*(.*?)(\s|\n)/; $sequence .= $1; @@ -1715,7 +1789,7 @@ } # Flush out the last sequence (if any). if ($sequence) { - $retVal{$id} = uc $sequence; + $retVal{$id} = lc $sequence; } # Close the file. close FASTAFILE; @@ -2041,6 +2115,7 @@ # Get the parameters. my ($self, $entityName, $entityID) = @_; # Check for the entity instance. + Trace("Checking existence of $entityName with ID=$entityID.") if T(4); my $testInstance = $self->GetEntity($entityName, $entityID); # Return an existence indicator. my $retVal = ($testInstance ? 1 : 0); @@ -3141,7 +3216,7 @@ sub FriendlyTimestamp { my ($timeValue) = @_; - my $retVal = strftime("%a %b %e %H:%M:%S %Y", localtime($timeValue)); + my $retVal = localtime($timeValue); return $retVal; }