--- Sprout.pm 2005/09/11 17:08:59 1.24 +++ Sprout.pm 2005/10/20 11:52:36 1.46 @@ -70,6 +70,8 @@ * B maximum number of residues per sequence, (default C<8000>) +* B suppresses the connection to the database if TRUE, else FALSE + =back For example, the following constructor call specifies a database named I and a user name of @@ -98,6 +100,7 @@ # database connection port maxSegmentLength => 4500, # maximum feature segment length maxSequenceLength => 8000, # maximum contig sequence length + noDBOpen => 0, # 1 to suppress the database open }, $options); # Get the data directory. my $dataDir = $optionTable->{dataDir}; @@ -105,7 +108,11 @@ $optionTable->{userData} =~ m!([^/]*)/(.*)$!; my ($userName, $password) = ($1, $2); # Connect to the database. - my $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, $password, $optionTable->{port}); + my $dbh; + if (! $optionTable->{noDBOpen}) { + $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, + $password, $optionTable->{port}); + } # Create the ERDB object. my $xmlFileName = "$optionTable->{xmlFileName}"; my $erdb = ERDB->new($dbh, $xmlFileName); @@ -382,7 +389,7 @@ =head3 LoadUpdate -C<< my %stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >> +C<< my $stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >> Load updates to one or more database tables. This method enables the client to make changes to one or two tables without reloading the whole database. For each table, there must be a corresponding @@ -603,10 +610,15 @@ if ($prevContig eq $contigID && $dir eq $prevDir) { # Here the new segment is in the same direction on the same contig. Insure the # new segment's beginning is next to the old segment's end. - if (($dir eq "-" && $beg == $prevBeg - $prevLen) || - ($dir eq "+" && $beg == $prevBeg + $prevLen)) { - # Here we need to merge two segments. Adjust the beginning and length values - # to include both segments. + if ($dir eq "-" && $beg + $len == $prevBeg) { + # Here we're merging two backward blocks, so we keep the new begin point + # and adjust the length. + $len += $prevLen; + # Pop the old segment off. The new one will replace it later. + pop @retVal; + } elsif ($dir eq "+" && $beg == $prevBeg + $prevLen) { + # Here we need to merge two forward blocks. Adjust the beginning and + # length values to include both segments. $beg = $prevBeg; $len += $prevLen; # Pop the old segment off. The new one will replace it later. @@ -615,8 +627,10 @@ } # Remember this specifier for the adjacent-segment test the next time through. ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len); + # Compute the initial base pair. + my $start = ($dir eq "+" ? $beg : $beg + $len - 1); # Add the specifier to the list. - push @retVal, "${contigID}_$beg$dir$len"; + push @retVal, "${contigID}_$start$dir$len"; } # Return the list in the format indicated by the context. return (wantarray ? @retVal : join(',', @retVal)); @@ -650,7 +664,7 @@ shift if UNIVERSAL::isa($_[0],__PACKAGE__); my ($location) = @_; # Parse it into segments. - $location =~ /^(.*)_(\d*)([+-_])(\d*)$/; + $location =~ /^(.+)_(\d+)([+\-_])(\d+)$/; my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4); # If the direction is an underscore, convert it to a + or -. if ($dir eq "_") { @@ -758,13 +772,15 @@ # the start point is the ending. Note that in the latter case we must reverse the DNA string # before putting it in the return value. my ($start, $stop); + Trace("Parse of \"$location\" is $beg$dir$len.") if T(SDNA => 4); if ($dir eq "+") { $start = $beg; $stop = $beg + $len - 1; } else { - $start = $beg + $len + 1; + $start = $beg - $len + 1; $stop = $beg; } + Trace("Looking for sequences containing $start through $stop.") if T(SDNA => 4); my $query = $self->Get(['IsMadeUpOf','Sequence'], "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " . " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)", @@ -776,18 +792,19 @@ $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)', 'IsMadeUpOf(len)']); my $stopPosition = $startPosition + $sequenceLength; + Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4); # Figure out the start point and length of the relevant section. my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition); - my $len = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1; + my $len1 = ($stopPosition < $stop ? $stopPosition : $stop) + 1 - $startPosition - $pos1; + Trace("Position is $pos1 for length $len1.") if T(SDNA => 4); # Add the relevant data to the location data. - $locationDNA .= substr($sequenceData, $pos1, $len); + $locationDNA .= substr($sequenceData, $pos1, $len1); } # Add this location's data to the return string. Note that we may need to reverse it. if ($dir eq '+') { $retVal .= $locationDNA; } else { - $locationDNA = join('', reverse split //, $locationDNA); - $retVal .= $locationDNA; + $retVal .= FIG::reverse_comp($locationDNA); } } # Return the result. @@ -857,7 +874,55 @@ # Set it from the sequence data, if any. if ($sequence) { my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']); - $retVal = $start + $len; + $retVal = $start + $len - 1; + } + # Return the result. + return $retVal; +} + +=head3 ClusterPEGs + +C<< my $clusteredList = $sprout->ClusterPEGs($sub, \@pegs); >> + +Cluster the PEGs in a list according to the cluster coding scheme of the specified +subsystem. In order for this to work properly, the subsystem object must have +been used recently to retrieve the PEGs using the B method. +This causes the cluster numbers to be pulled into the subsystem's color hash. +If a PEG is not found in the color hash, it will not appear in the output +sequence. + +=over 4 + +=item sub + +Sprout subsystem object for the relevant subsystem, from the L +method. + +=item pegs + +Reference to the list of PEGs to be clustered. + +=item RETURN + +Returns a list of the PEGs, grouped into smaller lists by cluster number. + +=back + +=cut +#: Return Type $@@; +sub ClusterPEGs { + # Get the parameters. + my ($self, $sub, $pegs) = @_; + # Declare the return variable. + my $retVal = []; + # Loop through the PEGs, creating arrays for each cluster. + for my $pegID (@{$pegs}) { + my $clusterNumber = $sub->get_cluster_number($pegID); + # Only proceed if the PEG is in a cluster. + if ($clusterNumber >= 0) { + # Push this PEG onto the sub-list for the specified cluster number. + push @{$retVal->[$clusterNumber]}, $pegID; + } } # Return the result. return $retVal; @@ -1007,7 +1072,7 @@ =head3 FeatureAnnotations -C<< my @descriptors = $sprout->FeatureAnnotations($featureID); >> +C<< my @descriptors = $sprout->FeatureAnnotations($featureID, $rawFlag); >> Return the annotations of a feature. @@ -1017,13 +1082,18 @@ ID of the feature whose annotations are desired. +=item rawFlag + +If TRUE, the annotation timestamps will be returned in raw form; otherwise, they +will be returned in human-readable form. + =item RETURN Returns a list of annotation descriptors. Each descriptor is a hash with the following fields. * B ID of the relevant feature. -* B time the annotation was made, in user-friendly format. +* B time the annotation was made. * B ID of the user who made the annotation @@ -1035,7 +1105,7 @@ #: Return Type @%; sub FeatureAnnotations { # Get the parameters. - my ($self, $featureID) = @_; + my ($self, $featureID, $rawFlag) = @_; # Create a query to get the feature's annotations and the associated users. my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], "IsTargetOfAnnotation(from-link) = ?", [$featureID]); @@ -1048,9 +1118,13 @@ $annotation->Values(['IsTargetOfAnnotation(from-link)', 'Annotation(time)', 'MadeAnnotation(from-link)', 'Annotation(annotation)']); + # Convert the time, if necessary. + if (! $rawFlag) { + $timeStamp = FriendlyTimestamp($timeStamp); + } # Assemble them into a hash. my $annotationHash = { featureID => $featureID, - timeStamp => FriendlyTimestamp($timeStamp), + timeStamp => $timeStamp, user => $user, text => $text }; # Add it to the return list. push @retVal, $annotationHash; @@ -1079,7 +1153,7 @@ =item RETURN -Returns a hash mapping the functional assignment IDs to user IDs. +Returns a hash mapping the user IDs to functional assignment IDs. =back @@ -1094,8 +1168,6 @@ [$featureID], ['Annotation(time)', 'Annotation(annotation)']); # Declare the return hash. my %retVal; - # Declare a hash for insuring we only make one assignment per user. - my %timeHash = (); # Now we sort the assignments by timestamp in reverse. my @sortedQuery = sort { -($a->[0] <=> $b->[0]) } @query; # Loop until we run out of annotations. @@ -1104,13 +1176,11 @@ my ($timeStamp, $text) = @{$annotation}; # Check to see if this is a functional assignment. my ($user, $function) = _ParseAssignment($text); - if ($user && ! exists $timeHash{$user}) { + if ($user && ! exists $retVal{$user}) { # Here it is a functional assignment and there has been no # previous assignment for this user, so we stuff it in the # return hash. - $retVal{$function} = $user; - # Insure we don't assign to this user again. - $timeHash{$user} = 1; + $retVal{$user} = $function; } } # Return the hash of assignments found. @@ -1126,7 +1196,7 @@ The functional assignment is handled differently depending on the type of feature. If the feature is identified by a FIG ID (begins with the string C), then a functional assignment is a type of annotation. The format of an assignment is described in -L. Its worth noting that we cannot filter on the content of the +L. Its worth noting that we cannot filter on the content of the annotation itself because it's a text field; however, this is not a big problem because most features only have a small number of annotations. @@ -1217,6 +1287,74 @@ return $retVal; } +=head3 FunctionsOf + +C<< my @functionList = $sprout->FunctionOf($featureID, $userID); >> + +Return the functional assignments of a particular feature. + +The functional assignment is handled differently depending on the type of feature. If +the feature is identified by a FIG ID (begins with the string C), then a functional +assignment is a type of annotation. The format of an assignment is described in +L. Its worth noting that we cannot filter on the content of the +annotation itself because it's a text field; however, this is not a big problem because +most features only have a small number of annotations. + +If the feature is B identified by a FIG ID, then the functional assignment +information is taken from the B table. If the table does +not contain an entry for the feature, an empty list is returned. + +=over 4 + +=item featureID + +ID of the feature whose functional assignments are desired. + +=item RETURN + +Returns a list of 2-tuples, each consisting of a user ID and the text of an assignment by +that user. + +=back + +=cut +#: Return Type @@; +sub FunctionsOf { + # Get the parameters. + my ($self, $featureID) = @_; + # Declare the return value. + my @retVal = (); + # Determine the ID type. + if ($featureID =~ m/^fig\|/) { + # Here we have a FIG feature ID. We must build the list of trusted + # users. + my %trusteeTable = (); + # Build a query for all of the feature's annotations, sorted by date. + my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation'], + "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC", + [$featureID]); + my $timeSelected = 0; + # Loop until we run out of annotations. + while (my $annotation = $query->Fetch()) { + # Get the annotation text. + my ($text, $time) = $annotation->Values(['Annotation(annotation)','Annotation(time)']); + # Check to see if this is a functional assignment for a trusted user. + my ($user, $function) = _ParseAssignment($text); + if ($user) { + # Here it is a functional assignment. + push @retVal, [$user, $function]; + } + } + } else { + # Here we have a non-FIG feature ID. In this case the user ID does not + # matter. We simply get the information from the External Alias Function + # table. + push @retVal, $self->GetEntityValues('ExternalAliasFunc', $featureID, ['ExternalAliasFunc(func)']); + } + # Return the assignments found. + return @retVal; +} + =head3 BBHList C<< my $bbhHash = $sprout->BBHList($genomeID, \@featureList); >> @@ -1254,12 +1392,12 @@ my $query = $self->Get(['IsBidirectionalBestHitOf'], "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?", [$featureID, $genomeID]); - # Look for the best hit. - my $bbh = $query->Fetch; - if ($bbh) { - my ($targetFeature) = $bbh->Value('IsBidirectionalBestHitOf(to-link)'); - $retVal{$featureID} = $targetFeature; + # Peel off the BBHs found. + my @found = (); + while (my $bbh = $query->Fetch) { + push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)'); } + $retVal{$featureID} = \@found; } # Return the mapping. return \%retVal; @@ -2041,6 +2179,7 @@ # Get the parameters. my ($self, $entityName, $entityID) = @_; # Check for the entity instance. + Trace("Checking existence of $entityName with ID=$entityID.") if T(4); my $testInstance = $self->GetEntity($entityName, $entityID); # Return an existence indicator. my $retVal = ($testInstance ? 1 : 0); @@ -2534,13 +2673,18 @@ ['HasSSCell(from-link)', 'IsRoleOf(from-link)']); # Create the return value. my %retVal = (); + # Build a hash to weed out duplicates. Sometimes the same PEG and role appears + # in two spreadsheet cells. + my %dupHash = (); # Loop through the results, adding them to the hash. for my $record (@subsystems) { + # Get this subsystem and role. my ($subsys, $role) = @{$record}; - if (exists $retVal{$subsys}) { + # Insure it's the first time for both. + my $dupKey = "$subsys\n$role"; + if (! exists $dupHash{"$subsys\n$role"}) { + $dupHash{$dupKey} = 1; push @{$retVal{$subsys}}, $role; - } else { - $retVal{$subsys} = [$role]; } } # Return the hash. @@ -3109,11 +3253,11 @@ # Declare the return value. my @retVal = (); # Check to see if this is a functional assignment. - my ($user, $type, $function) = split(/\n/, $text); + my ($type, $function) = split(/\n/, $text); if ($type =~ m/^set ([^ ]+) function to$/i) { # Here it is, so we return the user name (which is in $1), the functional role text, # and the assigning user. - @retVal = ($1, $function, $user); + @retVal = ($1, $function); } # Return the result list. return @retVal; @@ -3141,7 +3285,7 @@ sub FriendlyTimestamp { my ($timeValue) = @_; - my $retVal = strftime("%a %b %e %H:%M:%S %Y", localtime($timeValue)); + my $retVal = localtime($timeValue); return $retVal; }