--- Sprout.pm 2005/09/14 13:26:27 1.33 +++ Sprout.pm 2006/02/16 04:10:07 1.54 @@ -92,7 +92,7 @@ # database type dataDir => $FIG_Config::sproutData, # data file directory - xmlFileName => "$FIG_Config::sproutData/SproutDBD.xml", + xmlFileName => "$FIG_Config::fig/SproutDBD.xml", # database definition file name userData => "$FIG_Config::dbuser/$FIG_Config::dbpass", # user name and password @@ -389,7 +389,7 @@ =head3 LoadUpdate -C<< my %stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >> +C<< my $stats = $sprout->LoadUpdate($truncateFlag, \@tableList); >> Load updates to one or more database tables. This method enables the client to make changes to one or two tables without reloading the whole database. For each table, there must be a corresponding @@ -610,10 +610,15 @@ if ($prevContig eq $contigID && $dir eq $prevDir) { # Here the new segment is in the same direction on the same contig. Insure the # new segment's beginning is next to the old segment's end. - if (($dir eq "-" && $beg == $prevBeg - $prevLen) || - ($dir eq "+" && $beg == $prevBeg + $prevLen)) { - # Here we need to merge two segments. Adjust the beginning and length values - # to include both segments. + if ($dir eq "-" && $beg + $len == $prevBeg) { + # Here we're merging two backward blocks, so we keep the new begin point + # and adjust the length. + $len += $prevLen; + # Pop the old segment off. The new one will replace it later. + pop @retVal; + } elsif ($dir eq "+" && $beg == $prevBeg + $prevLen) { + # Here we need to merge two forward blocks. Adjust the beginning and + # length values to include both segments. $beg = $prevBeg; $len += $prevLen; # Pop the old segment off. The new one will replace it later. @@ -659,7 +664,7 @@ shift if UNIVERSAL::isa($_[0],__PACKAGE__); my ($location) = @_; # Parse it into segments. - $location =~ /^(.*)_(\d*)([+-_])(\d*)$/; + $location =~ /^(.+)_(\d+)([+\-_])(\d+)$/; my ($contigID, $start, $dir, $len) = ($1, $2, $3, $4); # If the direction is an underscore, convert it to a + or -. if ($dir eq "_") { @@ -767,7 +772,7 @@ # the start point is the ending. Note that in the latter case we must reverse the DNA string # before putting it in the return value. my ($start, $stop); - Trace("Parsed location is $beg$dir$len.") if T(SDNA => 4); + Trace("Parse of \"$location\" is $beg$dir$len.") if T(SDNA => 4); if ($dir eq "+") { $start = $beg; $stop = $beg + $len - 1; @@ -790,7 +795,7 @@ Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4); # Figure out the start point and length of the relevant section. my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition); - my $len1 = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1; + my $len1 = ($stopPosition < $stop ? $stopPosition : $stop) + 1 - $startPosition - $pos1; Trace("Position is $pos1 for length $len1.") if T(SDNA => 4); # Add the relevant data to the location data. $locationDNA .= substr($sequenceData, $pos1, $len1); @@ -869,7 +874,55 @@ # Set it from the sequence data, if any. if ($sequence) { my ($start, $len) = $sequence->Values(['IsMadeUpOf(start-position)', 'IsMadeUpOf(len)']); - $retVal = $start + $len; + $retVal = $start + $len - 1; + } + # Return the result. + return $retVal; +} + +=head3 ClusterPEGs + +C<< my $clusteredList = $sprout->ClusterPEGs($sub, \@pegs); >> + +Cluster the PEGs in a list according to the cluster coding scheme of the specified +subsystem. In order for this to work properly, the subsystem object must have +been used recently to retrieve the PEGs using the B method. +This causes the cluster numbers to be pulled into the subsystem's color hash. +If a PEG is not found in the color hash, it will not appear in the output +sequence. + +=over 4 + +=item sub + +Sprout subsystem object for the relevant subsystem, from the L +method. + +=item pegs + +Reference to the list of PEGs to be clustered. + +=item RETURN + +Returns a list of the PEGs, grouped into smaller lists by cluster number. + +=back + +=cut +#: Return Type $@@; +sub ClusterPEGs { + # Get the parameters. + my ($self, $sub, $pegs) = @_; + # Declare the return variable. + my $retVal = []; + # Loop through the PEGs, creating arrays for each cluster. + for my $pegID (@{$pegs}) { + my $clusterNumber = $sub->get_cluster_number($pegID); + # Only proceed if the PEG is in a cluster. + if ($clusterNumber >= 0) { + # Push this PEG onto the sub-list for the specified cluster number. + push @{$retVal->[$clusterNumber]}, $pegID; + } } # Return the result. return $retVal; @@ -1019,7 +1072,7 @@ =head3 FeatureAnnotations -C<< my @descriptors = $sprout->FeatureAnnotations($featureID); >> +C<< my @descriptors = $sprout->FeatureAnnotations($featureID, $rawFlag); >> Return the annotations of a feature. @@ -1029,13 +1082,18 @@ ID of the feature whose annotations are desired. +=item rawFlag + +If TRUE, the annotation timestamps will be returned in raw form; otherwise, they +will be returned in human-readable form. + =item RETURN Returns a list of annotation descriptors. Each descriptor is a hash with the following fields. * B ID of the relevant feature. -* B time the annotation was made, in user-friendly format. +* B time the annotation was made. * B ID of the user who made the annotation @@ -1047,7 +1105,7 @@ #: Return Type @%; sub FeatureAnnotations { # Get the parameters. - my ($self, $featureID) = @_; + my ($self, $featureID, $rawFlag) = @_; # Create a query to get the feature's annotations and the associated users. my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], "IsTargetOfAnnotation(from-link) = ?", [$featureID]); @@ -1060,9 +1118,13 @@ $annotation->Values(['IsTargetOfAnnotation(from-link)', 'Annotation(time)', 'MadeAnnotation(from-link)', 'Annotation(annotation)']); + # Convert the time, if necessary. + if (! $rawFlag) { + $timeStamp = FriendlyTimestamp($timeStamp); + } # Assemble them into a hash. my $annotationHash = { featureID => $featureID, - timeStamp => FriendlyTimestamp($timeStamp), + timeStamp => $timeStamp, user => $user, text => $text }; # Add it to the return list. push @retVal, $annotationHash; @@ -1091,7 +1153,7 @@ =item RETURN -Returns a hash mapping the functional assignment IDs to user IDs. +Returns a hash mapping the user IDs to functional assignment IDs. =back @@ -1101,28 +1163,25 @@ # Get the parameters. my ($self, $featureID) = @_; # Get all of the feature's annotations. - my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation'], + my @query = $self->GetAll(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], "IsTargetOfAnnotation(from-link) = ?", - [$featureID], ['Annotation(time)', 'Annotation(annotation)']); + [$featureID], ['Annotation(time)', 'Annotation(annotation)', + 'MadeAnnotation(from-link)']); # Declare the return hash. my %retVal; - # Declare a hash for insuring we only make one assignment per user. - my %timeHash = (); # Now we sort the assignments by timestamp in reverse. my @sortedQuery = sort { -($a->[0] <=> $b->[0]) } @query; # Loop until we run out of annotations. for my $annotation (@sortedQuery) { # Get the annotation fields. - my ($timeStamp, $text) = @{$annotation}; + my ($timeStamp, $text, $user) = @{$annotation}; # Check to see if this is a functional assignment. - my ($user, $function) = _ParseAssignment($text); - if ($user && ! exists $timeHash{$user}) { + my ($actualUser, $function) = _ParseAssignment($user, $text); + if ($actualUser && ! exists $retVal{$actualUser}) { # Here it is a functional assignment and there has been no # previous assignment for this user, so we stuff it in the # return hash. - $retVal{$function} = $user; - # Insure we don't assign to this user again. - $timeHash{$user} = 1; + $retVal{$actualUser} = $function; } } # Return the hash of assignments found. @@ -1138,7 +1197,7 @@ The functional assignment is handled differently depending on the type of feature. If the feature is identified by a FIG ID (begins with the string C), then a functional assignment is a type of annotation. The format of an assignment is described in -L. Its worth noting that we cannot filter on the content of the +L. Its worth noting that we cannot filter on the content of the annotation itself because it's a text field; however, this is not a big problem because most features only have a small number of annotations. @@ -1200,20 +1259,22 @@ } } # Build a query for all of the feature's annotations, sorted by date. - my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation'], + my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC", [$featureID]); my $timeSelected = 0; # Loop until we run out of annotations. while (my $annotation = $query->Fetch()) { # Get the annotation text. - my ($text, $time) = $annotation->Values(['Annotation(annotation)','Annotation(time)']); + my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)', + 'Annotation(time)', 'MadeAnnotation(from-link)']); # Check to see if this is a functional assignment for a trusted user. - my ($user, $function) = _ParseAssignment($text); - if ($user) { + my ($actualUser, $function) = _ParseAssignment($user, $text); + Trace("Assignment user is $actualUser, text is $function.") if T(4); + if ($actualUser) { # Here it is a functional assignment. Check the time and the user # name. The time must be recent and the user must be trusted. - if ((exists $trusteeTable{$user}) && ($time > $timeSelected)) { + if ((exists $trusteeTable{$actualUser}) && ($time > $timeSelected)) { $retVal = $function; $timeSelected = $time; } @@ -1229,6 +1290,78 @@ return $retVal; } +=head3 FunctionsOf + +C<< my @functionList = $sprout->FunctionOf($featureID, $userID); >> + +Return the functional assignments of a particular feature. + +The functional assignment is handled differently depending on the type of feature. If +the feature is identified by a FIG ID (begins with the string C), then a functional +assignment is a type of annotation. The format of an assignment is described in +L. Its worth noting that we cannot filter on the content of the +annotation itself because it's a text field; however, this is not a big problem because +most features only have a small number of annotations. + +If the feature is B identified by a FIG ID, then the functional assignment +information is taken from the B table. If the table does +not contain an entry for the feature, an empty list is returned. + +=over 4 + +=item featureID + +ID of the feature whose functional assignments are desired. + +=item RETURN + +Returns a list of 2-tuples, each consisting of a user ID and the text of an assignment by +that user. + +=back + +=cut +#: Return Type @@; +sub FunctionsOf { + # Get the parameters. + my ($self, $featureID) = @_; + # Declare the return value. + my @retVal = (); + # Determine the ID type. + if ($featureID =~ m/^fig\|/) { + # Here we have a FIG feature ID. We must build the list of trusted + # users. + my %trusteeTable = (); + # Build a query for all of the feature's annotations, sorted by date. + my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'], + "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC", + [$featureID]); + my $timeSelected = 0; + # Loop until we run out of annotations. + while (my $annotation = $query->Fetch()) { + # Get the annotation text. + my ($text, $time, $user) = $annotation->Values(['Annotation(annotation)', + 'Annotation(time)', + 'MadeAnnotation(user)']); + # Check to see if this is a functional assignment for a trusted user. + my ($actualUser, $function) = _ParseAssignment($user, $text); + if ($actualUser) { + # Here it is a functional assignment. + push @retVal, [$actualUser, $function]; + } + } + } else { + # Here we have a non-FIG feature ID. In this case the user ID does not + # matter. We simply get the information from the External Alias Function + # table. + my @assignments = $self->GetEntityValues('ExternalAliasFunc', $featureID, + ['ExternalAliasFunc(func)']); + push @retVal, map { ['master', $_] } @assignments; + } + # Return the assignments found. + return @retVal; +} + =head3 BBHList C<< my $bbhHash = $sprout->BBHList($genomeID, \@featureList); >> @@ -1266,12 +1399,12 @@ my $query = $self->Get(['IsBidirectionalBestHitOf'], "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?", [$featureID, $genomeID]); - # Look for the best hit. - my $bbh = $query->Fetch; - if ($bbh) { - my ($targetFeature) = $bbh->Value('IsBidirectionalBestHitOf(to-link)'); - $retVal{$featureID} = $targetFeature; + # Peel off the BBHs found. + my @found = (); + while (my $bbh = $query->Fetch) { + push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)'); } + $retVal{$featureID} = \@found; } # Return the mapping. return \%retVal; @@ -1349,7 +1482,7 @@ my $genomeData = $self->GetEntity('Genome', $genomeID); if ($genomeData) { # The genome exists, so get the completeness flag. - ($retVal) = $genomeData->Value('complete'); + ($retVal) = $genomeData->Value('Genome(complete)'); } # Return the result. return $retVal; @@ -2547,13 +2680,18 @@ ['HasSSCell(from-link)', 'IsRoleOf(from-link)']); # Create the return value. my %retVal = (); + # Build a hash to weed out duplicates. Sometimes the same PEG and role appears + # in two spreadsheet cells. + my %dupHash = (); # Loop through the results, adding them to the hash. for my $record (@subsystems) { + # Get this subsystem and role. my ($subsys, $role) = @{$record}; - if (exists $retVal{$subsys}) { + # Insure it's the first time for both. + my $dupKey = "$subsys\n$role"; + if (! exists $dupHash{"$subsys\n$role"}) { + $dupHash{$dupKey} = 1; push @{$retVal{$subsys}}, $role; - } else { - $retVal{$subsys} = [$role]; } } # Return the hash. @@ -3083,6 +3221,42 @@ return $retVal; } +=head3 DeleteGenome + +C<< my $stats = $sprout->DeleteGenome($genomeID, $testFlag); >> + +Delete a genome from the database. + +=over 4 + +=item genomeID + +ID of the genome to delete + +=item testFlag + +If TRUE, then the DELETE statements will be traced, but no deletions will occur. + +=item RETURN + +Returns a statistics object describing the rows deleted. + +=back + +=cut +#: Return Type $%; +sub DeleteGenome { + # Get the parameters. + my ($self, $genomeID, $testFlag) = @_; + # Perform the delete for the genome's features. + my $retVal = $self->{_erdb}->Delete('Feature', "fig|$genomeID.%", $testFlag); + # Perform the delete for the primary genome data. + my $stats = $self->{_erdb}->Delete('Genome', $genomeID, $testFlag); + $retVal->Accumulate($stats); + # Return the result. + return $retVal; +} + =head2 Internal Utility Methods =head3 ParseAssignment @@ -3093,16 +3267,23 @@ A functional assignment is always of the form - IC<\nset >IC< function to\n>I + CIC< function to\n>I + +where I is the B, and I is the actual functional role. In most cases, +the user and the assigning user (from MadeAnnotation) will be the same, but that is +not always the case. -where I is the B, I is the B, and I is the -actual functional role. In most cases, the user and the assigning user will be the -same, but that is not always the case. +In addition, the functional role may contain extra data that is stripped, such as +terminating spaces or a comment separated from the rest of the text by a tab. This is a static method. =over 4 +=item user + +Name of the assigning user. + =item text Text of the annotation. @@ -3118,15 +3299,22 @@ sub _ParseAssignment { # Get the parameters. - my ($text) = @_; + my ($user, $text) = @_; # Declare the return value. my @retVal = (); # Check to see if this is a functional assignment. - my ($user, $type, $function) = split(/\n/, $text); - if ($type =~ m/^set ([^ ]+) function to$/i) { - # Here it is, so we return the user name (which is in $1), the functional role text, - # and the assigning user. - @retVal = ($1, $function, $user); + my ($type, $function) = split(/\n/, $text); + if ($type =~ m/^set function to$/i) { + # Here we have an assignment without a user, so we use the incoming user ID. + @retVal = ($user, $function); + } elsif ($type =~ m/^set (\S+) function to$/i) { + # Here we have an assignment with a user that is passed back to the caller. + @retVal = ($1, $function); + } + # If we have an assignment, we need to clean the function text. There may be + # extra junk at the end added as a note from the user. + if (@retVal) { + $retVal[1] =~ s/(\t\S)?\s*$//; } # Return the result list. return @retVal; @@ -3154,7 +3342,7 @@ sub FriendlyTimestamp { my ($timeValue) = @_; - my $retVal = strftime("%a %b %e %H:%M:%S %Y", localtime($timeValue)); + my $retVal = localtime($timeValue); return $retVal; } @@ -3216,5 +3404,4 @@ } - -1; +1; \ No newline at end of file