[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.85, Tue Sep 19 00:14:04 2006 UTC revision 1.96, Wed Dec 6 03:37:26 2006 UTC
# Line 921  Line 921 
921      my ($self, $genomeID) = @_;      my ($self, $genomeID) = @_;
922      # Declare the return variable.      # Declare the return variable.
923      my $retVal = {};      my $retVal = {};
924      # Query the genome's features and annotations. We'll put the oldest annotations      # Query the genome's features.
925      # first so that the last assignment to go into the hash will be the correct one.      my $query = $self->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ?",
     my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'],  
                            "HasFeature(from-link) = ? ORDER BY Annotation(time)",  
926                             [$genomeID]);                             [$genomeID]);
927      # Loop through the annotations.      # Loop through the features.
928      while (my $data = $query->Fetch) {      while (my $data = $query->Fetch) {
929          # Get the feature ID and annotation text.          # Get the feature ID and assignment.
930          my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',          my ($fid, $assignment) = $data->Values(['Feature(id)', 'Feature(assignment)']);
931                                                  'Annotation(annotation)']);          if ($assignment) {
         # Check to see if this is an assignment. Note that the user really  
         # doesn't matter to us, other than we use it to determine whether or  
         # not this is an assignment.  
         my ($user, $assignment) = _ParseAssignment('fig', $annotation);  
         if ($user) {  
             # Here it's an assignment. We put it in the return hash, overwriting  
             # any older assignment that might be present.  
932              $retVal->{$fid} = $assignment;              $retVal->{$fid} = $assignment;
933          }          }
934      }      }
# Line 1299  Line 1290 
1290  Return the most recently-determined functional assignment of a particular feature.  Return the most recently-determined functional assignment of a particular feature.
1291    
1292  The functional assignment is handled differently depending on the type of feature. If  The functional assignment is handled differently depending on the type of feature. If
1293  the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional  the feature is identified by a FIG ID (begins with the string C<fig|>), then the functional
1294  assignment is a type of annotation. The format of an assignment is described in  assignment is taken from the B<Feature> or C<Annotation> table, depending.
 L</ParseAssignment>. Its worth noting that we cannot filter on the content of the  
 annotation itself because it's a text field; however, this is not a big problem because  
 most features only have a small number of annotations.  
1295    
1296  Each user has an associated list of trusted users. The assignment returned will be the most  Each user has an associated list of trusted users. The assignment returned will be the most
1297  recent one by at least one of the trusted users. If no trusted user list is available, then  recent one by at least one of the trusted users. If no trusted user list is available, then
# Line 1322  Line 1310 
1310    
1311  =item userID (optional)  =item userID (optional)
1312    
1313  ID of the user whose function determination is desired. If omitted, only the latest  ID of the user whose function determination is desired. If omitted, the primary
1314  C<FIG> assignment will be returned.  functional assignment in the B<Feature> table will be returned.
1315    
1316  =item RETURN  =item RETURN
1317    
# Line 1340  Line 1328 
1328      my $retVal;      my $retVal;
1329      # Determine the ID type.      # Determine the ID type.
1330      if ($featureID =~ m/^fig\|/) {      if ($featureID =~ m/^fig\|/) {
1331          # Here we have a FIG feature ID. We must build the list of trusted          # Here we have a FIG feature ID.
1332          # users.          if (!$userID) {
1333                # Use the primary assignment.
1334                ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(assignment)']);
1335            } else {
1336                # We must build the list of trusted users.
1337          my %trusteeTable = ();          my %trusteeTable = ();
1338          # Check the user ID.          # Check the user ID.
1339          if (!$userID) {          if (!$userID) {
# Line 1384  Line 1376 
1376                  }                  }
1377              }              }
1378          }          }
1379            }
1380      } else {      } else {
1381          # Here we have a non-FIG feature ID. In this case the user ID does not          # Here we have a non-FIG feature ID. In this case the user ID does not
1382          # matter. We simply get the information from the External Alias Function          # matter. We simply get the information from the External Alias Function
# Line 1504  Line 1497 
1497          # Peel off the BBHs found.          # Peel off the BBHs found.
1498          my @found = ();          my @found = ();
1499          for my $bbh (@bbhData) {          for my $bbh (@bbhData) {
1500              push @found, $bbh->[0];              my $fid = $bbh->[0];
1501                my $bbGenome = $self->GenomeOf($fid);
1502                if ($bbGenome eq $genomeID) {
1503                    push @found, $fid;
1504                }
1505          }          }
1506          $retVal{$featureID} = \@found;          $retVal{$featureID} = \@found;
1507      }      }
# Line 2679  Line 2676 
2676      return $retVal;      return $retVal;
2677  }  }
2678    
2679    =head3 PropertyID
2680    
2681    C<< my $id = $sprout->PropertyID($propName, $propValue); >>
2682    
2683    Return the ID of the specified property name and value pair, if the
2684    pair exists.
2685    
2686    =over 4
2687    
2688    =item propName
2689    
2690    Name of the desired property.
2691    
2692    =item propValue
2693    
2694    Value expected for the desired property.
2695    
2696    =item RETURN
2697    
2698    Returns the ID of the name/value pair, or C<undef> if the pair does not exist.
2699    
2700    =back
2701    
2702    =cut
2703    
2704    sub PropertyID {
2705        # Get the parameters.
2706        my ($self, $propName, $propValue) = @_;
2707        # Try to find the ID.
2708        my ($retVal) = $self->GetFlat(['Property'],
2709                                      "Property(property-name) = ? AND Property(property-value) = ?",
2710                                      [$propName, $propValue], 'Property(id)');
2711        # Return the result.
2712        return $retVal;
2713    }
2714    
2715  =head3 MergedAnnotations  =head3 MergedAnnotations
2716    
2717  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>
# Line 2876  Line 2909 
2909      # Get the parameters.      # Get the parameters.
2910      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
2911      # Get the list of names.      # Get the list of names.
2912      my @retVal = $self->GetFlat(['ContainsFeature', 'HasSSCell'], "ContainsFeature(to-link) = ?",      my @retVal = $self->GetFlat(['HasRoleInSubsystem'], "HasRoleInSubsystem(from-link) = ?",
2913                                  [$featureID], 'HasSSCell(from-link)');                                  [$featureID], 'HasRoleInSubsystem(to-link)');
2914      # Return the result.      # Return the result, sorted.
2915      return @retVal;      return sort @retVal;
2916  }  }
2917    
2918  =head3 GenomeSubsystemData  =head3 GenomeSubsystemData
# Line 3113  Line 3146 
3146      # Loop through the input triples.      # Loop through the input triples.
3147      my $n = length $sequence;      my $n = length $sequence;
3148      for (my $i = 0; $i < $n; $i += 3) {      for (my $i = 0; $i < $n; $i += 3) {
3149          # Get the current triple from the sequence.          # Get the current triple from the sequence. Note we convert to
3150          my $triple = substr($sequence, $i, 3);          # upper case to insure a match.
3151            my $triple = uc substr($sequence, $i, 3);
3152          # Translate it using the table.          # Translate it using the table.
3153          my $protein = "X";          my $protein = "X";
3154          if (exists $table->{$triple}) { $protein = $table->{$triple}; }          if (exists $table->{$triple}) { $protein = $table->{$triple}; }
# Line 3147  Line 3181 
3181      return @retVal;      return @retVal;
3182  }  }
3183    
3184    =head3 BBHMatrix
3185    
3186    C<< my %bbhMap = $sprout->BBHMatrix($genomeID, $cutoff, @targets); >>
3187    
3188    Find all the bidirectional best hits for the features of a genome in a
3189    specified list of target genomes. The return value will be a hash mapping
3190    features in the original genome to their bidirectional best hits in the
3191    target genomes.
3192    
3193    =over 4
3194    
3195    =item genomeID
3196    
3197    ID of the genome whose features are to be examined for bidirectional best hits.
3198    
3199    =item cutoff
3200    
3201    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3202    
3203    =item targets
3204    
3205    List of target genomes. Only pairs originating in the original
3206    genome and landing in one of the target genomes will be returned.
3207    
3208    =item RETURN
3209    
3210    Returns a hash mapping each feature in the original genome to a hash mapping its
3211    BBH pegs in the target genomes to their scores.
3212    
3213    =back
3214    
3215    =cut
3216    
3217    sub BBHMatrix {
3218        # Get the parameters.
3219        my ($self, $genomeID, $cutoff, @targets) = @_;
3220        # Declare the return variable.
3221        my %retVal = ();
3222        # Ask for the BBHs.
3223        my @bbhList = FIGRules::BatchBBHs("fig|$genomeID.%", $cutoff, @targets);
3224        # We now have a set of 4-tuples that we need to convert into a hash of hashes.
3225        for my $bbhData (@bbhList) {
3226            my ($peg1, $peg2, $score) = @{$bbhData};
3227            if (! exists $retVal{$peg1}) {
3228                $retVal{$peg1} = { $peg2 => $score };
3229            } else {
3230                $retVal{$peg1}->{$peg2} = $score;
3231            }
3232        }
3233        # Return the result.
3234        return %retVal;
3235    }
3236    
3237  =head3 LowBBHs  =head3 LowBBHs
3238    
3239  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>
# Line 3182  Line 3269 
3269      my @bbhList = FIGRules::BBHData($featureID, $cutoff);      my @bbhList = FIGRules::BBHData($featureID, $cutoff);
3270      # Form the results into the return hash.      # Form the results into the return hash.
3271      for my $pair (@bbhList) {      for my $pair (@bbhList) {
3272          $retVal{$pair->[0]} = $pair->[1];          my $fid = $pair->[0];
3273            if ($self->Exists('Feature', $fid)) {
3274                $retVal{$fid} = $pair->[1];
3275            }
3276      }      }
3277      # Return the result.      # Return the result.
3278      return %retVal;      return %retVal;
# Line 3253  Line 3343 
3343      return $retVal;      return $retVal;
3344  }  }
3345    
3346    =head3 IsAllGenomes
3347    
3348    C<< my $flag = $sprout->IsAllGenomes(\@list, \@checkList); >>
3349    
3350    Return TRUE if all genomes in the second list are represented in the first list at
3351    least once. Otherwise, return FALSE. If the second list is omitted, the first list is
3352    compared to a list of all the genomes.
3353    
3354    =over 4
3355    
3356    =item list
3357    
3358    Reference to the list to be compared to the second list.
3359    
3360    =item checkList (optional)
3361    
3362    Reference to the comparison target list. Every genome ID in this list must occur at
3363    least once in the first list. If this parameter is omitted, a list of all the genomes
3364    is used.
3365    
3366    =item RETURN
3367    
3368    Returns TRUE if every item in the second list appears at least once in the
3369    first list, else FALSE.
3370    
3371    =back
3372    
3373    =cut
3374    
3375    sub IsAllGenomes {
3376        # Get the parameters.
3377        my ($self, $list, $checkList) = @_;
3378        # Supply the checklist if it was omitted.
3379        $checkList = [$self->Genomes()] if ! defined($checkList);
3380        # Create a hash of the original list.
3381        my %testList = map { $_ => 1 } @{$list};
3382        # Declare the return variable. We assume that the representation
3383        # is complete and stop at the first failure.
3384        my $retVal = 1;
3385        my $n = scalar @{$checkList};
3386        for (my $i = 0; $retVal && $i < $n; $i++) {
3387            if (! $testList{$checkList->[$i]}) {
3388                $retVal = 0;
3389            }
3390        }
3391        # Return the result.
3392        return $retVal;
3393    }
3394    
3395  =head3 GetGroups  =head3 GetGroups
3396    
3397  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>
# Line 3274  Line 3413 
3413          # Here we have a group list. Loop through them individually,          # Here we have a group list. Loop through them individually,
3414          # getting a list of the relevant genomes.          # getting a list of the relevant genomes.
3415          for my $group (@{$groupList}) {          for my $group (@{$groupList}) {
3416              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(group-name) = ?",              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(primary-group) = ?",
3417                  [$group], "Genome(id)");                  [$group], "Genome(id)");
3418              $retVal{$group} = \@genomeIDs;              $retVal{$group} = \@genomeIDs;
3419          }          }
# Line 3282  Line 3421 
3421          # Here we need all of the groups. In this case, we run through all          # Here we need all of the groups. In this case, we run through all
3422          # of the genome records, putting each one found into the appropriate          # of the genome records, putting each one found into the appropriate
3423          # group. Note that we use a filter clause to insure that only genomes          # group. Note that we use a filter clause to insure that only genomes
3424          # in groups are included in the return set.          # in real NMPDR groups are included in the return set.
3425          my @genomes = $self->GetAll(['Genome'], "Genome(group-name) > ' '", [],          my @genomes = $self->GetAll(['Genome'], "Genome(primary-group) <> ?",
3426                                      ['Genome(id)', 'Genome(group-name)']);                                      [$FIG_Config::otherGroup], ['Genome(id)', 'Genome(primary-group)']);
3427          # Loop through the genomes found.          # Loop through the genomes found.
3428          for my $genome (@genomes) {          for my $genome (@genomes) {
3429              # Pop this genome's ID off the current list.              # Pop this genome's ID off the current list.
# Line 3402  Line 3541 
3541      # Get the parameters.      # Get the parameters.
3542      my ($self, $genomeID, $testFlag) = @_;      my ($self, $genomeID, $testFlag) = @_;
3543      # Perform the delete for the genome's features.      # Perform the delete for the genome's features.
3544      my $retVal = $self->Delete('Feature', "fig|$genomeID.%", $testFlag);      my $retVal = $self->Delete('Feature', "fig|$genomeID.%", testMode => $testFlag);
3545      # Perform the delete for the primary genome data.      # Perform the delete for the primary genome data.
3546      my $stats = $self->Delete('Genome', $genomeID, $testFlag);      my $stats = $self->Delete('Genome', $genomeID, testMode => $testFlag);
3547      $retVal->Accumulate($stats);      $retVal->Accumulate($stats);
3548      # Return the result.      # Return the result.
3549      return $retVal;      return $retVal;
# Line 3557  Line 3696 
3696      return %retVal;      return %retVal;
3697  }  }
3698    
3699    =head3 AddProperty
3700    
3701    C<< $sprout->AddProperty($featureID, $key, $value, $url); >>
3702    
3703    Add a new attribute value (Property) to a feature. In the SEED system, attributes can
3704    be added to almost any object. In Sprout, they can only be added to features. In
3705    Sprout, attributes are implemented using I<properties>. A property represents a key/value
3706    pair. If the particular key/value pair coming in is not already in the database, a new
3707    B<Property> record is created to hold it.
3708    
3709    =over 4
3710    
3711    =item featureID
3712    
3713    ID of the feature to which the attribute is to be applied.
3714    
3715    =item key
3716    
3717    Name of the attribute (key).
3718    
3719    =item value
3720    
3721    Value of the attribute.
3722    
3723    =item url
3724    
3725    URL or text citation from which the property was obtained.
3726    
3727    =back
3728    
3729    =cut
3730    #: Return Type ;
3731    sub AddProperty {
3732        # Get the parameters.
3733        my ($self, $featureID, $key, $value, $url) = @_;
3734        # Declare the variable to hold the desired property ID.
3735        my $propID;
3736        # Attempt to find a property record for this key/value pair.
3737        my @properties = $self->GetFlat(['Property'],
3738                                       "Property(property-name) = ? AND Property(property-value) = ?",
3739                                       [$key, $value], 'Property(id)');
3740        if (@properties) {
3741            # Here the property is already in the database. We save its ID.
3742            $propID = $properties[0];
3743            # Here the property value does not exist. We need to generate an ID. It will be set
3744            # to a number one greater than the maximum value in the database. This call to
3745            # GetAll will stop after one record.
3746            my @maxProperty = $self->GetAll(['Property'], "ORDER BY Property(id) DESC", [], ['Property(id)'],
3747                                            1);
3748            $propID = $maxProperty[0]->[0] + 1;
3749            # Insert the new property value.
3750            $self->Insert('Property', { 'property-name' => $key, 'property-value' => $value, id => $propID });
3751        }
3752        # Now we connect the incoming feature to the property.
3753        $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3754    }
3755    
3756    =head2 Virtual Methods
3757    
3758    =head3 CleanKeywords
3759    
3760    C<< my $cleanedString = $sprout->CleanKeywords($searchExpression); >>
3761    
3762    Clean up a search expression or keyword list. This involves converting the periods
3763    in EC numbers to underscores, converting non-leading minus signs to underscores,
3764    a vertical bar or colon to an apostrophe, and forcing lower case for all alphabetic
3765    characters. In addition, any extra spaces are removed.
3766    
3767    =over 4
3768    
3769    =item searchExpression
3770    
3771    Search expression or keyword list to clean. Note that a search expression may
3772    contain boolean operators which need to be preserved. This includes leading
3773    minus signs.
3774    
3775    =item RETURN
3776    
3777    Cleaned expression or keyword list.
3778    
3779    =back
3780    
3781    =cut
3782    
3783    sub CleanKeywords {
3784        # Get the parameters.
3785        my ($self, $searchExpression) = @_;
3786        # Perform the standard cleanup.
3787        my $retVal = $self->ERDB::CleanKeywords($searchExpression);
3788        # Fix the periods in EC and TC numbers.
3789        $retVal =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;
3790        # Fix non-trailing periods.
3791        $retVal =~ s/\.(\w)/_$1/g;
3792        # Fix non-leading minus signs.
3793        $retVal =~ s/(\w)[\-]/$1_/g;
3794        # Fix the vertical bars and colons
3795        $retVal =~ s/(\w)[|:](\w)/$1'$2/g;
3796        # Return the result.
3797        return $retVal;
3798    }
3799    
3800  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3801    
3802  =head3 ParseAssignment  =head3 ParseAssignment
# Line 3646  Line 3886 
3886      return $retVal;      return $retVal;
3887  }  }
3888    
 =head3 AddProperty  
   
 C<< my  = $sprout->AddProperty($featureID, $key, $value, $url); >>  
   
 Add a new attribute value (Property) to a feature. In the SEED system, attributes can  
 be added to almost any object. In Sprout, they can only be added to features. In  
 Sprout, attributes are implemented using I<properties>. A property represents a key/value  
 pair. If the particular key/value pair coming in is not already in the database, a new  
 B<Property> record is created to hold it.  
   
 =over 4  
   
 =item peg  
   
 ID of the feature to which the attribute is to be replied.  
   
 =item key  
   
 Name of the attribute (key).  
   
 =item value  
   
 Value of the attribute.  
   
 =item url  
   
 URL or text citation from which the property was obtained.  
   
 =back  
   
 =cut  
 #: Return Type ;  
 sub AddProperty {  
     # Get the parameters.  
     my ($self, $featureID, $key, $value, $url) = @_;  
     # Declare the variable to hold the desired property ID.  
     my $propID;  
     # Attempt to find a property record for this key/value pair.  
     my @properties = $self->GetFlat(['Property'],  
                                    "Property(property-name) = ? AND Property(property-value) = ?",  
                                    [$key, $value], 'Property(id)');  
     if (@properties) {  
         # Here the property is already in the database. We save its ID.  
         $propID = $properties[0];  
         # Here the property value does not exist. We need to generate an ID. It will be set  
         # to a number one greater than the maximum value in the database. This call to  
         # GetAll will stop after one record.  
         my @maxProperty = $self->GetAll(['Property'], "ORDER BY Property(id) DESC", [], ['Property(id)'],  
                                         1);  
         $propID = $maxProperty[0]->[0] + 1;  
         # Insert the new property value.  
         $self->Insert('Property', { 'property-name' => $key, 'property-value' => $value, id => $propID });  
     }  
     # Now we connect the incoming feature to the property.  
     $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });  
 }  
   
3889    
3890  1;  1;

Legend:
Removed from v.1.85  
changed lines
  Added in v.1.96

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3