[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.92, Mon Oct 16 07:41:50 2006 UTC revision 1.98, Tue Apr 10 06:13:33 2007 UTC
# Line 5  Line 5 
5      @ISA = qw(Exporter ERDB);      @ISA = qw(Exporter ERDB);
6      use Data::Dumper;      use Data::Dumper;
7      use strict;      use strict;
     use Carp;  
8      use DBKernel;      use DBKernel;
9      use XML::Simple;      use XML::Simple;
10      use DBQuery;      use DBQuery;
11      use DBObject;      use ERDBObject;
12      use Tracer;      use Tracer;
13      use FIGRules;      use FIGRules;
14      use FidCheck;      use FidCheck;
# Line 921  Line 920 
920      my ($self, $genomeID) = @_;      my ($self, $genomeID) = @_;
921      # Declare the return variable.      # Declare the return variable.
922      my $retVal = {};      my $retVal = {};
923      # Query the genome's features and annotations. We'll put the oldest annotations      # Query the genome's features.
924      # first so that the last assignment to go into the hash will be the correct one.      my $query = $self->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ?",
     my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'],  
                            "HasFeature(from-link) = ? ORDER BY Annotation(time)",  
925                             [$genomeID]);                             [$genomeID]);
926      # Loop through the annotations.      # Loop through the features.
927      while (my $data = $query->Fetch) {      while (my $data = $query->Fetch) {
928          # Get the feature ID and annotation text.          # Get the feature ID and assignment.
929          my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',          my ($fid, $assignment) = $data->Values(['Feature(id)', 'Feature(assignment)']);
930                                                  'Annotation(annotation)']);          if ($assignment) {
         # Check to see if this is an assignment. Note that the user really  
         # doesn't matter to us, other than we use it to determine whether or  
         # not this is an assignment.  
         my ($user, $assignment) = _ParseAssignment('fig', $annotation);  
         if ($user) {  
             # Here it's an assignment. We put it in the return hash, overwriting  
             # any older assignment that might be present.  
931              $retVal->{$fid} = $assignment;              $retVal->{$fid} = $assignment;
932          }          }
933      }      }
# Line 1299  Line 1289 
1289  Return the most recently-determined functional assignment of a particular feature.  Return the most recently-determined functional assignment of a particular feature.
1290    
1291  The functional assignment is handled differently depending on the type of feature. If  The functional assignment is handled differently depending on the type of feature. If
1292  the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional  the feature is identified by a FIG ID (begins with the string C<fig|>), then the functional
1293  assignment is a type of annotation. The format of an assignment is described in  assignment is taken from the B<Feature> or B<Annotation> table, depending on whether a user ID is specified.
 L</ParseAssignment>. Its worth noting that we cannot filter on the content of the  
 annotation itself because it's a text field; however, this is not a big problem because  
 most features only have a small number of annotations.  
1294    
1295  Each user has an associated list of trusted users. The assignment returned will be the most  Each user has an associated list of trusted users. The assignment returned will be the most
1296  recent one by at least one of the trusted users. If no trusted user list is available, then  recent one by at least one of the trusted users. If no trusted user list is available, then
# Line 1322  Line 1309 
1309    
1310  =item userID (optional)  =item userID (optional)
1311    
1312  ID of the user whose function determination is desired. If omitted, only the latest  ID of the user whose function determination is desired. If omitted, the primary
1313  C<FIG> assignment will be returned.  functional assignment in the B<Feature> table will be returned.
1314    
1315  =item RETURN  =item RETURN
1316    
# Line 1340  Line 1327 
1327      my $retVal;      my $retVal;
1328      # Determine the ID type.      # Determine the ID type.
1329      if ($featureID =~ m/^fig\|/) {      if ($featureID =~ m/^fig\|/) {
1330          # Here we have a FIG feature ID. We must build the list of trusted          # Here we have a FIG feature ID.
1331          # users.          if (!$userID) {
1332                # Use the primary assignment.
1333                ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(assignment)']);
1334            } else {
1335                # We must build the list of trusted users.
1336          my %trusteeTable = ();          my %trusteeTable = ();
1337          # Check the user ID.          # Check the user ID.
1338          if (!$userID) {          if (!$userID) {
# Line 1384  Line 1375 
1375                  }                  }
1376              }              }
1377          }          }
1378            }
1379      } else {      } else {
1380          # Here we have a non-FIG feature ID. In this case the user ID does not          # Here we have a non-FIG feature ID. In this case the user ID does not
1381          # matter. We simply get the information from the External Alias Function          # matter. We simply get the information from the External Alias Function
# Line 1504  Line 1496 
1496          # Peel off the BBHs found.          # Peel off the BBHs found.
1497          my @found = ();          my @found = ();
1498          for my $bbh (@bbhData) {          for my $bbh (@bbhData) {
1499              push @found, $bbh->[0];              my $fid = $bbh->[0];
1500                my $bbGenome = $self->GenomeOf($fid);
1501                if ($bbGenome eq $genomeID) {
1502                    push @found, $fid;
1503                }
1504          }          }
1505          $retVal{$featureID} = \@found;          $retVal{$featureID} = \@found;
1506      }      }
# Line 3184  Line 3180 
3180      return @retVal;      return @retVal;
3181  }  }
3182    
3183    =head3 BBHMatrix
3184    
3185    C<< my %bbhMap = $sprout->BBHMatrix($genomeID, $cutoff, @targets); >>
3186    
3187    Find all the bidirectional best hits for the features of a genome in a
3188    specified list of target genomes. The return value will be a hash mapping
3189    features in the original genome to their bidirectional best hits in the
3190    target genomes.
3191    
3192    =over 4
3193    
3194    =item genomeID
3195    
3196    ID of the genome whose features are to be examined for bidirectional best hits.
3197    
3198    =item cutoff
3199    
3200    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3201    
3202    =item targets
3203    
3204    List of target genomes. Only pairs originating in the original
3205    genome and landing in one of the target genomes will be returned.
3206    
3207    =item RETURN
3208    
3209    Returns a hash mapping each feature in the original genome to a hash mapping its
3210    BBH pegs in the target genomes to their scores.
3211    
3212    =back
3213    
3214    =cut
3215    
3216    sub BBHMatrix {
3217        # Get the parameters.
3218        my ($self, $genomeID, $cutoff, @targets) = @_;
3219        # Declare the return variable.
3220        my %retVal = ();
3221        # Ask for the BBHs.
3222        my @bbhList = FIGRules::BatchBBHs("fig|$genomeID.%", $cutoff, @targets);
3223        # We now have a set of 4-tuples that we need to convert into a hash of hashes.
3224        for my $bbhData (@bbhList) {
3225            my ($peg1, $peg2, $score) = @{$bbhData};
3226            if (! exists $retVal{$peg1}) {
3227                $retVal{$peg1} = { $peg2 => $score };
3228            } else {
3229                $retVal{$peg1}->{$peg2} = $score;
3230            }
3231        }
3232        # Return the result.
3233        return %retVal;
3234    }
3235    
3236    
3237    =head3 SimMatrix
3238    
3239    C<< my %simMap = $sprout->SimMatrix($genomeID, $cutoff, @targets); >>
3240    
3241    Find all the similarities for the features of a genome in a
3242    specified list of target genomes. The return value will be a hash mapping
3243    features in the original genome to their similarities in the
3244    target genomes.
3245    
3246    =over 4
3247    
3248    =item genomeID
3249    
3250    ID of the genome whose features are to be examined for similarities.
3251    
3252    =item cutoff
3253    
3254    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3255    
3256    =item targets
3257    
3258    List of target genomes. Only pairs originating in the original
3259    genome and landing in one of the target genomes will be returned.
3260    
3261    =item RETURN
3262    
3263    Returns a hash mapping each feature in the original genome to a hash mapping its
3264    similar pegs in the target genomes to their scores.
3265    
3266    =back
3267    
3268    =cut
3269    
3270    sub SimMatrix {
3271        # Get the parameters.
3272        my ($self, $genomeID, $cutoff, @targets) = @_;
3273        # Declare the return variable.
3274        my %retVal = ();
3275        # Get the list of features in the source organism.
3276        my @fids = $self->FeaturesOf($genomeID);
3277        # Ask for the sims. We only want similarities to fig features.
3278        my $simList = FIGRules::GetNetworkSims($self, \@fids, {}, 1000, $cutoff, "fig");
3279        if (! defined $simList) {
3280            Confess("Unable to retrieve similarities from server.");
3281        } else {
3282            Trace("Processing sims.") if T(3);
3283            # We now have a set of sims that we need to convert into a hash of hashes. First, we
3284            # create a hash for the target genomes.
3285            my %targetHash = map { $_ => 1 } @targets;
3286            for my $simData (@{$simList}) {
3287                # Get the PEGs and the score.
3288                my ($peg1, $peg2, $score) = ($simData->id1, $simData->id2, $simData->psc);
3289                # Ensure the second ID is in the target list.
3290                my ($genome2) = FIGRules::ParseFeatureID($peg2);
3291                if (exists $targetHash{$genome2}) {
3292                    # Here it is. Now we need to add it to the return hash. How we do that depends
3293                    # on whether or not $peg1 is new to us.
3294                    if (! exists $retVal{$peg1}) {
3295                        $retVal{$peg1} = { $peg2 => $score };
3296                    } else {
3297                        $retVal{$peg1}->{$peg2} = $score;
3298                    }
3299                }
3300            }
3301        }
3302        # Return the result.
3303        return %retVal;
3304    }
3305    
3306    
3307  =head3 LowBBHs  =head3 LowBBHs
3308    
3309  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>
# Line 3219  Line 3339 
3339      my @bbhList = FIGRules::BBHData($featureID, $cutoff);      my @bbhList = FIGRules::BBHData($featureID, $cutoff);
3340      # Form the results into the return hash.      # Form the results into the return hash.
3341      for my $pair (@bbhList) {      for my $pair (@bbhList) {
3342          $retVal{$pair->[0]} = $pair->[1];          my $fid = $pair->[0];
3343            if ($self->Exists('Feature', $fid)) {
3344                $retVal{$fid} = $pair->[1];
3345            }
3346      }      }
3347      # Return the result.      # Return the result.
3348      return %retVal;      return %retVal;
# Line 3237  Line 3360 
3360  Similarities can be either raw or expanded. The raw similarities are basic  Similarities can be either raw or expanded. The raw similarities are basic
3361  hits between features with similar DNA. Expanding a raw similarity drags in any  hits between features with similar DNA. Expanding a raw similarity drags in any
3362  features considered substantially identical. So, for example, if features B<A1>,  features considered substantially identical. So, for example, if features B<A1>,
3363  B<A2>, and B<A3> are all substatially identical to B<A>, then a raw similarity  B<A2>, and B<A3> are all substantially identical to B<A>, then a raw similarity
3364  B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.  B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3365    
3366  =over 4  =over 4
# Line 3488  Line 3611 
3611      # Get the parameters.      # Get the parameters.
3612      my ($self, $genomeID, $testFlag) = @_;      my ($self, $genomeID, $testFlag) = @_;
3613      # Perform the delete for the genome's features.      # Perform the delete for the genome's features.
3614      my $retVal = $self->Delete('Feature', "fig|$genomeID.%", $testFlag);      my $retVal = $self->Delete('Feature', "fig|$genomeID.%", testMode => $testFlag);
3615      # Perform the delete for the primary genome data.      # Perform the delete for the primary genome data.
3616      my $stats = $self->Delete('Genome', $genomeID, $testFlag);      my $stats = $self->Delete('Genome', $genomeID, testMode => $testFlag);
3617      $retVal->Accumulate($stats);      $retVal->Accumulate($stats);
3618      # Return the result.      # Return the result.
3619      return $retVal;      return $retVal;
# Line 3687  Line 3810 
3810      if (@properties) {      if (@properties) {
3811          # Here the property is already in the database. We save its ID.          # Here the property is already in the database. We save its ID.
3812          $propID = $properties[0];          $propID = $properties[0];
3813        } else {
3814          # Here the property value does not exist. We need to generate an ID. It will be set          # Here the property value does not exist. We need to generate an ID. It will be set
3815          # to a number one greater than the maximum value in the database. This call to          # to a number one greater than the maximum value in the database. This call to
3816          # GetAll will stop after one record.          # GetAll will stop after one record.

Legend:
Removed from v.1.92  
changed lines
  Added in v.1.98

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3