[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.96, Wed Dec 6 03:37:26 2006 UTC revision 1.99, Fri Apr 27 22:21:46 2007 UTC
# Line 5  Line 5 
5      @ISA = qw(Exporter ERDB);      @ISA = qw(Exporter ERDB);
6      use Data::Dumper;      use Data::Dumper;
7      use strict;      use strict;
     use Carp;  
8      use DBKernel;      use DBKernel;
9      use XML::Simple;      use XML::Simple;
10      use DBQuery;      use DBQuery;
11      use DBObject;      use ERDBObject;
12      use Tracer;      use Tracer;
13      use FIGRules;      use FIGRules;
14      use FidCheck;      use FidCheck;
15      use Stats;      use Stats;
16      use POSIX qw(strftime);      use POSIX qw(strftime);
17      use BasicLocation;      use BasicLocation;
18        use CustomAttributes;
19        use RemoteCustomAttributes;
20    
21  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
22    
# Line 133  Line 134 
134      $retVal->{_xmlName} = $xmlFileName;      $retVal->{_xmlName} = $xmlFileName;
135      # Set up space for the group file data.      # Set up space for the group file data.
136      $retVal->{groupHash} = undef;      $retVal->{groupHash} = undef;
137        # Connect to the attributes.
138        if ($FIG_Config::attrURL) {
139            Trace("Remote attribute server $FIG_Config::attrURL chosen.") if T(3);
140            $retVal->{_ca} = RemoteCustomAttributes->new($FIG_Config::attrURL);
141        } elsif ($FIG_Config::attrDbName) {
142            Trace("Local attribute database $FIG_Config::attrDbName chosen.") if T(3);
143            my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));
144            $retVal->{_ca} = CustomAttributes->new(user => $user);
145        }
146      # Return it.      # Return it.
147      return $retVal;      return $retVal;
148  }  }
# Line 2533  Line 2543 
2543    
2544  Return a list of the properties with the specified characteristics.  Return a list of the properties with the specified characteristics.
2545    
2546  Properties are arbitrary key-value pairs associated with a feature. (At some point they  Properties are the Sprout analog of the FIG attributes. The call is
2547  will also be associated with genomes.) A property value is represented by a 4-tuple of  passed directly to the CustomAttributes or RemoteCustomAttributes object
2548  the form B<($fid, $key, $value, $url)>. These exactly correspond to the parameter  contained in this object.
2549    
2550  =over 4  This method returns a series of tuples that match the specified criteria. Each tuple
2551    will contain an object ID, a key, and one or more values. The parameters to this
2552    method therefore correspond structurally to the values expected in each tuple. In
2553    addition, you can ask for a generic search by suffixing a percent sign (C<%>) to any
2554    of the parameters. So, for example,
2555    
2556  =item fid      my @attributeList = $sprout->GetProperties('fig|100226.1.peg.1004', 'structure%', 1, 2);
2557    
2558  ID of the feature possessing the property.  would return something like
2559    
2560  =item key      ['fig|100226.1.peg.1004', 'structure', 1, 2]
2561        ['fig|100226.1.peg.1004', 'structure1', 1, 2]
2562        ['fig|100226.1.peg.1004', 'structure2', 1, 2]
2563        ['fig|100226.1.peg.1004', 'structureA', 1, 2]
2564    
2565  Name or key of the property.  Use of C<undef> in any position acts as a wild card (all values). You can also specify
2566    a list reference in the ID column. Thus,
2567    
2568  =item value      my @attributeList = $sprout->GetProperties(['100226.1', 'fig|100226.1.%'], 'PUBMED');
2569    
2570  Value of the property.  would get the PUBMED attribute data for Streptomyces coelicolor A3(2) and all its
2571    features.
2572    
2573  =item url  In addition to values in multiple sections, a single attribute key can have multiple
2574    values, so even
2575    
2576  URL of the document that indicated the property should have this particular value, or an      my @attributeList = $sprout->GetProperties($peg, 'virulent');
 empty string if no such document exists.  
2577    
2578  =back  which has no wildcard in the key or the object ID, may return multiple tuples.
2579    
2580  The parameters act as a filter for the desired data. Any non-null parameter will  =over 4
2581  automatically match all the tuples returned. So, specifying just the I<$fid> will  
2582  return all the properties of the specified feature; similarly, specifying the I<$key>  =item objectID
 and I<$value> parameters will return all the features having the specified property  
 value.  
2583    
2584  A single property key can have many values, representing different ideas about the  ID of object whose attributes are desired. If the attributes are desired for multiple
2585  feature in question. For example, one paper may declare that a feature C<fig|83333.1.peg.10> is  objects, this parameter can be specified as a list reference. If the attributes are
2586  virulent, and another may declare that it is not virulent. A query about the virulence of  desired for all objects, specify C<undef> or an empty string. Finally, you can specify
2587  C<fig|83333.1.peg.10> would be coded as  attributes for a range of object IDs by putting a percent sign (C<%>) at the end.
2588    
2589      my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', '');  =item key
2590    
2591  Here the I<$value> and I<$url> fields are left blank, indicating that those fields are  Attribute key name. A value of C<undef> or an empty string will match all
2592  not to be filtered. The tuples returned would be  attribute keys. If the values are desired for multiple keys, this parameter can be
2593    specified as a list reference. Finally, you can specify attributes for a range of
2594    keys by putting a percent sign (C<%>) at the end.
2595    
2596    =item values
2597    
2598    List of the desired attribute values, section by section. If C<undef>
2599    or an empty string is specified, all values in that section will match. A
2600    generic match can be requested by placing a percent sign (C<%>) at the end.
2601    In that case, all values that match up to and not including the percent sign
2602    will match. You may also specify a regular expression enclosed
2603    in slashes. All values that match the regular expression will be returned. For
2604    performance reasons, only values have this extra capability.
2605    
2606      ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf')  =item RETURN
2607      ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf')  
2608    Returns a list of tuples. The first element in the tuple is an object ID, the
2609    second is an attribute key, and the remaining elements are the sections of
2610    the attribute value. All of the tuples will match the criteria set forth in
2611    the parameter list.
2612    
2613    =back
2614    
2615  =cut  =cut
2616  #: Return Type @@;  
2617  sub GetProperties {  sub GetProperties {
2618      # Get the parameters.      # Get the parameters.
2619      my ($self, @parms) = @_;      my ($self, @parms) = @_;
2620      # Declare the return variable.      # Declare the return variable.
2621      my @retVal = ();      my @retVal = $self->{_ca}->GetAttributes(@parms);
     # Now we need to create a WHERE clause that will get us the data we want. First,  
     # we create a list of the columns containing the data for each parameter.  
     my @colNames = ('HasProperty(from-link)', 'Property(property-name)',  
                     'Property(property-value)', 'HasProperty(evidence)');  
     # Now we build the WHERE clause and the list of parameter values.  
     my @where = ();  
     my @values = ();  
     for (my $i = 0; $i <= $#colNames; $i++) {  
         my $parm = $parms[$i];  
         if (defined $parm && ($parm ne '')) {  
             push @where, "$colNames[$i] = ?";  
             push @values, $parm;  
         }  
     }  
     # Format the WHERE clause.  
     my $filter = (@values > 0 ? (join " AND ", @where) : undef);  
     # Ask for all the propertie values with the desired characteristics.  
     my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values);  
     while (my $valueObject = $query->Fetch()) {  
         my @tuple = $valueObject->Values(\@colNames);  
         push @retVal, \@tuple;  
     }  
2622      # Return the result.      # Return the result.
2623      return @retVal;      return @retVal;
2624  }  }
# Line 2618  Line 2631 
2631  that specify special characteristics of the feature. For example, a property could indicate  that specify special characteristics of the feature. For example, a property could indicate
2632  that a feature is essential to the survival of the organism or that it has benign influence  that a feature is essential to the survival of the organism or that it has benign influence
2633  on the activities of a pathogen. Each property is returned as a triple of the form  on the activities of a pathogen. Each property is returned as a triple of the form
2634  C<($key,$value,$url)>, where C<$key> is the property name, C<$value> is its value (commonly  C<($key,@values)>, where C<$key> is the property name and  C<@values> are its values.
 a 1 or a 0, but possibly a string or a floating-point value), and C<$url> is a string describing  
 the web address or citation in which the property's value for the feature was identified.  
2635    
2636  =over 4  =over 4
2637    
# Line 2630  Line 2641 
2641    
2642  =item RETURN  =item RETURN
2643    
2644  Returns a list of triples, each triple containing the property name, its value, and a URL or  Returns a list of tuples, each tuple containing the property name and its values.
 citation.  
2645    
2646  =back  =back
2647    
# Line 2641  Line 2651 
2651      # Get the parameters.      # Get the parameters.
2652      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
2653      # Get the properties.      # Get the properties.
2654      my @retVal = $self->GetAll(['HasProperty', 'Property'], "HasProperty(from-link) = ?", [$featureID],      my @attributes = $self->{_ca}->GetAttributes($featureID);
2655                              ['Property(property-name)', 'Property(property-value)',      # Strip the feature ID off each tuple.
2656                               'HasProperty(evidence)']);      my @retVal = ();
2657        for my $attributeRow (@attributes) {
2658            shift @{$attributeRow};
2659            push @retVal, $attributeRow;
2660        }
2661      # Return the resulting list.      # Return the resulting list.
2662      return @retVal;      return @retVal;
2663  }  }
# Line 2681  Line 2695 
2695  C<< my $id = $sprout->PropertyID($propName, $propValue); >>  C<< my $id = $sprout->PropertyID($propName, $propValue); >>
2696    
2697  Return the ID of the specified property name and value pair, if the  Return the ID of the specified property name and value pair, if the
2698  pair exists.  pair exists. Only a small subset of the FIG attributes are stored as
2699    Sprout properties, mostly for use in search optimization.
2700    
2701  =over 4  =over 4
2702    
# Line 3234  Line 3249 
3249      return %retVal;      return %retVal;
3250  }  }
3251    
3252    
3253    =head3 SimMatrix
3254    
3255    C<< my %simMap = $sprout->SimMatrix($genomeID, $cutoff, @targets); >>
3256    
3257    Find all the similarities for the features of a genome in a
3258    specified list of target genomes. The return value will be a hash mapping
3259    features in the original genome to their similarites in the
3260    target genomes.
3261    
3262    =over 4
3263    
3264    =item genomeID
3265    
3266    ID of the genome whose features are to be examined for similarities.
3267    
3268    =item cutoff
3269    
3270    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3271    
3272    =item targets
3273    
3274    List of target genomes. Only pairs originating in the original
3275    genome and landing in one of the target genomes will be returned.
3276    
3277    =item RETURN
3278    
3279    Returns a hash mapping each feature in the original genome to a hash mapping its
3280    similar pegs in the target genomes to their scores.
3281    
3282    =back
3283    
3284    =cut
3285    
3286    sub SimMatrix {
3287        # Get the parameters.
3288        my ($self, $genomeID, $cutoff, @targets) = @_;
3289        # Declare the return variable.
3290        my %retVal = ();
3291        # Get the list of features in the source organism.
3292        my @fids = $self->FeaturesOf($genomeID);
3293        # Ask for the sims. We only want similarities to fig features.
3294        my $simList = FIGRules::GetNetworkSims($self, \@fids, {}, 1000, $cutoff, "fig");
3295        if (! defined $simList) {
3296            Confess("Unable to retrieve similarities from server.");
3297        } else {
3298            Trace("Processing sims.") if T(3);
3299            # We now have a set of sims that we need to convert into a hash of hashes. First, we
3300            # create a hash for the target genomes.
3301            my %targetHash = map { $_ => 1 } @targets;
3302            for my $simData (@{$simList}) {
3303                # Get the PEGs and the score.
3304                my ($peg1, $peg2, $score) = ($simData->id1, $simData->id2, $simData->psc);
3305                # Ensure the second ID is in the target list.
3306                my ($genome2) = FIGRules::ParseFeatureID($peg2);
3307                if (exists $targetHash{$genome2}) {
3308                    # Here it is. Now we need to add it to the return hash. How we do that depends
3309                    # on whether or not $peg1 is new to us.
3310                    if (! exists $retVal{$peg1}) {
3311                        $retVal{$peg1} = { $peg2 => $score };
3312                    } else {
3313                        $retVal{$peg1}->{$peg2} = $score;
3314                    }
3315                }
3316            }
3317        }
3318        # Return the result.
3319        return %retVal;
3320    }
3321    
3322    
3323  =head3 LowBBHs  =head3 LowBBHs
3324    
3325  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>
# Line 3290  Line 3376 
3376  Similarities can be either raw or expanded. The raw similarities are basic  Similarities can be either raw or expanded. The raw similarities are basic
3377  hits between features with similar DNA. Expanding a raw similarity drags in any  hits between features with similar DNA. Expanding a raw similarity drags in any
3378  features considered substantially identical. So, for example, if features B<A1>,  features considered substantially identical. So, for example, if features B<A1>,
3379  B<A2>, and B<A3> are all substatially identical to B<A>, then a raw similarity  B<A2>, and B<A3> are all substantially identical to B<A>, then a raw similarity
3380  B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.  B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3381    
3382  =over 4  =over 4
# Line 3698  Line 3784 
3784    
3785  =head3 AddProperty  =head3 AddProperty
3786    
3787  C<< my  = $sprout->AddProperty($featureID, $key, $value, $url); >>  C<< $sprout->AddProperty($featureID, $key, @values); >>
3788    
3789  Add a new attribute value (Property) to a feature. In the SEED system, attributes can  Add a new attribute value (Property) to a feature.
 be added to almost any object. In Sprout, they can only be added to features. In  
 Sprout, attributes are implemented using I<properties>. A property represents a key/value  
 pair. If the particular key/value pair coming in is not already in the database, a new  
 B<Property> record is created to hold it.  
3790    
3791  =over 4  =over 4
3792    
3793  =item peg  =item peg
3794    
3795  ID of the feature to which the attribute is to be replied.  ID of the feature to which the attribute is to be added.
3796    
3797  =item key  =item key
3798    
3799  Name of the attribute (key).  Name of the attribute (key).
3800    
3801  =item value  =item values
3802    
3803  Value of the attribute.  Values of the attribute.
   
 =item url  
   
 URL or text citation from which the property was obtained.  
3804    
3805  =back  =back
3806    
# Line 3730  Line 3808 
3808  #: Return Type ;  #: Return Type ;
3809  sub AddProperty {  sub AddProperty {
3810      # Get the parameters.      # Get the parameters.
3811      my ($self, $featureID, $key, $value, $url) = @_;      my ($self, $featureID, $key, @values) = @_;
3812      # Declare the variable to hold the desired property ID.      # Add the property using the attached attributes object.
3813      my $propID;      $self->{_ca}->AddAttribute($featureID, $key, @values);
     # Attempt to find a property record for this key/value pair.  
     my @properties = $self->GetFlat(['Property'],  
                                    "Property(property-name) = ? AND Property(property-value) = ?",  
                                    [$key, $value], 'Property(id)');  
     if (@properties) {  
         # Here the property is already in the database. We save its ID.  
         $propID = $properties[0];  
         # Here the property value does not exist. We need to generate an ID. It will be set  
         # to a number one greater than the maximum value in the database. This call to  
         # GetAll will stop after one record.  
         my @maxProperty = $self->GetAll(['Property'], "ORDER BY Property(id) DESC", [], ['Property(id)'],  
                                         1);  
         $propID = $maxProperty[0]->[0] + 1;  
         # Insert the new property value.  
         $self->Insert('Property', { 'property-name' => $key, 'property-value' => $value, id => $propID });  
     }  
     # Now we connect the incoming feature to the property.  
     $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });  
3814  }  }
3815    
3816  =head2 Virtual Methods  =head2 Virtual Methods

Legend:
Removed from v.1.96  
changed lines
  Added in v.1.99

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3