[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.65, Sun Jun 18 07:20:33 2006 UTC revision 1.78, Wed Jul 26 14:47:03 2006 UTC
# Line 12  Line 12 
12      use DBObject;      use DBObject;
13      use Tracer;      use Tracer;
14      use FIGRules;      use FIGRules;
15        use FidCheck;
16      use Stats;      use Stats;
17      use POSIX qw(strftime);      use POSIX qw(strftime);
18        use BasicLocation;
19    
20  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
21    
# Line 91  Line 92 
92  sub new {  sub new {
93      # Get the parameters.      # Get the parameters.
94      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
95        # Compute the DBD directory.
96        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
97                                                      $FIG_Config::fig );
98      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
99      # the incoming data.      # the incoming data.
100      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
# Line 98  Line 102 
102                                                          # database type                                                          # database type
103                         dataDir      => $FIG_Config::sproutData,                         dataDir      => $FIG_Config::sproutData,
104                                                          # data file directory                                                          # data file directory
105                         xmlFileName  => "$FIG_Config::fig/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
106                                                          # database definition file name                                                          # database definition file name
107                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
108                                                          # user name and password                                                          # user name and password
# Line 375  Line 379 
379      # Get the genomes.      # Get the genomes.
380      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
381                                                                       'Genome(genus)',                                                                       'Genome(genus)',
382                                                                       'Genome(species)']);                                                                       'Genome(species)',
383                                                                         'Genome(unique-characterization)']);
384      # Sort them by name.      # Sort them by name.
385      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
386      # Loop through the genomes, creating the option tags.      # Loop through the genomes, creating the option tags.
387      for my $genomeData (@sorted) {      for my $genomeData (@sorted) {
388          # Get the data for this genome.          # Get the data for this genome.
389          my ($genomeID, $genus, $species) = @{$genomeData};          my ($genomeID, $genus, $species, $strain) = @{$genomeData};
390          # Get the contig count.          # Get the contig count.
391          my $count = $self->ContigCount($genomeID);          my $count = $self->ContigCount($genomeID);
392          my $counting = ($count == 1 ? "contig" : "contigs");          my $counting = ($count == 1 ? "contig" : "contigs");
393          # Build the option tag.          # Build the option tag.
394          $retVal .= "<option value=\"$genomeID\">$genus $species ($genomeID) [$count $counting]</option>\n";          $retVal .= "<option value=\"$genomeID\">$genus $species $strain ($genomeID) [$count $counting]</option>\n";
395            Trace("Option tag built for $genomeID: $genus $species $strain.") if T(3);
396      }      }
397      # Close the SELECT tag.      # Close the SELECT tag.
398      $retVal .= "</select>\n";      $retVal .= "</select>\n";
# Line 836  Line 842 
842    
843  =over 4  =over 4
844    
845  =genomeID  =item genomeID
846    
847  ID of the genome whose feature count is desired.  ID of the genome whose feature count is desired.
848    
# Line 899  Line 905 
905      # Loop through the annotations.      # Loop through the annotations.
906      while (my $data = $query->Fetch) {      while (my $data = $query->Fetch) {
907          # Get the feature ID and annotation text.          # Get the feature ID and annotation text.
908          my ($fid, $annotation) = $data->Values(['HasFeature(from-link)',          my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',
909                                                  'Annotation(annotation)']);                                                  'Annotation(annotation)']);
910          # Check to see if this is an assignment. Note that the user really          # Check to see if this is an assignment. Note that the user really
911          # doesn't matter to us, other than we use it to determine whether or          # doesn't matter to us, other than we use it to determine whether or
912          # not this is an assignment.          # not this is an assignment.
913          my ($user, $assignment) = $self->_ParseAssignment('fig', $annotation);          my ($user, $assignment) = _ParseAssignment('fig', $annotation);
914          if ($user) {          if ($user) {
915              # Here it's an assignment. We put it in the return hash, overwriting              # Here it's an assignment. We put it in the return hash, overwriting
916              # any older assignment that might be present.              # any older assignment that might be present.
# Line 1653  Line 1659 
1659  sub CoupledFeatures {  sub CoupledFeatures {
1660      # Get the parameters.      # Get the parameters.
1661      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1662        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1663      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1664      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1665                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1665  Line 1672 
1672          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1673          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1674                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1675            Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1676          # Get the other feature that participates in the coupling.          # Get the other feature that participates in the coupling.
1677          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1678                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1679                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1680            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1681          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1682          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1683          $found = 1;          $found = 1;
# Line 1801  Line 1810 
1810      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1811      # Declare the return values. We'll start with the coupling ID and undefine the      # Declare the return values. We'll start with the coupling ID and undefine the
1812      # flag and score until we have more information.      # flag and score until we have more information.
1813      my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1814      # Find the coupling data.      # Find the coupling data.
1815      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1816                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
# Line 1822  Line 1831 
1831      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
1832  }  }
1833    
1834    =head3 GetSynonymGroup
1835    
1836    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1837    
1838    Return the synonym group name for the specified feature.
1839    
1840    =over 4
1841    
1842    =item fid
1843    
1844    ID of the feature whose synonym group is desired.
1845    
1846    =item RETURN
1847    
1848    The name of the synonym group to which the feature belongs. If the feature does
1849    not belong to a synonym group, the feature ID itself is returned.
1850    
1851    =back
1852    
1853    =cut
1854    
1855    sub GetSynonymGroup {
1856        # Get the parameters.
1857        my ($self, $fid) = @_;
1858        # Declare the return variable.
1859        my $retVal;
1860        # Find the synonym group.
1861        my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
1862                                       [$fid], 'IsSynonymGroupFor(from-link)');
1863        # Check to see if we found anything.
1864        if (@groups) {
1865            $retVal = $groups[0];
1866        } else {
1867            $retVal = $fid;
1868        }
1869        # Return the result.
1870        return $retVal;
1871    }
1872    
1873    =head3 GetBoundaries
1874    
1875    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1876    
1877    Determine the begin and end boundaries for the locations in a list. All of the
1878    locations must belong to the same contig and have mostly the same direction in
1879    order for this method to produce a meaningful result. The resulting
1880    begin/end pair will contain all of the bases in any of the locations.
1881    
1882    =over 4
1883    
1884    =item locList
1885    
1886    List of locations to process.
1887    
1888    =item RETURN
1889    
1890    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1891    and the ending boundary. The beginning boundary will be left of the
1892    end for mostly-forward locations and right of the end for mostly-backward
1893    locations.
1894    
1895    =back
1896    
1897    =cut
1898    
1899    sub GetBoundaries {
1900        # Get the parameters.
1901        my ($self, @locList) = @_;
1902        # Set up the counters used to determine the most popular direction.
1903        my %counts = ( '+' => 0, '-' => 0 );
1904        # Get the last location and parse it.
1905        my $locObject = BasicLocation->new(pop @locList);
1906        # Prime the loop with its data.
1907        my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right);
1908        # Count its direction.
1909        $counts{$locObject->Dir}++;
1910        # Loop through the remaining locations. Note that in most situations, this loop
1911        # will not iterate at all, because most of the time we will be dealing with a
1912        # singleton list.
1913        for my $loc (@locList) {
1914            # Create a location object.
1915            my $locObject = BasicLocation->new($loc);
1916            # Count the direction.
1917            $counts{$locObject->Dir}++;
1918            # Get the left end and the right end.
1919            my $left = $locObject->Left;
1920            my $right = $locObject->Right;
1921            # Merge them into the return variables.
1922            if ($left < $beg) {
1923                $beg = $left;
1924            }
1925            if ($right > $end) {
1926                $end = $right;
1927            }
1928        }
1929        # If the most common direction is reverse, flip the begin and end markers.
1930        if ($counts{'-'} > $counts{'+'}) {
1931            ($beg, $end) = ($end, $beg);
1932        }
1933        # Return the result.
1934        return ($contig, $beg, $end);
1935    }
1936    
1937  =head3 CouplingID  =head3 CouplingID
1938    
1939  C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>  C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1940    
1941  Return the coupling ID for a pair of feature IDs.  Return the coupling ID for a pair of feature IDs.
1942    
# Line 1857  Line 1969 
1969  =cut  =cut
1970  #: Return Type $;  #: Return Type $;
1971  sub CouplingID {  sub CouplingID {
1972      return join " ", sort @_;      my ($self, @pegs) = @_;
1973        return $self->DigestKey(join " ", sort @pegs);
1974  }  }
1975    
1976  =head3 ReadFasta  =head3 ReadFasta
# Line 2216  Line 2329 
2329      return @retVal;      return @retVal;
2330  }  }
2331    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
     # Get the parameters.  
     my ($self, $entityName, $entityID) = @_;  
     # Check for the entity instance.  
     Trace("Checking existence of $entityName with ID=$entityID.") if T(4);  
     my $testInstance = $self->GetEntity($entityName, $entityID);  
     # Return an existence indicator.  
     my $retVal = ($testInstance ? 1 : 0);  
     return $retVal;  
 }  
   
2332  =head3 FeatureTranslation  =head3 FeatureTranslation
2333    
2334  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 2788  Line 2866 
2866      return @retVal;      return @retVal;
2867  }  }
2868    
   
   
2869  =head3 RelatedFeatures  =head3 RelatedFeatures
2870    
2871  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 3041  Line 3117 
3117      return %retVal;      return %retVal;
3118  }  }
3119    
3120    =head3 Sims
3121    
3122    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3123    
3124    Get a list of similarities for a specified feature. Similarity information is not kept in the
3125    Sprout database; rather, they are retrieved from a network server. The similarities are
3126    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3127    so that its elements can be accessed by name.
3128    
3129    Similarities can be either raw or expanded. The raw similarities are basic
3130    hits between features with similar DNA. Expanding a raw similarity drags in any
3131    features considered substantially identical. So, for example, if features B<A1>,
3132    B<A2>, and B<A3> are all substatially identical to B<A>, then a raw similarity
3133    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3134    
3135    =over 4
3136    
3137    =item fid
3138    
3139    ID of the feature whose similarities are desired.
3140    
3141    =item maxN
3142    
3143    Maximum number of similarities to return.
3144    
3145    =item maxP
3146    
3147    Minumum allowable similarity score.
3148    
3149    =item select
3150    
3151    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3152    means only similarities to FIG features are returned; C<all> means all expanded
3153    similarities are returned; and C<figx> means similarities are expanded until the
3154    number of FIG features equals the maximum.
3155    
3156    =item max_expand
3157    
3158    The maximum number of features to expand.
3159    
3160    =item filters
3161    
3162    Reference to a hash containing filter information, or a subroutine that can be
3163    used to filter the sims.
3164    
3165    =item RETURN
3166    
3167    Returns a reference to a list of similarity objects, or C<undef> if an error
3168    occurred.
3169    
3170    =back
3171    
3172    =cut
3173    
3174    sub Sims {
3175        # Get the parameters.
3176        my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
3177        # Create the shim object to test for deleted FIDs.
3178        my $shim = FidCheck->new($self);
3179        # Ask the network for sims.
3180        my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters);
3181        # Return the result.
3182        return $retVal;
3183    }
3184    
3185  =head3 GetGroups  =head3 GetGroups
3186    
3187  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>

Legend:
Removed from v.1.65  
changed lines
  Added in v.1.78

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3