[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.63, Sun Jun 18 07:03:00 2006 UTC revision 1.79, Mon Jul 31 20:07:06 2006 UTC
# Line 12  Line 12 
12      use DBObject;      use DBObject;
13      use Tracer;      use Tracer;
14      use FIGRules;      use FIGRules;
15        use FidCheck;
16      use Stats;      use Stats;
17      use POSIX qw(strftime);      use POSIX qw(strftime);
18        use BasicLocation;
19    
20  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
21    
# Line 91  Line 92 
92  sub new {  sub new {
93      # Get the parameters.      # Get the parameters.
94      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
95        # Compute the DBD directory.
96        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
97                                                      $FIG_Config::fig );
98      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
99      # the incoming data.      # the incoming data.
100      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
# Line 98  Line 102 
102                                                          # database type                                                          # database type
103                         dataDir      => $FIG_Config::sproutData,                         dataDir      => $FIG_Config::sproutData,
104                                                          # data file directory                                                          # data file directory
105                         xmlFileName  => "$FIG_Config::fig/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
106                                                          # database definition file name                                                          # database definition file name
107                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
108                                                          # user name and password                                                          # user name and password
109                         port         => $FIG_Config::dbport,                         port         => $FIG_Config::dbport,
110                                                          # database connection port                                                          # database connection port
111                         sock         => $FIG_Config::dbsock,                         sock         => $FIG_Config::dbsock,
112                           host         => $FIG_Config::dbhost,
113                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
114                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
115                         noDBOpen     => 0,               # 1 to suppress the database open                         noDBOpen     => 0,               # 1 to suppress the database open
# Line 118  Line 123 
123      my $dbh;      my $dbh;
124      if (! $optionTable->{noDBOpen}) {      if (! $optionTable->{noDBOpen}) {
125          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
126                                  $password, $optionTable->{port}, undef, $optionTable->{sock});                                  $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
127      }      }
128      # Create the ERDB object.      # Create the ERDB object.
129      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
# Line 375  Line 380 
380      # Get the genomes.      # Get the genomes.
381      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
382                                                                       'Genome(genus)',                                                                       'Genome(genus)',
383                                                                       'Genome(species)']);                                                                       'Genome(species)',
384                                                                         'Genome(unique-characterization)']);
385      # Sort them by name.      # Sort them by name.
386      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
387      # Loop through the genomes, creating the option tags.      # Loop through the genomes, creating the option tags.
388      for my $genomeData (@sorted) {      for my $genomeData (@sorted) {
389          # Get the data for this genome.          # Get the data for this genome.
390          my ($genomeID, $genus, $species) = @{$genomeData};          my ($genomeID, $genus, $species, $strain) = @{$genomeData};
391          # Get the contig count.          # Get the contig count.
392          my $count = $self->ContigCount($genomeID);          my $count = $self->ContigCount($genomeID);
393          my $counting = ($count == 1 ? "contig" : "contigs");          my $counting = ($count == 1 ? "contig" : "contigs");
394          # Build the option tag.          # Build the option tag.
395          $retVal .= "<option value=\"$genomeID\">$genus $species ($genomeID) [$count $counting]</option>\n";          $retVal .= "<option value=\"$genomeID\">$genus $species $strain ($genomeID) [$count $counting]</option>\n";
396            Trace("Option tag built for $genomeID: $genus $species $strain.") if T(3);
397      }      }
398      # Close the SELECT tag.      # Close the SELECT tag.
399      $retVal .= "</select>\n";      $retVal .= "</select>\n";
# Line 836  Line 843 
843    
844  =over 4  =over 4
845    
846  =genomeID  =item genomeID
847    
848  ID of the genome whose feature count is desired.  ID of the genome whose feature count is desired.
849    
# Line 857  Line 864 
864      my ($self, $genomeID, $type) = @_;      my ($self, $genomeID, $type) = @_;
865      # Compute the count.      # Compute the count.
866      my $retVal = $self->GetCount(['HasFeature', 'Feature'],      my $retVal = $self->GetCount(['HasFeature', 'Feature'],
867                                  "HasFeature(from-link) = ? AND Feature(type) = ?",                                  "HasFeature(from-link) = ? AND Feature(feature-type) = ?",
868                                  [$genomeID, $type]);                                  [$genomeID, $type]);
869      # Return the result.      # Return the result.
870      return $retVal;      return $retVal;
# Line 899  Line 906 
906      # Loop through the annotations.      # Loop through the annotations.
907      while (my $data = $query->Fetch) {      while (my $data = $query->Fetch) {
908          # Get the feature ID and annotation text.          # Get the feature ID and annotation text.
909          my ($fid, $annotation) = $data->Values(['HasFeature(from-link)',          my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',
910                                                  'Annotation(text)']);                                                  'Annotation(annotation)']);
911          # Check to see if this is an assignment. Note that the user really          # Check to see if this is an assignment. Note that the user really
912          # doesn't matter to us, other than we use it to determine whether or          # doesn't matter to us, other than we use it to determine whether or
913          # not this is an assignment.          # not this is an assignment.
914          my ($user, $assignment) = $self->_ParseAssignment('fig', $annotation);          my ($user, $assignment) = _ParseAssignment('fig', $annotation);
915          if ($user) {          if ($user) {
916              # Here it's an assignment. We put it in the return hash, overwriting              # Here it's an assignment. We put it in the return hash, overwriting
917              # any older assignment that might be present.              # any older assignment that might be present.
# Line 1653  Line 1660 
1660  sub CoupledFeatures {  sub CoupledFeatures {
1661      # Get the parameters.      # Get the parameters.
1662      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1663        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1664      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1665      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1666                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1665  Line 1673 
1673          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1674          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1675                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1676            Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1677          # Get the other feature that participates in the coupling.          # Get the other feature that participates in the coupling.
1678          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1679                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1680                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1681            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1682          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1683          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1684          $found = 1;          $found = 1;
# Line 1801  Line 1811 
1811      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1812      # Declare the return values. We'll start with the coupling ID and undefine the      # Declare the return values. We'll start with the coupling ID and undefine the
1813      # flag and score until we have more information.      # flag and score until we have more information.
1814      my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1815      # Find the coupling data.      # Find the coupling data.
1816      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1817                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
# Line 1822  Line 1832 
1832      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
1833  }  }
1834    
1835    =head3 GetSynonymGroup
1836    
1837    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1838    
1839    Return the synonym group name for the specified feature.
1840    
1841    =over 4
1842    
1843    =item fid
1844    
1845    ID of the feature whose synonym group is desired.
1846    
1847    =item RETURN
1848    
1849    The name of the synonym group to which the feature belongs. If the feature does
1850    not belong to a synonym group, the feature ID itself is returned.
1851    
1852    =back
1853    
1854    =cut
1855    
1856    sub GetSynonymGroup {
1857        # Get the parameters.
1858        my ($self, $fid) = @_;
1859        # Declare the return variable.
1860        my $retVal;
1861        # Find the synonym group.
1862        my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
1863                                       [$fid], 'IsSynonymGroupFor(from-link)');
1864        # Check to see if we found anything.
1865        if (@groups) {
1866            $retVal = $groups[0];
1867        } else {
1868            $retVal = $fid;
1869        }
1870        # Return the result.
1871        return $retVal;
1872    }
1873    
1874    =head3 GetBoundaries
1875    
1876    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1877    
1878    Determine the begin and end boundaries for the locations in a list. All of the
1879    locations must belong to the same contig and have mostly the same direction in
1880    order for this method to produce a meaningful result. The resulting
1881    begin/end pair will contain all of the bases in any of the locations.
1882    
1883    =over 4
1884    
1885    =item locList
1886    
1887    List of locations to process.
1888    
1889    =item RETURN
1890    
1891    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1892    and the ending boundary. The beginning boundary will be left of the
1893    end for mostly-forward locations and right of the end for mostly-backward
1894    locations.
1895    
1896    =back
1897    
1898    =cut
1899    
1900    sub GetBoundaries {
1901        # Get the parameters.
1902        my ($self, @locList) = @_;
1903        # Set up the counters used to determine the most popular direction.
1904        my %counts = ( '+' => 0, '-' => 0 );
1905        # Get the last location and parse it.
1906        my $locObject = BasicLocation->new(pop @locList);
1907        # Prime the loop with its data.
1908        my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right);
1909        # Count its direction.
1910        $counts{$locObject->Dir}++;
1911        # Loop through the remaining locations. Note that in most situations, this loop
1912        # will not iterate at all, because most of the time we will be dealing with a
1913        # singleton list.
1914        for my $loc (@locList) {
1915            # Create a location object.
1916            my $locObject = BasicLocation->new($loc);
1917            # Count the direction.
1918            $counts{$locObject->Dir}++;
1919            # Get the left end and the right end.
1920            my $left = $locObject->Left;
1921            my $right = $locObject->Right;
1922            # Merge them into the return variables.
1923            if ($left < $beg) {
1924                $beg = $left;
1925            }
1926            if ($right > $end) {
1927                $end = $right;
1928            }
1929        }
1930        # If the most common direction is reverse, flip the begin and end markers.
1931        if ($counts{'-'} > $counts{'+'}) {
1932            ($beg, $end) = ($end, $beg);
1933        }
1934        # Return the result.
1935        return ($contig, $beg, $end);
1936    }
1937    
1938  =head3 CouplingID  =head3 CouplingID
1939    
1940  C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>  C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1941    
1942  Return the coupling ID for a pair of feature IDs.  Return the coupling ID for a pair of feature IDs.
1943    
# Line 1857  Line 1970 
1970  =cut  =cut
1971  #: Return Type $;  #: Return Type $;
1972  sub CouplingID {  sub CouplingID {
1973      return join " ", sort @_;      my ($self, @pegs) = @_;
1974        return $self->DigestKey(join " ", sort @pegs);
1975  }  }
1976    
1977  =head3 ReadFasta  =head3 ReadFasta
# Line 2216  Line 2330 
2330      return @retVal;      return @retVal;
2331  }  }
2332    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
     # Get the parameters.  
     my ($self, $entityName, $entityID) = @_;  
     # Check for the entity instance.  
     Trace("Checking existence of $entityName with ID=$entityID.") if T(4);  
     my $testInstance = $self->GetEntity($entityName, $entityID);  
     # Return an existence indicator.  
     my $retVal = ($testInstance ? 1 : 0);  
     return $retVal;  
 }  
   
2333  =head3 FeatureTranslation  =head3 FeatureTranslation
2334    
2335  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 2788  Line 2867 
2867      return @retVal;      return @retVal;
2868  }  }
2869    
   
   
2870  =head3 RelatedFeatures  =head3 RelatedFeatures
2871    
2872  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 3041  Line 3118 
3118      return %retVal;      return %retVal;
3119  }  }
3120    
3121    =head3 Sims
3122    
3123    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3124    
3125    Get a list of similarities for a specified feature. Similarity information is not kept in the
3126    Sprout database; rather, they are retrieved from a network server. The similarities are
3127    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3128    so that its elements can be accessed by name.
3129    
3130    Similarities can be either raw or expanded. The raw similarities are basic
3131    hits between features with similar DNA. Expanding a raw similarity drags in any
3132    features considered substantially identical. So, for example, if features B<A1>,
3133    B<A2>, and B<A3> are all substatially identical to B<A>, then a raw similarity
3134    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3135    
3136    =over 4
3137    
3138    =item fid
3139    
3140    ID of the feature whose similarities are desired.
3141    
3142    =item maxN
3143    
3144    Maximum number of similarities to return.
3145    
3146    =item maxP
3147    
3148    Minumum allowable similarity score.
3149    
3150    =item select
3151    
3152    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3153    means only similarities to FIG features are returned; C<all> means all expanded
3154    similarities are returned; and C<figx> means similarities are expanded until the
3155    number of FIG features equals the maximum.
3156    
3157    =item max_expand
3158    
3159    The maximum number of features to expand.
3160    
3161    =item filters
3162    
3163    Reference to a hash containing filter information, or a subroutine that can be
3164    used to filter the sims.
3165    
3166    =item RETURN
3167    
3168    Returns a reference to a list of similarity objects, or C<undef> if an error
3169    occurred.
3170    
3171    =back
3172    
3173    =cut
3174    
3175    sub Sims {
3176        # Get the parameters.
3177        my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
3178        # Create the shim object to test for deleted FIDs.
3179        my $shim = FidCheck->new($self);
3180        # Ask the network for sims.
3181        my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters);
3182        # Return the result.
3183        return $retVal;
3184    }
3185    
3186  =head3 GetGroups  =head3 GetGroups
3187    
3188  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>

Legend:
Removed from v.1.63  
changed lines
  Added in v.1.79

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3