[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.59, Wed Jun 7 01:53:03 2006 UTC revision 1.78, Wed Jul 26 14:47:03 2006 UTC
# Line 12  Line 12 
12      use DBObject;      use DBObject;
13      use Tracer;      use Tracer;
14      use FIGRules;      use FIGRules;
15        use FidCheck;
16      use Stats;      use Stats;
17      use POSIX qw(strftime);      use POSIX qw(strftime);
18        use BasicLocation;
19    
20  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
21    
# Line 91  Line 92 
92  sub new {  sub new {
93      # Get the parameters.      # Get the parameters.
94      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
95        # Compute the DBD directory.
96        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
97                                                      $FIG_Config::fig );
98      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
99      # the incoming data.      # the incoming data.
100      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
# Line 98  Line 102 
102                                                          # database type                                                          # database type
103                         dataDir      => $FIG_Config::sproutData,                         dataDir      => $FIG_Config::sproutData,
104                                                          # data file directory                                                          # data file directory
105                         xmlFileName  => "$FIG_Config::fig/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
106                                                          # database definition file name                                                          # database definition file name
107                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
108                                                          # user name and password                                                          # user name and password
# Line 375  Line 379 
379      # Get the genomes.      # Get the genomes.
380      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
381                                                                       'Genome(genus)',                                                                       'Genome(genus)',
382                                                                       'Genome(species)']);                                                                       'Genome(species)',
383                                                                         'Genome(unique-characterization)']);
384      # Sort them by name.      # Sort them by name.
385      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
386      # Loop through the genomes, creating the option tags.      # Loop through the genomes, creating the option tags.
387      for my $genomeData (@sorted) {      for my $genomeData (@sorted) {
388          # Get the data for this genome.          # Get the data for this genome.
389          my ($genomeID, $genus, $species) = @{$genomeData};          my ($genomeID, $genus, $species, $strain) = @{$genomeData};
390          # Get the contig count.          # Get the contig count.
391          my $count = $self->ContigCount($genomeID);          my $count = $self->ContigCount($genomeID);
392          my $counting = ($count == 1 ? "contig" : "contigs");          my $counting = ($count == 1 ? "contig" : "contigs");
393          # Build the option tag.          # Build the option tag.
394          $retVal .= "<option value=\"$genomeID\">$genus $species ($genomeID) [$count $counting]</option>\n";          $retVal .= "<option value=\"$genomeID\">$genus $species $strain ($genomeID) [$count $counting]</option>\n";
395            Trace("Option tag built for $genomeID: $genus $species $strain.") if T(3);
396      }      }
397      # Close the SELECT tag.      # Close the SELECT tag.
398      $retVal .= "</select>\n";      $retVal .= "</select>\n";
# Line 793  Line 799 
799      return @retVal;      return @retVal;
800  }  }
801    
802    =head3 GenomeLength
803    
804    C<< my $length = $sprout->GenomeLength($genomeID); >>
805    
806    Return the length of the specified genome in base pairs.
807    
808    =over 4
809    
810    =item genomeID
811    
812    ID of the genome whose base pair count is desired.
813    
814    =item RETURN
815    
816    Returns the number of base pairs in all the contigs of the specified
817    genome.
818    
819    =back
820    
821    =cut
822    
823    sub GenomeLength {
824        # Get the parameters.
825        my ($self, $genomeID) = @_;
826        # Declare the return variable.
827        my $retVal = 0;
828        # Get the genome's contig sequence lengths.
829        my @lens = $self->GetFlat(['HasContig', 'IsMadeUpOf'], 'HasContig(from-link) = ?',
830                           [$genomeID], 'IsMadeUpOf(len)');
831        # Sum the lengths.
832        map { $retVal += $_ } @lens;
833        # Return the result.
834        return $retVal;
835    }
836    
837    =head3 FeatureCount
838    
839    C<< my $count = $sprout->FeatureCount($genomeID, $type); >>
840    
841    Return the number of features of the specified type in the specified genome.
842    
843    =over 4
844    
845    =item genomeID
846    
847    ID of the genome whose feature count is desired.
848    
849    =item type
850    
851    Type of feature to count (eg. C<peg>, C<rna>, etc.).
852    
853    =item RETURN
854    
855    Returns the number of features of the specified type for the specified genome.
856    
857    =back
858    
859    =cut
860    
861    sub FeatureCount {
862        # Get the parameters.
863        my ($self, $genomeID, $type) = @_;
864        # Compute the count.
865        my $retVal = $self->GetCount(['HasFeature', 'Feature'],
866                                    "HasFeature(from-link) = ? AND Feature(feature-type) = ?",
867                                    [$genomeID, $type]);
868        # Return the result.
869        return $retVal;
870    }
871    
872    =head3 GenomeAssignments
873    
874    C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >>
875    
876    Return a list of a genome's assigned features. The return hash will contain each
877    assigned feature of the genome mapped to the text of its most recent functional
878    assignment.
879    
880    =over 4
881    
882    =item genomeID
883    
884    ID of the genome whose functional assignments are desired.
885    
886    =item RETURN
887    
888    Returns a reference to a hash which maps each feature to its most recent
889    functional assignment.
890    
891    =back
892    
893    =cut
894    
895    sub GenomeAssignments {
896        # Get the parameters.
897        my ($self, $genomeID) = @_;
898        # Declare the return variable.
899        my $retVal = {};
900        # Query the genome's features and annotations. We'll put the oldest annotations
901        # first so that the last assignment to go into the hash will be the correct one.
902        my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'],
903                               "HasFeature(from-link) = ? ORDER BY Annotation(time)",
904                               [$genomeID]);
905        # Loop through the annotations.
906        while (my $data = $query->Fetch) {
907            # Get the feature ID and annotation text.
908            my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',
909                                                    'Annotation(annotation)']);
910            # Check to see if this is an assignment. Note that the user really
911            # doesn't matter to us, other than we use it to determine whether or
912            # not this is an assignment.
913            my ($user, $assignment) = _ParseAssignment('fig', $annotation);
914            if ($user) {
915                # Here it's an assignment. We put it in the return hash, overwriting
916                # any older assignment that might be present.
917                $retVal->{$fid} = $assignment;
918            }
919        }
920        # Return the result.
921        return $retVal;
922    }
923    
924  =head3 ContigLength  =head3 ContigLength
925    
926  C<< my $length = $sprout->ContigLength($contigID); >>  C<< my $length = $sprout->ContigLength($contigID); >>
# Line 1531  Line 1659 
1659  sub CoupledFeatures {  sub CoupledFeatures {
1660      # Get the parameters.      # Get the parameters.
1661      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1662        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1663      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1664      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1665                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1543  Line 1672 
1672          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1673          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1674                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1675          # The coupling ID contains the two feature IDs separated by a space. We use          Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1676          # this information to find the ID of the other feature.          # Get the other feature that participates in the coupling.
1677          my ($fid1, $fid2) = split / /, $couplingID;          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1678          my $otherFeatureID = ($featureID eq $fid1 ? $fid2 : $fid1);                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1679                                               [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1680            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1681          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1682          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1683          $found = 1;          $found = 1;
# Line 1679  Line 1810 
1810      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1811      # Declare the return values. We'll start with the coupling ID and undefine the      # Declare the return values. We'll start with the coupling ID and undefine the
1812      # flag and score until we have more information.      # flag and score until we have more information.
1813      my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1814      # Find the coupling data.      # Find the coupling data.
1815      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1816                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
# Line 1700  Line 1831 
1831      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
1832  }  }
1833    
1834    =head3 GetSynonymGroup
1835    
1836    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1837    
1838    Return the synonym group name for the specified feature.
1839    
1840    =over 4
1841    
1842    =item fid
1843    
1844    ID of the feature whose synonym group is desired.
1845    
1846    =item RETURN
1847    
1848    The name of the synonym group to which the feature belongs. If the feature does
1849    not belong to a synonym group, the feature ID itself is returned.
1850    
1851    =back
1852    
1853    =cut
1854    
1855    sub GetSynonymGroup {
1856        # Get the parameters.
1857        my ($self, $fid) = @_;
1858        # Declare the return variable.
1859        my $retVal;
1860        # Find the synonym group.
1861        my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
1862                                       [$fid], 'IsSynonymGroupFor(from-link)');
1863        # Check to see if we found anything.
1864        if (@groups) {
1865            $retVal = $groups[0];
1866        } else {
1867            $retVal = $fid;
1868        }
1869        # Return the result.
1870        return $retVal;
1871    }
1872    
1873    =head3 GetBoundaries
1874    
1875    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1876    
1877    Determine the begin and end boundaries for the locations in a list. All of the
1878    locations must belong to the same contig and have mostly the same direction in
1879    order for this method to produce a meaningful result. The resulting
1880    begin/end pair will contain all of the bases in any of the locations.
1881    
1882    =over 4
1883    
1884    =item locList
1885    
1886    List of locations to process.
1887    
1888    =item RETURN
1889    
1890    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1891    and the ending boundary. The beginning boundary will be left of the
1892    end for mostly-forward locations and right of the end for mostly-backward
1893    locations.
1894    
1895    =back
1896    
1897    =cut
1898    
1899    sub GetBoundaries {
1900        # Get the parameters.
1901        my ($self, @locList) = @_;
1902        # Set up the counters used to determine the most popular direction.
1903        my %counts = ( '+' => 0, '-' => 0 );
1904        # Get the last location and parse it.
1905        my $locObject = BasicLocation->new(pop @locList);
1906        # Prime the loop with its data.
1907        my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right);
1908        # Count its direction.
1909        $counts{$locObject->Dir}++;
1910        # Loop through the remaining locations. Note that in most situations, this loop
1911        # will not iterate at all, because most of the time we will be dealing with a
1912        # singleton list.
1913        for my $loc (@locList) {
1914            # Create a location object.
1915            my $locObject = BasicLocation->new($loc);
1916            # Count the direction.
1917            $counts{$locObject->Dir}++;
1918            # Get the left end and the right end.
1919            my $left = $locObject->Left;
1920            my $right = $locObject->Right;
1921            # Merge them into the return variables.
1922            if ($left < $beg) {
1923                $beg = $left;
1924            }
1925            if ($right > $end) {
1926                $end = $right;
1927            }
1928        }
1929        # If the most common direction is reverse, flip the begin and end markers.
1930        if ($counts{'-'} > $counts{'+'}) {
1931            ($beg, $end) = ($end, $beg);
1932        }
1933        # Return the result.
1934        return ($contig, $beg, $end);
1935    }
1936    
1937  =head3 CouplingID  =head3 CouplingID
1938    
1939  C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>  C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1940    
1941  Return the coupling ID for a pair of feature IDs.  Return the coupling ID for a pair of feature IDs.
1942    
# Line 1735  Line 1969 
1969  =cut  =cut
1970  #: Return Type $;  #: Return Type $;
1971  sub CouplingID {  sub CouplingID {
1972      return join " ", sort @_;      my ($self, @pegs) = @_;
1973        return $self->DigestKey(join " ", sort @pegs);
1974  }  }
1975    
1976  =head3 ReadFasta  =head3 ReadFasta
# Line 2094  Line 2329 
2329      return @retVal;      return @retVal;
2330  }  }
2331    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
     # Get the parameters.  
     my ($self, $entityName, $entityID) = @_;  
     # Check for the entity instance.  
     Trace("Checking existence of $entityName with ID=$entityID.") if T(4);  
     my $testInstance = $self->GetEntity($entityName, $entityID);  
     # Return an existence indicator.  
     my $retVal = ($testInstance ? 1 : 0);  
     return $retVal;  
 }  
   
2332  =head3 FeatureTranslation  =head3 FeatureTranslation
2333    
2334  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 2666  Line 2866 
2866      return @retVal;      return @retVal;
2867  }  }
2868    
   
   
2869  =head3 RelatedFeatures  =head3 RelatedFeatures
2870    
2871  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 2919  Line 3117 
3117      return %retVal;      return %retVal;
3118  }  }
3119    
3120    =head3 Sims
3121    
3122    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3123    
3124    Get a list of similarities for a specified feature. Similarity information is not kept in the
3125    Sprout database; rather, they are retrieved from a network server. The similarities are
3126    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3127    so that its elements can be accessed by name.
3128    
3129    Similarities can be either raw or expanded. The raw similarities are basic
3130    hits between features with similar DNA. Expanding a raw similarity drags in any
3131    features considered substantially identical. So, for example, if features B<A1>,
3132    B<A2>, and B<A3> are all substatially identical to B<A>, then a raw similarity
3133    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3134    
3135    =over 4
3136    
3137    =item fid
3138    
3139    ID of the feature whose similarities are desired.
3140    
3141    =item maxN
3142    
3143    Maximum number of similarities to return.
3144    
3145    =item maxP
3146    
3147    Minumum allowable similarity score.
3148    
3149    =item select
3150    
3151    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3152    means only similarities to FIG features are returned; C<all> means all expanded
3153    similarities are returned; and C<figx> means similarities are expanded until the
3154    number of FIG features equals the maximum.
3155    
3156    =item max_expand
3157    
3158    The maximum number of features to expand.
3159    
3160    =item filters
3161    
3162    Reference to a hash containing filter information, or a subroutine that can be
3163    used to filter the sims.
3164    
3165    =item RETURN
3166    
3167    Returns a reference to a list of similarity objects, or C<undef> if an error
3168    occurred.
3169    
3170    =back
3171    
3172    =cut
3173    
3174    sub Sims {
3175        # Get the parameters.
3176        my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
3177        # Create the shim object to test for deleted FIDs.
3178        my $shim = FidCheck->new($self);
3179        # Ask the network for sims.
3180        my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters);
3181        # Return the result.
3182        return $retVal;
3183    }
3184    
3185  =head3 GetGroups  =head3 GetGroups
3186    
3187  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>

Legend:
Removed from v.1.59  
changed lines
  Added in v.1.78

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3