[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.58, Tue Jun 6 05:07:15 2006 UTC revision 1.77, Sun Jul 23 16:44:10 2006 UTC
# Line 12  Line 12 
12      use DBObject;      use DBObject;
13      use Tracer;      use Tracer;
14      use FIGRules;      use FIGRules;
15        use FidCheck;
16      use Stats;      use Stats;
17      use POSIX qw(strftime);      use POSIX qw(strftime);
18        use BasicLocation;
19    
20  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
21    
# Line 304  Line 305 
305      return ($arch, $bact, $euk, $vir, $env, $unk);      return ($arch, $bact, $euk, $vir, $env, $unk);
306  }  }
307    
308    =head3 ContigCount
309    
310    C<< my $count = $sprout->ContigCount($genomeID); >>
311    
312    Return the number of contigs for the specified genome ID.
313    
314    =over 4
315    
316    =item genomeID
317    
318    ID of the genome whose contig count is desired.
319    
320    =item RETURN
321    
322    Returns the number of contigs for the specified genome.
323    
324    =back
325    
326    =cut
327    
328    sub ContigCount {
329        # Get the parameters.
330        my ($self, $genomeID) = @_;
331        # Get the contig count.
332        my $retVal = $self->GetCount(['Contig', 'HasContig'], "HasContig(from-link) = ?", [$genomeID]);
333        # Return the result.
334        return $retVal;
335    }
336    
337    =head3 GeneMenu
338    
339    C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params); >>
340    
341    Return an HTML select menu of genomes. Each genome will be an option in the menu,
342    and will be displayed by name with the ID and a contig count attached. The selection
343    value will be the genome ID. The genomes will be sorted by genus/species name.
344    
345    =over 4
346    
347    =item attributes
348    
349    Reference to a hash mapping attributes to values for the SELECT tag generated.
350    
351    =item filterString
352    
353    A filter string for use in selecting the genomes. The filter string must conform
354    to the rules for the C<< ERDB->Get >> method.
355    
356    =item params
357    
358    Reference to a list of values to be substituted in for the parameter marks in
359    the filter string.
360    
361    =item RETURN
362    
363    Returns an HTML select menu with the specified genomes as selectable options.
364    
365    =back
366    
367    =cut
368    
369    sub GeneMenu {
370        # Get the parameters.
371        my ($self, $attributes, $filterString, $params) = @_;
372        # Start the menu.
373        my $retVal = "<select " .
374            join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .
375            ">\n";
376        # Get the genomes.
377        my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
378                                                                         'Genome(genus)',
379                                                                         'Genome(species)',
380                                                                         'Genome(unique-characterization)']);
381        # Sort them by name.
382        my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
383        # Loop through the genomes, creating the option tags.
384        for my $genomeData (@sorted) {
385            # Get the data for this genome.
386            my ($genomeID, $genus, $species, $strain) = @{$genomeData};
387            # Get the contig count.
388            my $count = $self->ContigCount($genomeID);
389            my $counting = ($count == 1 ? "contig" : "contigs");
390            # Build the option tag.
391            $retVal .= "<option value=\"$genomeID\">$genus $species $strain ($genomeID) [$count $counting]</option>\n";
392            Trace("Option tag built for $genomeID: $genus $species $strain.") if T(3);
393        }
394        # Close the SELECT tag.
395        $retVal .= "</select>\n";
396        # Return the result.
397        return $retVal;
398    }
399  =head3 Build  =head3 Build
400    
401  C<< $sprout->Build(); >>  C<< $sprout->Build(); >>
# Line 704  Line 796 
796      return @retVal;      return @retVal;
797  }  }
798    
799    =head3 GenomeLength
800    
801    C<< my $length = $sprout->GenomeLength($genomeID); >>
802    
803    Return the length of the specified genome in base pairs.
804    
805    =over 4
806    
807    =item genomeID
808    
809    ID of the genome whose base pair count is desired.
810    
811    =item RETURN
812    
813    Returns the number of base pairs in all the contigs of the specified
814    genome.
815    
816    =back
817    
818    =cut
819    
820    sub GenomeLength {
821        # Get the parameters.
822        my ($self, $genomeID) = @_;
823        # Declare the return variable.
824        my $retVal = 0;
825        # Get the genome's contig sequence lengths.
826        my @lens = $self->GetFlat(['HasContig', 'IsMadeUpOf'], 'HasContig(from-link) = ?',
827                           [$genomeID], 'IsMadeUpOf(len)');
828        # Sum the lengths.
829        map { $retVal += $_ } @lens;
830        # Return the result.
831        return $retVal;
832    }
833    
834    =head3 FeatureCount
835    
836    C<< my $count = $sprout->FeatureCount($genomeID, $type); >>
837    
838    Return the number of features of the specified type in the specified genome.
839    
840    =over 4
841    
842    =item genomeID
843    
844    ID of the genome whose feature count is desired.
845    
846    =item type
847    
848    Type of feature to count (eg. C<peg>, C<rna>, etc.).
849    
850    =item RETURN
851    
852    Returns the number of features of the specified type for the specified genome.
853    
854    =back
855    
856    =cut
857    
858    sub FeatureCount {
859        # Get the parameters.
860        my ($self, $genomeID, $type) = @_;
861        # Compute the count.
862        my $retVal = $self->GetCount(['HasFeature', 'Feature'],
863                                    "HasFeature(from-link) = ? AND Feature(feature-type) = ?",
864                                    [$genomeID, $type]);
865        # Return the result.
866        return $retVal;
867    }
868    
869    =head3 GenomeAssignments
870    
871    C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >>
872    
873    Return a list of a genome's assigned features. The return hash will contain each
874    assigned feature of the genome mapped to the text of its most recent functional
875    assignment.
876    
877    =over 4
878    
879    =item genomeID
880    
881    ID of the genome whose functional assignments are desired.
882    
883    =item RETURN
884    
885    Returns a reference to a hash which maps each feature to its most recent
886    functional assignment.
887    
888    =back
889    
890    =cut
891    
892    sub GenomeAssignments {
893        # Get the parameters.
894        my ($self, $genomeID) = @_;
895        # Declare the return variable.
896        my $retVal = {};
897        # Query the genome's features and annotations. We'll put the oldest annotations
898        # first so that the last assignment to go into the hash will be the correct one.
899        my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'],
900                               "HasFeature(from-link) = ? ORDER BY Annotation(time)",
901                               [$genomeID]);
902        # Loop through the annotations.
903        while (my $data = $query->Fetch) {
904            # Get the feature ID and annotation text.
905            my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',
906                                                    'Annotation(annotation)']);
907            # Check to see if this is an assignment. Note that the user really
908            # doesn't matter to us, other than we use it to determine whether or
909            # not this is an assignment.
910            my ($user, $assignment) = _ParseAssignment('fig', $annotation);
911            if ($user) {
912                # Here it's an assignment. We put it in the return hash, overwriting
913                # any older assignment that might be present.
914                $retVal->{$fid} = $assignment;
915            }
916        }
917        # Return the result.
918        return $retVal;
919    }
920    
921  =head3 ContigLength  =head3 ContigLength
922    
923  C<< my $length = $sprout->ContigLength($contigID); >>  C<< my $length = $sprout->ContigLength($contigID); >>
# Line 1442  Line 1656 
1656  sub CoupledFeatures {  sub CoupledFeatures {
1657      # Get the parameters.      # Get the parameters.
1658      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1659        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1660      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1661      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1662                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1454  Line 1669 
1669          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1670          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1671                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1672          # The coupling ID contains the two feature IDs separated by a space. We use          Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1673          # this information to find the ID of the other feature.          # Get the other feature that participates in the coupling.
1674          my ($fid1, $fid2) = split / /, $couplingID;          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1675          my $otherFeatureID = ($featureID eq $fid1 ? $fid2 : $fid1);                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1676                                               [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1677            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1678          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1679          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1680          $found = 1;          $found = 1;
# Line 1590  Line 1807 
1807      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1808      # Declare the return values. We'll start with the coupling ID and undefine the      # Declare the return values. We'll start with the coupling ID and undefine the
1809      # flag and score until we have more information.      # flag and score until we have more information.
1810      my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1811      # Find the coupling data.      # Find the coupling data.
1812      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1813                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
# Line 1611  Line 1828 
1828      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
1829  }  }
1830    
1831    =head3 GetSynonymGroup
1832    
1833    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1834    
1835    Return the synonym group name for the specified feature.
1836    
1837    =over 4
1838    
1839    =item fid
1840    
1841    ID of the feature whose synonym group is desired.
1842    
1843    =item RETURN
1844    
1845    The name of the synonym group to which the feature belongs. If the feature does
1846    not belong to a synonym group, the feature ID itself is returned.
1847    
1848    =back
1849    
1850    =cut
1851    
1852    sub GetSynonymGroup {
1853        # Get the parameters.
1854        my ($self, $fid) = @_;
1855        # Declare the return variable.
1856        my $retVal;
1857        # Find the synonym group.
1858        my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
1859                                       [$fid], 'IsSynonymGroupFor(from-link)');
1860        # Check to see if we found anything.
1861        if (@groups) {
1862            $retVal = $groups[0];
1863        } else {
1864            $retVal = $fid;
1865        }
1866        # Return the result.
1867        return $retVal;
1868    }
1869    
1870    =head3 GetBoundaries
1871    
1872    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1873    
1874    Determine the begin and end boundaries for the locations in a list. All of the
1875    locations must belong to the same contig and have mostly the same direction in
1876    order for this method to produce a meaningful result. The resulting
1877    begin/end pair will contain all of the bases in any of the locations.
1878    
1879    =over 4
1880    
1881    =item locList
1882    
1883    List of locations to process.
1884    
1885    =item RETURN
1886    
1887    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1888    and the ending boundary. The beginning boundary will be left of the
1889    end for mostly-forward locations and right of the end for mostly-backward
1890    locations.
1891    
1892    =back
1893    
1894    =cut
1895    
1896    sub GetBoundaries {
1897        # Get the parameters.
1898        my ($self, @locList) = @_;
1899        # Set up the counters used to determine the most popular direction.
1900        my %counts = ( '+' => 0, '-' => 0 );
1901        # Get the last location and parse it.
1902        my $locObject = BasicLocation->new(pop @locList);
1903        # Prime the loop with its data.
1904        my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right);
1905        # Count its direction.
1906        $counts{$locObject->Dir}++;
1907        # Loop through the remaining locations. Note that in most situations, this loop
1908        # will not iterate at all, because most of the time we will be dealing with a
1909        # singleton list.
1910        for my $loc (@locList) {
1911            # Create a location object.
1912            my $locObject = BasicLocation->new($loc);
1913            # Count the direction.
1914            $counts{$locObject->Dir}++;
1915            # Get the left end and the right end.
1916            my $left = $locObject->Left;
1917            my $right = $locObject->Right;
1918            # Merge them into the return variables.
1919            if ($left < $beg) {
1920                $beg = $left;
1921            }
1922            if ($right > $end) {
1923                $end = $right;
1924            }
1925        }
1926        # If the most common direction is reverse, flip the begin and end markers.
1927        if ($counts{'-'} > $counts{'+'}) {
1928            ($beg, $end) = ($end, $beg);
1929        }
1930        # Return the result.
1931        return ($contig, $beg, $end);
1932    }
1933    
1934  =head3 CouplingID  =head3 CouplingID
1935    
1936  C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>  C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1937    
1938  Return the coupling ID for a pair of feature IDs.  Return the coupling ID for a pair of feature IDs.
1939    
# Line 1646  Line 1966 
1966  =cut  =cut
1967  #: Return Type $;  #: Return Type $;
1968  sub CouplingID {  sub CouplingID {
1969      return join " ", sort @_;      my ($self, @pegs) = @_;
1970        return $self->DigestKey(join " ", sort @pegs);
1971  }  }
1972    
1973  =head3 ReadFasta  =head3 ReadFasta
# Line 2005  Line 2326 
2326      return @retVal;      return @retVal;
2327  }  }
2328    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
     # Get the parameters.  
     my ($self, $entityName, $entityID) = @_;  
     # Check for the entity instance.  
     Trace("Checking existence of $entityName with ID=$entityID.") if T(4);  
     my $testInstance = $self->GetEntity($entityName, $entityID);  
     # Return an existence indicator.  
     my $retVal = ($testInstance ? 1 : 0);  
     return $retVal;  
 }  
   
2329  =head3 FeatureTranslation  =head3 FeatureTranslation
2330    
2331  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 2577  Line 2863 
2863      return @retVal;      return @retVal;
2864  }  }
2865    
   
   
2866  =head3 RelatedFeatures  =head3 RelatedFeatures
2867    
2868  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 2830  Line 3114 
3114      return %retVal;      return %retVal;
3115  }  }
3116    
3117    =head3 Sims
3118    
3119    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3120    
3121    Get a list of similarities for a specified feature. Similarity information is not kept in the
3122    Sprout database; rather, they are retrieved from a network server. The similarities are
3123    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3124    so that its elements can be accessed by name.
3125    
3126    Similarities can be either raw or expanded. The raw similarities are basic
3127    hits between features with similar DNA. Expanding a raw similarity drags in any
3128    features considered substantially identical. So, for example, if features B<A1>,
3129    B<A2>, and B<A3> are all substatially identical to B<A>, then a raw similarity
3130    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3131    
3132    =over 4
3133    
3134    =item fid
3135    
3136    ID of the feature whose similarities are desired.
3137    
3138    =item maxN
3139    
3140    Maximum number of similarities to return.
3141    
3142    =item maxP
3143    
3144    Minumum allowable similarity score.
3145    
3146    =item select
3147    
3148    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3149    means only similarities to FIG features are returned; C<all> means all expanded
3150    similarities are returned; and C<figx> means similarities are expanded until the
3151    number of FIG features equals the maximum.
3152    
3153    =item max_expand
3154    
3155    The maximum number of features to expand.
3156    
3157    =item filters
3158    
3159    Reference to a hash containing filter information, or a subroutine that can be
3160    used to filter the sims.
3161    
3162    =item RETURN
3163    
3164    Returns a reference to a list of similarity objects, or C<undef> if an error
3165    occurred.
3166    
3167    =back
3168    
3169    =cut
3170    
3171    sub Sims {
3172        # Get the parameters.
3173        my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
3174        # Create the shim object to test for deleted FIDs.
3175        my $shim = FidCheck->new($self);
3176        # Ask the network for sims.
3177        my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters);
3178        # Return the result.
3179        return $retVal;
3180    }
3181    
3182  =head3 GetGroups  =head3 GetGroups
3183    
3184  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>

Legend:
Removed from v.1.58  
changed lines
  Added in v.1.77

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3