[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.59, Wed Jun 7 01:53:03 2006 UTC revision 1.75, Sun Jun 25 18:02:35 2006 UTC
# Line 12  Line 12 
12      use DBObject;      use DBObject;
13      use Tracer;      use Tracer;
14      use FIGRules;      use FIGRules;
15        use FidCheck;
16      use Stats;      use Stats;
17      use POSIX qw(strftime);      use POSIX qw(strftime);
18    
# Line 375  Line 376 
376      # Get the genomes.      # Get the genomes.
377      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
378                                                                       'Genome(genus)',                                                                       'Genome(genus)',
379                                                                       'Genome(species)']);                                                                       'Genome(species)',
380                                                                         'Genome(unique-characterization)']);
381      # Sort them by name.      # Sort them by name.
382      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
383      # Loop through the genomes, creating the option tags.      # Loop through the genomes, creating the option tags.
384      for my $genomeData (@sorted) {      for my $genomeData (@sorted) {
385          # Get the data for this genome.          # Get the data for this genome.
386          my ($genomeID, $genus, $species) = @{$genomeData};          my ($genomeID, $genus, $species, $strain) = @{$genomeData};
387          # Get the contig count.          # Get the contig count.
388          my $count = $self->ContigCount($genomeID);          my $count = $self->ContigCount($genomeID);
389          my $counting = ($count == 1 ? "contig" : "contigs");          my $counting = ($count == 1 ? "contig" : "contigs");
390          # Build the option tag.          # Build the option tag.
391          $retVal .= "<option value=\"$genomeID\">$genus $species ($genomeID) [$count $counting]</option>\n";          $retVal .= "<option value=\"$genomeID\">$genus $species $strain ($genomeID) [$count $counting]</option>\n";
392            Trace("Option tag built for $genomeID: $genus $species $strain.") if T(3);
393      }      }
394      # Close the SELECT tag.      # Close the SELECT tag.
395      $retVal .= "</select>\n";      $retVal .= "</select>\n";
# Line 793  Line 796 
796      return @retVal;      return @retVal;
797  }  }
798    
799    =head3 GenomeLength
800    
801    C<< my $length = $sprout->GenomeLength($genomeID); >>
802    
803    Return the length of the specified genome in base pairs.
804    
805    =over 4
806    
807    =item genomeID
808    
809    ID of the genome whose base pair count is desired.
810    
811    =item RETURN
812    
813    Returns the number of base pairs in all the contigs of the specified
814    genome.
815    
816    =back
817    
818    =cut
819    
820    sub GenomeLength {
821        # Get the parameters.
822        my ($self, $genomeID) = @_;
823        # Declare the return variable.
824        my $retVal = 0;
825        # Get the genome's contig sequence lengths.
826        my @lens = $self->GetFlat(['HasContig', 'IsMadeUpOf'], 'HasContig(from-link) = ?',
827                           [$genomeID], 'IsMadeUpOf(len)');
828        # Sum the lengths.
829        map { $retVal += $_ } @lens;
830        # Return the result.
831        return $retVal;
832    }
833    
834    =head3 FeatureCount
835    
836    C<< my $count = $sprout->FeatureCount($genomeID, $type); >>
837    
838    Return the number of features of the specified type in the specified genome.
839    
840    =over 4
841    
842    =item genomeID
843    
844    ID of the genome whose feature count is desired.
845    
846    =item type
847    
848    Type of feature to count (eg. C<peg>, C<rna>, etc.).
849    
850    =item RETURN
851    
852    Returns the number of features of the specified type for the specified genome.
853    
854    =back
855    
856    =cut
857    
858    sub FeatureCount {
859        # Get the parameters.
860        my ($self, $genomeID, $type) = @_;
861        # Compute the count.
862        my $retVal = $self->GetCount(['HasFeature', 'Feature'],
863                                    "HasFeature(from-link) = ? AND Feature(feature-type) = ?",
864                                    [$genomeID, $type]);
865        # Return the result.
866        return $retVal;
867    }
868    
869    =head3 GenomeAssignments
870    
871    C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >>
872    
873    Return a list of a genome's assigned features. The return hash will contain each
874    assigned feature of the genome mapped to the text of its most recent functional
875    assignment.
876    
877    =over 4
878    
879    =item genomeID
880    
881    ID of the genome whose functional assignments are desired.
882    
883    =item RETURN
884    
885    Returns a reference to a hash which maps each feature to its most recent
886    functional assignment.
887    
888    =back
889    
890    =cut
891    
892    sub GenomeAssignments {
893        # Get the parameters.
894        my ($self, $genomeID) = @_;
895        # Declare the return variable.
896        my $retVal = {};
897        # Query the genome's features and annotations. We'll put the oldest annotations
898        # first so that the last assignment to go into the hash will be the correct one.
899        my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'],
900                               "HasFeature(from-link) = ? ORDER BY Annotation(time)",
901                               [$genomeID]);
902        # Loop through the annotations.
903        while (my $data = $query->Fetch) {
904            # Get the feature ID and annotation text.
905            my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',
906                                                    'Annotation(annotation)']);
907            # Check to see if this is an assignment. Note that the user really
908            # doesn't matter to us, other than we use it to determine whether or
909            # not this is an assignment.
910            my ($user, $assignment) = _ParseAssignment('fig', $annotation);
911            if ($user) {
912                # Here it's an assignment. We put it in the return hash, overwriting
913                # any older assignment that might be present.
914                $retVal->{$fid} = $assignment;
915            }
916        }
917        # Return the result.
918        return $retVal;
919    }
920    
921  =head3 ContigLength  =head3 ContigLength
922    
923  C<< my $length = $sprout->ContigLength($contigID); >>  C<< my $length = $sprout->ContigLength($contigID); >>
# Line 1543  Line 1668 
1668          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1669          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1670                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1671          # The coupling ID contains the two feature IDs separated by a space. We use          # Get the other feature that participates in the coupling.
1672          # this information to find the ID of the other feature.          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1673          my ($fid1, $fid2) = split / /, $couplingID;                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1674          my $otherFeatureID = ($featureID eq $fid1 ? $fid2 : $fid1);                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1675          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1676          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1677          $found = 1;          $found = 1;
# Line 1679  Line 1804 
1804      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1805      # Declare the return values. We'll start with the coupling ID and undefine the      # Declare the return values. We'll start with the coupling ID and undefine the
1806      # flag and score until we have more information.      # flag and score until we have more information.
1807      my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1808      # Find the coupling data.      # Find the coupling data.
1809      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1810                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
# Line 1700  Line 1825 
1825      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
1826  }  }
1827    
1828    =head3 GetSynonymGroup
1829    
1830    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1831    
1832    Return the synonym group name for the specified feature.
1833    
1834    =over 4
1835    
1836    =item fid
1837    
1838    ID of the feature whose synonym group is desired.
1839    
1840    =item RETURN
1841    
1842    The name of the synonym group to which the feature belongs. If the feature does
1843    not belong to a synonym group, the feature ID itself is returned.
1844    
1845    =back
1846    
1847    =cut
1848    
1849    sub GetSynonymGroup {
1850        # Get the parameters.
1851        my ($self, $fid) = @_;
1852        # Declare the return variable.
1853        my $retVal;
1854        # Find the synonym group.
1855        my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
1856                                       [$fid], 'IsSynonymGroupFor(from-link)');
1857        # Check to see if we found anything.
1858        if (@groups) {
1859            $retVal = $groups[0];
1860        } else {
1861            $retVal = $fid;
1862        }
1863        # Return the result.
1864        return $retVal;
1865    }
1866    
1867    =head3 GetBoundaries
1868    
1869    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1870    
1871    Determine the begin and end boundaries for the locations in a list. All of the
1872    locations must belong to the same contig and have mostly the same direction in
1873    order for this method to produce a meaningful result. The resulting
1874    begin/end pair will contain all of the bases in any of the locations.
1875    
1876    =over 4
1877    
1878    =item locList
1879    
1880    List of locations to process.
1881    
1882    =item RETURN
1883    
1884    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1885    and the ending boundary. The beginning boundary will be left of the
1886    end for mostly-forward locations and right of the end for mostly-backward
1887    locations.
1888    
1889    =back
1890    
1891    =cut
1892    
1893    sub GetBoundaries {
1894        # Get the parameters.
1895        my ($self, @locList) = @_;
1896        # Set up the counters used to determine the most popular direction.
1897        my %counts = ( '+' => 0, '-' => 0 );
1898        # Get the last location and parse it.
1899        my $locObject = BasicLocation->new(pop @locList);
1900        # Prime the loop with its data.
1901        my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right);
1902        # Count its direction.
1903        $counts{$locObject->Dir}++;
1904        # Loop through the remaining locations. Note that in most situations, this loop
1905        # will not iterate at all, because most of the time we will be dealing with a
1906        # singleton list.
1907        for my $loc (@locList) {
1908            # Create a location object.
1909            my $locObject = BasicLocation->new($loc);
1910            # Count the direction.
1911            $counts{$locObject->Dir}++;
1912            # Get the left end and the right end.
1913            my $left = $locObject->Left;
1914            my $right = $locObject->Right;
1915            # Merge them into the return variables.
1916            if ($left < $beg) {
1917                $beg = $left;
1918            }
1919            if ($right > $end) {
1920                $end = $right;
1921            }
1922        }
1923        # If the most common direction is reverse, flip the begin and end markers.
1924        if ($counts{'-'} > $counts{'+'}) {
1925            ($beg, $end) = ($end, $beg);
1926        }
1927        # Return the result.
1928        return ($contig, $beg, $end);
1929    }
1930    
1931  =head3 CouplingID  =head3 CouplingID
1932    
1933  C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>  C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1934    
1935  Return the coupling ID for a pair of feature IDs.  Return the coupling ID for a pair of feature IDs.
1936    
# Line 1735  Line 1963 
1963  =cut  =cut
1964  #: Return Type $;  #: Return Type $;
1965  sub CouplingID {  sub CouplingID {
1966      return join " ", sort @_;      my ($self, @pegs) = @_;
1967        return $self->DigestKey(join " ", sort @pegs);
1968  }  }
1969    
1970  =head3 ReadFasta  =head3 ReadFasta
# Line 2094  Line 2323 
2323      return @retVal;      return @retVal;
2324  }  }
2325    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
     # Get the parameters.  
     my ($self, $entityName, $entityID) = @_;  
     # Check for the entity instance.  
     Trace("Checking existence of $entityName with ID=$entityID.") if T(4);  
     my $testInstance = $self->GetEntity($entityName, $entityID);  
     # Return an existence indicator.  
     my $retVal = ($testInstance ? 1 : 0);  
     return $retVal;  
 }  
   
2326  =head3 FeatureTranslation  =head3 FeatureTranslation
2327    
2328  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 2666  Line 2860 
2860      return @retVal;      return @retVal;
2861  }  }
2862    
   
   
2863  =head3 RelatedFeatures  =head3 RelatedFeatures
2864    
2865  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 2919  Line 3111 
3111      return %retVal;      return %retVal;
3112  }  }
3113    
3114    =head3 Sims
3115    
3116    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3117    
3118    Get a list of similarities for a specified feature. Similarity information is not kept in the
3119    Sprout database; rather, they are retrieved from a network server. The similarities are
3120    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3121    so that its elements can be accessed by name.
3122    
3123    Similarities can be either raw or expanded. The raw similarities are basic
3124    hits between features with similar DNA. Expanding a raw similarity drags in any
3125    features considered substantially identical. So, for example, if features B<A1>,
3126    B<A2>, and B<A3> are all substatially identical to B<A>, then a raw similarity
3127    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3128    
3129    =over 4
3130    
3131    =item fid
3132    
3133    ID of the feature whose similarities are desired.
3134    
3135    =item maxN
3136    
3137    Maximum number of similarities to return.
3138    
3139    =item maxP
3140    
3141    Minumum allowable similarity score.
3142    
3143    =item select
3144    
3145    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3146    means only similarities to FIG features are returned; C<all> means all expanded
3147    similarities are returned; and C<figx> means similarities are expanded until the
3148    number of FIG features equals the maximum.
3149    
3150    =item max_expand
3151    
3152    The maximum number of features to expand.
3153    
3154    =item filters
3155    
3156    Reference to a hash containing filter information, or a subroutine that can be
3157    used to filter the sims.
3158    
3159    =item RETURN
3160    
3161    Returns a reference to a list of similarity objects, or C<undef> if an error
3162    occurred.
3163    
3164    =back
3165    
3166    =cut
3167    
3168    sub Sims {
3169        # Get the parameters.
3170        my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
3171        # Create the shim object to test for deleted FIDs.
3172        my $shim = FidCheck->new($self);
3173        # Ask the network for sims.
3174        my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters);
3175        # Return the result.
3176        return $retVal;
3177    }
3178    
3179  =head3 GetGroups  =head3 GetGroups
3180    
3181  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>

Legend:
Removed from v.1.59  
changed lines
  Added in v.1.75

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3