[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.72, Sun Jun 25 02:37:04 2006 UTC revision 1.80, Thu Aug 10 07:34:37 2006 UTC
# Line 15  Line 15 
15      use FidCheck;      use FidCheck;
16      use Stats;      use Stats;
17      use POSIX qw(strftime);      use POSIX qw(strftime);
18        use BasicLocation;
19    
20  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
21    
# Line 92  Line 92 
92  sub new {  sub new {
93      # Get the parameters.      # Get the parameters.
94      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
95        # Compute the DBD directory.
96        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
97                                                      $FIG_Config::fig );
98      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
99      # the incoming data.      # the incoming data.
100      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
# Line 99  Line 102 
102                                                          # database type                                                          # database type
103                         dataDir      => $FIG_Config::sproutData,                         dataDir      => $FIG_Config::sproutData,
104                                                          # data file directory                                                          # data file directory
105                         xmlFileName  => "$FIG_Config::fig/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
106                                                          # database definition file name                                                          # database definition file name
107                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
108                                                          # user name and password                                                          # user name and password
109                         port         => $FIG_Config::dbport,                         port         => $FIG_Config::dbport,
110                                                          # database connection port                                                          # database connection port
111                         sock         => $FIG_Config::dbsock,                         sock         => $FIG_Config::dbsock,
112                           host         => $FIG_Config::dbhost,
113                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
114                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
115                         noDBOpen     => 0,               # 1 to suppress the database open                         noDBOpen     => 0,               # 1 to suppress the database open
# Line 119  Line 123 
123      my $dbh;      my $dbh;
124      if (! $optionTable->{noDBOpen}) {      if (! $optionTable->{noDBOpen}) {
125          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
126                                  $password, $optionTable->{port}, undef, $optionTable->{sock});                                  $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
127      }      }
128      # Create the ERDB object.      # Create the ERDB object.
129      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
# Line 1656  Line 1660 
1660  sub CoupledFeatures {  sub CoupledFeatures {
1661      # Get the parameters.      # Get the parameters.
1662      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1663        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1664      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1665      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1666                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1668  Line 1673 
1673          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1674          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1675                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1676            Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1677          # Get the other feature that participates in the coupling.          # Get the other feature that participates in the coupling.
1678          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1679                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1680                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1681            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1682          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1683          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1684          $found = 1;          $found = 1;
# Line 1804  Line 1811 
1811      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1812      # Declare the return values. We'll start with the coupling ID and undefine the      # Declare the return values. We'll start with the coupling ID and undefine the
1813      # flag and score until we have more information.      # flag and score until we have more information.
1814      my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1815      # Find the coupling data.      # Find the coupling data.
1816      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1817                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
# Line 1825  Line 1832 
1832      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
1833  }  }
1834    
1835    =head3 GetSynonymGroup
1836    
1837    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1838    
1839    Return the synonym group name for the specified feature.
1840    
1841    =over 4
1842    
1843    =item fid
1844    
1845    ID of the feature whose synonym group is desired.
1846    
1847    =item RETURN
1848    
1849    The name of the synonym group to which the feature belongs. If the feature does
1850    not belong to a synonym group, the feature ID itself is returned.
1851    
1852    =back
1853    
1854    =cut
1855    
1856    sub GetSynonymGroup {
1857        # Get the parameters.
1858        my ($self, $fid) = @_;
1859        # Declare the return variable.
1860        my $retVal;
1861        # Find the synonym group.
1862        my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
1863                                       [$fid], 'IsSynonymGroupFor(from-link)');
1864        # Check to see if we found anything.
1865        if (@groups) {
1866            $retVal = $groups[0];
1867        } else {
1868            $retVal = $fid;
1869        }
1870        # Return the result.
1871        return $retVal;
1872    }
1873    
1874    =head3 GetBoundaries
1875    
1876    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1877    
1878    Determine the begin and end boundaries for the locations in a list. All of the
1879    locations must belong to the same contig and have mostly the same direction in
1880    order for this method to produce a meaningful result. The resulting
1881    begin/end pair will contain all of the bases in any of the locations.
1882    
1883    =over 4
1884    
1885    =item locList
1886    
1887    List of locations to process.
1888    
1889    =item RETURN
1890    
1891    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1892    and the ending boundary. The beginning boundary will be left of the
1893    end for mostly-forward locations and right of the end for mostly-backward
1894    locations.
1895    
1896    =back
1897    
1898    =cut
1899    
1900    sub GetBoundaries {
1901        # Get the parameters.
1902        my ($self, @locList) = @_;
1903        # Set up the counters used to determine the most popular direction.
1904        my %counts = ( '+' => 0, '-' => 0 );
1905        # Get the last location and parse it.
1906        my $locObject = BasicLocation->new(pop @locList);
1907        # Prime the loop with its data.
1908        my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right);
1909        # Count its direction.
1910        $counts{$locObject->Dir}++;
1911        # Loop through the remaining locations. Note that in most situations, this loop
1912        # will not iterate at all, because most of the time we will be dealing with a
1913        # singleton list.
1914        for my $loc (@locList) {
1915            # Create a location object.
1916            my $locObject = BasicLocation->new($loc);
1917            # Count the direction.
1918            $counts{$locObject->Dir}++;
1919            # Get the left end and the right end.
1920            my $left = $locObject->Left;
1921            my $right = $locObject->Right;
1922            # Merge them into the return variables.
1923            if ($left < $beg) {
1924                $beg = $left;
1925            }
1926            if ($right > $end) {
1927                $end = $right;
1928            }
1929        }
1930        # If the most common direction is reverse, flip the begin and end markers.
1931        if ($counts{'-'} > $counts{'+'}) {
1932            ($beg, $end) = ($end, $beg);
1933        }
1934        # Return the result.
1935        return ($contig, $beg, $end);
1936    }
1937    
1938  =head3 CouplingID  =head3 CouplingID
1939    
1940  C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>  C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1941    
1942  Return the coupling ID for a pair of feature IDs.  Return the coupling ID for a pair of feature IDs.
1943    
# Line 1860  Line 1970 
1970  =cut  =cut
1971  #: Return Type $;  #: Return Type $;
1972  sub CouplingID {  sub CouplingID {
1973      return DigestKey(join " ", sort @_);      my ($self, @pegs) = @_;
1974        return $self->DigestKey(join " ", sort @pegs);
1975  }  }
1976    
1977  =head3 ReadFasta  =head3 ReadFasta
# Line 2756  Line 2867 
2867      return @retVal;      return @retVal;
2868  }  }
2869    
2870    =head3 GenomeSubsystemData
2871    
2872    C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >>
2873    
2874    Return a hash mapping genome features to their subsystem roles.
2875    
2876    =over 4
2877    
2878    =item genomeID
2879    
2880    ID of the genome whose subsystem feature map is desired.
2881    
2882    =item RETURN
2883    
2884    Returns a hash mapping each feature of the genome to a list of 2-tuples. Eacb
2885    2-tuple contains a subsystem name followed by a role ID.
2886    
2887    =back
2888    
2889    =cut
2890    
2891    sub GenomeSubsystemData {
2892        # Get the parameters.
2893        my ($self, $genomeID) = @_;
2894        # Declare the return variable.
2895        my %retVal = ();
2896        # Get a list of the genome features that participate in subsystems. For each
2897        # feature we get its spreadsheet cells and the corresponding roles.
2898        my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],
2899                                 "HasFeature(from-link) = ?", [$genomeID],
2900                                 ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);
2901        # Now we get a list of the spreadsheet cells and their associated subsystems. Subsystems
2902        # with an unknown variant code (-1) are skipped. Note the genome ID is at both ends of the
2903        # list. We use it at the beginning to get all the spreadsheet cells for the genome and
2904        # again at the end to filter out participation in subsystems with a negative variant code.
2905        my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],
2906                                     "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",
2907                                     [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);
2908        # Now "@roleData" lists the spreadsheet cell and role for each of the genome's features.
2909        # "@cellData" lists the subsystem name for each of the genome's spreadsheet cells. We
2910        # link these two lists together to create the result. First, we want a hash mapping
2911        # spreadsheet cells to subsystem names.
2912        my %subHash = map { $_->[0] => $_->[1] } @cellData;
2913        # We loop through @cellData to build the hash.
2914        for my $roleEntry (@roleData) {
2915            # Get the data for this feature and cell.
2916            my ($fid, $cellID, $role) = @{$roleEntry};
2917            # Check for a subsystem name.
2918            my $subsys = $subHash{$cellID};
2919            if ($subsys) {
2920                # Insure this feature has an entry in the return hash.
2921                if (! exists $retVal{$fid}) { $retVal{$fid} = []; }
2922                # Merge in this new data.
2923                push @{$retVal{$fid}}, [$subsys, $role];
2924            }
2925        }
2926        # Return the result.
2927        return %retVal;
2928    }
2929    
2930  =head3 RelatedFeatures  =head3 RelatedFeatures
2931    
2932  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 3285  Line 3456 
3456      }      }
3457      # If we have an assignment, we need to clean the function text. There may be      # If we have an assignment, we need to clean the function text. There may be
3458      # extra junk at the end added as a note from the user.      # extra junk at the end added as a note from the user.
3459      if (@retVal) {      if (@retVal > 1) {
3460          $retVal[1] =~ s/(\t\S)?\s*$//;          $retVal[1] =~ s/(\t\S)?\s*$//;
3461      }      }
3462      # Return the result list.      # Return the result list.

Legend:
Removed from v.1.72  
changed lines
  Added in v.1.80

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3