[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.18, Tue Jun 28 21:34:15 2005 UTC revision 1.33, Wed Sep 14 13:26:27 2005 UTC
# Line 70  Line 70 
70    
71  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)
72    
73    * B<noDBOpen> suppresses the connection to the database if TRUE, else FALSE
74    
75  =back  =back
76    
77  For example, the following constructor call specifies a database named I<Sprout> and a user name of  For example, the following constructor call specifies a database named I<Sprout> and a user name of
# Line 98  Line 100 
100                                                          # database connection port                                                          # database connection port
101                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
102                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
103                           noDBOpen     => 0,               # 1 to suppress the database open
104                        }, $options);                        }, $options);
105      # Get the data directory.      # Get the data directory.
106      my $dataDir = $optionTable->{dataDir};      my $dataDir = $optionTable->{dataDir};
# Line 105  Line 108 
108      $optionTable->{userData} =~ m!([^/]*)/(.*)$!;      $optionTable->{userData} =~ m!([^/]*)/(.*)$!;
109      my ($userName, $password) = ($1, $2);      my ($userName, $password) = ($1, $2);
110      # Connect to the database.      # Connect to the database.
111      my $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, $password, $optionTable->{port});      my $dbh;
112        if (! $optionTable->{noDBOpen}) {
113            $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
114                                    $password, $optionTable->{port});
115        }
116      # Create the ERDB object.      # Create the ERDB object.
117      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
118      my $erdb = ERDB->new($dbh, $xmlFileName);      my $erdb = ERDB->new($dbh, $xmlFileName);
# Line 576  Line 583 
583  =item RETURN  =item RETURN
584    
585  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
586  context and as a space-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context.
587    
588  =back  =back
589    
# Line 615  Line 622 
622          }          }
623          # Remember this specifier for the adjacent-segment test the next time through.          # Remember this specifier for the adjacent-segment test the next time through.
624          ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);          ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);
625            # Compute the initial base pair.
626            my $start = ($dir eq "+" ? $beg : $beg + $len - 1);
627          # Add the specifier to the list.          # Add the specifier to the list.
628          push @retVal, "${contigID}_$beg$dir$len";          push @retVal, "${contigID}_$start$dir$len";
629      }      }
630      # Return the list in the format indicated by the context.      # Return the list in the format indicated by the context.
631      return (wantarray ? @retVal : join(' ', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
632  }  }
633    
634  =head3 ParseLocation  =head3 ParseLocation
# Line 758  Line 767 
767          # the start point is the ending. Note that in the latter case we must reverse the DNA string          # the start point is the ending. Note that in the latter case we must reverse the DNA string
768          # before putting it in the return value.          # before putting it in the return value.
769          my ($start, $stop);          my ($start, $stop);
770            Trace("Parsed location is $beg$dir$len.") if T(SDNA => 4);
771          if ($dir eq "+") {          if ($dir eq "+") {
772              $start = $beg;              $start = $beg;
773              $stop = $beg + $len - 1;              $stop = $beg + $len - 1;
774          } else {          } else {
775              $start = $beg + $len + 1;              $start = $beg - $len + 1;
776              $stop = $beg;              $stop = $beg;
777          }          }
778            Trace("Looking for sequences containing $start through $stop.") if T(SDNA => 4);
779          my $query = $self->Get(['IsMadeUpOf','Sequence'],          my $query = $self->Get(['IsMadeUpOf','Sequence'],
780              "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .              "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .
781              " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",              " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",
# Line 776  Line 787 
787                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',
788                                     'IsMadeUpOf(len)']);                                     'IsMadeUpOf(len)']);
789              my $stopPosition = $startPosition + $sequenceLength;              my $stopPosition = $startPosition + $sequenceLength;
790                Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4);
791              # Figure out the start point and length of the relevant section.              # Figure out the start point and length of the relevant section.
792              my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);              my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);
793              my $len = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1;              my $len1 = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1;
794                Trace("Position is $pos1 for length $len1.") if T(SDNA => 4);
795              # Add the relevant data to the location data.              # Add the relevant data to the location data.
796              $locationDNA .= substr($sequenceData, $pos1, $len);              $locationDNA .= substr($sequenceData, $pos1, $len1);
797          }          }
798          # Add this location's data to the return string. Note that we may need to reverse it.          # Add this location's data to the return string. Note that we may need to reverse it.
799          if ($dir eq '+') {          if ($dir eq '+') {
800              $retVal .= $locationDNA;              $retVal .= $locationDNA;
801          } else {          } else {
802              $locationDNA = join('', reverse split //, $locationDNA);              $retVal .= FIG::reverse_comp($locationDNA);
             $retVal .= $locationDNA;  
803          }          }
804      }      }
805      # Return the result.      # Return the result.
# Line 1521  Line 1533 
1533          # Determine the ordering to place on the evidence items. If we're          # Determine the ordering to place on the evidence items. If we're
1534          # inverted, we want to see feature 2 before feature 1 (descending); otherwise,          # inverted, we want to see feature 2 before feature 1 (descending); otherwise,
1535          # we want feature 1 before feature 2 (normal).          # we want feature 1 before feature 2 (normal).
1536            Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4);
1537          my $ordering = ($inverted ? "DESC" : "");          my $ordering = ($inverted ? "DESC" : "");
1538          # Get the coupling evidence.          # Get the coupling evidence.
1539          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],
# Line 1534  Line 1547 
1547          while (@evidenceList > 0) {          while (@evidenceList > 0) {
1548              my $peg1Data = shift @evidenceList;              my $peg1Data = shift @evidenceList;
1549              my $peg2Data = shift @evidenceList;              my $peg2Data = shift @evidenceList;
1550                Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4);
1551              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];
1552          }          }
1553            Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4);
1554      }      }
1555      # Return the result.      # Return the result.
1556      return @retVal;      return @retVal;
# Line 1585  Line 1600 
1600                                   [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);                                   [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);
1601      # Check to see if we found anything.      # Check to see if we found anything.
1602      if (!@pegs) {      if (!@pegs) {
1603            Trace("No coupling found.") if T(Coupling => 4);
1604          # No coupling, so undefine the return value.          # No coupling, so undefine the return value.
1605          $retVal = undef;          $retVal = undef;
1606      } else {      } else {
1607          # We have a coupling! Get the score and check for inversion.          # We have a coupling! Get the score and check for inversion.
1608          $score = $pegs[0]->[1];          $score = $pegs[0]->[1];
1609          $inverted = ($pegs[0]->[0] eq $peg1);          my $firstFound = $pegs[0]->[0];
1610            $inverted = ($firstFound ne $peg1);
1611            Trace("Coupling score is $score. First peg is $firstFound, peg 1 is $peg1.") if T(Coupling => 4);
1612      }      }
1613      # Return the result.      # Return the result.
1614      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
# Line 1695  Line 1713 
1713          if ($line =~ m/^>\s*(.+?)(\s|\n)/) {          if ($line =~ m/^>\s*(.+?)(\s|\n)/) {
1714              # Here we have a new header. Store the current sequence if we have one.              # Here we have a new header. Store the current sequence if we have one.
1715              if ($id) {              if ($id) {
1716                  $retVal{$id} = uc $sequence;                  $retVal{$id} = lc $sequence;
1717              }              }
1718              # Clear the sequence accumulator and save the new ID.              # Clear the sequence accumulator and save the new ID.
1719              ($id, $sequence) = ("$prefix$1", "");              ($id, $sequence) = ("$prefix$1", "");
1720          } else {          } else {
1721              # Here we have a data line, so we add it to the sequence accumulator.              # Here we have a data line, so we add it to the sequence accumulator.
1722              # First, we get the actual data out. Note that we normalize to upper              # First, we get the actual data out. Note that we normalize to lower
1723              # case.              # case.
1724              $line =~ /^\s*(.*?)(\s|\n)/;              $line =~ /^\s*(.*?)(\s|\n)/;
1725              $sequence .= $1;              $sequence .= $1;
# Line 1709  Line 1727 
1727      }      }
1728      # Flush out the last sequence (if any).      # Flush out the last sequence (if any).
1729      if ($sequence) {      if ($sequence) {
1730          $retVal{$id} = uc $sequence;          $retVal{$id} = lc $sequence;
1731      }      }
1732      # Close the file.      # Close the file.
1733      close FASTAFILE;      close FASTAFILE;
# Line 2035  Line 2053 
2053      # Get the parameters.      # Get the parameters.
2054      my ($self, $entityName, $entityID) = @_;      my ($self, $entityName, $entityID) = @_;
2055      # Check for the entity instance.      # Check for the entity instance.
2056        Trace("Checking existence of $entityName with ID=$entityID.") if T(4);
2057      my $testInstance = $self->GetEntity($entityName, $entityID);      my $testInstance = $self->GetEntity($entityName, $entityID);
2058      # Return an existence indicator.      # Return an existence indicator.
2059      my $retVal = ($testInstance ? 1 : 0);      my $retVal = ($testInstance ? 1 : 0);
# Line 2226  Line 2245 
2245      return @retVal;      return @retVal;
2246  }  }
2247    
2248    =head3 GetProperties
2249    
2250    C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >>
2251    
2252    Return a list of the properties with the specified characteristics.
2253    
2254    Properties are arbitrary key-value pairs associated with a feature. (At some point they
2255    will also be associated with genomes.) A property value is represented by a 4-tuple of
2256    the form B<($fid, $key, $value, $url)>. These correspond exactly to the parameters described below.
2257    
2258    =over 4
2259    
2260    =item fid
2261    
2262    ID of the feature possessing the property.
2263    
2264    =item key
2265    
2266    Name or key of the property.
2267    
2268    =item value
2269    
2270    Value of the property.
2271    
2272    =item url
2273    
2274    URL of the document that indicated the property should have this particular value, or an
2275    empty string if no such document exists.
2276    
2277    =back
2278    
2279    The parameters act as a filter for the desired data. Every tuple returned will match
2280    all of the non-null parameters; a null (undefined or empty) parameter is not used for
2281    filtering. So, specifying just the I<$fid> will return all the properties of the
2282    specified feature; similarly, specifying the I<$key> and I<$value> parameters will
2283    return all the features having the specified property value.
2284    
2285    A single property key can have many values, representing different ideas about the
2286    feature in question. For example, one paper may declare that a feature C<fig|83333.1.peg.10> is
2287    virulent, and another may declare that it is not virulent. A query about the virulence of
2288    C<fig|83333.1.peg.10> would be coded as
2289    
2290        my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', '');
2291    
2292    Here the I<$value> and I<$url> fields are left blank, indicating that those fields are
2293    not to be filtered. The tuples returned would be
2294    
2295        ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf')
2296        ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf')
2297    
2298    =cut
2299    #: Return Type @@;
2300    sub GetProperties {
2301        # Get the parameters.
2302        my ($self, @parms) = @_;
2303        # Declare the return variable.
2304        my @retVal = ();
2305        # Now we need to create a WHERE clause that will get us the data we want. First,
2306        # we create a list of the columns containing the data for each parameter.
2307        my @colNames = ('HasProperty(from-link)', 'Property(property-name)',
2308                        'Property(property-value)', 'HasProperty(evidence)');
2309        # Now we build the WHERE clause and the list of parameter values.
2310        my @where = ();
2311        my @values = ();
2312        for (my $i = 0; $i <= $#colNames; $i++) {
2313            my $parm = $parms[$i];
2314            if (defined $parm && ($parm ne '')) {
2315                push @where, "$colNames[$i] = ?";
2316                push @values, $parm;
2317            }
2318        }
2319        # Format the WHERE clause.
2320        my $filter = (@values > 0 ? (join " AND ", @where) : undef);
2321        # Ask for all the property values with the desired characteristics.
2322        my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values);
2323        while (my $valueObject = $query->Fetch()) {
2324            my @tuple = $valueObject->Values(\@colNames);
2325            push @retVal, \@tuple;
2326        }
2327        # Return the result.
2328        return @retVal;
2329    }
2330    
2331  =head3 FeatureProperties  =head3 FeatureProperties
2332    
2333  C<< my @properties = $sprout->FeatureProperties($featureID); >>  C<< my @properties = $sprout->FeatureProperties($featureID); >>
# Line 2420  Line 2522 
2522  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>
2523    
2524  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped
2525  to the role the feature performs.  to the roles the feature performs.
2526    
2527  =over 4  =over 4
2528    
# Line 2430  Line 2532 
2532    
2533  =item RETURN  =item RETURN
2534    
2535  Returns a hash mapping all the feature's subsystems to the feature's role.  Returns a hash mapping all the feature's subsystems to a list of the feature's roles.
2536    
2537  =back  =back
2538    
2539  =cut  =cut
2540  #: Return Type %;  #: Return Type %@;
2541  sub SubsystemsOf {  sub SubsystemsOf {
2542      # Get the parameters.      # Get the parameters.
2543      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
# Line 2447  Line 2549 
2549      my %retVal = ();      my %retVal = ();
2550      # Loop through the results, adding them to the hash.      # Loop through the results, adding them to the hash.
2551      for my $record (@subsystems) {      for my $record (@subsystems) {
2552          $retVal{$record->[0]} = $record->[1];          my ($subsys, $role) = @{$record};
2553            if (exists $retVal{$subsys}) {
2554                push @{$retVal{$subsys}}, $role;
2555            } else {
2556                $retVal{$subsys} = [$role];
2557            }
2558      }      }
2559      # Return the hash.      # Return the hash.
2560      return %retVal;      return %retVal;
# Line 3108  Line 3215 
3215      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3216  }  }
3217    
3218    
3219    
3220  1;  1;

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.33

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3