[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.18, Tue Jun 28 21:34:15 2005 UTC revision 1.29, Wed Sep 14 13:06:53 2005 UTC
# Line 70  Line 70 
70    
71  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)
72    
73    * B<noDBOpen> suppresses the connection to the database if TRUE, else FALSE
74    
75  =back  =back
76    
77  For example, the following constructor call specifies a database named I<Sprout> and a user name of  For example, the following constructor call specifies a database named I<Sprout> and a user name of
# Line 98  Line 100 
100                                                          # database connection port                                                          # database connection port
101                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
102                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
103                           noDBOpen     => 0,               # 1 to suppress the database open
104                        }, $options);                        }, $options);
105      # Get the data directory.      # Get the data directory.
106      my $dataDir = $optionTable->{dataDir};      my $dataDir = $optionTable->{dataDir};
# Line 105  Line 108 
108      $optionTable->{userData} =~ m!([^/]*)/(.*)$!;      $optionTable->{userData} =~ m!([^/]*)/(.*)$!;
109      my ($userName, $password) = ($1, $2);      my ($userName, $password) = ($1, $2);
110      # Connect to the database.      # Connect to the database.
111      my $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, $password, $optionTable->{port});      my $dbh;
112        if (! $optionTable->{noDBOpen}) {
113            $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
114                                    $password, $optionTable->{port});
115        }
116      # Create the ERDB object.      # Create the ERDB object.
117      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
118      my $erdb = ERDB->new($dbh, $xmlFileName);      my $erdb = ERDB->new($dbh, $xmlFileName);
# Line 576  Line 583 
583  =item RETURN  =item RETURN
584    
585  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
586  context and as a space-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context.
587    
588  =back  =back
589    
# Line 619  Line 626 
626          push @retVal, "${contigID}_$beg$dir$len";          push @retVal, "${contigID}_$beg$dir$len";
627      }      }
628      # Return the list in the format indicated by the context.      # Return the list in the format indicated by the context.
629      return (wantarray ? @retVal : join(' ', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
630  }  }
631    
632  =head3 ParseLocation  =head3 ParseLocation
# Line 762  Line 769 
769              $start = $beg;              $start = $beg;
770              $stop = $beg + $len - 1;              $stop = $beg + $len - 1;
771          } else {          } else {
772              $start = $beg + $len + 1;              $start = $beg - $len + 1;
773              $stop = $beg;              $stop = $beg;
774          }          }
775            Trace("Looking for sequences containing $start through $stop.") if T(SDNA => 4);
776          my $query = $self->Get(['IsMadeUpOf','Sequence'],          my $query = $self->Get(['IsMadeUpOf','Sequence'],
777              "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .              "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .
778              " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",              " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",
# Line 776  Line 784 
784                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',
785                                     'IsMadeUpOf(len)']);                                     'IsMadeUpOf(len)']);
786              my $stopPosition = $startPosition + $sequenceLength;              my $stopPosition = $startPosition + $sequenceLength;
787                Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4);
788              # Figure out the start point and length of the relevant section.              # Figure out the start point and length of the relevant section.
789              my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);              my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);
790              my $len = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1;              my $len1 = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1;
791                Trace("Position is $pos1 for length $len1.") if T(SDNA => 4);
792              # Add the relevant data to the location data.              # Add the relevant data to the location data.
793              $locationDNA .= substr($sequenceData, $pos1, $len);              $locationDNA .= substr($sequenceData, $pos1, $len1);
794          }          }
795          # Add this location's data to the return string. Note that we may need to reverse it.          # Add this location's data to the return string. Note that we may need to reverse it.
796          if ($dir eq '+') {          if ($dir eq '+') {
797              $retVal .= $locationDNA;              $retVal .= $locationDNA;
798          } else {          } else {
799              $locationDNA = join('', reverse split //, $locationDNA);              $retVal .= FIG::reverse_comp($locationDNA);
             $retVal .= $locationDNA;  
800          }          }
801      }      }
802      # Return the result.      # Return the result.
# Line 1521  Line 1530 
1530          # Determine the ordering to place on the evidence items. If we're          # Determine the ordering to place on the evidence items. If we're
1531          # inverted, we want to see feature 2 before feature 1 (descending); otherwise,          # inverted, we want to see feature 2 before feature 1 (descending); otherwise,
1532          # we want feature 1 before feature 2 (normal).          # we want feature 1 before feature 2 (normal).
1533            Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4);
1534          my $ordering = ($inverted ? "DESC" : "");          my $ordering = ($inverted ? "DESC" : "");
1535          # Get the coupling evidence.          # Get the coupling evidence.
1536          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],
# Line 1534  Line 1544 
1544          while (@evidenceList > 0) {          while (@evidenceList > 0) {
1545              my $peg1Data = shift @evidenceList;              my $peg1Data = shift @evidenceList;
1546              my $peg2Data = shift @evidenceList;              my $peg2Data = shift @evidenceList;
1547                Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4);
1548              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];
1549          }          }
1550            Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4);
1551      }      }
1552      # Return the result.      # Return the result.
1553      return @retVal;      return @retVal;
# Line 1585  Line 1597 
1597                                   [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);                                   [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);
1598      # Check to see if we found anything.      # Check to see if we found anything.
1599      if (!@pegs) {      if (!@pegs) {
1600            Trace("No coupling found.") if T(Coupling => 4);
1601          # No coupling, so undefine the return value.          # No coupling, so undefine the return value.
1602          $retVal = undef;          $retVal = undef;
1603      } else {      } else {
1604          # We have a coupling! Get the score and check for inversion.          # We have a coupling! Get the score and check for inversion.
1605          $score = $pegs[0]->[1];          $score = $pegs[0]->[1];
1606          $inverted = ($pegs[0]->[0] eq $peg1);          my $firstFound = $pegs[0]->[0];
1607            $inverted = ($firstFound ne $peg1);
1608            Trace("Coupling score is $score. First peg is $firstFound, peg 1 is $peg1.") if T(Coupling => 4);
1609      }      }
1610      # Return the result.      # Return the result.
1611      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
# Line 1695  Line 1710 
1710          if ($line =~ m/^>\s*(.+?)(\s|\n)/) {          if ($line =~ m/^>\s*(.+?)(\s|\n)/) {
1711              # Here we have a new header. Store the current sequence if we have one.              # Here we have a new header. Store the current sequence if we have one.
1712              if ($id) {              if ($id) {
1713                  $retVal{$id} = uc $sequence;                  $retVal{$id} = lc $sequence;
1714              }              }
1715              # Clear the sequence accumulator and save the new ID.              # Clear the sequence accumulator and save the new ID.
1716              ($id, $sequence) = ("$prefix$1", "");              ($id, $sequence) = ("$prefix$1", "");
1717          } else {          } else {
1718              # Here we have a data line, so we add it to the sequence accumulator.              # Here we have a data line, so we add it to the sequence accumulator.
1719              # First, we get the actual data out. Note that we normalize to upper              # First, we get the actual data out. Note that we normalize to lower
1720              # case.              # case.
1721              $line =~ /^\s*(.*?)(\s|\n)/;              $line =~ /^\s*(.*?)(\s|\n)/;
1722              $sequence .= $1;              $sequence .= $1;
# Line 1709  Line 1724 
1724      }      }
1725      # Flush out the last sequence (if any).      # Flush out the last sequence (if any).
1726      if ($sequence) {      if ($sequence) {
1727          $retVal{$id} = uc $sequence;          $retVal{$id} = lc $sequence;
1728      }      }
1729      # Close the file.      # Close the file.
1730      close FASTAFILE;      close FASTAFILE;
# Line 2035  Line 2050 
2050      # Get the parameters.      # Get the parameters.
2051      my ($self, $entityName, $entityID) = @_;      my ($self, $entityName, $entityID) = @_;
2052      # Check for the entity instance.      # Check for the entity instance.
2053        Trace("Checking existence of $entityName with ID=$entityID.") if T(4);
2054      my $testInstance = $self->GetEntity($entityName, $entityID);      my $testInstance = $self->GetEntity($entityName, $entityID);
2055      # Return an existence indicator.      # Return an existence indicator.
2056      my $retVal = ($testInstance ? 1 : 0);      my $retVal = ($testInstance ? 1 : 0);
# Line 2226  Line 2242 
2242      return @retVal;      return @retVal;
2243  }  }
2244    
2245    =head3 GetProperties
2246    
2247    C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >>
2248    
2249    Return a list of the properties with the specified characteristics.
2250    
2251    Properties are arbitrary key-value pairs associated with a feature. (At some point they
2252    will also be associated with genomes.) A property value is represented by a 4-tuple of
2253    the form B<($fid, $key, $value, $url)>. These exactly correspond to the parameter
2254    
2255    =over 4
2256    
2257    =item fid
2258    
2259    ID of the feature possessing the property.
2260    
2261    =item key
2262    
2263    Name or key of the property.
2264    
2265    =item value
2266    
2267    Value of the property.
2268    
2269    =item url
2270    
2271    URL of the document that indicated the property should have this particular value, or an
2272    empty string if no such document exists.
2273    
2274    =back
2275    
2276    The parameters act as a filter for the desired data. Any non-null parameter will
2277    automatically match all the tuples returned. So, specifying just the I<$fid> will
2278    return all the properties of the specified feature; similarly, specifying the I<$key>
2279    and I<$value> parameters will return all the features having the specified property
2280    value.
2281    
2282    A single property key can have many values, representing different ideas about the
2283    feature in question. For example, one paper may declare that a feature C<fig|83333.1.peg.10> is
2284    virulent, and another may declare that it is not virulent. A query about the virulence of
2285    C<fig|83333.1.peg.10> would be coded as
2286    
2287        my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', '');
2288    
2289    Here the I<$value> and I<$url> fields are left blank, indicating that those fields are
2290    not to be filtered. The tuples returned would be
2291    
2292        ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf')
2293        ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf')
2294    
2295    =cut
2296    #: Return Type @@;
2297    sub GetProperties {
2298        # Get the parameters.
2299        my ($self, @parms) = @_;
2300        # Declare the return variable.
2301        my @retVal = ();
2302        # Now we need to create a WHERE clause that will get us the data we want. First,
2303        # we create a list of the columns containing the data for each parameter.
2304        my @colNames = ('HasProperty(from-link)', 'Property(property-name)',
2305                        'Property(property-value)', 'HasProperty(evidence)');
2306        # Now we build the WHERE clause and the list of parameter values.
2307        my @where = ();
2308        my @values = ();
2309        for (my $i = 0; $i <= $#colNames; $i++) {
2310            my $parm = $parms[$i];
2311            if (defined $parm && ($parm ne '')) {
2312                push @where, "$colNames[$i] = ?";
2313                push @values, $parm;
2314            }
2315        }
2316        # Format the WHERE clause.
2317        my $filter = (@values > 0 ? (join " AND ", @where) : undef);
2318        # Ask for all the propertie values with the desired characteristics.
2319        my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values);
2320        while (my $valueObject = $query->Fetch()) {
2321            my @tuple = $valueObject->Values(\@colNames);
2322            push @retVal, \@tuple;
2323        }
2324        # Return the result.
2325        return @retVal;
2326    }
2327    
2328  =head3 FeatureProperties  =head3 FeatureProperties
2329    
2330  C<< my @properties = $sprout->FeatureProperties($featureID); >>  C<< my @properties = $sprout->FeatureProperties($featureID); >>
# Line 2420  Line 2519 
2519  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>
2520    
2521  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped
2522  to the role the feature performs.  to the roles the feature performs.
2523    
2524  =over 4  =over 4
2525    
# Line 2430  Line 2529 
2529    
2530  =item RETURN  =item RETURN
2531    
2532  Returns a hash mapping all the feature's subsystems to the feature's role.  Returns a hash mapping all the feature's subsystems to a list of the feature's roles.
2533    
2534  =back  =back
2535    
2536  =cut  =cut
2537  #: Return Type %;  #: Return Type %@;
2538  sub SubsystemsOf {  sub SubsystemsOf {
2539      # Get the parameters.      # Get the parameters.
2540      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
# Line 2447  Line 2546 
2546      my %retVal = ();      my %retVal = ();
2547      # Loop through the results, adding them to the hash.      # Loop through the results, adding them to the hash.
2548      for my $record (@subsystems) {      for my $record (@subsystems) {
2549          $retVal{$record->[0]} = $record->[1];          my ($subsys, $role) = @{$record};
2550            if (exists $retVal{$subsys}) {
2551                push @{$retVal{$subsys}}, $role;
2552            } else {
2553                $retVal{$subsys} = [$role];
2554            }
2555      }      }
2556      # Return the hash.      # Return the hash.
2557      return %retVal;      return %retVal;
# Line 3108  Line 3212 
3212      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3213  }  }
3214    
3215    
3216    
3217  1;  1;

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.29

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3