[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.17, Mon Jun 27 20:00:55 2005 UTC revision 1.35, Wed Sep 14 13:47:26 2005 UTC
# Line 70  Line 70 
70    
71  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)  * B<maxSequenceLength> maximum number of residues per sequence, (default C<8000>)
72    
73    * B<noDBOpen> if TRUE, suppresses the connection to the database (default C<0>)
74    
75  =back  =back
76    
77  For example, the following constructor call specifies a database named I<Sprout> and a user name of  For example, the following constructor call specifies a database named I<Sprout> and a user name of
# Line 86  Line 88 
88      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
89      # the incoming data.      # the incoming data.
90      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
91                         dbType       => 'mysql',         # database type                         dbType       => $FIG_Config::dbms,
92                         dataDir      => 'Data',          # data file directory                                                          # database type
93                         xmlFileName  => 'SproutDBD.xml', # database definition file name                         dataDir      => $FIG_Config::sproutData,
94                         userData     => 'root/',         # user name and password                                                          # data file directory
95                         port         => 0,               # database connection port                         xmlFileName  => "$FIG_Config::sproutData/SproutDBD.xml",
96                                                            # database definition file name
97                           userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
98                                                            # user name and password
99                           port         => $FIG_Config::dbport,
100                                                            # database connection port
101                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
102                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
103                           noDBOpen     => 0,               # 1 to suppress the database open
104                        }, $options);                        }, $options);
105      # Get the data directory.      # Get the data directory.
106      my $dataDir = $optionTable->{dataDir};      my $dataDir = $optionTable->{dataDir};
# Line 100  Line 108 
108      $optionTable->{userData} =~ m!([^/]*)/(.*)$!;      $optionTable->{userData} =~ m!([^/]*)/(.*)$!;
109      my ($userName, $password) = ($1, $2);      my ($userName, $password) = ($1, $2);
110      # Connect to the database.      # Connect to the database.
111      my $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName, $password, $optionTable->{port});      my $dbh;
112        if (! $optionTable->{noDBOpen}) {
113            $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
114                                    $password, $optionTable->{port});
115        }
116      # Create the ERDB object.      # Create the ERDB object.
117      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
118      my $erdb = ERDB->new($dbh, $xmlFileName);      my $erdb = ERDB->new($dbh, $xmlFileName);
# Line 420  Line 432 
432      # Loop through the incoming table names.      # Loop through the incoming table names.
433      for my $tableName (@{$tableList}) {      for my $tableName (@{$tableList}) {
434          # Find the table's file.          # Find the table's file.
435          my $fileName = "$dataDir/$tableName";          my $fileName = LoadFileName($dataDir, $tableName);
436          if (! -e $fileName) {          if (! $fileName) {
437              $fileName = "$fileName.dtx";              Trace("No load file found for $tableName in $dataDir.") if T(0);
438          }          } else {
439          # Attempt to load this table.          # Attempt to load this table.
440          my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);          my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);
441          # Accumulate the resulting statistics.          # Accumulate the resulting statistics.
442          $retVal->Accumulate($result);          $retVal->Accumulate($result);
443      }      }
444        }
445      # Return the statistics.      # Return the statistics.
446      return $retVal;      return $retVal;
447  }  }
# Line 570  Line 583 
583  =item RETURN  =item RETURN
584    
585  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
586  context and as a space-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context.
587    
588  =back  =back
589    
# Line 609  Line 622 
622          }          }
623          # Remember this specifier for the adjacent-segment test the next time through.          # Remember this specifier for the adjacent-segment test the next time through.
624          ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);          ($prevContig, $prevBeg, $prevDir, $prevLen) = ($contigID, $beg, $dir, $len);
625            # Compute the initial base pair.
626            my $start = ($dir eq "+" ? $beg : $beg + $len - 1);
627          # Add the specifier to the list.          # Add the specifier to the list.
628          push @retVal, "${contigID}_$beg$dir$len";          push @retVal, "${contigID}_$start$dir$len";
629      }      }
630      # Return the list in the format indicated by the context.      # Return the list in the format indicated by the context.
631      return (wantarray ? @retVal : join(' ', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
632  }  }
633    
634  =head3 ParseLocation  =head3 ParseLocation
# Line 752  Line 767 
767          # the start point is the ending. Note that in the latter case we must reverse the DNA string          # the start point is the ending. Note that in the latter case we must reverse the DNA string
768          # before putting it in the return value.          # before putting it in the return value.
769          my ($start, $stop);          my ($start, $stop);
770            Trace("Parse of \"$location\" is $beg$dir$len.") if T(SDNA => 4);
771          if ($dir eq "+") {          if ($dir eq "+") {
772              $start = $beg;              $start = $beg;
773              $stop = $beg + $len - 1;              $stop = $beg + $len;
774          } else {          } else {
775              $start = $beg + $len + 1;              $start = $beg - $len;
776              $stop = $beg;              $stop = $beg;
777          }          }
778            Trace("Looking for sequences containing $start to $stop.") if T(SDNA => 4);
779          my $query = $self->Get(['IsMadeUpOf','Sequence'],          my $query = $self->Get(['IsMadeUpOf','Sequence'],
780              "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .              "IsMadeUpOf(from-link) = ? AND IsMadeUpOf(start-position) + IsMadeUpOf(len) > ? AND " .
781              " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",              " IsMadeUpOf(start-position) <= ? ORDER BY IsMadeUpOf(start-position)",
# Line 770  Line 787 
787                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',                  $sequence->Values(['IsMadeUpOf(start-position)', 'Sequence(sequence)',
788                                     'IsMadeUpOf(len)']);                                     'IsMadeUpOf(len)']);
789              my $stopPosition = $startPosition + $sequenceLength;              my $stopPosition = $startPosition + $sequenceLength;
790                Trace("Sequence is from $startPosition to $stopPosition.") if T(SDNA => 4);
791              # Figure out the start point and length of the relevant section.              # Figure out the start point and length of the relevant section.
792              my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);              my $pos1 = ($start < $startPosition ? 0 : $start - $startPosition);
793              my $len = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1;              my $len1 = ($stopPosition <= $stop ? $stopPosition : $stop) - $startPosition - $pos1;
794                Trace("Position is $pos1 for length $len1.") if T(SDNA => 4);
795              # Add the relevant data to the location data.              # Add the relevant data to the location data.
796              $locationDNA .= substr($sequenceData, $pos1, $len);              $locationDNA .= substr($sequenceData, $pos1, $len1);
797          }          }
798          # Add this location's data to the return string. Note that we may need to reverse it.          # Add this location's data to the return string. Note that we may need to reverse it.
799          if ($dir eq '+') {          if ($dir eq '+') {
800              $retVal .= $locationDNA;              $retVal .= $locationDNA;
801          } else {          } else {
802              $locationDNA = join('', reverse split //, $locationDNA);              $retVal .= FIG::reverse_comp($locationDNA);
             $retVal .= $locationDNA;  
803          }          }
804      }      }
805      # Return the result.      # Return the result.
# Line 1506  Line 1524 
1524      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1525      # Declare the return variable.      # Declare the return variable.
1526      my @retVal = ();      my @retVal = ();
1527      # Our first task is to find out the nature of the coupling.      # Our first task is to find out the nature of the coupling: whether or not
1528        # it exists, its score, and whether the features are stored in the same
1529        # order as the ones coming in.
1530      my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);      my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);
1531      # Only proceed if a coupling exists.      # Only proceed if a coupling exists.
1532      if ($couplingID) {      if ($couplingID) {
1533          # Determine the ordering to place on the evidence items. If we're          # Determine the ordering to place on the evidence items. If we're
1534          # inverted, we want to see feature 2 before feature 1; otherwise,          # inverted, we want to see feature 2 before feature 1 (descending); otherwise,
1535          # we want the reverse.          # we want feature 1 before feature 2 (normal).
1536            Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4);
1537          my $ordering = ($inverted ? "DESC" : "");          my $ordering = ($inverted ? "DESC" : "");
1538          # Get the coupling evidence.          # Get the coupling evidence.
1539          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],
# Line 1526  Line 1547 
1547          while (@evidenceList > 0) {          while (@evidenceList > 0) {
1548              my $peg1Data = shift @evidenceList;              my $peg1Data = shift @evidenceList;
1549              my $peg2Data = shift @evidenceList;              my $peg2Data = shift @evidenceList;
1550                Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4);
1551              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];
1552          }          }
1553            Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4);
1554      }      }
     # TODO: code  
1555      # Return the result.      # Return the result.
1556      return @retVal;      return @retVal;
1557  }  }
# Line 1578  Line 1600 
1600                                   [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);                                   [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);
1601      # Check to see if we found anything.      # Check to see if we found anything.
1602      if (!@pegs) {      if (!@pegs) {
1603            Trace("No coupling found.") if T(Coupling => 4);
1604          # No coupling, so undefine the return value.          # No coupling, so undefine the return value.
1605          $retVal = undef;          $retVal = undef;
1606      } else {      } else {
1607          # We have a coupling! Get the score and check for inversion.          # We have a coupling! Get the score and check for inversion.
1608          $score = $pegs[0]->[1];          $score = $pegs[0]->[1];
1609          $inverted = ($pegs[0]->[0] eq $peg1);          my $firstFound = $pegs[0]->[0];
1610            $inverted = ($firstFound ne $peg1);
1611            Trace("Coupling score is $score. First peg is $firstFound, peg 1 is $peg1.") if T(Coupling => 4);
1612      }      }
1613      # Return the result.      # Return the result.
1614      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
# Line 1688  Line 1713 
1713          if ($line =~ m/^>\s*(.+?)(\s|\n)/) {          if ($line =~ m/^>\s*(.+?)(\s|\n)/) {
1714              # Here we have a new header. Store the current sequence if we have one.              # Here we have a new header. Store the current sequence if we have one.
1715              if ($id) {              if ($id) {
1716                  $retVal{$id} = uc $sequence;                  $retVal{$id} = lc $sequence;
1717              }              }
1718              # Clear the sequence accumulator and save the new ID.              # Clear the sequence accumulator and save the new ID.
1719              ($id, $sequence) = ("$prefix$1", "");              ($id, $sequence) = ("$prefix$1", "");
1720          } else {          } else {
1721              # Here we have a data line, so we add it to the sequence accumulator.              # Here we have a data line, so we add it to the sequence accumulator.
1722              # First, we get the actual data out. Note that we normalize to upper              # First, we get the actual data out. Note that we normalize to lower
1723              # case.              # case.
1724              $line =~ /^\s*(.*?)(\s|\n)/;              $line =~ /^\s*(.*?)(\s|\n)/;
1725              $sequence .= $1;              $sequence .= $1;
# Line 1702  Line 1727 
1727      }      }
1728      # Flush out the last sequence (if any).      # Flush out the last sequence (if any).
1729      if ($sequence) {      if ($sequence) {
1730          $retVal{$id} = uc $sequence;          $retVal{$id} = lc $sequence;
1731      }      }
1732      # Close the file.      # Close the file.
1733      close FASTAFILE;      close FASTAFILE;
# Line 2028  Line 2053 
2053      # Get the parameters.      # Get the parameters.
2054      my ($self, $entityName, $entityID) = @_;      my ($self, $entityName, $entityID) = @_;
2055      # Check for the entity instance.      # Check for the entity instance.
2056        Trace("Checking existence of $entityName with ID=$entityID.") if T(4);
2057      my $testInstance = $self->GetEntity($entityName, $entityID);      my $testInstance = $self->GetEntity($entityName, $entityID);
2058      # Return an existence indicator.      # Return an existence indicator.
2059      my $retVal = ($testInstance ? 1 : 0);      my $retVal = ($testInstance ? 1 : 0);
# Line 2219  Line 2245 
2245      return @retVal;      return @retVal;
2246  }  }
2247    
2248    =head3 GetProperties
2249    
2250    C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >>
2251    
2252    Return a list of the properties with the specified characteristics.
2253    
2254    Properties are arbitrary key-value pairs associated with a feature. (At some point they
2255    will also be associated with genomes.) A property value is represented by a 4-tuple of
2256    the form B<($fid, $key, $value, $url)>. These correspond exactly to the method's parameters.
2257    
2258    =over 4
2259    
2260    =item fid
2261    
2262    ID of the feature possessing the property.
2263    
2264    =item key
2265    
2266    Name or key of the property.
2267    
2268    =item value
2269    
2270    Value of the property.
2271    
2272    =item url
2273    
2274    URL of the document that indicated the property should have this particular value, or an
2275    empty string if no such document exists.
2276    
2277    =back
2278    
2279    The parameters act as a filter for the desired data. Every tuple returned will
2280    match each non-null parameter. So, specifying just the I<$fid> will
2281    return all the properties of the specified feature; similarly, specifying the I<$key>
2282    and I<$value> parameters will return all the features having the specified property
2283    value.
2284    
2285    A single property key can have many values, representing different ideas about the
2286    feature in question. For example, one paper may declare that a feature C<fig|83333.1.peg.10> is
2287    virulent, and another may declare that it is not virulent. A query about the virulence of
2288    C<fig|83333.1.peg.10> would be coded as
2289    
2290        my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', '');
2291    
2292    Here the I<$value> and I<$url> fields are left blank, indicating that those fields are
2293    not to be filtered. The tuples returned would be
2294    
2295        ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf')
2296        ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf')
2297    
2298    =cut
2299    #: Return Type @@;
2300    sub GetProperties {
2301        # Get the parameters.
2302        my ($self, @parms) = @_;
2303        # Declare the return variable.
2304        my @retVal = ();
2305        # Now we need to create a WHERE clause that will get us the data we want. First,
2306        # we create a list of the columns containing the data for each parameter.
2307        my @colNames = ('HasProperty(from-link)', 'Property(property-name)',
2308                        'Property(property-value)', 'HasProperty(evidence)');
2309        # Now we build the WHERE clause and the list of parameter values.
2310        my @where = ();
2311        my @values = ();
2312        for (my $i = 0; $i <= $#colNames; $i++) {
2313            my $parm = $parms[$i];
2314            if (defined $parm && ($parm ne '')) {
2315                push @where, "$colNames[$i] = ?";
2316                push @values, $parm;
2317            }
2318        }
2319        # Format the WHERE clause.
2320        my $filter = (@values > 0 ? (join " AND ", @where) : undef);
2321        # Ask for all the property values with the desired characteristics.
2322        my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values);
2323        while (my $valueObject = $query->Fetch()) {
2324            my @tuple = $valueObject->Values(\@colNames);
2325            push @retVal, \@tuple;
2326        }
2327        # Return the result.
2328        return @retVal;
2329    }
2330    
2331  =head3 FeatureProperties  =head3 FeatureProperties
2332    
2333  C<< my @properties = $sprout->FeatureProperties($featureID); >>  C<< my @properties = $sprout->FeatureProperties($featureID); >>
# Line 2413  Line 2522 
2522  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>
2523    
2524  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped
2525  to the role the feature performs.  to the roles the feature performs.
2526    
2527  =over 4  =over 4
2528    
# Line 2423  Line 2532 
2532    
2533  =item RETURN  =item RETURN
2534    
2535  Returns a hash mapping all the feature's subsystems to the feature's role.  Returns a hash mapping all the feature's subsystems to a list of the feature's roles.
2536    
2537  =back  =back
2538    
2539  =cut  =cut
2540  #: Return Type %;  #: Return Type %@;
2541  sub SubsystemsOf {  sub SubsystemsOf {
2542      # Get the parameters.      # Get the parameters.
2543      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
# Line 2440  Line 2549 
2549      my %retVal = ();      my %retVal = ();
2550      # Loop through the results, adding them to the hash.      # Loop through the results, adding them to the hash.
2551      for my $record (@subsystems) {      for my $record (@subsystems) {
2552          $retVal{$record->[0]} = $record->[1];          my ($subsys, $role) = @{$record};
2553            if (exists $retVal{$subsys}) {
2554                push @{$retVal{$subsys}}, $role;
2555            } else {
2556                $retVal{$subsys} = [$role];
2557            }
2558      }      }
2559      # Return the hash.      # Return the hash.
2560      return %retVal;      return %retVal;
# Line 2896  Line 3010 
3010      return %retVal;      return %retVal;
3011  }  }
3012    
3013    =head3 MyGenomes
3014    
3015    C<< my @genomes = Sprout::MyGenomes($dataDir); >>
3016    
3017    Return a list of the genomes to be included in the Sprout.
3018    
3019    This method is provided for use during the Sprout load. It presumes the Genome load file has
3020    already been created. (It will be in the Sprout data directory and called either C<Genome>
3021    or C<Genome.dtx>.) Essentially, it reads in the Genome load file and strips out the genome
3022    IDs.
3023    
3024    =over 4
3025    
3026    =item dataDir
3027    
3028    Directory containing the Sprout load files.
3029    
3030    =back
3031    
3032    =cut
3033    #: Return Type @;
3034    sub MyGenomes {
3035        # Get the parameters.
3036        my ($dataDir) = @_;
3037        # Compute the genome file name.
3038        my $genomeFileName = LoadFileName($dataDir, "Genome");
3039        # Extract the genome IDs from the files.
3040        my @retVal = map { $_ =~ /^(\S+)/; $1 } Tracer::GetFile($genomeFileName);
3041        # Return the result.
3042        return @retVal;
3043    }
3044    
3045    =head3 LoadFileName
3046    
3047    C<< my $fileName = Sprout::LoadFileName($dataDir, $tableName); >>
3048    
3049    Return the name of the load file for the specified table in the specified data
3050    directory.
3051    
3052    =over 4
3053    
3054    =item dataDir
3055    
3056    Directory containing the Sprout load files.
3057    
3058    =item tableName
3059    
3060    Name of the table whose load file is desired.
3061    
3062    =item RETURN
3063    
3064    Returns the name of the file containing the load data for the specified table, or
3065    C<undef> if no load file is present.
3066    
3067    =back
3068    
3069    =cut
3070    #: Return Type $;
3071    sub LoadFileName {
3072        # Get the parameters.
3073        my ($dataDir, $tableName) = @_;
3074        # Declare the return variable.
3075        my $retVal;
3076        # Check for the various file names.
3077        if (-e "$dataDir/$tableName") {
3078            $retVal = "$dataDir/$tableName";
3079        } elsif (-e "$dataDir/$tableName.dtx") {
3080            $retVal = "$dataDir/$tableName.dtx";
3081        }
3082        # Return the result.
3083        return $retVal;
3084    }
3085    
3086  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3087    
3088  =head3 ParseAssignment  =head3 ParseAssignment
# Line 3028  Line 3215 
3215      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3216  }  }
3217    
3218    
3219    
3220  1;  1;

Legend:
Removed from v.1.17  
changed lines
  Added in v.1.35

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3