[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.16, Fri Jun 24 21:45:45 2005 UTC revision 1.23, Fri Sep 9 21:15:47 2005 UTC
# Line 86  Line 86 
86      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
87      # the incoming data.      # the incoming data.
88      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
89                         dbType       => 'mysql',         # database type                         dbType       => $FIG_Config::dbms,
90                         dataDir      => 'Data',          # data file directory                                                          # database type
91                         xmlFileName  => 'SproutDBD.xml', # database definition file name                         dataDir      => $FIG_Config::sproutData,
92                         userData     => 'root/',         # user name and password                                                          # data file directory
93                         port         => 0,               # database connection port                         xmlFileName  => "$FIG_Config::sproutData/SproutDBD.xml",
94                                                            # database definition file name
95                           userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
96                                                            # user name and password
97                           port         => $FIG_Config::dbport,
98                                                            # database connection port
99                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
100                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
101                        }, $options);                        }, $options);
# Line 420  Line 425 
425      # Loop through the incoming table names.      # Loop through the incoming table names.
426      for my $tableName (@{$tableList}) {      for my $tableName (@{$tableList}) {
427          # Find the table's file.          # Find the table's file.
428          my $fileName = "$dataDir/$tableName";          my $fileName = LoadFileName($dataDir, $tableName);
429          if (! -e $fileName) {          if (! $fileName) {
430              $fileName = "$fileName.dtx";              Trace("No load file found for $tableName in $dataDir.") if T(0);
431          }          } else {
432          # Attempt to load this table.          # Attempt to load this table.
433          my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);          my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);
434          # Accumulate the resulting statistics.          # Accumulate the resulting statistics.
435          $retVal->Accumulate($result);          $retVal->Accumulate($result);
436      }      }
437        }
438      # Return the statistics.      # Return the statistics.
439      return $retVal;      return $retVal;
440  }  }
# Line 570  Line 576 
576  =item RETURN  =item RETURN
577    
578  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
579  context and as a space-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context.
580    
581  =back  =back
582    
# Line 613  Line 619 
619          push @retVal, "${contigID}_$beg$dir$len";          push @retVal, "${contigID}_$beg$dir$len";
620      }      }
621      # Return the list in the format indicated by the context.      # Return the list in the format indicated by the context.
622      return (wantarray ? @retVal : join(' ', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
623  }  }
624    
625  =head3 ParseLocation  =head3 ParseLocation
# Line 1506  Line 1512 
1512      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1513      # Declare the return variable.      # Declare the return variable.
1514      my @retVal = ();      my @retVal = ();
1515      # Our first task is to find out the nature of the coupling.      # Our first task is to find out the nature of the coupling: whether or not
1516        # it exists, its score, and whether the features are stored in the same
1517        # order as the ones coming in.
1518      my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);      my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);
1519      # Only proceed if a coupling exists.      # Only proceed if a coupling exists.
1520      if ($couplingID) {      if ($couplingID) {
1521          # Determine the ordering to place on the evidence items. If we're          # Determine the ordering to place on the evidence items. If we're
1522          # inverted, we want to see feature 2 before feature 1; otherwise,          # inverted, we want to see feature 2 before feature 1 (descending); otherwise,
1523          # we want the reverse.          # we want feature 1 before feature 2 (normal).
1524            Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4);
1525          my $ordering = ($inverted ? "DESC" : "");          my $ordering = ($inverted ? "DESC" : "");
1526          # Get the coupling evidence.          # Get the coupling evidence.
1527          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],
1528                                            "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering",                                            "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering",
1529                                            [$couplingID],                                            [$couplingID],
1530                                            ['PCH(used)', 'UsesAsEvidence(pos)']);                                            ['PCH(used)', 'UsesAsEvidence(to-link)']);
1531          # Loop through the evidence items. Each piece of evidence is represented by two          # Loop through the evidence items. Each piece of evidence is represented by two
1532          # positions in the evidence list, one for each feature on the other side of the          # positions in the evidence list, one for each feature on the other side of the
1533          # evidence link. If at some point we want to generalize to couplings with          # evidence link. If at some point we want to generalize to couplings with
# Line 1526  Line 1535 
1535          while (@evidenceList > 0) {          while (@evidenceList > 0) {
1536              my $peg1Data = shift @evidenceList;              my $peg1Data = shift @evidenceList;
1537              my $peg2Data = shift @evidenceList;              my $peg2Data = shift @evidenceList;
1538                Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4);
1539              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];
1540          }          }
1541            Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4);
1542      }      }
     # TODO: code  
1543      # Return the result.      # Return the result.
1544      return @retVal;      return @retVal;
1545  }  }
# Line 1575  Line 1585 
1585      # Find the coupling data.      # Find the coupling data.
1586      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1587                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
1588                                   [$retVal], "ParticipatesInCoupling(from-link), Coupling(score)");                                   [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);
1589      # Check to see if we found anything.      # Check to see if we found anything.
1590      if (!@pegs) {      if (!@pegs) {
1591            Trace("No coupling found.") if T(Coupling => 4);
1592          # No coupling, so undefine the return value.          # No coupling, so undefine the return value.
1593          $retVal = undef;          $retVal = undef;
1594      } else {      } else {
1595          # We have a coupling! Get the score and check for inversion.          # We have a coupling! Get the score and check for inversion.
1596          $score = $pegs[0]->[1];          $score = $pegs[0]->[1];
1597          $inverted = ($pegs[0]->[0] eq $peg1);          my $firstFound = $pegs[0]->[0];
1598            $inverted = ($firstFound ne $peg1);
1599            Trace("Coupling score is $score. First peg is $firstFound, peg 1 is $peg1.") if T(Coupling => 4);
1600      }      }
1601      # Return the result.      # Return the result.
1602      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
# Line 2219  Line 2232 
2232      return @retVal;      return @retVal;
2233  }  }
2234    
2235    =head3 GetProperties
2236    
2237    C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >>
2238    
2239    Return a list of the properties with the specified characteristics.
2240    
2241    Properties are arbitrary key-value pairs associated with a feature. (At some point they
2242    will also be associated with genomes.) A property value is represented by a 4-tuple of
2243    the form B<($fid, $key, $value, $url)>. These exactly correspond to the parameter
2244    
2245    =over 4
2246    
2247    =item fid
2248    
2249    ID of the feature possessing the property.
2250    
2251    =item key
2252    
2253    Name or key of the property.
2254    
2255    =item value
2256    
2257    Value of the property.
2258    
2259    =item url
2260    
2261    URL of the document that indicated the property should have this particular value, or an
2262    empty string if no such document exists.
2263    
2264    =back
2265    
2266    The parameters act as a filter for the desired data. Any non-null parameter will
2267    automatically match all the tuples returned. So, specifying just the I<$fid> will
2268    return all the properties of the specified feature; similarly, specifying the I<$key>
2269    and I<$value> parameters will return all the features having the specified property
2270    value.
2271    
2272    A single property key can have many values, representing different ideas about the
2273    feature in question. For example, one paper may declare that a feature C<fig|83333.1.peg.10> is
2274    virulent, and another may declare that it is not virulent. A query about the virulence of
2275    C<fig|83333.1.peg.10> would be coded as
2276    
2277        my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', '');
2278    
2279    Here the I<$value> and I<$url> fields are left blank, indicating that those fields are
2280    not to be filtered. The tuples returned would be
2281    
2282        ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf')
2283        ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf')
2284    
2285    =cut
2286    #: Return Type @@;
2287    sub GetProperties {
2288        # Get the parameters.
2289        my ($self, @parms) = @_;
2290        # Declare the return variable.
2291        my @retVal = ();
2292        # Now we need to create a WHERE clause that will get us the data we want. First,
2293        # we create a list of the columns containing the data for each parameter.
2294        my @colNames = ('HasProperty(from-link)', 'Property(property-name)',
2295                        'Property(property-value)', 'HasProperty(evidence)');
2296        # Now we build the WHERE clause and the list of parameter values.
2297        my @where = ();
2298        my @values = ();
2299        for (my $i = 0; $i <= $#colNames; $i++) {
2300            my $parm = $parms[$i];
2301            if (defined $parm && ($parm ne '')) {
2302                push @where, "$colNames[$i] = ?";
2303                push @values, $parm;
2304            }
2305        }
2306        # Format the WHERE clause.
2307        my $filter = (@values > 0 ? (join " AND ", @where) : undef);
2308        # Ask for all the propertie values with the desired characteristics.
2309        my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values);
2310        while (my $valueObject = $query->Fetch()) {
2311            my @tuple = $valueObject->Values(\@colNames);
2312            push @retVal, \@tuple;
2313        }
2314        # Return the result.
2315        return @retVal;
2316    }
2317    
2318  =head3 FeatureProperties  =head3 FeatureProperties
2319    
2320  C<< my @properties = $sprout->FeatureProperties($featureID); >>  C<< my @properties = $sprout->FeatureProperties($featureID); >>
# Line 2413  Line 2509 
2509  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>
2510    
2511  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped
2512  to the role the feature performs.  to the roles the feature performs.
2513    
2514  =over 4  =over 4
2515    
# Line 2423  Line 2519 
2519    
2520  =item RETURN  =item RETURN
2521    
2522  Returns a hash mapping all the feature's subsystems to the feature's role.  Returns a hash mapping all the feature's subsystems to a list of the feature's roles.
2523    
2524  =back  =back
2525    
2526  =cut  =cut
2527  #: Return Type %;  #: Return Type %@;
2528  sub SubsystemsOf {  sub SubsystemsOf {
2529      # Get the parameters.      # Get the parameters.
2530      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
# Line 2440  Line 2536 
2536      my %retVal = ();      my %retVal = ();
2537      # Loop through the results, adding them to the hash.      # Loop through the results, adding them to the hash.
2538      for my $record (@subsystems) {      for my $record (@subsystems) {
2539          $retVal{$record->[0]} = $record->[1];          my ($subsys, $role) = @{$record};
2540            if (exists $retVal{$subsys}) {
2541                push @{$retVal{$subsys}}, $role;
2542            } else {
2543                $retVal{$subsys} = [$role];
2544            }
2545      }      }
2546      # Return the hash.      # Return the hash.
2547      return %retVal;      return %retVal;
# Line 2896  Line 2997 
2997      return %retVal;      return %retVal;
2998  }  }
2999    
3000    =head3 MyGenomes
3001    
3002    C<< my @genomes = Sprout::MyGenomes($dataDir); >>
3003    
3004    Return a list of the genomes to be included in the Sprout.
3005    
3006    This method is provided for use during the Sprout load. It presumes the Genome load file has
3007    already been created. (It will be in the Sprout data directory and called either C<Genome>
3008    or C<Genome.dtx>.) Essentially, it reads in the Genome load file and strips out the genome
3009    IDs.
3010    
3011    =over 4
3012    
3013    =item dataDir
3014    
3015    Directory containing the Sprout load files.
3016    
3017    =back
3018    
3019    =cut
3020    #: Return Type @;
3021    sub MyGenomes {
3022        # Get the parameters.
3023        my ($dataDir) = @_;
3024        # Compute the genome file name.
3025        my $genomeFileName = LoadFileName($dataDir, "Genome");
3026        # Extract the genome IDs from the files.
3027        my @retVal = map { $_ =~ /^(\S+)/; $1 } Tracer::GetFile($genomeFileName);
3028        # Return the result.
3029        return @retVal;
3030    }
3031    
3032    =head3 LoadFileName
3033    
3034    C<< my $fileName = Sprout::LoadFileName($dataDir, $tableName); >>
3035    
3036    Return the name of the load file for the specified table in the specified data
3037    directory.
3038    
3039    =over 4
3040    
3041    =item dataDir
3042    
3043    Directory containing the Sprout load files.
3044    
3045    =item tableName
3046    
3047    Name of the table whose load file is desired.
3048    
3049    =item RETURN
3050    
3051    Returns the name of the file containing the load data for the specified table, or
3052    C<undef> if no load file is present.
3053    
3054    =back
3055    
3056    =cut
3057    #: Return Type $;
3058    sub LoadFileName {
3059        # Get the parameters.
3060        my ($dataDir, $tableName) = @_;
3061        # Declare the return variable.
3062        my $retVal;
3063        # Check for the various file names.
3064        if (-e "$dataDir/$tableName") {
3065            $retVal = "$dataDir/$tableName";
3066        } elsif (-e "$dataDir/$tableName.dtx") {
3067            $retVal = "$dataDir/$tableName.dtx";
3068        }
3069        # Return the result.
3070        return $retVal;
3071    }
3072    
3073  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3074    
3075  =head3 ParseAssignment  =head3 ParseAssignment
# Line 3028  Line 3202 
3202      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3203  }  }
3204    
3205    
3206    
3207  1;  1;

Legend:
Removed from v.1.16  
changed lines
  Added in v.1.23

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3