[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory | Revision Log | View Patch

revision 1.15, Wed Jun 22 21:30:45 2005 UTC revision 1.21, Fri Sep 9 20:40:41 2005 UTC
# Line 86  Line 86 
86      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
87      # the incoming data.      # the incoming data.
88      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
89                         dbType       => 'mysql',         # database type                         dbType       => $FIG_Config::dbms,
90                         dataDir      => 'Data',          # data file directory                                                          # database type
91                         xmlFileName  => 'SproutDBD.xml', # database definition file name                         dataDir      => $FIG_Config::sproutData,
92                         userData     => 'root/',         # user name and password                                                          # data file directory
93                         port         => 0,               # database connection port                         xmlFileName  => "$FIG_Config::sproutData/SproutDBD.xml",
94                                                            # database definition file name
95                           userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
96                                                            # user name and password
97                           port         => $FIG_Config::dbport,
98                                                            # database connection port
99                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
100                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
101                        }, $options);                        }, $options);
# Line 420  Line 425 
425      # Loop through the incoming table names.      # Loop through the incoming table names.
426      for my $tableName (@{$tableList}) {      for my $tableName (@{$tableList}) {
427          # Find the table's file.          # Find the table's file.
428          my $fileName = "$dataDir/$tableName";          my $fileName = LoadFileName($dataDir, $tableName);
429          if (! -e $fileName) {          if (! $fileName) {
430              $fileName = "$fileName.dtx";              Trace("No load file found for $tableName in $dataDir.") if T(0);
431          }          } else {
432          # Attempt to load this table.          # Attempt to load this table.
433          my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);          my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);
434          # Accumulate the resulting statistics.          # Accumulate the resulting statistics.
435          $retVal->Accumulate($result);          $retVal->Accumulate($result);
436      }      }
437        }
438      # Return the statistics.      # Return the statistics.
439      return $retVal;      return $retVal;
440  }  }
# Line 570  Line 576 
576  =item RETURN  =item RETURN
577    
578  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
579  context and as a space-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context.
580    
581  =back  =back
582    
# Line 613  Line 619 
619          push @retVal, "${contigID}_$beg$dir$len";          push @retVal, "${contigID}_$beg$dir$len";
620      }      }
621      # Return the list in the format indicated by the context.      # Return the list in the format indicated by the context.
622      return (wantarray ? @retVal : join(' ', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
623  }  }
624    
625  =head3 ParseLocation  =head3 ParseLocation
# Line 1506  Line 1512 
1512      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1513      # Declare the return variable.      # Declare the return variable.
1514      my @retVal = ();      my @retVal = ();
1515      # Our first task is to find out the nature of the coupling.      # Our first task is to find out the nature of the coupling: whether or not
1516        # it exists, its score, and whether the features are stored in the same
1517        # order as the ones coming in.
1518      my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);      my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);
1519      # Only proceed if a coupling exists.      # Only proceed if a coupling exists.
1520      if ($couplingID) {      if ($couplingID) {
1521          # Determine the ordering to place on the evidence items. If we're          # Determine the ordering to place on the evidence items. If we're
1522          # inverted, we want to see feature 2 before feature 1; otherwise,          # inverted, we want to see feature 2 before feature 1 (descending); otherwise,
1523          # we want the reverse.          # we want feature 1 before feature 2 (normal).
1524            Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4);
1525          my $ordering = ($inverted ? "DESC" : "");          my $ordering = ($inverted ? "DESC" : "");
1526          # Get the coupling evidence.          # Get the coupling evidence.
1527          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],
1528                                            "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering",                                            "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering",
1529                                            [$couplingID],                                            [$couplingID],
1530                                            ['PCH(used)', 'UsesAsEvidence(pos)']);                                            ['PCH(used)', 'UsesAsEvidence(to-link)']);
1531          # Loop through the evidence items. Each piece of evidence is represented by two          # Loop through the evidence items. Each piece of evidence is represented by two
1532          # positions in the evidence list, one for each feature on the other side of the          # positions in the evidence list, one for each feature on the other side of the
1533          # evidence link. If at some point we want to generalize to couplings with          # evidence link. If at some point we want to generalize to couplings with
# Line 1526  Line 1535 
1535          while (@evidenceList > 0) {          while (@evidenceList > 0) {
1536              my $peg1Data = shift @evidenceList;              my $peg1Data = shift @evidenceList;
1537              my $peg2Data = shift @evidenceList;              my $peg2Data = shift @evidenceList;
1538                Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4);
1539              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];
1540          }          }
1541      }      }
     # TODO: code  
1542      # Return the result.      # Return the result.
1543      return @retVal;      return @retVal;
1544  }  }
# Line 1575  Line 1584 
1584      # Find the coupling data.      # Find the coupling data.
1585      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1586                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
1587                                   [$retVal], "ParticipatesInCoupling(from-link), Coupling(score)");                                   [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);
1588      # Check to see if we found anything.      # Check to see if we found anything.
1589      if (!@pegs) {      if (!@pegs) {
1590          # No coupling, so undefine the return value.          # No coupling, so undefine the return value.
# Line 2219  Line 2228 
2228      return @retVal;      return @retVal;
2229  }  }
2230    
2231    =head3 GetProperties
2232    
2233    C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >>
2234    
2235    Return a list of the properties with the specified characteristics.
2236    
2237    Properties are arbitrary key-value pairs associated with a feature. (At some point they
2238    will also be associated with genomes.) A property value is represented by a 4-tuple of
2239    the form B<($fid, $key, $value, $url)>. These correspond exactly to the method's parameters, described below.
2240    
2241    =over 4
2242    
2243    =item fid
2244    
2245    ID of the feature possessing the property.
2246    
2247    =item key
2248    
2249    Name or key of the property.
2250    
2251    =item value
2252    
2253    Value of the property.
2254    
2255    =item url
2256    
2257    URL of the document that indicated the property should have this particular value, or an
2258    empty string if no such document exists.
2259    
2260    =back
2261    
2262    The parameters act as a filter for the desired data. Every tuple returned will
2263    match each parameter that is defined and non-empty. So, specifying just the I<$fid> will
2264    return all the properties of the specified feature; similarly, specifying the I<$key>
2265    and I<$value> parameters will return all the features having the specified property
2266    value.
2267    
2268    A single property key can have many values, representing different ideas about the
2269    feature in question. For example, one paper may declare that a feature C<fig|83333.1.peg.10> is
2270    virulent, and another may declare that it is not virulent. A query about the virulence of
2271    C<fig|83333.1.peg.10> would be coded as
2272    
2273        my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', '');
2274    
2275    Here the I<$value> and I<$url> fields are left blank, indicating that those fields are
2276    not to be filtered. The tuples returned would be
2277    
2278        ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf')
2279        ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf')
2280    
2281    =cut
2282    #: Return Type @@;
2283    sub GetProperties {
2284        # Get the parameters.
2285        my ($self, @parms) = @_;
2286        # Declare the return variable.
2287        my @retVal = ();
2288        # Now we need to create a WHERE clause that will get us the data we want. First,
2289        # we create a list of the columns containing the data for each parameter.
2290        my @colNames = ('HasProperty(from-link)', 'Property(property-name)',
2291                        'Property(property-value)', 'HasProperty(evidence)');
2292        # Now we build the WHERE clause and the list of parameter values.
2293        my @where = ();
2294        my @values = ();
2295        for (my $i = 0; $i <= $#colNames; $i++) {
2296            my $parm = $parms[$i];
2297            if (defined $parm && ($parm ne '')) {
2298                push @where, "$colNames[$i] = ?";
2299                push @values, $parm;
2300            }
2301        }
2302        # Format the WHERE clause.
2303        my $filter = (@values > 0 ? (join " AND ", @where) : undef);
2304        # Ask for all the property values with the desired characteristics.
2305        my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values);
2306        while (my $valueObject = $query->Fetch()) {
2307            my @tuple = $valueObject->Values(\@colNames);
2308            push @retVal, \@tuple;
2309        }
2310        # Return the result.
2311        return @retVal;
2312    }
2313    
2314  =head3 FeatureProperties  =head3 FeatureProperties
2315    
2316  C<< my @properties = $sprout->FeatureProperties($featureID); >>  C<< my @properties = $sprout->FeatureProperties($featureID); >>
# Line 2413  Line 2505 
2505  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>
2506    
2507  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped
2508  to the role the feature performs.  to the roles the feature performs.
2509    
2510  =over 4  =over 4
2511    
# Line 2423  Line 2515 
2515    
2516  =item RETURN  =item RETURN
2517    
2518  Returns a hash mapping all the feature's subsystems to the feature's role.  Returns a hash mapping all the feature's subsystems to a list of the feature's roles.
2519    
2520  =back  =back
2521    
2522  =cut  =cut
2523  #: Return Type %;  #: Return Type %@;
2524  sub SubsystemsOf {  sub SubsystemsOf {
2525      # Get the parameters.      # Get the parameters.
2526      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
2527      # Use the SSCell to connect features to subsystems.      # Get the subsystem list.
2528      my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'],      my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'],
2529                                      "ContainsFeature(to-link) = ?", [$featureID],                                      "ContainsFeature(to-link) = ?", [$featureID],
2530                                      ['HasSSCell(from-link)', 'IsRoleOf(from-link)']);                                      ['HasSSCell(from-link)', 'IsRoleOf(from-link)']);
# Line 2440  Line 2532 
2532      my %retVal = ();      my %retVal = ();
2533      # Loop through the results, adding them to the hash.      # Loop through the results, adding them to the hash.
2534      for my $record (@subsystems) {      for my $record (@subsystems) {
2535          $retVal{$record->[0]} = $record->[1];          my ($subsys, $role) = @{$record};
2536            if (exists $retVal{$subsys}) {
2537                push @{$retVal{$subsys}}, $role;
2538            } else {
2539                $retVal{$subsys} = [$role];
2540            }
2541      }      }
2542      # Return the hash.      # Return the hash.
2543      return %retVal;      return %retVal;
2544  }  }
2545    
2546    =head3 SubsystemList
2547    
2548    C<< my @subsystems = $sprout->SubsystemList($featureID); >>
2549    
2550    Return a list containing the names of the subsystems in which the specified
2551    feature participates. Unlike L</SubsystemsOf>, this method only returns the
2552    subsystem names, not the roles.
2553    
2554    =over 4
2555    
2556    =item featureID
2557    
2558    ID of the feature whose subsystem names are desired.
2559    
2560    =item RETURN
2561    
2562    Returns a list of the names of the subsystems in which the feature participates.
2563    
2564    =back
2565    
2566    =cut
2567    #: Return Type @;
2568    sub SubsystemList {
2569        # Get the parameters.
2570        my ($self, $featureID) = @_;
2571        # Get the list of names.
2572        my @retVal = $self->GetFlat(['ContainsFeature', 'HasSSCell'], "ContainsFeature(to-link) = ?",
2573                                    [$featureID], 'HasSSCell(from-link)');
2574        # Return the result.
2575        return @retVal;
2576    }
2577    
2578  =head3 RelatedFeatures  =head3 RelatedFeatures
2579    
2580  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 2864  Line 2993 
2993      return %retVal;      return %retVal;
2994  }  }
2995    
2996    =head3 MyGenomes
2997    
2998    C<< my @genomes = Sprout::MyGenomes($dataDir); >>
2999    
3000    Return a list of the genomes to be included in the Sprout.
3001    
3002    This method is provided for use during the Sprout load. It presumes the Genome load file has
3003    already been created. (It will be in the Sprout data directory and called either C<Genome>
3004    or C<Genome.dtx>.) Essentially, it reads in the Genome load file and strips out the genome
3005    IDs.
3006    
3007    =over 4
3008    
3009    =item dataDir
3010    
3011    Directory containing the Sprout load files.
3012    
3013    =back
3014    
3015    =cut
3016    #: Return Type @;
3017    sub MyGenomes {
3018        # Get the parameters.
3019        my ($dataDir) = @_;
3020        # Compute the genome file name.
3021        my $genomeFileName = LoadFileName($dataDir, "Genome");
3022        # Extract the genome ID (the first whitespace-delimited token) from each line of the file.
3023        my @retVal = map { $_ =~ /^(\S+)/; $1 } Tracer::GetFile($genomeFileName);
3024        # Return the result.
3025        return @retVal;
3026    }
3027    
3028    =head3 LoadFileName
3029    
3030    C<< my $fileName = Sprout::LoadFileName($dataDir, $tableName); >>
3031    
3032    Return the name of the load file for the specified table in the specified data
3033    directory.
3034    
3035    =over 4
3036    
3037    =item dataDir
3038    
3039    Directory containing the Sprout load files.
3040    
3041    =item tableName
3042    
3043    Name of the table whose load file is desired.
3044    
3045    =item RETURN
3046    
3047    Returns the name of the file containing the load data for the specified table, or
3048    C<undef> if no load file is present.
3049    
3050    =back
3051    
3052    =cut
3053    #: Return Type $;
3054    sub LoadFileName {
3055        # Get the parameters.
3056        my ($dataDir, $tableName) = @_;
3057        # Declare the return variable.
3058        my $retVal;
3059        # Check for the various file names.
3060        if (-e "$dataDir/$tableName") {
3061            $retVal = "$dataDir/$tableName";
3062        } elsif (-e "$dataDir/$tableName.dtx") {
3063            $retVal = "$dataDir/$tableName.dtx";
3064        }
3065        # Return the result.
3066        return $retVal;
3067    }
3068    
3069  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3070    
3071  =head3 ParseAssignment  =head3 ParseAssignment
# Line 2996  Line 3198 
3198      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3199  }  }
3200    
3201    
3202    
3203  1;  1;

Legend:
Removed from v.1.15  
changed lines
  Added in v.1.21

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3