[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.15, Wed Jun 22 21:30:45 2005 UTC revision 1.22, Fri Sep 9 21:10:46 2005 UTC
# Line 86  Line 86 
86      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
87      # the incoming data.      # the incoming data.
88      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
89                         dbType       => 'mysql',         # database type                         dbType       => $FIG_Config::dbms,
90                         dataDir      => 'Data',          # data file directory                                                          # database type
91                         xmlFileName  => 'SproutDBD.xml', # database definition file name                         dataDir      => $FIG_Config::sproutData,
92                         userData     => 'root/',         # user name and password                                                          # data file directory
93                         port         => 0,               # database connection port                         xmlFileName  => "$FIG_Config::sproutData/SproutDBD.xml",
94                                                            # database definition file name
95                           userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
96                                                            # user name and password
97                           port         => $FIG_Config::dbport,
98                                                            # database connection port
99                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
100                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
101                        }, $options);                        }, $options);
# Line 420  Line 425 
425      # Loop through the incoming table names.      # Loop through the incoming table names.
426      for my $tableName (@{$tableList}) {      for my $tableName (@{$tableList}) {
427          # Find the table's file.          # Find the table's file.
428          my $fileName = "$dataDir/$tableName";          my $fileName = LoadFileName($dataDir, $tableName);
429          if (! -e $fileName) {          if (! $fileName) {
430              $fileName = "$fileName.dtx";              Trace("No load file found for $tableName in $dataDir.") if T(0);
431          }          } else {
432          # Attempt to load this table.          # Attempt to load this table.
433          my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);          my $result = $erdb->LoadTable($fileName, $tableName, $truncateFlag);
434          # Accumulate the resulting statistics.          # Accumulate the resulting statistics.
435          $retVal->Accumulate($result);          $retVal->Accumulate($result);
436      }      }
437        }
438      # Return the statistics.      # Return the statistics.
439      return $retVal;      return $retVal;
440  }  }
# Line 570  Line 576 
576  =item RETURN  =item RETURN
577    
578  Returns a list of the feature's contig segments. The locations are returned as a list in a list  Returns a list of the feature's contig segments. The locations are returned as a list in a list
579  context and as a space-delimited string in a scalar context.  context and as a comma-delimited string in a scalar context.
580    
581  =back  =back
582    
# Line 613  Line 619 
619          push @retVal, "${contigID}_$beg$dir$len";          push @retVal, "${contigID}_$beg$dir$len";
620      }      }
621      # Return the list in the format indicated by the context.      # Return the list in the format indicated by the context.
622      return (wantarray ? @retVal : join(' ', @retVal));      return (wantarray ? @retVal : join(',', @retVal));
623  }  }
624    
625  =head3 ParseLocation  =head3 ParseLocation
# Line 1506  Line 1512 
1512      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1513      # Declare the return variable.      # Declare the return variable.
1514      my @retVal = ();      my @retVal = ();
1515      # Our first task is to find out the nature of the coupling.      # Our first task is to find out the nature of the coupling: whether or not
1516        # it exists, its score, and whether the features are stored in the same
1517        # order as the ones coming in.
1518      my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);      my ($couplingID, $inverted, $score) = $self->GetCoupling($peg1, $peg2);
1519      # Only proceed if a coupling exists.      # Only proceed if a coupling exists.
1520      if ($couplingID) {      if ($couplingID) {
1521          # Determine the ordering to place on the evidence items. If we're          # Determine the ordering to place on the evidence items. If we're
1522          # inverted, we want to see feature 2 before feature 1; otherwise,          # inverted, we want to see feature 2 before feature 1 (descending); otherwise,
1523          # we want the reverse.          # we want feature 1 before feature 2 (normal).
1524            Trace("Coupling evidence for ($peg1, $peg2) with inversion flag $inverted.") if T(Coupling => 4);
1525          my $ordering = ($inverted ? "DESC" : "");          my $ordering = ($inverted ? "DESC" : "");
1526          # Get the coupling evidence.          # Get the coupling evidence.
1527          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],          my @evidenceList = $self->GetAll(['IsEvidencedBy', 'PCH', 'UsesAsEvidence'],
1528                                            "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering",                                            "IsEvidencedBy(from-link) = ? ORDER BY PCH(id), UsesAsEvidence(pos) $ordering",
1529                                            [$couplingID],                                            [$couplingID],
1530                                            ['PCH(used)', 'UsesAsEvidence(pos)']);                                            ['PCH(used)', 'UsesAsEvidence(to-link)']);
1531          # Loop through the evidence items. Each piece of evidence is represented by two          # Loop through the evidence items. Each piece of evidence is represented by two
1532          # positions in the evidence list, one for each feature on the other side of the          # positions in the evidence list, one for each feature on the other side of the
1533          # evidence link. If at some point we want to generalize to couplings with          # evidence link. If at some point we want to generalize to couplings with
# Line 1526  Line 1535 
1535          while (@evidenceList > 0) {          while (@evidenceList > 0) {
1536              my $peg1Data = shift @evidenceList;              my $peg1Data = shift @evidenceList;
1537              my $peg2Data = shift @evidenceList;              my $peg2Data = shift @evidenceList;
1538                Trace("Peg 1 is " . $peg1Data->[1] . " and Peg 2 is " . $peg2Data->[1] . ".") if T(Coupling => 4);
1539              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];              push @retVal, [$peg1Data->[1], $peg2Data->[1], $peg1Data->[0]];
1540          }          }
1541            Trace("Last index in evidence result is is $#retVal.") if T(Coupling => 4);
1542      }      }
     # TODO: code  
1543      # Return the result.      # Return the result.
1544      return @retVal;      return @retVal;
1545  }  }
# Line 1575  Line 1585 
1585      # Find the coupling data.      # Find the coupling data.
1586      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1587                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
1588                                   [$retVal], "ParticipatesInCoupling(from-link), Coupling(score)");                                   [$retVal], ["ParticipatesInCoupling(from-link)", "Coupling(score)"]);
1589      # Check to see if we found anything.      # Check to see if we found anything.
1590      if (!@pegs) {      if (!@pegs) {
1591          # No coupling, so undefine the return value.          # No coupling, so undefine the return value.
# Line 2219  Line 2229 
2229      return @retVal;      return @retVal;
2230  }  }
2231    
2232    =head3 GetProperties
2233    
2234    C<< my @list = $sprout->GetProperties($fid, $key, $value, $url); >>
2235    
2236    Return a list of the properties with the specified characteristics.
2237    
2238    Properties are arbitrary key-value pairs associated with a feature. (At some point they
2239    will also be associated with genomes.) A property value is represented by a 4-tuple of
2240    the form B<($fid, $key, $value, $url)>. These correspond exactly to the parameters of this method.
2241    
2242    =over 4
2243    
2244    =item fid
2245    
2246    ID of the feature possessing the property.
2247    
2248    =item key
2249    
2250    Name or key of the property.
2251    
2252    =item value
2253    
2254    Value of the property.
2255    
2256    =item url
2257    
2258    URL of the document that indicated the property should have this particular value, or an
2259    empty string if no such document exists.
2260    
2261    =back
2262    
2263    The parameters act as a filter for the desired data. Every tuple returned will match
2264    each parameter that is neither undefined nor empty. So, specifying just the I<$fid> will
2265    return all the properties of the specified feature; similarly, specifying the I<$key>
2266    and I<$value> parameters will return all the features having the specified property
2267    value.
2268    
2269    A single property key can have many values, representing different ideas about the
2270    feature in question. For example, one paper may declare that a feature C<fig|83333.1.peg.10> is
2271    virulent, and another may declare that it is not virulent. A query about the virulence of
2272    C<fig|83333.1.peg.10> would be coded as
2273    
2274        my @list = $sprout->GetProperties('fig|83333.1.peg.10', 'virulence', '', '');
2275    
2276    Here the I<$value> and I<$url> fields are left blank, indicating that those fields are
2277    not to be filtered. The tuples returned would be
2278    
2279        ('fig|83333.1.peg.10', 'virulence', 'yes', 'http://www.somewhere.edu/first.paper.pdf')
2280        ('fig|83333.1.peg.10', 'virulence', 'no', 'http://www.somewhere.edu/second.paper.pdf')
2281    
2282    =cut
2283    #: Return Type @@;
2284    sub GetProperties {
2285        # Get the parameters.
2286        my ($self, @parms) = @_;
2287        # Declare the return variable.
2288        my @retVal = ();
2289        # Now we need to create a WHERE clause that will get us the data we want. First,
2290        # we create a list of the columns containing the data for each parameter.
2291        my @colNames = ('HasProperty(from-link)', 'Property(property-name)',
2292                        'Property(property-value)', 'HasProperty(evidence)');
2293        # Now we build the WHERE clause and the list of parameter values.
2294        my @where = ();
2295        my @values = ();
2296        for (my $i = 0; $i <= $#colNames; $i++) {
2297            my $parm = $parms[$i];
2298            if (defined $parm && ($parm ne '')) {
2299                push @where, "$colNames[$i] = ?";
2300                push @values, $parm;
2301            }
2302        }
2303        # Format the WHERE clause.
2304        my $filter = (@values > 0 ? (join " AND ", @where) : undef);
2305        # Ask for all the property values with the desired characteristics.
2306        my $query = $self->Get(['HasProperty', 'Property'], $filter, \@values);
2307        while (my $valueObject = $query->Fetch()) {
2308            my @tuple = $valueObject->Values(\@colNames);
2309            push @retVal, \@tuple;
2310        }
2311        # Return the result.
2312        return @retVal;
2313    }
2314    
2315  =head3 FeatureProperties  =head3 FeatureProperties
2316    
2317  C<< my @properties = $sprout->FeatureProperties($featureID); >>  C<< my @properties = $sprout->FeatureProperties($featureID); >>
# Line 2413  Line 2506 
2506  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>  C<< my %subsystems = $sprout->SubsystemsOf($featureID); >>
2507    
2508  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped  Return a hash describing all the subsystems in which a feature participates. Each subsystem is mapped
2509  to the role the feature performs.  to the roles the feature performs.
2510    
2511  =over 4  =over 4
2512    
# Line 2423  Line 2516 
2516    
2517  =item RETURN  =item RETURN
2518    
2519  Returns a hash mapping all the feature's subsystems to the feature's role.  Returns a hash mapping all the feature's subsystems to a list of the feature's roles.
2520    
2521  =back  =back
2522    
2523  =cut  =cut
2524  #: Return Type %;  #: Return Type %@;
2525  sub SubsystemsOf {  sub SubsystemsOf {
2526      # Get the parameters.      # Get the parameters.
2527      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
2528      # Use the SSCell to connect features to subsystems.      # Get the subsystem list.
2529      my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'],      my @subsystems = $self->GetAll(['ContainsFeature', 'HasSSCell', 'IsRoleOf'],
2530                                      "ContainsFeature(to-link) = ?", [$featureID],                                      "ContainsFeature(to-link) = ?", [$featureID],
2531                                      ['HasSSCell(from-link)', 'IsRoleOf(from-link)']);                                      ['HasSSCell(from-link)', 'IsRoleOf(from-link)']);
# Line 2440  Line 2533 
2533      my %retVal = ();      my %retVal = ();
2534      # Loop through the results, adding them to the hash.      # Loop through the results, adding them to the hash.
2535      for my $record (@subsystems) {      for my $record (@subsystems) {
2536          $retVal{$record->[0]} = $record->[1];          my ($subsys, $role) = @{$record};
2537            if (exists $retVal{$subsys}) {
2538                push @{$retVal{$subsys}}, $role;
2539            } else {
2540                $retVal{$subsys} = [$role];
2541            }
2542      }      }
2543      # Return the hash.      # Return the hash.
2544      return %retVal;      return %retVal;
2545  }  }
2546    
2547    =head3 SubsystemList
2548    
2549    C<< my @subsystems = $sprout->SubsystemList($featureID); >>
2550    
2551    Return a list containing the names of the subsystems in which the specified
2552    feature participates. Unlike L</SubsystemsOf>, this method only returns the
2553    subsystem names, not the roles.
2554    
2555    =over 4
2556    
2557    =item featureID
2558    
2559    ID of the feature whose subsystem names are desired.
2560    
2561    =item RETURN
2562    
2563    Returns a list of the names of the subsystems in which the feature participates.
2564    
2565    =back
2566    
2567    =cut
2568    #: Return Type @;
2569    sub SubsystemList {
2570        # Get the parameters.
2571        my ($self, $featureID) = @_;
2572        # Get the list of names.
2573        my @retVal = $self->GetFlat(['ContainsFeature', 'HasSSCell'], "ContainsFeature(to-link) = ?",
2574                                    [$featureID], 'HasSSCell(from-link)');
2575        # Return the result.
2576        return @retVal;
2577    }
2578    
2579  =head3 RelatedFeatures  =head3 RelatedFeatures
2580    
2581  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 2864  Line 2994 
2994      return %retVal;      return %retVal;
2995  }  }
2996    
2997    =head3 MyGenomes
2998    
2999    C<< my @genomes = Sprout::MyGenomes($dataDir); >>
3000    
3001    Return a list of the genomes to be included in the Sprout.
3002    
3003    This method is provided for use during the Sprout load. It presumes the Genome load file has
3004    already been created. (It will be in the Sprout data directory and called either C<Genome>
3005    or C<Genome.dtx>.) Essentially, it reads in the Genome load file and strips out the genome
3006    IDs.
3007    
3008    =over 4
3009    
3010    =item dataDir
3011    
3012    Directory containing the Sprout load files.
3013    
3014    =back
3015    
3016    =cut
3017    #: Return Type @;
3018    sub MyGenomes {
3019        # Get the parameters.
3020        my ($dataDir) = @_;
3021        # Compute the genome file name.
3022        my $genomeFileName = LoadFileName($dataDir, "Genome");
3023        # Extract the genome IDs from the files.
3024        my @retVal = map { $_ =~ /^(\S+)/; $1 } Tracer::GetFile($genomeFileName);
3025        # Return the result.
3026        return @retVal;
3027    }
3028    
3029    =head3 LoadFileName
3030    
3031    C<< my $fileName = Sprout::LoadFileName($dataDir, $tableName); >>
3032    
3033    Return the name of the load file for the specified table in the specified data
3034    directory.
3035    
3036    =over 4
3037    
3038    =item dataDir
3039    
3040    Directory containing the Sprout load files.
3041    
3042    =item tableName
3043    
3044    Name of the table whose load file is desired.
3045    
3046    =item RETURN
3047    
3048    Returns the name of the file containing the load data for the specified table, or
3049    C<undef> if no load file is present.
3050    
3051    =back
3052    
3053    =cut
3054    #: Return Type $;
3055    sub LoadFileName {
3056        # Get the parameters.
3057        my ($dataDir, $tableName) = @_;
3058        # Declare the return variable.
3059        my $retVal;
3060        # Check for the various file names.
3061        if (-e "$dataDir/$tableName") {
3062            $retVal = "$dataDir/$tableName";
3063        } elsif (-e "$dataDir/$tableName.dtx") {
3064            $retVal = "$dataDir/$tableName.dtx";
3065        }
3066        # Return the result.
3067        return $retVal;
3068    }
3069    
3070  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3071    
3072  =head3 ParseAssignment  =head3 ParseAssignment
# Line 2996  Line 3199 
3199      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3200  }  }
3201    
3202    
3203    
3204  1;  1;

Legend:
Removed from v.1.15  
changed lines
  Added in v.1.22

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3