[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.76, Sun Jun 25 18:03:29 2006 UTC revision 1.85, Tue Sep 19 00:14:04 2006 UTC
# Line 92  Line 92 
92  sub new {  sub new {
93      # Get the parameters.      # Get the parameters.
94      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
95        # Compute the DBD directory.
96        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
97                                                      $FIG_Config::fig );
98      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
99      # the incoming data.      # the incoming data.
100      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
# Line 99  Line 102 
102                                                          # database type                                                          # database type
103                         dataDir      => $FIG_Config::sproutData,                         dataDir      => $FIG_Config::sproutData,
104                                                          # data file directory                                                          # data file directory
105                         xmlFileName  => "$FIG_Config::fig/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
106                                                          # database definition file name                                                          # database definition file name
107                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
108                                                          # user name and password                                                          # user name and password
109                         port         => $FIG_Config::dbport,                         port         => $FIG_Config::dbport,
110                                                          # database connection port                                                          # database connection port
111                         sock         => $FIG_Config::dbsock,                         sock         => $FIG_Config::dbsock,
112                           host         => $FIG_Config::dbhost,
113                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
114                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
115                         noDBOpen     => 0,               # 1 to suppress the database open                         noDBOpen     => 0,               # 1 to suppress the database open
# Line 119  Line 123 
123      my $dbh;      my $dbh;
124      if (! $optionTable->{noDBOpen}) {      if (! $optionTable->{noDBOpen}) {
125          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
126                                  $password, $optionTable->{port}, undef, $optionTable->{sock});                                  $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
127      }      }
128      # Create the ERDB object.      # Create the ERDB object.
129      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
# Line 127  Line 131 
131      # Add the option table and XML file name.      # Add the option table and XML file name.
132      $retVal->{_options} = $optionTable;      $retVal->{_options} = $optionTable;
133      $retVal->{_xmlName} = $xmlFileName;      $retVal->{_xmlName} = $xmlFileName;
134        # Set up space for the group file data.
135        $retVal->{groupHash} = undef;
136      # Return it.      # Return it.
137      return $retVal;      return $retVal;
138  }  }
# Line 336  Line 342 
342    
343  =head3 GeneMenu  =head3 GeneMenu
344    
345  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params); >>  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected, $fast); >>
346    
347  Return an HTML select menu of genomes. Each genome will be an option in the menu,  Return an HTML select menu of genomes. Each genome will be an option in the menu,
348  and will be displayed by name with the ID and a contig count attached. The selection  and will be displayed by name with the ID and a contig count attached. The selection
# Line 358  Line 364 
364  Reference to a list of values to be substituted in for the parameter marks in  Reference to a list of values to be substituted in for the parameter marks in
365  the filter string.  the filter string.
366    
367    =item selected (optional)
368    
369    ID of the genome to be initially selected.
370    
371    =item fast (optional)
372    
373    If specified and TRUE, the contig counts will be omitted to improve performance.
374    
375  =item RETURN  =item RETURN
376    
377  Returns an HTML select menu with the specified genomes as selectable options.  Returns an HTML select menu with the specified genomes as selectable options.
# Line 368  Line 382 
382    
383  sub GeneMenu {  sub GeneMenu {
384      # Get the parameters.      # Get the parameters.
385      my ($self, $attributes, $filterString, $params) = @_;      my ($self, $attributes, $filterString, $params, $selected, $fast) = @_;
386        my $slowMode = ! $fast;
387        # Default to nothing selected. This prevents an execution warning if "$selected"
388        # is undefined.
389        $selected = "" unless defined $selected;
390        Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3);
391      # Start the menu.      # Start the menu.
392      my $retVal = "<select " .      my $retVal = "<select " .
393          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .
# Line 385  Line 404 
404          # Get the data for this genome.          # Get the data for this genome.
405          my ($genomeID, $genus, $species, $strain) = @{$genomeData};          my ($genomeID, $genus, $species, $strain) = @{$genomeData};
406          # Get the contig count.          # Get the contig count.
407            my $contigInfo = "";
408            if ($slowMode) {
409          my $count = $self->ContigCount($genomeID);          my $count = $self->ContigCount($genomeID);
410          my $counting = ($count == 1 ? "contig" : "contigs");          my $counting = ($count == 1 ? "contig" : "contigs");
411                $contigInfo = "[$count $counting]";
412            }
413            # Find out if we're selected.
414            my $selectOption = ($selected eq $genomeID ? " selected" : "");
415          # Build the option tag.          # Build the option tag.
416          $retVal .= "<option value=\"$genomeID\">$genus $species $strain ($genomeID) [$count $counting]</option>\n";          $retVal .= "<option value=\"$genomeID\"$selectOption>$genus $species $strain ($genomeID)$contigInfo</option>\n";
         Trace("Option tag built for $genomeID: $genus $species $strain.") if T(3);  
417      }      }
418      # Close the SELECT tag.      # Close the SELECT tag.
419      $retVal .= "</select>\n";      $retVal .= "</select>\n";
420      # Return the result.      # Return the result.
421      return $retVal;      return $retVal;
422  }  }
423    
424  =head3 Build  =head3 Build
425    
426  C<< $sprout->Build(); >>  C<< $sprout->Build(); >>
# Line 630  Line 655 
655      return ($contigID, $start, $dir, $len);      return ($contigID, $start, $dir, $len);
656  }  }
657    
658    
659    
660  =head3 PointLocation  =head3 PointLocation
661    
662  C<< my $found = Sprout::PointLocation($location, $point); >>  C<< my $found = Sprout::PointLocation($location, $point); >>
# Line 1472  Line 1499 
1499      my %retVal = ();      my %retVal = ();
1500      # Loop through the incoming features.      # Loop through the incoming features.
1501      for my $featureID (@{$featureList}) {      for my $featureID (@{$featureList}) {
1502          # Create a query to get the feature's best hit.          # Ask the server for the feature's best hit.
1503          my $query = $self->Get(['IsBidirectionalBestHitOf'],          my @bbhData = FIGRules::BBHData($featureID);
                                "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?",  
                                [$featureID, $genomeID]);  
1504          # Peel off the BBHs found.          # Peel off the BBHs found.
1505          my @found = ();          my @found = ();
1506          while (my $bbh = $query->Fetch) {          for my $bbh (@bbhData) {
1507              push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)');              push @found, $bbh->[0];
1508          }          }
1509          $retVal{$featureID} = \@found;          $retVal{$featureID} = \@found;
1510      }      }
# Line 1493  Line 1518 
1518    
1519  Return a list of the similarities to the specified feature.  Return a list of the similarities to the specified feature.
1520    
1521  Sprout does not support real similarities, so this method just returns the bidirectional  This method just returns the bidirectional best hits for performance reasons.
 best hits.  
1522    
1523  =over 4  =over 4
1524    
# Line 1514  Line 1538 
1538      # Get the parameters.      # Get the parameters.
1539      my ($self, $featureID, $count) = @_;      my ($self, $featureID, $count) = @_;
1540      # Ask for the best hits.      # Ask for the best hits.
1541      my @lists = $self->GetAll(['IsBidirectionalBestHitOf'],      my @lists = FIGRules::BBHData($featureID);
                               "IsBidirectionalBestHitOf(from-link) = ? ORDER BY IsBidirectionalBestHitOf(score) DESC",  
                               [$featureID], ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(score)'],  
                               $count);  
1542      # Create the return value.      # Create the return value.
1543      my %retVal = ();      my %retVal = ();
1544      for my $tuple (@lists) {      for my $tuple (@lists) {
# Line 1527  Line 1548 
1548      return %retVal;      return %retVal;
1549  }  }
1550    
   
   
1551  =head3 IsComplete  =head3 IsComplete
1552    
1553  C<< my $flag = $sprout->IsComplete($genomeID); >>  C<< my $flag = $sprout->IsComplete($genomeID); >>
# Line 1656  Line 1675 
1675  sub CoupledFeatures {  sub CoupledFeatures {
1676      # Get the parameters.      # Get the parameters.
1677      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1678        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1679      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1680      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1681                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1668  Line 1688 
1688          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1689          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1690                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1691            Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1692          # Get the other feature that participates in the coupling.          # Get the other feature that participates in the coupling.
1693          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1694                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1695                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1696            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1697          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1698          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1699          $found = 1;          $found = 1;
# Line 2860  Line 2882 
2882      return @retVal;      return @retVal;
2883  }  }
2884    
2885    =head3 GenomeSubsystemData
2886    
2887    C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >>
2888    
2889    Return a hash mapping genome features to their subsystem roles.
2890    
2891    =over 4
2892    
2893    =item genomeID
2894    
2895    ID of the genome whose subsystem feature map is desired.
2896    
2897    =item RETURN
2898    
2899    Returns a hash mapping each feature of the genome to a list of 2-tuples. Each
2900    2-tuple contains a subsystem name followed by a role ID.
2901    
2902    =back
2903    
2904    =cut
2905    
2906    sub GenomeSubsystemData {
2907        # Get the parameters.
2908        my ($self, $genomeID) = @_;
2909        # Declare the return variable. It maps feature IDs to lists of [subsystem, role] pairs.
2910        my %retVal = ();
2911        # Get a list of the genome features that participate in subsystems. For each
2912        # feature we get its spreadsheet cells and the corresponding roles.
2913        my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],
2914                                 "HasFeature(from-link) = ?", [$genomeID],
2915                                 ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);
2916        # Now we get a list of the spreadsheet cells and their associated subsystems. Subsystems
2917        # with an unknown variant code (-1) are skipped. Note the genome ID is at both ends of the
2918        # list. We use it at the beginning to get all the spreadsheet cells for the genome and
2919        # again at the end to filter out participation in subsystems with a negative variant code.
2920        my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],
2921                                     "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",
2922                                     [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);
2923        # Now "@roleData" lists the spreadsheet cell and role for each of the genome's features.
2924        # "@cellData" lists the subsystem name for each of the genome's spreadsheet cells. We
2925        # link these two lists together to create the result. First, we want a hash mapping
2926        # spreadsheet cells to subsystem names.
2927        my %subHash = map { $_->[0] => $_->[1] } @cellData;
2928        # We loop through @roleData to build the return hash.
2929        for my $roleEntry (@roleData) {
2930            # Get the data for this feature and cell.
2931            my ($fid, $cellID, $role) = @{$roleEntry};
2932            # Check for a subsystem name. Cells with no entry in %subHash are skipped.
2933            my $subsys = $subHash{$cellID};
2934            if ($subsys) {
2935                # Ensure this feature has an entry in the return hash.
2936                if (! exists $retVal{$fid}) { $retVal{$fid} = []; }
2937                # Merge in this new data.
2938                push @{$retVal{$fid}}, [$subsys, $role];
2939            }
2940        }
2941        # Return the result.
2942        return %retVal;
2943    }
2944    
2945  =head3 RelatedFeatures  =head3 RelatedFeatures
2946    
2947  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>  C<< my @relatedList = $sprout->RelatedFeatures($featureID, $function, $userID); >>
# Line 2895  Line 2977 
2977      # Get the parameters.      # Get the parameters.
2978      my ($self, $featureID, $function, $userID) = @_;      my ($self, $featureID, $function, $userID) = @_;
2979      # Get a list of the features that are BBHs of the incoming feature.      # Get a list of the features that are BBHs of the incoming feature.
2980      my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'],      my @bbhFeatures = map { $_->[0] } FIGRules::BBHData($featureID);
                                      "IsBidirectionalBestHitOf(from-link) = ?", [$featureID],  
                                      'IsBidirectionalBestHitOf(to-link)');  
2981      # Now we loop through the features, pulling out the ones that have the correct      # Now we loop through the features, pulling out the ones that have the correct
2982      # functional assignment.      # functional assignment.
2983      my @retVal = ();      my @retVal = ();
# Line 3098  Line 3178 
3178      my ($self, $featureID, $cutoff) = @_;      my ($self, $featureID, $cutoff) = @_;
3179      # Create the return hash.      # Create the return hash.
3180      my %retVal = ();      my %retVal = ();
3181      # Create a query to get the desired BBHs.      # Query for the desired BBHs.
3182      my @bbhList = $self->GetAll(['IsBidirectionalBestHitOf'],      my @bbhList = FIGRules::BBHData($featureID, $cutoff);
                                 'IsBidirectionalBestHitOf(sc) <= ? AND IsBidirectionalBestHitOf(from-link) = ?',  
                                 [$cutoff, $featureID],  
                                 ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(sc)']);  
3183      # Form the results into the return hash.      # Form the results into the return hash.
3184      for my $pair (@bbhList) {      for my $pair (@bbhList) {
3185          $retVal{$pair->[0]} = $pair->[1];          $retVal{$pair->[0]} = $pair->[1];
# Line 3333  Line 3410 
3410      return $retVal;      return $retVal;
3411  }  }
3412    
3413    =head3 Fix
3414    
3415    C<< my %fixedHash = Sprout::Fix(%groupHash); >>
3416    
3417    Prepare a genome group hash (like that returned by L</GetGroups>) for processing.
3418    Groups with the same primary name will be combined. The primary name is the
3419    first capitalized word in the group name.
3420    
3421    =over 4
3422    
3423    =item groupHash
3424    
3425    Hash to be fixed up.
3426    
3427    =item RETURN
3428    
3429    Returns a fixed-up version of the hash.
3430    
3431    =back
3432    
3433    =cut
3434    
3435    sub Fix {
3436        # Get the parameters. The incoming hash maps group names to genome list references.
3437        my (%groupHash) = @_;
3438        # Create the result hash.
3439        my %retVal = ();
3440        # Copy over the genomes, merging groups that share a primary name.
3441        for my $groupID (keys %groupHash) {
3442            # Make a safety copy of the group ID. It is used unchanged if no primary name is found.
3443            my $realGroupID = $groupID;
3444            # Yank the primary name: the first uppercase letter followed by one or more word characters.
3445            if ($groupID =~ /([A-Z]\w+)/) {
3446                $realGroupID = $1;
3447            }
3448            # Append this group's genomes into the result hash.
3449            Tracer::AddToListMap(\%retVal, $realGroupID, @{$groupHash{$groupID}});
3450        }
3451        # Return the result hash.
3452        return %retVal;
3453    }
3454    
3455    =head3 GroupPageName
3456    
3457    C<< my $name = $sprout->GroupPageName($group); >>
3458    
3459    Return the name of the page for the specified NMPDR group.
3460    
3461    =over 4
3462    
3463    =item group
3464    
3465    Name of the relevant group.
3466    
3467    =item RETURN
3468    
3469    Returns the relative page name (e.g. C<../content/campy.php>). If the group file is not in
3470    memory it will be read in.
3471    
3472    =back
3473    
3474    =cut
3475    
3476    sub GroupPageName {
3477        # Get the parameters.
3478        my ($self, $group) = @_;
3479        # Declare the return variable.
3480        my $retVal;
3481        # Check for the group file data. It is loaded on first use and cached in the object.
3482        if (! defined $self->{groupHash}) {
3483            # Read the group file from the data directory named in the options.
3484            my %groupData = Sprout::ReadGroupFile($self->{_options}->{dataDir} . "/groups.tbl");
3485            # Store it in our object.
3486            $self->{groupHash} = \%groupData;
3487        }
3488        # Compute the real group name (the same primary-name extraction used by Fix).
3489        my $realGroup = $group;
3490        if ($group =~ /([A-Z]\w+)/) {
3491            $realGroup = $1;
3492        }
3493        # Compute the page name. NOTE(review): ReadGroupFile stores [page, genus, species], so index 1 appears to be the genus rather than the page -- confirm.
3494        $retVal = "../content/" . $self->{groupHash}->{$realGroup}->[1];
3495        # Return the result.
3496        return $retVal;
3497    }
3498    
3499    =head3 ReadGroupFile
3500    
3501    C<< my %groupData = Sprout::ReadGroupFile($groupFileName); >>
3502    
3503    Read in the data from the specified group file. The group file contains information
3504    about each of the NMPDR groups. Each line contains the following tab-delimited fields.
3505    
3506    =over 4
3507    
3508    =item name
3509    
3510    Name of the group.
3511    
3512    =item page
3513    
3514    Name of the group's page on the web site (e.g. C<campy.php> for
3515    Campylobacter)
3516    
3517    =item genus
3518    
3519    Genus of the group
3520    
3521    =item species
3522    
3523    Species of the group, or an empty string if the group is for an entire
3524    genus. If the group contains more than one species, the species names
3525    should be separated by commas.
3526    
3527    =back
3528    
3529    The parameters to this method are as follows
3530    
3531    =over 4
3532    
3533    =item groupFile
3534    
3535    Name of the file containing the group data.
3536    
3537    =item RETURN
3538    
3539    Returns a hash keyed on group name. The value of each hash entry is a reference to a list containing the page name, genus, and species of the group.
3540    
3541    =back
3542    
3543    =cut
3544    
3545    sub ReadGroupFile {
3546        # Get the parameters.
3547        my ($groupFileName) = @_;
3548        # Declare the return variable. It maps each group name to a [page, genus, species] list.
3549        my %retVal;
3550        # Read the group file. Each line is split on tabs into name, page, genus, and species.
3551        my @groupLines = Tracer::GetFile($groupFileName);
3552        for my $groupLine (@groupLines) {
3553            my ($name, $page, $genus, $species) = split(/\t/, $groupLine);
3554            $retVal{$name} = [$page, $genus, $species];
3555        }
3556        # Return the result.
3557        return %retVal;
3558    }
3559    
3560  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3561    
3562  =head3 ParseAssignment  =head3 ParseAssignment
# Line 3389  Line 3613 
3613      }      }
3614      # If we have an assignment, we need to clean the function text. There may be      # If we have an assignment, we need to clean the function text. There may be
3615      # extra junk at the end added as a note from the user.      # extra junk at the end added as a note from the user.
3616      if (@retVal) {      if (defined( $retVal[1] )) {
3617          $retVal[1] =~ s/(\t\S)?\s*$//;          $retVal[1] =~ s/(\t\S)?\s*$//;
3618      }      }
3619      # Return the result list.      # Return the result list.

Legend:
Removed from v.1.76  
changed lines
  Added in v.1.85

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3