[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.65, Sun Jun 18 07:20:33 2006 UTC revision 1.86, Sun Sep 24 17:14:16 2006 UTC
# Line 12  Line 12 
12      use DBObject;      use DBObject;
13      use Tracer;      use Tracer;
14      use FIGRules;      use FIGRules;
15        use FidCheck;
16      use Stats;      use Stats;
17      use POSIX qw(strftime);      use POSIX qw(strftime);
18        use BasicLocation;
19    
20  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
21    
# Line 91  Line 92 
92  sub new {  sub new {
93      # Get the parameters.      # Get the parameters.
94      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
95        # Compute the DBD directory.
96        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
97                                                      $FIG_Config::fig );
98      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
99      # the incoming data.      # the incoming data.
100      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
# Line 98  Line 102 
102                                                          # database type                                                          # database type
103                         dataDir      => $FIG_Config::sproutData,                         dataDir      => $FIG_Config::sproutData,
104                                                          # data file directory                                                          # data file directory
105                         xmlFileName  => "$FIG_Config::fig/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
106                                                          # database definition file name                                                          # database definition file name
107                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
108                                                          # user name and password                                                          # user name and password
109                         port         => $FIG_Config::dbport,                         port         => $FIG_Config::dbport,
110                                                          # database connection port                                                          # database connection port
111                         sock         => $FIG_Config::dbsock,                         sock         => $FIG_Config::dbsock,
112                           host         => $FIG_Config::dbhost,
113                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
114                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
115                         noDBOpen     => 0,               # 1 to suppress the database open                         noDBOpen     => 0,               # 1 to suppress the database open
# Line 118  Line 123 
123      my $dbh;      my $dbh;
124      if (! $optionTable->{noDBOpen}) {      if (! $optionTable->{noDBOpen}) {
125          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
126                                  $password, $optionTable->{port}, undef, $optionTable->{sock});                                  $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
127      }      }
128      # Create the ERDB object.      # Create the ERDB object.
129      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
# Line 126  Line 131 
131      # Add the option table and XML file name.      # Add the option table and XML file name.
132      $retVal->{_options} = $optionTable;      $retVal->{_options} = $optionTable;
133      $retVal->{_xmlName} = $xmlFileName;      $retVal->{_xmlName} = $xmlFileName;
134        # Set up space for the group file data.
135        $retVal->{groupHash} = undef;
136      # Return it.      # Return it.
137      return $retVal;      return $retVal;
138  }  }
# Line 335  Line 342 
342    
343  =head3 GeneMenu  =head3 GeneMenu
344    
345  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params); >>  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected); >>
346    
347  Return an HTML select menu of genomes. Each genome will be an option in the menu,  Return an HTML select menu of genomes. Each genome will be an option in the menu,
348  and will be displayed by name with the ID and a contig count attached. The selection  and will be displayed by name with the ID and a contig count attached. The selection
# Line 357  Line 364 
364  Reference to a list of values to be substituted in for the parameter marks in  Reference to a list of values to be substituted in for the parameter marks in
365  the filter string.  the filter string.
366    
367    =item selected (optional)
368    
369    ID of the genome to be initially selected.
370    
371    =item fast (optional)
372    
373    If specified and TRUE, the contig counts will be omitted to improve performance.
374    
375  =item RETURN  =item RETURN
376    
377  Returns an HTML select menu with the specified genomes as selectable options.  Returns an HTML select menu with the specified genomes as selectable options.
# Line 367  Line 382 
382    
383  sub GeneMenu {  sub GeneMenu {
384      # Get the parameters.      # Get the parameters.
385      my ($self, $attributes, $filterString, $params) = @_;      my ($self, $attributes, $filterString, $params, $selected, $fast) = @_;
386        my $slowMode = ! $fast;
387        # Default to nothing selected. This prevents an execution warning if "$selected"
388        # is undefined.
389        $selected = "" unless defined $selected;
390        Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3);
391      # Start the menu.      # Start the menu.
392      my $retVal = "<select " .      my $retVal = "<select " .
393          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .
# Line 375  Line 395 
395      # Get the genomes.      # Get the genomes.
396      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
397                                                                       'Genome(genus)',                                                                       'Genome(genus)',
398                                                                       'Genome(species)']);                                                                       'Genome(species)',
399                                                                         'Genome(unique-characterization)']);
400      # Sort them by name.      # Sort them by name.
401      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
402      # Loop through the genomes, creating the option tags.      # Loop through the genomes, creating the option tags.
403      for my $genomeData (@sorted) {      for my $genomeData (@sorted) {
404          # Get the data for this genome.          # Get the data for this genome.
405          my ($genomeID, $genus, $species) = @{$genomeData};          my ($genomeID, $genus, $species, $strain) = @{$genomeData};
406          # Get the contig count.          # Get the contig count.
407            my $contigInfo = "";
408            if ($slowMode) {
409          my $count = $self->ContigCount($genomeID);          my $count = $self->ContigCount($genomeID);
410          my $counting = ($count == 1 ? "contig" : "contigs");          my $counting = ($count == 1 ? "contig" : "contigs");
411                $contigInfo = "[$count $counting]";
412            }
413            # Find out if we're selected.
414            my $selectOption = ($selected eq $genomeID ? " selected" : "");
415          # Build the option tag.          # Build the option tag.
416          $retVal .= "<option value=\"$genomeID\">$genus $species ($genomeID) [$count $counting]</option>\n";          $retVal .= "<option value=\"$genomeID\"$selectOption>$genus $species $strain ($genomeID)$contigInfo</option>\n";
417      }      }
418      # Close the SELECT tag.      # Close the SELECT tag.
419      $retVal .= "</select>\n";      $retVal .= "</select>\n";
420      # Return the result.      # Return the result.
421      return $retVal;      return $retVal;
422  }  }
423    
424  =head3 Build  =head3 Build
425    
426  C<< $sprout->Build(); >>  C<< $sprout->Build(); >>
# Line 410  Line 438 
438      $self->CreateTables();      $self->CreateTables();
439  }  }
440    
441    =head3 NmpdrGenomeMenu
442    
443    C<< my $htmlText = $sprout->NmpdrGenomeMenu(\%options, \@selected); >>
444    
445    This method creates a hierarchical HTML menu for NMPDR genomes organized by category. The
446    category indicates the low-level NMPDR group. Organizing the genomes in this way makes it
447    easier to select all genomes from a particular category.
448    
449    =over 4
450    
451    =item options
452    
453    Reference to a hash containing the options to be applied to the C<SELECT> tag for the menu.
454    Typical options would include C<name> to specify the field name, C<multiple> to specify
455    that multiple selections are allowed, and C<size> to set the number of rows to display
456    in the menu.
457    
458    =item selected
459    
460    Reference to a list containing the IDs of the genomes to be pre-selected. If the menu
461    is not intended to allow multiple selections, the list should be a singleton. If the
462    list is empty, nothing will be pre-selected.
463    
464    =item RETURN
465    
466    Returns the HTML text to generate a C<SELECT> menu inside a form.
467    
468    =back
469    
470    =cut
471    
sub NmpdrGenomeMenu {
    # Build an HTML SELECT menu of every genome, grouped into one OPTGROUP per
    # primary group. The group named by $FIG_Config::otherGroup is listed last.
    #
    # Parameters:
    #   $options   reference to a hash of attribute => value pairs for the SELECT tag
    #   $selected  reference to a list of genome IDs to be pre-selected
    # Returns the menu as a newline-terminated string of HTML.
    my ($self, $options, $selected) = @_;
    # Treat a missing option hash or selection list as empty.
    $options  = {} unless defined $options;
    $selected = [] unless defined $selected;
    # Get a list of all the genomes in group order. In fact, we only need them ordered
    # by name (genus,species,strain), but putting primary-group in front enables us to
    # take advantage of an existing index.
    my @genomeList = $self->GetAll(['Genome'],
                                   "ORDER BY Genome(primary-group), Genome(genus), Genome(species), Genome(unique-characterization)",
                                   [], ['Genome(primary-group)', 'Genome(id)',
                                        'Genome(genus)', 'Genome(species)',
                                        'Genome(unique-characterization)']);
    # Organize the genomes by group. Each group maps to a list of 2-tuples, the
    # first element being the genome ID and the second being the genome name.
    my %groupHash = ();
    for my $genome (@genomeList) {
        my ($group, $genomeID, $genus, $species, $strain) = @{$genome};
        # The strain is appended to the name only when it is non-empty.
        my $name = "$genus $species";
        if ($strain) {
            $name .= " $strain";
        }
        push @{$groupHash{$group}}, [$genomeID, $name];
    }
    # Sort the groups, holding back the supporting-genome group so it can go last.
    my $otherGroup = $FIG_Config::otherGroup;
    my @groups = sort grep { ! defined $otherGroup || $_ ne $otherGroup } keys %groupHash;
    # Only list the supporting-genome group if it actually contains genomes; otherwise
    # the menu would end with an empty option group.
    if (defined $otherGroup && exists $groupHash{$otherGroup}) {
        push @groups, $otherGroup;
    }
    # Create a hash that specifies the pre-selected entries.
    my %selectedHash = map { $_ => 1 } @{$selected};
    # Create the SELECT tag and stuff it into the output array.
    my $select = "<" . join(" ", 'SELECT', map { "$_=\"$options->{$_}\"" } keys %{$options}) . ">";
    my @lines = ($select);
    # Loop through the groups.
    for my $group (@groups) {
        # Create the option group tag. HTML requires the "label" attribute for the
        # visible group heading; "name" is retained for anything already relying on it.
        my $tag = "<OPTGROUP label=\"$group\" name=\"$group\">";
        push @lines, "  $tag";
        # Emit one option per genome in the group.
        for my $genome (@{$groupHash{$group}}) {
            my ($genomeID, $name) = @{$genome};
            # See if it's selected.
            my $selectFlag = ($selectedHash{$genomeID} ? " selected" : "");
            # Generate the option tag.
            my $optionTag = "<OPTION value=\"$genomeID\"$selectFlag>$name</OPTION>";
            push @lines, "    $optionTag";
        }
        # Close the option group.
        push @lines, "  </OPTGROUP>";
    }
    # Close the SELECT tag.
    push @lines, "</SELECT>";
    # Assemble the lines into a string. The trailing empty element makes the
    # result end with a new-line.
    my $retVal = join("\n", @lines, "");
    # Return the result.
    return $retVal;
}
531    
532  =head3 Genomes  =head3 Genomes
533    
534  C<< my @genomes = $sprout->Genomes(); >>  C<< my @genomes = $sprout->Genomes(); >>
# Line 627  Line 746 
746      return ($contigID, $start, $dir, $len);      return ($contigID, $start, $dir, $len);
747  }  }
748    
749    
750    
751  =head3 PointLocation  =head3 PointLocation
752    
753  C<< my $found = Sprout::PointLocation($location, $point); >>  C<< my $found = Sprout::PointLocation($location, $point); >>
# Line 836  Line 957 
957    
958  =over 4  =over 4
959    
960  =genomeID  =item genomeID
961    
962  ID of the genome whose feature count is desired.  ID of the genome whose feature count is desired.
963    
# Line 899  Line 1020 
1020      # Loop through the annotations.      # Loop through the annotations.
1021      while (my $data = $query->Fetch) {      while (my $data = $query->Fetch) {
1022          # Get the feature ID and annotation text.          # Get the feature ID and annotation text.
1023          my ($fid, $annotation) = $data->Values(['HasFeature(from-link)',          my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',
1024                                                  'Annotation(annotation)']);                                                  'Annotation(annotation)']);
1025          # Check to see if this is an assignment. Note that the user really          # Check to see if this is an assignment. Note that the user really
1026          # doesn't matter to us, other than we use it to determine whether or          # doesn't matter to us, other than we use it to determine whether or
1027          # not this is an assignment.          # not this is an assignment.
1028          my ($user, $assignment) = $self->_ParseAssignment('fig', $annotation);          my ($user, $assignment) = _ParseAssignment('fig', $annotation);
1029          if ($user) {          if ($user) {
1030              # Here it's an assignment. We put it in the return hash, overwriting              # Here it's an assignment. We put it in the return hash, overwriting
1031              # any older assignment that might be present.              # any older assignment that might be present.
# Line 1469  Line 1590 
1590      my %retVal = ();      my %retVal = ();
1591      # Loop through the incoming features.      # Loop through the incoming features.
1592      for my $featureID (@{$featureList}) {      for my $featureID (@{$featureList}) {
1593          # Create a query to get the feature's best hit.          # Ask the server for the feature's best hit.
1594          my $query = $self->Get(['IsBidirectionalBestHitOf'],          my @bbhData = FIGRules::BBHData($featureID);
                                "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?",  
                                [$featureID, $genomeID]);  
1595          # Peel off the BBHs found.          # Peel off the BBHs found.
1596          my @found = ();          my @found = ();
1597          while (my $bbh = $query->Fetch) {          for my $bbh (@bbhData) {
1598              push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)');              push @found, $bbh->[0];
1599          }          }
1600          $retVal{$featureID} = \@found;          $retVal{$featureID} = \@found;
1601      }      }
# Line 1490  Line 1609 
1609    
1610  Return a list of the similarities to the specified feature.  Return a list of the similarities to the specified feature.
1611    
1612  Sprout does not support real similarities, so this method just returns the bidirectional  This method just returns the bidirectional best hits for performance reasons.
 best hits.  
1613    
1614  =over 4  =over 4
1615    
# Line 1511  Line 1629 
1629      # Get the parameters.      # Get the parameters.
1630      my ($self, $featureID, $count) = @_;      my ($self, $featureID, $count) = @_;
1631      # Ask for the best hits.      # Ask for the best hits.
1632      my @lists = $self->GetAll(['IsBidirectionalBestHitOf'],      my @lists = FIGRules::BBHData($featureID);
                               "IsBidirectionalBestHitOf(from-link) = ? ORDER BY IsBidirectionalBestHitOf(score) DESC",  
                               [$featureID], ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(score)'],  
                               $count);  
1633      # Create the return value.      # Create the return value.
1634      my %retVal = ();      my %retVal = ();
1635      for my $tuple (@lists) {      for my $tuple (@lists) {
# Line 1524  Line 1639 
1639      return %retVal;      return %retVal;
1640  }  }
1641    
   
   
1642  =head3 IsComplete  =head3 IsComplete
1643    
1644  C<< my $flag = $sprout->IsComplete($genomeID); >>  C<< my $flag = $sprout->IsComplete($genomeID); >>
# Line 1653  Line 1766 
1766  sub CoupledFeatures {  sub CoupledFeatures {
1767      # Get the parameters.      # Get the parameters.
1768      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1769        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1770      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1771      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1772                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1665  Line 1779 
1779          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1780          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1781                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1782            Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1783          # Get the other feature that participates in the coupling.          # Get the other feature that participates in the coupling.
1784          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1785                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1786                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1787            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1788          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1789          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1790          $found = 1;          $found = 1;
# Line 1801  Line 1917 
1917      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1918      # Declare the return values. We'll start with the coupling ID and undefine the      # Declare the return values. We'll start with the coupling ID and undefine the
1919      # flag and score until we have more information.      # flag and score until we have more information.
1920      my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1921      # Find the coupling data.      # Find the coupling data.
1922      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1923                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
# Line 1822  Line 1938 
1938      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
1939  }  }
1940    
1941    =head3 GetSynonymGroup
1942    
1943    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1944    
1945    Return the synonym group name for the specified feature.
1946    
1947    =over 4
1948    
1949    =item fid
1950    
1951    ID of the feature whose synonym group is desired.
1952    
1953    =item RETURN
1954    
1955    The name of the synonym group to which the feature belongs. If the feature does
1956    not belong to a synonym group, the feature ID itself is returned.
1957    
1958    =back
1959    
1960    =cut
1961    
sub GetSynonymGroup {
    # Look up the synonym group of a feature.
    #
    # Parameters:
    #   $fid  ID of the feature whose synonym group is desired
    # Returns the first synonym group found for the feature, or the feature
    # ID itself when no group is found.
    my ($self, $fid) = @_;
    # Ask the database for every group that lists this feature as a member.
    my @synonymGroups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
                                       [$fid], 'IsSynonymGroupFor(from-link)');
    # Fall back to the feature ID when the query came back empty.
    my $groupName = (@synonymGroups ? $synonymGroups[0] : $fid);
    return $groupName;
}
1979    
1980    =head3 GetBoundaries
1981    
1982    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1983    
1984    Determine the begin and end boundaries for the locations in a list. All of the
1985    locations must belong to the same contig and have mostly the same direction in
1986    order for this method to produce a meaningful result. The resulting
1987    begin/end pair will contain all of the bases in any of the locations.
1988    
1989    =over 4
1990    
1991    =item locList
1992    
1993    List of locations to process.
1994    
1995    =item RETURN
1996    
1997    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1998    and the ending boundary. The beginning boundary will be left of the
1999    end for mostly-forward locations and right of the end for mostly-backward
2000    locations.
2001    
2002    =back
2003    
2004    =cut
2005    
sub GetBoundaries {
    # Compute the span covering every location in a list.
    #
    # Parameters:
    #   @locList  locations to process, each in a form BasicLocation->new accepts
    # Returns (contig, begin, end). The contig is taken from the last location
    # in the list. Begin/end are the smallest Left and largest Right seen; they
    # are swapped when backward ('-') locations outnumber forward ('+') ones.
    my ($self, @locList) = @_;
    # Tally of how many locations point in each direction.
    my %dirTally = ( '+' => 0, '-' => 0 );
    # The last location seeds the running extremes. Most callers pass a singleton
    # list, in which case the loop below never iterates.
    my $seed = BasicLocation->new(pop @locList);
    my $contig = $seed->Contig;
    my $lowest = $seed->Left;
    my $highest = $seed->Right;
    $dirTally{$seed->Dir}++;
    # Widen the span with each remaining location.
    for my $locString (@locList) {
        my $parsed = BasicLocation->new($locString);
        $dirTally{$parsed->Dir}++;
        my ($left, $right) = ($parsed->Left, $parsed->Right);
        $lowest = $left if $left < $lowest;
        $highest = $right if $right > $highest;
    }
    # Orient the span: forward spans begin on the left, backward spans on the right.
    my ($beg, $end) = ($lowest, $highest);
    if ($dirTally{'-'} > $dirTally{'+'}) {
        ($beg, $end) = ($highest, $lowest);
    }
    return ($contig, $beg, $end);
}
2043    
2044  =head3 CouplingID  =head3 CouplingID
2045    
2046  C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>  C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
2047    
2048  Return the coupling ID for a pair of feature IDs.  Return the coupling ID for a pair of feature IDs.
2049    
# Line 1857  Line 2076 
2076  =cut  =cut
2077  #: Return Type $;  #: Return Type $;
2078  sub CouplingID {  sub CouplingID {
2079      return join " ", sort @_;      my ($self, @pegs) = @_;
2080        return $self->DigestKey(join " ", sort @pegs);
2081  }  }
2082    
2083  =head3 ReadFasta  =head3 ReadFasta
# Line 2216  Line 2436 
2436      return @retVal;      return @retVal;
2437  }  }
2438    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
     # Get the parameters.  
     my ($self, $entityName, $entityID) = @_;  
     # Check for the entity instance.  
     Trace("Checking existence of $entityName with ID=$entityID.") if T(4);  
     my $testInstance = $self->GetEntity($entityName, $entityID);  
     # Return an existence indicator.  
     my $retVal = ($testInstance ? 1 : 0);  
     return $retVal;  
 }  
   
2439  =head3 FeatureTranslation  =head3 FeatureTranslation
2440    
2441  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 2788  Line 2973 
2973      return @retVal;      return @retVal;
2974  }  }
2975    
2976    =head3 GenomeSubsystemData
2977    
2978    C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >>
2979    
2980    Return a hash mapping genome features to their subsystem roles.
2981    
2982    =over 4
2983    
2984    =item genomeID
2985    
2986    ID of the genome whose subsystem feature map is desired.
2987    
2988    =item RETURN
2989    
2990    Returns a hash mapping each feature of the genome to a list of 2-tuples. Each
2991    2-tuple contains a subsystem name followed by a role ID.
2992    
2993    =back
2994    
2995    =cut
2996    
sub GenomeSubsystemData {
    # Map each feature of a genome to the subsystem roles it plays.
    #
    # Parameters:
    #   $genomeID  ID of the genome whose subsystem feature map is desired
    # Returns a hash keyed by feature ID. Each value is a reference to a list
    # of [subsystem, role] pairs.
    my ($self, $genomeID) = @_;
    # First query: for every feature of the genome that sits in a spreadsheet
    # cell, fetch the feature ID, the cell ID, and the role attached to the cell.
    my @featureRoles = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],
                                     "HasFeature(from-link) = ?", [$genomeID],
                                     ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);
    # Second query: fetch each spreadsheet cell of the genome with its subsystem.
    # The genome ID is bound twice, once to find the genome's cells and once to
    # restrict the participation records to those with a non-negative variant
    # code, which drops subsystems whose variant is unknown (-1).
    my @cellSubsystems = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],
                                       "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",
                                       [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);
    # Index the second result by cell ID so each cell resolves to its subsystem.
    my %subsystemOf;
    for my $cellRow (@cellSubsystems) {
        $subsystemOf{$cellRow->[0]} = $cellRow->[1];
    }
    # Walk the feature/role rows, keeping only those whose cell has a subsystem.
    my %featureMap;
    for my $roleRow (@featureRoles) {
        my ($fid, $cellID, $role) = @{$roleRow};
        my $subsystem = $subsystemOf{$cellID};
        next unless $subsystem;
        # The feature's list springs into existence on the first push.
        push @{$featureMap{$fid}}, [$subsystem, $role];
    }
    return %featureMap;
}
3035    
3036  =head3 RelatedFeatures  =head3 RelatedFeatures
3037    
# Line 2825  Line 3068 
3068      # Get the parameters.      # Get the parameters.
3069      my ($self, $featureID, $function, $userID) = @_;      my ($self, $featureID, $function, $userID) = @_;
3070      # Get a list of the features that are BBHs of the incoming feature.      # Get a list of the features that are BBHs of the incoming feature.
3071      my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'],      my @bbhFeatures = map { $_->[0] } FIGRules::BBHData($featureID);
                                      "IsBidirectionalBestHitOf(from-link) = ?", [$featureID],  
                                      'IsBidirectionalBestHitOf(to-link)');  
3072      # Now we loop through the features, pulling out the ones that have the correct      # Now we loop through the features, pulling out the ones that have the correct
3073      # functional assignment.      # functional assignment.
3074      my @retVal = ();      my @retVal = ();
# Line 3028  Line 3269 
3269      my ($self, $featureID, $cutoff) = @_;      my ($self, $featureID, $cutoff) = @_;
3270      # Create the return hash.      # Create the return hash.
3271      my %retVal = ();      my %retVal = ();
3272      # Create a query to get the desired BBHs.      # Query for the desired BBHs.
3273      my @bbhList = $self->GetAll(['IsBidirectionalBestHitOf'],      my @bbhList = FIGRules::BBHData($featureID, $cutoff);
                                 'IsBidirectionalBestHitOf(sc) <= ? AND IsBidirectionalBestHitOf(from-link) = ?',  
                                 [$cutoff, $featureID],  
                                 ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(sc)']);  
3274      # Form the results into the return hash.      # Form the results into the return hash.
3275      for my $pair (@bbhList) {      for my $pair (@bbhList) {
3276          $retVal{$pair->[0]} = $pair->[1];          $retVal{$pair->[0]} = $pair->[1];
# Line 3041  Line 3279 
3279      return %retVal;      return %retVal;
3280  }  }
3281    
3282    =head3 Sims
3283    
3284    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3285    
3286    Get a list of similarities for a specified feature. Similarity information is not kept in the
3287    Sprout database; rather, it is retrieved from a network server. The similarities are
3288    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3289    so that its elements can be accessed by name.
3290    
3291    Similarities can be either raw or expanded. The raw similarities are basic
3292    hits between features with similar DNA. Expanding a raw similarity drags in any
3293    features considered substantially identical. So, for example, if features B<A1>,
3294    B<A2>, and B<A3> are all substantially identical to B<A>, then a raw similarity
3295    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3296    
3297    =over 4
3298    
3299    =item fid
3300    
3301    ID of the feature whose similarities are desired.
3302    
3303    =item maxN
3304    
3305    Maximum number of similarities to return.
3306    
3307    =item maxP
3308    
3309    Minimum allowable similarity score.
3310    
3311    =item select
3312    
3313    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3314    means only similarities to FIG features are returned; C<all> means all expanded
3315    similarities are returned; and C<figx> means similarities are expanded until the
3316    number of FIG features equals the maximum.
3317    
3318    =item max_expand
3319    
3320    The maximum number of features to expand.
3321    
3322    =item filters
3323    
3324    Reference to a hash containing filter information, or a subroutine that can be
3325    used to filter the sims.
3326    
3327    =item RETURN
3328    
3329    Returns a reference to a list of similarity objects, or C<undef> if an error
3330    occurred.
3331    
3332    =back
3333    
3334    =cut
3335    
sub Sims {
    my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
    # Wrap this Sprout object in a FidCheck shim; the similarity code is handed
    # the shim so it can test for deleted feature IDs.
    my $fidChecker = FidCheck->new($self);
    # Fetch the similarities from the network. The call is made in scalar
    # context on purpose, so the caller always gets back a single value
    # (the list reference, or undef on error).
    my $simList = FIGRules::GetNetworkSims($fidChecker, $fid, {}, $maxN, $maxP,
                                           $select, $max_expand, $filters);
    return $simList;
}
3346    
3347  =head3 GetGroups  =head3 GetGroups
3348    
3349  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>
# Line 3062  Line 3365 
3365          # Here we have a group list. Loop through them individually,          # Here we have a group list. Loop through them individually,
3366          # getting a list of the relevant genomes.          # getting a list of the relevant genomes.
3367          for my $group (@{$groupList}) {          for my $group (@{$groupList}) {
3368              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(group-name) = ?",              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(primary-group) = ?",
3369                  [$group], "Genome(id)");                  [$group], "Genome(id)");
3370              $retVal{$group} = \@genomeIDs;              $retVal{$group} = \@genomeIDs;
3371          }          }
# Line 3070  Line 3373 
3373          # Here we need all of the groups. In this case, we run through all          # Here we need all of the groups. In this case, we run through all
3374          # of the genome records, putting each one found into the appropriate          # of the genome records, putting each one found into the appropriate
3375          # group. Note that we use a filter clause to insure that only genomes          # group. Note that we use a filter clause to insure that only genomes
3376          # in groups are included in the return set.          # in real NMPDR groups are included in the return set.
3377          my @genomes = $self->GetAll(['Genome'], "Genome(group-name) > ' '", [],          my @genomes = $self->GetAll(['Genome'], "Genome(primary-group) <> ?",
3378                                      ['Genome(id)', 'Genome(group-name)']);                                      [$FIG_Config::otherGroup], ['Genome(id)', 'Genome(primary-group)']);
3379          # Loop through the genomes found.          # Loop through the genomes found.
3380          for my $genome (@genomes) {          for my $genome (@genomes) {
3381              # Pop this genome's ID off the current list.              # Pop this genome's ID off the current list.
# Line 3198  Line 3501 
3501      return $retVal;      return $retVal;
3502  }  }
3503    
3504    =head3 Fix
3505    
3506    C<< my %fixedHash = Sprout::Fix(%groupHash); >>
3507    
3508    Prepare a genome group hash (like that returned by L</GetGroups>) for processing.
3509    Groups with the same primary name will be combined. The primary name is the
3510    first capitalized word in the group name.
3511    
3512    =over 4
3513    
3514    =item groupHash
3515    
3516    Hash to be fixed up.
3517    
3518    =item RETURN
3519    
3520    Returns a fixed-up version of the hash.
3521    
3522    =back
3523    
3524    =cut
3525    
sub Fix {
    # The incoming hash maps full group names to references to genome lists.
    my %sourceGroups = @_;
    # Genome lists are merged into this hash under each group's primary name.
    my %merged;
    for my $fullName (keys %sourceGroups) {
        # The primary name is the first capitalized word of two or more
        # characters. In list context the match returns the capture on
        # success and nothing on failure, in which case the full group
        # name is used as-is.
        my ($primaryName) = ($fullName =~ /([A-Z]\w+)/);
        $primaryName = $fullName unless defined $primaryName;
        # Fold this group's genomes into the list for its primary name.
        Tracer::AddToListMap(\%merged, $primaryName, @{$sourceGroups{$fullName}});
    }
    return %merged;
}
3545    
3546    =head3 GroupPageName
3547    
3548    C<< my $name = $sprout->GroupPageName($group); >>
3549    
3550    Return the name of the page for the specified NMPDR group.
3551    
3552    =over 4
3553    
3554    =item group
3555    
3556    Name of the relevant group.
3557    
3558    =item RETURN
3559    
3560    Returns the relative page name (e.g. C<../content/campy.php>). If the group file is not in
3561    memory it will be read in.
3562    
3563    =back
3564    
3565    =cut
3566    
sub GroupPageName {
    # Get the parameters.
    my ($self, $group) = @_;
    # Load the group file on first use and cache it in this object, so the
    # file is read at most once per Sprout instance.
    if (! defined $self->{groupHash}) {
        my %groupData = Sprout::ReadGroupFile($self->{_options}->{dataDir} . "/groups.tbl");
        $self->{groupHash} = \%groupData;
    }
    # Compute the real group name: the first capitalized word in the incoming
    # name, or the whole name if there is no such word.
    my $realGroup = $group;
    if ($group =~ /([A-Z]\w+)/) {
        $realGroup = $1;
    }
    # Look up the group's entry. ReadGroupFile stores each entry as
    # [page, genus, species], so the page name is element 0 (element 1 is
    # the genus, which is not a page name).
    my $groupEntry = $self->{groupHash}->{$realGroup};
    # An unknown group yields the bare content directory, without adding a
    # phantom entry to the cached hash or concatenating an undefined value.
    my $page = '';
    if (defined $groupEntry && defined $groupEntry->[0]) {
        $page = $groupEntry->[0];
    }
    # Return the relative page name.
    return "../content/" . $page;
}
3589    
3590    =head3 ReadGroupFile
3591    
3592    C<< my %groupData = Sprout::ReadGroupFile($groupFileName); >>
3593    
3594    Read in the data from the specified group file. The group file contains information
3595    about each of the NMPDR groups.
3596    
3597    =over 4
3598    
3599    =item name
3600    
3601    Name of the group.
3602    
3603    =item page
3604    
3605    Name of the group's page on the web site (e.g. C<campy.php> for
3606    Campylobacter)
3607    
3608    =item genus
3609    
3610    Genus of the group
3611    
3612    =item species
3613    
3614    Species of the group, or an empty string if the group is for an entire
3615    genus. If the group contains more than one species, the species names
3616    should be separated by commas.
3617    
3618    =back
3619    
3620    The parameters to this method are as follows
3621    
3622    =over 4
3623    
3624    =item groupFile
3625    
3626    Name of the file containing the group data.
3627    
3628    =item RETURN
3629    
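3630    Returns a hash keyed on group name. The value of each hash entry is a reference to a list containing the group's page name, genus, and species, in that order.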
3631    
3632    =back
3633    
3634    =cut
3635    
sub ReadGroupFile {
    # Get the parameters.
    my ($groupFileName) = @_;
    # Declare the return variable. It maps each group name to a list
    # reference of the form [page, genus, species].
    my %retVal;
    # Read the group file.
    my @groupLines = Tracer::GetFile($groupFileName);
    for my $groupLine (@groupLines) {
        # Split on tabs. The negative limit keeps trailing empty fields, so a
        # genus-wide group (empty species column) gets an empty string rather
        # than an undefined species.
        my ($name, $page, $genus, $species) = split(/\t/, $groupLine, -1);
        # Skip blank lines and lines with no group name; they would otherwise
        # create a bogus entry keyed on the empty string.
        next unless defined $name && length $name;
        $retVal{$name} = [$page, $genus, $species];
    }
    # Return the result.
    return %retVal;
}
3650    
3651  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3652    
3653  =head3 ParseAssignment  =head3 ParseAssignment
# Line 3254  Line 3704 
3704      }      }
3705      # If we have an assignment, we need to clean the function text. There may be      # If we have an assignment, we need to clean the function text. There may be
3706      # extra junk at the end added as a note from the user.      # extra junk at the end added as a note from the user.
3707      if (@retVal) {      if (defined( $retVal[1] )) {
3708          $retVal[1] =~ s/(\t\S)?\s*$//;          $retVal[1] =~ s/(\t\S)?\s*$//;
3709      }      }
3710      # Return the result list.      # Return the result list.

Legend:
Removed from v.1.65  
changed lines
  Added in v.1.86

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3