[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.58, Tue Jun 6 05:07:15 2006 UTC revision 1.85, Tue Sep 19 00:14:04 2006 UTC
# Line 12  Line 12 
12      use DBObject;      use DBObject;
13      use Tracer;      use Tracer;
14      use FIGRules;      use FIGRules;
15        use FidCheck;
16      use Stats;      use Stats;
17      use POSIX qw(strftime);      use POSIX qw(strftime);
18        use BasicLocation;
19    
20  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
21    
# Line 91  Line 92 
92  sub new {  sub new {
93      # Get the parameters.      # Get the parameters.
94      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
95        # Compute the DBD directory.
96        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
97                                                      $FIG_Config::fig );
98      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
99      # the incoming data.      # the incoming data.
100      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
# Line 98  Line 102 
102                                                          # database type                                                          # database type
103                         dataDir      => $FIG_Config::sproutData,                         dataDir      => $FIG_Config::sproutData,
104                                                          # data file directory                                                          # data file directory
105                         xmlFileName  => "$FIG_Config::fig/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
106                                                          # database definition file name                                                          # database definition file name
107                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
108                                                          # user name and password                                                          # user name and password
109                         port         => $FIG_Config::dbport,                         port         => $FIG_Config::dbport,
110                                                          # database connection port                                                          # database connection port
111                         sock         => $FIG_Config::dbsock,                         sock         => $FIG_Config::dbsock,
112                           host         => $FIG_Config::dbhost,
113                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
114                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
115                         noDBOpen     => 0,               # 1 to suppress the database open                         noDBOpen     => 0,               # 1 to suppress the database open
# Line 118  Line 123 
123      my $dbh;      my $dbh;
124      if (! $optionTable->{noDBOpen}) {      if (! $optionTable->{noDBOpen}) {
125          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
126                                  $password, $optionTable->{port}, undef, $optionTable->{sock});                                  $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
127      }      }
128      # Create the ERDB object.      # Create the ERDB object.
129      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
# Line 126  Line 131 
131      # Add the option table and XML file name.      # Add the option table and XML file name.
132      $retVal->{_options} = $optionTable;      $retVal->{_options} = $optionTable;
133      $retVal->{_xmlName} = $xmlFileName;      $retVal->{_xmlName} = $xmlFileName;
134        # Set up space for the group file data.
135        $retVal->{groupHash} = undef;
136      # Return it.      # Return it.
137      return $retVal;      return $retVal;
138  }  }
# Line 304  Line 311 
311      return ($arch, $bact, $euk, $vir, $env, $unk);      return ($arch, $bact, $euk, $vir, $env, $unk);
312  }  }
313    
314    =head3 ContigCount
315    
316    C<< my $count = $sprout->ContigCount($genomeID); >>
317    
318    Return the number of contigs for the specified genome ID.
319    
320    =over 4
321    
322    =item genomeID
323    
324    ID of the genome whose contig count is desired.
325    
326    =item RETURN
327    
328    Returns the number of contigs for the specified genome.
329    
330    =back
331    
332    =cut
333    
sub ContigCount {
    # Unpack the invocant and the ID of the genome of interest.
    my ($self, $genomeID) = @_;
    # Ask the database layer how many Contig/HasContig rows hang off this
    # genome. The count is captured in a scalar so the lookup always runs in
    # scalar context, whatever context the caller used.
    my $contigTotal = $self->GetCount(['Contig', 'HasContig'], "HasContig(from-link) = ?", [$genomeID]);
    return $contigTotal;
}
342    
343    =head3 GeneMenu
344    
345    C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected, $fast); >>
346    
347    Return an HTML select menu of genomes. Each genome will be an option in the menu,
348    and will be displayed by name with the ID and a contig count attached. The selection
349    value will be the genome ID. The genomes will be sorted by genus/species name.
350    
351    =over 4
352    
353    =item attributes
354    
355    Reference to a hash mapping attributes to values for the SELECT tag generated.
356    
357    =item filterString
358    
359    A filter string for use in selecting the genomes. The filter string must conform
360    to the rules for the C<< ERDB->Get >> method.
361    
362    =item params
363    
364    Reference to a list of values to be substituted in for the parameter marks in
365    the filter string.
366    
367    =item selected (optional)
368    
369    ID of the genome to be initially selected.
370    
371    =item fast (optional)
372    
373    If specified and TRUE, the contig counts will be omitted to improve performance.
374    
375    =item RETURN
376    
377    Returns an HTML select menu with the specified genomes as selectable options.
378    
379    =back
380    
381    =cut
382    
sub GeneMenu {
    # Unpack the parameters: SELECT-tag attributes, genome filter and its
    # parameter values, the pre-selected genome ID, and the fast-mode flag.
    my ($self, $attributes, $filterString, $params, $selected, $fast) = @_;
    # Contig counts are only looked up when fast mode was NOT requested.
    my $slowMode = ! $fast;
    # Treat a missing selection as the empty string so the comparisons and
    # the trace message below never see an undefined value.
    if (! defined $selected) {
        $selected = "";
    }
    Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3);
    # Render the caller's attributes as name="value" pairs and open the menu.
    my @attrPairs = map { "$_=\"$attributes->{$_}\"" } keys %{$attributes};
    my $retVal = "<select " . join(" ", @attrPairs) . ">\n";
    # Read the ID, genus, species, and unique characterization of every
    # genome that passes the caller's filter.
    my @genomeFields = ('Genome(id)', 'Genome(genus)', 'Genome(species)',
                        'Genome(unique-characterization)');
    my @genomes = $self->GetAll(['Genome'], $filterString, $params, \@genomeFields);
    # Emit one OPTION tag per genome, in case-insensitive "genus species" order.
    for my $genomeData (sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes) {
        my ($genomeID, $genus, $species, $strain) = @{$genomeData};
        # In slow mode, append a "[N contig(s)]" suffix to the label.
        my $contigInfo = "";
        if ($slowMode) {
            my $count = $self->ContigCount($genomeID);
            my $counting;
            if ($count == 1) {
                $counting = "contig";
            } else {
                $counting = "contigs";
            }
            $contigInfo = "[$count $counting]";
        }
        # Mark this option if it is the requested initial selection.
        my $selectOption = "";
        if ($selected eq $genomeID) {
            $selectOption = " selected";
        }
        $retVal .= "<option value=\"$genomeID\"$selectOption>$genus $species $strain ($genomeID)$contigInfo</option>\n";
    }
    # Close the menu and hand back the finished HTML.
    $retVal .= "</select>\n";
    return $retVal;
}
423    
424  =head3 Build  =head3 Build
425    
426  C<< $sprout->Build(); >>  C<< $sprout->Build(); >>
# Line 538  Line 655 
655      return ($contigID, $start, $dir, $len);      return ($contigID, $start, $dir, $len);
656  }  }
657    
658    
659    
660  =head3 PointLocation  =head3 PointLocation
661    
662  C<< my $found = Sprout::PointLocation($location, $point); >>  C<< my $found = Sprout::PointLocation($location, $point); >>
# Line 704  Line 823 
823      return @retVal;      return @retVal;
824  }  }
825    
826    =head3 GenomeLength
827    
828    C<< my $length = $sprout->GenomeLength($genomeID); >>
829    
830    Return the length of the specified genome in base pairs.
831    
832    =over 4
833    
834    =item genomeID
835    
836    ID of the genome whose base pair count is desired.
837    
838    =item RETURN
839    
840    Returns the number of base pairs in all the contigs of the specified
841    genome.
842    
843    =back
844    
845    =cut
846    
sub GenomeLength {
    # Get the parameters.
    my ($self, $genomeID) = @_;
    # Declare the return variable. A genome for which no lengths come back
    # therefore reports a length of zero.
    my $retVal = 0;
    # Get the IsMadeUpOf(len) values reachable from the genome through its
    # HasContig links.
    my @lens = $self->GetFlat(['HasContig', 'IsMadeUpOf'], 'HasContig(from-link) = ?',
                              [$genomeID], 'IsMadeUpOf(len)');
    # Sum the lengths. A plain loop is used rather than "map" in void context,
    # since we only want the side effect and not a result list.
    for my $len (@lens) {
        $retVal += $len;
    }
    # Return the result.
    return $retVal;
}
860    
861    =head3 FeatureCount
862    
863    C<< my $count = $sprout->FeatureCount($genomeID, $type); >>
864    
865    Return the number of features of the specified type in the specified genome.
866    
867    =over 4
868    
869    =item genomeID
870    
871    ID of the genome whose feature count is desired.
872    
873    =item type
874    
875    Type of feature to count (e.g. C<peg>, C<rna>, etc.).
876    
877    =item RETURN
878    
879    Returns the number of features of the specified type for the specified genome.
880    
881    =back
882    
883    =cut
884    
sub FeatureCount {
    # Unpack the invocant, the genome ID, and the feature type to count.
    my ($self, $genomeID, $type) = @_;
    # The filter restricts the HasFeature/Feature join to this genome and
    # this feature type; both values are passed as bound parameters.
    my $filter = "HasFeature(from-link) = ? AND Feature(feature-type) = ?";
    # Capture the count in a scalar so the lookup always runs in scalar context.
    my $featureTotal = $self->GetCount(['HasFeature', 'Feature'], $filter, [$genomeID, $type]);
    return $featureTotal;
}
895    
896    =head3 GenomeAssignments
897    
898    C<< my $fidHash = $sprout->GenomeAssignments($genomeID); >>
899    
900    Return a hash of a genome's assigned features. The return hash will contain each
901    assigned feature of the genome mapped to the text of its most recent functional
902    assignment.
903    
904    =over 4
905    
906    =item genomeID
907    
908    ID of the genome whose functional assignments are desired.
909    
910    =item RETURN
911    
912    Returns a reference to a hash which maps each feature to its most recent
913    functional assignment.
914    
915    =back
916    
917    =cut
918    
sub GenomeAssignments {
    # Unpack the invocant and the genome ID.
    my ($self, $genomeID) = @_;
    # This hash collects the assignment text for each feature ID.
    my %assignmentFor;
    # Walk the genome's features and their annotations ordered by annotation
    # time, so that a later assignment for a feature replaces an earlier one.
    my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'],
                           "HasFeature(from-link) = ? ORDER BY Annotation(time)",
                           [$genomeID]);
    while (my $data = $query->Fetch) {
        # Pull out the feature ID and the raw annotation text.
        my ($fid, $annotation) = $data->Values(['HasFeature(to-link)',
                                                'Annotation(annotation)']);
        # Try to parse the annotation as a functional assignment. The user
        # value is only used as the "this really was an assignment" signal.
        my ($user, $assignment) = _ParseAssignment('fig', $annotation);
        next unless $user;
        # Record the assignment, replacing any earlier one for this feature.
        $assignmentFor{$fid} = $assignment;
    }
    # Hand back a reference to the feature-to-assignment map.
    return \%assignmentFor;
}
947    
948  =head3 ContigLength  =head3 ContigLength
949    
950  C<< my $length = $sprout->ContigLength($contigID); >>  C<< my $length = $sprout->ContigLength($contigID); >>
# Line 1258  Line 1499 
1499      my %retVal = ();      my %retVal = ();
1500      # Loop through the incoming features.      # Loop through the incoming features.
1501      for my $featureID (@{$featureList}) {      for my $featureID (@{$featureList}) {
1502          # Create a query to get the feature's best hit.          # Ask the server for the feature's best hit.
1503          my $query = $self->Get(['IsBidirectionalBestHitOf'],          my @bbhData = FIGRules::BBHData($featureID);
                                "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?",  
                                [$featureID, $genomeID]);  
1504          # Peel off the BBHs found.          # Peel off the BBHs found.
1505          my @found = ();          my @found = ();
1506          while (my $bbh = $query->Fetch) {          for my $bbh (@bbhData) {
1507              push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)');              push @found, $bbh->[0];
1508          }          }
1509          $retVal{$featureID} = \@found;          $retVal{$featureID} = \@found;
1510      }      }
# Line 1279  Line 1518 
1518    
1519  Return a list of the similarities to the specified feature.  Return a list of the similarities to the specified feature.
1520    
1521  Sprout does not support real similarities, so this method just returns the bidirectional  This method just returns the bidirectional best hits for performance reasons.
 best hits.  
1522    
1523  =over 4  =over 4
1524    
# Line 1300  Line 1538 
1538      # Get the parameters.      # Get the parameters.
1539      my ($self, $featureID, $count) = @_;      my ($self, $featureID, $count) = @_;
1540      # Ask for the best hits.      # Ask for the best hits.
1541      my @lists = $self->GetAll(['IsBidirectionalBestHitOf'],      my @lists = FIGRules::BBHData($featureID);
                               "IsBidirectionalBestHitOf(from-link) = ? ORDER BY IsBidirectionalBestHitOf(score) DESC",  
                               [$featureID], ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(score)'],  
                               $count);  
1542      # Create the return value.      # Create the return value.
1543      my %retVal = ();      my %retVal = ();
1544      for my $tuple (@lists) {      for my $tuple (@lists) {
# Line 1313  Line 1548 
1548      return %retVal;      return %retVal;
1549  }  }
1550    
   
   
1551  =head3 IsComplete  =head3 IsComplete
1552    
1553  C<< my $flag = $sprout->IsComplete($genomeID); >>  C<< my $flag = $sprout->IsComplete($genomeID); >>
# Line 1442  Line 1675 
1675  sub CoupledFeatures {  sub CoupledFeatures {
1676      # Get the parameters.      # Get the parameters.
1677      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1678        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1679      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1680      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1681                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1454  Line 1688 
1688          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1689          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1690                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1691          # The coupling ID contains the two feature IDs separated by a space. We use          Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1692          # this information to find the ID of the other feature.          # Get the other feature that participates in the coupling.
1693          my ($fid1, $fid2) = split / /, $couplingID;          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1694          my $otherFeatureID = ($featureID eq $fid1 ? $fid2 : $fid1);                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1695                                               [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1696            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1697          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1698          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1699          $found = 1;          $found = 1;
# Line 1590  Line 1826 
1826      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1827      # Declare the return values. We'll start with the coupling ID and undefine the      # Declare the return values. We'll start with the coupling ID and undefine the
1828      # flag and score until we have more information.      # flag and score until we have more information.
1829      my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1830      # Find the coupling data.      # Find the coupling data.
1831      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1832                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
# Line 1611  Line 1847 
1847      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
1848  }  }
1849    
1850    =head3 GetSynonymGroup
1851    
1852    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1853    
1854    Return the synonym group name for the specified feature.
1855    
1856    =over 4
1857    
1858    =item fid
1859    
1860    ID of the feature whose synonym group is desired.
1861    
1862    =item RETURN
1863    
1864    The name of the synonym group to which the feature belongs. If the feature does
1865    not belong to a synonym group, the feature ID itself is returned.
1866    
1867    =back
1868    
1869    =cut
1870    
sub GetSynonymGroup {
    # Unpack the invocant and the feature ID.
    my ($self, $fid) = @_;
    # Look up the from-link side of every IsSynonymGroupFor row that points
    # at this feature.
    my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
                                [$fid], 'IsSynonymGroupFor(from-link)');
    # The first group found wins; a feature with no group is its own group.
    return (@groups ? $groups[0] : $fid);
}
1888    
1889    =head3 GetBoundaries
1890    
1891    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1892    
1893    Determine the begin and end boundaries for the locations in a list. All of the
1894    locations must belong to the same contig and have mostly the same direction in
1895    order for this method to produce a meaningful result. The resulting
1896    begin/end pair will contain all of the bases in any of the locations.
1897    
1898    =over 4
1899    
1900    =item locList
1901    
1902    List of locations to process.
1903    
1904    =item RETURN
1905    
1906    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1907    and the ending boundary. The beginning boundary will be left of the
1908    end for mostly-forward locations and right of the end for mostly-backward
1909    locations.
1910    
1911    =back
1912    
1913    =cut
1914    
sub GetBoundaries {
    # Unpack the invocant and the list of location strings.
    my ($self, @locList) = @_;
    # Tallies of forward and backward locations, used to pick the dominant
    # direction at the end.
    my ($forward, $backward) = (0, 0);
    # Seed the extremes from the last location in the list.
    my $seed = BasicLocation->new(pop @locList);
    my ($contig, $beg, $end) = ($seed->Contig, $seed->Left, $seed->Right);
    my $seedDir = $seed->Dir;
    if ($seedDir eq '-') {
        $backward++;
    } elsif ($seedDir eq '+') {
        $forward++;
    }
    # Widen the extremes with every remaining location. For the common
    # singleton list this loop does not execute at all.
    for my $loc (@locList) {
        my $current = BasicLocation->new($loc);
        my $dir = $current->Dir;
        if ($dir eq '-') {
            $backward++;
        } elsif ($dir eq '+') {
            $forward++;
        }
        my $left = $current->Left;
        my $right = $current->Right;
        $beg = $left if $left < $beg;
        $end = $right if $right > $end;
    }
    # When most locations run backward, report the boundaries right-to-left.
    if ($backward > $forward) {
        return ($contig, $end, $beg);
    }
    return ($contig, $beg, $end);
}
1952    
1953  =head3 CouplingID  =head3 CouplingID
1954    
1955  C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>  C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1956    
1957  Return the coupling ID for a pair of feature IDs.  Return the coupling ID for a pair of feature IDs.
1958    
# Line 1646  Line 1985 
1985  =cut  =cut
1986  #: Return Type $;  #: Return Type $;
1987  sub CouplingID {  sub CouplingID {
1988      return join " ", sort @_;      my ($self, @pegs) = @_;
1989        return $self->DigestKey(join " ", sort @pegs);
1990  }  }
1991    
1992  =head3 ReadFasta  =head3 ReadFasta
# Line 2005  Line 2345 
2345      return @retVal;      return @retVal;
2346  }  }
2347    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
     # Get the parameters.  
     my ($self, $entityName, $entityID) = @_;  
     # Check for the entity instance.  
     Trace("Checking existence of $entityName with ID=$entityID.") if T(4);  
     my $testInstance = $self->GetEntity($entityName, $entityID);  
     # Return an existence indicator.  
     my $retVal = ($testInstance ? 1 : 0);  
     return $retVal;  
 }  
   
2348  =head3 FeatureTranslation  =head3 FeatureTranslation
2349    
2350  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 2577  Line 2882 
2882      return @retVal;      return @retVal;
2883  }  }
2884    
2885    =head3 GenomeSubsystemData
2886    
2887    C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >>
2888    
2889    Return a hash mapping genome features to their subsystem roles.
2890    
2891    =over 4
2892    
2893    =item genomeID
2894    
2895    ID of the genome whose subsystem feature map is desired.
2896    
2897    =item RETURN
2898    
2899    Returns a hash mapping each feature of the genome to a list of 2-tuples. Each
2900    2-tuple contains a subsystem name followed by a role ID.
2901    
2902    =back
2903    
2904    =cut
2905    
sub GenomeSubsystemData {
    # Unpack the invocant and the genome ID.
    my ($self, $genomeID) = @_;
    # First query: one [feature, IsRoleOf(to-link), IsRoleOf(from-link)] row
    # per feature of the genome that appears in a spreadsheet cell, i.e. the
    # feature, its cell, and the role attached to that cell.
    my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],
                                 "HasFeature(from-link) = ?", [$genomeID],
                                 ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);
    # Second query: one [cell, subsystem] row per spreadsheet cell of the
    # genome. The genome ID is bound twice: once to pick up the genome's cells
    # and once to restrict ParticipatesIn to this genome, whose variant code
    # must be non-negative for the subsystem to count.
    my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],
                                 "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",
                                 [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);
    # Index the second result by cell ID so each cell can be looked up directly.
    my %subsystemOfCell;
    for my $cellEntry (@cellData) {
        $subsystemOfCell{$cellEntry->[0]} = $cellEntry->[1];
    }
    # Walk the feature/cell/role rows and keep those whose cell maps to a
    # subsystem, grouping the [subsystem, role] pairs by feature.
    my %rolesOfFeature;
    for my $roleEntry (@roleData) {
        my ($fid, $cellID, $role) = @{$roleEntry};
        my $subsys = $subsystemOfCell{$cellID};
        next unless $subsys;
        push @{$rolesOfFeature{$fid}}, [$subsys, $role];
    }
    # Return the map as a hash (not a reference).
    return %rolesOfFeature;
}
2944    
2945  =head3 RelatedFeatures  =head3 RelatedFeatures
2946    
# Line 2614  Line 2977 
2977      # Get the parameters.      # Get the parameters.
2978      my ($self, $featureID, $function, $userID) = @_;      my ($self, $featureID, $function, $userID) = @_;
2979      # Get a list of the features that are BBHs of the incoming feature.      # Get a list of the features that are BBHs of the incoming feature.
2980      my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'],      my @bbhFeatures = map { $_->[0] } FIGRules::BBHData($featureID);
                                      "IsBidirectionalBestHitOf(from-link) = ?", [$featureID],  
                                      'IsBidirectionalBestHitOf(to-link)');  
2981      # Now we loop through the features, pulling out the ones that have the correct      # Now we loop through the features, pulling out the ones that have the correct
2982      # functional assignment.      # functional assignment.
2983      my @retVal = ();      my @retVal = ();
# Line 2817  Line 3178 
3178      my ($self, $featureID, $cutoff) = @_;      my ($self, $featureID, $cutoff) = @_;
3179      # Create the return hash.      # Create the return hash.
3180      my %retVal = ();      my %retVal = ();
3181      # Create a query to get the desired BBHs.      # Query for the desired BBHs.
3182      my @bbhList = $self->GetAll(['IsBidirectionalBestHitOf'],      my @bbhList = FIGRules::BBHData($featureID, $cutoff);
                                 'IsBidirectionalBestHitOf(sc) <= ? AND IsBidirectionalBestHitOf(from-link) = ?',  
                                 [$cutoff, $featureID],  
                                 ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(sc)']);  
3183      # Form the results into the return hash.      # Form the results into the return hash.
3184      for my $pair (@bbhList) {      for my $pair (@bbhList) {
3185          $retVal{$pair->[0]} = $pair->[1];          $retVal{$pair->[0]} = $pair->[1];
# Line 2830  Line 3188 
3188      return %retVal;      return %retVal;
3189  }  }
3190    
3191    =head3 Sims
3192    
3193    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3194    
3195    Get a list of similarities for a specified feature. Similarity information is not kept in the
3196    Sprout database; rather, they are retrieved from a network server. The similarities are
3197    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3198    so that its elements can be accessed by name.
3199    
3200    Similarities can be either raw or expanded. The raw similarities are basic
3201    hits between features with similar DNA. Expanding a raw similarity drags in any
3202    features considered substantially identical. So, for example, if features B<A1>,
3203    B<A2>, and B<A3> are all substantially identical to B<A>, then a raw similarity
3204    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3205    
3206    =over 4
3207    
3208    =item fid
3209    
3210    ID of the feature whose similarities are desired.
3211    
3212    =item maxN
3213    
3214    Maximum number of similarities to return.
3215    
3216    =item maxP
3217    
3218    Minimum allowable similarity score.
3219    
3220    =item select
3221    
3222    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3223    means only similarities to FIG features are returned; C<all> means all expanded
3224    similarities are returned; and C<figx> means similarities are expanded until the
3225    number of FIG features equals the maximum.
3226    
3227    =item max_expand
3228    
3229    The maximum number of features to expand.
3230    
3231    =item filters
3232    
3233    Reference to a hash containing filter information, or a subroutine that can be
3234    used to filter the sims.
3235    
3236    =item RETURN
3237    
3238    Returns a reference to a list of similarity objects, or C<undef> if an error
3239    occurred.
3240    
3241    =back
3242    
3243    =cut
3244    
sub Sims {
    # The first argument is the Sprout object; the rest are passed through
    # to the similarity server untouched.
    my $self = shift;
    my ($fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
    # The network call is handed a FidCheck shim built around this object,
    # which is used to test for deleted feature IDs.
    my $fidChecker = FidCheck->new($self);
    # Ask the network for the sims. The call is made in scalar context so that
    # exactly one value (the list reference, or undef) is handed back.
    return scalar FIGRules::GetNetworkSims($fidChecker, $fid, {}, $maxN, $maxP,
                                           $select, $max_expand, $filters);
}
3255    
3256  =head3 GetGroups  =head3 GetGroups
3257    
3258  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>
# Line 2987  Line 3410 
3410      return $retVal;      return $retVal;
3411  }  }
3412    
3413    =head3 Fix
3414    
3415    C<< my %fixedHash = Sprout::Fix(%groupHash); >>
3416    
3417    Prepare a genome group hash (like that returned by L</GetGroups>) for processing.
3418    Groups with the same primary name will be combined. The primary name is the
3419    first capitalized word in the group name.
3420    
3421    =over 4
3422    
3423    =item groupHash
3424    
3425    Hash to be fixed up.
3426    
3427    =item RETURN
3428    
3429    Returns a fixed-up version of the hash.
3430    
3431    =back
3432    
3433    =cut
3434    
sub Fix {
    # The caller passes the group hash flattened into the argument list.
    my %groupHash = @_;
    # Groups are re-keyed by primary name into this hash.
    my %fixed;
    while (my ($groupName, $genomeList) = each %groupHash) {
        # The primary name is the first upper-case letter followed by one or
        # more word characters. A name with no such run is kept unchanged.
        my $primaryName = ($groupName =~ /([A-Z]\w+)/) ? $1 : $groupName;
        # Merge this group's genomes into the entry for its primary name.
        Tracer::AddToListMap(\%fixed, $primaryName, @{$genomeList});
    }
    # Hand back the combined hash.
    return %fixed;
}
3454    
3455    =head3 GroupPageName
3456    
3457    C<< my $name = $sprout->GroupPageName($group); >>
3458    
3459    Return the name of the page for the specified NMPDR group.
3460    
3461    =over 4
3462    
3463    =item group
3464    
3465    Name of the relevant group.
3466    
3467    =item RETURN
3468    
3469    Returns the relative page name (e.g. C<../content/campy.php>). If the group file is not in
3470    memory it will be read in.
3471    
3472    =back
3473    
3474    =cut
3475    
sub GroupPageName {
    # Get the parameters.
    my ($self, $group) = @_;
    # Load the group file the first time it is needed and cache it in the
    # object, so later calls do not re-read it.
    if (! defined $self->{groupHash}) {
        my %groupData = Sprout::ReadGroupFile($self->{_options}->{dataDir} . "/groups.tbl");
        $self->{groupHash} = \%groupData;
    }
    # Compute the real group name: the first upper-case letter followed by
    # word characters, or the incoming name itself if there is no such run.
    my $realGroup = $group;
    if ($group =~ /([A-Z]\w+)/) {
        $realGroup = $1;
    }
    # Fetch the group's record without dereferencing through a missing key,
    # so that asking about an unknown group does not plant an empty entry
    # in the cached hash.
    my $groupRecord = $self->{groupHash}->{$realGroup};
    # ReadGroupFile stores each record as [page, genus, species], so the page
    # name is element 0. An unknown group yields the bare content directory.
    my $page = "";
    if (defined $groupRecord && defined $groupRecord->[0]) {
        $page = $groupRecord->[0];
    }
    # Return the relative page name.
    return "../content/" . $page;
}
3498    
3499    =head3 ReadGroupFile
3500    
3501    C<< my %groupData = Sprout::ReadGroupFile($groupFileName); >>
3502    
3503    Read in the data from the specified group file. The group file contains information
3504    about each of the NMPDR groups.
3505    
3506    =over 4
3507    
3508    =item name
3509    
3510    Name of the group.
3511    
3512    =item page
3513    
3514    Name of the group's page on the web site (e.g. C<campy.php> for
3515    Campylobacter)
3516    
3517    =item genus
3518    
3519    Genus of the group
3520    
3521    =item species
3522    
3523    Species of the group, or an empty string if the group is for an entire
3524    genus. If the group contains more than one species, the species names
3525    should be separated by commas.
3526    
3527    =back
3528    
3529    The parameters to this method are as follows
3530    
3531    =over 4
3532    
3533    =item groupFile
3534    
3535    Name of the file containing the group data.
3536    
3537    =item RETURN
3538    
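3539    Returns a hash keyed on group name. The value of each hash entry is a reference to a list containing the group's page name, genus, and species, in that order.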
3540    
3541    =back
3542    
3543    =cut
3544    
sub ReadGroupFile {
    # Get the parameters.
    my ($groupFileName) = @_;
    # Declare the return variable: group name => [page, genus, species].
    my %retVal;
    # Read the group file, one list element per line.
    my @groupLines = Tracer::GetFile($groupFileName);
    for my $groupLine (@groupLines) {
        # Work on a copy and drop any line terminator that may still be
        # attached, so it cannot end up inside the last field.
        (my $line = $groupLine) =~ s/[\r\n]+$//;
        # Blank lines carry no group; skip them instead of creating an
        # entry under the empty key.
        next if $line !~ /\S/;
        # The fields are tab-delimited: name, page, genus, species. The
        # species is undefined if the line has no fourth field.
        my ($name, $page, $genus, $species) = split(/\t/, $line);
        # A line with no group name cannot be keyed; skip it.
        next if ! defined $name || $name eq '';
        $retVal{$name} = [$page, $genus, $species];
    }
    # Return the result.
    return %retVal;
}
3559    
3560  =head2 Internal Utility Methods  =head2 Internal Utility Methods
3561    
3562  =head3 ParseAssignment  =head3 ParseAssignment
# Line 3043  Line 3613 
3613      }      }
3614      # If we have an assignment, we need to clean the function text. There may be      # If we have an assignment, we need to clean the function text. There may be
3615      # extra junk at the end added as a note from the user.      # extra junk at the end added as a note from the user.
3616      if (@retVal) {      if (defined( $retVal[1] )) {
3617          $retVal[1] =~ s/(\t\S)?\s*$//;          $retVal[1] =~ s/(\t\S)?\s*$//;
3618      }      }
3619      # Return the result list.      # Return the result list.

Legend:
Removed from v.1.58  
changed lines
  Added in v.1.85

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3