[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.63, Sun Jun 18 07:03:00 2006 UTC revision 1.98, Tue Apr 10 06:13:33 2007 UTC
# Line 5  Line 5 
5      @ISA = qw(Exporter ERDB);      @ISA = qw(Exporter ERDB);
6      use Data::Dumper;      use Data::Dumper;
7      use strict;      use strict;
     use Carp;  
8      use DBKernel;      use DBKernel;
9      use XML::Simple;      use XML::Simple;
10      use DBQuery;      use DBQuery;
11      use DBObject;      use ERDBObject;
12      use Tracer;      use Tracer;
13      use FIGRules;      use FIGRules;
14        use FidCheck;
15      use Stats;      use Stats;
16      use POSIX qw(strftime);      use POSIX qw(strftime);
17        use BasicLocation;
18    
19  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
20    
# Line 91  Line 91 
91  sub new {  sub new {
92      # Get the parameters.      # Get the parameters.
93      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
94        # Compute the DBD directory.
95        my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
96                                                      $FIG_Config::fig );
97      # Compute the options. We do this by starting with a table of defaults and overwriting with      # Compute the options. We do this by starting with a table of defaults and overwriting with
98      # the incoming data.      # the incoming data.
99      my $optionTable = Tracer::GetOptions({      my $optionTable = Tracer::GetOptions({
# Line 98  Line 101 
101                                                          # database type                                                          # database type
102                         dataDir      => $FIG_Config::sproutData,                         dataDir      => $FIG_Config::sproutData,
103                                                          # data file directory                                                          # data file directory
104                         xmlFileName  => "$FIG_Config::fig/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
105                                                          # database definition file name                                                          # database definition file name
106                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",
107                                                          # user name and password                                                          # user name and password
108                         port         => $FIG_Config::dbport,                         port         => $FIG_Config::dbport,
109                                                          # database connection port                                                          # database connection port
110                         sock         => $FIG_Config::dbsock,                         sock         => $FIG_Config::dbsock,
111                           host         => $FIG_Config::dbhost,
112                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
113                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
114                         noDBOpen     => 0,               # 1 to suppress the database open                         noDBOpen     => 0,               # 1 to suppress the database open
# Line 118  Line 122 
122      my $dbh;      my $dbh;
123      if (! $optionTable->{noDBOpen}) {      if (! $optionTable->{noDBOpen}) {
124          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,          $dbh = DBKernel->new($optionTable->{dbType}, $dbName, $userName,
125                                  $password, $optionTable->{port}, undef, $optionTable->{sock});                                  $password, $optionTable->{port}, $optionTable->{host}, $optionTable->{sock});
126      }      }
127      # Create the ERDB object.      # Create the ERDB object.
128      my $xmlFileName = "$optionTable->{xmlFileName}";      my $xmlFileName = "$optionTable->{xmlFileName}";
# Line 126  Line 130 
130      # Add the option table and XML file name.      # Add the option table and XML file name.
131      $retVal->{_options} = $optionTable;      $retVal->{_options} = $optionTable;
132      $retVal->{_xmlName} = $xmlFileName;      $retVal->{_xmlName} = $xmlFileName;
133        # Set up space for the group file data.
134        $retVal->{groupHash} = undef;
135      # Return it.      # Return it.
136      return $retVal;      return $retVal;
137  }  }
# Line 335  Line 341 
341    
342  =head3 GeneMenu  =head3 GeneMenu
343    
344  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params); >>  C<< my $selectHtml = $sprout->GeneMenu(\%attributes, $filterString, \@params, $selected); >>
345    
346  Return an HTML select menu of genomes. Each genome will be an option in the menu,  Return an HTML select menu of genomes. Each genome will be an option in the menu,
347  and will be displayed by name with the ID and a contig count attached. The selection  and will be displayed by name with the ID and a contig count attached. The selection
# Line 357  Line 363 
363  Reference to a list of values to be substituted in for the parameter marks in  Reference to a list of values to be substituted in for the parameter marks in
364  the filter string.  the filter string.
365    
366    =item selected (optional)
367    
368    ID of the genome to be initially selected.
369    
370    =item fast (optional)
371    
372    If specified and TRUE, the contig counts will be omitted to improve performance.
373    
374  =item RETURN  =item RETURN
375    
376  Returns an HTML select menu with the specified genomes as selectable options.  Returns an HTML select menu with the specified genomes as selectable options.
# Line 367  Line 381 
381    
382  sub GeneMenu {  sub GeneMenu {
383      # Get the parameters.      # Get the parameters.
384      my ($self, $attributes, $filterString, $params) = @_;      my ($self, $attributes, $filterString, $params, $selected, $fast) = @_;
385        my $slowMode = ! $fast;
386        # Default to nothing selected. This prevents an execution warning if "$selected"
387        # is undefined.
388        $selected = "" unless defined $selected;
389        Trace("Gene Menu called with slow mode \"$slowMode\" and selection \"$selected\".") if T(3);
390      # Start the menu.      # Start the menu.
391      my $retVal = "<select " .      my $retVal = "<select " .
392          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .          join(" ", map { "$_=\"$attributes->{$_}\"" } keys %{$attributes}) .
# Line 375  Line 394 
394      # Get the genomes.      # Get the genomes.
395      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',      my @genomes = $self->GetAll(['Genome'], $filterString, $params, ['Genome(id)',
396                                                                       'Genome(genus)',                                                                       'Genome(genus)',
397                                                                       'Genome(species)']);                                                                       'Genome(species)',
398                                                                         'Genome(unique-characterization)']);
399      # Sort them by name.      # Sort them by name.
400      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;      my @sorted = sort { lc("$a->[1] $a->[2]") cmp lc("$b->[1] $b->[2]") } @genomes;
401      # Loop through the genomes, creating the option tags.      # Loop through the genomes, creating the option tags.
402      for my $genomeData (@sorted) {      for my $genomeData (@sorted) {
403          # Get the data for this genome.          # Get the data for this genome.
404          my ($genomeID, $genus, $species) = @{$genomeData};          my ($genomeID, $genus, $species, $strain) = @{$genomeData};
405          # Get the contig count.          # Get the contig count.
406            my $contigInfo = "";
407            if ($slowMode) {
408          my $count = $self->ContigCount($genomeID);          my $count = $self->ContigCount($genomeID);
409          my $counting = ($count == 1 ? "contig" : "contigs");          my $counting = ($count == 1 ? "contig" : "contigs");
410                $contigInfo = "[$count $counting]";
411            }
412            # Find out if we're selected.
413            my $selectOption = ($selected eq $genomeID ? " selected" : "");
414          # Build the option tag.          # Build the option tag.
415          $retVal .= "<option value=\"$genomeID\">$genus $species ($genomeID) [$count $counting]</option>\n";          $retVal .= "<option value=\"$genomeID\"$selectOption>$genus $species $strain ($genomeID)$contigInfo</option>\n";
416      }      }
417      # Close the SELECT tag.      # Close the SELECT tag.
418      $retVal .= "</select>\n";      $retVal .= "</select>\n";
419      # Return the result.      # Return the result.
420      return $retVal;      return $retVal;
421  }  }
422    
423  =head3 Build  =head3 Build
424    
425  C<< $sprout->Build(); >>  C<< $sprout->Build(); >>
# Line 627  Line 654 
654      return ($contigID, $start, $dir, $len);      return ($contigID, $start, $dir, $len);
655  }  }
656    
657    
658    
659  =head3 PointLocation  =head3 PointLocation
660    
661  C<< my $found = Sprout::PointLocation($location, $point); >>  C<< my $found = Sprout::PointLocation($location, $point); >>
# Line 836  Line 865 
865    
866  =over 4  =over 4
867    
868  =genomeID  =item genomeID
869    
870  ID of the genome whose feature count is desired.  ID of the genome whose feature count is desired.
871    
# Line 857  Line 886 
886      my ($self, $genomeID, $type) = @_;      my ($self, $genomeID, $type) = @_;
887      # Compute the count.      # Compute the count.
888      my $retVal = $self->GetCount(['HasFeature', 'Feature'],      my $retVal = $self->GetCount(['HasFeature', 'Feature'],
889                                  "HasFeature(from-link) = ? AND Feature(type) = ?",                                  "HasFeature(from-link) = ? AND Feature(feature-type) = ?",
890                                  [$genomeID, $type]);                                  [$genomeID, $type]);
891      # Return the result.      # Return the result.
892      return $retVal;      return $retVal;
# Line 891  Line 920 
920      my ($self, $genomeID) = @_;      my ($self, $genomeID) = @_;
921      # Declare the return variable.      # Declare the return variable.
922      my $retVal = {};      my $retVal = {};
923      # Query the genome's features and annotations. We'll put the oldest annotations      # Query the genome's features.
924      # first so that the last assignment to go into the hash will be the correct one.      my $query = $self->Get(['HasFeature', 'Feature'], "HasFeature(from-link) = ?",
     my $query = $self->Get(['HasFeature', 'IsTargetOfAnnotation', 'Annotation'],  
                            "HasFeature(from-link) = ? ORDER BY Annotation(time)",  
925                             [$genomeID]);                             [$genomeID]);
926      # Loop through the annotations.      # Loop through the features.
927      while (my $data = $query->Fetch) {      while (my $data = $query->Fetch) {
928          # Get the feature ID and annotation text.          # Get the feature ID and assignment.
929          my ($fid, $annotation) = $data->Values(['HasFeature(from-link)',          my ($fid, $assignment) = $data->Values(['Feature(id)', 'Feature(assignment)']);
930                                                  'Annotation(text)']);          if ($assignment) {
         # Check to see if this is an assignment. Note that the user really  
         # doesn't matter to us, other than we use it to determine whether or  
         # not this is an assignment.  
         my ($user, $assignment) = $self->_ParseAssignment('fig', $annotation);  
         if ($user) {  
             # Here it's an assignment. We put it in the return hash, overwriting  
             # any older assignment that might be present.  
931              $retVal->{$fid} = $assignment;              $retVal->{$fid} = $assignment;
932          }          }
933      }      }
# Line 1269  Line 1289 
1289  Return the most recently-determined functional assignment of a particular feature.  Return the most recently-determined functional assignment of a particular feature.
1290    
1291  The functional assignment is handled differently depending on the type of feature. If  The functional assignment is handled differently depending on the type of feature. If
1292  the feature is identified by a FIG ID (begins with the string C<fig|>), then a functional  the feature is identified by a FIG ID (begins with the string C<fig|>), then the functional
1293  assignment is a type of annotation. The format of an assignment is described in  assignment is taken from the B<Feature> or C<Annotation> table, depending.
 L</ParseAssignment>. Its worth noting that we cannot filter on the content of the  
 annotation itself because it's a text field; however, this is not a big problem because  
 most features only have a small number of annotations.  
1294    
1295  Each user has an associated list of trusted users. The assignment returned will be the most  Each user has an associated list of trusted users. The assignment returned will be the most
1296  recent one by at least one of the trusted users. If no trusted user list is available, then  recent one by at least one of the trusted users. If no trusted user list is available, then
# Line 1292  Line 1309 
1309    
1310  =item userID (optional)  =item userID (optional)
1311    
1312  ID of the user whose function determination is desired. If omitted, only the latest  ID of the user whose function determination is desired. If omitted, the primary
1313  C<FIG> assignment will be returned.  functional assignment in the B<Feature> table will be returned.
1314    
1315  =item RETURN  =item RETURN
1316    
# Line 1310  Line 1327 
1327      my $retVal;      my $retVal;
1328      # Determine the ID type.      # Determine the ID type.
1329      if ($featureID =~ m/^fig\|/) {      if ($featureID =~ m/^fig\|/) {
1330          # Here we have a FIG feature ID. We must build the list of trusted          # Here we have a FIG feature ID.
1331          # users.          if (!$userID) {
1332                # Use the primary assignment.
1333                ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(assignment)']);
1334            } else {
1335                # We must build the list of trusted users.
1336          my %trusteeTable = ();          my %trusteeTable = ();
1337          # Check the user ID.          # Check the user ID.
1338          if (!$userID) {          if (!$userID) {
# Line 1354  Line 1375 
1375                  }                  }
1376              }              }
1377          }          }
1378            }
1379      } else {      } else {
1380          # Here we have a non-FIG feature ID. In this case the user ID does not          # Here we have a non-FIG feature ID. In this case the user ID does not
1381          # matter. We simply get the information from the External Alias Function          # matter. We simply get the information from the External Alias Function
# Line 1469  Line 1491 
1491      my %retVal = ();      my %retVal = ();
1492      # Loop through the incoming features.      # Loop through the incoming features.
1493      for my $featureID (@{$featureList}) {      for my $featureID (@{$featureList}) {
1494          # Create a query to get the feature's best hit.          # Ask the server for the feature's best hit.
1495          my $query = $self->Get(['IsBidirectionalBestHitOf'],          my @bbhData = FIGRules::BBHData($featureID);
                                "IsBidirectionalBestHitOf(from-link) = ? AND IsBidirectionalBestHitOf(genome) = ?",  
                                [$featureID, $genomeID]);  
1496          # Peel off the BBHs found.          # Peel off the BBHs found.
1497          my @found = ();          my @found = ();
1498          while (my $bbh = $query->Fetch) {          for my $bbh (@bbhData) {
1499              push @found, $bbh->Value('IsBidirectionalBestHitOf(to-link)');              my $fid = $bbh->[0];
1500                my $bbGenome = $self->GenomeOf($fid);
1501                if ($bbGenome eq $genomeID) {
1502                    push @found, $fid;
1503                }
1504          }          }
1505          $retVal{$featureID} = \@found;          $retVal{$featureID} = \@found;
1506      }      }
# Line 1490  Line 1514 
1514    
1515  Return a list of the similarities to the specified feature.  Return a list of the similarities to the specified feature.
1516    
1517  Sprout does not support real similarities, so this method just returns the bidirectional  This method just returns the bidirectional best hits for performance reasons.
 best hits.  
1518    
1519  =over 4  =over 4
1520    
# Line 1511  Line 1534 
1534      # Get the parameters.      # Get the parameters.
1535      my ($self, $featureID, $count) = @_;      my ($self, $featureID, $count) = @_;
1536      # Ask for the best hits.      # Ask for the best hits.
1537      my @lists = $self->GetAll(['IsBidirectionalBestHitOf'],      my @lists = FIGRules::BBHData($featureID);
                               "IsBidirectionalBestHitOf(from-link) = ? ORDER BY IsBidirectionalBestHitOf(score) DESC",  
                               [$featureID], ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(score)'],  
                               $count);  
1538      # Create the return value.      # Create the return value.
1539      my %retVal = ();      my %retVal = ();
1540      for my $tuple (@lists) {      for my $tuple (@lists) {
# Line 1524  Line 1544 
1544      return %retVal;      return %retVal;
1545  }  }
1546    
   
   
1547  =head3 IsComplete  =head3 IsComplete
1548    
1549  C<< my $flag = $sprout->IsComplete($genomeID); >>  C<< my $flag = $sprout->IsComplete($genomeID); >>
# Line 1653  Line 1671 
1671  sub CoupledFeatures {  sub CoupledFeatures {
1672      # Get the parameters.      # Get the parameters.
1673      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1674        Trace("Looking for features coupled to $featureID.") if T(coupling => 3);
1675      # Create a query to retrieve the functionally-coupled features.      # Create a query to retrieve the functionally-coupled features.
1676      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],      my $query = $self->Get(['ParticipatesInCoupling', 'Coupling'],
1677                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);                             "ParticipatesInCoupling(from-link) = ?", [$featureID]);
# Line 1665  Line 1684 
1684          # Get the ID and score of the coupling.          # Get the ID and score of the coupling.
1685          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',          my ($couplingID, $score) = $clustering->Values(['Coupling(id)',
1686                                                          'Coupling(score)']);                                                          'Coupling(score)']);
1687            Trace("$featureID coupled with score $score to ID $couplingID.") if T(coupling => 4);
1688          # Get the other feature that participates in the coupling.          # Get the other feature that participates in the coupling.
1689          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],          my ($otherFeatureID) = $self->GetFlat(['ParticipatesInCoupling'],
1690                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",                                             "ParticipatesInCoupling(to-link) = ? AND ParticipatesInCoupling(from-link) <> ?",
1691                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');                                             [$couplingID, $featureID], 'ParticipatesInCoupling(from-link)');
1692            Trace("$couplingID target feature is $otherFeatureID.") if T(coupling => 4);
1693          # Attach the other feature's score to its ID.          # Attach the other feature's score to its ID.
1694          $retVal{$otherFeatureID} = $score;          $retVal{$otherFeatureID} = $score;
1695          $found = 1;          $found = 1;
# Line 1801  Line 1822 
1822      my ($self, $peg1, $peg2) = @_;      my ($self, $peg1, $peg2) = @_;
1823      # Declare the return values. We'll start with the coupling ID and undefine the      # Declare the return values. We'll start with the coupling ID and undefine the
1824      # flag and score until we have more information.      # flag and score until we have more information.
1825      my ($retVal, $inverted, $score) = (CouplingID($peg1, $peg2), undef, undef);      my ($retVal, $inverted, $score) = ($self->CouplingID($peg1, $peg2), undef, undef);
1826      # Find the coupling data.      # Find the coupling data.
1827      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],      my @pegs = $self->GetAll(['Coupling', 'ParticipatesInCoupling'],
1828                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",                                   "Coupling(id) = ? ORDER BY ParticipatesInCoupling(pos)",
# Line 1822  Line 1843 
1843      return ($retVal, $inverted, $score);      return ($retVal, $inverted, $score);
1844  }  }
1845    
1846    =head3 GetSynonymGroup
1847    
1848    C<< my $id = $sprout->GetSynonymGroup($fid); >>
1849    
1850    Return the synonym group name for the specified feature.
1851    
1852    =over 4
1853    
1854    =item fid
1855    
1856    ID of the feature whose synonym group is desired.
1857    
1858    =item RETURN
1859    
1860    The name of the synonym group to which the feature belongs. If the feature does
1861    not belong to a synonym group, the feature ID itself is returned.
1862    
1863    =back
1864    
1865    =cut
1866    
1867    sub GetSynonymGroup {
1868        # Get the parameters.
1869        my ($self, $fid) = @_;
1870        # Declare the return variable.
1871        my $retVal;
1872        # Find the synonym group.
1873        my @groups = $self->GetFlat(['IsSynonymGroupFor'], "IsSynonymGroupFor(to-link) = ?",
1874                                       [$fid], 'IsSynonymGroupFor(from-link)');
1875        # Check to see if we found anything.
1876        if (@groups) {
1877            $retVal = $groups[0];
1878        } else {
1879            $retVal = $fid;
1880        }
1881        # Return the result.
1882        return $retVal;
1883    }
1884    
1885    =head3 GetBoundaries
1886    
1887    C<< my ($contig, $beg, $end) = $sprout->GetBoundaries(@locList); >>
1888    
1889    Determine the begin and end boundaries for the locations in a list. All of the
1890    locations must belong to the same contig and have mostly the same direction in
1891    order for this method to produce a meaningful result. The resulting
1892    begin/end pair will contain all of the bases in any of the locations.
1893    
1894    =over 4
1895    
1896    =item locList
1897    
1898    List of locations to process.
1899    
1900    =item RETURN
1901    
1902    Returns a 3-tuple consisting of the contig ID, the beginning boundary,
1903    and the ending boundary. The beginning boundary will be left of the
1904    end for mostly-forward locations and right of the end for mostly-backward
1905    locations.
1906    
1907    =back
1908    
1909    =cut
1910    
1911    sub GetBoundaries {
1912        # Get the parameters.
1913        my ($self, @locList) = @_;
1914        # Set up the counters used to determine the most popular direction.
1915        my %counts = ( '+' => 0, '-' => 0 );
1916        # Get the last location and parse it.
1917        my $locObject = BasicLocation->new(pop @locList);
1918        # Prime the loop with its data.
1919        my ($contig, $beg, $end) = ($locObject->Contig, $locObject->Left, $locObject->Right);
1920        # Count its direction.
1921        $counts{$locObject->Dir}++;
1922        # Loop through the remaining locations. Note that in most situations, this loop
1923        # will not iterate at all, because most of the time we will be dealing with a
1924        # singleton list.
1925        for my $loc (@locList) {
1926            # Create a location object.
1927            my $locObject = BasicLocation->new($loc);
1928            # Count the direction.
1929            $counts{$locObject->Dir}++;
1930            # Get the left end and the right end.
1931            my $left = $locObject->Left;
1932            my $right = $locObject->Right;
1933            # Merge them into the return variables.
1934            if ($left < $beg) {
1935                $beg = $left;
1936            }
1937            if ($right > $end) {
1938                $end = $right;
1939            }
1940        }
1941        # If the most common direction is reverse, flip the begin and end markers.
1942        if ($counts{'-'} > $counts{'+'}) {
1943            ($beg, $end) = ($end, $beg);
1944        }
1945        # Return the result.
1946        return ($contig, $beg, $end);
1947    }
1948    
1949  =head3 CouplingID  =head3 CouplingID
1950    
1951  C<< my $couplingID = Sprout::CouplingID($peg1, $peg2); >>  C<< my $couplingID = $sprout->CouplingID($peg1, $peg2); >>
1952    
1953  Return the coupling ID for a pair of feature IDs.  Return the coupling ID for a pair of feature IDs.
1954    
# Line 1857  Line 1981 
1981  =cut  =cut
1982  #: Return Type $;  #: Return Type $;
1983  sub CouplingID {  sub CouplingID {
1984      return join " ", sort @_;      my ($self, @pegs) = @_;
1985        return $self->DigestKey(join " ", sort @pegs);
1986  }  }
1987    
1988  =head3 ReadFasta  =head3 ReadFasta
# Line 2216  Line 2341 
2341      return @retVal;      return @retVal;
2342  }  }
2343    
 =head3 Exists  
   
 C<< my $found = $sprout->Exists($entityName, $entityID); >>  
   
 Return TRUE if an entity exists, else FALSE.  
   
 =over 4  
   
 =item entityName  
   
 Name of the entity type (e.g. C<Feature>) relevant to the existence check.  
   
 =item entityID  
   
 ID of the entity instance whose existence is to be checked.  
   
 =item RETURN  
   
 Returns TRUE if the entity instance exists, else FALSE.  
   
 =back  
   
 =cut  
 #: Return Type $;  
 sub Exists {  
     # Get the parameters.  
     my ($self, $entityName, $entityID) = @_;  
     # Check for the entity instance.  
     Trace("Checking existence of $entityName with ID=$entityID.") if T(4);  
     my $testInstance = $self->GetEntity($entityName, $entityID);  
     # Return an existence indicator.  
     my $retVal = ($testInstance ? 1 : 0);  
     return $retVal;  
 }  
   
2344  =head3 FeatureTranslation  =head3 FeatureTranslation
2345    
2346  C<< my $translation = $sprout->FeatureTranslation($featureID); >>  C<< my $translation = $sprout->FeatureTranslation($featureID); >>
# Line 2585  Line 2675 
2675      return $retVal;      return $retVal;
2676  }  }
2677    
2678    =head3 PropertyID
2679    
2680    C<< my $id = $sprout->PropertyID($propName, $propValue); >>
2681    
2682    Return the ID of the specified property name and value pair, if the
2683    pair exists.
2684    
2685    =over 4
2686    
2687    =item propName
2688    
2689    Name of the desired property.
2690    
2691    =item propValue
2692    
2693    Value expected for the desired property.
2694    
2695    =item RETURN
2696    
2697    Returns the ID of the name/value pair, or C<undef> if the pair does not exist.
2698    
2699    =back
2700    
2701    =cut
2702    
2703    sub PropertyID {
2704        # Get the parameters.
2705        my ($self, $propName, $propValue) = @_;
2706        # Try to find the ID.
2707        my ($retVal) = $self->GetFlat(['Property'],
2708                                      "Property(property-name) = ? AND Property(property-value) = ?",
2709                                      [$propName, $propValue], 'Property(id)');
2710        # Return the result.
2711        return $retVal;
2712    }
2713    
2714  =head3 MergedAnnotations  =head3 MergedAnnotations
2715    
2716  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>  C<< my @annotationList = $sprout->MergedAnnotations(\@list); >>
# Line 2782  Line 2908 
2908      # Get the parameters.      # Get the parameters.
2909      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
2910      # Get the list of names.      # Get the list of names.
2911      my @retVal = $self->GetFlat(['ContainsFeature', 'HasSSCell'], "ContainsFeature(to-link) = ?",      my @retVal = $self->GetFlat(['HasRoleInSubsystem'], "HasRoleInSubsystem(from-link) = ?",
2912                                  [$featureID], 'HasSSCell(from-link)');                                  [$featureID], 'HasRoleInSubsystem(to-link)');
2913      # Return the result.      # Return the result, sorted.
2914      return @retVal;      return sort @retVal;
2915  }  }
2916    
2917    =head3 GenomeSubsystemData
2918    
2919    C<< my %featureData = $sprout->GenomeSubsystemData($genomeID); >>
2920    
2921    Return a hash mapping genome features to their subsystem roles.
2922    
2923    =over 4
2924    
2925    =item genomeID
2926    
2927    ID of the genome whose subsystem feature map is desired.
2928    
2929    =item RETURN
2930    
2931    Returns a hash mapping each feature of the genome to a list of 2-tuples. Each
2932    2-tuple contains a subsystem name followed by a role ID.
2933    
2934    =back
2935    
2936    =cut
2937    
2938    sub GenomeSubsystemData {
2939        # Get the parameters: the Sprout object and the ID of the genome of interest.
2940        my ($self, $genomeID) = @_;
2941        # Declare the return variable. It maps feature IDs to lists of [subsystem, role] pairs.
2942        my %retVal = ();
2943        # Get a list of the genome features that participate in subsystems. For each
2944        # feature we get its spreadsheet cells and the corresponding roles. Each returned
2945        my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],
2946                                 "HasFeature(from-link) = ?", [$genomeID],
2947                                 ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);
2948        # Now we get a list of the spreadsheet cells and their associated subsystems. Subsystems
2949        # with an unknown variant code (-1) are skipped. Note the genome ID is at both ends of the
2950        # list. We use it at the beginning to get all the spreadsheet cells for the genome and
2951        # again at the end to filter out participation in subsystems with a negative variant code.
2952        my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],
2953                                     "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",
2954                                     [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);
2955        # Now "@roleData" lists the spreadsheet cell and role for each of the genome's features.
2956        # "@cellData" lists the subsystem name for each of the genome's spreadsheet cells. We
2957        # link these two lists together to create the result. First, we want a hash mapping
2958        # spreadsheet cells (field 0 of each @cellData row) to subsystem names (field 1).
2959        my %subHash = map { $_->[0] => $_->[1] } @cellData;
2960        # We loop through @roleData to build the return hash.
2961        for my $roleEntry (@roleData) {
2962            # Get the data for this feature and cell. The fields arrive in the order requested
2963            my ($fid, $cellID, $role) = @{$roleEntry};
2964            # Check for a subsystem name. Cells absent from %subHash (not returned by the @cellData query) are skipped.
2965            my $subsys = $subHash{$cellID};
2966            if ($subsys) {
2967                # Insure this feature has an entry in the return hash.
2968                if (! exists $retVal{$fid}) { $retVal{$fid} = []; }
2969                # Merge in this new data as a [subsystem name, role] pair.
2970                push @{$retVal{$fid}}, [$subsys, $role];
2971            }
2972        }
2973        # Return the result. Note that the hash is returned as a list, not as a reference.
2974        return %retVal;
2975    }
2976    
2977  =head3 RelatedFeatures  =head3 RelatedFeatures
2978    
# Line 2825  Line 3009 
3009      # Get the parameters.      # Get the parameters.
3010      my ($self, $featureID, $function, $userID) = @_;      my ($self, $featureID, $function, $userID) = @_;
3011      # Get a list of the features that are BBHs of the incoming feature.      # Get a list of the features that are BBHs of the incoming feature.
3012      my @bbhFeatures = $self->GetFlat(['IsBidirectionalBestHitOf'],      my @bbhFeatures = map { $_->[0] } FIGRules::BBHData($featureID);
                                      "IsBidirectionalBestHitOf(from-link) = ?", [$featureID],  
                                      'IsBidirectionalBestHitOf(to-link)');  
3013      # Now we loop through the features, pulling out the ones that have the correct      # Now we loop through the features, pulling out the ones that have the correct
3014      # functional assignment.      # functional assignment.
3015      my @retVal = ();      my @retVal = ();
# Line 2963  Line 3145 
3145      # Loop through the input triples.      # Loop through the input triples.
3146      my $n = length $sequence;      my $n = length $sequence;
3147      for (my $i = 0; $i < $n; $i += 3) {      for (my $i = 0; $i < $n; $i += 3) {
3148          # Get the current triple from the sequence.          # Get the current triple from the sequence. Note we convert to
3149          my $triple = substr($sequence, $i, 3);          # upper case to insure a match.
3150            my $triple = uc substr($sequence, $i, 3);
3151          # Translate it using the table.          # Translate it using the table.
3152          my $protein = "X";          my $protein = "X";
3153          if (exists $table->{$triple}) { $protein = $table->{$triple}; }          if (exists $table->{$triple}) { $protein = $table->{$triple}; }
# Line 2997  Line 3180 
3180      return @retVal;      return @retVal;
3181  }  }
3182    
3183    =head3 BBHMatrix
3184    
3185    C<< my %bbhMap = $sprout->BBHMatrix($genomeID, $cutoff, @targets); >>
3186    
3187    Find all the bidirectional best hits for the features of a genome in a
3188    specified list of target genomes. The return value will be a hash mapping
3189    features in the original genome to their bidirectional best hits in the
3190    target genomes.
3191    
3192    =over 4
3193    
3194    =item genomeID
3195    
3196    ID of the genome whose features are to be examined for bidirectional best hits.
3197    
3198    =item cutoff
3199    
3200    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3201    
3202    =item targets
3203    
3204    List of target genomes. Only pairs originating in the original
3205    genome and landing in one of the target genomes will be returned.
3206    
3207    =item RETURN
3208    
3209    Returns a hash mapping each feature in the original genome to a hash mapping its
3210    BBH pegs in the target genomes to their scores.
3211    
3212    =back
3213    
3214    =cut
3215    
3216    sub BBHMatrix {
3217        # Get the parameters. Everything after the cutoff is taken as a target genome ID.
3218        my ($self, $genomeID, $cutoff, @targets) = @_;
3219        # Declare the return variable.
3220        my %retVal = ();
3221        # Ask for the BBHs. The first argument is built from the genome ID; presumably it is a wild-card pattern matching all of the genome's feature IDs -- confirm against FIGRules::BatchBBHs.
3222        my @bbhList = FIGRules::BatchBBHs("fig|$genomeID.%", $cutoff, @targets);
3223        # We now have a set of tuples that we need to convert into a hash of hashes. Only the first three fields of each tuple are used. NOTE(review): this comment used to say "4-tuples"; confirm what any fourth field holds.
3224        for my $bbhData (@bbhList) {
3225            my ($peg1, $peg2, $score) = @{$bbhData};
3226            if (! exists $retVal{$peg1}) {
3227                $retVal{$peg1} = { $peg2 => $score };
3228            } else {
3229                $retVal{$peg1}->{$peg2} = $score;
3230            }
3231        }
3232        # Return the result. Note that the hash is returned as a list, not as a reference.
3233        return %retVal;
3234    }
3235    
3236    
3237    =head3 SimMatrix
3238    
3239    C<< my %simMap = $sprout->SimMatrix($genomeID, $cutoff, @targets); >>
3240    
3241    Find all the similarities for the features of a genome in a
3242    specified list of target genomes. The return value will be a hash mapping
3243    features in the original genome to their similarities in the
3244    target genomes.
3245    
3246    =over 4
3247    
3248    =item genomeID
3249    
3250    ID of the genome whose features are to be examined for similarities.
3251    
3252    =item cutoff
3253    
3254    A cutoff value. Only hits with a score lower than the cutoff will be returned.
3255    
3256    =item targets
3257    
3258    List of target genomes. Only pairs originating in the original
3259    genome and landing in one of the target genomes will be returned.
3260    
3261    =item RETURN
3262    
3263    Returns a hash mapping each feature in the original genome to a hash mapping its
3264    similar pegs in the target genomes to their scores.
3265    
3266    =back
3267    
3268    =cut
3269    
3270    sub SimMatrix {
3271        # Get the parameters. Everything after the cutoff is taken as a target genome ID.
3272        my ($self, $genomeID, $cutoff, @targets) = @_;
3273        # Declare the return variable.
3274        my %retVal = ();
3275        # Get the list of features in the source organism.
3276        my @fids = $self->FeaturesOf($genomeID);
3277        # Ask for the sims. We only want similarities to fig features. NOTE(review): Sims passes a FidCheck shim as the first argument, but this call passes $self directly -- confirm that is intended.
3278        my $simList = FIGRules::GetNetworkSims($self, \@fids, {}, 1000, $cutoff, "fig");
3279        if (! defined $simList) {
3280            Confess("Unable to retrieve similarities from server.");
3281        } else {
3282            Trace("Processing sims.") if T(3);
3283            # We now have a set of sims that we need to convert into a hash of hashes. First, we
3284            # create a hash of the target genomes so that membership can be tested by key.
3285            my %targetHash = map { $_ => 1 } @targets;
3286            for my $simData (@{$simList}) {
3287                # Get the PEGs and the score from the sim object's accessors.
3288                my ($peg1, $peg2, $score) = ($simData->id1, $simData->id2, $simData->psc);
3289                # Insure the second ID is in the target list. The first value returned by ParseFeatureID is used as the genome ID.
3290                my ($genome2) = FIGRules::ParseFeatureID($peg2);
3291                if (exists $targetHash{$genome2}) {
3292                    # Here it is. Now we need to add it to the return hash. How we do that depends
3293                    # on whether or not $peg1 is new to us.
3294                    if (! exists $retVal{$peg1}) {
3295                        $retVal{$peg1} = { $peg2 => $score };
3296                    } else {
3297                        $retVal{$peg1}->{$peg2} = $score;
3298                    }
3299                }
3300            }
3301        }
3302        # Return the result. Note that the hash is returned as a list, not as a reference.
3303        return %retVal;
3304    }
3305    
3306    
3307  =head3 LowBBHs  =head3 LowBBHs
3308    
3309  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>  C<< my %bbhMap = $sprout->LowBBHs($featureID, $cutoff); >>
# Line 3028  Line 3335 
3335      my ($self, $featureID, $cutoff) = @_;      my ($self, $featureID, $cutoff) = @_;
3336      # Create the return hash.      # Create the return hash.
3337      my %retVal = ();      my %retVal = ();
3338      # Create a query to get the desired BBHs.      # Query for the desired BBHs.
3339      my @bbhList = $self->GetAll(['IsBidirectionalBestHitOf'],      my @bbhList = FIGRules::BBHData($featureID, $cutoff);
                                 'IsBidirectionalBestHitOf(sc) <= ? AND IsBidirectionalBestHitOf(from-link) = ?',  
                                 [$cutoff, $featureID],  
                                 ['IsBidirectionalBestHitOf(to-link)', 'IsBidirectionalBestHitOf(sc)']);  
3340      # Form the results into the return hash.      # Form the results into the return hash.
3341      for my $pair (@bbhList) {      for my $pair (@bbhList) {
3342          $retVal{$pair->[0]} = $pair->[1];          my $fid = $pair->[0];
3343            if ($self->Exists('Feature', $fid)) {
3344                $retVal{$fid} = $pair->[1];
3345            }
3346      }      }
3347      # Return the result.      # Return the result.
3348      return %retVal;      return %retVal;
3349  }  }
3350    
3351    =head3 Sims
3352    
3353    C<< my $simList = $sprout->Sims($fid, $maxN, $maxP, $select, $max_expand, $filters); >>
3354    
3355    Get a list of similarities for a specified feature. Similarity information is not kept in the
3356    Sprout database; rather, it is retrieved from a network server. The similarities are
3357    returned as B<Sim> objects. A Sim object is actually a list reference that has been blessed
3358    so that its elements can be accessed by name.
3359    
3360    Similarities can be either raw or expanded. The raw similarities are basic
3361    hits between features with similar DNA. Expanding a raw similarity drags in any
3362    features considered substantially identical. So, for example, if features B<A1>,
3363    B<A2>, and B<A3> are all substantially identical to B<A>, then a raw similarity
3364    B<[C,A]> would be expanded to B<[C,A] [C,A1] [C,A2] [C,A3]>.
3365    
3366    =over 4
3367    
3368    =item fid
3369    
3370    ID of the feature whose similarities are desired.
3371    
3372    =item maxN
3373    
3374    Maximum number of similarities to return.
3375    
3376    =item maxP
3377    
3378    Minimum allowable similarity score.
3379    
3380    =item select
3381    
3382    Selection criterion: C<raw> means only raw similarities are returned; C<fig>
3383    means only similarities to FIG features are returned; C<all> means all expanded
3384    similarities are returned; and C<figx> means similarities are expanded until the
3385    number of FIG features equals the maximum.
3386    
3387    =item max_expand
3388    
3389    The maximum number of features to expand.
3390    
3391    =item filters
3392    
3393    Reference to a hash containing filter information, or a subroutine that can be
3394    used to filter the sims.
3395    
3396    =item RETURN
3397    
3398    Returns a reference to a list of similarity objects, or C<undef> if an error
3399    occurred.
3400    
3401    =back
3402    
3403    =cut
3404    
3405    sub Sims {
3406        # Get the parameters.
3407        my ($self, $fid, $maxN, $maxP, $select, $max_expand, $filters) = @_;
3408        # Create the shim object to test for deleted FIDs. It wraps this Sprout object.
3409        my $shim = FidCheck->new($self);
3410        # Ask the network for sims. The caller's limits and filters are passed through unchanged; the third argument is always an empty hash (its meaning is defined by FIGRules::GetNetworkSims and is not visible here).
3411        my $retVal = FIGRules::GetNetworkSims($shim, $fid, {}, $maxN, $maxP, $select, $max_expand, $filters);
3412        # Return the result exactly as GetNetworkSims produced it.
3413        return $retVal;
3414    }
3415    
3416    =head3 IsAllGenomes
3417    
3418    C<< my $flag = $sprout->IsAllGenomes(\@list, \@checkList); >>
3419    
3420    Return TRUE if all genomes in the second list are represented in the first list at
3421    least once. Otherwise, return FALSE. If the second list is omitted, the first list is
3422    compared to a list of all the genomes.
3423    
3424    =over 4
3425    
3426    =item list
3427    
3428    Reference to the list to be compared to the second list.
3429    
3430    =item checkList (optional)
3431    
3432    Reference to the comparison target list. Every genome ID in this list must occur at
3433    least once in the first list. If this parameter is omitted, a list of all the genomes
3434    is used.
3435    
3436    =item RETURN
3437    
3438    Returns TRUE if every item in the second list appears at least once in the
3439    first list, else FALSE.
3440    
3441    =back
3442    
3443    =cut
3444    
3445    sub IsAllGenomes {
3446        # Get the parameters. Both lists are passed by reference.
3447        my ($self, $list, $checkList) = @_;
3448        # Supply the checklist if it was omitted: default to everything returned by the Genomes method.
3449        $checkList = [$self->Genomes()] if ! defined($checkList);
3450        # Create a hash of the original list so that membership can be tested by key.
3451        my %testList = map { $_ => 1 } @{$list};
3452        # Declare the return variable. We assume that the representation
3453        # is complete and stop at the first failure. An empty checklist therefore yields TRUE.
3454        my $retVal = 1;
3455        my $n = scalar @{$checkList};
3456        for (my $i = 0; $retVal && $i < $n; $i++) {   # testing $retVal here ends the scan at the first miss
3457            if (! $testList{$checkList->[$i]}) {
3458                $retVal = 0;
3459            }
3460        }
3461        # Return the result (1 for TRUE, 0 for FALSE).
3462        return $retVal;
3463    }
3464    
3465  =head3 GetGroups  =head3 GetGroups
3466    
3467  C<< my %groups = $sprout->GetGroups(\@groupList); >>  C<< my %groups = $sprout->GetGroups(\@groupList); >>
# Line 3062  Line 3483 
3483          # Here we have a group list. Loop through them individually,          # Here we have a group list. Loop through them individually,
3484          # getting a list of the relevant genomes.          # getting a list of the relevant genomes.
3485          for my $group (@{$groupList}) {          for my $group (@{$groupList}) {
3486              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(group-name) = ?",              my @genomeIDs = $self->GetFlat(['Genome'], "Genome(primary-group) = ?",
3487                  [$group], "Genome(id)");                  [$group], "Genome(id)");
3488              $retVal{$group} = \@genomeIDs;              $retVal{$group} = \@genomeIDs;
3489          }          }
# Line 3070  Line 3491 
3491          # Here we need all of the groups. In this case, we run through all          # Here we need all of the groups. In this case, we run through all
3492          # of the genome records, putting each one found into the appropriate          # of the genome records, putting each one found into the appropriate
3493          # group. Note that we use a filter clause to insure that only genomes          # group. Note that we use a filter clause to insure that only genomes
3494          # in groups are included in the return set.          # in real NMPDR groups are included in the return set.
3495          my @genomes = $self->GetAll(['Genome'], "Genome(group-name) > ' '", [],          my @genomes = $self->GetAll(['Genome'], "Genome(primary-group) <> ?",
3496                                      ['Genome(id)', 'Genome(group-name)']);                                      [$FIG_Config::otherGroup], ['Genome(id)', 'Genome(primary-group)']);
3497          # Loop through the genomes found.          # Loop through the genomes found.
3498          for my $genome (@genomes) {          for my $genome (@genomes) {
3499              # Pop this genome's ID off the current list.              # Pop this genome's ID off the current list.
# Line 3190  Line 3611 
3611      # Get the parameters.      # Get the parameters.
3612      my ($self, $genomeID, $testFlag) = @_;      my ($self, $genomeID, $testFlag) = @_;
3613      # Perform the delete for the genome's features.      # Perform the delete for the genome's features.
3614      my $retVal = $self->Delete('Feature', "fig|$genomeID.%", $testFlag);      my $retVal = $self->Delete('Feature', "fig|$genomeID.%", testMode => $testFlag);
3615      # Perform the delete for the primary genome data.      # Perform the delete for the primary genome data.
3616      my $stats = $self->Delete('Genome', $genomeID, $testFlag);      my $stats = $self->Delete('Genome', $genomeID, testMode => $testFlag);
3617      $retVal->Accumulate($stats);      $retVal->Accumulate($stats);
3618      # Return the result.      # Return the result.
3619      return $retVal;      return $retVal;
3620  }  }
3621    
3622  =head2 Internal Utility Methods  =head3 Fix
3623    
3624  =head3 ParseAssignment  C<< my %fixedHash = Sprout::Fix(%groupHash); >>
3625    
3626  Parse annotation text to determine whether or not it is a functional assignment. If it is,  Prepare a genome group hash (like that returned by L</GetGroups>) for processing.
3627  the user, function text, and assigning user will be returned as a 3-element list. If it  Groups with the same primary name will be combined. The primary name is the
3628  isn't, an empty list will be returned.  first capitalized word in the group name.
3629    
3630  A functional assignment is always of the form  =over 4
3631    
3632      C<set >I<YYYY>C< function to\n>I<ZZZZZ>  =item groupHash
3633    
3634  where I<YYYY> is the B<user>, and I<ZZZZ> is the actual functional role. In most cases,  Hash to be fixed up.
 the user and the assigning user (from MadeAnnotation) will be the same, but that is  
 not always the case.  
3635    
3636  In addition, the functional role may contain extra data that is stripped, such as  =item RETURN
 terminating spaces or a comment separated from the rest of the text by a tab.  
3637    
3638  This is a static method.  Returns a fixed-up version of the hash.
3639    
3640  =over 4  =back
3641    
3642  =item user  =cut
3643    
3644  Name of the assigning user.  sub Fix {
3645        # Get the parameters. This is a static method, so the whole argument list is the group hash.
3646        my (%groupHash) = @_;
3647        # Create the result hash.
3648        my %retVal = ();
3649        # Copy over the genomes, one incoming group at a time.
3650        for my $groupID (keys %groupHash) {
3651            # Default the real group ID to the incoming ID in case no primary name is found.
3652            my $realGroupID = $groupID;
3653            # Yank the primary name: the first upper-case letter followed by at least one more word character.
3654            if ($groupID =~ /([A-Z]\w+)/) {
3655                $realGroupID = $1;
3656            }
3657            # Append this group's genomes into the result hash. Groups sharing a primary name are sent to the same key.
3658            Tracer::AddToListMap(\%retVal, $realGroupID, @{$groupHash{$groupID}});
3659        }
3660        # Return the result hash. Note that it is returned as a list, not as a reference.
3661        return %retVal;
3662    }
3663    
3664  =item text  =head3 GroupPageName
3665    
3666  Text of the annotation.  C<< my $name = $sprout->GroupPageName($group); >>
3667    
3668    Return the name of the page for the specified NMPDR group.
3669    
3670    =over 4
3671    
3672    =item group
3673    
3674    Name of the relevant group.
3675    
3676  =item RETURN  =item RETURN
3677    
3678  Returns an empty list if the annotation is not a functional assignment; otherwise, returns  Returns the relative page name (e.g. C<../content/campy.php>). If the group file is not in
3679  a two-element list containing the user name and the function text.  memory it will be read in.
3680    
3681  =back  =back
3682    
3683  =cut  =cut
3684    
3685  sub _ParseAssignment {  sub GroupPageName {
3686      # Get the parameters.      # Get the parameters.
3687      my ($user, $text) = @_;      my ($self, $group) = @_;
3688      # Declare the return value.      # Declare the return variable.
3689      my @retVal = ();      my $retVal;
3690      # Check to see if this is a functional assignment.      # Check for the group file data. It is cached in the object after the first read.
3691      my ($type, $function) = split(/\n/, $text);      if (! defined $self->{groupHash}) {
3692      if ($type =~ m/^set function to$/i) {          # Read the group file (groups.tbl in the configured data directory).
3693          # Here we have an assignment without a user, so we use the incoming user ID.          my %groupData = Sprout::ReadGroupFile($self->{_options}->{dataDir} . "/groups.tbl");
3694          @retVal = ($user, $function);          # Store it in our object.
3695      } elsif ($type =~ m/^set (\S+) function to$/i) {          $self->{groupHash} = \%groupData;
         # Here we have an assignment with a user that is passed back to the caller.  
         @retVal = ($1, $function);  
3696      }      }
3697      # If we have an assignment, we need to clean the function text. There may be      # Compute the real group name: the first capitalized word of the incoming name, if there is one.
3698      # extra junk at the end added as a note from the user.      my $realGroup = $group;
3699      if (@retVal) {      if ($group =~ /([A-Z]\w+)/) {
3700          $retVal[1] =~ s/(\t\S)?\s*$//;          $realGroup = $1;
3701      }      }
3702      # Return the result list.      # Return the page name. ReadGroupFile stores each group as [page, genus, species], so the page is element 0 (element 1 is the genus).
3703      return @retVal;      $retVal = "../content/" . $self->{groupHash}->{$realGroup}->[0];
3704        # Return the result.
3705        return $retVal;
3706  }  }
3707    
3708  =head3 FriendlyTimestamp  =head3 ReadGroupFile
3709    
3710  Convert a time number to a user-friendly time stamp for display.  C<< my %groupData = Sprout::ReadGroupFile($groupFileName); >>
3711    
3712  This is a static method.  Read in the data from the specified group file. The group file contains information
3713    about each of the NMPDR groups.
3714    
3715  =over 4  =over 4
3716    
3717  =item timeValue  =item name
3718    
3719  Numeric time value.  Name of the group.
3720    
3721    =item page
3722    
3723    Name of the group's page on the web site (e.g. C<campy.php> for
3724    Campylobacter)
3725    
3726    =item genus
3727    
3728    Genus of the group
3729    
3730    =item species
3731    
3732    Species of the group, or an empty string if the group is for an entire
3733    genus. If the group contains more than one species, the species names
3734    should be separated by commas.
3735    
3736    =back
3737    
3738    The parameters to this method are as follows
3739    
3740    =over 4
3741    
3742    =item groupFile
3743    
3744    Name of the file containing the group data.
3745    
3746  =item RETURN  =item RETURN
3747    
3748  Returns a string containing the same time in user-readable format.  Returns a hash keyed on group name. The value of each hash entry is a reference to a 3-element list containing the group's page name, genus, and species.
3749    
3750  =back  =back
3751    
3752  =cut  =cut
3753    
3754  sub FriendlyTimestamp {  sub ReadGroupFile {
3755      my ($timeValue) = @_;      # Get the parameters. This is a static method; the only argument is the file name.
3756      my $retVal = localtime($timeValue);      my ($groupFileName) = @_;
3757      return $retVal;      # Declare the return variable.
3758        my %retVal;
3759        # Read the group file. Each line is split on tabs into name, page, genus, and species. NOTE(review): this assumes Tracer::GetFile returns lines without their line terminators -- confirm.
3760        my @groupLines = Tracer::GetFile($groupFileName);
3761        for my $groupLine (@groupLines) {
3762            my ($name, $page, $genus, $species) = split(/\t/, $groupLine);
3763            $retVal{$name} = [$page, $genus, $species];   # keyed on group name; value is [page, genus, species]
3764        }
3765        # Return the result. Note that the hash is returned as a list, not as a reference.
3766        return %retVal;
3767  }  }
3768    
3769  =head3 AddProperty  =head3 AddProperty
# Line 3331  Line 3810 
3810      if (@properties) {      if (@properties) {
3811          # Here the property is already in the database. We save its ID.          # Here the property is already in the database. We save its ID.
3812          $propID = $properties[0];          $propID = $properties[0];
3813        } else {
3814          # Here the property value does not exist. We need to generate an ID. It will be set          # Here the property value does not exist. We need to generate an ID. It will be set
3815          # to a number one greater than the maximum value in the database. This call to          # to a number one greater than the maximum value in the database. This call to
3816          # GetAll will stop after one record.          # GetAll will stop after one record.
# Line 3344  Line 3824 
3824      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });      $self->Insert('HasProperty', { 'from-link' => $featureID, 'to-link' => $propID, evidence => $url });
3825  }  }
3826    
3827    =head2 Virtual Methods
3828    
3829    =head3 CleanKeywords
3830    
3831    C<< my $cleanedString = $sprout->CleanKeywords($searchExpression); >>
3832    
3833    Clean up a search expression or keyword list. This involves converting the periods
3834    in EC numbers to underscores, converting non-leading minus signs to underscores,
3835    a vertical bar or colon to an apostrophe, and forcing lower case for all alphabetic
3836    characters. In addition, any extra spaces are removed.
3837    
3838    =over 4
3839    
3840    =item searchExpression
3841    
3842    Search expression or keyword list to clean. Note that a search expression may
3843    contain boolean operators which need to be preserved. This includes leading
3844    minus signs.
3845    
3846    =item RETURN
3847    
3848    Cleaned expression or keyword list.
3849    
3850    =back
3851    
3852    =cut
3853    
3854    sub CleanKeywords {
3855        # Get the parameters.
3856        my ($self, $searchExpression) = @_;
3857        # Perform the standard cleanup by calling the ERDB version of this method explicitly. Presumably that is where the lower-casing and space removal described in the POD happen; neither is done in this body.
3858        my $retVal = $self->ERDB::CleanKeywords($searchExpression);
3859        # Fix the periods in EC and TC numbers: four period-separated fields, each a run of digits or a hyphen, are rejoined with underscores.
3860        $retVal =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;
3861        # Fix non-trailing periods: any remaining period directly followed by a word character becomes an underscore.
3862        $retVal =~ s/\.(\w)/_$1/g;
3863        # Fix non-leading minus signs: a minus directly after a word character becomes an underscore, so a minus that starts a term is left alone.
3864        $retVal =~ s/(\w)[\-]/$1_/g;
3865        # Fix the vertical bars and colons: one that sits between two word characters becomes an apostrophe.
3866        $retVal =~ s/(\w)[|:](\w)/$1'$2/g;
3867        # Return the result.
3868        return $retVal;
3869    }
3870    
3871    =head2 Internal Utility Methods
3872    
3873    =head3 ParseAssignment
3874    
3875    Parse annotation text to determine whether or not it is a functional assignment. If it is,
3876    the user and the function text will be returned as a 2-element list. If it
3877    isn't, an empty list will be returned.
3878    
3879    A functional assignment is always of the form
3880    
3881        C<set >I<YYYY>C< function to\n>I<ZZZZZ>
3882    
3883    where I<YYYY> is the B<user>, and I<ZZZZZ> is the actual functional role. In most cases,
3884    the user and the assigning user (from MadeAnnotation) will be the same, but that is
3885    not always the case.
3886    
3887    In addition, the functional role may contain extra data that is stripped, such as
3888    terminating spaces or a comment separated from the rest of the text by a tab.
3889    
3890    This is a static method.
3891    
3892    =over 4
3893    
3894    =item user
3895    
3896    Name of the assigning user.
3897    
3898    =item text
3899    
3900    Text of the annotation.
3901    
3902    =item RETURN
3903    
3904    Returns an empty list if the annotation is not a functional assignment; otherwise, returns
3905    a two-element list containing the user name and the function text.
3906    
3907    =back
3908    
3909    =cut
3910    
3911    sub _ParseAssignment {
3912        # Get the parameters. This is a static method, so there is no object argument.
3913        my ($user, $text) = @_;
3914        # Declare the return value. It stays empty unless the text is a functional assignment.
3915        my @retVal = ();
3916        # Check to see if this is a functional assignment. Only the first two lines of the text are used: the header and the function.
3917        my ($type, $function) = split(/\n/, $text);
3918        if ($type =~ m/^set function to$/i) {
3919            # Here we have an assignment without a user, so we use the incoming user ID.
3920            @retVal = ($user, $function);
3921        } elsif ($type =~ m/^set (\S+) function to$/i) {
3922            # Here we have an assignment with a user that is passed back to the caller.
3923            @retVal = ($1, $function);
3924        }
3925        # If we have an assignment, we need to clean the function text. There may be
3926        # extra junk at the end added as a note from the user. The test is on the function text itself, so a header with no second line leaves the undefined function untouched. NOTE(review): the pattern strips trailing white space and at most a tab plus ONE non-space character before it, not a whole tab-separated comment -- confirm this is what was intended.
3927        if (defined( $retVal[1] )) {
3928            $retVal[1] =~ s/(\t\S)?\s*$//;
3929        }
3930        # Return the result list: (user, function), or an empty list if the text is not an assignment.
3931        return @retVal;
3932    }
3933    
3934    =head3 FriendlyTimestamp
3935    
3936    Convert a time number to a user-friendly time stamp for display.
3937    
3938    This is a static method.
3939    
3940    =over 4
3941    
3942    =item timeValue
3943    
3944    Numeric time value.
3945    
3946    =item RETURN
3947    
3948    Returns a string containing the same time in user-readable format.
3949    
3950    =back
3951    
3952    =cut
3953    
3954    sub FriendlyTimestamp {
3955        my ($timeValue) = @_;   # static method: the only argument is the numeric time value
3956        my $retVal = localtime($timeValue);   # scalar context, so localtime returns a single date/time string
3957        return $retVal;
3958    }
3959    
3960    
3961  1;  1;

Legend:
Removed from v.1.63  
changed lines
  Added in v.1.98

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3