[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory | Revision Log | View Patch

revision 1.111, Wed May 7 23:11:51 2008 UTC revision 1.120, Thu Oct 2 16:32:42 2008 UTC
# Line 16  Line 16 
16      use RemoteCustomAttributes;      use RemoteCustomAttributes;
17      use CGI;      use CGI;
18      use WikiTools;      use WikiTools;
19        use BioWords;
20      use base qw(ERDB);      use base qw(ERDB);
21    
22  =head1 Sprout Database Manipulation Object  =head1 Sprout Database Manipulation Object
# Line 56  Line 57 
57    
58  =item dbName  =item dbName
59    
60  Name of the database.  Name of the database. If omitted, the default Sprout database name is used.
61    
62  =item options  =item options
63    
# Line 95  Line 96 
96  sub new {  sub new {
97      # Get the parameters.      # Get the parameters.
98      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
99        # Default the database name if it is missing.
100        if (! defined $dbName) {
101            $dbName = $FIG_Config::sproutDB;
102        } elsif (ref $dbName eq 'HASH') {
103            $options = $dbName;
104            $dbName = $FIG_Config::sproutDB;
105        }
106      # Compute the DBD directory.      # Compute the DBD directory.
107      my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :      my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
108                                                    $FIG_Config::fig );                                                    $FIG_Config::fig );
# Line 107  Line 115 
115                                                          # data file directory                                                          # data file directory
116                         xmlFileName  => "$dbd_dir/SproutDBD.xml",                         xmlFileName  => "$dbd_dir/SproutDBD.xml",
117                                                          # database definition file name                                                          # database definition file name
118                         userData     => "$FIG_Config::dbuser/$FIG_Config::dbpass",                         userData     => "$FIG_Config::sproutUser/$FIG_Config::sproutPass",
119                                                          # user name and password                                                          # user name and password
120                         port         => $FIG_Config::dbport,                         port         => $FIG_Config::sproutPort,
121                                                          # database connection port                                                          # database connection port
122                         sock         => $FIG_Config::dbsock,                         sock         => $FIG_Config::sproutSock,
123                         host         => $FIG_Config::sprout_host,                         host         => $FIG_Config::sprout_host,
124                         maxSegmentLength => 4500,        # maximum feature segment length                         maxSegmentLength => 4500,        # maximum feature segment length
125                         maxSequenceLength => 8000,       # maximum contig sequence length                         maxSequenceLength => 8000,       # maximum contig sequence length
# Line 139  Line 147 
147      $retVal->{groupHash} = undef;      $retVal->{groupHash} = undef;
148      # Set up space for the genome hash. We use this to identify NMPDR genomes.      # Set up space for the genome hash. We use this to identify NMPDR genomes.
149      $retVal->{genomeHash} = undef;      $retVal->{genomeHash} = undef;
150      # Connect to the attributes.      # Remember the data directory name.
151        $retVal->{dataDir} = $dataDir;
152        # Return it.
153        return $retVal;
154    }
155    
156    =head3 ca
157    
158        my $ca = $sprout->ca();
159    
160    Return the [[CustomAttributesPm]] object for retrieving object
161    properties.
162    
163    =cut
164    
165    sub ca {
166        # Get the parameters.
167        my ($self) = @_;
168        # Do we already have an attribute object?
169        my $retVal = $self->{_ca};
170        if (! defined $retVal) {
171            # No, create one. How we do it depends on the configuration.
172      if ($FIG_Config::attrURL) {      if ($FIG_Config::attrURL) {
173          Trace("Remote attribute server $FIG_Config::attrURL chosen.") if T(3);          Trace("Remote attribute server $FIG_Config::attrURL chosen.") if T(3);
174          $retVal->{_ca} = RemoteCustomAttributes->new($FIG_Config::attrURL);              $retVal = RemoteCustomAttributes->new($FIG_Config::attrURL);
175      } elsif ($FIG_Config::attrDbName) {      } elsif ($FIG_Config::attrDbName) {
176          Trace("Local attribute database $FIG_Config::attrDbName chosen.") if T(3);          Trace("Local attribute database $FIG_Config::attrDbName chosen.") if T(3);
177          my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));          my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));
178          $retVal->{_ca} = CustomAttributes->new(user => $user);              $retVal = CustomAttributes->new(user => $user);
179      }      }
180      # Insure we have access to the stem module.          # Save it for next time.
181      WikiUse('Lingua::Stem');          $self->{_ca} = $retVal;
182      $retVal->{stemmer} = Lingua::Stem->new();      }
183      $retVal->{stemmer}->stem_caching({ -level => 2 });      # Return the result.
     # Return it.  
184      return $retVal;      return $retVal;
185  }  }
186    
# Line 536  Line 564 
564      # Get the parameters.      # Get the parameters.
565      my ($self, %options) = @_;      my ($self, %options) = @_;
566      # Get the control's name and ID.      # Get the control's name and ID.
567      my $menuName = $options{name} || 'myGenomeControl';      my $menuName = $options{name} || $options{id} || 'myGenomeControl';
568      my $menuID = $options{id} || $menuName;      my $menuID = $options{id} || $menuName;
569        Trace("Genome menu name = $menuName with ID $menuID.") if T(3);
570      # Compute the IDs for the status display.      # Compute the IDs for the status display.
571      my $divID = "${menuID}_status";      my $divID = "${menuID}_status";
572      my $urlID = "${menuID}_url";      my $urlID = "${menuID}_url";
# Line 552  Line 581 
581      if (ref $selections ne 'ARRAY') {      if (ref $selections ne 'ARRAY') {
582          $selections = [ split /\s*,\s*/, $selections ];          $selections = [ split /\s*,\s*/, $selections ];
583      }      }
584      my %selected = map { $_ => } @{$selections};      my %selected = map { $_ => 1 } @{$selections};
585      # Extract the filter information. The default is no filtering. It can be passed as a tab-delimited      # Extract the filter information. The default is no filtering. It can be passed as a tab-delimited
586      # string or a list reference.      # string or a list reference.
587      my $filterParms = $options{filter} || "";      my $filterParms = $options{filter} || "";
# Line 628  Line 657 
657      # Set up the style class.      # Set up the style class.
658      my $classTag = ($class ? " class=\"$class\"" : "" );      my $classTag = ($class ? " class=\"$class\"" : "" );
659      # Create the SELECT tag and stuff it into the output array.      # Create the SELECT tag and stuff it into the output array.
660      my @lines = ("<SELECT name=\"$menuID\" id=\"$menuID\" $onChangeTag$multipleTag$classTag size=\"$rows\">");      my @lines = ("<SELECT name=\"$menuName\" id=\"$menuID\" $onChangeTag$multipleTag$classTag size=\"$rows\">");
661      # Loop through the groups.      # Loop through the groups.
662      for my $group (@groups) {      for my $group (@groups) {
663          # Get the genomes in the group.          # Get the genomes in the group.
# Line 664  Line 693 
693          my $searchThingLabel = ($multiSelect ? "<INPUT type=\"button\" name=\"MacroSearch\" class=\"button\" value=\"Select genomes containing\" onClick=\"selectShowing('$menuID', '$searchThingName'); $showSelect;\" />"          my $searchThingLabel = ($multiSelect ? "<INPUT type=\"button\" name=\"MacroSearch\" class=\"button\" value=\"Select genomes containing\" onClick=\"selectShowing('$menuID', '$searchThingName'); $showSelect;\" />"
694                                               : "Show genomes containing");                                               : "Show genomes containing");
695          push @lines, "<br />$searchThingLabel&nbsp;" .          push @lines, "<br />$searchThingLabel&nbsp;" .
696                       "<INPUT type=\"text\" id=\"$searchThingName\" name=\"$searchThingName\" size=\"30\" onKeyup=\"showTyped('$menuID', '$searchThingName');\" />";                       "<INPUT type=\"text\" id=\"$searchThingName\" name=\"$searchThingName\" size=\"30\" onKeyup=\"showTyped('$menuID', '$searchThingName');\" />" .
697                         Hint("GenomeControl", "Type here to filter the genomes displayed.") . "<br />";
698          # For multi-select mode, we also have buttons to set and clear selections.          # For multi-select mode, we also have buttons to set and clear selections.
699          if ($multiSelect) {          if ($multiSelect) {
700              push @lines, "<INPUT type=\"button\" name=\"ClearAll\" class=\"bigButton\"  value=\"Clear All\" onClick=\"clearAll('$menuID'); $showSelect\" />";              push @lines, "<INPUT type=\"button\" name=\"ClearAll\" class=\"bigButton\"  value=\"Clear All\" onClick=\"clearAll('$menuID'); $showSelect\" />";
# Line 672  Line 702 
702              push @lines, "<INPUT type=\"button\" name=\"NMPDROnly\" class=\"bigButton\"  value=\"Select NMPDR\" onClick=\"selectSome('$menuID', $nmpdrCount, true); $showSelect;\" />";              push @lines, "<INPUT type=\"button\" name=\"NMPDROnly\" class=\"bigButton\"  value=\"Select NMPDR\" onClick=\"selectSome('$menuID', $nmpdrCount, true); $showSelect;\" />";
703          }          }
704          # Add a hidden field we can use to generate organism page hyperlinks.          # Add a hidden field we can use to generate organism page hyperlinks.
705          push @lines, "<INPUT type=\"hidden\" id=\"$urlID\" value=\"$FIG_Config::cgi_url/seedviewer.cgi?page=Organism;organism=\" />";          push @lines, "<INPUT type=\"hidden\" id=\"$urlID\" value=\"$FIG_Config::cgi_url/wiki/rest.cgi/NmpdrPlugin/SeedViewer?page=Organism;organism=\" />";
706          # Add the status display. This tells the user what's selected no matter where the list is scrolled.          # Add the status display. This tells the user what's selected no matter where the list is scrolled.
707          push @lines, "<DIV id=\"$divID\" class=\"Panel\"></DIV>";          push @lines, "<DIV id=\"$divID\" class=\"Panel\"></DIV>";
708      }      }
# Line 709  Line 739 
739  sub Stem {  sub Stem {
740      # Get the parameters.      # Get the parameters.
741      my ($self, $word) = @_;      my ($self, $word) = @_;
742      # Declare the return variable.      # Get the stemmer object.
743      my $retVal;      my $stemmer = $self->{stemmer};
744      # See if it's stemmable.      if (! defined $stemmer) {
745      if ($word =~ /^[A-Za-z]+$/) {          # We don't have one pre-built, so we build and save it now.
746          # Compute the stem.          $stemmer = BioWords->new(exceptions => "$FIG_Config::sproutData/Exceptions.txt",
747          my $stemList = $self->{stemmer}->stem($word);                                   stops => "$FIG_Config::sproutData/StopWords.txt",
748          my $stem = $stemList->[0];                                   cache => 0);
749          # Check to see if it's long enough.          $self->{stemmer} = $stemmer;
         if (length $stem >= 3) {  
             # Yes, keep it.  
             $retVal = $stem;  
         } else {  
             # No, use the original word.  
             $retVal = $word;  
         }  
750      }      }
751        # Try to stem the word.
752        my $retVal = $stemmer->Process($word);
753      # Return the result.      # Return the result.
754      return $retVal;      return $retVal;
755  }  }
# Line 1619  Line 1644 
1644  the specified user and FIG are considered trusted. If the user ID is omitted, only FIG  the specified user and FIG are considered trusted. If the user ID is omitted, only FIG
1645  is trusted.  is trusted.
1646    
1647  If the feature is B<not> identified by a FIG ID, then the functional assignment  If the feature is B<not> identified by a FIG ID, then we search the aliases for it.
1648  information is taken from the B<ExternalAliasFunc> table. If the table does  If no matching alias is found, we return an undefined value.
 not contain an entry for the feature, an undefined value is returned.  
1649    
1650  =over 4  =over 4
1651    
# Line 1647  Line 1671 
1671      my ($self, $featureID, $userID) = @_;      my ($self, $featureID, $userID) = @_;
1672      # Declare the return value.      # Declare the return value.
1673      my $retVal;      my $retVal;
1674      # Determine the ID type.      # Find a FIG ID for this feature.
1675      if ($featureID =~ m/^fig\|/) {      my ($fid) = $self->FeaturesByAlias($featureID);
1676        # Only proceed if we have an ID.
1677        if ($fid) {
1678          # Here we have a FIG feature ID.          # Here we have a FIG feature ID.
1679          if (!$userID) {          if (!$userID) {
1680              # Use the primary assignment.              # Use the primary assignment.
1681              ($retVal) = $self->GetEntityValues('Feature', $featureID, ['Feature(assignment)']);              ($retVal) = $self->GetEntityValues('Feature', $fid, ['Feature(assignment)']);
1682          } else {          } else {
1683              # We must build the list of trusted users.              # We must build the list of trusted users.
1684              my %trusteeTable = ();              my %trusteeTable = ();
# Line 1678  Line 1704 
1704              # Build a query for all of the feature's annotations, sorted by date.              # Build a query for all of the feature's annotations, sorted by date.
1705              my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],              my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1706                                     "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",                                     "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",
1707                                     [$featureID]);                                     [$fid]);
1708              my $timeSelected = 0;              my $timeSelected = 0;
1709              # Loop until we run out of annotations.              # Loop until we run out of annotations.
1710              while (my $annotation = $query->Fetch()) {              while (my $annotation = $query->Fetch()) {
# Line 1698  Line 1724 
1724                  }                  }
1725              }              }
1726          }          }
     } else {  
         # Here we have a non-FIG feature ID. In this case the user ID does not  
         # matter. We simply get the information from the External Alias Function  
         # table.  
         ($retVal) = $self->GetEntityValues('ExternalAliasFunc', $featureID, ['ExternalAliasFunc(func)']);  
1727      }      }
1728      # Return the assignment found.      # Return the assignment found.
1729      return $retVal;      return $retVal;
# Line 1721  Line 1742 
1742  annotation itself because it's a text field; however, this is not a big problem because  annotation itself because it's a text field; however, this is not a big problem because
1743  most features only have a small number of annotations.  most features only have a small number of annotations.
1744    
 If the feature is B<not> identified by a FIG ID, then the functional assignment  
 information is taken from the B<ExternalAliasFunc> table. If the table does  
 not contain an entry for the feature, an empty list is returned.  
   
1745  =over 4  =over 4
1746    
1747  =item featureID  =item featureID
# Line 1745  Line 1762 
1762      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
1763      # Declare the return value.      # Declare the return value.
1764      my @retVal = ();      my @retVal = ();
1765      # Determine the ID type.      # Convert to a FIG ID.
1766      if ($featureID =~ m/^fig\|/) {      my ($fid) = $self->FeaturesByAlias($featureID);
1767        # Only proceed if we found one.
1768        if ($fid) {
1769          # Here we have a FIG feature ID. We must build the list of trusted          # Here we have a FIG feature ID. We must build the list of trusted
1770          # users.          # users.
1771          my %trusteeTable = ();          my %trusteeTable = ();
1772          # Build a query for all of the feature's annotations, sorted by date.          # Build a query for all of the feature's annotations, sorted by date.
1773          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],          my $query = $self->Get(['IsTargetOfAnnotation', 'Annotation', 'MadeAnnotation'],
1774                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",                                 "IsTargetOfAnnotation(from-link) = ? ORDER BY Annotation(time) DESC",
1775                                 [$featureID]);                                 [$fid]);
1776          my $timeSelected = 0;          my $timeSelected = 0;
1777          # Loop until we run out of annotations.          # Loop until we run out of annotations.
1778          while (my $annotation = $query->Fetch()) {          while (my $annotation = $query->Fetch()) {
# Line 1768  Line 1787 
1787                  push @retVal, [$actualUser, $function];                  push @retVal, [$actualUser, $function];
1788              }              }
1789          }          }
     } else {  
         # Here we have a non-FIG feature ID. In this case the user ID does not  
         # matter. We simply get the information from the External Alias Function  
         # table.  
         my @assignments = $self->GetEntityValues('ExternalAliasFunc', $featureID,  
                                                  ['ExternalAliasFunc(func)']);  
         push @retVal, map { ['master', $_] } @assignments;  
1790      }      }
1791      # Return the assignments found.      # Return the assignments found.
1792      return @retVal;      return @retVal;
# Line 1962  Line 1974 
1974      if ($featureID =~ /^fig\|(\d+\.\d+)/) {      if ($featureID =~ /^fig\|(\d+\.\d+)/) {
1975          $retVal = $1;          $retVal = $1;
1976      } else {      } else {
1977          Confess("Invalid feature ID $featureID.");          # Find the feature by alias.
1978            my ($realFeatureID) = $self->FeaturesByAlias($featureID);
1979            if ($realFeatureID && $realFeatureID =~ /^fig\|(\d+\.\d+)/) {
1980                $retVal = $1;
1981            }
1982      }      }
1983      # Return the value found.      # Return the value found.
1984      return $retVal;      return $retVal;
# Line 2802  Line 2818 
2818      return @retVal;      return @retVal;
2819  }  }
2820    
 =head3 GetProperties  
   
     my @list = $sprout->GetProperties($fid, $key, $value, $url);  
   
 Return a list of the properties with the specified characteristics.  
   
 Properties are the Sprout analog of the FIG attributes. The call is  
 passed directly to the CustomAttributes or RemoteCustomAttributes object  
 contained in this object.  
   
 This method returns a series of tuples that match the specified criteria. Each tuple  
 will contain an object ID, a key, and one or more values. The parameters to this  
 method therefore correspond structurally to the values expected in each tuple. In  
 addition, you can ask for a generic search by suffixing a percent sign (C<%>) to any  
 of the parameters. So, for example,  
   
     my @attributeList = $sprout->GetProperties('fig|100226.1.peg.1004', 'structure%', 1, 2);  
   
 would return something like  
   
     ['fig|100226.1.peg.1004', 'structure', 1, 2]  
     ['fig|100226.1.peg.1004', 'structure1', 1, 2]  
     ['fig|100226.1.peg.1004', 'structure2', 1, 2]  
     ['fig|100226.1.peg.1004', 'structureA', 1, 2]  
   
 Use of C<undef> in any position acts as a wild card (all values). You can also specify  
 a list reference in the ID column. Thus,  
   
     my @attributeList = $sprout->GetProperties(['100226.1', 'fig|100226.1.%'], 'PUBMED');  
   
 would get the PUBMED attribute data for Streptomyces coelicolor A3(2) and all its  
 features.  
   
 In addition to values in multiple sections, a single attribute key can have multiple  
 values, so even  
   
     my @attributeList = $sprout->GetProperties($peg, 'virulent');  
   
 which has no wildcard in the key or the object ID, may return multiple tuples.  
   
 =over 4  
   
 =item objectID  
   
 ID of object whose attributes are desired. If the attributes are desired for multiple  
 objects, this parameter can be specified as a list reference. If the attributes are  
 desired for all objects, specify C<undef> or an empty string. Finally, you can specify  
 attributes for a range of object IDs by putting a percent sign (C<%>) at the end.  
   
 =item key  
   
 Attribute key name. A value of C<undef> or an empty string will match all  
 attribute keys. If the values are desired for multiple keys, this parameter can be  
 specified as a list reference. Finally, you can specify attributes for a range of  
 keys by putting a percent sign (C<%>) at the end.  
   
 =item values  
   
 List of the desired attribute values, section by section. If C<undef>  
 or an empty string is specified, all values in that section will match. A  
 generic match can be requested by placing a percent sign (C<%>) at the end.  
 In that case, all values that match up to and not including the percent sign  
 will match. You may also specify a regular expression enclosed  
 in slashes. All values that match the regular expression will be returned. For  
 performance reasons, only values have this extra capability.  
   
 =item RETURN  
   
 Returns a list of tuples. The first element in the tuple is an object ID, the  
 second is an attribute key, and the remaining elements are the sections of  
 the attribute value. All of the tuples will match the criteria set forth in  
 the parameter list.  
   
 =back  
   
 =cut  
   
 sub GetProperties {  
     # Get the parameters.  
     my ($self, @parms) = @_;  
     # Declare the return variable.  
     my @retVal = $self->{_ca}->GetAttributes(@parms);  
     # Return the result.  
     return @retVal;  
 }  
   
2821  =head3 FeatureProperties  =head3 FeatureProperties
2822    
2823      my @properties = $sprout->FeatureProperties($featureID);      my @properties = $sprout->FeatureProperties($featureID);
# Line 2916  Line 2846 
2846      # Get the parameters.      # Get the parameters.
2847      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
2848      # Get the properties.      # Get the properties.
2849      my @attributes = $self->{_ca}->GetAttributes($featureID);      my @attributes = $self->ca->GetAttributes($featureID);
2850      # Strip the feature ID off each tuple.      # Strip the feature ID off each tuple.
2851      my @retVal = ();      my @retVal = ();
2852      for my $attributeRow (@attributes) {      for my $attributeRow (@attributes) {
# Line 3188  Line 3118 
3118  sub SubsystemList {  sub SubsystemList {
3119      # Get the parameters.      # Get the parameters.
3120      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
3121      # Get the list of names.      # Get the list of names. We do a join to the Subsystem table because we have missing subsystems in
3122      my @retVal = $self->GetFlat(['HasRoleInSubsystem'], "HasRoleInSubsystem(from-link) = ?",      # the Sprout database!
3123        my @retVal = $self->GetFlat(['HasRoleInSubsystem', 'Subsystem'], "HasRoleInSubsystem(from-link) = ?",
3124                                  [$featureID], 'HasRoleInSubsystem(to-link)');                                  [$featureID], 'HasRoleInSubsystem(to-link)');
3125      # Return the result, sorted.      # Return the result, sorted.
3126      return sort @retVal;      return sort @retVal;
# Line 3222  Line 3153 
3153      # Declare the return variable.      # Declare the return variable.
3154      my %retVal = ();      my %retVal = ();
3155      # Get a list of the genome features that participate in subsystems. For each      # Get a list of the genome features that participate in subsystems. For each
3156      # feature we get its spreadsheet cells and the corresponding roles.      # feature we get its subsystem ID and the corresponding roles.
3157      my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],      my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf', 'HasSSCell'],
3158                               "HasFeature(from-link) = ?", [$genomeID],                               "HasFeature(from-link) = ?", [$genomeID],
3159                               ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);                                   ['HasFeature(to-link)', 'IsRoleOf(from-link)',  'HasSSCell(from-link)']);
3160      # Now we get a list of the spreadsheet cells and their associated subsystems. Subsystems      # Now we get a list of valid subsystems. These are the subsystems connected to the genome with
3161      # with an unknown variant code (-1) are skipped. Note the genome ID is at both ends of the      # a non-negative variant code.
3162      # list. We use it at the beginning to get all the spreadsheet cells for the genome and      my %subs = map { $_ => 1 } $self->GetFlat(['ParticipatesIn'],
3163      # again at the end to filter out participation in subsystems with a negative variant code.                                                  "ParticipatesIn(from-link) = ? AND ParticipatesIn(variant-code) >= 0",
3164      my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],                                                  [$genomeID], 'ParticipatesIn(to-link)');
3165                                   "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",      # We loop through @roleData to build the hash.
                                  [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);  
     # Now "@roleData" lists the spreadsheet cell and role for each of the genome's features.  
     # "@cellData" lists the subsystem name for each of the genome's spreadsheet cells. We  
     # link these two lists together to create the result. First, we want a hash mapping  
     # spreadsheet cells to subsystem names.  
     my %subHash = map { $_->[0] => $_->[1] } @cellData;  
     # We loop through @cellData to build the hash.  
3166      for my $roleEntry (@roleData) {      for my $roleEntry (@roleData) {
3167          # Get the data for this feature and cell.          # Get the data for this feature and cell.
3168          my ($fid, $cellID, $role) = @{$roleEntry};          my ($fid, $role, $subsys) = @{$roleEntry};
3169          # Check for a subsystem name.          Trace("Subsystem for $fid is $subsys.") if T(4);
3170          my $subsys = $subHash{$cellID};          # Check the subsystem;
3171          if ($subsys) {          if ($subs{$subsys}) {
3172                Trace("Subsystem found.") if T(4);
3173              # Insure this feature has an entry in the return hash.              # Insure this feature has an entry in the return hash.
3174              if (! exists $retVal{$fid}) { $retVal{$fid} = []; }              if (! exists $retVal{$fid}) { $retVal{$fid} = []; }
3175              # Merge in this new data.              # Merge in this new data.
# Line 3997  Line 3922 
3922      # Get the parameters.      # Get the parameters.
3923      my ($self, $featureID, $key, @values) = @_;      my ($self, $featureID, $key, @values) = @_;
3924      # Add the property using the attached attributes object.      # Add the property using the attached attributes object.
3925      $self->{_ca}->AddAttribute($featureID, $key, @values);      $self->ca->AddAttribute($featureID, $key, @values);
3926  }  }
3927    
3928  =head3 CheckGroupFile  =head3 CheckGroupFile
# Line 4084  Line 4009 
4009  sub CleanKeywords {  sub CleanKeywords {
4010      # Get the parameters.      # Get the parameters.
4011      my ($self, $searchExpression) = @_;      my ($self, $searchExpression) = @_;
4012      # Perform the standard cleanup.      # Get the stemmer.
4013      my $words = $self->ERDB::CleanKeywords($searchExpression);      my $stemmer = $self->GetStemmer();
4014      # Fix the periods in EC and TC numbers.      # Convert the search expression using the stemmer.
4015      $words =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;      my $retVal = $stemmer->PrepareSearchExpression($searchExpression);
     # Fix non-trailing periods.  
     $words =~ s/\.(\w)/_$1/g;  
     # Fix non-leading minus signs.  
     $words =~ s/(\w)[\-]/$1_/g;  
     # Fix the vertical bars and colons  
     $words =~ s/(\w)[|:](\w)/$1'$2/g;  
     # Now split up the list so that each keyword is in its own string. We keep the delimiters  
     # because they may contain boolean expression data.  
     my @words = split /([^A-Za-z'0-9_]+)/, $words;  
     # We'll convert the stemmable words into stems and re-assemble the result.  
     my $retVal = "";  
     for my $word (@words) {  
         my $stem = $self->Stem($word);  
         if (defined $stem) {  
             $retVal .= $stem;  
         } else {  
             $retVal .= $word;  
         }  
     }  
4016      Trace("Cleaned keyword list for \"$searchExpression\" is \"$retVal\".") if T(3);      Trace("Cleaned keyword list for \"$searchExpression\" is \"$retVal\".") if T(3);
4017      # Return the result.      # Return the result.
4018      return $retVal;      return $retVal;
4019  }  }
4020    
4021    =head3 GetSourceObject
4022    
4023        my $source = $erdb->GetSourceObject();
4024    
4025    Return the object to be used in creating load files for this database.
4026    
4027    =cut
4028    
4029    sub GetSourceObject {
4030        # Get access to the FIG code.
4031        require FIG;
4032        # Return a FIG object.
4033        return FIG->new();
4034    }
4035    
=head3 SectionList

    my @sections = $erdb->SectionList($source);

Return the names of the data sections used when this database is loaded.
The list is obtained from C<BaseSproutLoader::GetSectionList>.

=over 4

=item source

Source object used to access the data from which the database is loaded. This is the
same object returned by L</GetSourceObject>; the caller passes it in so that a
potentially expensive data structure does not have to be created more than once.

=item RETURN

Returns a list of section names.

=back

=cut

sub SectionList {
    # Unpack the database object and the data source.
    my $self = shift;
    my $source = shift;
    # The section list is computed by the base loader, which is loaded on demand.
    require BaseSproutLoader;
    my @sections = BaseSproutLoader::GetSectionList($self, $source);
    # Hand the section names back to the caller.
    return @sections;
}
4070    
=head3 Loader

    my $groupLoader = $erdb->Loader($groupName, $source, $options);

Return an [[ERDBLoadGroupPm]] object for the specified load group. This method is used
by [[ERDBGeneratorPl]] to create the load group objects. The loader class is named
I<groupName>C<SproutLoader>; its module is loaded on demand and its C<new> method is
called with this database object, the source object, and the options hash.

=over 4

=item groupName

Name of the load group whose object is to be returned. The group name must be a
single word (letters, digits, and underscores only); anything else is rejected
with an error.

=item source

The source object used to access the data from which the load file is derived. This
is the same object returned by L</GetSourceObject>; however, we ask the caller to pass
it in as a parameter so that we don't end up creating multiple copies of a potentially
expensive data structure.

=item options

Reference to a hash of command-line options.

=item RETURN

Returns an [[ERDBLoadGroupPm]] object that can be used to process the specified load group
for this database.

=back

=cut

sub Loader {
    # Get the parameters.
    my ($self, $groupName, $source, $options) = @_;
    # The group name becomes part of a class name and a file name, so insist on
    # the documented single-word format before using it.
    if (! defined $groupName || $groupName !~ /\A\w+\z/) {
        my $badName = (defined $groupName ? $groupName : '(undefined)');
        Confess("Invalid load group name \"$badName\".");
    }
    # Compute the loader name.
    my $loaderClass = "${groupName}SproutLoader";
    # Pull in its definition.
    require "$loaderClass.pm";
    # Create an object for it. A method call on the class-name string does the
    # same job as compiling the call from text, without evaluating any data as code.
    my $retVal = eval { $loaderClass->new($self, $source, $options) };
    # Ensure it worked.
    Confess("Could not create $loaderClass object: $@") if $@;
    # Return it to the caller.
    return $retVal;
}
4120    
=head3 LoadGroupList

    my @groups = $erdb->LoadGroupList();

Returns a list of the names for this database's load groups. This method is used
by [[ERDBGeneratorPl]] when the user wishes to load all table groups. For Sprout
the groups are Genome, Subsystem, Annotation, Property, Source, Reaction, Synonym,
Drug, and Feature, returned in that order.

=cut

sub LoadGroupList {
    # The Sprout load groups, in the order they are reported to the caller.
    return ('Genome', 'Subsystem', 'Annotation', 'Property', 'Source',
            'Reaction', 'Synonym', 'Drug', 'Feature');
}
4135    
=head3 LoadDirectory

    my $dirName = $erdb->LoadDirectory();

Return the name of the directory in which load files are kept. For Sprout this
is the value stored in the database object's C<dataDir> member.

=cut

sub LoadDirectory {
    # Unpack the database object.
    my $self = shift;
    # The load directory is the data directory recorded in the object.
    my $dirName = $self->{dataDir};
    return $dirName;
}
4151    
4152  =head2 Internal Utility Methods  =head2 Internal Utility Methods
4153    
=head3 GetStemmer

    my $stemmer = $sprout->GetStemmer();

Return the stemmer object for this database. The stemmer is a L<BioWords> object
that is built the first time it is requested and then kept in the database
object for later calls.

=cut

sub GetStemmer {
    # Unpack the database object.
    my ($self) = @_;
    # Build the stemmer on first use; afterward the saved copy is returned.
    unless (defined $self->{stemmer}) {
        $self->{stemmer} = BioWords->new(exceptions => "$FIG_Config::sproutData/Exceptions.txt",
                                         stops => "$FIG_Config::sproutData/StopWords.txt",
                                         cache => 1);
    }
    # Give the caller the saved stemmer.
    return $self->{stemmer};
}
4177    
4178  =head3 ParseAssignment  =head3 ParseAssignment
4179    
4180  Parse annotation text to determine whether or not it is a functional assignment. If it is,  Parse annotation text to determine whether or not it is a functional assignment. If it is,
# Line 4237  Line 4298 
4298  }  }
4299    
4300    
=head3 Hint

    my $htmlText = Sprout::Hint($wikiPage, $hintText);

Return the HTML for a small help icon that links to a wiki page and displays the
specified hint text as a tooltip when the mouse moves over it. This HTML can be put
in forms to provide a useful hinting mechanism.

=over 4

=item wikiPage

Name of the wiki page to be linked from the hint mark. The words of the name are
capitalized and joined together to form the page's URL.

=item hintText

Text to display for the hint. It is raw html. Single quotes are escaped for the
tooltip script, and double quotes are converted to C<&quot;> so that they cannot
terminate the HTML attribute that holds the script.

=item RETURN

Returns the html for the hint facility. The resulting html shows a small button-like thing that
uses the standard FIG popup technology.

=back

=cut

sub Hint {
    # Get the parameters.
    my ($wikiPage, $hintText) = @_;
    # Escape the single quotes in the hint text, since it is placed inside a
    # single-quoted script string.
    my $quotedText = $hintText;
    $quotedText =~ s/'/\\'/g;
    # The script itself sits inside a double-quoted HTML attribute, so a raw double
    # quote would end the attribute early. The entity is decoded by the browser
    # before the script runs, so the tooltip still shows a double quote.
    $quotedText =~ s/"/&quot;/g;
    # Convert the wiki page name to a URL.
    my $wikiURL = join("", map { ucfirst $_ } split /\s+/, $wikiPage);
    $wikiURL = "$FIG_Config::cgi_url/wiki/view.cgi/FIG/$wikiURL";
    # Compute the mouseover script.
    my $mouseOver = "doTooltip(this, '$quotedText')";
    # Create the html.
    my $retVal = "&nbsp;<a href=\"$wikiURL\"><img src=\"$FIG_Config::cgi_url/Html/button-h.png\" class=\"helpicon\" onmouseover=\"$mouseOver\"/></a>";
    # Return it.
    return $retVal;
}
4343    
4344  1;  1;

Legend:
Removed from v.1.111  
changed lines
  Added in v.1.120

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3