[Bio] / Sprout / Sprout.pm Repository:
ViewVC logotype

Diff of /Sprout/Sprout.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.117, Tue Sep 16 18:57:59 2008 UTC revision 1.121, Wed Oct 15 11:46:22 2008 UTC
# Line 57  Line 57 
57    
58  =item dbName  =item dbName
59    
60  Name of the database.  Name of the database. If omitted, the default Sprout database name is used.
61    
62  =item options  =item options
63    
# Line 96  Line 96 
96  sub new {  sub new {
97      # Get the parameters.      # Get the parameters.
98      my ($class, $dbName, $options) = @_;      my ($class, $dbName, $options) = @_;
99        # Default the database name if it is missing.
100        if (! defined $dbName) {
101            $dbName = $FIG_Config::sproutDB;
102        } elsif (ref $dbName eq 'HASH') {
103            $options = $dbName;
104            $dbName = $FIG_Config::sproutDB;
105        }
106      # Compute the DBD directory.      # Compute the DBD directory.
107      my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :      my $dbd_dir = (defined($FIG_Config::dbd_dir) ? $FIG_Config::dbd_dir :
108                                                    $FIG_Config::fig );                                                    $FIG_Config::fig );
# Line 140  Line 147 
147      $retVal->{groupHash} = undef;      $retVal->{groupHash} = undef;
148      # Set up space for the genome hash. We use this to identify NMPDR genomes.      # Set up space for the genome hash. We use this to identify NMPDR genomes.
149      $retVal->{genomeHash} = undef;      $retVal->{genomeHash} = undef;
150      # Connect to the attributes.      # Remember the data directory name.
151        $retVal->{dataDir} = $dataDir;
152        # Return it.
153        return $retVal;
154    }
155    
156    =head3 ca
157    
158        my $ca = $sprout->ca();
159    
160    Return the [[CustomAttributesPm]] object for retrieving object
161    properties.
162    
163    =cut
164    
165    sub ca {
166        # Get the parameters.
167        my ($self) = @_;
168        # Do we already have an attribute object?
169        my $retVal = $self->{_ca};
170        if (! defined $retVal) {
171            # No, create one. How we do it depends on the configuration.
172      if ($FIG_Config::attrURL) {      if ($FIG_Config::attrURL) {
173          Trace("Remote attribute server $FIG_Config::attrURL chosen.") if T(3);          Trace("Remote attribute server $FIG_Config::attrURL chosen.") if T(3);
174          $retVal->{_ca} = RemoteCustomAttributes->new($FIG_Config::attrURL);              $retVal = RemoteCustomAttributes->new($FIG_Config::attrURL);
175      } elsif ($FIG_Config::attrDbName) {      } elsif ($FIG_Config::attrDbName) {
176          Trace("Local attribute database $FIG_Config::attrDbName chosen.") if T(3);          Trace("Local attribute database $FIG_Config::attrDbName chosen.") if T(3);
177          my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));          my $user = ($FIG_Config::arch eq 'win' ? 'self' : scalar(getpwent()));
178          $retVal->{_ca} = CustomAttributes->new(user => $user);              $retVal = CustomAttributes->new(user => $user);
179      }      }
180      # Return it.          # Save it for next time.
181            $self->{_ca} = $retVal;
182        }
183        # Return the result.
184      return $retVal;      return $retVal;
185  }  }
186    
# Line 666  Line 697 
697                       Hint("GenomeControl", "Type here to filter the genomes displayed.") . "<br />";                       Hint("GenomeControl", "Type here to filter the genomes displayed.") . "<br />";
698          # For multi-select mode, we also have buttons to set and clear selections.          # For multi-select mode, we also have buttons to set and clear selections.
699          if ($multiSelect) {          if ($multiSelect) {
700              push @lines, "<INPUT type=\"button\" name=\"ClearAll\" class=\"bigButton\"  value=\"Clear All\" onClick=\"clearAll('$menuID'); $showSelect\" />";              push @lines, "<INPUT type=\"button\" name=\"ClearAll\" class=\"bigButton\"  value=\"Clear All\" onClick=\"clearAll(getElementById('$menuID')); $showSelect\" />";
701              push @lines, "<INPUT type=\"button\" name=\"SelectAll\" class=\"bigButton\" value=\"Select All\" onClick=\"selectAll('$menuID'); $showSelect\" />";              push @lines, "<INPUT type=\"button\" name=\"SelectAll\" class=\"bigButton\" value=\"Select All\" onClick=\"selectAll(getElementById('$menuID')); $showSelect\" />";
702              push @lines, "<INPUT type=\"button\" name=\"NMPDROnly\" class=\"bigButton\"  value=\"Select NMPDR\" onClick=\"selectSome('$menuID', $nmpdrCount, true); $showSelect;\" />";              push @lines, "<INPUT type=\"button\" name=\"NMPDROnly\" class=\"bigButton\"  value=\"Select NMPDR\" onClick=\"selectSome(getElementById('$menuID'), $nmpdrCount, true); $showSelect;\" />";
703          }          }
704          # Add a hidden field we can use to generate organism page hyperlinks.          # Add a hidden field we can use to generate organism page hyperlinks.
705          push @lines, "<INPUT type=\"hidden\" id=\"$urlID\" value=\"$FIG_Config::cgi_url/wiki/rest.cgi/NmpdrPlugin/SeedViewer?page=Organism;organism=\" />";          push @lines, "<INPUT type=\"hidden\" id=\"$urlID\" value=\"$FIG_Config::cgi_url/wiki/rest.cgi/NmpdrPlugin/SeedViewer?page=Organism;organism=\" />";
# Line 714  Line 745 
745          # We don't have one pre-built, so we build and save it now.          # We don't have one pre-built, so we build and save it now.
746          $stemmer = BioWords->new(exceptions => "$FIG_Config::sproutData/Exceptions.txt",          $stemmer = BioWords->new(exceptions => "$FIG_Config::sproutData/Exceptions.txt",
747                                   stops => "$FIG_Config::sproutData/StopWords.txt",                                   stops => "$FIG_Config::sproutData/StopWords.txt",
748                                   cache => 1);                                   cache => 0);
749          $self->{stemmer} = $stemmer;          $self->{stemmer} = $stemmer;
750      }      }
751      # Try to stem the word.      # Try to stem the word.
# Line 2815  Line 2846 
2846      # Get the parameters.      # Get the parameters.
2847      my ($self, $featureID) = @_;      my ($self, $featureID) = @_;
2848      # Get the properties.      # Get the properties.
2849      my @attributes = $self->{_ca}->GetAttributes($featureID);      my @attributes = $self->ca->GetAttributes($featureID);
2850      # Strip the feature ID off each tuple.      # Strip the feature ID off each tuple.
2851      my @retVal = ();      my @retVal = ();
2852      for my $attributeRow (@attributes) {      for my $attributeRow (@attributes) {
# Line 3122  Line 3153 
3153      # Declare the return variable.      # Declare the return variable.
3154      my %retVal = ();      my %retVal = ();
3155      # Get a list of the genome features that participate in subsystems. For each      # Get a list of the genome features that participate in subsystems. For each
3156      # feature we get its spreadsheet cells and the corresponding roles.      # feature we get its subsystem ID and the corresponding roles.
3157      my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf'],      my @roleData = $self->GetAll(['HasFeature', 'ContainsFeature', 'IsRoleOf', 'HasSSCell'],
3158                               "HasFeature(from-link) = ?", [$genomeID],                               "HasFeature(from-link) = ?", [$genomeID],
3159                               ['HasFeature(to-link)', 'IsRoleOf(to-link)', 'IsRoleOf(from-link)']);                                   ['HasFeature(to-link)', 'IsRoleOf(from-link)',  'HasSSCell(from-link)']);
3160      # Now we get a list of the spreadsheet cells and their associated subsystems. Subsystems      # Now we get a list of valid subsystems. These are the subsystems connected to the genome with
3161      # with an unknown variant code (-1) are skipped. Note the genome ID is at both ends of the      # a non-negative variant code.
3162      # list. We use it at the beginning to get all the spreadsheet cells for the genome and      my %subs = map { $_ => 1 } $self->GetFlat(['ParticipatesIn'],
3163      # again at the end to filter out participation in subsystems with a negative variant code.                                                  "ParticipatesIn(from-link) = ? AND ParticipatesIn(variant-code) >= 0",
3164      my @cellData = $self->GetAll(['IsGenomeOf', 'HasSSCell', 'ParticipatesIn'],                                                  [$genomeID], 'ParticipatesIn(to-link)');
3165                                   "IsGenomeOf(from-link) = ? AND ParticipatesIn(variant-code) >= 0 AND ParticipatesIn(from-link) = ?",      # We loop through @roleData to build the hash.
                                  [$genomeID, $genomeID], ['HasSSCell(to-link)', 'HasSSCell(from-link)']);  
     # Now "@roleData" lists the spreadsheet cell and role for each of the genome's features.  
     # "@cellData" lists the subsystem name for each of the genome's spreadsheet cells. We  
     # link these two lists together to create the result. First, we want a hash mapping  
     # spreadsheet cells to subsystem names.  
     my %subHash = map { $_->[0] => $_->[1] } @cellData;  
     # We loop through @cellData to build the hash.  
3166      for my $roleEntry (@roleData) {      for my $roleEntry (@roleData) {
3167          # Get the data for this feature and cell.          # Get the data for this feature and cell.
3168          my ($fid, $cellID, $role) = @{$roleEntry};          my ($fid, $role, $subsys) = @{$roleEntry};
3169          # Check for a subsystem name.          Trace("Subsystem for $fid is $subsys.") if T(4);
3170          my $subsys = $subHash{$cellID};          # Check the subsystem.
3171          if ($subsys) {          if ($subs{$subsys}) {
3172                Trace("Subsystem found.") if T(4);
3173              # Insure this feature has an entry in the return hash.              # Insure this feature has an entry in the return hash.
3174              if (! exists $retVal{$fid}) { $retVal{$fid} = []; }              if (! exists $retVal{$fid}) { $retVal{$fid} = []; }
3175              # Merge in this new data.              # Merge in this new data.
# Line 3897  Line 3922 
3922      # Get the parameters.      # Get the parameters.
3923      my ($self, $featureID, $key, @values) = @_;      my ($self, $featureID, $key, @values) = @_;
3924      # Add the property using the attached attributes object.      # Add the property using the attached attributes object.
3925      $self->{_ca}->AddAttribute($featureID, $key, @values);      $self->ca->AddAttribute($featureID, $key, @values);
3926  }  }
3927    
3928  =head3 CheckGroupFile  =head3 CheckGroupFile
# Line 3984  Line 4009 
4009  sub CleanKeywords {  sub CleanKeywords {
4010      # Get the parameters.      # Get the parameters.
4011      my ($self, $searchExpression) = @_;      my ($self, $searchExpression) = @_;
4012      # Perform the standard cleanup.      # Get the stemmer.
4013      my $words = $self->ERDB::CleanKeywords($searchExpression);      my $stemmer = $self->GetStemmer();
4014      # Fix the periods in EC and TC numbers.      # Convert the search expression using the stemmer.
4015      $words =~ s/(\d+|\-)\.(\d+|-)\.(\d+|-)\.(\d+|-)/$1_$2_$3_$4/g;      my $retVal = $stemmer->PrepareSearchExpression($searchExpression);
     # Fix non-trailing periods.  
     $words =~ s/\.(\w)/_$1/g;  
     # Fix non-leading minus signs.  
     $words =~ s/(\w)[\-]/$1_/g;  
     # Fix the vertical bars and colons  
     $words =~ s/(\w)[|:](\w)/$1'$2/g;  
     # Now split up the list so that each keyword is in its own string. We keep the delimiters  
     # because they may contain boolean expression data.  
     my @words = split /([^A-Za-z'0-9_]+)/, $words;  
     # We'll convert the stemmable words into stems and re-assemble the result.  
     my $retVal = "";  
     for my $word (@words) {  
         my $stem = $self->Stem($word);  
         if (defined $stem) {  
             $retVal .= $stem;  
         } else {  
             $retVal .= $word;  
         }  
     }  
4016      Trace("Cleaned keyword list for \"$searchExpression\" is \"$retVal\".") if T(3);      Trace("Cleaned keyword list for \"$searchExpression\" is \"$retVal\".") if T(3);
4017      # Return the result.      # Return the result.
4018      return $retVal;      return $retVal;
4019  }  }
4020    
4021    =head3 GetSourceObject
4022    
4023        my $source = $erdb->GetSourceObject();
4024    
4025    Return the object to be used in creating load files for this database.
4026    
4027    =cut
4028    
4029    sub GetSourceObject {
4030        # Get access to the FIG code.
4031        require FIG;
4032        # Return a FIG object.
4033        return FIG->new();
4034    }
4035    
4036    =head3 SectionList
4037    
4038        my @sections = $erdb->SectionList($source);
4039    
4040    Return a list of the names for the different data sections used when loading this database.
4041    The default is an empty string, in which case there is only one section representing the
4042    entire database.
4043    
4044    =over 4
4045    
4046    =item source
4047    
4048    Source object used to access the data from which the database is loaded. This is the
4049    same object returned by L</GetSourceObject>; however, we ask the caller to pass it
4050    in as a parameter so that we don't end up creating multiple copies of a potentially
4051    expensive data structure.
4052    
4053    =item RETURN
4054    
4055    Returns a list of section names.
4056    
4057    =back
4058    
4059    =cut
4060    
4061    sub SectionList {
4062        # Get the parameters.
4063        my ($self, $source) = @_;
4064        # Ask the BaseSproutLoader for a section list.
4065        require BaseSproutLoader;
4066        my @retVal = BaseSproutLoader::GetSectionList($self, $source);
4067        # Return the list.
4068        return @retVal;
4069    }
4070    
4071    =head3 Loader
4072    
4073        my $groupLoader = $erdb->Loader($groupName, $source, $options);
4074    
4075    Return an [[ERDBLoadGroupPm]] object for the specified load group. This method is used
4076    by [[ERDBGeneratorPl]] to create the load group objects. If you are not using
4077    [[ERDBGeneratorPl]], you don't need to override this method.
4078    
4079    =over 4
4080    
4081    =item groupName
4082    
4083    Name of the load group whose object is to be returned. The group name is
4084    guaranteed to be a single word with only the first letter capitalized.
4085    
4086    =item source
4087    
4088    The source object used to access the data from which the load file is derived. This
4089    is the same object returned by L</GetSourceObject>; however, we ask the caller to pass
4090    it in as a parameter so that we don't end up creating multiple copies of a potentially
4091    expensive data structure.
4092    
4093    =item options
4094    
4095    Reference to a hash of command-line options.
4096    
4097    =item RETURN
4098    
4099    Returns an [[ERDBLoadGroupPm]] object that can be used to process the specified load group
4100    for this database.
4101    
4102    =back
4103    
4104    =cut
4105    
4106    sub Loader {
4107        # Get the parameters.
4108        my ($self, $groupName, $source, $options) = @_;
4109        # Compute the loader name.
4110        my $loaderClass = "${groupName}SproutLoader";
4111        # Pull in its definition.
4112        require "$loaderClass.pm";
4113        # Create an object for it.
4114        my $retVal = eval("$loaderClass->new(\$self, \$source, \$options)");
4115        # Insure it worked.
4116        Confess("Could not create $loaderClass object: $@") if $@;
4117        # Return it to the caller.
4118        return $retVal;
4119    }
4120    
4121    =head3 LoadGroupList
4122    
4123        my @groups = $erdb->LoadGroupList();
4124    
4125    Returns a list of the names for this database's load groups. This method is used
4126    by [[ERDBGeneratorPl]] when the user wishes to load all table groups. The default
4127    is a single group called 'All' that loads everything.
4128    
4129    =cut
4130    
4131    sub LoadGroupList {
4132        # Return the list.
4133        return qw(Genome Subsystem Annotation Property Source Reaction Synonym Drug Feature);
4134    }
4135    
4136    =head3 LoadDirectory
4137    
4138        my $dirName = $erdb->LoadDirectory();
4139    
4140    Return the name of the directory in which load files are kept. The default is
4141    the FIG temporary directory, which is a really bad choice, but it's always there.
4142    
4143    =cut
4144    
4145    sub LoadDirectory {
4146        # Get the parameters.
4147        my ($self) = @_;
4148        # Return the directory name.
4149        return $self->{dataDir};
4150    }
4151    
4152  =head2 Internal Utility Methods  =head2 Internal Utility Methods
4153    
4154    =head3 GetStemmer
4155    
4156        my $stemmer = $sprout->GetStemmer();
4157    
4158    Return the stemmer object for this database.
4159    
4160    =cut
4161    
4162    sub GetStemmer {
4163        # Get the parameters.
4164        my ($self) = @_;
4165        # Declare the return variable.
4166        my $retVal = $self->{stemmer};
4167        if (! defined $retVal) {
4168            # We don't have one pre-built, so we build and save it now.
4169            $retVal = BioWords->new(exceptions => "$FIG_Config::sproutData/Exceptions.txt",
4170                                     stops => "$FIG_Config::sproutData/StopWords.txt",
4171                                     cache => 1);
4172            $self->{stemmer} = $retVal;
4173        }
4174        # Return the result.
4175        return $retVal;
4176    }
4177    
4178  =head3 ParseAssignment  =head3 ParseAssignment
4179    
4180  Parse annotation text to determine whether or not it is a functional assignment. If it is,  Parse annotation text to determine whether or not it is a functional assignment. If it is,

Legend:
Removed from v.1.117  
changed lines
  Added in v.1.121

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3