[Bio] / Sprout / Sapling.pm Repository:
ViewVC logotype

Diff of /Sprout/Sapling.pm

Parent Directory | Revision Log | View Patch

revision 1.3, Thu Feb 5 07:16:16 2009 UTC revision 1.4, Mon Mar 2 22:29:03 2009 UTC
# Line 110  Line 110 
110  a huge operon that contains tens of thousands of base pairs, you'll still be  a huge operon that contains tens of thousands of base pairs, you'll still be
111  able to find it.  able to find it.
112    
113    =item maxSequenceLength
114    
115    The maximum number of base pairs allowed in a single DNA sequence. DNA sequences
116    are broken into segments to prevent excessively large genomes from clogging
117    memory during sequence resolution.
118    
119  =back  =back
120    
121  =head2 Special Methods  =head2 Special Methods
# Line 134  Line 140 
140  =cut  =cut
141    
142      use constant TUNING_DEFAULTS => {      use constant TUNING_DEFAULTS => {
143          maxLocationLength => 4000          maxLocationLength => 4000,
144            maxSequenceLength => 1000000,
145      };      };
146    
147  =head3 new  =head3 new
# Line 205  Line 212 
212    
213  =head2 Public Methods  =head2 Public Methods
214    
215    =head3 Taxonomy
216    
217        my @taxonomy = $sap->Taxonomy($genomeID);
218    
219    Return the full taxonomy of the specified genome, starting from the
220    domain downward. The returned values will be primary names, not taxonomy
221    IDs.
222    
223    =over 4
224    
225    =item genomeID
226    
227    ID of the genome whose taxonomy is desired. The genome does not need to exist
228    in the database: the version number will be lopped off and the result used as
229    an entry point into the taxonomy tree.
230    
231    =item RETURN
232    
233    Returns a list of taxonomy names, starting from the domain and moving
234    down to the node where the genome is attached.
235    
236    =back
237    
238    =cut
239    
240    sub Taxonomy {
241        # Get the parameters: this object and the ID of the genome of interest.
242        my ($self, $genomeID) = @_;
243        # Get the genome's taxonomic group: the part of the genome ID before the first period.
244        my ($taxon) = split /\./, $genomeID, 2;
245        # We'll put the return data in here: group names, highest level first.
246        my @retVal;
247        # Loop until we hit a domain or a lookup fails. NOTE(review): assumes the parent links contain no cycle -- confirm.
248        my $domainFlag;
249        while (! $domainFlag) {
250            # Get the data we need for this taxonomic group. Only the first row returned is kept.
251            my ($taxonData) = $self->GetAll('TaxonomicGrouping IsInGroup',
252                                            'TaxonomicGrouping(id) = ?', [$taxon],
253                                            'domain scientific-name IsInGroup(to-link)');
254            # If we didn't find what we're looking for, then we have a problem. This
255            # would indicate a node below the domain level that doesn't have a parent
256            # or (more likely) an invalid input string.
257            if (! $taxonData) {
258                # Terminate the loop and trace a warning. Names collected so far are still returned.
259                $domainFlag = 1;
260                Trace("Could not find node or parent for \"$taxon\".") if T(1);
261            } else {
262                # Extract the data for the current group. Note we overwrite our
263                # taxonomy ID with the ID of our parent, priming the next iteration
264                # of the loop. A true domain flag ends the loop after this pass.
265                my $name;
266                ($domainFlag, $name, $taxon) = @$taxonData;
267                # Put the current group's name at the front of the return list, so the domain ends up first.
268                unshift @retVal, $name;
269            }
270        }
271        # Return the result as a list of names.
272        return @retVal;
273    }
274    
275    
276  =head3 GenomeHash  =head3 GenomeHash
277    
278      my $genomeHash = $sap->GenomeHash();      my $genomeHash = $sap->GenomeHash();
# Line 261  Line 329 
329    
330  =item RETURN  =item RETURN
331    
332  Returns an MD5 hash of the normalized subsystem name.  Returns a normalized subsystem name.
333    
334  =back  =back
335    
# Line 270  Line 338 
338  sub SubsystemID {  sub SubsystemID {
339      # Get the parameters.      # Get the parameters.
340      my ($self, $subName) = @_;      my ($self, $subName) = @_;
341      # Normalize the subsystem name. Spaces are converted to underscores,      # Normalize the subsystem name by converting underscores to spaces.
342      # and all letters are lower-cased.      my $retVal = $subName;
343      my $subNormalized = lc $subName;      $retVal =~ s/_/ /g;
     $subNormalized =~ s/\s+/_/g;  
     # Compute a hash of the normalized name.  
     my $retVal = ERDB::DigestKey($subNormalized);  
344      # Return the result.      # Return the result.
345      return $retVal;      return $retVal;
346  }  }
# Line 310  Line 375 
375          } else {          } else {
376              # No config file, so we ask the FIG object.              # No config file, so we ask the FIG object.
377              my $fig = $self->GetSourceObject();              my $fig = $self->GetSourceObject();
378              my @subs = $fig->all_subsystems();              my @subs = map { $self->SubsystemID($_) } $fig->all_subsystems();
379              %subHash = map { $_ => 1 } grep { $fig->usable_subsystem($_) } @subs;              %subHash = map { $_ => 1 } grep { $fig->usable_subsystem($_) } @subs;
380          }          }
381          # Store the subsystems in this object.          # Store the subsystems in this object.
# Line 408  Line 473 
473    
474  =item RETURN  =item RETURN
475    
476  Returns TRUE if the parameter matches the GLOBAL contant, else FALSE.  Returns TRUE if the parameter matches the GLOBAL constant, else FALSE.
477    
478  =back  =back
479    
# Line 534  Line 599 
599    
600  sub LoadGroupList {  sub LoadGroupList {
601      # Return the list.      # Return the list.
602      return qw(Genome Feature Subsystem);  ##TODO more sections      return qw(Genome Feature Subsystem Family Scenario); # ##TODO Model, Drug, Protein
603  }  }
604    
605  =head3 LoadDirectory  =head3 LoadDirectory

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.4

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3