[Bio] / Sprout / Sapling.pm Repository:
ViewVC logotype

Diff of /Sprout/Sapling.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.7, Mon May 4 18:49:49 2009 UTC revision 1.8, Thu May 28 18:08:56 2009 UTC
# Line 65  Line 65 
65    
66  =back  =back
67    
68  =head2 Configuration  =head2 Configuration and Construction
69    
70  The default loading profile for the Sapling database is to include all complete  The default loading profile for the Sapling database is to include all complete
71  genomes and all usable subsystems. This can be overridden by specifying a list of  genomes and all usable subsystems. This can be overridden by specifying a list of
# Line 118  Line 118 
118    
119  =back  =back
120    
 =head2 Special Methods  
   
121  =head3 Global Section Constant  =head3 Global Section Constant
122    
123  Each section of the database used by the loader corresponds to a single genome.  Each section of the database used by the loader corresponds to a single genome.
# Line 210  Line 208 
208      return $retVal;      return $retVal;
209  }  }
210    
   
211  =head2 Public Methods  =head2 Public Methods
212    
=head3 IdentifiedProtein

    my $proteinID = $sap->IdentifiedProtein($id);

Find the protein sequence associated with an identifier. The identifier
is first looked up as a direct name for a protein sequence. If that
fails, it is looked up as the identifier of a feature that produces a
protein sequence. If neither lookup finds anything, the result is
C<undef>.

=over 4

=item id

Identifier to be resolved to a protein.

=item RETURN

Returns the ID of the protein sequence found for the incoming identifier,
or C<undef> if the identifier does not exist or does not lead to a protein.

=back

=cut

sub IdentifiedProtein {
    my ($self, $id) = @_;
    # First choice: the identifier directly names a protein sequence.
    my ($direct) = $self->GetFlat("Identifier Names ProteinSequence",
                                  "Identifier(id) = ?", [$id],
                                  'ProteinSequence(id)');
    return $direct if defined $direct;
    # Second choice: the identifier belongs to a feature that produces a
    # protein. Several proteins could qualify; the filter clause asks for
    # only one and we take whatever comes back first.
    my ($viaFeature) = $self->GetFlat("Identifier Identifies Feature Produces ProteinSequence",
                                      "Identifier(id) = ? LIMIT 1", [$id],
                                      'ProteinSequence(id)');
    # This is undef when the feature lookup also came up empty.
    return $viaFeature;
}
263    
264    
265    
266    
=head3 GenesInRegion

    my @pegs = $sap->GenesInRegion($location);

Return a sorted list of the IDs of the features that overlap a specified
region on a contig. Each feature ID appears only once in the result.

=over 4

=item location

Region of interest, given either as a location string (e.g.
C<360108.3:NZ_AANK01000002_264528_264007>) or as a [[BasicLocationPm]]
object.

=item RETURN

Returns a list of feature IDs. The list contains every feature that
overlaps or lies inside the region of interest.

=back

=cut

sub GenesInRegion {
    my ($self, $location) = @_;
    # A plain string is converted to a location object; a reference is
    # taken to be a location object already.
    my $region = $location;
    if (! ref $region) {
        $region = BasicLocation->new($location);
    }
    # Extract the boundaries of the region.
    my $left = $region->Left();
    my $right = $region->Right();
    # To keep the query fast, only consider segments that start no further
    # back than the maximum location length allows.
    my $earliestStart = $left - $self->TuningParameter('maxLocationLength');
    # A qualifying segment begins at or before the region's right edge,
    # begins after the earliest permitted start, and extends at least to
    # the region's left edge.
    my $filter = "Contig(id) = ? AND IsLocusFor(begin) <= ? AND " .
                 "IsLocusFor(begin) > ? AND " .
                 "IsLocusFor(begin) + IsLocusFor(len) >= ?";
    my @featureIDs = $self->GetFlat('Contig IsLocusFor Feature', $filter,
                                    [$region->Contig(), $right, $earliestStart, $left],
                                    'Feature(id)');
    # A feature can come back more than once, so collapse duplicates by
    # using the IDs as hash keys.
    my %found;
    @found{@featureIDs} = ();
    return sort keys %found;
}
312    
=head3 GetFasta

    my $fasta = $sapling->GetFasta($proteinID, $id);

Return a FASTA sequence for the specified protein. An optional identifier
can be provided to be used as the identification string.

=over 4

=item proteinID

Protein sequence identifier.

=item id (optional)

The identifier to be used in the FASTA output. If omitted, the string
C<md5|> followed by the protein ID is used.

=item RETURN

Returns a FASTA string for the protein. This consists of the
identification line, the protein letters in lines of at most 60
characters, and a C<//> trailer line. The string always ends with a
new-line. An error is thrown via C<Confess> if no protein sequence exists
for the specified ID.

=back

=cut

sub GetFasta {
    # Get the parameters.
    my ($self, $proteinID, $id) = @_;
    # Compute the identifier. A missing (or false) ID falls back to the
    # protein ID with an "md5|" prefix.
    my $realID = $id || "md5|$proteinID";
    # Declare the return variable.
    my $retVal;
    # Get the protein sequence.
    my ($sequence) = $self->GetFlat("ProteinSequence", "ProteinSequence(id) = ?",
                                    [$proteinID], "sequence");
    # It's an error if the sequence was not found.
    if (! defined $sequence) {
        Confess("No protein found with the sequence identifier $proteinID.");
    } else {
        # Bust the sequence into chunks of up to 60 characters. A global
        # match returns exactly the chunks, so nothing needs to be filtered
        # afterward. Filtering the output of "split" by truth value would
        # discard a chunk consisting of the single character "0", and would
        # let embedded new-lines through as separate chunks.
        my @chunks = ($sequence =~ /.{1,60}/g);
        # Put it together to make the FASTA string. Note that the empty
        # string at the end forces a trailing new-line.
        $retVal = join("\n", ">$realID", @chunks, "//", "");
    }
    # Return the result.
    return $retVal;
}
363    
364    
365  =head3 Taxonomy  =head3 Taxonomy
366    
367      my @taxonomy = $sap->Taxonomy($genomeID);      my @taxonomy = $sap->Taxonomy($genomeID);
# Line 299  Line 448 
448          } else {          } else {
449              # No, so get the genome list.              # No, so get the genome list.
450              my $fig = $self->GetSourceObject();              my $fig = $self->GetSourceObject();
451              my @genomes = $fig->genomes(1);              my @genomes = $fig->genomes();
452              # Verify the genome list to insure every genome has an organism              # Verify the genome list to insure every genome has an organism
453              # directory.              # directory.
454              for my $genome (@genomes) {              for my $genome (@genomes) {
# Line 376  Line 525 
525          } else {          } else {
526              # No config file, so we ask the FIG object.              # No config file, so we ask the FIG object.
527              my $fig = $self->GetSourceObject();              my $fig = $self->GetSourceObject();
528              my @subs = map { $self->SubsystemID($_) } $fig->all_subsystems();              %subHash = map { $self->SubsystemID($_) => 1 } $fig->all_subsystems();
             %subHash = map { $_ => 1 } grep { $fig->usable_subsystem($_) } @subs;  
529          }          }
530          # Store the subsystems in this object.          # Store the subsystems in this object.
531          $self->{subHash} = \%subHash;          $self->{subHash} = \%subHash;
# Line 600  Line 748 
748    
749  sub LoadGroupList {  sub LoadGroupList {
750      # Return the list.      # Return the list.
751      return qw(Genome Feature Subsystem Family Scenario Model); # ##TODO Drug, Protein      return qw(Protein Genome Feature Subsystem Family Scenario Model); # ##TODO Drug
752  }  }
753    
754  =head3 LoadDirectory  =head3 LoadDirectory

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.8

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3