[Bio] / Sprout / Sapling.pm Repository:
ViewVC logotype

Diff of /Sprout/Sapling.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.10, Tue Jun 16 21:58:36 2009 UTC revision 1.11, Tue Jun 30 19:53:51 2009 UTC
# Line 21  Line 21 
21    
22      use strict;      use strict;
23      use Tracer;      use Tracer;
24      use base 'ERDB';      use base qw(ERDB);
25      use Stats;      use Stats;
26      use DBKernel;      use DBKernel;
27        use SeedUtils;
28        use BasicLocation;
29      use XML::Simple;      use XML::Simple;
30    
31  =head1 Sapling Package  =head1 Sapling Package
# Line 32  Line 34 
34    
35  =head2 Introduction  =head2 Introduction
36    
37  The Sapling database is a new [[ErdbPm]] database that attempts to encapsulate  The Sapling database is a new Entity-Relationship Database that attempts to
38  our data in a portable form for distribution. It is loaded directly from the  encapsulate our data in a portable form for distribution. It is loaded directly
39  complete genomes and trusted subsystems of the SEED. This object has minimal  from the genomes and subsystems of the SEED. This object has minimal
40  capabilities: in essence, it's just enough to get the database loaded and  capabilities: most of its power comes from the L<ERDB> base class.
 working. As with the earlier Sprout database, most of the work required to use  
 the database can be performed using the base-class methods.  
41    
42  The fields in this object are as follows.  The fields in this object are as follows.
43    
# Line 49  Line 49 
49    
50  =item loaderSource  =item loaderSource
51    
52  Source object for the loaders (a [[FigPm]] in our case).  Source object for the loaders (a L<FIG> in our case).
53    
54  =item genomeHash  =item genomeHash
55    
# Line 210  Line 210 
210    
211  =head2 Public Methods  =head2 Public Methods
212    
213    =head3 ComputeDNA
214    
215        my $dna = $sap->ComputeDNA($location);
216    
217    Return the DNA sequence for the specified location.
218    
219    =over 4
220    
221    =item location
222    
223    A L<BasicLocation> object indicating the contig, start location, direction, and
224    length of the desired DNA segment.
225    
226    =item RETURN
227    
228    Returns a string containing the desired DNA.
229    
230    =back
231    
232    =cut
233    
sub ComputeDNA {
    # Return the DNA covered by a location. The DNA is assembled from the
    # DNASequence segments of the location's contig, each of which holds
    # "maxSequenceLength" bases and is keyed by "contigID:NNNNNNN", where
    # NNNNNNN is the zero-padded segment ordinal.
    #
    #   location    object supplying Contig, Left, Right, Length, and Dir
    #   RETURN      string containing the DNA for the location
    #
    # Get the parameters.
    my ($self, $location) = @_;
    # Get the contig, left end, and right end of the location. We subtract 1
    # to convert contig positions into 0-based string offsets.
    # NOTE(review): assumes BasicLocation positions are 1-based -- confirm.
    my $contig = $location->Contig;
    my $left = $location->Left - 1;
    my $right = $location->Right - 1;
    # Get the DNA segment length.
    my $maxSequenceLength = $self->TuningParameter("maxSequenceLength");
    # Compute the key of the first segment of our DNA and the starting
    # point in that segment.
    my $leftOffset = $left % $maxSequenceLength;
    my $leftKey = "$contig:" . Tracer::Pad(int($left / $maxSequenceLength),
                                           7, 1, '0');
    # Compute the key of the last segment containing our DNA.
    my $rightKey = "$contig:" . Tracer::Pad(int($right / $maxSequenceLength),
                                            7, 1, '0');
    # Read every segment from the first key through the last key. The keys are
    # zero-padded, so string order matches segment order; the ORDER BY makes
    # sure the pieces come back in the sequence we need to join them.
    my @results = $self->GetFlat("DNASequence",
                                 'DNASequence(id) >= ? AND DNASequence(id) <= ? ORDER BY DNASequence(id)',
                                 [$leftKey, $rightKey], 'sequence');
    # Form all the DNA into a string and extract our piece.
    my $retVal = substr(join("", @results), $leftOffset, $location->Length);
    # If this is a backwards string, we need the reverse complement.
    # NOTE(review): rev_comp is called in void context, so this relies on it
    # updating the string through the reference -- confirm against SeedUtils.
    rev_comp(\$retVal) if $location->Dir eq '-';
    # Return the result.
    return $retVal;
}
260    
261    =head3 GetLocations
262    
263        my @locs = $sapling->GetLocations($fid);
264    
265    Return the locations of the DNA for the specified feature.
266    
267    =over 4
268    
269    =item fid
270    
271    ID of the feature whose location is desired.
272    
273    =item RETURN
274    
275    Returns a list of L<BasicLocation> objects for the locations containing the
276    feature's DNA.
277    
278    =back
279    
280    =cut
281    
sub GetLocations {
    # Build the list of location objects for a feature, one per IsLocatedIn
    # record, in ordinal order.
    #
    #   fid         ID of the feature whose locations are wanted
    #   RETURN      list of BasicLocation objects
    #
    my ($self, $fid) = @_;
    # Ask for the feature's location records, sorted by ordinal.
    my $query = $self->Get("IsLocatedIn",
                           'IsLocatedIn(from-link) = ? ORDER BY IsLocatedIn(ordinal)',
                           [$fid]);
    # Convert each record into a location object.
    my @locations;
    while (my $row = $query->Fetch()) {
        # Pull the four fields we need; each is read in scalar context.
        my ($contigID, $leftPoint, $strand, $length) =
            map { scalar $row->PrimaryValue($_) } qw(to-link begin dir len);
        # The record stores the leftmost position. On any strand other than
        # '+', the location string starts at the other end of the segment.
        my $startPoint = $leftPoint;
        if ($strand ne '+') {
            $startPoint = $leftPoint + $length - 1;
        }
        # Assemble the location string and turn it into an object.
        my $locString = $contigID . "_" . $startPoint . $strand . $length;
        push @locations, BasicLocation->new($locString);
    }
    return @locations;
}
305    
306    
307  =head3 IdentifiedProtein  =head3 IdentifiedProtein
308    
309      my $proteinID = $sap->IdentifiedProtein($id);      my $proteinID = $sap->IdentifiedProtein($id);
# Line 339  Line 433 
433    
434      my $ssData = $sapling->GetSubsystem($ssName);      my $ssData = $sapling->GetSubsystem($ssName);
435    
436  Return a [[SaplingSubsysPm]] object for the named subsystem.  Return a L<SaplingSubsys> object for the named subsystem.
437    
438  =over 4  =over 4
439    
# Line 379  Line 473 
473  =item location  =item location
474    
475  Location of interest, either in the form of a location string (e.g.  Location of interest, either in the form of a location string (e.g.
476  C<360108.3:NZ_AANK01000002_264528_264007>)  or a [[BasicLocationPm]]  C<360108.3:NZ_AANK01000002_264528_264007>)  or a L<BasicLocation>
477  object.  object.
478    
479  =item RETURN  =item RETURN
# Line 448  Line 542 
542  sub GetFasta {  sub GetFasta {
543      # Get the parameters.      # Get the parameters.
544      my ($self, $proteinID, $id, $comment) = @_;      my ($self, $proteinID, $id, $comment) = @_;
545      # Compute the identifier and comment.      # Compute the identifier.
546      my $realID = $id || "md5|$proteinID";      my $realID = $id || "md5|$proteinID";
     my $realComment = (defined $comment ? " $comment" : "");  
547      # Declare the return variable.      # Declare the return variable.
548      my $retVal;      my $retVal;
549      # Get the protein sequence.      # Get the protein sequence.
# Line 460  Line 553 
553      if (! defined $sequence) {      if (! defined $sequence) {
554          Confess("No protein found with the sequence identifier $proteinID.");          Confess("No protein found with the sequence identifier $proteinID.");
555      } else {      } else {
556          # Bust the sequence into 60-character chunks.          # Create a FASTA string for the protein.
557          my @chunks = grep { $_ } split /(.{1,60})/, $sequence;          $retVal = SeedUtils::create_fasta_record($realID, $comment, $sequence);
         # Put it together to make the FASTA string. Note that we force a new-line  
         # at the end.  
         $retVal = join("\n", ">$realID$realComment", @chunks, "//", "");  
558      }      }
559      # Return the result.      # Return the result.
560      return $retVal;      return $retVal;
# Line 747  Line 837 
837    
838  =head2 Virtual Methods  =head2 Virtual Methods
839    
840    =head3 PreferredName
841    
842        my $name = $erdb->PreferredName();
843    
844    Return the variable name to use for this database when generating code.
845    
846    =cut
847    
sub PreferredName {
    # Variable name that generated code uses for this database.
    my $variableName = 'sap';
    return $variableName;
}
851    
852  =head3 GetSourceObject  =head3 GetSourceObject
853    
854      my $source = $erdb->GetSourceObject();      my $source = $erdb->GetSourceObject();
855    
856  Return the object to be used in creating load files for this database. This is  Return the object to be used in creating load files for this database. This is
857  only the default source object. Loaders have the option of overriding the chosen  only the default source object. Loaders have the option of overriding the chosen
858  source object when constructing the [[ERDBLoadGroupPm]] objects.  source object when constructing the L<ERDBLoadGroup> objects.
859    
860  =cut  =cut
861    
# Line 798  Line 900 
900    
901      my $groupLoader = $erdb->Loader($groupName, $source, $options);      my $groupLoader = $erdb->Loader($groupName, $source, $options);
902    
903  Return an [[ERDBLoadGroupPm]] object for the specified load group. This method is used  Return an L<ERDBLoadGroup> object for the specified load group. This method is used
904  by [[ERDBGeneratorPl]] to create the load group objects. If you are not using  by L<ERDBGenerator.pl> to create the load group objects. If you are not using
905  [[ERDBGeneratorPl]], you don't need to override this method.  L<ERDBGenerator.pl>, you don't need to override this method.
906    
907  =over 4  =over 4
908    
# Line 823  Line 925 
925    
926  =item RETURN  =item RETURN
927    
928  Returns an [[ERDBLoadGroupPm]] object that can be used to process the specified load group  Returns an L<ERDBLoadGroup> object that can be used to process the specified load group
929  for this database.  for this database.
930    
931  =back  =back
# Line 850  Line 952 
952      my @groups = $erdb->LoadGroupList();      my @groups = $erdb->LoadGroupList();
953    
954  Returns a list of the names for this database's load groups. This method is used  Returns a list of the names for this database's load groups. This method is used
955  by [[ERDBGeneratorPl]] when the user wishes to load all table groups. The default  by L<ERDBGenerator.pl> when the user wishes to load all table groups. The default
956  is a single group called 'All' that loads everything.  is a single group called 'All' that loads everything.
957    
958  =cut  =cut

Legend:
Removed from v.1.10  
changed lines
  Added in v.1.11

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3