[Bio] / Sprout / SimBlocks.pm Repository:
ViewVC logotype

Diff of /Sprout/SimBlocks.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.7, Mon Feb 13 15:42:48 2006 UTC revision 1.11, Thu Dec 6 14:58:03 2007 UTC
# Line 174  Line 174 
174    
175  =head3 new  =head3 new
176    
177  C<< my $simdb = SimBlocks->new($dbname, $dbType, $port); >>      my $simdb = SimBlocks->new($dbname, $dbType, $port);
178    
179  Construct a new SimBlocks object connected to the specified database. If no  Construct a new SimBlocks object connected to the specified database. If no
180  database is specified, the default database indicated by C<FIG_Config::simBlocksDB>  database is specified, the default database indicated by C<FIG_Config::simBlocksDB>
# Line 185  Line 185 
185    
186  In almost every case, you will be calling  In almost every case, you will be calling
187    
188  C<< my $simdb = SimBlocks->new(); >>      my $simdb = SimBlocks->new();
189    
190  =over 4  =over 4
191    
# Line 226  Line 226 
226      # Get the data directory name.      # Get the data directory name.
227      my $directory = $FIG_Config::simBlocksData;      my $directory = $FIG_Config::simBlocksData;
228      # Create and bless the ERDB object.      # Create and bless the ERDB object.
229      my $retVal = ERDB::new($class, $dbh, "$directory/SimBlocksDBD.xml");      my $retVal = ERDB::new($class, $dbh, "$FIG_Config::fig/SimBlocksDBD.xml");
230      # Return it.      # Return it.
231      return $retVal;      return $retVal;
232  }  }
233    
234  =head3 DBName  =head3 DBName
235    
236  C<< my $name = SimBlocks::DBName; >>      my $name = SimBlocks::DBName;
237    
238  Return the name of the database. This is set from a config variable, but if the  Return the name of the database. This is set from a config variable, but if the
239  variable is undefined a default value is used.  variable is undefined a default value is used.
# Line 252  Line 252 
252    
253  =head3 DefaultDistances  =head3 DefaultDistances
254    
255  C<< my $distances = DefaultDistances(); >>      my $distances = DefaultDistances();
256    
257  Return the default distance matrix for computing the alignment distances (see  Return the default distance matrix for computing the alignment distances (see
258  also L</DistanceMatrix>). This matrix returns a distance of 0 between an insertion  also L</DistanceMatrix>). This matrix returns a distance of 0 between an insertion
# Line 284  Line 284 
284    
285  =head3 DBLoad  =head3 DBLoad
286    
287  C<< my $stats = $simBlocks->DBLoad($rebuild); >>      my $stats = $simBlocks->DBLoad($rebuild);
288    
289  Load the database from the default directory. This is essentially identical to  Load the database from the default directory. This is essentially identical to
290  a B<LoadTables> call with the default directory used instead of a caller-specified  a B<LoadTables> call with the default directory used instead of a caller-specified
# Line 319  Line 319 
319    
320  =head3 CompareGenomes  =head3 CompareGenomes
321    
322  C<< my (\%set0Blocks, \%set1Blocks, \%bothBlocks) = $simBlocks->CompareGenomes(\@set0, \@set1); >>      my (\%set0Blocks, \%set1Blocks, \%bothBlocks) = $simBlocks->CompareGenomes(\@set0, \@set1);
323    
324  Analyze two sets of genomes for commonalities. The group blocks returned will be divided  Analyze two sets of genomes for commonalities. The group blocks returned will be divided
325  into three hashes: one for those common to set 0 and not occurring at all in set 1, one  into three hashes: one for those common to set 0 and not occurring at all in set 1, one
326  for those common to set 1 and not occurring at all in set 0, and one for those common  for those common to set 1 and not occurring at all in set 0, and one for those common
327  to both sets. Each hash is keyed by group ID and will contain B<DBObject>s for  to both sets. Each hash is keyed by group ID and will contain B<ERDBObject>s for
328  B<GroupBlock> records with B<HasInstanceOf> data attached, though the genome ID in  B<GroupBlock> records with B<HasInstanceOf> data attached, though the genome ID in
329  the B<HasInstanceOf> section is not generally predictable.  the B<HasInstanceOf> section is not generally predictable.
330    
# Line 341  Line 341 
341  =item RETURN  =item RETURN
342    
343  Returns a triple of hashes. Each hash is keyed by group ID, and will contain  Returns a triple of hashes. Each hash is keyed by group ID, and will contain
344  B<DBObject>s for records in the B<GroupBlock> table. Groups found in all of the  B<ERDBObject>s for records in the B<GroupBlock> table. Groups found in all of the
345  genomes in set 0 but in none of the genomes of set 1 will be in the first hash,  genomes in set 0 but in none of the genomes of set 1 will be in the first hash,
346  groups found in all of the genomes in set 1 but in none of the genomes of set 0  groups found in all of the genomes in set 1 but in none of the genomes of set 0
347  will be in the second hash, and groups found in all of the genomes of both sets  will be in the second hash, and groups found in all of the genomes of both sets
# Line 401  Line 401 
401    
402  =head3 RemoveBlocks  =head3 RemoveBlocks
403    
404  C<< $simBlocks->RemoveBlocks(\%blockMap, \@set); >>      $simBlocks->RemoveBlocks(\%blockMap, \@set);
405    
406  Remove from the specified block map any blocks that occur in the specified set of genomes.  Remove from the specified block map any blocks that occur in the specified set of genomes.
407  The block map can contain any data, but it must be keyed by block ID.  The block map can contain any data, but it must be keyed by block ID.
# Line 440  Line 440 
440    
441  =head3 BlocksInSet  =head3 BlocksInSet
442    
443  C<< my %blockList = $simBlocks->BlocksInSet($set, $count); >>      my %blockList = $simBlocks->BlocksInSet($set, $count);
444    
445  Return a list of the group blocks found in a given number of the genomes in a given  Return a list of the group blocks found in a given number of the genomes in a given
446  set. The list returned will be a hash of B<DBObject>s, each corresponding to a single  set. The list returned will be a hash of B<ERDBObject>s, each corresponding to a single
447  B<GroupBlock> record, with a B<HasInstanceOf> record attached, though the content of  B<GroupBlock> record, with a B<HasInstanceOf> record attached, though the content of
448  the B<HasInstanceOf> record is not predictable. The hash will be keyed by block ID.  the B<HasInstanceOf> record is not predictable. The hash will be keyed by block ID.
449    
# Line 464  Line 464 
464    
465  =item RETURN  =item RETURN
466    
467  Returns a hash of B<DBObject>s corresponding to the group blocks found in the  Returns a hash of B<ERDBObject>s corresponding to the group blocks found in the
468  genomes of the set.  genomes of the set.
469    
470  =back  =back
# Line 488  Line 488 
488      for my $genomeID (@{$set}) {      for my $genomeID (@{$set}) {
489          # Get a list of group blocks for this genome.          # Get a list of group blocks for this genome.
490          my @blocks = $self->GetList(['HasInstanceOf', 'GroupBlock'],          my @blocks = $self->GetList(['HasInstanceOf', 'GroupBlock'],
491                                      "HasInstanceOf(from-link) = ?", $genomeID);                                      "HasInstanceOf(from-link) = ?", [$genomeID]);
492          # Loop through the blocks, storing any new ones in the hash.          # Loop through the blocks, storing any new ones in the hash.
493          for my $block (@blocks) {          for my $block (@blocks) {
494              # Get the ID of this block.              # Get the ID of this block.
# Line 518  Line 518 
518    
519  =head3 GetRegions  =head3 GetRegions
520    
521  C<< my %regions = $simBlocks->GetRegions($blockID, \@genomes); >>      my %regions = $simBlocks->GetRegions($blockID, \@genomes);
522    
523  Return the regions of the specified block that occur in the contigs of  Return the regions of the specified block that occur in the contigs of
524  the specified genomes. The return value is a hash of DNA strings keyed  the specified genomes. The return value is a hash of DNA strings keyed
# Line 553  Line 553 
553      my $regionCount = 0;      my $regionCount = 0;
554      # Query all the regions for the specified block.      # Query all the regions for the specified block.
555      my $query = $self->Get(['IncludesRegion', 'Region'], "IncludesRegion(from-link) = ?",      my $query = $self->Get(['IncludesRegion', 'Region'], "IncludesRegion(from-link) = ?",
556                             $blockID);                             [$blockID]);
557      # Loop through the query.      # Loop through the query.
558      while (my $region = $query->Fetch) {      while (my $region = $query->Fetch) {
559          # Get this region's data.          # Get this region's data.
# Line 574  Line 574 
574    
575  =head3 SetNumber  =head3 SetNumber
576    
577  C<< my $setNumber = SimBlocks::SetNumber($contigRegion, \@set0, \@set1, ..., \@setN); >>      my $setNumber = SimBlocks::SetNumber($contigRegion, \@set0, \@set1, ..., \@setN);
578    
579  Examine a region string, contig ID, or genome ID, and return the number of the genome  Examine a region string, contig ID, or genome ID, and return the number of the genome
580  set to which it belongs.  set to which it belongs.
# Line 620  Line 620 
620    
621  =head3 TagDNA  =head3 TagDNA
622    
623  C<< my $taggedDNA = SimBlocks::TagDNA($pattern, $dnaString, $prefix, $suffix); >>      my $taggedDNA = SimBlocks::TagDNA($pattern, $dnaString, $prefix, $suffix);
624    
625  Convert a DNA string from the B<Region> relation to the actual DNA.  Convert a DNA string from the B<Region> relation to the actual DNA.
626  The incoming DNA string will contain only the values corresponding to the  The incoming DNA string will contain only the values corresponding to the
# Line 695  Line 695 
695    
696  =head3 SnipScan  =head3 SnipScan
697    
698  C<< my %positions = $simBlocks->SnipScan($blockObject, \@set0, \@set1); >>      my %positions = $simBlocks->SnipScan($blockObject, \@set0, \@set1);
699    
700  Examine the specified block and return a list of the positions at which the  Examine the specified block and return a list of the positions at which the
701  nucleotide values for regions in the first genome set differ from the values  nucleotide values for regions in the first genome set differ from the values
# Line 709  Line 709 
709    
710  =item blockObject  =item blockObject
711    
712  A C<DBObject> representing the B<GroupBlock> record for the desired block or  An C<ERDBObject> representing the B<GroupBlock> record for the desired block or
713  the actual ID of the block whose regions are to be examined. It is expected that the  the actual ID of the block whose regions are to be examined. It is expected that the
714  block will have regions in all of the genomes for both sets, but this is not  block will have regions in all of the genomes for both sets, but this is not
715  required by the algorithm.  required by the algorithm.
# Line 738  Line 738 
738      # Get the parameters.      # Get the parameters.
739      my ($self, $blockObject, $set0, $set1) = @_;      my ($self, $blockObject, $set0, $set1) = @_;
740      # Convert an incoming block ID to a block object.      # Convert an incoming block ID to a block object.
741      if (ref $blockObject ne "DBObject") {      if (ref $blockObject ne "ERDBObject") {
742          $blockObject = $self->GetEntity('GroupBlock', $blockObject);          $blockObject = $self->GetEntity('GroupBlock', $blockObject);
743      }      }
744      # Get the ID and length of this block.      # Get the ID and length of this block.
# Line 760  Line 760 
760      }      }
761      # Ask for the regions in the block.      # Ask for the regions in the block.
762      my $query = $self->Get(['IncludesRegion', 'Region'], "IncludesRegion(from-link) = ?",      my $query = $self->Get(['IncludesRegion', 'Region'], "IncludesRegion(from-link) = ?",
763                             $blockID);                             [$blockID]);
764      # Loop through the regions.      # Loop through the regions.
765      while (my $region = $query->Fetch) {      while (my $region = $query->Fetch) {
766          # Determine this region's genome set. We only continue if the region is in          # Determine this region's genome set. We only continue if the region is in
# Line 807  Line 807 
807    
808  =head3 ParsePattern  =head3 ParsePattern
809    
810  C<< my @positions = SimBlocks::ParsePattern($pattern); >>      my @positions = SimBlocks::ParsePattern($pattern);
811    
812  Get a list of the positions of variance inside a block pattern. The  Get a list of the positions of variance inside a block pattern. The
813  positions of variance are marked by question marks, so all we need to  positions of variance are marked by question marks, so all we need to
# Line 846  Line 846 
846    
847  =head3 MergeDNA  =head3 MergeDNA
848    
849  C<< my ($groupSequence, $variance) = SimBlocks::MergeDNA($groupSequence, $newSequence); >>      my ($groupSequence, $variance) = SimBlocks::MergeDNA($groupSequence, $newSequence);
850    
851  Merge the DNA for a region into the group representation of similar DNA, returning the  Merge the DNA for a region into the group representation of similar DNA, returning the
852  result and the positions of variance. Positions of variance in the group representation  result and the positions of variance. Positions of variance in the group representation
# Line 909  Line 909 
909    
910  =head3 GetAlignment  =head3 GetAlignment
911    
912  C<< my %sequences = $simBlocks->GetAlignment(\@blockIDs, \@genomeIDs, $indels); >>      my %sequences = $simBlocks->GetAlignment(\@blockIDs, \@genomeIDs, $indels);
913    
914  Return an alignment of the specified genomes relative to the specified block  Return an alignment of the specified genomes relative to the specified block
915  IDs. Only blocks in which all of the genomes occur will produce output for  IDs. Only blocks in which all of the genomes occur will produce output for
# Line 1006  Line 1006 
1006    
1007  =head3 DistanceMatrix  =head3 DistanceMatrix
1008    
1009  C<< my %distances = SimBlocks::DistanceMatrix(\%sequences, \%distances); >>      my %distances = SimBlocks::DistanceMatrix(\%sequences, \%distances);
1010    
1011  Compute the distances between the sequences in an alignment. The L</SequenceDistance>  Compute the distances between the sequences in an alignment. The L</SequenceDistance>
1012  method is used to compute the individual distances.  method is used to compute the individual distances.
# Line 1066  Line 1066 
1066    
1067  =head3 SequenceDistance  =head3 SequenceDistance
1068    
1069  C<< my $dist = SimBlocks::SequenceDistance($seq1, $seq2, $distances); >>      my $dist = SimBlocks::SequenceDistance($seq1, $seq2, $distances);
1070    
1071  Return the distance between two sequences. The distance presumes that  Return the distance between two sequences. The distance presumes that
1072  each alignment is a vector of sorts, with purines (C<A>/C<T>) and pyrimidines (C<G>/C<C>)  each alignment is a vector of sorts, with purines (C<A>/C<T>) and pyrimidines (C<G>/C<C>)
# Line 1128  Line 1128 
1128    
1129  =head3 GetBlock  =head3 GetBlock
1130    
1131  C<< my $blockData = $simBlocks->GetBlock($blockID); >>      my $blockData = $simBlocks->GetBlock($blockID);
1132    
1133  Return a B<DBObject> for a specified group block.  Return an B<ERDBObject> for a specified group block.
1134    
1135  =over 4  =over 4
1136    
# Line 1140  Line 1140 
1140    
1141  =item RETURN  =item RETURN
1142    
1143  Returns a B<DBObject> for the group block in question. The object allows access to  Returns an B<ERDBObject> for the group block in question. The object allows access to
1144  all of the fields in the C<GroupBlock> relation of the database.  all of the fields in the C<GroupBlock> relation of the database.
1145    
1146  =back  =back
# Line 1158  Line 1158 
1158    
1159  =head3 GetBlockPieces  =head3 GetBlockPieces
1160    
1161  C<< my @blocks = $blockData->GetBlockPieces($location); >>      my @blocks = $blockData->GetBlockPieces($location);
1162    
1163  Return a map of the block pieces inside the specified location. The return  Return a map of the block pieces inside the specified location. The return
1164  value will be a list of block locations. A block location is essentially a  value will be a list of block locations. A block location is essentially a
# Line 1265  Line 1265 
1265    
1266  =head3 GetFeatureBlockPieces  =head3 GetFeatureBlockPieces
1267    
1268  C<< my @pieces = $simBlocks->GetFeatureBlockPieces($fig, \@featureIDs, $distance); >>      my @pieces = $simBlocks->GetFeatureBlockPieces($fig, \@featureIDs, $distance);
1269    
1270  Return a list of the block pieces within the specified distance of the  Return a list of the block pieces within the specified distance of the
1271  specified features. This method essentially computes locations from  specified features. This method essentially computes locations from
# Line 1370  Line 1370 
1370    
1371  =head3 WalkDNA  =head3 WalkDNA
1372    
1373  C<< my $blockPos = SimBlocks::WalkDNA($blockPos, $contigPos, $dna, $loc); >>      my $blockPos = SimBlocks::WalkDNA($blockPos, $contigPos, $dna, $loc);
1374    
1375  Locate the desired position within a block of a specified location in a contig.  Locate the desired position within a block of a specified location in a contig.
1376    

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.11

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3