[Bio] / Sprout / SimBlocks.pm Repository:
ViewVC logotype

Diff of /Sprout/SimBlocks.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.3, Fri Jan 6 20:35:01 2006 UTC revision 1.7, Mon Feb 13 15:42:48 2006 UTC
# Line 11  Line 11 
11      use PageBuilder;      use PageBuilder;
12      use Genome;      use Genome;
13      use BasicLocation;      use BasicLocation;
14        use Overlap;
15      use FIG_Config;      use FIG_Config;
16        use FIG;
17    
18  =head1 Similarity Block Database  =head1 Similarity Block Database
19    
# Line 211  Line 213 
213      my ($class, $dbname, $dbType, $port) = @_;      my ($class, $dbname, $dbType, $port) = @_;
214      # Plug in the default values.      # Plug in the default values.
215      if (! $dbname) {      if (! $dbname) {
216          $dbname = $FIG_Config::simBlocksDB;          $dbname = DBName();
217      }      }
218      if (! $dbType) {      if (! $dbType) {
219          $dbType = $FIG_Config::dbms;          $dbType = $FIG_Config::dbms;
# Line 229  Line 231 
231      return $retVal;      return $retVal;
232  }  }
233    
234    =head3 DBName
235    
236    C<< my $name = SimBlocks::DBName; >>
237    
238    Return the name of the database. This is set from a config variable, but if the
239    variable is undefined a default value is used.
240    
241    =cut
242    
243    sub DBName {
244        my $retVal;
245        if (defined $FIG_Config::simBlocksDB) {
246            $retVal = $FIG_Config::simBlocksDB;
247        } else {
248            $retVal = "Blocks";
249        }
250        return $retVal;
251    }
252    
253  =head3 DefaultDistances  =head3 DefaultDistances
254    
255  C<< my $distances = DefaultDistances(); >>  C<< my $distances = DefaultDistances(); >>
# Line 744  Line 765 
765      while (my $region = $query->Fetch) {      while (my $region = $query->Fetch) {
766          # Determine this region's genome set. We only continue if the region is in          # Determine this region's genome set. We only continue if the region is in
767          # one of the sets.          # one of the sets.
768          my ($location) = $region->Value('Region(from-link)');          my ($location) = $region->Value('Region(id)');
769          my $genomeID = Genome::FromContig($location);          my $genomeID = Genome::FromContig($location);
770          if (exists $setMap{$genomeID}) {          if (exists $setMap{$genomeID}) {
771              my $setIndex = $setMap{$genomeID};              my $setIndex = $setMap{$genomeID};
# Line 823  Line 844 
844      return @retVal;      return @retVal;
845  }  }
846    
   
   
847  =head3 MergeDNA  =head3 MergeDNA
848    
849  C<< my ($groupSequence, $variance) = SimBlocks::MergeDNA($groupSequence, $newSequence); >>  C<< my ($groupSequence, $variance) = SimBlocks::MergeDNA($groupSequence, $newSequence); >>
# Line 1180  Line 1199 
1199      my $loc = BasicLocation->new($location);      my $loc = BasicLocation->new($location);
1200      # Determine the parameters needed to get the desired list of regions.      # Determine the parameters needed to get the desired list of regions.
1201      my ($left, $right, $contigID) = ($loc->Left, $loc->Right, $loc->Contig);      my ($left, $right, $contigID) = ($loc->Left, $loc->Right, $loc->Contig);
1202        Trace("Searching for regions near " . $loc->String) if T(4);
1203      # Ask for the regions in the section we want.      # Ask for the regions in the section we want.
1204      my @regions = $self->GetAll(['Region', 'IncludesRegion', 'GroupBlock'],      my @regions = $self->GetAll(['Region', 'IncludesRegion', 'GroupBlock'],
1205                                  'Region(end) >= ? AND Region(position) <= ? AND Region(contigID) = ?',                                  'Region(endpoint) >= ? AND Region(position) <= ? AND Region(contigID) = ?',
1206                                  [$left, $right, $contigID],                                  [$left, $right, $contigID],
1207                                  ['GroupBlock(id)', 'GroupBlock(pattern)',                                  ['GroupBlock(id)', 'GroupBlock(pattern)',
1208                                   'GroupBlock(len)',                                   'GroupBlock(len)',
1209                                   'Region(position)', 'Region(end)',                                   'Region(position)', 'Region(endpoint)',
1210                                   'Region(content)']);                                   'Region(content)']);
1211      # Loop through the regions found. For each region we will output a location string.      # Loop through the regions found. For each region we will output a location string.
1212      for my $regionData (@regions) {      for my $regionData (@regions) {
# Line 1243  Line 1263 
1263      return @retVal;      return @retVal;
1264  }  }
1265    
1266    =head3 GetFeatureBlockPieces
1267    
1268    C<< my @pieces = $simBlocks->GetFeatureBlockPieces($fig, \@featureIDs, $distance); >>
1269    
1270    Return a list of the block pieces within the specified distance of the
1271    specified features. This method essentially computes locations from
1272    the distance and the feature locations, then passes them to L/GetBlockPieces>.
1273    That method will return the sections of various similarity blocks that
1274    occur inside the the locations computed.
1275    
1276    =over 4
1277    
1278    =item fig
1279    
1280    A FIG-like object that can be used to convert features into locations.
1281    
1282    =item featureIDs
1283    
1284    Reference to a list of feature IDs. Blocks in the vicinity of any of the
1285    features are returned.
1286    
1287    =item distance
1288    
1289    Distance to search from the feature's actual location.
1290    
1291    =item RETURN
1292    
1293    Returns a list of block piece location objects. A block piece location object
1294    is a standard B<BasicLocation> object with a block ID instead of a contig ID.
1295    
1296    =back
1297    
1298    =cut
1299    #: Return Type @;
1300    sub GetFeatureBlockPieces {
1301        # Get the parameters.
1302        my ($self, $fig, $featureIDs, $distance) = @_;
1303        # Declare a hash for keeping the return data. This will weed out
1304        # duplicates, of which we expect plenty. We'll need to handle
1305        # overlaps later, however.
1306        my %retHash = ();
1307        # Loop through the features.
1308        for my $featureID (@{$featureIDs}) {
1309            # Get the feature's genome ID.
1310            my $genomeID = FIG::genome_of($featureID);
1311            # Get the feature's locations.
1312            my @locations = $fig->feature_location($featureID);
1313            # Loop through the locations, sticking the resulting pieces in the return
1314            # hash.
1315            for my $loc (@locations) {
1316                my $locObject = BasicLocation->new($loc);
1317                # Get the length of the contig in question.
1318                my $len = $fig->contig_ln($genomeID, $locObject->Contig);
1319                # Expand the location.
1320                $locObject->Widen($distance, $len);
1321                # Insure the location is Sprout-style;
1322                $locObject->FixContig($genomeID);
1323                # Get the desired block pieces.
1324                my @pieces = $self->GetBlockPieces($locObject);
1325                Trace(scalar(@pieces) . " pieces found for location $loc.") if T(4);
1326                # Put them in the hash.
1327                for my $piece (@pieces) {
1328                    $retHash{$piece} = 1;
1329                }
1330            }
1331        }
1332        # Now we have all the block pieces that occur in any one of our regions. The
1333        # next step is to merge adjacent and overlapping regions. First we convert
1334        # the pieces to location objects so we can sort them.
1335        my @retVal = ();
1336        for my $piece (keys %retHash) {
1337            my $loc = BasicLocation->new($piece);
1338            push @retVal, $loc;
1339        }
1340        Trace("Beginning sort.") if T(3);
1341        @retVal = sort { BasicLocation::Cmp($a,$b) } @retVal;
1342        Trace(scalar(@retVal) . " pieces found before overlap check.") if T(3);
1343        # Now the locations are sorted by block ID, start position, and descending
1344        # length. This means that if there's an overlap, the two overlapping
1345        # pieces will be next to each other.
1346        my $i = 0;
1347        while ($i < $#retVal) {
1348            # Check for overlap between this location and the next.
1349            my ($type, $len) = Overlap::CheckOverlap($retVal[$i], $retVal[$i+1]);
1350            if ($len == 0) {
1351                # Here there is no overlap, so we push ahead.
1352                $i++;
1353            } elsif ($type eq 'embedded') {
1354                # Here the second piece is inside the first, so we delete the
1355                # second.
1356                delete $retVal[$i+1];
1357            } else {
1358                # Here we have a normal overlap. Because all of our pieces
1359                # are forward locations, this means the second piece starts
1360                # before the end of the of the first. We set the end point
1361                # if the first to the end point of the second and delete the
1362                # second.
1363                $retVal[$i]->SetEnd($retVal[$i+1]->EndPoint);
1364                delete $retVal[$i+1];
1365            }
1366        }
1367        # Return the result.
1368        return @retVal;
1369    }
1370    
1371  =head3 WalkDNA  =head3 WalkDNA
1372    
1373  C<< my $blockPos = SimBlocks::WalkDNA($blockPos, $contigPos, $dna, $loc); >>  C<< my $blockPos = SimBlocks::WalkDNA($blockPos, $contigPos, $dna, $loc); >>

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3