Parent Directory
|
Revision Log
build a set of solid rectangles
use strict; use Data::Dumper; use Carp; use SeedUtils; use GenerateClusters; # # This is a SAS Component # =head1 svr_build_solid_set -g CloseGenomes < clustersRel Produces a set of "solid rectangles" separated by "//" lines. Each group of lines is the set of rows in one rectangle. Each line in a rectangle begins with a genome ID, and is followed by an ordered set of tab-separated Pegs (one line per genome. The first line will be an input cluster from the input set (from the reference genome) ------ Example: svr_build_solid_set -g close.genomes < RefClusters > solid.rectangles =head2 Command-Line Options =over 4 =item -g GenomesToTest =back =head2 Output Format The output is a set of lines, one per genome projected to. Each line is a set of tab-separated Pegs from a single genome =cut use SeedUtils; use SAPserver; my $sapO = SAPserver->new(); use Getopt::Long; use GenerateClusters; my $usage = "usage: svr_build_solid_set -g CloseGenomesFile < clusters_from_reference_genome"; my $file; my $rc = GetOptions('g=s' => \$file); if (! $rc) { print STDERR $usage; exit } if (! $file) { die "you need to give a file of genomes -f argument" } my @genomes = map { ($_ =~ /^(\S+)/) ? $1 : () } `cat $file`; print STDERR scalar @genomes, " genomes to check\n"; my @sets; my $last = <STDIN>; while ($last && ($last =~ /^(\S+)\t(\S+)/)) { my $curr = $1; my $set = []; while ($last && ($last =~ /^(\S+)\t(\S+)/) && ($1 eq $curr)) { push(@$set,$2); $last = <STDIN>; } push(@sets,$set); } print STDERR scalar @sets, " sets were read\n"; my $ref = &SeedUtils::genome_of($sets[0]->[0]); print STDERR "reference genome = $ref\n"; my $corrH = &GenerateClusters::bbhs($ref,\@genomes,$sapO); print STDERR "read all correspondence tables\n"; foreach my $set (@sets) { print join("\t",($ref,@$set)),"\n"; foreach my $g (@genomes) { my $instance = &project($set,$ref,$g,$corrH,$sapO); if ($instance) { print join("\t",($g,@$instance)),"\n"; } } print "//\n"; } sub project { my($set,$ref,$g,$corrH,$sapO) = @_; my @set2 = map { ($corrH->{$g}->{$_}) ? $corrH->{$g}->{$_} : () } @$set; if (@set2 == @$set) { my $sz = &sz_of_cluster($set,$sapO); my $sz2 = &sz_of_cluster(\@set2,$sapO); if ((($sz2-$sz) / $sz) < 0.2) { return \@set2; } } return undef; } sub sz_of_cluster { my($set,$sapO) = @_; my $locH = $sapO->fid_locations( -ids => $set, -boundaries => 1); my $min; my $max; foreach my $peg (keys(%$locH)) { my $loc = $locH->{$peg}; if ($loc && ($loc =~ /^.*:(\S+)_(\d+)[+-](\d+)$/)) { my($contig,$beg,$strand,$len) = ($1,$2,$3,$4); if ($strand eq '+') { if ((! $min) || ($min > $beg)) { $min = $beg } if ((! $max) || ($max < ($beg + $len - 1))) { $max = $beg + $len - 1 } } else { if ((! $min) || ($min > ($beg - ($len - 1)))) { $min = $beg-($len - 1) } if ((! $max) || ($max < $beg)) { $max = $beg } } } } return ($max - $min) + 1; }
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |