[Bio] / FigKernelPackages / ComparedRegions.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/ComparedRegions.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.4, Thu Jun 28 16:51:39 2007 UTC revision 1.5, Mon Jul 2 22:27:49 2007 UTC
# Line 5  Line 5 
5  use strict;  use strict;
6  use warnings;  use warnings;
7    
 use FIG;  
 use FIGV;  
8  use Tracer;  use Tracer;
9    use BasicLocation;
10    
11  sub get_compared_regions {  sub get_compared_regions {
12    my ($params) = @_;    my ($params) = @_;
# Line 29  Line 28 
28    # iterate over the returned pegs    # iterate over the returned pegs
29    foreach my $peg (@closest_pegs) {    foreach my $peg (@closest_pegs) {
30      my $loc = $fig->feature_location($peg);      my $loc = $fig->feature_location($peg);
31        my $genome = $fig->genome_of($peg);
32      my ($contig,$beg,$end) = $fig->boundaries_of($loc);      my ($contig,$beg,$end) = $fig->boundaries_of($loc);
33      if ($contig && $beg && $end) {      if ($contig && $beg && $end) {
34        my $mid = int(($beg + $end) / 2);        my $mid = int(($beg + $end) / 2);
35        my $min = int($mid - ($region_size / 2));        my $min = int($mid - ($region_size / 2));
36        my $max = int($mid + ($region_size / 2));        my $max = int($mid + ($region_size / 2));
37        my $features = [];        my $features = [];
38        my $feature_ids;        my $feature_ids = $fig->all_features_detailed($genome, $min, $max, $contig);
39        ($feature_ids, undef, undef) = $fig->genes_in_region($fig->genome_of($peg),$contig,$min,$max);        # "feature_ids" now contains a list of tuples. Each tuple consists of nine
40        foreach my $fid (@$feature_ids) {        # elements: (0) the feature ID, (1) the feature location (as a comma-delimited
41          my $floc = $fig->feature_location($fid);        # list of location specifiers), (2) the feature aliases (as a comma-delimited
42          my ($contig1,$beg1,$end1) = $fig->boundaries_of($fig->feature_location($fid));        # list of named aliases), (3) the feature type, (4) the leftmost index of the
43          # feature's leftmost location, (5) the rightmost index of the feature's
44          # rightmost location, (6) the current functional assignment, (7) the user who
45          # made the assignment, and (8) the quality of the assignment (which is usually a space).
46          foreach my $featureTuple (@$feature_ids) {
47            my $floc = $featureTuple->[1];
48            my ($contig1,$beg1,$end1) = $fig->boundaries_of($floc);
49          $beg1 = &in_bounds($min,$max,$beg1);          $beg1 = &in_bounds($min,$max,$beg1);
50          $end1 = &in_bounds($min,$max,$end1);          $end1 = &in_bounds($min,$max,$end1);
51          push(@$features, { 'start' => $beg1,          push(@$features, { 'start' => $beg1,
52                             'stop' => $end1,                             'stop' => $end1,
53                             'id' => $fid });                             'id' => $featureTuple->[0] });
54        }        }
55        push(@$data, { 'features' => $features,        push(@$data, { 'features' => $features,
56                       'contig' => $contig,                       'contig' => $contig,
# Line 102  Line 108 
108      $file = "$FIG_Config::temp/$file";      $file = "$FIG_Config::temp/$file";
109      # check if file exists      # check if file exists
110      (-e $file) || return undef;      (-e $file) || return undef;
   
111      my($line, %locations);      my($line, %locations);
112      open(HITS, "<$file") or die "could not open file '$file': $!";      open(HITS, "<$file") or die "could not open file '$file': $!";
113      $line = <HITS>;  # ignore column header line      $line = <HITS>;  # ignore column header line
114      while (defined($line = <HITS>)) {      while (defined($line = <HITS>)) {
115          my($hit) = split(/\s+/, $line);          my($hit) = split(/\s+/, $line);
116            Trace("Hit string is $hit.") if T(3);
117          if ($hit =~ /^fig\|(\S+)\.peg\.(\d+)_(\d+)\+(\d+)$/) {          my $locObject = BasicLocation->new($hit);
118            my $object = $locObject->Contig;
119            if ($object =~ /^fig\|/) {
120              # output from protein sequence scan              # output from protein sequence scan
121              my($org_id, $peg_num, $offset, $ln) = ($1, $2, $3, $4);              my($org_id, $offset, $ln) = ($fig->genome_of($object), $locObject->Begin, $locObject->Length);
122    
123              my $fid = 'fig|' . $org_id . '.peg.' . $peg_num;              my $fid = $object;
124              my $loc = $fig->feature_location($fid);              my $loc = $fig->feature_location($fid);
125    
126              if (not $fig->is_deleted_fid($fid))              if (not $fig->is_deleted_fid($fid))
# Line 159  Line 166 
166                      my $overlapping_hits = [];                      my $overlapping_hits = [];
167                      foreach my $hit (@{$patscan_hits->{$org_id}{$contig}}) {                      foreach my $hit (@{$patscan_hits->{$org_id}{$contig}}) {
168                          my($hit_beg, $hit_end) = @$hit;                          my($hit_beg, $hit_end) = @$hit;
169                          if ($fig->between($reg_beg, $hit_beg, $reg_end) or $fig->between($reg_beg, $hit_end, $reg_end)) {                          if (FIG::between($reg_beg, $hit_beg, $reg_end) or FIG::between($reg_beg, $hit_end, $reg_end)) {
170                              push @$overlapping_hits, [$hit_beg, $hit_end];                              push @$overlapping_hits, [$hit_beg, $hit_end];
171                          }                          }
172                      }                      }
# Line 179  Line 186 
186  sub add_features_in_regions {  sub add_features_in_regions {
187      my($fig, $regions) = @_;      my($fig, $regions) = @_;
188      # add features in region to data structure containing closest pegs      # add features in region to data structure containing closest pegs
189      foreach my $org_id (keys %$regions) {      my @regionList = keys %$regions;
190        Trace("Looking for features in " . scalar(@regionList) . " regions.") if T(3);
191        foreach my $org_id (@regionList) {
192            Trace("Processing genome $org_id.") if T(3);
193          foreach my $contig (keys %{$regions->{$org_id}}) {          foreach my $contig (keys %{$regions->{$org_id}}) {
194                Trace("Processing contig $contig.") if T(3);
195              foreach my $region (@{$regions->{$org_id}{$contig}}) {              foreach my $region (@{$regions->{$org_id}{$contig}}) {
196                  # get closest peg and boundaries of region                  # get closest peg and boundaries of region
197                  my $peg = $region->{'peg'};                  my $peg = $region->{'peg'};
198                  my $min = $region->{'reg_beg'};                  my $min = $region->{'reg_beg'};
199                  my $max = $region->{'reg_end'};                  my $max = $region->{'reg_end'};
200                    Trace("Region of interest is with $peg from $min to $max.") if T(3);
201                  # get list of features which fall in this region                  # get list of features which fall in this region
202                  my($feature_ids) = $fig->genes_in_region($fig->genome_of($peg),$contig,$min,$max);                  my($feature_ids) = $fig->genes_in_region($fig->genome_of($peg),$contig,$min,$max);
203    
# Line 194  Line 205 
205                  my $features = [];                  my $features = [];
206                  foreach my $fid (@$feature_ids) {                  foreach my $fid (@$feature_ids) {
207                      my $floc = $fig->feature_location($fid);                      my $floc = $fig->feature_location($fid);
208                      my ($contig1,$beg1,$end1) = $fig->boundaries_of($fig->feature_location($fid));                      my ($contig1,$beg1,$end1) = $fig->boundaries_of($floc);
209                      $beg1 = &in_bounds($min,$max,$beg1);                      $beg1 = &in_bounds($min,$max,$beg1);
210                      $end1 = &in_bounds($min,$max,$end1);                      $end1 = &in_bounds($min,$max,$end1);
211                      push(@$features, { 'feat_beg' => $beg1,                      push(@$features, { 'feat_beg' => $beg1,
212                                         'feat_end' => $end1,                                         'feat_end' => $end1,
213                                         'feat_id'  => $fid });                                         'feat_id'  => $fid });
214                  }                  }
215                    Trace(scalar(@$features) . " features found in region of interest.") if T(3);
216                  # add the feature data to the region                  # add the feature data to the region
217                  $region->{'features'} = $features;                  $region->{'features'} = $features;
218              }              }
219          }          }
220      }      }
221        Trace("Completed features-in-regions process.") if T(3);
222      return $regions;      return $regions;
223  }  }
224    
# Line 353  Line 364 
364    
365      # initialize data variable      # initialize data variable
366      my $regions = {};      my $regions = {};
367        Trace(scalar(@$closest_pegs) . " closest pegs coming in.") if T(3);
368      # iterate over the pegs      # iterate over the pegs
369      foreach my $peg (@$closest_pegs) {      foreach my $peg (@$closest_pegs) {
370          # get location of peg          # get location of peg
# Line 381  Line 392 
392          }          }
393      }      }
394    
395        Trace(scalar(keys %$regions) . " regions found.") if T(3);
396      # return regions data      # return regions data
397      return $regions;      return $regions;
398  }  }
# Line 388  Line 400 
400  sub get_closest_pegs {  sub get_closest_pegs {
401      my ($params) = @_;      my ($params) = @_;
402      # returns the n closest pegs, sorted by taxonomy      # returns the n closest pegs, sorted by taxonomy
   
403      # check parameters      # check parameters
404      my $fig       = $params->{'fig'} || return undef;      my $fig       = $params->{'fig'} || return undef;
405      my $peg       = $params->{'id'}  || return undef;      my $peg       = $params->{'id'}  || return undef;
     my $is_sprout = $params->{'is_sprout'} || 0;  
406      my $n         = $params->{'number_of_genomes'} || 50000; # return ALL closest pegs      my $n         = $params->{'number_of_genomes'} || 50000; # return ALL closest pegs
407        Trace("Looking for closest peg to $peg.") if T(3);
408    
409      # get the n pegs closest to the one passed      # get the n pegs closest to the one passed
410    
411      my($id2,$d,$peg2,$i);      my($id2,$d,$peg2,$i);
412    
413      my @closest;      my @closest;
     if ($is_sprout) {  
         @closest = map { $_->[0] } sort { $a->[1] <=> $b->[1] } $fig->bbhs($peg, 1.0e-10);  
     } else {  
414          @closest = map { $id2 = $_->id2; ($id2 =~ /^fig\|/) ? $id2 : () } $fig->sims($peg,&FIG::max(20,$n*4),1.0e-20,"fig",&FIG::max(20,$n*4));          @closest = map { $id2 = $_->id2; ($id2 =~ /^fig\|/) ? $id2 : () } $fig->sims($peg,&FIG::max(20,$n*4),1.0e-20,"fig",&FIG::max(20,$n*4));
     }  
415    
416      if (@closest >= ($n-1)) {      if (@closest >= ($n-1)) {
417          $#closest = $n-2 ;          $#closest = $n-2 ;
# Line 432  Line 439 
439      @closest = $fig->sort_fids_by_taxonomy(@closest);      @closest = $fig->sort_fids_by_taxonomy(@closest);
440    
441      unshift(@closest, $peg);      unshift(@closest, $peg);
442        Trace("Returning " . scalar(@closest) . " close pegs.") if T(3);
443      return \@closest;      return \@closest;
444  }  }
445    

Legend:
Removed from v.1.4  
changed lines
  Added in v.1.5

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3