[Bio] / FigKernelPackages / ComparedRegions.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/ComparedRegions.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Mon Jul 2 22:27:49 2007 UTC revision 1.6, Mon Jul 16 19:13:23 2007 UTC
# Line 109  Line 109 
109      # check if file exists      # check if file exists
110      (-e $file) || return undef;      (-e $file) || return undef;
111      my($line, %locations);      my($line, %locations);
112      open(HITS, "<$file") or die "could not open file '$file': $!";      Open(\*HITS, "<$file");
113      $line = <HITS>;  # ignore column header line      $line = <HITS>;  # ignore column header line
114      while (defined($line = <HITS>)) {      while (defined($line = <HITS>)) {
115          my($hit) = split(/\s+/, $line);          my($hit) = split(/\s+/, $line);
# Line 142  Line 142 
142          } else {          } else {
143              # output from DNA scan              # output from DNA scan
144              # this will need to be cleaned up depending on the format Bruce puts the output in              # this will need to be cleaned up depending on the format Bruce puts the output in
145              my($org_id, $loc) = split(":", $hit);              my($org_id) = split(":", $hit);
146              my($contig, $beg, $end) = $fig->boundaries_of($fig->feature_location($loc));              my($contig, $beg, $end) = ($locObject->Contig, $locObject->Begin, $locObject->EndPoint);
147              push @{$locations{$org_id}{$contig}}, [$beg, $end];              push @{$locations{$org_id}{$contig}}, [$beg, $end];
148          }          }
149      }      }
150      close(HITS) or die "could not close file '$file': $!";      close(HITS) or Confess("could not close file '$file': $!");
151      return \%locations;      return \%locations;
152  }  }
153    
# Line 155  Line 155 
155      my($fig, $regions, $patscan_hits) = @_;      my($fig, $regions, $patscan_hits) = @_;
156      # find scan_for_matches hits (from $patscan_hits) which overlap the regions      # find scan_for_matches hits (from $patscan_hits) which overlap the regions
157      # containing the closest pegs (from $peg_regions)      # containing the closest pegs (from $peg_regions)
158        my @regionKeys = keys %$regions;
159        Trace("Adding hits for " . scalar(@regionKeys) . " regions.") if T(3);
160        # Keep in here a list of the organisms that had hits.
161        my %hitOrgs = ();
162      # should probably sort regions and perform search intelligently      # should probably sort regions and perform search intelligently
163      foreach my $org_id (keys %$regions) {      foreach my $org_id (@regionKeys) {
164            Trace("Finding hits for $org_id.") if T(3);
165          foreach my $contig (keys %{$regions->{$org_id}}) {          foreach my $contig (keys %{$regions->{$org_id}}) {
166                Trace("Finding hits for $contig.") if T(3);
167              if (exists $patscan_hits->{$org_id} and exists $patscan_hits->{$org_id}{$contig}) {              if (exists $patscan_hits->{$org_id} and exists $patscan_hits->{$org_id}{$contig}) {
168                    Trace("Hits found on $contig.") if T(3);
169                    $hitOrgs{$org_id}{$contig} = 1;
170                  foreach my $region (@{$regions->{$org_id}{$contig}}) {                  foreach my $region (@{$regions->{$org_id}{$contig}}) {
171                      my($reg_beg, $reg_end) = ($region->{'reg_beg'}, $region->{'reg_end'});                      my($reg_beg, $reg_end) = ($region->{'reg_beg'}, $region->{'reg_end'});
172                      # build up list of hits which overlap the region                      # build up list of hits which overlap the region
# Line 170  Line 177 
177                              push @$overlapping_hits, [$hit_beg, $hit_end];                              push @$overlapping_hits, [$hit_beg, $hit_end];
178                          }                          }
179                      }                      }
180                        Trace(scalar(@$overlapping_hits) . " overlapping hits found between $reg_beg and $reg_end.") if T(3);
181                      if (@$overlapping_hits) {                      if (@$overlapping_hits) {
182                          # add the overlapping hits in the region                          # add the overlapping hits in the region
183                          $region->{'hits'} = $overlapping_hits;                          $region->{'hits'} = $overlapping_hits;
# Line 179  Line 186 
186              }              }
187          }          }
188      }      }
189        # Now delete the unused genomes and contigs.
190        for my $org_id (@regionKeys) {
191            if (! exists $hitOrgs{$org_id}) {
192                delete $regions->{$org_id};
193            } else {
194                my $contigHash = $regions->{$org_id};
195                my @contigKeys = keys %{$contigHash};
196                for my $contig (@contigKeys) {
197                    if (! exists $hitOrgs{$org_id}->{$contig}) {
198                        delete $contigHash->{$contig};
199                    }
200                }
201            }
202        }
203      return $regions;      return $regions;
204  }  }
205    
# Line 404  Line 424 
424      my $fig       = $params->{'fig'} || return undef;      my $fig       = $params->{'fig'} || return undef;
425      my $peg       = $params->{'id'}  || return undef;      my $peg       = $params->{'id'}  || return undef;
426      my $n         = $params->{'number_of_genomes'} || 50000; # return ALL closest pegs      my $n         = $params->{'number_of_genomes'} || 50000; # return ALL closest pegs
427      Trace("Looking for closest peg to $peg.") if T(3);      Trace("Looking for closest peg to $peg in $n genomes.") if T(3);
428        # Create a hash of legal genomes.
429        my %genomeIDs = map { $_ => 1 } $fig->genomes();
430    
431      # get the n pegs closest to the one passed      # get the n pegs closest to the one passed
432    
# Line 418  Line 440 
440      }      }
441      my %closest = map { $_ => 1 } @closest;      my %closest = map { $_ => 1 } @closest;
442    
     # there are dragons flying around...  
     my @pinned_to = grep { ($_ ne $peg) && (! $closest{$_}) } $fig->in_pch_pin_with($peg);  
443      my $g1 = $fig->genome_of($peg);      my $g1 = $fig->genome_of($peg);
444        # there are dragons flying around...
445        Trace("Checking pins.") if T(3);
446        my @pinned_to = grep { ($_ ne $peg) && (! $closest{$_}) && $genomeIDs{$fig->genome_of($_)} } $fig->in_pch_pin_with($peg);
447        Trace("Mapping by distance.") if T(3);
448      @pinned_to = map {$_->[1] } sort { $a->[0] <=> $b->[0] } map { $peg2 = $_; $d = $fig->crude_estimate_of_distance($g1,$fig->genome_of($peg2)); [$d,$peg2] } @pinned_to;      @pinned_to = map {$_->[1] } sort { $a->[0] <=> $b->[0] } map { $peg2 = $_; $d = $fig->crude_estimate_of_distance($g1,$fig->genome_of($peg2)); [$d,$peg2] } @pinned_to;
449        Trace("computing pin stuff.") if T(3);
450      if (@closest == ($n-1)) {      if (@closest == ($n-1)) {
451          $#closest = ($n - 2) - &FIG::min(scalar @pinned_to,int($n/2));          $#closest = ($n - 2) - &FIG::min(scalar @pinned_to,int($n/2));
452          for ($i=0; ($i < @pinned_to) && (@closest < ($n-1)); $i++) {          for ($i=0; ($i < @pinned_to) && (@closest < ($n-1)); $i++) {
# Line 432  Line 456 
456              }              }
457          }          }
458      }      }
459    #    Trace("Checking for extensions.") if T(3);
460      if ($fig->possibly_truncated($peg)) {  #    if ($fig->possibly_truncated($peg)) {
461          push(@closest, &possible_extensions($fig, $peg, \@closest));  #       push(@closest, &possible_extensions($fig, $peg, \@closest));
462      }  #    }
463        Trace("Sorting by taxonomy.") if T(3);
464      @closest = $fig->sort_fids_by_taxonomy(@closest);      @closest = $fig->sort_fids_by_taxonomy(@closest);
465    
466      unshift(@closest, $peg);      unshift(@closest, $peg);
# Line 458  Line 483 
483    $g = &FIG::genome_of($peg);    $g = &FIG::genome_of($peg);
484    
485    foreach $peg1 (@$closest_pegs) {    foreach $peg1 (@$closest_pegs) {
486        if ($g ne &genome_of($peg1)) {        if ($g ne $fig->genome_of($peg1)) {
487            foreach $sim ($fig->sims($peg1,500,1.0e-5,"all")) {            foreach $sim ($fig->sims($peg1,500,1.0e-5,"all")) {
488                $id2 = $sim->id2;                $id2 = $sim->id2;
489                if (($id2 ne $peg) && ($id2 =~ /^fig\|$g\./) && $fig->possibly_truncated($id2)) {                if (($id2 ne $peg) && ($id2 =~ /^fig\|$g\./) && $fig->possibly_truncated($id2)) {

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.6

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3