[Bio] / FigKernelScripts / p3-find-couples.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/p3-find-couples.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5, Sat Oct 20 01:59:13 2018 UTC revision 1.6, Wed Oct 24 19:06:28 2018 UTC
# Line 108  Line 108 
108  $catCol = P3Utils::find_column($catCol, $outHeaders);  $catCol = P3Utils::find_column($catCol, $outHeaders);
109  # Form the full header set and write it out.  # Form the full header set and write it out.
110  if (! $opt->nohead) {  if (! $opt->nohead) {
111      my @headers = ("$outHeaders->[$catCol]1", "$outHeaders->[$catCol]2", 'count', 'percent');      my @headers = ("$outHeaders->[$catCol]1", "$outHeaders->[$catCol]2", 'count', 'percent', 'found1', 'found2');
112      P3Utils::print_cols(\@headers);      P3Utils::print_cols(\@headers);
113  }  }
114  # These are used for status messages.  # These are used for status messages.
# Line 157  Line 157 
157              # Compute the genome ID.              # Compute the genome ID.
158              my ($genomeID) = ($fid =~ /(\d+\.\d+)/);              my ($genomeID) = ($fid =~ /(\d+\.\d+)/);
159              # Put the feature in the hash.              # Put the feature in the hash.
160              push @{$contigs{"$genomeID:$sequence"}}, [$category, $start, $end];              push @{$contigs{"$genomeID:$sequence"}}, [$category, $start, $end, $fid];
161                $catCounts{$category}++;
162          }          }
163      }      }
164      print STDERR "$count features processed.\n" if $debug && $count % $period == 0;      print STDERR "$count features processed.\n" if $debug && $count % $period == 0;
# Line 165  Line 166 
166  # Now we have category and position data for each feature sorted by sequence.  # Now we have category and position data for each feature sorted by sequence.
167  # For each list, we sort by start position and figure out what qualifies as a couple.  # For each list, we sort by start position and figure out what qualifies as a couple.
168  # The couples are counted in this hash, which is keyed by "element1\telement2".  # The couples are counted in this hash, which is keyed by "element1\telement2".
169    open(my $oh, ">coupleDebug.log") || die "Could not open debug file: $!"; ##TODO debug
170  my %couples;  my %couples;
171  print STDERR scalar(keys %contigs) . " contigs ready to examine.\n" if $debug;  print STDERR scalar(keys %contigs) . " contigs ready to examine.\n" if $debug;
172    my ($contigCount, $fidCount) = (0,0);
173  for my $contig (sort keys %contigs) {  for my $contig (sort keys %contigs) {
174        $contigCount++;
175      my @features = sort { $a->[1] <=> $b->[1] } @{$contigs{$contig}};      my @features = sort { $a->[1] <=> $b->[1] } @{$contigs{$contig}};
     print STDERR "Processing $contig with " . scalar(@features) . " features.\n" if $debug;  
176      # We process one feature at a time, and stop when there are none left      # We process one feature at a time, and stop when there are none left
177      # to couple with it.      # to couple with it.
178      my $feat = shift @features;      my $feat = shift @features;
179      while (scalar @features) {      while (scalar @features) {
180          my $cat1 = $feat->[0];          $fidCount++;
181          $catCounts{$cat1}++;          my ($cat1, $start, $end, $fid) = @$feat;
182          # Compute the latest start position that qualifies as a couple.          # Compute the latest start position that qualifies as a couple.
183          my $limit = $feat->[2] + $maxGap;          my $limit = $end + $maxGap;
184          # Loop through the remaining features until we hit the limit.          # Loop through the remaining features until we hit the limit.
185          for my $other (@features) { last if $other->[1] > $limit;          for my $other (@features) { last if $other->[1] > $limit;
186              if ($cat1 ne $other->[0]) {              my ($cat2, $s2, $e2, $fid2) = @$other;
187                  my $couple = join("\t", sort ($cat1, $other->[0]));              if ($cat1 ne $cat2 && $fid ne $fid2) {
188                    my $couple = join("\t", sort ($cat1, $cat2));
189                    if ($couple eq "LighHarvLhiiAlph8\tLighHarvLhiiBeta8") { print $oh "$fid and $fid2 at $start and $s2 on $contig.\n"; }
190                  $couples{$couple}++;                  $couples{$couple}++;
191              }              }
192          }          }
193          # Get the next feature.          # Get the next feature.
194          $feat = shift @features;          $feat = shift @features;
195      }      }
196        print STDERR "$contigCount contigs processed with $fidCount features.\n" if $debug && $contigCount % 1000 == 0;
197  }  }
198  # Sort the couples and output them.  # Sort the couples and output them.
199  print STDERR "Writing couples.\n" if $debug;  print STDERR "Analyzing couples.\n" if $debug;
200  my @couples = sort { $couples{$b} <=> $couples{$a} } grep { $couples{$_} >= $minCount } keys %couples;  my @couples = grep { $couples{$_} >= $minCount } keys %couples;
201  for my $couple (@couples) {  for my $couple (@couples) {
202      my ($cat1, $cat2) = split /\t/, $couple;      my ($cat1, $cat2) = split /\t/, $couple;
203      my $count = $couples{$couple};      my $count = $couples{$couple};
204      my $total = ($catCounts{$cat1} + $catCounts{$cat2}) / 2;      my $total = ($catCounts{$cat1} + $catCounts{$cat2}) / 2;
205      P3Utils::print_cols([$couple, $count, Math::Round::nearest(0.01, $count * 100 / $total)]);      $couples{$couple} = [$count, Math::Round::nearest(0.01, $count * 100 / $total), $catCounts{$cat1}, $catCounts{$cat2}];
206    }
207    print STDERR "Sorting couples.\n" if $debug;
208    @couples = sort { $couples{$b}[1] <=> $couples{$a}[1] } @couples;
209    print STDERR "Writing couples.\n" if $debug;
210    for my $couple (@couples) {
211        my $line = $couples{$couple};
212        P3Utils::print_cols([$couple, @$line]);
213  }  }

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.6

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3