[Bio] / FigKernelScripts / p3-find-couples.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/p3-find-couples.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.6, Wed Oct 24 19:06:28 2018 UTC revision 1.7, Tue Mar 26 11:46:38 2019 UTC
# Line 108  Line 108 
108  $catCol = P3Utils::find_column($catCol, $outHeaders);  $catCol = P3Utils::find_column($catCol, $outHeaders);
109  # Form the full header set and write it out.  # Form the full header set and write it out.
110  if (! $opt->nohead) {  if (! $opt->nohead) {
111      my @headers = ("$outHeaders->[$catCol]1", "$outHeaders->[$catCol]2", 'count', 'percent', 'found1', 'found2');      my @headers = ("$outHeaders->[$catCol]1", "$outHeaders->[$catCol]2", 'count', 'percent', 'found1');
112      P3Utils::print_cols(\@headers);      P3Utils::print_cols(\@headers);
113  }  }
114  # These are used for status messages.  # These are used for status messages.
# Line 166  Line 166 
166  # Now we have category and position data for each feature sorted by sequence.  # Now we have category and position data for each feature sorted by sequence.
167  # For each list, we sort by start position and figure out what qualifies as a couple.  # For each list, we sort by start position and figure out what qualifies as a couple.
168  # The couples are counted in this hash, which is keyed by "element1\telement2".  # The couples are counted in this hash, which is keyed by "element1\telement2".
 open(my $oh, ">coupleDebug.log") || die "Could not open debug file: $!"; ##TODO debug  
169  my %couples;  my %couples;
170    open(my $oh, ">coupleDebug.log") || die "Could not open debug file: $!"; ##TODO debug
171  print STDERR scalar(keys %contigs) . " contigs ready to examine.\n" if $debug;  print STDERR scalar(keys %contigs) . " contigs ready to examine.\n" if $debug;
172  my ($contigCount, $fidCount) = (0,0);  my ($contigCount, $fidCount) = (0,0);
173  for my $contig (sort keys %contigs) {  for my $contig (sort keys %contigs) {
174      $contigCount++;      $contigCount++;
175      my @features = sort { $a->[1] <=> $b->[1] } @{$contigs{$contig}};      my @features = sort { $a->[1] <=> $b->[1] } @{$contigs{$contig}};
176      # We process one feature at a time, and stop when there are none left      # We process one feature at a time, and stop when there are none left
177      # to couple with it.      # to couple with it.  For each feature, we need a list of the couplings.
178        # This hash is keyed by {fid}{couple}.
179        my %counts;
180        # Loop through the features on the contig.
181      my $feat = shift @features;      my $feat = shift @features;
182      while (scalar @features) {      while (scalar @features) {
183          $fidCount++;          $fidCount++;
# Line 185  Line 188 
188          for my $other (@features) { last if $other->[1] > $limit;          for my $other (@features) { last if $other->[1] > $limit;
189              my ($cat2, $s2, $e2, $fid2) = @$other;              my ($cat2, $s2, $e2, $fid2) = @$other;
190              if ($cat1 ne $cat2 && $fid ne $fid2) {              if ($cat1 ne $cat2 && $fid ne $fid2) {
191                  my $couple = join("\t", sort ($cat1, $cat2));                  $counts{$fid}{"$cat1\t$cat2"}++;
192                  if ($couple eq "LighHarvLhiiAlph8\tLighHarvLhiiBeta8") { print $oh "$fid and $fid2 at $start and $s2 on $contig.\n"; }                  $counts{$fid2}{"$cat2\t$cat1"}++;
                 $couples{$couple}++;  
193              }              }
194          }          }
195          # Get the next feature.          # Get the next feature.
196          $feat = shift @features;          $feat = shift @features;
197      }      }
198      print STDERR "$contigCount contigs processed with $fidCount features.\n" if $debug && $contigCount % 1000 == 0;      print STDERR "$contigCount contigs processed with $fidCount features.\n" if $debug && $contigCount % 1000 == 0;
199        # From the counts hash, we do the actual couple counts. This insures we don't double-count when a frame shift has
200        # split a role across two adjacent proteins.
201        for my $fid (keys %counts) {
202            my $coupleH = $counts{$fid};
203            for my $couple (keys %$coupleH) {
204                $couples{$couple}++;
205            }
206        }
207  }  }
208  # Sort the couples and output them.  # Sort the couples and output them.
209  print STDERR "Analyzing couples.\n" if $debug;  print STDERR "Analyzing couples.\n" if $debug;
# Line 201  Line 211 
211  for my $couple (@couples) {  for my $couple (@couples) {
212      my ($cat1, $cat2) = split /\t/, $couple;      my ($cat1, $cat2) = split /\t/, $couple;
213      my $count = $couples{$couple};      my $count = $couples{$couple};
214      my $total = ($catCounts{$cat1} + $catCounts{$cat2}) / 2;      $couples{$couple} = [$count, Math::Round::nearest(0.01, $count * 100 / $catCounts{$cat1}), $catCounts{$cat1}];
     $couples{$couple} = [$count, Math::Round::nearest(0.01, $count * 100 / $total), $catCounts{$cat1}, $catCounts{$cat2}];  
215  }  }
216  print STDERR "Sorting couples.\n" if $debug;  print STDERR "Sorting couples.\n" if $debug;
217  @couples = sort { $couples{$b}[1] <=> $couples{$a}[1] } @couples;  @couples = sort { $couples{$b}[1] <=> $couples{$a}[1] || $couples{$b}[0] <=> $couples{$a}[0] } @couples;
218  print STDERR "Writing couples.\n" if $debug;  print STDERR "Writing couples.\n" if $debug;
219  for my $couple (@couples) {  for my $couple (@couples) {
220      my $line = $couples{$couple};      my $line = $couples{$couple};

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3