Parent Directory
|
Revision Log
Initial import
#!/usr/bin/perl -w # count the exact duplicate reads use strict; use Rob; my $file=shift || die "$0 <file> <full output>\nIf full output is not set (boolean) only a summary is printed\n"; my $fo=shift; if ($file =~ /gz$/) { open(IN, "gunzip -c $file|") || die "Can't open pipe"; } else {open (IN, $file) || die "$file?"} my $tag; my $seq; my $count; while (<IN>) { chomp; if (/^>/) { if ($seq) { $seq=uc($seq); if ($count->{$seq}) { push @{$count->{$seq}}, $tag; } else { # if we don't have it in the forward phase, check the reverse my $rc=Rob->rc($seq); if ($count->{$rc}) {push @{$count->{$rc}}, $tag} else {push @{$count->{$seq}}, $tag;} } undef $seq; } $tag = $_; $tag =~ s/^>//; } else { $seq .= $_; } } if ($fo) { foreach my $seq (sort {scalar @{$count->{$b}} <=> scalar @{$count->{$a}}} keys %$count) { if (scalar @{$count->{$seq}} > 1) { print join "\t", scalar @{$count->{$seq}}, @{$count->{$seq}}, "\n"; } } } else { my %allcounts; foreach my $seq (sort {scalar @{$count->{$b}} <=> scalar @{$count->{$a}}} keys %$count) { if (scalar @{$count->{$seq}} > 1) {$allcounts{scalar @{$count->{$seq}}}++} } print "No. Seqs.\tNo dups\n"; my $total; map {print "$_\t$allcounts{$_}\n"; $total+=($_-1) * $allcounts{$_}} sort {$allcounts{$b} <=> $allcounts{$a}} keys %allcounts; print "TOTAL\t$total\n"; }
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |