[Bio] / FigMetagenomeTools / exact_duplicates.pl Repository:
ViewVC logotype

View of /FigMetagenomeTools/exact_duplicates.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (download) (as text) (annotate) (vendor branch)
Mon Feb 19 17:15:26 2007 UTC (12 years, 9 months ago) by olson
Branch: x, MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, y, HEAD
Changes since 1.1: +0 -0 lines
Initial import

#!/usr/bin/perl -w

# count the exact duplicate reads

use strict;
use Rob;


my $file=shift || die "$0 <file> <full output>\nIf full output is not set (boolean) only a summary is printed\n";
my $fo=shift;

if ($file =~ /gz$/) 
{
	open(IN, "gunzip -c $file|") || die "Can't open pipe";
}
else {open (IN, $file) || die "$file?"}
	



my $tag; my $seq; my $count;
while (<IN>) {
 chomp;
 if (/^>/) {
  if ($seq) {
   $seq=uc($seq);
   if ($count->{$seq}) {
    push @{$count->{$seq}}, $tag;
   }
   else {
    # if we don't have it in the forward phase, check the reverse
    my $rc=Rob->rc($seq);
    if ($count->{$rc}) {push @{$count->{$rc}}, $tag}
    else {push @{$count->{$seq}}, $tag;}
   }
   undef $seq;
  }
  $tag = $_;
  $tag =~ s/^>//;
 } else {
  $seq .= $_;
 }
}

if ($fo)
{
	foreach my $seq (sort {scalar @{$count->{$b}} <=> scalar @{$count->{$a}}} keys %$count) 
	{
		if (scalar @{$count->{$seq}} > 1) 
		{
			print join "\t", scalar @{$count->{$seq}}, @{$count->{$seq}}, "\n";
		}
	}
}
else 
{
	my %allcounts;
	foreach my $seq (sort {scalar @{$count->{$b}} <=> scalar @{$count->{$a}}} keys %$count) 
	{
		if (scalar @{$count->{$seq}} > 1) {$allcounts{scalar @{$count->{$seq}}}++}
	}
	print "No. Seqs.\tNo dups\n";
	my $total;
	map {print "$_\t$allcounts{$_}\n"; $total+=($_-1) * $allcounts{$_}} sort {$allcounts{$b} <=> $allcounts{$a}} keys %allcounts;
	print "TOTAL\t$total\n";
}
 

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3