[Bio] / FigMetagenomeTools / dereplicate_fasta.pl Repository:
ViewVC logotype

View of /FigMetagenomeTools/dereplicate_fasta.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Fri Mar 16 20:51:25 2007 UTC (12 years, 9 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
Changes since 1.1: +0 -1 lines
initial tweaks

#!/usr/bin/perl -w

# Remove duplicate sequences (compared case-insensitively) and rename
# repeated sequence IDs by appending a numeric suffix.

use strict;
use Bio::SeqIO;
use Getopt::Long;
# Dereplicate a FASTA file.
#
# Usage: dereplicate_fasta.pl -i infile -o outfile
#
#   -i / --infile   FASTA file to read
#   -o / --outfile  FASTA file to write (overwritten if it exists)
#
# A record is dropped when its sequence (compared case-insensitively) has
# already been written. A kept record whose ID has already been used is
# renamed to "<id>.<n>", where <n> is the lowest counter that yields an ID
# not seen so far, so every ID in the output is unique.
my ($infile, $outfile);
GetOptions(
 'i|infile:s'   	=> \$infile,
 'o|outfile:s'		=> \$outfile,
);

die "$0 -i infile -o outfile" unless ($infile && $outfile);

# The output is opened with ">" below, which truncates it immediately.
# Refuse to run when input and output are the same file, otherwise the
# input would be emptied before a single record is read.
my @in_stat  = stat $infile;
my @out_stat = stat $outfile;
if ($infile eq $outfile
    || (@in_stat && @out_stat && $in_stat[1]
        && $in_stat[0] == $out_stat[0] && $in_stat[1] == $out_stat[1])) {
 die "$0: infile and outfile must be different files\n";
}

my $sio=Bio::SeqIO->new(-file=>"$infile", -format=>'fasta');
my $sout=Bio::SeqIO->new(-file=>">$outfile", -format=>'fasta');

my %seq;   # upper-cased sequence => true once it has been written
my %id;    # ID => 1 when first used; later the last suffix tried for it
while (my $sin=$sio->next_seq) {
 # A record without residues yields undef; treat it as the empty string
 # so it is compared like any other sequence without tripping -w.
 my $residues = $sin->seq;
 $residues = '' unless defined $residues;
 $residues = uc $residues;
 next if $seq{$residues};
 $seq{$residues} = 1;

 my $name = $sin->id;
 $name = '' unless defined $name;
 if ($id{$name}) {
  # Keep bumping the counter until the suffixed name is unused: the input
  # may already contain a literal "name.2", or an earlier rename may have
  # produced it.
  my $candidate;
  do {
   $id{$name}++;
   $candidate = $name . "." . $id{$name};
  } while ($id{$candidate});
  # Record the new name so later records cannot collide with it.
  $id{$candidate} = 1;
  $sin->id($candidate);
 }
 else {
  $id{$name} = 1;
 }

 $sout->write_seq($sin);
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3