# Removes duplicate entries from a FASTA file. -- /gdp

# -*- perl -*-
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
# This file is part of the SEED Toolkit.
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.

use FIG;

use Pod::Text;
# use File::Basename;   $this_tool_name = basename($0);
use strict;
use warnings;

# Print the embedded POD and exit when the script is invoked with a lone
# -help / --help flag.  The pattern is anchored so that an input file whose
# name merely contains "-help" is not mistaken for a help request.
if ((@ARGV == 1) && ($ARGV[0] =~ m/\A--?help\z/)) {
    pod2text($0);
    exit(0);
}

=pod

=over 5

=item Usage:     purge_fasta  < fasta_in  > fasta_out

=item Usage:     purge_fasta  fasta_in  [fasta_out]

=item Function:  Removes duplicate entries, and reformats a fasta file to a uniform 60 chars per line.

=back

=cut

# Select the input stream: the first argument when it names an existing
# file, otherwise STDIN (a non-existent path silently falls back to STDIN).
my $fin;
if ($ARGV[0] && (-e $ARGV[0])) {
    open($fin, '<', $ARGV[0])
        or die "could not open $ARGV[0] as input: $!";
}
else {
    $fin = \*STDIN;
}

# Select the output stream: the second argument when given, otherwise STDOUT.
my $fout;
if ($ARGV[1]) {
    open($fout, '>', $ARGV[1])
        or die "could not open $ARGV[1] as output: $!";
}
else {
    $fout = \*STDOUT;
}

# Read every record, keyed by sequence ID.  When an ID occurs more than
# once, the record read last overwrites the earlier ones, which is what
# removes the duplicates.
my %seq_of;
while (my ($id, $seq_ref) = FIG::read_fasta_record($fin)) {
    $seq_of{$id} = $$seq_ref;
}

# Write the surviving records in the order defined by FIG::by_fig_id.
foreach my $id (sort { FIG::by_fig_id($a, $b) } keys %seq_of) {
    FIG::display_id_and_seq($id, \$seq_of{$id}, $fout);
}

# Buffered write errors only surface at close, so check it explicitly.
close($fout) or die "could not close output: $!";

