[Bio] / FortyEightMeta / mg_norm_16s.pl Repository:
ViewVC logotype

View of /FortyEightMeta/mg_norm_16s.pl

Parent Directory | Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Jun 11 21:16:42 2008 UTC (11 years, 7 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
Commit of initial MGRAST2 code.

use strict;
use Data::Dumper;

#
# Normalize 16S taxonomy data.
#
# Reads flat-file records (LOCUS / SOURCE / ORGANISM sections, as used by
# GenBank-style files) from the files named on the command line, or from
# STDIN when none are given, and writes one tab-separated line per
# SOURCE/ORGANISM section to STDOUT:
#
#     <locus id> TAB <taxon 1> TAB <taxon 2> ...
#
# Duplicate LOCUS ids and sections that cannot be parsed are reported on
# STDERR; they do not stop processing.
#

use warnings;

#
# normalize_16s($in, $out)
#
#   $in   - readable filehandle (or glob reference such as \*ARGV)
#   $out  - writable filehandle that receives the tab-separated rows
#
# Returns the number of rows written to $out.
#
# Parsing notes:
#   * The SOURCE value may wrap onto continuation lines indented by 12
#     spaces.  The ORGANISM name is assumed to wrap onto the same number of
#     continuation lines, so that many lines are skipped after the ORGANISM
#     line before the taxonomy starts (heuristic inherited from the original
#     script -- confirm against the input data if organism names look wrong).
#   * The taxonomy is every following line that starts with whitespace; the
#     pieces are joined with single spaces, the final '.' is dropped and the
#     result is split on ';' followed by whitespace.
#   * The first line that ends a scan is kept as look-ahead and handed back
#     to the main loop, so a LOCUS or SOURCE line in that position is not
#     silently dropped.
#   * Reading stops at the first end-of-input.  This matters for ARGV: once
#     <> has reported EOF, another read would start over on STDIN.
#
sub normalize_16s
{
    my ($in, $out) = @_;

    my %seen;        # LOCUS ids already encountered (for DUP reporting)
    my $cur;         # id taken from the most recent LOCUS line
    my $rows = 0;    # number of rows written so far

    my $line = <$in>;

  LINE:
    while (defined $line)
    {
        chomp $line;

        if ($line =~ /^LOCUS\s+(\S+)/)
        {
            $cur = $1;
            print STDERR "DUP $cur $.\n" if $seen{$cur}++;
        }
        elsif ($line =~ /^SOURCE/)
        {
            # Count the wrapped continuation lines of the SOURCE value.
            my $skip = 0;
            $line = <$in>;
            while (defined $line and $line =~ /^ {12}/)
            {
                $skip++;
                $line = <$in>;
            }

            if (!defined $line)
            {
                # Input ended inside the SOURCE section.
                warn "bad parse 1 for org at $.\n";
                last LINE;
            }

            chomp $line;
            if ($line !~ /^\s+ORGANISM\s+/)
            {
                warn "bad parse 1 for org at $.\n";
                # $line is look-ahead: let the main loop examine it.
                next LINE;
            }

            # Skip the wrapped part of the organism name and fetch the
            # first taxonomy line ($skip + 1 reads in total).
            for (1 .. $skip + 1)
            {
                $line = <$in>;
                last unless defined $line;
            }

            # Collect the indented taxonomy lines, trimmed on both sides so
            # that trailing blanks or a CR cannot hide the final period.
            my @parts;
            while (defined $line and $line =~ /^\s+(.*?)\s*$/)
            {
                push @parts, $1 if length $1;
                $line = <$in>;
            }

            my $tax = join(' ', @parts);
            warn "empty tax at $.\n" if $tax eq '';
            $tax =~ s/\.$//;

            my $id = defined $cur ? $cur : '';
            print {$out} join("\t", $id, split(/;\s+/, $tax)), "\n";
            $rows++;

            # $line now holds the look-ahead line (or undef at end of
            # input); re-enter the loop without reading another line.
            next LINE;
        }

        $line = <$in>;
    }

    return $rows;
}

normalize_16s(\*ARGV, \*STDOUT);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3