[Bio] / FortyEightMeta / summarize_mgrast_taxa.pl Repository:
ViewVC logotype

View of /FortyEightMeta/summarize_mgrast_taxa.pl

Parent Directory | Revision Log

Revision 1.1 - (download) (as text) (annotate)
Tue Aug 14 18:32:46 2007 UTC (12 years, 7 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
Summarize all taxa info for all jobs in the mgrast.

# Build a flat summary of all of the taxa hit in the besthits files of the mgrast jobs.

use strict;
use FIG;
use Data::Dumper;

# Walk every finished job under the spool directory, add up the per-taxon
# counts found in that job's taxa files, and print one "taxon<TAB>count"
# line per taxon to STDOUT, sorted by taxon string. Progress lines of the
# form "j=<job> <genome>" go to STDERR.

# Root of the job spool; each job lives in its own subdirectory.
my $spool = "/vol/metagenome-48-hour/Jobs.dev";

opendir(my $spool_dh, $spool) or die "cannot opendir $spool: $!\n";

# A job is processed only if its directory name starts with digits and it
# contains a DONE file. Jobs are visited in numeric order.
my @jobs = sort { $a <=> $b }
           grep { -f "$spool/$_/DONE" and /^\d+/ }
           readdir($spool_dh);
closedir($spool_dh);

# Running total per taxon string, summed across all jobs and all files.
my %tax;

for my $j (@jobs) {
    # GENOME_ID names the subdirectory of rp/ that holds this job's results.
    # FIG::file_head is defined outside this file; presumably it returns the
    # leading content of the file -- verify against FIG.pm.
    my $genome = &FIG::file_head("$spool/$j/GENOME_ID");
    chomp $genome;
    print STDERR "j=$j $genome\n";

    # The glob may match zero or more besthits files; the summary file path
    # is always attempted, so a job without it aborts the whole run.
    my @taxfiles = (
        <$spool/$j/rp/$genome/taxa*.besthits>,
        "$spool/$j/rp/$genome/taxa_summary_by_blast",
    );

    for my $taxfile (@taxfiles) {
        open(my $tax_fh, '<', $taxfile) or die "Cannot open $taxfile: $!\n";

        while (my $line = <$tax_fh>) {
            my ($taxon, $count, $leaf) = split(/\t/, $line);

            # Only rows with a true third column contribute to the totals.
            # NOTE(review): the line is not chomped, so if the third column
            # is the last one it still carries the newline and "0\n" counts
            # as true -- confirm the file format before relying on this.
            next unless $leaf;

            $tax{$taxon} += $count;
        }

        close($tax_fh);
    }
}

for my $taxon (sort keys %tax) {
    my $total = $tax{$taxon};
    print "$taxon\t$total\n";
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3