[Bio] / FortyEightMeta / find_jobs_with_short_seqs.pl Repository:
ViewVC logotype

View of /FortyEightMeta/find_jobs_with_short_seqs.pl

Parent Directory | Revision Log


Revision 1.1 - (download) (as text) (annotate)
Fri May 30 23:23:51 2008 UTC (11 years, 8 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
Development checkin of new metagenomics RAST pipeline.

use strict;
use GenomeMeta;
use FIG;

#
# Find any jobs that have sequences too short.
#

# Root directory that is scanned for job subdirectories.
my $jobdir = "/vol/mg-rast-prod/jobs";

# Job ids to examine: the ones named on the command line if any were
# given, otherwise every numerically-named entry of $jobdir, highest
# number first.
my @jobs;

if (@ARGV)
{
    @jobs = @ARGV;
}
else
{
    opendir(my $dh, $jobdir) or die "Cannot open job directory $jobdir: $!";
    # Discard non-numeric entries (".", "..", stray files) before sorting so
    # that <=> only ever compares real job numbers.
    @jobs = sort { $b <=> $a } grep { /^\d+$/ } readdir($dh);
    closedir($dh);
}

# Sequences with fewer than this many characters are reported as short.
my $min_seq_len = 10;

# For each candidate job, read the FASTA file named by the job's
# "preprocess.fasta_file" metadata entry and print the id and length of
# every sequence shorter than $min_seq_len.
for my $d (@jobs)
{
    # Only purely numeric names are treated as jobs.
    next unless $d =~ /^\d+$/;
    # next unless $d > 500;
    my $p = "$jobdir/$d";
    next unless -d $p;

    my $meta = GenomeMeta->new(undef, "$p/meta.xml");
    my $fasta = $meta->get_metadata("preprocess.fasta_file");

    # Nothing to check when the job records no FASTA file.
    next unless $fasta;

    # An unreadable FASTA file is reported on STDERR rather than skipped
    # silently, so it cannot be mistaken for a job with no short sequences.
    my $fh;
    unless (open($fh, "<", $fasta))
    {
        warn "$d: cannot open $fasta: $!\n";
        next;
    }

    # Collect [id, length] pairs for the sequences that are too short.
    my @short;
    while (my ($id, $dat) = FIG::read_fasta_record($fh))
    {
        # $dat is dereferenced as a scalar ref holding the sequence data.
        my $len = length($$dat);
        push(@short, [$id, $len]) if $len < $min_seq_len;
    }
    close($fh);

    next unless @short;

    my $n = @short;
    print "$d $fasta: $n short seqs:\n";
    for my $rec (@short)
    {
        print join("\t", @$rec), "\n";
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3