[Bio] / FortyEightMeta / pull_sge_accounting_for_jobs.pl Repository:
ViewVC logotype

View of /FortyEightMeta/pull_sge_accounting_for_jobs.pl

Parent Directory | Revision Log


Revision 1.1 - (download) (as text) (annotate)
Fri May 30 23:23:51 2008 UTC (11 years, 10 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
Development checkin of new metagenomics RAST pipeline.

#
# Read and parse the SGE accounting data; then run through the
# jobs and summarize the cpu use accumulated for each, as well as the
# job size.
#

use strict;
use Data::Dumper;
use GenomeMeta;

# Raw accounting records and per-SGE-job totals (wallclock/user/slots),
# both keyed by SGE job number and filled in by read_accounting().
our %accounting;
our %total;

# Per-pipeline-job summary (data size plus accumulated accounting totals),
# keyed by the job directory's numeric name.
our %mg_job;

our $job_base_dir = "/vol/rast-prod/jobs.new/jobs";
#our $job_base_dir = "/vol/rast-prod/jobs";
#our $job_base_dir = "/vol/metagenome-48-hour/Jobs.prod";

# Lexical directory handle; include the OS error so a failure is diagnosable.
opendir(my $job_dh, $job_base_dir) or die "cannot open jobdir $job_base_dir: $!\n";

read_accounting();

# Walk the numerically named job directories. Only jobs that carry a DONE
# marker and no ERROR marker are summarized.
#
# The explicit defined() matters: a directory entry named "0" is false in
# boolean context and would otherwise end the scan early on perls that do
# not add the implicit defined() test for readdir in a while condition.
while (defined(my $f = readdir($job_dh)))
{
    my $job_dir = "$job_base_dir/$f";
    next unless $f =~ /^\d+$/ and -d $job_dir;
    next unless -f "$job_dir/DONE";
    next if -f "$job_dir/ERROR";
    print STDERR "$f\n";
    process_job($f, $job_dir);
}
closedir($job_dh);

# Emit one tab-separated row per summarized job, in ascending numeric
# job order: job, size, wallclock, user, slots.
foreach my $job_id (sort { $a <=> $b } keys %mg_job)
{
    my $summary = $mg_job{$job_id};
    my @columns = ($job_id, @{$summary}{qw(size wallclock user slots)});
    print join("\t", @columns), "\n";
}

#
# Summarize one job directory: record the size of its contigs file in
# %mg_job and add in the accounting totals for every SGE job id stored
# in the job's meta.xml.
#
#   $job     - job number (the job directory's name); used as the %mg_job key
#   $job_dir - full path to the job directory
#
# Warns and returns without recording anything if the genome id file,
# the metadata, or the data file is unavailable.
#
sub process_job
{
    my($job, $job_dir) = @_;

    # Read GENOME_ID directly instead of shelling out to cat: no shell gets
    # to interpret $job_dir, and a missing file is reported explicitly.
    my $genome_file = "$job_dir/GENOME_ID";
    my $genome_fh;
    if (!open($genome_fh, '<', $genome_file))
    {
	warn "cannot read $genome_file: $!\n";
	return;
    }
    my $genome = do { local $/; <$genome_fh> };
    close($genome_fh);
    $genome = '' unless defined $genome;
    chomp $genome;

    my $meta = GenomeMeta->new(undef, "$job_dir/meta.xml");

    if (!$meta)
    {
	warn "no meta for $job_dir ($!)\n";
	return;
    }

#    my $data =  $meta->get_metadata("preprocess.fasta_file");
    my $data = "$job_dir/raw/$genome/contigs";
    my $size = -s $data;
    if (!$size)
    {
	warn "no size for data file $data\n";
	return;
    }

    $mg_job{$job}->{size} = $size;

    # Any metadata key containing sge_id or sge_job_id may hold a single
    # SGE job id or a reference to a list of them; normalize to a list.
    for my $k (grep { /sge_(job_)?id/ } $meta->get_metadata_keys)
    {
	my $v = $meta->get_metadata($k);
	if (!ref($v))
	{
	    $v = [$v];
	}

	for my $sge_id (@$v)
	{
	    do_job_sge($job, $job_dir, $sge_id);
	}
    }
}

#
# Fold the accounting totals of one SGE job into the summary of
# pipeline job $job.
#
#   $job     - key into %mg_job
#   $job_dir - accepted for interface compatibility; not used here
#   $sge_job - SGE job number to look up in %total
#
# SGE job ids with no entry in %total are silently ignored. The return
# value is not meaningful.
#
sub do_job_sge
{
    my($job, $job_dir, $sge_job) = @_;

    my $stat = $total{$sge_job};
    return unless $stat;

    # A plain loop rather than map: this runs purely for its side effect.
    for my $field (keys %$stat)
    {
	$mg_job{$job}->{$field} += $stat->{$field};
    }
    return;
}

#
# Load every SGE accounting file matching
# $SGE_ROOT/$SGE_CELL/common/accounting* into the global tables:
#
#   %accounting - job number => list of per-record hashes
#   %total      - job number => summed wallclock, user and slots
#
# Records whose "failed" field is true are skipped. Fields are taken by
# position from the colon-separated record. NOTE(review): "user" comes
# from field 14, which the SGE accounting(5) layout appears to list as
# ru_utime (user CPU time) rather than the owner name -- confirm against
# the installed SGE version.
#
# Dies if an accounting file cannot be opened.
#
sub read_accounting
{
    for my $file (<$ENV{SGE_ROOT}/$ENV{SGE_CELL}/common/accounting*>)
    {
	# Three-argument open with a lexical handle: the file name is never
	# parsed for a mode, and the handle is closed once the file is read.
	open(my $fh, '<', $file) or die "cannot read accounting file $file: $!";
	while (my $line = <$fh>)
	{
	    chomp $line;

	    # Blank and comment lines are not job records; parsing them would
	    # add junk entries under an empty job number.
	    next if $line =~ /^\s*(?:#|$)/;

	    my @a = split(/:/, $line);
	    my($q, $host, $job_name, $job, $start, $end, $failed, $wallclock, $user, $slots, $task_id) =
		@a[0, 1, 4, 5, 9, 10, 11, 13, 14, 34, 35];

	    next if $failed;

#	    print "job=$job q=$q slots=$slots task=$task_id\n";

	    my $rec = { q => $q, host => $host, name => $job_name,
			job => $job, start => $start, end => $end,
			wallclock => $wallclock, user => $user, slots => $slots, task_id=> $task_id};
	    push @{$accounting{$job}}, $rec;

	    for my $k (qw(wallclock user slots))
	    {
		$total{$job}->{$k} += $rec->{$k};
	    }
	}
	close($fh);
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3