[Bio] / FortyEight / find-genomes-not-in-seed.pl Repository:
ViewVC logotype

View of /FortyEight/find-genomes-not-in-seed.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Tue Jan 16 21:37:23 2007 UTC (13 years, 2 months ago) by olson
Branch: MAIN
Add module to wrap up 48hr job lookups
Add script to find genomes in 48hr that aren't in seed.
Make BBHs colored purple on cluster cartoon

#
# Find any genomes currently in the 48-hour queue that are finished and appear to
# not exist in the SEED.
#

use strict;
use Data::Dumper;
use FIG;
use Job48;

my $fig = new FIG();

my @genomes = $fig->genomes();

my %by_tax;
my %genome_to_name;
my %name_to_genome;
my %contig_to_genome;
for my $g (@genomes)
{
    my($tax, $vers) = split(/\./, $g);
    push @{$by_tax{$tax}}, $g;
    my $gs = $fig->genus_species($g);
    $name_to_genome{$gs} = $g;
    $genome_to_name{$g} = $gs;
}

#
# Poke the db to read all contig ids.
#
print "Reading contigs\n";
my $res = $fig->db_handle->SQL(qq(SELECT genome, contig from contig_lengths));
for my $ent (@$res)
{
    my($genome, $contig) = @$ent;
    
    push @{$contig_to_genome{$contig}}, $genome;
}
print "done reading contigs\n";

my @jobs = Job48::all_jobs();
@jobs = grep { $_->active() } @jobs;

for my $job (@jobs)
{
#    print "Job " . $job->id . " " . $job->genome_id . " " . $job->genome_name . "\n";
    check($job);
}

sub check
{
    my($job) = @_;

    my $id = $job->id;
    my $g = $job->genome_id();
    my $gs = $job->genome_name();
    my @inseed;
    my $status = "UNKNOWN";

    if (!$job->finished())
    {
	$status = "INCOMPLETE";
    }
    elsif (my $sname = $name_to_genome{$gs})
    {
	$status = "NAME_IN_SEED";
	@inseed = ($sname, $gs);
    }
    else
    {
	(my $tax = $g) =~ s/\..*$//;
	my @bytax = @{$by_tax{$tax}} if $by_tax{$tax};
	if (@bytax)
	{
	    $status = "TAX_IN_SEED";
	    
	    for my $seedg (@bytax)
	    {
		my $seedname = $genome_to_name{$seedg};
		push(@inseed, $seedg, $seedname);
	    }
	}
	else
	{
	    #
	    # Search for contig names that map.
	    #

	    for my $contig ($job->contigs())
	    {
		my $glist = $contig_to_genome{$contig};
		if ($glist)
		{
		    $status = "MATCHING_CONTIG_ID";
		    for my $sg (@$glist)
		    {
			push(@inseed, $sg, $genome_to_name{$sg});
		    }
		    last;
		}
	    }

	    if ($status eq 'UNKNOWN')
	    {
		$status = "NEW";
	    }
	}
    }
    print join("\t", $status, $job->id, $job->user, $g, $gs, @inseed), "\n";

}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3