[Bio] / FigKernelScripts / gather_genome_info.pl Repository:
ViewVC logotype

View of /FigKernelScripts/gather_genome_info.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Fri Oct 9 20:59:05 2009 UTC (10 years, 5 months ago) by olson
Branch: MAIN
genome info gatherer

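#
# Gather information about all prokaryotic genomes (complete or not;
# each genome is flagged COMPLETE or NOT_COMPLETE in the output).
#
# This includes
#
# Genome id (taxon id with version suffix, e.g. 585057.4) and genus/species name
# accession #s from genbank file
# project id from genbank file
# RAST job id if present
#
# one line for each contig:
# contig id, size, md5sum
#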

use strict;
use FIG;

#
# Main driver: write one record to STDOUT for every prokaryotic genome
# returned by $fig->genomes().
#
# Record layout:
#
#   genome id <TAB> name <TAB> RAST job id (empty if none) <TAB> COMPLETE|NOT_COMPLETE
#   contig id <TAB> length <TAB> md5sum        (one line per contig, sorted by id)
#   ///
#   LOCUS / ACCESSION / DBLINK Project lines kept from genbank_file, if any
#   ///
#   //
#

my $fig = FIG->new;

my @genomes = sort { FIG::by_genome_id($a, $b) } $fig->genomes();
#my @genomes = qw(585057.4);

for my $genome (@genomes)
{
    next unless $fig->is_prokaryotic($genome);
    my $name = $fig->genus_species($genome);

    my $dir = $fig->organism_directory($genome);

    # The presence of a COMPLETE marker file in the organism directory
    # decides the completeness flag.
    my $complete = (-f "$dir/COMPLETE") ? 'COMPLETE' : 'NOT_COMPLETE';

    #
    # Gather RAST info.
    #
    my $rast_job = read_rast_job("$dir/RAST");

    #
    # Genbank info, if available.
    #
    my @genbank = read_genbank_summary("$dir/genbank_file");

    # A genome without a RAST job gets an empty column rather than an
    # undefined value passed to join.
    my $rast_column = defined($rast_job) ? $rast_job : '';

    print join("\t", $genome, $name, $rast_column, $complete), "\n";

    my $contigs = $fig->contig_lengths($genome);
    for my $id (sort keys %$contigs)
    {
        my $md5 = $fig->contig_md5sum($genome, $id);
        print join("\t", $id, $contigs->{$id}, $md5), "\n";
    }
    print "///\n";

    for my $entry (@genbank)
    {
        print "$_\n" for @$entry;
    }
    print "///\n";
    print "//\n";
}

#
# read_rast_job($path)
#
# Scan the file at $path for the first line containing
# "RAST job number <digits>" and return the digits.
#
# Returns undef when the file cannot be opened (a missing file is
# treated as "no RAST job", not as an error) or when no line matches.
#
sub read_rast_job
{
    my($path) = @_;

    open(my $fh, "<", $path) or return;

    my $job;
    while (my $line = <$fh>)
    {
        if ($line =~ /RAST job number (\d+)/)
        {
            $job = $1;
            last;
        }
    }
    close($fh);

    return $job;
}

#
# read_genbank_summary($path)
#
# Crude parse of the genbank file at $path for the data we're
# interested in. Returns a list of array refs, one per LOCUS line
# encountered; each array holds the (chomped) LOCUS line followed by
# any ACCESSION and "DBLINK Project:<digits>" lines seen before the
# next LOCUS line.
#
# Returns an empty list when the file cannot be opened.
#
sub read_genbank_summary
{
    my($path) = @_;

    my @entries;
    open(my $fh, "<", $path) or return @entries;

    my $cur;
    while (my $line = <$fh>)
    {
        chomp $line;
        if ($line =~ /^LOCUS\s+/)
        {
            # Each LOCUS line starts a new entry.
            $cur = [$line];
            push(@entries, $cur);
        }
        elsif ($line =~ /^ACCESSION\s+\S/ || $line =~ /^DBLINK\s+Project:\d/)
        {
            # Lines that precede the first LOCUS have no entry to attach
            # to and are dropped.
            push(@$cur, $line) if $cur;
        }
    }
    close($fh);

    return @entries;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3