[Bio] / FigKernelScripts / ensure_genome_registration.pl Repository:
ViewVC logotype

View of /FigKernelScripts/ensure_genome_registration.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Mar 14 20:45:00 2007 UTC (13 years, 3 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
generate feature registration stuff

use strict;
use FIG;
use Data::Dumper;
use FIG_Config;
use File::Basename;

#
# Ensure that the genomes in this SEED have been registered with the clearinghouse.
#
# This version utilizes a tab-separated dump of genome and feature registration information
# taken directly from the clearinghouse database.
#
# It generates a tab-delimited file suitable for feeding to register_features_batch.pl.
#

#
# Command-line handling.
#
# Two dump files are required; any further arguments are taken as the genome
# IDs to check.  When no genome IDs are given, the list returned by
# $fig->genomes() is used instead.
#

# Direct method-call syntax.  The indirect-object form ("new FIG") is parsed
# heuristically by perl and can silently bind to something else.
my $fig = FIG->new;

my $usage = "ensure_genome_registration genome-dump feature-dump [G1 G2 ...]\n";

@ARGV >= 2 or die $usage;

# At file scope, a bare shift() consumes from @ARGV.
my $gdump = shift;
my $fdump = shift;

# Whatever remains on the command line is the explicit genome list.
my @genomes = @ARGV ? @ARGV : $fig->genomes();

#
# Load the two tab-separated dumps into lookup tables.
#
# Three-argument open with lexical handles is used so that a dump path is
# always treated as a plain file name (two-argument open interprets leading
# mode characters and strips surrounding whitespace), and so that no global
# bareword handle is shared with the rest of the script.
#
# Both files are opened before either is read, so a missing feature dump is
# reported before any work is done.
#
open(my $genome_fh, '<', $gdump) or die "Cannot open genome dump $gdump: $!\n";
open(my $feature_fh, '<', $fdump) or die "Cannot open feature dump $fdump: $!\n";

# Genome dump: "key <TAB> next_id" per line.  The key is what the main loop
# later looks up using the numeric prefix of each genome ID.
my %next_genome;

while (my $line = <$genome_fh>)
{
    chomp $line;
    my($genome, $next_id) = split(/\t/, $line);

    # Blank or truncated lines carry no usable record.
    next unless defined($genome) && defined($next_id);

    $next_genome{$genome} = $next_id;
}

close($genome_fh);

# Feature dump: "genome <TAB> feature-type <TAB> next_id" per line.
my %next_feature;
while (my $line = <$feature_fh>)
{
    chomp $line;
    my($genome, $type, $next_id) = split(/\t/, $line);

    # Blank or truncated lines carry no usable record.
    next unless defined($genome) && defined($type) && defined($next_id);

    $next_feature{$genome}->{$type} = $next_id;
}
close($feature_fh);


#
# Check each requested genome against the registration tables.
#
#  * Genome level: a warning goes to STDERR when the genome's numeric suffix
#    is beyond the registered next-id for its numeric prefix, or when that
#    prefix has no entry in the genome dump at all.
#  * Feature level: for every entry under <genome dir>/Features, a line
#    "genome <TAB> feature-type <TAB> max-feature-number" goes to STDOUT when
#    the highest feature number found in that type's tbl file is at or
#    beyond the registered next-id for that genome and type.
#
# Dies if a genome directory is missing, or (inside max_feature_in_tbl) if a
# tbl file cannot be opened.
#
for my $genome (@genomes)
{
    my $dir = "$FIG_Config::organisms/$genome";
    -d $dir or die "$dir does not exist\n";

    if ($genome =~ /^(\d+)\.(\d+)/)
    {
        my($tax, $num) = ($1, $2);
        my $registered_next = $next_genome{$tax};

        if (!defined($registered_next))
        {
            # No record for this prefix: report it explicitly rather than
            # comparing against an undefined value and printing a blank.
            warn "UNREGISTERED GENOME $genome (no next_genome entry for $tax)\n";
        }
        # NOTE(review): the feature check below treats max >= next as
        # unregistered, while this check uses a strict '<'.  If the dump
        # stores the next id to be handed out, this may be off by one --
        # confirm against the clearinghouse semantics before changing.
        elsif ($registered_next < $num)
        {
            # The table is keyed by the prefix, so label the value with it.
            warn "UNREGISTERED GENOME $genome (next_genome($tax) = $registered_next)\n";
        }
    }

    for my $tdir (glob("$dir/Features/*"))
    {
        my $ftype = basename($tdir);
        my $max_feat = max_feature_in_tbl("$tdir/tbl");

        # Look the value up without autovivifying $next_feature{$genome}.
        # A missing registration compares as 0, so any feature numbered 0 or
        # higher is reported.
        my $by_type = $next_feature{$genome};
        my $regnext = ($by_type && defined($by_type->{$ftype})) ? $by_type->{$ftype} : 0;

        if ($max_feat >= $regnext)
        {
            print join("\t", $genome, $ftype, $max_feat), "\n";
        }
    }
}

#
# max_feature_in_tbl($tbl_file)
#
# Scan a tab-separated tbl file and return the largest feature number found
# in its first column.  Only first-column values that begin with
# "fig|<digits>.<digits>.<type>.<digits>" are considered; the trailing digit
# run is the feature number.
#
# Returns -1 when no line carries a matching ID (including an empty file).
# Dies if the file cannot be opened.
#
sub max_feature_in_tbl
{
    my($tbl_file) = @_;

    # Lexical handle + three-argument open: the path is never interpreted as
    # a mode string, and the caller's global F handle is left untouched.
    open(my $tbl_fh, '<', $tbl_file) or die "cannot open $tbl_file: $!\n";

    my $max = -1;
    while (my $line = <$tbl_fh>)
    {
        chomp $line;
        my($fid) = split(/\t/, $line);

        # Blank lines yield no first field; anything not shaped like a
        # fig ID is ignored.
        next unless defined($fid) && $fid =~ /^fig\|\d+\.\d+\.[^.]+\.(\d+)/;

        $max = $1 if $1 > $max;
    }
    close($tbl_fh);
    return $max;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3