[Bio] / FigKernelScripts / summarize_genome_data.pl Repository:
ViewVC logotype

View of /FigKernelScripts/summarize_genome_data.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Wed Feb 6 20:32:33 2013 UTC (6 years, 9 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
create summary_of_genome data

use strict;
use Data::Dumper;
use SeedEnv;
my $sapO = SAPserver->new;

my $genomeH = $sapO->all_genomes({-complete => 1, -prokaryotic => 1});
my $gdataH  = $sapO->genome_data( -ids => [keys(%$genomeH)],
				  -data => ['dna-size','domain','genetic-code','name','taxonomy'] );
my %genetic_codeH;
my %taxonomyH;
my %domainH;
my %rp_countH;
my %dna_szH;
my %numH;

foreach my $g (sort { $a <=> $b } keys(%$gdataH))
{
    print STDERR "$g\n";
    my($dna_sz,$domain,$gc,$name,$tax) = @{$gdataH->{$g}};
    $tax =~ s/,[^,]*$//;
    if ($name =~ /^(\S+\s+\S+)/)
    {
	my $gs = $1;
	if (($gs !~ /plasmid|phage/i) && ($gs !~ / sp\.?$/))
	{
	    $numH{$gs}++;
	    $dna_szH{$gs} += $dna_sz;
	    $domainH{$gs}->{$domain}++;
	    $genetic_codeH{$gs}->{$gc}++;
	    if ((! $taxonomyH{$gs}) || (length($tax) < $taxonomyH{$gs}))
	    {
		$taxonomyH{$gs} = $tax;
	    }
	    my $pegsH = $sapO->all_features( -ids => [$g], -type => ['peg'] );
	    my $pegs  = $pegsH->{$g};
	    my $funcH = $sapO->ids_to_functions( -ids => $pegs);
	    my %rp    = map { my $f = $funcH->{$_}; 
			      ($f =~ /[LS]SU ribosomal protein/) ? ($f => 1) : () } keys(%$funcH);
	    my $count = keys(%rp);
	    $rp_countH{$gs}->{$count}++;
	}
    }
}

foreach my $gs (sort keys(%domainH))
{
    my @poss = sort { $domainH{$gs}->{$b} <=> $domainH{$gs}->{$a} } keys(%{$domainH{$gs}});
    my $best_domain = $poss[0];

    @poss = sort { $genetic_codeH{$gs}->{$b} <=> $genetic_codeH{$gs}->{$a} } keys(%{$genetic_codeH{$gs}});
    my $best_genetic_code = $poss[0];

    my $best_tax = $taxonomyH{$gs};

    @poss = sort { $rp_countH{$gs}->{$b} <=> $rp_countH{$gs}->{$a} } keys(%{$rp_countH{$gs}});
    my $best_rp  = $poss[0];

    print join("\t",($gs,int($dna_szH{$gs}/$numH{$gs}),$best_genetic_code,$best_domain,$best_tax,$best_rp)),"\n";
}
    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3