[Bio] / FigKernelScripts / check_all_subsystems.pl Repository:
ViewVC logotype

View of /FigKernelScripts/check_all_subsystems.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11 - (download) (as text) (annotate)
Tue Dec 16 17:20:43 2008 UTC (11 years, 2 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, HEAD
Changes since 1.10: +9 -6 lines
Transform the single big select into a prepared statement on per-role select.s

########################################################################

use FIG;
my $fig = new FIG;

use strict;

# usage: check_all_subsystems [update check.info in subsystems directories]
#
#
# The check.info file in each subsystems directory contains indications of
# two possible errors:
#
#    subsystem mismatch peg func role genus-species curator
#            indicates that func mismatches role
#    subsystem left-out peg func role genus-species curator
#            indicates that peg should be in the spreadsheet
#
# In addition, I have added
#
#    subsystem maybe-add genome Variant N1 Role1 N2 Role2 ... Nn RoleN
#
# to indicate that "genome" maybe should be added to the spreadsheet.
# Automated extension would produce such an addition, if the variant is not 0.
#

my($sub,$peg,$role,%roles,$i,%roleI,%has_role,$roleQ,$query,$rdbH,$role_constraint,@roles);
my($relational_db_response,$func,%connected,%non_aux_roles_in_sub,%subsys,%genome_in_sub,$roleI);
my(%role_to_sub);
foreach $sub ($fig->all_subsystems)
{
    my $sobj = $fig->get_subsystem($sub);
    my @roles_in_sub   = $sobj->get_roles;
    my @non_aux_roles = grep { ! $sobj->is_aux_role($_) } @roles_in_sub;

    foreach $role (@roles_in_sub)
    {
        push(@{$role_to_sub{$role}},$sub);
	$roles{$role}++;
    }

    my(%vcodes);
    foreach my $genome ($sobj->get_genomes)
    {
        my $vcode = $sobj->get_variant_code($sobj->get_genome_index($genome));
        my @non_aux_roles_in_genome = ();

        foreach my $role (@non_aux_roles)
        {
            my @pegs = $sobj->get_pegs_from_cell($genome,$role);
            if (@pegs > 0)
            {
                push(@non_aux_roles_in_genome,$role);
            }
        }
        my $key = join("\t",sort @non_aux_roles_in_genome);
        $subsys{$sub}->{$key}->{$vcode}++;
        $genome_in_sub{$sub}->{$genome} = $vcode;
    }

    foreach $role (@non_aux_roles)
    {
        push(@{$non_aux_roles_in_sub{$sub}},$role);
    }
}

@roles = sort keys(%roles);
for ($i=0; ($i < @roles); $i++)
{
    $roleI{$roles[$i]} = $i;
}
print STDERR "gathered all ",scalar @roles, " roles\n";

$rdbH = $fig->db_handle;
my $sth = $rdbH->{_dbh}->prepare(qq(SELECT prot FROM roles WHERE role = ?));

for my $role (@roles)
{
    if (defined($roleI{$role}))
    {
	$sth->execute($role);
	while (my $row = $sth->fetchrow_arrayref())
	{
	    $peg = $row->[0];
	    $has_role{$peg}->{$roleI{$role}} = 1;
	}
    }
}
$sth->finish();
print STDERR "constructed has_role\n";

$query = "SELECT subsystem,role,protein FROM subsystem_index";
$relational_db_response = $rdbH->SQL($query);
foreach $_ (@$relational_db_response)
{   
    ($sub,$role,$peg) = @$_;
    if (defined($roleI{$role}))
    {
	$connected{$peg}->{$roleI{$role}}->{$sub} = 1;
    }
}
print STDERR "constructed connected\n";

foreach $peg (keys(%connected))
{
    my $x = $connected{$peg};
    foreach $roleI (keys(%$x))
    {
        if ((! $has_role{$peg}->{$roleI}) && $fig->is_real_feature($peg))
        {
            my $function = &stripped_function_of($fig,$peg);
            my $subH = $x->{$roleI};
            {
                foreach $sub (keys(%$subH))
                {   
                    print join("\t",($sub,'mismatch',$peg,$function,$roles[$roleI],
				     $fig->genus_species(&FIG::genome_of($peg)),
				     $fig->subsystem_curator($sub))),"\n";
                }
	    }
	}
    }
}

my %possible;
foreach $peg (keys(%has_role))
{   
    my $genome = &FIG::genome_of($peg);
    my $x = $has_role{$peg};
    foreach $roleI (keys(%$x))
    {
        if ((! $connected{$peg}->{$roleI}) && $fig->is_real_feature($peg))
        {
            my $subs = $role_to_sub{$roles[$roleI]};
            foreach $sub (@$subs)
            {
                if ($genome_in_sub{$sub}->{$genome})
                {
                    my $function = &stripped_function_of($fig,$peg);
                    print join("\t",($sub,'left-out',$peg,$function,$roles[$roleI],$fig->genus_species($genome),
				     $fig->subsystem_curator($sub))),"\n";
                }
                else
                {
                    $possible{$sub}->{$genome}->{$roles[$roleI]} = 1;
                }
            }
        }
    }
}

foreach $sub (keys(%possible))
{
    my $curator = $fig->subsystem_curator($sub);
    my $x = $possible{$sub};
    foreach my $genome (keys(%$x))
    {
	next if (! $fig->is_complete($genome));
        my $y = $x->{$genome};
        my @non_aux_roles_in_genome = grep { $y->{$_} } sort @{$non_aux_roles_in_sub{$sub}};
        my $key = join("\t",sort @non_aux_roles_in_genome);
        my $vcodeH = $subsys{$sub}->{$key};
        my @vcodes = grep { ($_ ne "-1") && ($_ ne "*-1") && ($_ ne "0") } sort { $vcodeH->{$b} <=> $vcodeH->{$a} } keys(%$vcodeH);
	if (@vcodes > 0)
	{
	    my $vcode = join(",",@vcodes);
	    print join("\t",($sub,'maybe-add',$curator,$vcode,$genome,$fig->genus_species($genome),$key)),"\n";
	}
    }
}

sub stripped_function_of {
    my($fig,$peg) = @_;

    my $func = $fig->function_of($peg);
    $func =~ s/\s*\#.*$//;
    return $func;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3