[Bio] / FigKernelScripts / check_model.pl Repository:
ViewVC logotype

View of /FigKernelScripts/check_model.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (download) (as text) (annotate)
Tue Feb 5 02:27:15 2008 UTC (11 years, 10 months ago) by parrello
Branch: MAIN
CVS Tags: rast_rel_2008_06_18, rast_rel_2008_06_16, rast_rel_2008_07_21, rast_2008_0924, rast_rel_2008_04_23, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, rast_rel_2008_10_09, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, mgrast_rel_2008_1110, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, rast_rel_2008_08_07
Changes since 1.5: +141 -141 lines
Fixed a POD error.

# _*_ Perl _*_
#
#
#
use strict;
use FIGV;
use Scenario;

my ($genome_id,$orgdir) = @ARGV;

my $debug = 0;

unless (scalar (@ARGV) >= 1)
{
    print STDERR "usage: check_model <genome_id>\n";
    exit(-1);
}
chomp $genome_id;
my $fig;
if($orgdir)
{
    $fig = new FIGV($orgdir);
}
else
{
    $fig = new FIGV;
}
Scenario::set_fig($fig,$genome_id);
print STDERR "Running check_model on genome $genome_id\n" if($debug);
print STDERR "\tStage 1 - Loading Scenarios, model inputs and biomass\n" if($debug);
my @scenarios = @{Scenario->get_genome_scenarios($genome_id,0)};
my $filebase = $fig->model_directory($genome_id)."/Analysis/";

my %scenarios_used;
my %scenarios_id;
my %all_substrates;
foreach (@scenarios)
{
    map {$all_substrates{$_} = 1} @{$_->get_substrates_all()};
    $scenarios_used{$_->get_scenario_name()} = 0;
    $scenarios_id{$_->get_id()} = 0;
}

my %input_cpds;
open(INPUTS,$filebase."inputs.txt")
    or die ("Failed to open ".$filebase."inputs.txt. Run find_model_information $genome_id inputs");
while(<INPUTS>)
{
    chomp;
    my @line = split "\t", $_;
    chomp @line;
    $input_cpds{$line[0]} = 1;
}
close(INPUTS);

#ATP Snythase HACK, lets add ATP as a input

#cofactors just used for testing connectivity, not actually provided to model
$input_cpds{"C00002"} = 1;
$input_cpds{"C00028"} = 1;
$input_cpds{"C00030"} = 1;
$input_cpds{"C00035"} = 1;
$input_cpds{"C00040"} = 1;
$input_cpds{"C00044"} = 1;
$input_cpds{"C00126"} = 1;
$input_cpds{"C00138"} = 1;
$input_cpds{"C00139"} = 1;
$input_cpds{"C00239"} = 1;
$input_cpds{"C00342"} = 1;
$input_cpds{"C00343"} = 1;
$input_cpds{"C00361"} = 1;
$input_cpds{"C01070"} = 1;


#inputs not produced or imported but required for connecting all scenarios
=pod
    $input_cpds{"C00141"} = 1;
    $input_cpds{"C00099"} = 1;
    $input_cpds{"C00049"} = 1;
    $input_cpds{"C00040"} = 1;
    $input_cpds{"C00054"} = 1;
    $input_cpds{"C00059"} = 1;
    $input_cpds{"C00116"} = 1;
    $input_cpds{"C00163"} = 1;
    $input_cpds{"C00185"} = 1;
    $input_cpds{"C00206"} = 1;
    $input_cpds{"C00208"} = 1;
    $input_cpds{"C00234"} = 1;
    $input_cpds{"C00235"} = 1;
    $input_cpds{"C00239"} = 1;
    $input_cpds{"C00250"} = 1;
    $input_cpds{"C00266"} = 1;
    $input_cpds{"C00281"} = 1;
    $input_cpds{"C00288"} = 1;
    $input_cpds{"C00301"} = 1;
    $input_cpds{"C00314"} = 1;
    $input_cpds{"C00416"} = 1;
    $input_cpds{"C00534"} = 1;
    $input_cpds{"C00670"} = 1;
    $input_cpds{"C00672"} = 1;
    $input_cpds{"C00794"} = 1;
    $input_cpds{"C00898"} = 1;
    $input_cpds{"C00988"} = 1;
    $input_cpds{"C01070"} = 1;
    $input_cpds{"C01096"} = 1;
    $input_cpds{"C01137"} = 1;
    $input_cpds{"C01157"} = 1;
    $input_cpds{"C01233"} = 1;
    $input_cpds{"C02191"} = 1;
    $input_cpds{"C02232"} = 1;
    $input_cpds{"C02315"} = 1;
    $input_cpds{"C02336"} = 1;
    $input_cpds{"C02350"} = 1;
    $input_cpds{"C03150"} = 1;
    $input_cpds{"C03543"} = 1;
    $input_cpds{"C04121"} = 1;
    $input_cpds{"C04146"} = 1;
    $input_cpds{"C04489"} = 1;
    $input_cpds{"C04688"} = 1;
    $input_cpds{"C04824"} = 1;
    $input_cpds{"C05223"} = 1;
    $input_cpds{"C05761"} = 1;
    $input_cpds{"C05898"} = 1;
    $input_cpds{"C06143"} = 1;
    $input_cpds{"C06187"} = 1;
    $input_cpds{"C06188"} = 1;
    $input_cpds{"C11457"} = 1;
    $input_cpds{"C14818"} = 1;
    
    
    $input_cpds{"C00002"} = 1;
    $input_cpds{"C00001"} = 1;
    $input_cpds{"C00003"} = 1;
    $input_cpds{"C00007"} = 1;
    $input_cpds{"C00009"} = 1;
    $input_cpds{"C00023"} = 1;
    $input_cpds{"C00024"} = 1;
    $input_cpds{"C00034"} = 1;
    $input_cpds{"C00035"} = 1;
    $input_cpds{"C00044"} = 1;
    $input_cpds{"C00076"} = 1;
    $input_cpds{"C00080"} = 1;
    $input_cpds{"C00082"} = 1;
    $input_cpds{"C00087"} = 1;
    $input_cpds{"C00114"} = 1;
    $input_cpds{"C00115"} = 1;
    $input_cpds{"C00120"} = 1;
    $input_cpds{"C00126"} = 1;
    $input_cpds{"C00138"} = 1;
    $input_cpds{"C00139"} = 1;
    $input_cpds{"C00153"} = 1;
    $input_cpds{"C00229"} = 1;
    $input_cpds{"C00238"} = 1;
    $input_cpds{"C00255"} = 1;
    $input_cpds{"C00283"} = 1;
    $input_cpds{"C00305"} = 1;
    $input_cpds{"C00342"} = 1;
    $input_cpds{"C00343"} = 1;
    $input_cpds{"C00361"} = 1;
    $input_cpds{"C00378"} = 1;
    $input_cpds{"C00504"} = 1;
    $input_cpds{"C00568"} = 1;
    $input_cpds{"C00864"} = 1;
    $input_cpds{"C05776"} = 1;
    
    $input_cpds{"C00141"} = 1;
    $input_cpds{"C00099"} = 1;
    $input_cpds{"C00049"} = 1;
    $input_cpds{"C00040"} = 1;
    $input_cpds{"C00054"} = 1;
    $input_cpds{"C00059"} = 1;
    $input_cpds{"C00116"} = 1;
    $input_cpds{"C00163"} = 1;
    $input_cpds{"C00185"} = 1;
    $input_cpds{"C00206"} = 1;
    $input_cpds{"C00208"} = 1;
    $input_cpds{"C00234"} = 1;
    $input_cpds{"C00235"} = 1;
    $input_cpds{"C00239"} = 1;
    $input_cpds{"C00250"} = 1;
    $input_cpds{"C00266"} = 1;
    $input_cpds{"C00281"} = 1;
    $input_cpds{"C00288"} = 1;
    $input_cpds{"C00301"} = 1;
    $input_cpds{"C00314"} = 1;
    $input_cpds{"C00416"} = 1;
    $input_cpds{"C00534"} = 1;
    $input_cpds{"C00670"} = 1;
    $input_cpds{"C00672"} = 1;
    $input_cpds{"C00794"} = 1;
    $input_cpds{"C00898"} = 1;
    $input_cpds{"C00988"} = 1;
    $input_cpds{"C01070"} = 1;
    $input_cpds{"C01096"} = 1;
    $input_cpds{"C01137"} = 1;
    $input_cpds{"C01157"} = 1;
    $input_cpds{"C01233"} = 1;
    $input_cpds{"C02191"} = 1;
    $input_cpds{"C02232"} = 1;
    $input_cpds{"C02315"} = 1;
    $input_cpds{"C02336"} = 1;
    $input_cpds{"C02350"} = 1;
    $input_cpds{"C03150"} = 1;
    $input_cpds{"C03543"} = 1;
    $input_cpds{"C04121"} = 1;
    $input_cpds{"C04146"} = 1;
    $input_cpds{"C04489"} = 1;
    $input_cpds{"C04688"} = 1;
    $input_cpds{"C04824"} = 1;
    $input_cpds{"C05223"} = 1;
    $input_cpds{"C05761"} = 1;
    $input_cpds{"C05898"} = 1;
    $input_cpds{"C06143"} = 1;
    $input_cpds{"C06187"} = 1;
    $input_cpds{"C06188"} = 1;
    $input_cpds{"C11457"} = 1;
    $input_cpds{"C14818"} = 1;

=cut

my %cofactor_cpds;
open(INPUTCO,$filebase."secondary_compounds.txt")
    or die("Failed to open ".$filebase."secondary_compounds.txt. Run find_model_cofactors $genome_id\n");
while(<INPUTCO>)
{
    chomp;
    my @line = split "\t", $_;
    chomp @line;
    $cofactor_cpds{$line[0]} = 1;
}
close(INPUTCO);

my %biomass_cpds;
open(BIOMASS,$filebase."biomass.txt")
    or die ("Failed to open ".$filebase."biomass.txt Run find_model_information $genome_id biomass");
while(<BIOMASS>)
{
    chomp;
    my @temp = split "\t", $_;
    chomp @temp;
    $biomass_cpds{$temp[0]} = 1;
}
close(BIOMASS);

my %compound_id_to_name;
open(IN,"$FIG_Config::global/Models/compound_id_to_name.txt") or die("Failed to open compound_id_to_name.txt");
while($_ = <IN>){
    chomp($_);
    my($id,$name) = split("\t",$_);
    $compound_id_to_name{$id} = $name;
}
close(IN);

print STDERR "\tStage 1 - Complete\n" if($debug);

print STDERR "\tStage 2 - Run Analysis Tools (fueled by caffine)\n" if($debug);
#Total list of what compounds we reach.
my @list_generated = keys %input_cpds;
push @list_generated , keys %cofactor_cpds;
my %possible_problems;
my %good_compounds;

# Here we are running all the scenarios we have inputs for
#  to analyze what biomass we can reach. We are also identifying 
#  any Scenarios that are not being utilized.

my %current_inputs;
foreach (keys %input_cpds)
{
    $current_inputs{$_} = 1;
}
my %current_cofactors;
foreach (keys %cofactor_cpds)
{
    $current_cofactors{$_} = 1;
}

while(scalar(keys %current_inputs))
{
    my ($temp_in,$temp_co,$break) = generate_new_inputs(\%current_inputs,\%current_cofactors);

    map {$current_inputs{$_} = 1;} keys %$temp_in;
    map {$current_cofactors{$_} = 1;} keys %$temp_co;

    push @list_generated, keys %$temp_in;
    push @list_generated, keys %$temp_co;

    print STDERR "Rechecking with inputs\n" if($debug);
    if($break)
    {
	last;
    }
}
print STDERR "Finished checking scenarios\n" if($debug);
foreach my $found (@list_generated)
{
    if(defined $biomass_cpds{$found})
    {
	$biomass_cpds{$found} = 2;
    }
}

# Analyze input compounds
# and also identify dead-end Scenarios
my %dead_end_scenarios;

foreach my $scenario (@scenarios)
{
    my $count = 0;
    my %used_inputs;
    foreach my $substrate (@{$scenario->get_substrates_all()})
    {
	if($input_cpds{$substrate} == 1)
	{
	    $used_inputs{$substrate} = 1;
	    $count++;
	}
    }

    if($count !=0 && $count != scalar(@{$scenario->get_substrates_all()}))
    {
       	map {$possible_problems{$_} = 1;} keys %used_inputs;
    }
    else
    {
	map {$good_compounds{$_} = 0;} keys %used_inputs;
    }

    #dead-end analysis
    my $bad_end = 1;
    foreach my $product (@{$scenario->get_products_all()})
    {
	if(defined $biomass_cpds{$product} || $all_substrates{$product} == 1)
	{
	    $bad_end = 0;
	    last;
	}
    }
    #Iff we reach this point, then this scenario produces no biomass
    # or compounds used by another scenario. It's all waste, could be a error.
    if($bad_end)
    {
	$dead_end_scenarios{$scenario->get_scenario_name()} = 1;
    }
}

open(RESULTS,">".$filebase."info_biomass.txt");
open(OUT, ">".$filebase."biomass_valid.txt");
foreach (sort keys %biomass_cpds)
{
    if($biomass_cpds{$_} != 2)
    {
	print RESULTS "Biomass $_ $compound_id_to_name{$_} failed to be generated using inputs.txt\n";
    }
    else
    {
	print OUT "$_\t$compound_id_to_name{$_}\n";
    }
}
close(RESULTS);
close(OUT);

open(OUT,">$filebase\scenarios_valid.txt");
open(SCEN_RESULTS,">".$filebase."info_scenarios.txt");
foreach my $scen_id (sort keys %scenarios_used)
{
    if($scenarios_used{$scen_id} == 0)
    {
	print SCEN_RESULTS "Scenario $scen_id unable to run with current inputs\n";
    }
    else
    {
	print OUT "$scen_id\n";
    }
}
print SCEN_RESULTS "\n\n#################\n\n\n";
foreach (sort keys %dead_end_scenarios)
{
    print SCEN_RESULTS "Scenario $_ is possibly a dead end(no outputs used by other scenarios or biomass.)\n";
}
close(SCEN_RESULTS);
close(OUT);

#open(RESULTS_INPUT,">".$filebase."info_inputs.txt");
#foreach (sort keys %possible_problems)
#{
#    if($possible_problems{$_} == 1 && !defined $good_compounds{$_})
#    {
#	print RESULTS_INPUT "Input $_ may be a false-positive. Perhaps a Scenario should generate this.\n";
#    }
#}
#close(RESULTS_INPUT);

print STDERR "\tStage 2 - Complete! Results in info_inputs.txt, info_biomass.txt and info_scenarios.txt\n" if($debug);

sub generate_new_inputs
{
    my ($inputs,$cofactors) = @_;
    my %new_inputs;
    my %new_cofactors;
    my $break = 1;
    foreach my $scenario (@scenarios)
    {
	if($scenarios_id{$scenario->get_id()} == 1)
	{
	    next;
	}
	my $count = 0;
	foreach my $substrate (@{$scenario->get_substrates_all()})
	{
	    if($inputs->{$substrate})
	    {
		$count++;
	    }
	    elsif($cofactors->{$substrate})
	    {
		$count++;
	    }
	}

	if($count != scalar(@{$scenario->get_substrates_all()}))
	{
	    next;
	}
	else
	{
	    print STDERR "Scenario ".$scenario->get_scenario_name()." ran.\n" if($debug);
	    $break = 0;   
	    $scenarios_id{$scenario->get_id()} = 1;
	    $scenarios_used{$scenario->get_scenario_name()} = 1;
	    foreach my $product (@{$scenario->get_products()})
	    {
		$new_inputs{$product} = 1;
	    }
	    foreach my $product (@{$scenario->get_product_cofactors()})
	    {
		$new_cofactors{$product} = 1;
	    }
	}
    }
    return (\%new_inputs,\%new_cofactors,$break);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3