[Bio] / FigKernelScripts / make_MFA_tool_input.pl Repository:
ViewVC logotype

View of /FigKernelScripts/make_MFA_tool_input.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Wed Nov 12 18:29:31 2008 UTC (11 years ago) by dejongh
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, HEAD
Changes since 1.1: +1 -1 lines
switch to scenario_directory

# _*_ Perl _*_
#
#
#
#use strict;
use FIGV;
use Scenario;

my ($genome_id,$orgdir) = @ARGV;

my $debug = 0;

unless (scalar (@ARGV) >= 1)
{
    print STDERR "usage: make_MFA_tool_input <genome_id>\n";
    exit(-1);
}
my $fig;
if($orgdir)
{
    $fig = new FIGV($orgdir);
}
else
{
    $fig = new FIGV;
}
chomp $genome_id;
Scenario::set_fig($fig,$genome_id);
my @scenarios = @{Scenario->get_genome_scenarios($genome_id,1)};
my $filebase = $fig->scenario_directory($genome_id)."/Analysis/";
print "$filebase\n";

my %compound_id_to_name;
open(IN,"$FIG_Config::global/Models/compound_id_to_name.txt") or die("Failed to open compound_id_to_name.txt");
while($_ = <IN>){
    chomp($_);
    my($id,$name) = split("\t",$_);
    $compound_id_to_name{$id} = $name;
}
close(IN);

my %biomass_cpds;
open(BIOMASS,$filebase."biomass_valid.txt")
    or die ("Failed to open ".$filebase."biomass_valid.txt\n");
while(<BIOMASS>)
{
    if($_ =~/^KEGG/){next}
    chomp;
    my @temp = split "\t", $_;
    chomp @temp;
    $biomass_cpds{$temp[0]} = 1;
}
close(BIOMASS);

my %input_cpds;
open(INPUTS,$filebase."inputs.txt")
    or die ("Failed to open ".$filebase."inputs.txt. Run find_model_information $genome_id inputs");
while(<INPUTS>)
{
    chomp;
    my @line = split "\t", $_;
    chomp @line;
    #keep biomass cpds out of the modelinputs
    if($biomass_cpds{$line[0]}){next;}
    $input_cpds{$line[0]} = 1;
}
close(INPUTS);
#we are going to load up the cofactor compounds into inputs as well
open(COFACT,$filebase."secondary_compounds.txt")
    or die("Failed to open ".$filebase."secondary_compounds.txt");
while(<COFACT>)
{
    chomp;
    my @line = split "\t",$_;
    chomp @line;
    $input_cpds{$line[0]} = 1;
}
close(COFACT);

open(IN,$filebase."scenarios_valid.txt");
my @lines = <IN>;
chomp @lines;
close(IN);
my %valid_scenarios;
foreach (@lines)
{
    $valid_scenarios{$_} = 1;
}

my %output_cpds;
open(OUTS,$filebase."outputs.txt");
while(<OUTS>)
{
    chomp;
    my @temp = split "\t", $_;
    chomp @temp;
    $output_cpds{$temp[0]} = 1;
}
close(OUTS);

#load core_metabolites, scenario reaction information
my %core_metabolites;
my %reaction_to_substrates;
my %reaction_to_products;
my %reaction_reversible;
my %reaction_notes;

foreach my $scenario (@scenarios)
{
    if(!defined $valid_scenarios{$scenario->get_scenario_name})
    {
	next;
    }
    
    $scenario->load_reaction_pegs($fig);
    my $subsystem = $scenario->get_subsystem_name();
    
    foreach my $reaction (@{$scenario->get_reaction_ids()})
    {
	$reaction_to_substrates{$reaction} = {};
	$reaction_to_products{$reaction} = {};
	
	my %substrates = %{$scenario->get_reaction_substrates($reaction)};
	my %products = %{$scenario->get_reaction_products($reaction)};

	$reaction_reversible{$reaction} = $scenario->get_reaction_reversibility($reaction);
	map {$core_metabolites{$_} = 1;
	     $reaction_to_substrates{$reaction}->{$_} = $substrates{$_}; 
	 } keys %substrates;
	map {$core_metabolites{$_} = 1;
	     $reaction_to_products{$reaction}->{$_} = $products{$_}; 
	 } keys %products;
    

	if(!defined $reaction_notes{$reaction})
	{
	    $reaction_notes{$reaction} = {};
	}
	foreach ( @{$scenario->get_reaction_pegs($reaction) })
	{
	    $reaction_notes{$reaction}->{"SEED_PEG:$_"} = 1;
	    my $roles = $fig->protein_subsystem_to_roles($_, $subsystem);
	    foreach my $role (@$roles)
	    {
		$reaction_notes{$reaction}->{"Functional Role:$role"} = 1;
	    }
	}
	$reaction_notes{$reaction}->{"Subsystem/Scenario:".$scenario->get_scenario_name()} = 1;
    }
}

#the ATP Synthase hack reaction
$core_metabolites{"C00009"} = 1;
$core_metabolites{"C00001"} = 1;
$core_metabolites{"C00008"} = 1;
$core_metabolites{"C00002"} = 1;

$reaction_reversible{"ATPSYN"} = 0;
$reaction_to_substrates{"ATPSYN"} = {};
$reaction_to_products{"ATPSYN"} = {};
$reaction_to_substrates{"ATPSYN"}->{"C00008"} = 1;
$reaction_to_substrates{"ATPSYN"}->{"C00009"} = 1;
$reaction_to_products{"ATPSYN"}->{"C00001"} = 1;
$reaction_to_products{"ATPSYN"}->{"C00002"} = 1;

#end hack  

open(OUT,">".$filebase.$genome_id.".mfa_input");

print OUT "COMPOUNDS\n";
print OUT "DATABASE;NAMES;FORMULA;CHARGE\n";

my @all_cpds;

#write the compound list.
foreach my $id (sort keys %input_cpds)
{
    my $name = "not available";
    if($compound_id_to_name{$id}){$name = $compound_id_to_name{$id};}
	
    print OUT "$id;$id|$name;;\n"; #regular cellular
    push @all_cpds, $id;
}

foreach my $id (sort keys %output_cpds)
{
    if(!$input_cpds{$id})
    {
	my $name = "not available";
	if($compound_id_to_name{$id}){$name = $compound_id_to_name{$id};}
	print OUT "$id;$id|$name;;\n"; #regular cellular
	push @all_cpds, $id;
    }
}

foreach my $id (sort keys %biomass_cpds)
{
    if(!$input_cpds{$id} && !$output_cpds{$id})
    {
	my $name = "not available";
	if($compound_id_to_name{$id}){$name = $compound_id_to_name{$id};}
	print OUT "$id;$id|$name;;\n"; #regular cellular
	push @all_cpds, $id;
    }
}

foreach my $id (sort keys %core_metabolites)
{
    if(!$input_cpds{$id} && !$output_cpds{$id} && !$biomass_cpds{$id})
    {
	my $name = "not available";
	if($compound_id_to_name{$id}){$name = $compound_id_to_name{$id};}
	print OUT "$id;$id|$name;;\n"; #regular cellular
	push @all_cpds, $id;
    }
}

#special compounds
print OUT "bioout;bioout;;\n";
push @all_cpds, "bioout";

print OUT "REACTIONS\n";
print OUT "DATABASE;NAMES;EQUATION;PATHWAY;ENZYME;BGNUMBER\n";

my @all_rids;

#scenario reactions
foreach my $rid (sort keys %reaction_to_substrates)
{
    if($rid =~/R04241/){next;}
    my @rx_string_array;
    my @substrate_array;
    foreach (sort keys %{$reaction_to_substrates{$rid}})
    {
	if(!defined $reaction_to_products{$rid}->{$_})
	{
	    my $term = "(".$reaction_to_substrates{$rid}->{$_}.")"." $_";
	    push(@substrate_array,$term);              
	}
    }

    my $substrate_terms = join(" + ",@substrate_array);
    push(@rx_string_array,$substrate_terms);              

    if($reaction_reversible{$rid}){
	push(@rx_string_array,"<-->");
    }
    else{
	push(@rx_string_array,"-->");
    }

    my @product_array;
    foreach (sort keys %{$reaction_to_products{$rid}})
    {
	if(!defined $reaction_to_substrates{$rid}->{$_})
	{
	    my $term = "(".$reaction_to_products{$rid}->{$_}.")"." $_";
	    push(@product_array,$term);              
	}
    }
    
    my $product_terms = join(" + ",@product_array);
    push(@rx_string_array,$product_terms);              
    
    my $rx_string = join(" ",@rx_string_array);
    my $subsystem;
    my $role;
    my $peg;
    my $notes = $reaction_notes{$rid};

    if(defined $notes)
    {
	foreach my $note (keys %$notes)
	{
	    if($note =~/SEED_PEG:(.*)/){$peg = $1;}
	}
	foreach my $note (keys %$notes)
	{
	    if($note =~/Functional Role:(.*)/){$role = $1; $role =~s/;/OR/;} 
	}

	foreach my $note (keys %$notes)
	{
	    if($note =~/Subsystem\/Scenario:(.*)/){$subsystem = $1;} 
	}
    }

    print OUT "$rid;$rid;$rx_string;$subsystem;$role;$peg\n";

    push @all_rids, $rid;
}

#add in gene evidence
my %in;
my %out;
open(IN,$filebase."transporters_spreadsheet.txt");
while($_ =<IN>){
    chomp($_);
    if($_ =~/^IN\t(C\d+)\t(.*)\t(.*)\tTC/){
	$in{$1} = $3;
    }
    elsif($_ =~/^OUT\t(C\d+)\t(.*)\t(.*)\tTC/){
	$out{$1} = $3;
    }
}
close(IN);

#transport reactions
foreach my $cid (keys %input_cpds)
{
    my $trans_id = "T".substr($cid,1);
    my $genes = $in{$cid};
    print "genes:$genes\n";
    print OUT "$trans_id;$trans_id;$cid\[e] --> $cid;;;$genes\n";
    
    #lets enable reverse transports to fix some sink rxn issues
    print OUT "$trans_id\_r;$trans_id\_r; $cid --> $cid\[e];;;\n";
}

foreach my $cid (keys %in)
{
    if(! $input_cpds{$cid}){
	my $trans_id = "T".substr($cid,1);
	my $genes = $in{$cid};
	print OUT "$trans_id;$trans_id;$cid\[e] --> $cid;;;$genes\n";
    }
}

foreach my $cid (keys %output_cpds)
{
    if(!$input_cpds{$cid})
    {
	my $trans_id = "O".substr($cid,1);
	my $genes = $out{$cid};
	print OUT "$trans_id;$trans_id; $cid --> $cid\[e];;;$genes\n";
    }
}

#biomass reaction
my %biomass_stoich;
open(IN,"$FIG_Config::global/Models/ec_biomass.txt");
while($_ = <IN>){
    chomp($_);
    my($id,$name,$stoich) = split("\t",$_);
    $stoich = $stoich * (.000001);
    $biomass_stoich{$id} = $stoich;
}
close(IN);

my @biomass_rx_array;
my $biomass_rx_string;

my @biomass_substrates; 
foreach my $cid (keys %biomass_cpds)
{
    my $stoich = $biomass_stoich{$cid};
    my $term = "(".$stoich.")"." $cid";
    push(@biomass_substrates,$term);
}

my $biomass_substrate_string = join(" + ",@biomass_substrates);
push(@biomass_rx_array,$biomass_substrate_string);
push(@biomass_rx_array,"-->");
push(@biomass_rx_array,"bioout");

$biomass_rx_string = join(" ",@biomass_rx_array);

print OUT "BIOMAS;BIOMAS;$biomass_rx_string;;;\n";

#sink reactions for all non-biomass, input, output compounds
#not sure if this should be included?

foreach my $cid (@all_cpds)
{
    if($cid =~/E\d{5}/ || $cid eq "bioout")
    {
	next;
    } 

    my $temp  = substr($cid,1);

    if(!$input_cpds{$cid} && !$output_cpds{$cid} && !$biomass_cpds{$cid})
    {
	print OUT "S$temp;S$temp; $cid --> $cid\[e];;;\n";
    }
}

close(OUT);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3