[Bio] / FigKernelScripts / make_model_spreadsheets.pl Repository:
ViewVC logotype

View of /FigKernelScripts/make_model_spreadsheets.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Wed Nov 12 18:29:31 2008 UTC (11 years, 4 months ago) by dejongh
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, HEAD
Changes since 1.1: +1 -1 lines
switch to scenario_directory

# _*_ Perl _*_
#
#
#
use strict;
use FIGV;
use Scenario;

my ($genome_id,$orgdir) = @ARGV;

my $debug = 0;


unless (scalar (@ARGV) >= 1)
{
    print STDERR "usage: make_model_spreadsheets <genome_id>\n";
    exit(-1);
}
chomp $genome_id;
my $fig;
if($orgdir)
{
    $fig = new FIGV($orgdir);
}
else
{
    $fig = new FIGV;
}
Scenario::set_fig($fig,$genome_id);
print STDERR "Creating model spreadsheets forgenome $genome_id\n" if($debug);
print STDERR "\tStage 1 - Loading Scenarios, model inputs and biomass\n" if($debug);
my @scenarios = @{Scenario->get_genome_scenarios($genome_id,1)};
my $filebase = $fig->scenario_directory($genome_id)."/Analysis/";

#library of transported compounds in other models
open(LIB,$FIG_Config::global."/Models/transports_in.txt.original");
my %lib_input_cpds;
while(<LIB>)
{
    chomp;
    my @line = split "\t", $_;
    chomp @line;
    $lib_input_cpds{$line[0]} = 1;
}
close(LIB);

my %evidence_of_transport_peg;
my %evidence_of_transport_annotation;
open(EVIDENCE,$FIG_Config::global."/Models/transport_evidence.txt");
#open(EVIDENCE,$FIG_Config::global."/Models/transport_evidence.txt");
while(<EVIDENCE>)
{
    chomp;
    my ($cid,$annotation) = split "\t", $_;
    if($evidence_of_transport_annotation{$cid}){
	my $ref = $evidence_of_transport_annotation{$cid};
	push(@$ref,$annotation);
    }
    else{
	$evidence_of_transport_annotation{$cid} = [$annotation];
    }
	
    my ($peg_index_data,undef) = $fig->search_index($annotation);
    foreach my $peg (map { $_->[0] } @$peg_index_data)
    {
	if($peg =~/$genome_id/){
	    if($evidence_of_transport_peg{$cid}){
		my $ref = $evidence_of_transport_peg{$cid};
		push(@$ref,$peg);
	    }
	    else{
		$evidence_of_transport_peg{$cid} = [$peg];
	    }
	}
    }
}

my %input_cpds;
open(INPUTS,$filebase."inputs.txt")
    or die ("Failed to open ".$filebase."inputs.txt. Run find_model_information $genome_id inputs");
while(<INPUTS>)
{
    chomp;
    my @line = split "\t", $_;
    chomp @line;
    $input_cpds{$line[0]} = 1;
}
close(INPUTS);
#we are going to load up the cofactor compounds into inputs as well
my %cofactor_cpds;
open(COFACT,$filebase."secondary_compounds.txt")
    or die("Failed to open ".$filebase."secondary_compounds.txt");
while(<COFACT>)
{
    chomp;
    my @line = split "\t",$_;
    chomp @line;
    $input_cpds{$line[0]} = 1;
    $cofactor_cpds{$line[0]} = 1;
}
close(COFACT);


my %compound_id_to_name;
open(IN,"$FIG_Config::global/Models/compound_id_to_name.txt") or die("Failed to open compound_id_to_name.txt");
while($_ = <IN>){
    chomp($_);
    my($id,$name) = split("\t",$_);
    $compound_id_to_name{$id} = $name;
}
close(IN);

my %biomass_cpds;
open(BIOMASS,$filebase."biomass_valid.txt")
    or die ("Failed to open ".$filebase."biomass_valid.txt\n");
while(<BIOMASS>)
{
    chomp;
    my @temp = split "\t", $_;
    chomp @temp;
    $biomass_cpds{$temp[0]} = 1;
}
close(BIOMASS);

open(IN,$filebase."scenarios_valid.txt");
my @lines = <IN>;
chomp @lines;
close(IN);
my %valid_scenarios;
foreach (@lines)
{
    $valid_scenarios{$_} = 1;
}

my %output_cpds;
open(OUTS,$filebase."outputs.txt");
while(<OUTS>)
{
    chomp;
    my @temp = split "\t", $_;
    chomp @temp;
    $output_cpds{$temp[0]} = 1;
}
close(OUTS);

#load core_metabolites, scenario reaction information
my %core_metabolites;
my %reaction_to_substrates;
my %reaction_to_products;
my %reaction_reversible;
my %reaction_notes;
foreach my $scenario (@scenarios)
{
    if(!defined $valid_scenarios{$scenario->get_scenario_name})
    {
	next;
    }
    $scenario->load_reaction_pegs($fig);
    my $subsystem = $scenario->get_subsystem_name();
    foreach my $reaction (@{$scenario->get_reaction_ids()})
    {
	$reaction_to_substrates{$reaction} = {};
	$reaction_to_products{$reaction} = {};
	
	my %substrates = %{$scenario->get_reaction_substrates($reaction)};
	my %products = %{$scenario->get_reaction_products($reaction)};

	$reaction_reversible{$reaction} = $scenario->get_reaction_reversibility($reaction);
	map {$core_metabolites{$_} = 1;
	     $reaction_to_substrates{$reaction}->{$_} = $substrates{$_}; 
	 } keys %substrates;
	map {$core_metabolites{$_} = 1;
	     $reaction_to_products{$reaction}->{$_} = $products{$_}; 
	 } keys %products;
	if(!defined $reaction_notes{$reaction})
	{
	    $reaction_notes{$reaction} = {};
	}
	foreach ( @{$scenario->get_reaction_pegs($reaction) })
	{
	    $reaction_notes{$reaction}->{"SEED_PEG:$_"} = 1;
	    my $roles = $fig->protein_subsystem_to_roles($_, $subsystem);
	    foreach my $role (@$roles)
	    {
		$reaction_notes{$reaction}->{"Functional Role:$role"} = 1;
	    }
	}
	$reaction_notes{$reaction}->{"Subsystem/Scenario:".$scenario->get_scenario_name()} = 1;

    }
}

#the ATP Synthase hack reaction
$core_metabolites{"C00009"} = 1;
$core_metabolites{"C00001"} = 1;
$core_metabolites{"C00008"} = 1;
$core_metabolites{"C00002"} = 1;
$reaction_reversible{"ATPSYN"} = 0;
$reaction_to_substrates{"ATPSYN"} = {};
$reaction_to_products{"ATPSYN"} = {};
$reaction_to_substrates{"ATPSYN"}->{"C00008"} = 1;
$reaction_to_substrates{"ATPSYN"}->{"C00009"} = 1;
$reaction_to_products{"ATPSYN"}->{"C00001"} = 1;
$reaction_to_products{"ATPSYN"}->{"C00002"} = 1;
$reaction_notes{"ATPSYN"} = {};
$reaction_notes{"ATPSYN"}->{"Manual Reaction:This fixes problems because we lack electron transport reactions."} = 1;
#end hack

print STDERR "\tStage 1 - Complete\n" if($debug);

print STDERR "\tStage 2 - Write Spreadsheets file\n" if($debug);
open(OUT,">".$filebase."compounds_spreadsheet.txt");
write_species_list();
close(OUT);

open(OUT,">".$filebase."reactions_spreadsheet.txt");
write_reactions_list();
close(OUT);

open(OUT,">".$filebase."transports_spreadsheet.txt");
write_transports_list();
close(OUT);

sub write_species_list
{
    print OUT "KEGG ID\tName\n";

    #input compounds
    foreach my $id (sort keys %input_cpds)
    {
	my $name = $compound_id_to_name{$id};
	$name =~ s/\\"//g;
        print OUT "$id\t$name\n";
    }
    #biomass compounds
    foreach my $id (sort keys(%biomass_cpds))
    {
	my $name = $compound_id_to_name{$id};
	$name =~ s/\\"//g;
	if(!$input_cpds{$id})
	{
	    print OUT "$id\t$name\n";
	}
    }
    #output compounds
    foreach my $id (sort keys(%output_cpds))
    {
	my $name = $compound_id_to_name{$id};
	$name =~ s/\\"//g;
	if(!$input_cpds{$id} && !$biomass_cpds{$id})
	{
            print OUT "$id\t$name\n";
        }
    }

    #core_metabolites are all the compounds used in any scenario.
    foreach my $id (sort keys(%core_metabolites))
    {
	my $name = $compound_id_to_name{$id};
	$name =~ s/\\"//g;
	if(!$input_cpds{$id} && !$biomass_cpds{$id} && !$output_cpds{$id})
	{
	    print OUT "$id\t$name\n";
	}
    }
}

sub write_reactions_list
{
    print OUT "KEGG RID\tGene\tRole\tSubsystem/Scenario(s)\n";
    
    #Now we write out the Scenario reactions
    foreach my $rid (keys %reaction_to_substrates)
    {
	make_reaction($rid,$reaction_reversible{$rid},$reaction_to_substrates{$rid},$reaction_to_products{$rid},
		      "0.000000",$reaction_notes{$rid});
    }
    
    my @biomass_list;
    open(IN,"$FIG_Config::global/Models/ec_biomass.txt");
    while($_ = <IN>){
	chomp($_);
	my($id,$name,$stoich) = split("\t",$_);
	$stoich = $stoich * (.000001);
	push(@biomass_list,"$stoich $id");
    }
    
    print OUT "\n";
    my $biomass_string = join(" + ",@biomass_list);
    print OUT "BIOMAS\t$biomass_string\n";
    close(IN);

}

sub write_transports_list
{
    print OUT "Direction\tCompound ID\tName\tClass\tGene(s)\tRole\n";

    foreach my $input (keys %input_cpds)
    {
	if(!$cofactor_cpds{$input}){
	    my $name = $compound_id_to_name{$input};
	    if($evidence_of_transport_peg{$input}){
		my $peg_ref = $evidence_of_transport_peg{$input};
		my $genes = join(",",@$peg_ref);
		my $annotation_ref = $evidence_of_transport_annotation{$input};
		my $annotations = join(",",@$annotation_ref);
		print OUT "IN\t$input\t$name\tevidence_of_input\t$genes\t$annotations\n";
	    }
	    elsif($lib_input_cpds{$input}){
		print OUT "IN\t$input\t$name\tcommon_primary_input\n";
	    }
	    else{
		print OUT "IN\t$input\t$name\tabstract_input\n";
	    }
	}
    }
    
    foreach my $input (keys %cofactor_cpds)
    {
	my $name = $compound_id_to_name{$input};
	if($evidence_of_transport_peg{$input}){
	    my $peg_ref = $evidence_of_transport_peg{$input};
	    my $genes = join(",",@$peg_ref);
	    my $annotation_ref = $evidence_of_transport_annotation{$input};
	    my $annotations = join(",",@$annotation_ref);
	    print OUT "IN\t$input\t$name\tevidence_of_input\t$genes\t$annotations\n";
	}
	
	else{    
	    print OUT "IN\t$input\t$name\tcommon_secondary_input\n";
	}
    }

    #Make sink/transport reactions for the output compounds
    foreach my $output (keys %output_cpds)
    {
	my $name = $compound_id_to_name{$output};
	print OUT "OUT\t$output\t$name\tcommon_output\n";
    }

    #make sinks/transport reactions for all cpds not already outputed or biomassed
    foreach my $id (keys %core_metabolites)
    {
	my $name = $compound_id_to_name{$id};
	if(!$output_cpds{$id} && !$biomass_cpds{$id})
	{
	    print OUT "OUT\t$id\t$name\tabstract_output\n";
	}
    }
}

sub make_reaction
{
    my($reaction_id,$reversible,$substrates,$products,$objective,$notes) = @_;
    

    if($reversible)
    {
	print OUT "$reaction_id\ttrue\t";
    }
    else
    {
	print OUT "$reaction_id\tfalse\t";
    }
    if(defined $notes)
    {
	my @temp_array = ();
        my $temp_string;  
	foreach my $note (keys %$notes)
	{
	    if($note =~/SEED_PEG:(.*)/){push(@temp_array,$1);}
	}
	$temp_string = join(",",@temp_array);
	print OUT "$temp_string\t";
	
	@temp_array = ();
	foreach my $note (keys %$notes)
	{
	    if($note =~/Functional Role:(.*)/){push(@temp_array,$1);} 
	}
	$temp_string = join(",",@temp_array);
	print OUT "$temp_string\t";
	
        @temp_array = ();
	foreach my $note (keys %$notes)
	{
	    if($note =~/Subsystem\/Scenario:(.*)/){push(@temp_array,$1);} 
	}
	$temp_string = join(",",@temp_array);
	print OUT "$temp_string";
	
    }
    print OUT "\n"
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3