[Bio] / DeJonghStuff / parse_model.pl Repository:
ViewVC logotype

View of /DeJonghStuff/parse_model.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (download) (as text) (annotate) (vendor branch)
Wed Oct 26 18:48:35 2005 UTC (14 years, 1 month ago) by dejongh
Branch: foo
CVS Tags: bar
Changes since 1.1: +0 -0 lines
Import of scripts to parse and load Palsson models into the SEED.  Also 
including models for Staph Aureus and E. Coli (minus gene-reaction associatons).

# -*- perl -*-

###########################################
# usage:  parse_model <genome_id> <model_id>
# models for a given genome should be stored in Data/Global/Models/<genome_id>/
# models should be prepended with the model id (e.g., iSB615).  Three files are expected:
# ${model_id}-metabolites.txt:  contains two-column, tab-separated file with compound ids
#     in the first column and compound names in the second column
# ${model_id}-reactions.txt:  contains three-column, tab-separated file with reaction ids
#     in the first column, reaction names in the second column, and reaction equations in 
#     the third column
# ${model_id}-gene-reaction-assoc.txt:  contains two-column, tab-separated file with 
#     reaction ids in the first column and list of gene ids in the second column

use strict;
use FIG;

my ($genome_id, $model_id) = @ARGV;
my $fig = new FIG;

open(MODEL_METABOLITES, "<$FIG_Config::global/Models/$genome_id/${model_id}-metabolites.txt")
    or die("Couldn't open $genome_id/${model_id}-metabolites.txt");
open(MODEL_REACTIONS, "<$FIG_Config::global/Models/$genome_id/${model_id}-reactions.txt") 
    or die("Couldn't open $genome_id/${model_id}-reactions.txt");
open(MODEL_GRA, "<$FIG_Config::global/Models/$genome_id/${model_id}-gene-reaction-assoc.txt")
    or die("Couldn't open $genome_id/${model_id}-gene-reaction-assoc.txt");
open(COMPOUND, ">$FIG_Config::global/Models/$genome_id/compound");
open(COMPOUND_NAME, ">$FIG_Config::global/Models/$genome_id/compound_name");
open(REACTION_TO_ROLE, ">$FIG_Config::global/Models/$genome_id/reaction_to_role");
open(REACTION, ">$FIG_Config::global/Models/$genome_id/reaction");
open(REACTION_TO_COMPOUND, ">$FIG_Config::global/Models/$genome_id/reaction_to_compound");
open(GENE_TO_ROLE, ">$FIG_Config::global/Models/$genome_id/gene_to_role");
open(GENE_TO_REACTION, ">$FIG_Config::global/Models/$genome_id/gene_to_reaction");
open(GENE_TO_PEG, ">$FIG_Config::global/Models/$genome_id/gene_to_peg");

my (%compound_abbrev_2_name, %reaction_abbrev_2_role);

while (my $metabolite = <MODEL_METABOLITES>)
{
    chomp($metabolite);
    if (my ($abbrev, $name) = split /\t/, $metabolite) 
    {
	$abbrev =~ s/\"//g;
	$name =~ s/\"//g;
	$name =~ s/'/\\'/g;

	if (! defined($compound_abbrev_2_name{$abbrev}))
	{
	    $compound_abbrev_2_name{$abbrev} = $name;
	    print COMPOUND "$abbrev\n";
	    print COMPOUND_NAME "$abbrev\t1\t$compound_abbrev_2_name{$abbrev}\n";
	}
	else
	{
	    print "Warning, found multiple names for $abbrev\n";
	}
    }
    else
    {
	print "Can't parse metabolite: $metabolite\n";
    }
}

close(COMPOUND);
close(COMPOUND_NAME);

while (my $reaction = <MODEL_REACTIONS>)
{
    chomp($reaction);

    if (my ($rabbrev, $rname, $equation) = split /\t/, $reaction)
    {
	$rabbrev =~ s/\"//g;
	$rname =~ s/\"//g;
	$equation =~ s/\"//g;

	if (! defined($reaction_abbrev_2_role{$rabbrev}))
	{
	    $reaction_abbrev_2_role{$rabbrev} = $rname;
	    print REACTION_TO_ROLE "$rabbrev\t$rname\n";
	}
	else
	{
	    print "Warning, found multiple names for $rabbrev\n";	
	}

	my ($loc, $substrate_list, $direction, $product_list);

	if ($equation =~ /^\[(.)\]( : )?(.*)(-->|<==>)(.*)/)
	{
	    $loc = $1;
	    ($substrate_list, $direction, $product_list) = ($3, $4, $5);
	}
	elsif ($equation =~ /(.*)(-->|<==>)(.*)/)
	{
	    $loc = "";
	    ($substrate_list, $direction, $product_list) = ($1, $2, $3);
	}
	else
	{
	    print "Warning, unparseable equation: $equation\n";
	    next;
	}

	if ($direction eq "-->")
	{
	    print REACTION "$rabbrev\t0\n";
	}
	else
	{
	    print REACTION "$rabbrev\t1\n";
	}

	$substrate_list =~ s/\s//g;
	$product_list =~ s/\s//g;

	for (my $setn = 1; $setn <= 2; $setn++)
	{
	    my $compound_list = ($setn == 1 ? $substrate_list : $product_list);

	    foreach my $cabbrev_info (split /\+/, $compound_list) 
	    { 
		my ($cid, $stoich, $cloc);

		if ($cabbrev_info =~ /(.+)\[(.)\]$/)
		{
		    $cabbrev_info = $1;
		    $cloc = $2;
		}
		else
		{
		    $cloc = $loc;
		}

		if ($cabbrev_info =~ /\((\d.*)\)(.+)/)
		{
		    $stoich = $1;
		    $cid = $2;
		}
		else
		{
		    $stoich = "1";
		    $cid = $cabbrev_info;
		}

		if (defined($compound_abbrev_2_name{$cid}))
		{
		    print REACTION_TO_COMPOUND "$rabbrev\t$cid\t$setn\t$stoich\t\t$cloc\n";
		}
		else
		{
		    print "Warning, undefined compound $cid in reaction $equation\n";
		}
	    }
	}
    }
    else
    {
	print "Warning, unparseable reaction: $reaction\n";
    }
}

close(REACTION);
close(REACTION_TO_ROLE);
close(REACTION_TO_COMPOUND);

# extra - use hash for tracking gene/peg correspondences since genes occur in multiple reactions
my %gene_2_peg;

while (<MODEL_GRA>)
{
    my ($rabbrev, $genes) = split "\t", $_;
    my @rids;

    if (defined $reaction_abbrev_2_role{$rabbrev})
    {
	while ($genes =~ /(SA\d\d\d\d)/g)
	{
	    my $gene = $1;

	    print GENE_TO_REACTION "$gene\t$rabbrev\n";
	    my $rname = $reaction_abbrev_2_role{$rabbrev};
	    print GENE_TO_ROLE "$gene\t$rname\n"; # this table is redundant

	    # extra - look for pegs based on gene
	    if (! defined($gene_2_peg{$gene}))
	    {
		my $peg_count = 0;
		my ($peg_index_data,undef) = $fig->search_index($gene);

		foreach my $peg (map { $_->[0] } @$peg_index_data)
		{
		    if ($peg =~ /$genome_id/ )
		    {
			$peg_count++;
			print GENE_TO_PEG "$gene\t$peg\t$peg_count\n";
		    }
		}

		if ($peg_count == 0)
		{
		    print "Warning, no pegs found for gene $gene\n";
		}

		$gene_2_peg{$gene} = 1;
	    }
	}
    }
    else
    {
	print "Warning, unknown reaction abbreviation in GRA file: $rabbrev\n";
    }
}

close(GENE_TO_REACTION);
close(GENE_TO_ROLE);
close(GENE_TO_PEG);

exit;


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3