[Bio] / DeJonghStuff / load_models.pl Repository:
ViewVC logotype

View of /DeJonghStuff/load_models.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (download) (as text) (annotate) (vendor branch)
Wed Oct 26 18:48:35 2005 UTC (14 years ago) by dejongh
Branch: foo, MAIN
CVS Tags: bar, HEAD
Changes since 1.1: +0 -0 lines
Import of scripts to parse and load Palsson models into the SEED.  Also
including models for Staph Aureus and E. Coli (minus gene-reaction associations).

# -*- perl -*-

###########################################
# loads models produced by parse_model.pl into table files that can be loaded into the database

use strict;
use FIG;

# Construct the FIG object as the original script did.  The object is not
# used anywhere in the visible script.
# NOTE(review): kept in case FIG construction has side effects - confirm.
my $fig = FIG->new;

# Maps each compound abbreviation to the first name seen for it across all
# "compound_name" files found under the Models directory.
my %compound_names;

# find(1) prints one path per line; strip the newlines so the paths can be
# handed to three-argument open and printed cleanly.
my @compound_name_files = `find $FIG_Config::global/Models -name "compound_name" -print`;
chomp @compound_name_files;

foreach my $file (@compound_name_files)
{
    open(my $in, '<', $file) or die("Couldn't open $file: $!\n");
    print "Opened $file\n";

    while (my $line = <$in>)
    {
        chomp $line;

        # Tab-separated columns: abbreviation, a second field that is ignored
        # here, and the compound name.  The -1 limit keeps an empty trailing
        # name field instead of silently dropping it.
        my ($cabbrev, undef, $cname) = split /\t/, $line, -1;

        # Skip blank or truncated lines so that no empty or undefined entry
        # ends up in the tables.
        next unless defined $cname && length $cabbrev;

        if (! exists($compound_names{$cabbrev}))
        {
            # first time this abbreviation is seen: remember its name
            $compound_names{$cabbrev} = $cname;
        }
        elsif ($compound_names{$cabbrev} ne $cname)
        {
            # a later file disagrees; the first name is kept and the clash reported
            print "Warning, multiple names for $cabbrev:\n\t$compound_names{$cabbrev}\n\t$cname\n";
        }
    }

    close($in);
}

# Write the merged compounds to two tab-delimited table files in the temp
# directory: compound.table (one abbreviation per line) and
# compound_name.table (abbreviation, the literal 1, name).
# NOTE(review): the meaning of the constant middle column is not visible
# here - confirm against the table schema.
{
    my $name_table = "$FIG_Config::temp/compound_name.table";
    my $id_table   = "$FIG_Config::temp/compound.table";

    # Fail loudly if either table cannot be created instead of printing to
    # an unopened handle.
    open(my $cnames, '>', $name_table) or die("Couldn't open $name_table for writing: $!\n");
    open(my $cids, '>', $id_table) or die("Couldn't open $id_table for writing: $!\n");

    # Sorted so that repeated runs produce identical files.
    foreach my $cabbrev (sort keys %compound_names)
    {
        print {$cids} "$cabbrev\n";
        print {$cnames} "$cabbrev\t1\t$compound_names{$cabbrev}\n";
    }

    # Buffered write errors only surface at close, so check it.
    close($cnames) or die("Couldn't close $name_table: $!\n");
    close($cids) or die("Couldn't close $id_table: $!\n");
}

# Maps each reaction abbreviation to the first role name seen for it across
# all "reaction_to_role" files found under the Models directory.
my %reaction_roles;

# find(1) prints one path per line; strip the newlines so the paths can be
# handed to three-argument open and printed cleanly.
my @reaction_role_files = `find $FIG_Config::global/Models -name "reaction_to_role" -print`;
chomp @reaction_role_files;

foreach my $file (@reaction_role_files)
{
    open(my $in, '<', $file) or die("Couldn't open $file: $!\n");
    print "Opened $file\n";

    while (my $line = <$in>)
    {
        chomp $line;

        # Tab-separated columns: reaction abbreviation, role name.  The -1
        # limit keeps an empty trailing field instead of dropping it.
        my ($abbrev, $name) = split /\t/, $line, -1;

        # Skip blank or truncated lines so that no empty or undefined entry
        # ends up in the table.
        next unless defined $name && length $abbrev;

        if (! exists($reaction_roles{$abbrev}))
        {
            # first time this reaction is seen: remember its role
            $reaction_roles{$abbrev} = $name;
        }
        elsif ($reaction_roles{$abbrev} ne $name)
        {
            # a later line disagrees; the first role is kept and the clash reported
            print "Warning, multiple names for $abbrev:\n\t$reaction_roles{$abbrev}\n\t$name\n";
        }
    }

    close($in);
}

# Write the merged reaction => role pairs to reaction_to_role.table in the
# temp directory, one tab-separated pair per line.
{
    my $role_table = "$FIG_Config::temp/reaction_to_role.table";

    # Fail loudly if the table cannot be created instead of printing to an
    # unopened handle.
    open(my $roles, '>', $role_table) or die("Couldn't open $role_table for writing: $!\n");

    # Sorted so that repeated runs produce identical files.
    foreach my $abbrev (sort keys %reaction_roles)
    {
        print {$roles} "$abbrev\t$reaction_roles{$abbrev}\n";
    }

    # Buffered write errors only surface at close, so check it.
    close($roles) or die("Couldn't close $role_table: $!\n");
}

# Maps each reaction abbreviation to the first reversibility value seen for
# it across all "reaction" files found under the Models directory.
my %reactions;

# find(1) prints one path per line; strip the newlines so the paths can be
# handed to three-argument open and printed cleanly.
my @reaction_files = `find $FIG_Config::global/Models -name "reaction" -print`;
chomp @reaction_files;

foreach my $file (@reaction_files)
{
    open(my $in, '<', $file) or die("Couldn't open $file: $!\n");
    print "Opened $file\n";

    while (my $line = <$in>)
    {
        chomp $line;

        # Tab-separated columns: reaction abbreviation, reversibility.  The
        # -1 limit keeps an empty trailing field instead of dropping it.
        my ($abbrev, $rev) = split /\t/, $line, -1;

        # Skip blank or truncated lines so that no empty or undefined entry
        # ends up in the table.
        next unless defined $rev && length $abbrev;

        if (! exists($reactions{$abbrev}))
        {
            # first time this reaction is seen: remember its reversibility
            $reactions{$abbrev} = $rev;
        }
        elsif ($reactions{$abbrev} ne $rev)
        {
            # a later file disagrees; the first value is kept and the clash reported
            print "Warning, multiple reversabilities for $abbrev:\n\t$reactions{$abbrev}\n\t$rev\n";
        }
    }

    close($in);
}

# Write the merged reaction => reversibility pairs to reaction.table in the
# temp directory, one tab-separated pair per line.
{
    my $reaction_table = "$FIG_Config::temp/reaction.table";

    # Fail loudly if the table cannot be created instead of printing to an
    # unopened handle.
    open(my $reac, '>', $reaction_table) or die("Couldn't open $reaction_table for writing: $!\n");

    # Sorted so that repeated runs produce identical files.
    foreach my $abbrev (sort keys %reactions)
    {
        print {$reac} "$abbrev\t$reactions{$abbrev}\n";
    }

    # Buffered write errors only surface at close, so check it.
    close($reac) or die("Couldn't close $reaction_table: $!\n");
}

# Reads the given "reaction_to_compound" files in the order supplied and
# returns a reference to a hash of hashes: reaction abbreviation => { compound
# info string => 1 }.
#
# A reaction's compound set is taken from the first file that mentions it.
# When a later file lists a compound for that reaction that is not in the
# stored set, a warning is printed and the stored set is left unchanged.
# Dies if a file cannot be opened.
sub load_reaction_compounds
{
    my @files = @_;
    my %compounds_for;

    foreach my $file (@files)
    {
        open(my $in, '<', $file) or die("Couldn't open $file: $!\n");
        print "Opened $file\n";

        my %new_reactions; # keep track of which reactions are first encountered in the current file

        while (my $line = <$in>)
        {
            chomp $line;

            # Everything before the first tab is the reaction abbreviation;
            # the rest of the line is the compound info.  Lines that do not
            # match are skipped so stale or undefined captures are never used.
            next unless $line =~ /([^\t]+)\t(.+)/;
            my ($abbrev, $cinfo) = ($1, $2);

            if (! exists($compounds_for{$abbrev}) || exists($new_reactions{$abbrev}))
            {
                # load the new reaction
                $compounds_for{$abbrev}{$cinfo} = 1;
                $new_reactions{$abbrev} = 1;
            }
            elsif (! exists $compounds_for{$abbrev}{$cinfo})
            {
                # current version of reaction has more compounds than version already loaded
                print "Warning, new compound found for reaction $abbrev: $cinfo\n";
            }
        }

        close($in);
    }

    return \%compounds_for;
}

# find(1) prints one path per line; strip the newlines so the paths can be
# handed to three-argument open and printed cleanly.  A distinct name is used
# so the earlier @reaction_files declaration is not masked.
my @reaction_compound_files = `find $FIG_Config::global/Models -name "reaction_to_compound" -print`;
chomp @reaction_compound_files;

my %reaction_compounds;

# Forward pass: run only for its warnings; the result is discarded.
load_reaction_compounds(@reaction_compound_files);

# redo this in reverse to check for extra compounds in reactions that weren't caught by the first loop;
# the result of this reverse pass is the one that is kept
%reaction_compounds = %{ load_reaction_compounds(reverse @reaction_compound_files) };

# Write every (reaction, compound info) pair held in %reaction_compounds to
# reaction_to_compound.table in the temp directory, one tab-separated pair
# per line.
{
    my $r2c_table = "$FIG_Config::temp/reaction_to_compound.table";

    # Fail loudly if the table cannot be created instead of printing to an
    # unopened handle.
    open(my $r2c, '>', $r2c_table) or die("Couldn't open $r2c_table for writing: $!\n");

    # Sorted so that repeated runs produce identical files.
    foreach my $abbrev (sort keys %reaction_compounds)
    {
        # %reaction_compounds is a hash of hashes; fetch the inner hash ref
        # with an ordinary element lookup.  The old %hash->{key} form is a
        # fatal error on Perl 5.22 and later.
        my $inner = $reaction_compounds{$abbrev};

        foreach my $cinfo (sort keys %$inner)
        {
            print {$r2c} "$abbrev\t$cinfo\n";
        }
    }

    # Buffered write errors only surface at close, so check it.
    close($r2c) or die("Couldn't close $r2c_table: $!\n");
}

# The gene_to_role, gene_to_reaction and gene_to_peg files need no merging,
# because genes are unique to each organism.  The per-model files are only
# located here; the resulting lists are not referenced again in the visible
# script.
my $models_root = "$FIG_Config::global/Models";

my @gene_to_role_files     = qx(find $models_root -name "gene_to_role" -print);
my @gene_to_reaction_files = qx(find $models_root -name "gene_to_reaction" -print);
my @gene_to_peg_files      = qx(find $models_root -name "gene_to_peg" -print);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3