[Bio] / DeJonghStuff / map_model_to_subsystems.pl Repository:
ViewVC logotype

View of /DeJonghStuff/map_model_to_subsystems.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Wed Nov 16 22:01:06 2005 UTC (14 years, 1 month ago) by dejongh
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +34 -5 lines
Matching more reactions based on compounds; searching org-specific data in KEGG

# -*- perl -*-

###########################################
use strict;

use FIG;
use Subsystem;

my $fig = new FIG;

my ($genome_id, $kegg_org_abbrev) = @ARGV;

# map from model's gene-reaction associations to KEGG reaction ids, and check against
# corresponding functional roles and reactions for genes in subsystems

open(MODEL_GENE_REACTIONS, "<$FIG_Config::global/Models/$genome_id/gene_to_reaction") or die("Run map_model_to_kegg first");
open(MODEL_GENE_PEGS, "<$FIG_Config::global/Models/$genome_id/gene_to_peg") or die("Run parse_model first");
open(REACTIONS, "<$FIG_Config::global/Models/$genome_id/reaction_to_kegg_curated");
open(ALL_PEGS, ">$FIG_Config::global/Models/$genome_id/pegs_to_subsystem");

print ALL_PEGS "peg\tgene_list\tsubsystem\trole\tclassification0\tclassification1\treactions_for_role\tgene_reactions\tkgml_reactions\tmatched_rids2\n";

my (%reaction_abbrev_2_ids);

while (my $line = <REACTIONS>)
{
    chomp($line);
    my ($rabbrev, $rid_list) = split "\t", $line;
    my @rids = split " ", $rid_list;
    $reaction_abbrev_2_ids{$rabbrev} = \@rids;
}

my (%gene_2_reactions);

while (my $line = <MODEL_GENE_REACTIONS>)
{
    chomp($line);
    my ($gene, $rabbrev_list) = split "\t", $line;
    my @rabbrevs = split " ", $rabbrev_list;
    foreach my $rabbrev (@rabbrevs)
    {
	if (defined $reaction_abbrev_2_ids{$rabbrev})
	{
	    my @rids = @{$reaction_abbrev_2_ids{$rabbrev}};
	    push @{$gene_2_reactions{$gene}}, @rids;
	}
    }
}

my %peg_2_genes;

while (my $line = <MODEL_GENE_PEGS>)
{
    chomp($line);
    my ($gene, $peg_list) = split "\t", $line;

    foreach my $peg (split " ", $peg_list)
    {
	push @{$peg_2_genes{$peg}}, $gene;
    }
}

foreach my $peg (keys %peg_2_genes)
{
    my @gene_list = @{$peg_2_genes{$peg}};
    my @gene_reactions = ();
    my %kgml_reactions = ();

    foreach my $gene (@gene_list)
    {
	if (defined $gene_2_reactions{$gene})
	{
	    push @gene_reactions, @{$gene_2_reactions{$gene}};
	}

	my @tmp = `find $FIG_Config::data/KGML/$kegg_org_abbrev -name "*.xml" -exec grep $gene {} \\;`;
	
	foreach my $line (@tmp)
	{
	    if ($line =~ /reaction="(.*)"/)
	    {
		my $reactions = $1;

		while ($reactions =~ /rn:(R\d+)/g)
		{
		    $kgml_reactions{$1} = 1;
		}
	    }
	}
    }

    my $found_ss = 0;

    foreach my $ss_info ($fig->subsystems_for_peg($peg))
    {
	my ($subsystem,$role) = @$ss_info;
	my @classification = @{$fig->subsystem_classification($subsystem)};
	my $ss = new Subsystem($subsystem, $fig, 0);
	if (! defined($ss)) { next; }
	$found_ss = 1;
	my $ss_reactions = $ss->get_reactions();
	my $reactions_for_role = $ss_reactions->{$role};
	my %temp = ();
	my %intersection1 = ();
	my %intersection2 = ();

	foreach my $rid (@$reactions_for_role) { $temp{$rid} = 1 };

	foreach my $rid (@gene_reactions)
	{
	    if ($temp{$rid})
	    {
		$intersection1{$rid} = 1;
	    }
	}

	my @matched_rids1 = keys %intersection1;	    

	foreach my $rid (keys %kgml_reactions)
	{
	    if ($intersection1{$rid})
	    {
		$intersection2{$rid} = 1;
	    }
	}

	my @matched_rids2 = keys %intersection2;	    

	print ALL_PEGS "$peg\t@gene_list\t$subsystem\t$role\t$classification[0]\t$classification[1]\t@$reactions_for_role\t@gene_reactions\t@{[ keys %kgml_reactions ]}\t@matched_rids2\n";
    }

    if (! $found_ss)
    {
	my $func = $fig->function_of($peg);
	print ALL_PEGS "$peg\t@gene_list\t\t$func\t\t\t\t@gene_reactions\t\t\n";
    }
}
close(ALL_PEGS);


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3