[Bio] / FigKernelScripts / find_model_information.pl Repository:
ViewVC logotype

View of /FigKernelScripts/find_model_information.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (download) (as text) (annotate)
Wed Nov 12 18:29:31 2008 UTC (11 years ago) by dejongh
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, HEAD
Changes since 1.6: +2 -2 lines
switch to scenario_directory

## _*_ Perl _*_
#

use FIGV;
use Scenario;
use strict;

my ($genome_id, $type, $orgdir) = @ARGV;

my $debug = 0;

unless (scalar( @ARGV) >= 2)
{
    print STDERR "\nusage:  find_model_information <genome_id> <inputs|outputs|biomass>\n";
    exit(-1);
}
chomp $genome_id;

#check for proper input on variable $type
unless($type eq 'inputs' || $type eq 'outputs' || $type eq 'biomass')
{
    print STDERR "\nSecond parameter required in format: 'outputs','inputs' or 'biomass'\n";
	exit(-1);
}
my $fig;
if($orgdir)
{
    $fig = new FIGV($orgdir);
}
else
{
    $fig = new FIGV;
}

Scenario::set_fig($fig,$genome_id);
my @scenarios = @{Scenario->get_genome_scenarios($genome_id,0)};

my %compound_id_to_name;
open(IN,"$FIG_Config::global/Models/compound_id_to_name.txt") or die("Failed to open compound_id_to_name.txt");
while($_ = <IN>){
    chomp($_);
    my($id,$name) = split("\t",$_);
    $compound_id_to_name{$id} = $name;
}
close(IN);

my %inputs;
my %outputs;
foreach my $scenario (@scenarios)
{
    map {$inputs{$_} = 1} @{$scenario->get_substrates_all()};
    map {$outputs{$_} = 1} @{$scenario->get_products_all()};
    #Uncomment this line of code if you want to see what scenario a cpd is being referenced from
    # Make sure you comment the previous line out if you do this
    #map {$outputs{$_} = $scenario->get_id()} @{$scenario->get_products_all()};
}

# Filter out compounts that occur in both the inputs and outputs/biomass
#   these are likely intermediates that are both produced and consumed
#foreach my $input (keys %inputs)
#{
#    if(defined $outputs{$input})
#    {
#	delete $inputs{$input};
#	delete $outputs{$input};
#    }
#}

my $libbase = "$FIG_Config::global/Models/";
my $filebase = $fig->scenario_directory($genome_id)."/Analysis/";
if($type eq 'inputs')
{

    my $results = generate_resource($type,$libbase."transports_in.txt",\%inputs);
    #Lets run a additional check to see if any biomass cpds are taken up directly
    # Note - This check removed because we are going for generalized models
    #  not all inputs like this type are appropriate for most organisms.
    open(BIO,$filebase."biomass.txt");
    while(<BIO>)
    {
	chomp;
	my @items = split "\t" , $_;
	chomp @items;
	if($inputs{$items[0]})
	{
	    delete $results->{$items[0]};
	}
    }
    close(BIO);
    print_resource($type,$results,\%inputs) if($debug);
    output_resource($type,$results,$genome_id);
}

if($type eq 'outputs')
{
    my $results = generate_resource($type,$libbase."transports_out.txt",\%outputs);
    print_resource($type,$results,\%outputs) if($debug);
    output_resource($type,$results,$genome_id);
}
if($type eq 'biomass')
{
    my $results = generate_resource($type,$libbase."biomass.txt",\%outputs);
    #filter results for trouble compounds
    open(TRB,$libbase."trouble_cpds.txt");
    while(<TRB>)
    {
	chomp;
	if(/\#/) {next;}
	my @temp = split "\t" , $_;
	chomp @temp;
	my ($cpd_1,$cpd_2,$reason)  = @temp;
	if($results->{$cpd_1} && $results->{$cpd_2})
	{
	    delete $results->{$cpd_2};
	    print STDERR "Removing $cpd_2 from possible biomass due to $reason\n" if($debug);
	}
    }
    close(TRB);
    print_resource($type,$results,\%outputs) if($debug);
    output_resource($type,$results,$genome_id);
}

sub generate_resource
{
    my ($type,$resource_lib,$resource_hash) = @_;
    my %likely_compounds;

    open(RES_LIB, $resource_lib);
    while(<RES_LIB>)
    {
	chomp;
	my @temp = split "\t" , $_;
	if(defined $resource_hash->{$temp[0]} && $temp[0] ne "")
	{
	    $likely_compounds{$temp[0]} = $resource_hash->{$temp[0]};
	    # A print statment for debugging. If you enable this above, uncomment this line
	    #print "$temp[0] was found generated from $resource_hash->{$temp[0]}\n";
	    delete $resource_hash->{$temp[0]};
	}
    }
    close(RES_LIB);
    return \%likely_compounds;
}

sub print_resource
{
    my ($type,$results_hash,$resource_hash) = @_;
    print STDERR "\nLeft over $type :\n";
    print STDERR map { $_." $resource_hash->{$_}\n" } sort keys %{$resource_hash};
    print STDERR "\nLikely $type based on previous $type lib :\n";
    print STDERR map { $_." $results_hash->{$_}\n" } sort keys %{$results_hash};
}

sub output_resource
{
    my ($type,$results_hash,$genome_id) = @_;
    my $location = $fig->scenario_directory($genome_id)."/Analysis/";
    #Make the Analysis directory if it doesn't exist
    mkdir($location);
    open(OUTPUT,">",$location.$type.".txt") or
	die("Failed to create file ".$location.$type.".txt");
    print OUTPUT map { $_."\t$compound_id_to_name{$_}\n" } sort keys %{$results_hash};
    close(OUTPUT);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3