[Bio] / FigKernelScripts / find_model_information_v2.pl Repository:
ViewVC logotype

View of /FigKernelScripts/find_model_information_v2.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Thu Oct 11 15:41:04 2007 UTC (12 years, 7 months ago) by mkubal
Branch: MAIN
CVS Tags: rast_rel_2008_06_18, rast_rel_2008_06_16, rast_rel_2008_07_21, rast_2008_0924, rast_rel_2008_04_23, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, rast_rel_2008_10_09, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, mgrast_rel_2008_1110, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, rast_rel_2008_08_07
for Model Repository Oct 2007

## _*_ Perl _*_


use FIGV;
use Scenario;
use strict;

my ($genome_id, $type, $orgdir) = @ARGV;

my $debug = 0;

unless (scalar( @ARGV) >= 2)
{
    print STDERR "\nusage:  find_model_information <genome_id> <inputs|outputs|biomass>\n";
    exit(-1);
}
chomp $genome_id;

#check for proper input on variable $type
unless($type eq 'inputs' || $type eq 'outputs' || $type eq 'biomass')
{
    print STDERR "\nSecond parameter required in format: 'outputs','inputs' or 'biomass'\n";
	exit(-1);
}
my $fig;
if($orgdir)
{
    $fig = new FIGV($orgdir);
}
else
{
    $fig = new FIGV;
}

Scenario::set_fig($fig,$genome_id);
my @scenarios = @{Scenario->get_genome_scenarios($genome_id,0)};

my %compound_id_to_name;
open(IN,"$FIG_Config::global/Models/compound_id_to_name.txt") or die("Failed to open compound_id_to_name.txt");
while($_ = <IN>){
    chomp($_);
    my($id,$name) = split("\t",$_);
    $compound_id_to_name{$id} = $name;
}
close(IN);

my %inputs;
my %outputs;

open(SM,"$FIG_Config::global/Models/standard_medium_lib.txt")
    or die("Failed to open $FIG_Config::global/Models/standard_medium_lib.txt ");
while(<SM>)
{
    chomp;
    $inputs{$_} = 1;
}
close(SM);


foreach my $scenario (@scenarios)
{
    #map {$inputs{$_} = 1} @{$scenario->get_substrates_all()};
    map {$outputs{$_} = 1} @{$scenario->get_products_all()};
    #Uncomment this line of code if you want to see what scenario a cpd is being referenced from
    # Make sure you comment the previous line out if you do this
    #map {$outputs{$_} = $scenario->get_id()} @{$scenario->get_products_all()};
}

my $libbase = "$FIG_Config::global/Models/";
my $filebase = $fig->model_directory($genome_id)."/Analysis/";




if($type eq 'inputs')
{

    open(MODEL_INPUTS,">".$filebase."inputs.txt");
    foreach my $input (keys %inputs){print MODEL_INPUTS "$input\n";}
    close(MODEL_INPUTS);
=pod    
    my $results = generate_resource($type,$libbase."standard_medium_lib.txt",\%inputs);
    #Lets run a additional check to see if any biomass cpds are taken up directly
    # Note - This check removed because we are going for generalized models
    #  not all inputs like this type are appropriate for most organisms.
    #open(BIO,$filebase."biomass.txt");
    while(<BIO>)
    {
	chomp;
	my @items = split "\t" , $_;
	chomp @items;
	if($inputs{$items[0]})
	{
	    delete $results->{$items[0]};
	}
    }
    close(BIO);
    print_resource($type,$results,\%inputs) if($debug);
    output_resource($type,$results,$genome_id);
=cut
}

if($type eq 'outputs')
{
    my $results = generate_resource($type,$libbase."transports_out.txt",\%outputs);
    print_resource($type,$results,\%outputs) if($debug);
    output_resource($type,$results,$genome_id);
}
if($type eq 'biomass')
{
    my $results = generate_resource($type,$libbase."ec_biomass.txt",\%outputs);
    #filter results for trouble compounds
    open(TRB,$libbase."trouble_cpds.txt");
    while(<TRB>)
    {
	chomp;
	if(/\#/) {next;}
	my @temp = split "\t" , $_;
	chomp @temp;
	my ($cpd_1,$cpd_2,$reason)  = @temp;
	if($results->{$cpd_1} && $results->{$cpd_2})
	{
	    delete $results->{$cpd_2};
	    print STDERR "Removing $cpd_2 from possible biomass due to $reason\n" if($debug);
	}
    }
    close(TRB);
    print_resource($type,$results,\%outputs) if($debug);
    output_resource($type,$results,$genome_id);
}

sub generate_resource
{
    my ($type,$resource_lib,$resource_hash) = @_;
    my %likely_compounds;

    open(RES_LIB, $resource_lib);
    while(<RES_LIB>)
    {
	chomp;
	my @temp = split "\t" , $_;
	if(defined $resource_hash->{$temp[0]} && $temp[0] ne "")
	{
	    $likely_compounds{$temp[0]} = $resource_hash->{$temp[0]};
	    # A print statment for debugging. If you enable this above, uncomment this line
	    #print "$temp[0] was found generated from $resource_hash->{$temp[0]}\n";
	    delete $resource_hash->{$temp[0]};
	}
    }
    close(RES_LIB);
    return \%likely_compounds;
}

sub print_resource
{
    my ($type,$results_hash,$resource_hash) = @_;
    print STDERR "\nLeft over $type :\n";
    print STDERR map { $_." $resource_hash->{$_}\n" } sort keys %{$resource_hash};
    print STDERR "\nLikely $type based on previous $type lib :\n";
    print STDERR map { $_." $results_hash->{$_}\n" } sort keys %{$results_hash};
}

sub output_resource
{
    my ($type,$results_hash,$genome_id) = @_;
    my $location = $fig->model_directory($genome_id)."/Analysis/";
    #Make the Analysis directory if it doesn't exist
    mkdir($location);
    open(OUTPUT,">",$location.$type.".txt") or
	die("Failed to create file ".$location.$type.".txt");
    print OUTPUT map { $_."\t$compound_id_to_name{$_}\n" } sort keys %{$results_hash};
    close(OUTPUT);
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3