[Bio] / FigKernelScripts / make_ma_html.pl Repository:
ViewVC logotype

View of /FigKernelScripts/make_ma_html.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (as text) (annotate)
Mon Jan 17 22:11:38 2011 UTC (8 years, 9 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.1: +43 -6 lines
minor fixes

#########################################################################
# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

########################################

use DB_File;
use SeedEnv;
use Data::Dumper;
use Statistics::Descriptive;
use tree_utilities;
use HTML;
use strict;
use ExpressionDir;

my $usage = "usage: make_ma_html ExpDir HTMLdir";
my $dir;
my $html_dir;
my $funcH;
(
 ($dir      = shift @ARGV)  &&
 ($html_dir = shift @ARGV) 
)
    || die $usage;

my $exprO = ExpressionDir->new($dir);
&SeedUtils::verify_dir("$html_dir");

my @pegs = $exprO->all_features('peg');
my $funcH = $exprO->ids_to_functions(\@pegs);
my $subH = $exprO->ids_to_subsystems(\@pegs);

my $atomic_regulons  = &atomic_regulons($dir);
my $countsH = &count_on_off($dir);

my $areg;
open(INDEX,">","$html_dir/index.html") || die "aborted";
print INDEX "<html>\n";
my $index_cols = ['Atomic Regulon', 'PEGs','ON', 'OFF'];
my $index_tab = [];
for ($areg = 1; ($areg <= @$atomic_regulons); $areg++)
{
#    print STDERR "areg=$areg\n";
    my $html = ["<html>\n"];
    my $cluster = $atomic_regulons->[$areg-1];
    my $number_pegs = @{$cluster->[1]};
    push(@{$index_tab},[&areg_link($areg),$number_pegs,$countsH->{$areg}->[0],$countsH->{$areg}->[1]]);
    &process_areg($html,$cluster,"$html_dir/$areg.html",$funcH,$subH);
}
print INDEX &HTML::make_table($index_cols,$index_tab,'Atomic Regulons');
print INDEX "</html>\n";
close(INDEX);

sub count_on_off {
    my($dir) = @_;

    my $countsH = {};
    open(ONOFF,"<","$dir/ar.vectors") || die "could not open $dir/ar.vectors";
    while (defined($_ = <ONOFF>))
    {
	if ($_ =~ /^(\d+)\t(\S.*\S)/)
	{
	    my $areg = $1;
	    my @v = split(/,/,$2);
	    my $on = grep { $_ eq "1" } @v;
	    my $off = grep { $_ eq "-1" } @v;
	    $countsH->{$areg} = [$on,$off];
	}
    }
    close(ONOFF);
    return $countsH;
}


sub process_areg {
    my($html,$cluster,$file,$funcH,$subH) = @_;
    my($n,$peg_set,$desc) = @$cluster;
    &show_pearson_correlation($html,$peg_set,$dir);
    push(@$html,"<br>");
    &show_funcs($html,$peg_set,$funcH,$subH);
    push(@$html,"</html>\n");
    open(TMP,">$file") || die "could not open $file";
    foreach $_ (@$html)
    {
	print TMP $_;
    }
    close(TMP);
}

sub show_pearson_correlation {
    my($html,$pegs,$dir) = @_;

    my @stripped = map { ($_ =~ /^fig\|\d+\.\d+\.((peg|rna)\.\d+)/) ? $1 : $_ } @$pegs;
    my $col_hdrs = ['PEG',@stripped];
    my $tab = [];
    my $i;
    my $handle = &get_pc_hash($pegs,$dir);
#    print &Dumper($handle); 

    for ($i=0; ($i < @$pegs); $i++)
    {
	my @corr = map { &pearson_corr($handle,$pegs->[$i],$_) } @$pegs;
	push(@$tab,[&peg_link($pegs->[$i]),@corr]);
    }
    push(@$html,&HTML::make_table($col_hdrs,$tab,"Pearson Coefficients"));
}

sub pearson_corr {
    my($hash,$peg1,$peg2) = @_;
    my $v = $hash->{$peg1}->{$peg2};
    return defined($v) ? sprintf("%0.3f",$v) : " ";
}

sub get_pc_hash {
    my($pegs,$dir) = @_;

    my $corrH = &get_corr($dir,$pegs);
    my $hash  = &compute_pc($pegs,$corrH);
    return $hash;
}

sub get_corr {
    my($dir,$pegs) = @_;

    my %only = map { $_ => 1 } @$pegs;
    my $rawF          = "$dir/rma_normalized.tab";
    my %gene_to_values;
    open(RAW,"<$rawF") || die "could not open $rawF";
    while (<RAW>)
    {
	chomp;
	my ($gene_id, @gxp_values) = split("\t");
	if ($only{$gene_id})
	{
	    $gene_to_values{$gene_id} = \@gxp_values;
	}
    }
    close(RAW);
    return \%gene_to_values;
}

sub compute_pc
{
    my ($gene_ids, $gxp_hash) = @_;
    my %values = ();

    for (my $i = 0; $i < @$gene_ids-1; $i++)
    {
	my $stat = Statistics::Descriptive::Full->new();
	$stat->add_data(@{$gxp_hash->{$gene_ids->[$i]}});

	for (my $j = $i+1; $j < @$gene_ids; $j++)
	{
	    my ($q, $m, $r, $err) = $stat->least_squares_fit(@{$gxp_hash->{$gene_ids->[$j]}});
	    $values{$gene_ids->[$i]}->{$gene_ids->[$j]} = $r;
	    $values{$gene_ids->[$j]}->{$gene_ids->[$i]} = $r;
	}
    }
    
    return \%values;
}

sub atomic_regulons {
    my($dir) = @_;
    my $n = 1;
    my @clusters = ();
    open(AR,"<$dir/atomic.regulons") || die "could not read atomic.regulons";
    my $last = <AR>;
    while ($last && ($last =~ /^(\d+)/))
    {
	my $curr = $1;
	my @pegs = ();
	while ($last && ($last =~ /^(\d+)\t(\S+)/) && ($1 == $curr))
	{
	    push(@pegs,$2);
	    $last = <AR>;
	}
	my $pegs = [sort { &SeedUtils::by_fig_id($a,$b) } @pegs];
	push(@clusters,$pegs);
    }
    return [map { my $pegs = $_; 
	          $pegs->[0] =~ /^fig\|\d+\.\d+\.peg\.(\d+)/; 
	          my $first_peg = $1; 
	          $pegs->[-1] =~ /^fig\|\d+\.\d+\.peg\.(\d+)/; 
	          my $last_peg = $1;
                  [$n++,$pegs,"$first_peg-$last_peg"]
	        } @clusters
	    ];
}

sub show_funcs {
    my($html,$pegs,$funcH,$subH) = @_;

    my $col_hdrs = ['PEG','Function','Subsystems'];
    my $tab = [];
    foreach my $peg (@$pegs)
    {
	my $func = &function_of($peg,$funcH);
	$func    = $func ? $func : 'hypothetical protein';
	my $subsys = &get_subsystems($peg,$subH);
	my $ss = (@$subsys > 0) ? join("<br>",@$subsys) : '';
	push(@$tab,[&peg_link($peg),$func,$ss]);
    }
    push(@$html,&HTML::make_table($col_hdrs,$tab,"Functions"));
}
###############################

sub function_of {
    my($peg,$funcH) = @_;
    
    return $funcH->{$peg};
}

sub peg_link { 
    my($peg) = @_;

    if ($peg =~ /^fig\|\d+\.\d+\.peg\./)
    {
	my $window = "PEG.$$";
 	return "<a target=$window href=http://anno-3.nmpdr.org/anno/FIG/seedviewer.cgi?page=Annotation&feature=$peg>$peg</a>";
    }
    else
    {
	return $peg;
    }
}

sub get_subsystems {
    my($peg,$subH) = @_;

    return $subH->{$peg} ? $subH->{$peg} : [];
}

sub areg_link {
    my($areg) = @_;

    return "<a href='./$areg.html'>$areg</a>";
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3