[Bio] / FigWebServices / ma.cgi Repository:
ViewVC logotype

View of /FigWebServices/ma.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (download) (annotate)
Sun Oct 17 17:19:33 2010 UTC (9 years, 4 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.11: +189 -92 lines
saving recent changes - it is still work in progress

#########################################################################
# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

########################################
use FIG;
my $fig = new FIG;
use SAPserver;
my $sapO = SAPserver->new;
#######################################

use DB_File;
use SeedEnv;
use Data::Dumper;
use Statistics::Descriptive;
use tree_utilities;
use HTML;
use strict;

use CGI;
my $cgi = new CGI;

if (0)
{
    my $VAR1;
    eval(join("",`cat $FIG_Config::temp/ma_cgi`));
    $cgi = $VAR1;
#   print STDERR &Dumper($cgi);
}

if (0)
{
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params)
    {
	print "$_\t:",join(",",$cgi->param($_)),":\n";
    }

    if (0)
    {
	if (open(TMP,">$FIG_Config::temp/ma_cgi"))
	{
	    print TMP &Dumper($cgi);
	    close(TMP);
	}
    }
    exit;
}

my $html = [];
my $genome        = $cgi->param('genome');
my $request       = $cgi->param('request');
my $areg          = $cgi->param('areg');
my $pegs          = $cgi->param('pegs');
my $exp           = $cgi->param('exp');
my @pcpegs        = $cgi->param('pcpeg');
my $fid           = $cgi->param('fid');

my $filled = 0;
if ($fid)        { $filled++ }
if ($pegs)       { $filled++ }
if ($request && ($request =~ /^show_ar|show_conn|show_connH/))    { $filled++ }
if ($filled > 1)
{
    push(@$html,$cgi->h1('you filled in too many fields'));
    &HTML::show_page($cgi,$html);
    exit;
}    

if (! $genome)
{
    my $genome_data = &get_genomes_with_expression_data($sapO);

    push(@$html,$cgi->start_form(-action => "./ma.cgi"));
    my $col_hdrs    = ['',"Genome ID","Genome Description"];
    my $tab         = [];
    my @choices     = map { $_->[0] } @$genome_data;
    my @radio       = $cgi->radio_group( -name     => 'genome',
					 -override => 1,
					 -values   => [@choices]
					 );
    my $check1 = $cgi->checkbox(-label => 'Show Predicted Atomic Requlons', 
			       -name => 'request', -value => 'show_ar', -default => 0, -override => 1);
    my $check2 = $cgi->checkbox(-label => 'Show binary connections', 
			       -name => 'request', -value => 'show_conn', -default => 0, -override => 1);
    my $check3 = $cgi->checkbox(-label => 'Show binary connections (just hypotheticals)', 
			       -name => 'request', -value => 'show_connH', -default => 0, -override => 1);
    my $i;
    for ($i=0; ($i < @choices); $i++)
    {
	push(@$tab,[$radio[$i],$genome_data->[$i]->[0],$genome_data->[$i]->[1]]);
    }
    push(@$html,&HTML::make_table($col_hdrs,$tab,"Genomes with Expression Data"));
    push(@$html,"<br><hr><br>",
	 "If you wish to focus on a set of PEGs, type them in. <br>",
	 "If you wish to focus on putative atomic regulons containing a specific PEG, type it in.",
	 "<br>",
	 $cgi->textfield(-name => 'pegs', -size => 40),"<br>",
	 "<br><br>",
	 "If you wish to focus on a single feature, type it here: ",
	 $cgi->textfield(-name => 'fid', -size => 40),"<br><br><br>",
	 $check1,"<br><br><br>",
	 $check2,"<br><br><br>",
	 $check3,"<br><br><br>",
	 $cgi->submit('Do It'),
	 $cgi->end_form);
    &HTML::show_page($cgi,$html);
    exit;
}

if (0) # ($exp)
{
    use FIGMODEL;
    my $model = new FIGMODEL;
    my $experiments = $model->getExperimentsByGenome($genome);
    my $data = $model->getExperimentDetails($experiments->[$exp]);

    my $col_hdrs = ['attribute','value'];
    my $tab      = [map { [$_,$data->{$_}] } sort { $a cmp $b } keys(%$data)];
    push(@$html,&HTML::make_table($col_hdrs,$tab,"Properties of Experiment $exp"));
    
    &HTML::show_page($cgi,$html);
    exit;
}

if (($request eq "show_pc") && (@pcpegs > 0))
{
    &get_expanded_pcs($cgi,$html,\@pcpegs);
}    
else
{
    my @pegset;
    if ($pegs) 
    { 
	@pegset = map { ($_ =~ /^(\d+)$/) ? "fig\|$genome\.peg.$1" : 
			(($_ =~ /^((peg|rna)\.\d+)$/) ? "fig\|$genome\.$1" : $_ )
		      } split(/[ ,]+/,$pegs) ;
    }

    my($vecs,$vec_sz)    = &read_vecs($genome);
    my $atomic_regulons  = &atomic_regulons($genome);

    if (@pegset > 1)
    {
	my $cluster     = ['',\@pegset,'User Request'];
	&show_cluster($fig,$cgi,$html,$vecs,$vec_sz,$cluster);
    }
    elsif ($request && ($request eq "show_conn"))
    {
	&show_conn($cgi,$html,$genome);
    }
    elsif ($request && ($request eq "show_connH"))
    {
	&show_connH($cgi,$html,$genome);
    }
    elsif ($request && ($request eq "show_areg") && $areg)
    {
	my $cluster = $atomic_regulons->[$areg-1];
	&show_cluster($fig,$cgi,$html,$vecs,$vec_sz,$cluster);
    }
    elsif ($request eq "summarize_exps")
    {
	&summarize_exps($cgi,$html,$genome);
    }
    elsif ($request && ($request eq "show_exp"))
    {
	&show_exp($cgi,$html,$genome);
    }
    elsif ($request && ($request eq "show_exp_clusters"))
    {
	&show_exp_clusters($cgi,$html,$genome);
    }
    elsif ($request && ($request eq "show_ar"))
    {
	&show_ar($cgi,$html,$genome,$atomic_regulons);
    }
    elsif ($request && ($request eq "show_ar_clusters"))
    {
	&show_ar_clusters($cgi,$html,$genome);
    }
    elsif ($request && ($request eq "show_experiment_clusters"))
    {
	&show_experiment_clusters($cgi,$html,$genome);
    }
    elsif ($request && ($request eq "show_an_ar"))
    {
	&show_an_ar($cgi,$html,$genome,$atomic_regulons);
    }
    elsif ($request && ($request eq "show_an_experiment"))
    {
	&show_an_experiment($cgi,$html,$genome);
    }
    elsif ($fid)
    {
	if ($fid =~ /^(\d+)$/) 
	{
	    $fid = "fig\|$genome\.peg\.$fid";
	}
	elsif ($fid =~ /^((peg|rna)\.\d+)$/) 
	{
	    $fid = "fig\|$genome\.$fid";
	}
	elsif ($fid !~ /^fig\|\d+\.\d+\.(peg|rna)\.\d+$/)
	{
	    my @poss = map { ($_ =~ /^(\S+)/) ? $1 : () }
	               grep { $_ =~ /\b$fid\b/ } 
	               `cat $FIG_Config::organisms/$genome/Features/*/tbl`;

	    if (@poss != 1)
	    {
		push(@$html,$cgi->h2("could not resolve alias=$fid"));
		&HTML::show_page($cgi,$html);
		exit;
	    }
	    else
	    {
		$fid = $poss[0];
	    }
	}
	&get_expanded_pcs($cgi,$html,[$fid]);
    }
    else
    {
	if (@pegset == 1)
	{
	    $atomic_regulons = &restrict_to_clusters_containing_peg($atomic_regulons,$pegset[0]);
	}
	my @scored      = &scored_clusters($atomic_regulons,$vecs);
	@scored         = sort { $a->[0] <=> $b->[0] } @scored;
	&display_cluster_menu($cgi,$html,\@scored);
    }
}

&HTML::show_page($cgi,$html);
exit;

sub show_conn {
    my($cgi,$html,$genome) = @_;

    my $dir           = "$FIG_Config::data/ExpressionData";
    my @scored = sort {$b->[2] <=> $a->[2] } map { chomp; [split(/\t/,$_)] } `cat $dir/sets.corr`;
    &show_scored($cgi,$html,$genome,\@scored,0);
}

sub show_connH {
    my($cgi,$html,$genome) = @_;

    my $dir           = "$FIG_Config::data/ExpressionData";
    my @scored = sort {$b->[2] <=> $a->[2] } map { chomp; [split(/\t/,$_)] } `cat $dir/sets.corr`;
    &show_scored($cgi,$html,$genome,\@scored,1);
}

sub show_scored {
    my($cgi,$html,$genome,$scored,$hypo) = @_;

    my %seen;
    my $dir           = "$FIG_Config::data/ExpressionData";
    my @sets   = map { chomp; [split(/,/,$_)] } `cat $dir/gene.sets`;
    my $col_hdrs = ['Score','PEG1','PEG2','Function1','Function2','Subsystems1','Subsystems2'];
    my $tab = [];
    my $fig = new FIG;
    foreach my $tuple (@$scored)
    {
	next if (@$tab > 299);
	my($set1,$set2,$sc) = @$tuple;
	my @pegs = sort { &SeedUtils::by_fig_id($a,$b) }
                   grep { index($_,"fig\|$genome\.") == 0 } (@{$sets[$set1]},@{$sets[$set2]});
	if ((@pegs == 2) && (! $seen{join(",",@pegs)}))
	{
	    $seen{join(",",@pegs)} = 1;
	    my $func1 = $fig->function_of($pegs[0]);
	    my $func2 = $fig->function_of($pegs[1]);
	    next if ($hypo && ((! $fig->hypo($func1)) && (! $fig->hypo($func2))));
	    my $ss1 = join(",", grep {$fig->usable_subsystem($_) } $fig->peg_to_subsystems($pegs[0]));
	    my $ss2 = join(",", grep {$fig->usable_subsystem($_) } $fig->peg_to_subsystems($pegs[1]));
	    
	    push(@$tab,[$sc,&peg_link($pegs[0]),&peg_link($pegs[1]),$func1,$func2,$ss1,$ss2]);
	}
    }
    push(@$html,&HTML::make_table($col_hdrs,$tab,"High-scoring Pairs"));
}

sub get_expanded_pcs {
    my($cgi,$html,$pegs) = @_;
    &show_pearson_coefficient_connections($cgi,$html,$pegs);
}

sub restrict_to_clusters_containing_peg {
    my($clusters,$peg) = @_;

    my @restricted = grep { &member($peg,$_->[1]) } @$clusters;
    return \@restricted;
}

sub scored_clusters {
    my($clusters,$vecs) = @_;
    my @scored = map { [&scored_cluster($vecs,$_->[1]),$_] } @$clusters;
    return @scored;
}

sub scored_cluster {
    my($vecs,$pegs) = @_;
    my($i,$j);
    my $high_sc = 0;

    for ($i=0; ($i < (@$pegs-1)); $i++)
    {
	for ($j=$i+1; ($j < @$pegs); $j++)
	{
	    my($v1,$v2);
	    if (($v1 = $vecs->[0]->{$pegs->[$i]}) &&
		($v2 = $vecs->[0]->{$pegs->[$j]}))
	    {
		my $sc = &scored_pair($v1,$v2);

		if ($sc > $high_sc)
		{
		    $high_sc = $sc;
		}
	    }
	}
    }
    return $high_sc;
}

sub scored_pair {
    my($v1,$v2) = @_;

    if ((! $v1) || (! $v2)) { return 0 }

    my $i;
    my $match = 0;
    my $mismatch = 0;
    my $sc = 0;
    for ($i=0; ($i < @$v1); $i++)
    {
	if (($v1->[$i] != 0) && ($v2->[$i] != 0))
	{
	    if ($v1->[$i] != $v2->[$i])
	    {
		$mismatch++;
	    }
	    else
	    {
		$match++;
	    }
	}
    }
    return ($match+$mismatch) ? int(($mismatch * 100) / ($match+$mismatch)) : 0;
}

sub display_cluster_menu {
    my($cgi,$html,$scored) = @_;

    my $genome = $cgi->param('genome');
    my $dir           = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    my $col_hdrs = ['','sc','regulon-id','# PEGs','Source'];
    my $tab = [];
    my $n = 1;
    foreach my $scored_cluster (@$scored)
    {
	my($sc,$cluster) = @$scored_cluster;
	my($regulon_id,$pegs,$desc)  = @$cluster;
	my $num_pegs = @$pegs;
	my $extra = $desc;
	
	if ($desc =~ /Scenario:(\S.*\S)/)
	{
	    my $ss = $1;
	    $ss =~ s/ /_/g;
	    $ss =~ s/::.*$//;
	    $extra = "<a href=http://anno-3.nmpdr.org/anno/FIG/seedviewer.cgi?page=Subsystems&subsystem=" . $ss .
		"&genome=$genome&tab=scenarios>$desc:$num_pegs:$pegs->[0]</a>";
	}
	elsif ($desc =~ /^InSubsystem:(\S.*\S)/)
	{
	    my $ss = $1;
	    $ss =~ s/ /_/g;
	    $ss =~ s/::.*$//;
	    $extra = "<a href=http://anno-3.nmpdr.org/anno/FIG/SubsysEditor.cgi?page=ShowSubsystem&subsystem=" . $ss .
		"&genome=$genome>$desc:$num_pegs:$pegs->[0]</a>";
	}
	elsif ($desc =~ /(ClusterOnChromosome:)(fig\|\d+\.\d+\.peg\.\d+)(.*)/)
	{
	    $extra = $1 . &peg_link($2) . $3;
	}
	push(@$tab,[$n,$sc,
		    "<a href=./ma.cgi?request=show_areg&areg=$regulon_id&genome=$genome>$regulon_id</a>",
		    $num_pegs,$extra]);
	$n++;
    }
    push(@$html,&HTML::make_table($col_hdrs,$tab,"Putative Atomic Regulons"));
}


sub member {
    my($x,$xL) = @_;

    my $i;
    for ($i=0; ($i < @$xL) && ($x ne $xL->[$i]); $i++) {}
    return ($i < @$xL);
}

sub show_cluster {
    my($fig,$cgi,$html,$vecs,$vec_sz,$cluster) = @_;
    my $red = $cgi->param('red');
    if (! defined($red)) { $red = 0.001 }

    my $green = $cgi->param('green');
    if (! defined($green)) { $green = 1 }

    my($n,$peg_set,$desc) = @$cluster;
    &show_pearson_correlation($html,$peg_set);
    push(@$html,"<br");
    &show_funcs($html,$peg_set);
    push(@$html,"<hr>");

    &show_comparative($html,$peg_set);
    &show_areg_orig($fig,$cgi,$html,$red,$green,$vecs,$vec_sz,$peg_set,"Putative Regulon");
}

sub show_comparative {
    my($html,$pegs) = @_;

    my $peg_to_set = &load_same_peg_sets($pegs);
    if (keys(%$peg_to_set) > 1)
    {
	my $genome1    = &SeedUtils::genome_of($pegs->[0]);
	my %by_genome;
	foreach my $peg1 (keys(%$peg_to_set))
	{
	    my $set = $peg_to_set->{$peg1};
	    foreach my $peg2 (@$set)
	    {
		my $genome2 = &SeedUtils::genome_of($peg2);
		if ($genome1 ne $genome2)
		{
		    push(@{$by_genome{$genome2}},$peg2);
		}
	    }
	}
	foreach my $genome2 (sort { $a <=> $b } keys(%by_genome))
	{
	    &show_set_of_pegs($html,$by_genome{$genome2});
	}
    }
}

sub show_set_of_pegs {
    my($html,$pegs) = @_;

    my @pegset = sort { &SeedUtils::by_fig_id($a,$b) } @$pegs;
    push(@$html,"<hr>\n");
    if (@$pegs < 50)
    {
	&show_pearson_correlation($html,\@pegset);
    }
    push(@$html,"<br");
    &show_funcs($html,\@pegset);
}

sub show_an_ar {
    my($cgi,$html,$genome,$atomic_regulons) = @_;

    my $arI = $cgi->param('arI');
    my $arH = &load_ar_vectors($genome);
    my $on  = &v_occurs($arH->{$arI},1);
    my $off =  &v_occurs($arH->{$arI},-1);
    my $pegs = $atomic_regulons->[$arI-1]->[1];
    push(@$html,$cgi->h2("Pegs in Atomic Regulon $arI [ON=$on  OFF=$off]"));
    &show_set_of_pegs($html,$pegs);
}

sub show_an_experiment {
    my($cgi,$html,$genome,$exp) = @_;

    push(@$html,"<h1>Experiment Characterizations are not known</h1>");
}

sub show_funcs {
    my($html,$pegs) = @_;

    my $fig  = new FIG;

    my $col_hdrs = ['PEG','Function','Subsystems'];
    my $tab = [];
    foreach my $peg (@$pegs)
    {
	my $func = $fig->function_of($peg);
	my @subsys = grep {$fig->usable_subsystem($_) } $fig->peg_to_subsystems($peg);
	$func    = $func ? $func : 'hypothetical protein';
	push(@$tab,[&peg_link($peg),$func,join(',',@subsys)]);
    }
    my $gs = $fig->genus_species(&SeedUtils::genome_of($pegs->[0]));
    push(@$html,&HTML::make_table($col_hdrs,$tab,"Functions in $gs"));
}

sub link_to_graphic {
    my($pegs,$exp) = @_;

    my $query_string = $exp ? "experiment=$exp&peg=" : "peg=";
    $query_string .= join('&peg=',@$pegs);
    return "<a target=show_pegs.$$ href=show_pegs.cgi?$query_string>graphic</a>";
}

sub show_areg_orig {
    my($fig,$cgi,$html,$red,$green,$vecs,$sz,$pegs,$hdr) = @_;

    my($i,$j);
    my $col_hdrs = ['Show in Graph','Sort by','PEG','aliases','Function','ON','OFF'];
    my $genome = &SeedUtils::genome_of($pegs->[0]);
    for ($i=1; ($i <= $sz); $i++) 
    { 
	my $links = &link_to_exp($i,$genome) . "<br>" . &link_to_graphic($pegs,$i);
	push(@$col_hdrs,$links);
    }

    my $tab = [];
    my @pvecs = map { $vecs->[0]->{$_} ? $vecs->[0]->{$_} : () } @$pegs;
    my $color = {};

    my $totON = 0;
    my $totOFF = 0;
    my $totCONFLICT = 0;

    for ($i=0; ($i < $sz); $i++)
    {
	my $on = 0;
	my $off = 0;
	for ($j=0; ($j < @pvecs); $j++)
	{
	    my $c = $pvecs[$j]->[$i];
	    if ($c == -1) { $off++ }
	    if ($c == 1)  { $on++ }
	}

	if ($off && $on && 
	    (&FIG::min($off,$on) / ($off + $on) >= $red))
	{
	    $color->{$i} = 'red';
	}
	elsif (($off || $on) && (($off+$on) > 1) &&
	       ((&FIG::max($off,$on) / ($off + $on)) >= $green))
	{
	    $color->{$i} = 'green';
	}
	if ($on && $off) { $totCONFLICT++ }
	if ($on > $off) { $totON++ }
	if ($off > $on) { $totOFF++ }
    }

    my @radio       = $cgi->radio_group( -name     => 'sort_by',
					 -override => 1,
					 -values   => $pegs
					 );

    my $pegI;
    for ($pegI=0; ($pegI < @$pegs); $pegI++)
    {
	my $peg = $pegs->[$pegI];
	my $func = $fig->function_of($peg);
	my $v = $vecs->[0]->{$peg};
	my $extraV = $vecs->[1]->{$peg};
	if (! $v)
	{
	    $v = [];
	    for ($i=0; ($i < $sz); $i++)
	    {
		push(@$v,"");
	    }
	}

	if (! $extraV)
	{
	    $extraV = [];
	    for ($i=0; ($i < $sz); $i++)
	    {
		push(@$extraV,"");
	    }
	}
	my $aliases = $fig->feature_aliases($peg);
	my $restrict = $cgi->param('aliases');
	if ($restrict)
	{
	    $aliases = join(",",grep { $_ =~ /^$restrict/ } split(/,/,$aliases));
	}
	else
	{
	    my @tmp = split(/,/,$aliases);
	    if (@tmp > 4) { $#tmp = 3 }
	    $aliases = join(",",@tmp);
	}

	my $check = $cgi->checkbox(-name => 'checked', -value => $peg, -default => 0, -override => 1);
	my $links = &peg_link($peg) . " <br> " . &link_to_region($peg);
	my $base = [$check,$radio[$pegI],$links,$aliases,$func,undef,undef];
	
	my $pegON = 0;
	my $pegOFF = 0;
	for ($i=0; ($i < $sz); $i++)
	{
	    if ($v->[$i] == 1)  { $pegON++ }
	    if ($v->[$i] == -1) { $pegOFF++ }
	    my $val2 = join('<br>&nbsp;&nbsp;&nbsp;',($v->[$i],sprintf("%0.2f",$extraV->[$i])));
	    if ($color->{$i})
	    {
		push(@$base,[$val2,"td bgcolor=$color->{$i}"]);
	    }
	    else
	    {
		push(@$base,$val2);
	    }
	}
	$base->[5] = $pegON;
	$base->[6] = $pegOFF;
	push(@$tab,$base);
    }
    my $cutoffs = $vecs->[2];
    push(@$tab,['','','','','','','',map { $_->[0] } @$cutoffs]);
    push(@$tab,['','','','','','','',map { $_->[1] } @$cutoffs]);
    push(@$html,$cgi->start_form(-action => "graph_exp.cgi"),
	 &HTML::make_table($col_hdrs,$tab,$hdr),
	 $cgi->submit('make graphs'),
	 $cgi->end_form,
	 "<br><br>on-columns=$totON<br>off-colums=$totOFF<br>conflicts=$totCONFLICT<br>"
	 );
}

sub peg_link { 
    my($peg) = @_;

    if ($peg =~ /^fig\|\d+\.\d+\.peg\./)
    {
	my $genome = &SeedUtils::genome_of($peg);
	my $window = "PEG.$$";
	return "<a target=$window href=http://anno-3.nmpdr.org/anno/FIG/seedviewer.cgi?page=Annotation&feature=$peg>$peg</a>";
    }
    else
    {
	return $peg;
    }
}

sub peg_link_pc { 
    my($peg) = @_;

    my $genome = &SeedUtils::genome_of($peg);
    my $window = "PEG.$$";
    return "<a target=$window href=./ma.cgi?genome=$genome&pegs=$peg>$peg</a>";
}

sub show_correlation {
    my($html,$pegs,$vecs) = @_;

    my $col_hdrs = ['PEG',@$pegs];
    my $tab = [];
    my $i;
    for ($i=0; ($i < @$pegs); $i++)
    {
	my @corr = map { &corr($vecs,$pegs->[$i],$_) } @$pegs;
	push(@$tab,[&peg_link($pegs->[$i]),@corr]);
    }
    push(@$html,&HTML::make_table($col_hdrs,$tab,'Correlations'));
}

sub show_pearson_correlation {
    my($html,$pegs) = @_;

    my @stripped = map { ($_ =~ /^fig\|\d+\.\d+\.((peg|rna)\.\d+)/) ? $1 : $_ } @$pegs;
    my $col_hdrs = ['PEG',@stripped];
    my $tab = [];
    my $i;
    my $handle = &get_pc_hash($pegs);
#    print &Dumper($handle); 

    for ($i=0; ($i < @$pegs); $i++)
    {
	my @corr = map { &pearson_corr($handle,$pegs->[$i],$_) } @$pegs;
	push(@$tab,[&peg_link($pegs->[$i]),@corr]);
    }
    my $all_link = &show_all_link($pegs);
    push(@$html,&HTML::make_table($col_hdrs,$tab,"Pearson Coefficients: $all_link"));
}

sub show_all_link {
    my($pegs) = @_;

    my $genome = &SeedUtils::genome_of($pegs->[0]);
    return "<a href=./ma.cgi?genome=$genome&request=show_pc&pcpeg=" . 
	join("&pcpeg=",@$pegs) . 
	">Get All Related PEGs</a>";
}

sub pearson_corr {
    my($hash,$peg1,$peg2) = @_;
    my $v = $hash->{$peg1}->{$peg2};
    return defined($v) ? sprintf("%0.3f",$v) : " ";
}

sub expressed_val {
    my($v,$val) = @_;

    my $i;
    my $in = [];
    for ($i=0; ($i < @$v); $i++)
    {
	if ($v->[$i] == $val)
	{
	    push(@$in,$i);
	}
    }
    return $in;
}

sub expressed_in {
    my($v) = @_;
    
    return &expressed_val($v,1);
}

sub not_expressed_in {
    my($v) = @_;
    
    return &expressed_val($v,-1);
}

##############################################
sub load_ar_vectors {
    my($genome) = @_;

    return &load_vectors($genome,'ar');
}

sub load_experiment_vectors {
    my($genome) = @_;

    return &load_vectors($genome,'experiment');
}

sub load_vectors {
    my($genome,$type) = @_;

    my $expD = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    if (! -s "$expD/$type.vectors") { return undef }
    return { map { chomp; my($ar,$vec) = split(/\t/,$_); ($ar,[split(/,/,$vec)]) } 
	     `cat $expD/$type.vectors` }
}

sub read_vecs {
    my($genome) = @_;

    my($vecs1,$sz1) = &read_on_off($genome);
    my($vecs2,$sz2) = &read_normalized_values($genome);
    if ($sz1 != $sz2) { die "sz1=$sz1 sz2=$sz2" }
    my $cutoffs = &cutoffs($genome);
    return ([$vecs1,$vecs2,$cutoffs],$sz1);
}

sub atomic_regulons {
    my($genome) = @_;
    my $dir           = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    my $n = 1;
    my @clusters = ();
    open(AR,"<$dir/atomic.regulons") || die "could not read atomic.regulons";
    my $last = <AR>;
    while ($last && ($last =~ /^(\d+)/))
    {
	my $curr = $1;
	my @pegs = ();
	while ($last && ($last =~ /^(\d+)\t(\S+)/) && ($1 == $curr))
	{
	    push(@pegs,$2);
	    $last = <AR>;
	}
	my $pegs = [sort { &SeedUtils::by_fig_id($a,$b) } @pegs];
	push(@clusters,$pegs);
    }
    return [map { my $pegs = $_; 
	          $pegs->[0] =~ /^fig\|\d+\.\d+\.peg\.(\d+)/; 
	          my $first_peg = $1; 
	          $pegs->[-1] =~ /^fig\|\d+\.\d+\.peg\.(\d+)/; 
	          my $last_peg = $1;
                  [$n++,$pegs,"$first_peg-$last_peg"]
	        } @clusters
	    ];
}

sub get_pc_hash {
    my($pegs) = @_;

    my $corrH = &get_corr(&SeedUtils::genome_of($pegs->[0]));
    my $hash  = &compute_pc($pegs,$corrH);
    return $hash;
}

sub get_pc_hash_strip {
    my($pegs1,$pegs2) = @_;
    my $corrH = &get_corr(&SeedUtils::genome_of($pegs1->[0]));
    my $hash  = &compute_pc_strip($pegs1,$pegs2,$corrH);
    return $hash;
}

sub read_on_off {
    my($genome) = @_;
    return  &read_vecs1($genome,"final_on_off_calls.txt");
}

sub read_normalized_values {
    my($genome) = @_;

    &read_vecs1($genome,"raw_data.tab");
}

sub cutoffs {
    my($genome) = @_;

    my $dir           = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    return [map { $_ =~ /^(\S+)\s+(\S+)/; [sprintf("%0.3f",$1),sprintf("%0.3f",$2)] } `cat $dir/cutoffs.txt`];
}

sub get_genomes_with_expression_data
{
    opendir(ORG,"$FIG_Config::organisms") || die "could not open $FIG_Config::organisms";
    my @orgs = grep { -s "$FIG_Config::organisms/$_/UserSpace/ExpressionData/final_on_off_calls.txt" }
    readdir(ORG);
    closedir(ORG);
    use FIG;
    my $fig = new FIG;
    my @genome_data = sort { $a->[0] <=> $b->[0] } map { [$_,$fig->genus_species($_)] } @orgs;
    return \@genome_data;
}

sub load_same_peg_sets {
    my($pegs) = @_;

    my %sought = map { $_ => 1 } @$pegs;
    my $peg_to_set = {};
    foreach $_ (`cat $FIG_Config::data/ExpressionData/gene.sets`)
    {
	chomp;
	my @in = split(/,/,$_);
	my $i;
	for ($i=0; ($i < @in) && (! $sought{$in[$i]}); $i++) {}
	if ($i < @in)
	{
	    $peg_to_set->{$in[$i]} = \@in;
	}
    }
    return $peg_to_set;
}

sub link_to_exp {
    my($n,$genome) = @_;

    my $dir  = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    if (-s "$dir/experiment.names") 
    {
	my %idH = map { $_ =~ /^(\d+)\s+(\S+)\.CEL\.gz/; ($1 => $2) } `cat $dir/experiment.names`;
	my $exp = $idH{$n};
	if ($exp)
	{
	    return "<a href=seedviewer.cgi?page=Experiment&e=$exp>$exp</a>";
	}
    }
    return $n;
}

sub link_to_region {
    my($peg) = @_;

    return "";
}

sub show_pearson_coefficient_connections {
    my($cgi,$html,$pegs1) = @_;

    my %pegs1 = map { $_ => 1 } @$pegs1;
    my $genome = &SeedUtils::genome_of($pegs1->[0]);
    my $fig = new FIG;
    my @pegs2 = $fig->all_features($genome,'peg');
    my $handle = &get_pc_hash_strip($pegs1,\@pegs2);
    my %ok;
    my $i;
    for ($i=0; ($i < @$pegs1); $i++)
    {
	foreach my $peg2 ( @pegs2 )
	{
	    my $pc = &pearson_corr($handle,$pegs1->[$i],$peg2); 
	    if (abs($pc >= 0.7))
	    {
		$ok{$pegs1->[$i]} -> {$peg2} = $pc;
	    }
	}
    }

    my $col_hdrs = ['PC','PEG','Function','Subsystems'];
    foreach my $peg1 (sort { &SeedUtils::by_fig_id($a,$b) } keys(%ok))
    {
	my $pegs2H = $ok{$peg1};
	my @to = map { [$pegs2H->{$_},$_] } keys(%$pegs2H);
	my $tab = [];
	foreach my $tuple (sort { abs($b->[0]) <=> abs($a->[0]) } @to)
	{
	    my ($pc,$peg2) = @$tuple;
	    
	    my @subsys = grep {$fig->usable_subsystem($_) } $fig->peg_to_subsystems($peg2);
	    my $func       = $fig->function_of($peg2);
	    $func          = $func ? $func : "hypothetical protein";
	    push(@$tab,[$pc,&peg_link_pc($peg2),$func,join(",",@subsys)]);
	}
	push(@$html,&HTML::make_table($col_hdrs,$tab,"Connections for " . &peg_link($peg1)));
	push(@$html,"<br><hr><br>");
    }
}


###################
sub read_vecs1 {
    my($genome,$file) = @_;

    my $dir           = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    my %seen;
    my $vecs = {};
    my $sz;
    foreach $_ (`cat $dir/$file`)
    {
	if ($_ =~ /^(fig\|\d+\.\d+\.(peg|rna)\.\d+)\t(\S.*\S)/)
	{
	    my $peg = $1;
	    my $vec = [split(/\t/,$3)];
	    if (! $sz)
	    {
		$sz = @$vec;
	    }
	    if (! $seen{$peg})
	    {
		$seen{$peg} = 1;
		if (@$vec != $sz)  { die 'malformed vectors' }
		$vecs->{$peg} = $vec;
	    }
	}
    }
    return ($vecs,$sz);
}

sub get_corr {
    my($genome) = @_;

    defined($genome) || confess 'BAD';
    my $dir           = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    my $rawF          = "$dir/raw_data.tab";
    my %gene_to_values;
    open(RAW,"<$rawF") || die "could not open $rawF";
    while (<RAW>)
    {
	chomp;
	my ($gene_id, @gxp_values) = split("\t");
	$gene_to_values{$gene_id} = \@gxp_values;
    }
    close(RAW);
    return \%gene_to_values;
}

sub compute_pc
{
    my ($gene_ids, $gxp_hash) = @_;
    my %values = ();

    for (my $i = 0; $i < @$gene_ids-1; $i++)
    {
	my $stat = Statistics::Descriptive::Full->new();
	$stat->add_data(@{$gxp_hash->{$gene_ids->[$i]}});

	for (my $j = $i+1; $j < @$gene_ids; $j++)
	{
	    my ($q, $m, $r, $err) = $stat->least_squares_fit(@{$gxp_hash->{$gene_ids->[$j]}});
	    $values{$gene_ids->[$i]}->{$gene_ids->[$j]} = $r;
	    $values{$gene_ids->[$j]}->{$gene_ids->[$i]} = $r;
	}
    }
    
    return \%values;
}

sub compute_pc_strip {
    my ($pegs1,$pegs2, $gxp_hash) = @_;
    my %values = ();

    for (my $i = 0; $i < @$pegs1; $i++)
    {
	my $stat = Statistics::Descriptive::Full->new();
	$stat->add_data(@{$gxp_hash->{$pegs1->[$i]}});

	foreach my $peg2 (@$pegs2)
	{
	    if ($pegs1->[$i] ne $peg2)
	    {
		my ($q, $m, $r, $err) = $stat->least_squares_fit(@{$gxp_hash->{$peg2}});
		$values{$pegs1->[$i]}->{$peg2} = $r;
	    }
	}
    }
    
    return \%values;
}

sub show_exp {
    my($cgi,$html,$genome) = @_;

    my $expD = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    opendir(EXPD,$expD) || die "could not open $expD";
    my @files = grep { $_ =~ /^experiment\.clusters-/ } readdir(EXPD);
    closedir(EXPD);
    my $col_hdrs = ['MaxDist','Number of Clusters'];
    my @rows;
    foreach my $file (@files)
    {
	if ($file =~ /clusters-(0\.\d+)$/)
	{
	    my $maxD     = $1;
	    my @tmp = `cat $expD/$file`;
	    my $clusters = @tmp;
	    push(@rows,[$maxD,$clusters]);
	}
    }
    @rows    = sort { $a->[0] <=> $b->[0] } @rows;
    my @tab  = map { my($dist,$n) = @$_; [&show_clust_link($dist,"experiment"),$n] } @rows;
    push(@$html,&HTML::make_table($col_hdrs,\@tab,"Clustering Experiments"));
}

sub show_experiment_clusters {
    my($cgi,$html,$genome) = @_;
    my $expD = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    my $node = $cgi->param('node') || 'n1';
    my @names = map { $_ =~ /^(\d+)\t(\S.*\S)/;   ### we assume that the name entries are in order
                      my($exp,$name) = ($1,$2);   ### and of the form 1,2,3,4,5...n (no missing)
		      $name =~ s/\.gz$//;
		      $exp
		    } `cat $expD/experiment.names`;

    &show_typed_clusters($cgi,$html,$genome,'experiment',\@names);
}

sub show_ar_clusters {
    my($cgi,$html,$genome) = @_;

    my $atomic_regulons = &atomic_regulons($genome);
    my @names = map { $_->[0] } @$atomic_regulons;
    &show_typed_clusters($cgi,$html,$genome,'ar',\@names);
}

sub show_typed_clusters {
    my($cgi,$html,$genome,$type,$names) = @_;

    my $node = $cgi->param('node');
    if (! $node) { $node = 'n1' }
    my $expD = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    my $to_callsH = ($type eq "ar") ? &load_ar_vectors($genome)
	                            : &load_experiment_vectors($genome);
    my $relabel = {};
    for (my $nameI=1; ($nameI <= @$names); $nameI++)
    {
	my $name = $names->[$nameI-1];
	my $on   = &v_occurs($to_callsH->{$nameI},1);
	my $off  = &v_occurs($to_callsH->{$nameI},-1);
	my $link = &show_object_link($genome,$type,$name);
	$relabel->{$name} = "$link: [ON=$on OFF=$off]";
    }
    my $title = ($type eq "ar") ? 'Atomic Regulon Clustering' : 'Experiment Clustering';
    &show_clustering($genome,$title,$type,$cgi,$html,$node,$relabel);
}

sub show_object_link {
    my($genome,$type,$name) = @_;

    return ($type eq 'ar') ? 
	   "<a target=wind$$ href=ma.cgi?genome=$genome&request=show_an_ar&arI=$name>$name</a>" :
	   "<a target=wind$$ href=ma.cgi?genome=$genome&request=show_an_experiment&name=$name>$name</a>";
}

sub size_ar {
    my($ar,$arI) = @_;
    
    return scalar @{$ar->[$arI-1]};
}

sub show_exp_clusters {
    my($cgi,$html,$genome) = @_;

    my $expD = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    my $file = $cgi->param('file');
    my($dist) = ($file =~ /-(\d\.\d+)$/);

    my $col_hdrs = ['Summarize Cluster','Experiments that Cluster'];
    my $tab = [];
    foreach my $set (`cat $expD/$file`)
    {
	chomp $set;
	my @exp = sort { $a <=> $b } split(/,/,$set);
	push(@$tab,[&summarize_exp_cluster_link(\@exp,$cgi,$genome),join(",",@exp)]);
    }
    my @sorted = sort { length($b->[0]) <=> length($a->[0]) } @$tab;
    push(@$html, &HTML::make_table($col_hdrs,\@sorted,"Clustered at $dist"));
}

sub summarize_exp_cluster_link {
    my($exps,$cgi,$genome) = @_;

    my $exp = "exp=" . join(",&exp=",@$exps);
    my $n = @$exps;
    return "<a target=wind$$ href=ma.cgi?genome=$genome&request=summarize_exps\&$exp>summarize $n experiments</a>";
}

sub show_clust_link {
    my($dist,$type) = @_;

    my $file = ($type eq "ar") ? "ar.clusters-$dist" : "experiment.clusters-$dist";
    my $url = "<a target=clust$$ href=./ma.cgi?genome=$genome&request=show\_$type\_clusters&file=$file>$dist</a>";
    return $url;
}

sub show_ar {
    my($cgi,$html,$genome,$ar) = @_;

    my $col_hdrs = ["Atomic Regulon ID", 'Size','ON','OFF'];
    my $arH = &load_ar_vectors($genome);

    my $tab = [];
    my $arI;
    for ($arI=1; ($arI <= @$ar); $arI++)
    {
	$_ = @{$ar->[$arI-1]->[1]};
	push(@$tab,[&show_ar_link($arI,$genome),$_,&v_occurs($arH->{$arI},1),&v_occurs($arH->{$arI},-1)]);
    }
    push(@$html,&HTML::make_table($col_hdrs,$tab,'Atomic Regulons'),"<hr><br>");

    push(@$html,&show_ar_clustering_link,'Show Clustering of Atomic Regulons',"<br>");
    push(@$html,&show_experiment_clustering_link('Show Clustering of Experiments'),"<br>");
}

sub show_clustering {
    my($genome,$title,$type,$cgi,$html,$node_label,$relabel) = @_;

    my $expD       = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    my $treeF      = "$expD/$type.trees-1.nwk";
    my $big_tree   = join("",`cat $treeF`);
    $big_tree      =~ s/\/\/\n//s;
    my $tree       = &tree_utilities::parse_newick_tree($big_tree);
    $tree || die "where is $expD/$type-trees-1.nwk?";
    &tree_utilities::label_all_nodes($tree);
    my $tabP       = &tree_utilities::tree_index_tables($tree);
    my $node       = &tree_utilities::label_to_node($tabP,$node_label);
    $node || confess "$treeF is mssing $node_label";
    my $trimmed = &trim_tree($node,4,$relabel,$type);
    &tree_utilities::relabel_nodes($trimmed,$relabel);
    push(@$html,"<h1>$title</h1><br><pre>",&tree_utilities::display_tree($trimmed),"</pre><br>");
}

sub trim_tree {
    my($node,$depth,$relabel,$type) = @_;

    my $trimmed = &tree_utilities::copy_tree($node);
    &trim_copy($trimmed,$depth,$relabel,$type);
    return $trimmed;
}

sub trim_copy {
    my($node,$depth_left,$relabel,$type) = @_;

    my($label,undef,$ptrs) = @$node;
    if ((@$ptrs > 1) && $depth_left)
    {
	my $i;
	for ($i=1; ($i < @$ptrs); $i++)
	{
	    &trim_copy($ptrs->[$i],$depth_left-1,$relabel,$type);
	}
    }
    elsif (@$ptrs > 1)
    {
	my $tips = &tree_utilities::tips_of_tree($node);
	my $n = @$tips;
	splice(@$ptrs,1);
	$relabel->{$label} = ($type eq 'ar') ? &show_ar_clustering_link("($n)",$label) :
	                                       &show_experiment_clustering_link("($n)",$label);
    }
}
    
sub show_ar_clustering_link {
    my($text,$node) = @_;
    if (! $node) { $node = 'n1' }
    return "<a target=show_ar.$$ href=./ma.cgi?genome=$genome&node=$node&request=show_ar_clusters>$text</a>";
}

sub show_experiment_clustering_link {
    my($text,$node) = @_;
    if (! $node) { $node = 'n1' }
    return "<a target=show_experiment.$$ href=./ma.cgi?genome=$genome&node=$node&request=show_experiment_clusters>$text</a>";
}

sub show_ar_link {
    my($arI,$genome) = @_;

    return "<a targetshow_ar.$$ href=./ma.cgi?genome=$genome&request=show_an_ar&arI=$arI>$arI</a>";
}

sub display_tree {
    my($cgi,$html,$genome,$tree) = @_;

    my $relabel = &label_nodes($cgi,$genome,$tree);

    push(@$html,
	 "<pre>\n",
	 &tree_utilities::display_tree($tree,$relabel),
	 "</pre>\n",
	 $cgi->br
         );
}

sub label_nodes {
    my($cgi,$genome,$tree) = @_;
    my($id,$gs,$func,$check);

    my $relabel = {};
    return $relabel;
}

sub v_occurs {
    my($v,$x) = @_;

    my $tot = 0;
    foreach $_ (@$v) 
    { 
	if ($_ eq $x) { $tot++ }
    }
    return $tot;
}

sub summarize_exps {
    my($cgi,$html,$genome) = @_;

    my @exps = $cgi->param('exp');
    my $state = join(",",@exps);
    my $col_hdrs = ['Average','Atomic Regulon','ON','OFF'];
    my @tab = ();

    my $exp_vecs = &read_experiment_vectors($genome);
    my $arN      = @{$exp_vecs->[0]};
    my ($ar,$exp);
    for ($ar=1; ($ar <= $arN); $ar++)
    {
	my $on = 0;
	my $off = 0;
	my $tot = 0;
	foreach $exp (@exps)
	{
	    my $v = $exp_vecs->[$exp-1]->[$ar-1];
	    if ($v != 0)
	    {
		$tot++;
		if ($v == 1) 
		{
		    $on++;
		}
		else
		{
		    $off++;
		}
	    }
	}
	my $avg = ($tot > 0) ? sprintf("%0.3f",($on-$off)/$tot) : 0;
	push(@tab,[$avg,&show_ar_link($ar,$genome),$on,$off]);
    }
#    @tab = sort { $b->[0] <=> $a->[0] } @tab;
    push(@$html,&HTML::make_table($col_hdrs,\@tab,"Atomic Regulons that Make up the State $state"));
}

sub read_experiment_vectors {
    my($genome) = @_;

    my $dir = "$FIG_Config::organisms/$genome/UserSpace/ExpressionData";
    if (-s "$dir/experiment.vectors")
    {
	my @v = map { chop; [split(/,/,$_)] } `cat $dir/experiment.vectors`;
	return \@v;
    }
    return undef;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3