[Bio] / FigWebServices / families_on_tree.cgi Repository:
ViewVC logotype

View of /FigWebServices/families_on_tree.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (annotate)
Mon May 5 18:05:26 2014 UTC (5 years, 6 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.3: +15 -5 lines
allow painting of multiple families

########################################################################
use CGI;


if (-f "$FIG_Config::data/Global/why_down")
{
    local $/;
    open my $fh, "<$FIG_Config::data/Global/why_down";
    my $down_msg = <$fh>;
    
    print CGI::header();
    print CGI::head(CGI::title("SEED Server down"));
    print CGI::start_body();
    print CGI::h1("SEED Server down");
    print CGI::p("The seed server is not currently running:");
    print CGI::pre($down_msg);
    print CGI::end_body();
    exit;
}

if ($FIG_Config::readonly)
{
    CGI::param("user", undef);
}
########################################################################
use CGI;


if (-f "$FIG_Config::data/Global/why_down")
{
    local $/;
    open my $fh, "<$FIG_Config::data/Global/why_down";
    my $down_msg = <$fh>;
    
    print CGI::header();
    print CGI::head(CGI::title("SEED Server down"));
    print CGI::start_body();
    print CGI::h1("SEED Server down");
    print CGI::p("The seed server is not currently running:");
    print CGI::pre($down_msg);
    print CGI::end_body();
    exit;
}

if ($FIG_Config::readonly)
{
    CGI::param("user", undef);
}
########################################################################
# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

use DB_File;
use URI::Escape;  # uri_escape
use gjoseqlib;
use HTML;
use strict;
use CGI;
my $cgi = new CGI;
use SeedEnv;
use tree_utilities;

if (0) {
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params) {
		print "$_\t:",join(",",$cgi->param($_)),":\n";
    }
    exit;
}

my $html     = [];

my $node     = $cgi->param('node');
my $family   = $cgi->param('family');
my $ali      = $cgi->param('alignment');
my $tree     = $cgi->param('tree');
my $request  = $cgi->param('request');
my $keywords = $cgi->param('keywords');
my $function = $cgi->param('function');
my $dataD    = $cgi->param('dataD');
my $dataDir  = "/homes/overbeek/Ross/MakeCS/Data/CS";
my $dataDF  = "$dataDir/$dataD";

if ($request eq "show_otus")
{
    &show_otus($cgi,$dataDir);
    exit;
}
elsif (($request eq "show_options_for_otu") && $dataD)
{
    &show_options_for_otu($cgi,$dataD);
    exit;
}

if (($request eq "show_virulence_functions") && (-s "$dataDF/virulence.functions"))
{
    &show_virulence_functions($cgi,$dataDF,$html);
}
elsif (($request eq 'show_indexed_funcs') && $keywords)
{
    &show_indexed_funcs($cgi,$dataDF,$html,$keywords); 
}
elsif (($request eq "show_func") && $function)
{
    $function =~ s/^\s+//;
    $function =~ s/\s+$//;
    &show_func($cgi,$dataDF,$html,$function);
}
elsif (($request eq "show_ali_or_occurs_tree") && $ali)
{
    &show_ali($cgi);
    exit;
}
elsif (($request eq "show_ali_or_occurs_tree") && $tree)
{
    &show_occurs_tree($cgi,$dataDF);
    exit;
}
elsif (($request eq "show_family_pegs") && $family)
{
    &show_family1($cgi,$dataDF,$html,$family);
}
elsif (($request eq "show_family_tree") && $family)
{
    &show_family2($cgi,$dataDF,$html,$family);
}
elsif (($request eq "show_node") && $node)
{
    &show_changes($cgi,$dataDF,$html,$node);
}
elsif ($request eq "show_otu_tree")
{
    &show_otu_tree($cgi,$dataDF,$html,'families');
}
elsif ($request eq "show_adjacency")
{
    &show_otu_tree($cgi,$dataDF,$html,'adjacency');
}
elsif ($request eq "show_clusters")
{
    &show_clusters($cgi,$dataDF,$html);
}
elsif ($request eq "show_kovbassa")
{
    &show_kovbassa_signatures($cgi,$dataDF,$html);
}
elsif ($request eq "compute_kovbassa_sigs")
{
    &compute_kovbassa_signatures($cgi,$dataDF,$html);
}
else
{
    push(@$html,"<h1>Invalid request</h1>");
}
&HTML::show_page($cgi,$html);
exit;

sub show_otu_tree {
    my($cgi,$dataDF,$html,$type) = @_;
    
    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;
    my @tree;
    my @tmp = `cat $dataDF/readable.tree`;
    foreach $_ (@tmp)
    {
	if ($_ =~ /^(.*\+ )(n\d+)$/)
	{
	    my $start = $1;
	    my $node  = $2;
	    my $link  = &show_node_link($dataD,$node,$type);
	    push(@tree,"$start$link\n");
	}
	elsif ($_  =~ /^(.*\- )(\d+\.\d+)(:.*)$/)
	{
	    my $start = $1;
	    my $node  = $2;
	    my $end = $3;
	    push(@tree,"$start",&show_node_link($dataD,$node,$type),"$end\n");
	}
	else
	{
	    push(@tree,$_);
	}
    }

    my $tree = join("",@tree);
    push(@$html,"<pre>\n$tree\n</pre>\n");
}

sub fam_in_set {
    my($fam,$set) = @_;

    my @fams = split(/,/,$set);
    my $i;
    for ($i=0; ($i < @fams) && ($fam ne $fams[$i]); $i++) {}
    return ($i < @fams);
}

sub show_family1 {
    my($cgi,$dataDF,$html,$families) = @_;

    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;

    my $func;
    foreach $_ (`cut -f1,2 $dataDF/families.all`)
    {
#	if (($_ =~ /^(\d+)\t(\S[^\t]*\S)/) && ($1 == $families)) { $func = $2 }
	if (($_ =~ /^(\d+)\t(\S[^\t]*\S)/) && &fam_in_set($1,$families)) { $func = $2 }
    }
    my %genome_names = map { ($_ =~ /^(\S+)\t(\S.*\S)/) ? ($1 => $2) : () } `cat $dataDF/genome.names`;
    my $col_hdrs = ['','Genome','Genome Name','Peg'];
    my @tuples   = map { my $tuple = $_; $tuple->[3] = &peg_link($tuple->[3]); $tuple } 
	           sort { $a->[1] cmp $b->[1] } 
	           map { (($_ =~ /^(\d+)\t([^\t]*)\t(fig\|(\d+\.\d+)\.peg\.\d+)/) && &fam_in_set($1,$families)) ? 
			     [$cgi->checkbox(-name => "check.$3",-checked => 1,-label => ''),
			      $4,
			      $genome_names{$4},
			      $3] : () }
	           `cut -f1,2,4 $dataDF/families.all`;
    push(@$html,$cgi->start_form(-action => "./families5_on_tree.cgi"));
    push(@$html,"<br><br>\n",&HTML::make_table($col_hdrs,\@tuples,"Distribution of Family $families: $func"),$cgi->hr,"\n");
    push(@$html,"<input type=hidden name=dataD value=$dataD>\n");
    push(@$html,"<input type=hidden name=request value=show_ali_or_occurs_tree>\n");
    push(@$html,$cgi->submit('alignment'),
                $cgi->checkbox(-name => "dna",-checked => 0,-label => 'DNA'),
	        "<br><br>",
	        $cgi->submit('tree'),
                $cgi->end_form());
}

sub show_family2 {
    my($cgi,$dataDF,$html,$family) = @_;
#   if union is specified, you want to treat all families with the same function
#   as "present"
    my $union = $cgi->param('union');
    my $func;   # used only if union is requested
    my @fam_func_peg_tuples = map { chomp; [split(/\t/,$_)] } `cut -f1,2,4 $dataDF/families.all`;
    if ($union) { 
	my @tmp = grep { $_->[0] == $family } @fam_func_peg_tuples; 
	$func = $tmp[0]->[1];
    }

    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;

    push(@$html,"<br><hr>",&show_fam_table_link($dataDF,$family),"<br><br>");
    my @tuples = grep { $union ? ($_->[1] eq $func) : ($_->[0] == $family) } 
                 @fam_func_peg_tuples;
    my $func   = (@tuples > 0) ? $tuples[0]->[1] : 'hypothetical protein';
    my %has    = map { ($_->[2] =~ /fig\|(\d+\.\d+)/) ? ($1 => 1) : () } @tuples;
    my @tree;
    my %node_vals;
    if (! -s "$dataDF/fams.on.tree.db")
    {
	%node_vals = map { (($_ =~ /^(\S+)\t(n\d+)\t(\S+)/) && ($family eq $1)) ? ($2 => $3) : () } `cat $dataDF/families.on.tree`;
    }
    else 
    {
	if ($dataD ne "bacteria") { die "only bacteria have a db"; }
	my $bacteriaD          = "/homes/overbeek/Ross/MakeCS/Data/CS/bacteria";
	my $fams_on_tree_db    = "$bacteriaD/fams.on.tree.db";
	my %fams_on_tree;
	tie %fams_on_tree,"DB_File", $fams_on_tree_db, O_RDWR, 0666, $DB_HASH 
	    or die "Cannot open file '$fams_on_tree_db': $!\n";
	my $node_vals = $fams_on_tree{$family};
	%node_vals = map { $_ =~ s/://g; ($_ =~ /^(\S+),(\S+)/) ? ($1 => $2) : () } split("::",$node_vals);
    }

    my @tmp = `cat $dataDF/readable.tree`;
    foreach $_ (@tmp)
    {
	if ($_  =~ /^(.*\- )(\d+\.\d+)(:.*)$/)
	{
	    my $start   = $1;
	    my $genome  = $2;
	    my $end     = $3;
	    if ($has{$genome})
	    {
		push(@tree,"$start<b>$genome$end</b>\n");
	    }
	    else
	    {
		push(@tree,"$start$genome$end\n");
	    }
	}
	elsif (($_ =~ /^(.*\+ )(n\d+)$/) && (! $union))
	{
	    my $start = $1;
	    my $node  = $2;
	    my $status = $node_vals{$node};
	    my $to_show = $node . ":$status";
	    push(@tree,"$start$to_show\n");
	}
	else
	{
	    push(@tree,$_);
	}
    }
    my $tree = join("",@tree);
    push(@$html,"<br><h2>$func</h2><br><pre>\n$tree\n</pre>\n");
}

sub show_changes {
    my($cgi,$dataDF,$html,$node) = @_;

    my $type = $cgi->param('type');
    if ($type eq 'families')
    {
	&show_changes_families($cgi,$dataDF,$html,$node);
    }
    else
    {
	&show_changes_adjacency($cgi,$dataDF,$html,$node);
    }
}

sub show_changes_adjacency {
    my($cgi,$dataDF,$html,$node) = @_;
    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;
    my $col_hdrs = ['Family','Function','Ancestral','New','Compare'];
    my @events = grep { $node eq $_->[1] } 
                 map { ($_ =~ /(\S+)\t(\S+)\t(\d+):\S+\t(\d+):\S+\t(\d+)/) ? [$1,$2,$3,$4,$5] : () }
                 `cat $dataDF/placed.events`;
    my %families = map { ($_->[2] => [$_->[3],$_->[4]])} @events;
    my %pegs_needed  = map { (($_->[2] => 1), ($_->[3] => 1),($_->[4] => 1)) } @events;
    my %fam_peg  = map { my $x; 
			 (($_ =~ /^(\d+)\t\S+\t(\d+)\t\S+\t\S+\t(\S+)\t(\S+)/) && 
		          ($x = $families{$1}) && (($x->[0] eq $2) || ($x->[1] eq $2))) ? ("$1,$2" => $3) : () }
                   `cat $dataDF/adjacency.of.unique`;
    my %peg_to_func = map { (($_ =~ /^([^t]+)\t([^\t]*)\t(\S+)/) && $pegs_needed{$1}) ? ($3 => $2) : () } `cut -f1,2,4 $dataDF/families.all`;
    my @rows;
    my $ancestor;
    foreach my $event (@events)
    {
	my($anc,$node,$fam,$fam1,$fam2) = @$event;
	$ancestor = $anc;
	my $peg1 = $fam_peg{"$fam,$fam1"};
	my $peg2 = $fam_peg{"$fam,$fam2"};
	my $func = $peg_to_func{$peg1};
	if ($peg1 && $peg2 && $func)
	{
	    push(@rows,[&show_fam_table_link($dataDF,$fam),
			$func,
			&peg_link($peg1),
			&peg_link($peg2),
	                &compare_link([$peg1,$peg2])]);
	}
    }
    push(@$html,&HTML::make_table($col_hdrs,\@rows,"Changes in Adjacency from $ancestor"));
}


sub show_changes_families {
    my($cgi,$dataDF,$html,$node) = @_;

    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;

    my %func = map { ($_ =~ /^(\d+)\t(\S[^\t]*\S)/) ? ($1 => $2) : () } `cut -f1,2 $dataDF/families.all`;
    my $col_hdrs = ['Show Where','Show PEGs','Family','Function','Clusters','Coupling'];
    my @tmp = grep { (($_ =~ /^\S+\t(\S+)/) && ($1 eq $node)) } 
              `cat /$dataDF/where.shifts.occurred`;
    my @tabG  = sort { ($a->[4] cmp $b->[4]) or ($a->[3] <=> $b->[3]) }
	        map { ($_ =~ /^(\S+)\t\S+\t(\S+)\t0\t1/) ? [&show_fam_links($dataDF,$2),$1,$2,$func{$2}] : () } 
                @tmp;
    # tabG entries are [linkT,linkP,ancestor,fam,func]

    # try to pick up the ancestor node from the first entry in @tabG
    # If you cannot get it, try to take it from @tabL
    my $anc = (@tabG > 0) ? $tabG[0]->[-3] : undef;
    foreach $_ (@tabG) { splice(@$_,2,1) }   ### get rid of ancestor
    ## tabG entries are [linkT,linkP,fam,func]

    my @tabL  = sort { ($a->[4] cmp $b->[4]) or ($a->[3] <=> $b->[3]) }
	        map { ($_ =~ /^(\S+)\t\S+\t(\S+)\t1\t0/) ? [&show_fam_links($dataDF,$2),$1,$2,$func{$2}] : () } 
                @tmp;
    if (! $anc)
    {
	$anc = (@tabL > 0) ? $tabL[0]->[-3] : '';
    }
    foreach $_ (@tabL) { splice(@$_,2,1) }   ### get rid of ancestor

## @tabG and @tabL are of the form [link-to-tree,link-to-peg-display,family,function]]
## we now add coupling data.

    my $with_couplingL = &build_table(\@tabL,$dataDF);
    my $with_couplingG = &build_table(\@tabG,$dataDF);
    
    push(@$html,&HTML::make_table($col_hdrs,$with_couplingG,"Families Gained from Ancestor $anc"),$cgi->hr,"\n");
    push(@$html,&HTML::make_table($col_hdrs,$with_couplingL,"Families Lost from Ancestor $anc"),$cgi->hr,"\n");
}

sub coupling_data {
    my($dataDF,$famsH) = @_;

    my $coupled = {};
    foreach $_ (`cat $dataDF/coupled.families`)
    {
	if (($_ =~ /(\S+)\t(\S+)\t(\d+)/) && $famsH->{$1} && $famsH->{$2} && ($1 ne $2))
	{
	    push(@{$coupled->{$1}},$2);
	}
    }
    return $coupled;
}

sub cluster_link_and_cluster_html {
    my($family,$coupledH,$fam_to_func,$dataD) = @_;

    my $cluster_link = '';
    my $coupled = $coupledH->{$family};
    my $coupled_html = "";
    if ($coupled && (@$coupled > 0))
    {
	$cluster_link = &show_clusters_link($dataD,$family,$coupled);
	$coupled_html = join("<br>",map { &show_fam_table_link($dataD,$_) . '(' .
					  &show_func_link($dataD,$fam_to_func->{$_}) . ')'
				        } @$coupled);
    }
    return ($cluster_link,$coupled_html);
}

sub build_table {
    my($tab,$dataDF) = @_;
    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;

    my %famH = map { ($_->[-2] => 1) } @$tab;
    my %fam_to_func = map { ($_->[2] => $_->[3]) } @$tab;
    my $coupledH = &coupling_data($dataDF,\%famH);
    my @with_coupling;
    foreach my $tuple (@$tab)
    {
	my($link1,$link2,$family,$function) = @$tuple;
	$tuple->[3] = &show_func_link($dataD,$function);
	my($cluster_link,$coupled_html) = &cluster_link_and_cluster_html($family,$coupledH,\%fam_to_func,$dataD);
	$tuple->[4] = $cluster_link;
	$tuple->[5] = $coupled_html; 
	push(@with_coupling,$tuple);
    }
    return \@with_coupling;
}

sub show_fam_links {
    my($dataDF,$fam) = @_;

    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;

    return ("<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_family_tree&family=$fam&dataD=$dataD>tree</a>",
	    "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_family_pegs&family=$fam&pegs=1&dataD=$dataD>PEGs:$fam</a>");
}

sub show_fam_table_link {
    my($dataDF,$fam) = @_;

    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;

    return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?family=$fam&pegs=1&request=show_family_pegs&dataD=$dataD>$fam: PEGs</a>";
}

sub show_func {
    my($cgi,$dataDF,$html,$function) = @_;
    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;

    my %counts;
    my %fams = map { my $x; 
		     if (($_ =~ /^(\d+)\t(\S[^\t]*\S)/) && ($2 eq $function)) 
                     {
			 $counts{$1}++;
                         ($1 => 1);
		     }
		     else
		     {
			 () 
		     }
                   } `cat $dataDF/families.all`;
    my $col_hdrs = ['Family','PEGs in Family','Distribution on Tree','Size'];
    my @tab      = map { [$_,
			  &show_fam_table_link($dataDF,$_),
			  &show_fam_on_tree_link($dataDF,$_),
			  $counts{$_}
                         ] } sort { $a <=> $b } keys(%fams);
    push(@$html,"<h1>$function</h1>", &HTML::make_table($col_hdrs,\@tab,"Families with Function"),"<hr>");

    my @fams = keys(%fams);
    if (@fams > 1)
    {
	push(@$html,"<br>",&show_fam_on_tree_link($dataDF,$fams[0],"union")," with union of families<br><br>");
    }
    push(@$html,"<hr>");

    my @gained = map { (($_ =~ /^(\S+)\t(\S+)\t(\d+)\t0\t1/) && $fams{$3}) ? 
			 [$1,&show_node_link($dataD,$2,'families'),&show_fam_table_link($dataDF,$3)] : () }
               `cat $dataDF/where.shifts.occurred`;
    push(@$html,&HTML::make_table(['Ancestor','Descendant','Family'],\@gained,"Where $function was GAINED"));
    push(@$html,"<hr><br>");
    my @lost = map { (($_ =~ /^(\S+)\t(\S+)\t(\d+)\t1\t0/) && $fams{$3}) ? 
			 [$1,&show_node_link($dataD,$2,'families'),&show_fam_table_link($dataDF,$3)] : () }
               `cat $dataDF/where.shifts.occurred`;
    push(@$html,&HTML::make_table(['Ancestor','Descendant','Family'],\@lost,"Where $function was LOST"));
}

sub show_indexed_funcs {
    my($cgi,$dataDF,$html,$keywords) = @_;

    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;

    my $functions_in_fams = &functions_in_at_least_one_family($dataDF);
#    $keywords = "$dataD " . $keywords; ### tell the user to add it,if necessary

    my %funcs_to_show;
    foreach my $func (`svr_sphinx_indexing -k \'$keywords\' | cut -f1 | svr_function_of | cut -f2`)
    {
	chomp $func;
	$func =~ s/\s*\#.*$//;
	if ($functions_in_fams->{$func})
	{
	    $funcs_to_show{$func}++;
	}
    }
    my @funcs = sort { $funcs_to_show{$b} <=> $funcs_to_show{$a} } keys(%funcs_to_show);
    if (@funcs == 0)
    {
	push(@$html,"<h1>Sorry, no functions matched</h1>\n");
    }
    else
    {
	my @links = map { [&show_func_link($dataD,$_)] } @funcs;
	push(@$html,&HTML::make_table(['Possible Functions'],\@links,"Possible functions - Select to find nodes where shifts occurred"));
    }
}

sub show_virulence_functions {
    my($cgi,$dataDF,$html) = @_;

    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;

    my $functions_in_fams = &functions_in_at_least_one_family($dataDF);
    my @virulence_functions = map { chomp; $functions_in_fams->{$_} ? $_ : () } `cat $dataDF/virulence.functions`;
    my @links = map { [&show_func_link($dataD,$_)] } sort @virulence_functions;
    push(@$html,&HTML::make_table(['Function Sometimes Associated with Virulence'],
				  \@links,
				  'Functions Known to Be Associated with Virulence in Some Organisms'));
}
sub functions_in_at_least_one_family {
    my($dataDF) = @_;

    my %functions_in_fams = map { chomp; ($_ => 1) }  `cut -f2 $dataDF/families.all`;
    return \%functions_in_fams;
}

sub subsystems_of {
    my($fig,$reaction_to_roles,$reaction) = @_;

    my $roles_and_pegs = $reaction_to_roles->{$reaction};
    my @roles          = map { my $x = $_; $x =~ s/^[^:]+://; $x } @{$reaction_to_roles->{$reaction}};
    my $printable_roles = "";
    if (@roles > 0)
    { 
	my %tmp = map { $_ =~ /^([^:]+):(\S.*\S)/; ($2 => $1) } @$roles_and_pegs;
	my @tmp = map { &peg_link($tmp{$_}) . "<br>" . $_ } sort keys(%tmp);
	$printable_roles = join(",",@tmp);
    }
    my %subsys;
    foreach my $role (@roles)
    {
	foreach my $s ($fig->role_to_subsystems($role))
	{
	    $subsys{$s} = 1;
	}
    }
    return join("\t",sort keys(%subsys)) . "<br>" . $printable_roles;
}

sub show_ali {
    my($cgi) = @_;
    my @checked = map { ($_ =~ s/^check.//) ? $_ : () } $cgi->param();
    if (@checked > 1) 
    {
	my %checked = map { $_ => 1 } @checked;
	my $dataD    = $cgi->param('dataD');
	my $dataDir  = "/homes/overbeek/Ross/MakeCS/Data/CS";
	my $dataDF  = "$dataDir/$dataD";

	my $tmp_seqs_in = "$FIG_Config::temp/tmp$$.seqs.in";
	my $tmp_seqs_ali = "$FIG_Config::temp/tmp$$.ali";
	open(SEQS,">$tmp_seqs_in") || die "could not open $tmp_seqs_in";
	my $dna = $cgi->param('dna');
	opendir(GENOMES,"$dataDF/Seqs") || die "could not open $dataDF/Seqs";
	my @genomes = grep { $_ !~ /^\./ } readdir(GENOMES);
	closedir(GENOMES);
	foreach my $g (@genomes)
	{
	    my @seqs;
	    if($dna)
	    {
		@seqs = grep { $checked{$_->[0]} } &gjoseqlib::read_fasta("$dataDF/PegDNA/$g");
	    }
	    else
	    {
		@seqs = grep { $checked{$_->[0]} } &gjoseqlib::read_fasta("$dataDF/Seqs/$g");
	    }
	    foreach my $tuple(@seqs)
	    {
		my($peg,undef,$seq) = @$tuple;
		print SEQS ">$peg\n$seq\n";
	    }
	}
	close(SEQS);

	&SeedUtils::run("svr_align_seqs < $tmp_seqs_in | svr_ali_to_html > $tmp_seqs_ali");
	open(SEQS,"<$tmp_seqs_ali") || die "could not open $tmp_seqs_ali";
	print $cgi->header;
	while (defined($_ = <SEQS>))
	{
	    $_ =~ s/\b(fig\|\d+\.\d+\.peg\.\d+)/<a target=_blank href=http:\/\/pubseed.theseed.org\/seedviewer.cgi?page=Annotation&feature=$1>$1<\/a>/g;
	    print $_;
	}
	close(SEQS);
	unlink($tmp_seqs_in,$tmp_seqs_ali);
    }
}

sub show_occurs_tree {
    my($cgi,$dataDF) = @_;

    my @checked = map { ($_ =~ s/^check.//) ? $_ : () } $cgi->param();
    if (@checked > 3) 
    {
	my %genome_name = 
	    map { ($_ =~ /^(\d+\.\d+)\t(\S.*\S)/) ? ($1 => $2) : () }
	    `cat $dataDF/genome.names`;
	my $tmp_labels = "$FIG_Config::temp/tmp$$.labels";
	open(LABELS,">$tmp_labels") || die "could not open $tmp_labels";
	foreach my $peg (@checked) 
	{
	    if ($peg =~ /^fig\|(\d+\.\d+)/)
	    {
		my $g = $genome_name{$1};
		my $link = &peg_link($peg);
		print LABELS "$peg\t$link: $g\n";
	    }
	}
	close(LABELS);
	my $tmp_seqs = "$FIG_Config::temp/tmp$$.seqs";
	open(SEQS,"| svr_fasta -fasta -protein | svr_align_seqs | svr_tree | sketch_tree -m -l $tmp_labels > $tmp_seqs")
	    || die "could not open $tmp_seqs";
	foreach $_ (@checked)
	{
	    print SEQS "$_\n";
	}
	close(SEQS);
	open(SEQS,"<$tmp_seqs") || die "could not open $tmp_seqs";
	print $cgi->header;
	print "<pre>\n";
	while (defined($_ = <SEQS>))
	{
	    print $_;
	}
	print "</pre>\n";
	close(SEQS);
	unlink($tmp_seqs,$tmp_labels);
    }
}
sub show_options_for_otu {
    my($cgi,$g) = @_;

    print $cgi->header;
    print "<h3>$g</h3>\n";
    print "<ol>\n";
    print "<li> <a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_otu_tree&dataD=$g>Gains/losses of Gene Families</a>\n";
    print "<li><a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_kovbassa&dataD=$g>Genes that Distinguishing Families (Genes as Signatures)</a>\n";
    print "<li><a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_adjacency&dataD=$g>Changes in Adjacency</a>\n";
    print "</ol>\n";

    push(@$html,"<br>",
                $cgi->start_form(-action => "./families_on_tree.cgi"),
	        $cgi->textfield(-name => 'keywords',-size => 100),
	        $cgi->hidden(-override => 1,-name => 'request',-value => 'show_indexed_funcs'),
	        $cgi->hidden(-override => 1,-name => 'dataD',-value => $dataD),
	        $cgi->submit('show functions matching keywords'),$cgi->end_form(),
                "<hr>", 
	        &virulence_functions_link($cgi,$dataDF));
    foreach $_ (@$html) { print $_ }

    my $txt = <<END;
 <pre><br><hr>
We think of changes as belonging to three broad classes:

     1.  Genes that are gained or lost,

     2.  Rearrangements, which we often think of as "changes in adjacency", and

     3.  SNPs

Users concerned about issues relating to virulence will probably focus on the 
first two classes, unless that have high-quality sequences for a number of
very close genomes (and they can separate these into virulent and nonvirulent).

On the other hand users interested in what makes a production strain display
desirable properties may well have a number of very, very close strains, and for them
SNPs might well be the most interesting form of change.

Lurking behind efforts to accurately display these three types of changes all
depend heavily on our ability to match up genese between genomes.  That is,
there are (almost inevitably) a set of protein families that are the
basis of comparison, and these are often of dubious quality.  Be aware that
we have attempted to build fairly accurate families of isofunctional homologs
(i.e., genes that have the same function and a common ancestor), but they are
far from perfect.

END
    print $txt;
}

sub show_otus {
    my($cgi,$datadir) = @_;

    print $cgi->header;
    if (opendir(GENERA,$dataDir))
    {
	my @genera = grep { $_ !~ /^\./ } readdir(GENERA);
	closedir(GENERA);
	print "<h1>What Changed?</h1>\n";
	print "<h2><a target=_blank href=\"http://bioseed.mcs.anl.gov/~overbeek/what_changed.html\">Getting Started: a short Tutorial</a></h2>\n";
	print "<h2>Genera Available</h2>\n";
	foreach my $g (sort @genera)
	{
	    print "<h3><a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_options_for_otu&dataD=$g>$g</a>\n";
	}
    }
    else
    {
	print "<h1>The dataD parameter is invalid\n";
    }
}

sub peg_link {
    my($peg) = @_;

    return "<a target=_blank href=http://pubseed.theseed.org/seedviewer.cgi?page=Annotation&feature=$peg>$peg</a>";
}

sub compare_link {
    my($pegs) = @_;
    my @genomes = map { ($_ =~ /^fig\|(\d+\.\d+)/) ? $1 : () } @$pegs;
    my $args = join("&",map { "show_genome=$_" } @genomes);
    return "<a target=_blank href=http://pubseed.theseed.org/seedviewer.cgi?page=Annotation&feature=" .
	     $pegs->[0] . "&$args>Compare Regions</a>";
}

sub show_func_link {
    my($dataD,$function) = @_;

    if (! $function) { return '' }
    my $functionQ = uri_escape($function);
    return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_func&function=$functionQ&dataD=$dataD>$function</a>"
}

sub virulence_functions_link {
    my($cgi,$dataDF) = @_;

    if ((-s "$dataDF/virulence.functions") && ($dataDF =~ /([^\/]+)$/))
    {
	my $dataDQ = uri_escape($1);
	return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_virulence_functions&dataD=$dataDQ>Some Posssible Functions Associated with Virulence</a>";
    }
    return '';
}

sub show_fam_on_tree_link {
    my($dataDF,$fam,$union) = @_;

    $union = defined($union) ? $union : 0;
    if ($dataDF =~ /([^\/]+)$/)
    {
	my $dataDQ = uri_escape($1);
	return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?family=$fam&union=$union&dataD=$dataDQ&request=show_family_tree>Family on Tree</a>";
    }
    return '';
}

sub show_node_link {
    my($dataD,$node,$type) = @_;
    if ($type eq "families")
    {
	return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_node&dataD=$dataD&node=$node&type=families>$node</a>";
    }
    else
    {
	return "<a target=_blank href=http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_node&dataD=$dataD&node=$node&type=adjacency>$node</a>";
    }
}

sub show_clusters_link {
    my($dataD,$family,$coupled) = @_;
    my %tmp = map { $_ => 1 } ($family,@$coupled);
    my $families = join(",",sort { $a <=> $b} keys(%tmp));
    return "<a target=_blank href=\"http://bioseed.mcs.anl.gov/ross/FIG/families_on_tree.cgi?request=show_clusters&dataD=$dataD&families=$families\">$family</a>";
}

sub show_clusters {
    my($cgi,$dataDF,$html) = @_;

    my $families = $cgi->param('families');
    my @families = split(/,/,$families);
    my %families = map { $_ => 1 } @families;
    my %genome_names = map { ($_ =~ /^(\S+)\t(\S.*\S)/) ? ($1 => $2) : () } `cat $dataDF/genome.names`;
    my @genome_pegN_fam_func = sort { ($a->[0] <=> $b->[0]) or ($a->[1] <=> $b->[1]) }
	                       map { (($_ =~ /^(\S+)\t([^\t]*)\t[^\t]*\tfig\|(\d+\.\d+)\.peg\.(\d+)/) && $families{$1}) ?
					 [$3,$4,$1,$2] : () 
                                   } `cat $dataDF/families.all`;
    push(@$html,$cgi->h1('Relevant Clusters'));
    my $col_hdrs = ['Family','Function','PEG'];
    my $last = shift @genome_pegN_fam_func;
    while ($last)
    {
	my $last_g    = $last->[0];
	my $last_pegN = $last->[1];
	my @set;
	while ($last && ($last_g == $last->[0]) && &close($last_pegN,$last->[1]))
	{
	    $last_pegN = $last->[1];
	    push(@set,[$last->[2],$last->[3],&peg_link("fig|" . $last_g . ".peg." . $last_pegN)]);
	    $last = shift @genome_pegN_fam_func;
	}
	if (@set > 1)
	{
	    push(@$html,&HTML::make_table($col_hdrs,\@set,"Cluster for $last_g: $genome_names{$last_g}"));
	    push(@$html,"<hr><br><br>\n");
	}
    }
}

sub close {
    my($pegN1,$pegN2) = @_;

    return abs($pegN2 - $pegN1) <= 7;
}

sub show_kovbassa_signatures {
    my($cgi,$dataDF,$html) = @_;

    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;
    my @tree;
    my @tmp = `cat $dataDF/readable.tree`;
    foreach $_ (@tmp)
    {
	if ($_ =~ /^(.*\+ )(n\d+)$/)
	{
	    my $start = $1;
	    my $node  = $2;
	    my $link = $cgi->radio_group(-nolabels => 1,
					 -name => "radio_$node",
					 -override => 1,
					 -values => [1,0,2],
					 -default => 0);
	    push(@tree,"$start$node$link\n");
	}
	elsif ($_  =~ /^(.*\- )(\d+\.\d+)(:.*)$/)
	{
	    my $start = $1;
	    my $node  = $2;
	    my $end = $3;
	    my $link = $cgi->radio_group(-nolabels => 1,
					 -name => "radio_$node",
					 -override => 1,
					 -values => [1,0,2],
					 -default => 0);
	    push(@tree,"$start",$link,"$end\n");
	}
	else
	{
	    push(@tree,$_);
	}
    }
    my $tree = join("",@tree);
    push(@$html,"<br>",
                $cgi->start_form(-action => "./families_on_tree.cgi"),
	        $cgi->hidden(-override => 1,-name => 'request',-value => 'compute_kovbassa_sigs'),
	        $cgi->hidden(-override => 1,-name => 'dataD',-value => $dataD),
	        "<pre>\n$tree\n</pre>\n",
	        $cgi->submit('Compute Gene Signatures'));
}

sub compute_kovbassa_signatures {
    my($cgi,$dataDF,$html) = @_;

    $dataDF =~ /([^\/]+)$/; 
    my $dataD = $1;
    my @out;
    my @in;
    my @params = grep { $_ =~ /^radio/ } $cgi->param();
    foreach my $param (@params)
    {
	if ($param =~ /^radio_(\S+)/)
	{
	    my $node = $1;
	    if ($cgi->param($param) == 1)
	    {
		push(@out,$node);
	    }
	    elsif ($cgi->param($param) == 2)
	    {
		push(@in,$node);
	    }
	}
    }
    my $families = "$dataDF/families.all";
    my $tree = &parse_newick_tree(join("",`cat $dataDF/labeled.tree`));
    my $indexP = &tree_utilities::tree_index_tables($tree);
    my $genomes = {};
    &in_set(\@out,$tree,$indexP,1,$genomes);
    &in_set(\@in,$tree,$indexP,2,$genomes);
    my $s1N = grep { $genomes->{$_} == 1 } keys(%$genomes);
    my $s2N = grep { $genomes->{$_} == 2 } keys(%$genomes);

    my %by_family;
    my %fam2func;
    foreach $_ (map { (($_ =~ /^(\S+)\t([^\t]*)\tfig\|(\d+\.\d+)/) && $genomes->{$3}) ? [$1,$2,$3] : () }
		`cut -f1,2,4 $families`)
    {
	my($fam,$func,$g) = @$_;
	$by_family{$fam}->{$g} = 1;
	$fam2func{$fam} = $func;
    }
    
    my @scored_families;
    my %famH;
    foreach my $f (keys(%by_family))
    {
	my $fH = $by_family{$f};
	my @hits = keys(%$fH);
	my $s1Hits = grep { $genomes->{$_} == 1 } @hits;
	my $s2Hits = grep { $genomes->{$_} == 2 } @hits;
	my $counts1 = [$s1Hits,$s1N-$s1Hits];
	my $counts2 = [$s2Hits,$s2N-$s2Hits];
	my($din,$dout)  = &discriminates($counts2,$counts1);
	my $disc = $din+$dout;
	if ($disc > 1.5)
	{
	    $famH{$f} = 1;
	    push(@scored_families,[sprintf("%0.3f",$disc),$f]);
	}
    }
    my $coupledH = &coupling_data($dataDF,\%famH);
    my $col_hdrs = ['Score','Tree','Family','Mean Length','Function','Clusters','Coupling'];
    my %mean_len = map { ($_ =~ /^(\S+)\t(\S+)/) ? ($1 => $2) : () } `cut -f1,6 $dataDF/families.all`;
    my $rows = [];
    foreach my $tuple (sort { $b->[0] <=> $a->[0] } @scored_families)
    {
	my($sc,$fam) = @$tuple;
	my $func = $fam2func{$fam};
	my $func_link = &show_func_link($dataD,$func);
	my($cluster_link,$coupled_html) = &cluster_link_and_cluster_html($fam,$coupledH,\%fam2func,$dataD);
	push(@$rows,[$sc,
	             &show_fam_links($dataDF,$fam),
                     $mean_len{$fam},
                     $func_link,
	             $cluster_link,
	             $coupled_html
	            ]);
    }
    push(@$html,&HTML::make_table($col_hdrs,$rows,"Families that Distinguish"));
}

sub in_set {
    my($nodes,$tree,$indexP,$which,$genomes) = @_;

    foreach my $x (@$nodes)
    {
	if ($x =~ /^(\d+\.\d+)/)
	{
	    $genomes->{$1} = $which;
	}
	elsif (($x =~ /^(n\d+)/) && (my $node = &label_to_node($indexP,$1)))
	{
	    my $tips = &tips_of_tree($node);
	    foreach my $tip (@$tips)
	    {
		$genomes->{$tip} = $which;
	    }
	}
    }
}

sub discriminates {
    my($xin,$xout) = @_;

    my $sx    = &vector_sum($xin);
    my $sy    = &vector_sum($xout);
    my $xy    = &scalar_product($xin,$xout);
    my $xx    = &scalar_product($xin,$xin);
    my $yy    = &scalar_product($xout,$xout);

    my $din   = (($sx != 0) && ($yy != 0)) ? (1 - (($sy * $xy) / ($sx * $yy))) : 0;
    my $dout  = (($sy != 0) && ($xx != 0)) ? (1 - (($sx * $xy) / ($sy * $xx))) : 0;
    return ($din,$dout);
}

sub vector_sum {
    my($v) = @_;

    my $sum = 0;
    foreach $_ (@$v) 
    { 
	$sum += $_;
    }
    return $sum;
}

sub scalar_product {
    my($x,$y) = @_;
    
    if (@$x != @$y)
    {
	print STDERR &Dumper($x,$y);
	die "incompatable";
    }
    my $i;
    my $sum = 0;
    for ($i=0; ($i < @$x); $i++)
    {
	$sum += $x->[$i] * $y->[$i];
    }
    return $sum;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3