######################################################################## use CGI; if (-f "$FIG_Config::data/Global/why_down") { local $/; open my $fh, "<$FIG_Config::data/Global/why_down"; my $down_msg = <$fh>; print CGI::header(); print CGI::head(CGI::title("SEED Server down")); print CGI::start_body(); print CGI::h1("SEED Server down"); print CGI::p("The seed server is not currently running:"); print CGI::pre($down_msg); print CGI::end_body(); exit; } if ($FIG_Config::readonly) { CGI::param("user", undef); } ######################################################################## use CGI; if (-f "$FIG_Config::data/Global/why_down") { local $/; open my $fh, "<$FIG_Config::data/Global/why_down"; my $down_msg = <$fh>; print CGI::header(); print CGI::head(CGI::title("SEED Server down")); print CGI::start_body(); print CGI::h1("SEED Server down"); print CGI::p("The seed server is not currently running:"); print CGI::pre($down_msg); print CGI::end_body(); exit; } if ($FIG_Config::readonly) { CGI::param("user", undef); } ######################################################################## # -*- perl -*- # # Copyright (c) 2003-2006 University of Chicago and Fellowship # for Interpretations of Genomes. All Rights Reserved. # # This file is part of the SEED Toolkit. # # The SEED Toolkit is free software. You can redistribute # it and/or modify it under the terms of the SEED Toolkit # Public License. # # You should have received a copy of the SEED Toolkit Public License # along with this program; if not write to the University of Chicago # at info@ci.uchicago.edu or the Fellowship for Interpretation of # Genomes at veronika@thefig.info or download a copy from # http://www.theseed.org/LICENSE.TXT. # use DB_File; use URI::Escape; # uri_escape use gjoseqlib; use HTML; use strict; use CGI; my $cgi = new CGI; use SeedEnv; use tree_utilities; if (0) { print $cgi->header; my @params = $cgi->param; print "
\n"; foreach $_ (@params) { print "$_\t:",join(",",$cgi->param($_)),":\n"; } exit; } my $html = []; my $node = $cgi->param('node'); my $family = $cgi->param('family'); my $ali = $cgi->param('alignment'); my $tree = $cgi->param('tree'); my $request = $cgi->param('request'); my $keywords = $cgi->param('keywords'); my $function = $cgi->param('function'); my $dataD = $cgi->param('dataD'); my $dataDir = "/homes/overbeek/Ross/MakeCS/Data/CS"; my $dataDF = "$dataDir/$dataD"; if ($request eq "show_otus") { &show_otus($cgi,$dataDir); exit; } elsif (($request eq "show_options_for_otu") && $dataD) { &show_options_for_otu($cgi,$dataD); exit; } if (($request eq "show_virulence_functions") && (-s "$dataDF/virulence.functions")) { &show_virulence_functions($cgi,$dataDF,$html); } elsif (($request eq 'show_indexed_funcs') && $keywords) { &show_indexed_funcs($cgi,$dataDF,$html,$keywords); } elsif (($request eq "show_func") && $function) { $function =~ s/^\s+//; $function =~ s/\s+$//; &show_func($cgi,$dataDF,$html,$function); } elsif (($request eq "show_ali_or_occurs_tree") && $ali) { &show_ali($cgi); exit; } elsif (($request eq "show_ali_or_occurs_tree") && $tree) { &show_occurs_tree($cgi,$dataDF); exit; } elsif (($request eq "show_family_pegs") && $family) { &show_family1($cgi,$dataDF,$html,$family); } elsif (($request eq "show_family_tree") && $family) { &show_family2($cgi,$dataDF,$html,$family); } elsif (($request eq "show_node") && $node) { &show_changes($cgi,$dataDF,$html,$node); } elsif ($request eq "show_otu_tree") { &show_otu_tree($cgi,$dataDF,$html,'families'); } elsif ($request eq "show_adjacency") { &show_otu_tree($cgi,$dataDF,$html,'adjacency'); } elsif ($request eq "show_clusters") { &show_clusters($cgi,$dataDF,$html); } elsif ($request eq "show_kovbassa") { &show_kovbassa_signatures($cgi,$dataDF,$html); } elsif ($request eq "compute_kovbassa_sigs") { &compute_kovbassa_signatures($cgi,$dataDF,$html); } else { push(@$html,"Invalid request
"); } &HTML::show_page($cgi,$html); exit; sub show_otu_tree { my($cgi,$dataDF,$html,$type) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; my @tree; my @tmp = `cat $dataDF/readable.tree`; foreach $_ (@tmp) { if ($_ =~ /^(.*\+ )(n\d+)$/) { my $start = $1; my $node = $2; my $link = &show_node_link($dataD,$node,$type); push(@tree,"$start$link\n"); } elsif ($_ =~ /^(.*\- )(\d+\.\d+)(:.*)$/) { my $start = $1; my $node = $2; my $end = $3; push(@tree,"$start",&show_node_link($dataD,$node,$type),"$end\n"); } else { push(@tree,$_); } } my $tree = join("",@tree); push(@$html,"\n$tree\n\n"); } sub fam_in_set { my($fam,$set) = @_; my @fams = split(/,/,$set); my $i; for ($i=0; ($i < @fams) && ($fam ne $fams[$i]); $i++) {} return ($i < @fams); } sub show_family1 { my($cgi,$dataDF,$html,$families) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; my $func; foreach $_ (`cut -f1,2 $dataDF/families.all`) { # if (($_ =~ /^(\d+)\t(\S[^\t]*\S)/) && ($1 == $families)) { $func = $2 } if (($_ =~ /^(\d+)\t(\S[^\t]*\S)/) && &fam_in_set($1,$families)) { $func = $2 } } my %genome_names = map { ($_ =~ /^(\S+)\t(\S.*\S)/) ? ($1 => $2) : () } `cat $dataDF/genome.names`; my $col_hdrs = ['','Genome','Genome Name','Peg']; my @tuples = map { my $tuple = $_; $tuple->[3] = &peg_link($tuple->[3]); $tuple } sort { $a->[1] cmp $b->[1] } map { (($_ =~ /^(\d+)\t([^\t]*)\t(fig\|(\d+\.\d+)\.peg\.\d+)/) && &fam_in_set($1,$families)) ? [$cgi->checkbox(-name => "check.$3",-checked => 1,-label => ''), $4, $genome_names{$4}, $3] : () } `cut -f1,2,4 $dataDF/families.all`; push(@$html,$cgi->start_form(-action => "./families5_on_tree.cgi")); push(@$html,"
\n",&HTML::make_table($col_hdrs,\@tuples,"Distribution of Family $families: $func"),$cgi->hr,"\n"); push(@$html,"\n"); push(@$html,"\n"); push(@$html,$cgi->submit('alignment'), $cgi->checkbox(-name => "dna",-checked => 0,-label => 'DNA'), "
", $cgi->submit('tree'), $cgi->end_form()); } sub show_family2 { my($cgi,$dataDF,$html,$family) = @_; # if union is specified, you want to treat all families with the same function # as "present" my $union = $cgi->param('union'); my $func; # used only if union is requested my @fam_func_peg_tuples = map { chomp; [split(/\t/,$_)] } `cut -f1,2,4 $dataDF/families.all`; if ($union) { my @tmp = grep { $_->[0] == $family } @fam_func_peg_tuples; $func = $tmp[0]->[1]; } $dataDF =~ /([^\/]+)$/; my $dataD = $1; push(@$html,"
",&show_fam_table_link($dataDF,$family),"
"); my @tuples = grep { $union ? ($_->[1] eq $func) : ($_->[0] == $family) } @fam_func_peg_tuples; my $func = (@tuples > 0) ? $tuples[0]->[1] : 'hypothetical protein'; my %has = map { ($_->[2] =~ /fig\|(\d+\.\d+)/) ? ($1 => 1) : () } @tuples; my @tree; my %node_vals; if (! -s "$dataDF/fams.on.tree.db") { %node_vals = map { (($_ =~ /^(\S+)\t(n\d+)\t(\S+)/) && ($family eq $1)) ? ($2 => $3) : () } `cat $dataDF/families.on.tree`; } else { if ($dataD ne "bacteria") { die "only bacteria have a db"; } my $bacteriaD = "/homes/overbeek/Ross/MakeCS/Data/CS/bacteria"; my $fams_on_tree_db = "$bacteriaD/fams.on.tree.db"; my %fams_on_tree; tie %fams_on_tree,"DB_File", $fams_on_tree_db, O_RDWR, 0666, $DB_HASH or die "Cannot open file '$fams_on_tree_db': $!\n"; my $node_vals = $fams_on_tree{$family}; %node_vals = map { $_ =~ s/://g; ($_ =~ /^(\S+),(\S+)/) ? ($1 => $2) : () } split("::",$node_vals); } my @tmp = `cat $dataDF/readable.tree`; foreach $_ (@tmp) { if ($_ =~ /^(.*\- )(\d+\.\d+)(:.*)$/) { my $start = $1; my $genome = $2; my $end = $3; if ($has{$genome}) { push(@tree,"$start$genome$end\n"); } else { push(@tree,"$start$genome$end\n"); } } elsif (($_ =~ /^(.*\+ )(n\d+)$/) && (! $union)) { my $start = $1; my $node = $2; my $status = $node_vals{$node}; my $to_show = $node . ":$status"; push(@tree,"$start$to_show\n"); } else { push(@tree,$_); } } my $tree = join("",@tree); push(@$html,"$func
\n$tree\n\n"); } sub show_changes { my($cgi,$dataDF,$html,$node) = @_; my $type = $cgi->param('type'); if ($type eq 'families') { &show_changes_families($cgi,$dataDF,$html,$node); } else { &show_changes_adjacency($cgi,$dataDF,$html,$node); } } sub show_changes_adjacency { my($cgi,$dataDF,$html,$node) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; my $col_hdrs = ['Family','Function','Ancestral','New','Compare']; my @events = grep { $node eq $_->[1] } map { ($_ =~ /(\S+)\t(\S+)\t(\d+):\S+\t(\d+):\S+\t(\d+)/) ? [$1,$2,$3,$4,$5] : () } `cat $dataDF/placed.events`; my %families = map { ($_->[2] => [$_->[3],$_->[4]])} @events; my %pegs_needed = map { (($_->[2] => 1), ($_->[3] => 1),($_->[4] => 1)) } @events; my %fam_peg = map { my $x; (($_ =~ /^(\d+)\t\S+\t(\d+)\t\S+\t\S+\t(\S+)\t(\S+)/) && ($x = $families{$1}) && (($x->[0] eq $2) || ($x->[1] eq $2))) ? ("$1,$2" => $3) : () } `cat $dataDF/adjacency.of.unique`; my %peg_to_func = map { (($_ =~ /^([^t]+)\t([^\t]*)\t(\S+)/) && $pegs_needed{$1}) ? ($3 => $2) : () } `cut -f1,2,4 $dataDF/families.all`; my @rows; my $ancestor; foreach my $event (@events) { my($anc,$node,$fam,$fam1,$fam2) = @$event; $ancestor = $anc; my $peg1 = $fam_peg{"$fam,$fam1"}; my $peg2 = $fam_peg{"$fam,$fam2"}; my $func = $peg_to_func{$peg1}; if ($peg1 && $peg2 && $func) { push(@rows,[&show_fam_table_link($dataDF,$fam), $func, &peg_link($peg1), &peg_link($peg2), &compare_link([$peg1,$peg2])]); } } push(@$html,&HTML::make_table($col_hdrs,\@rows,"Changes in Adjacency from $ancestor")); } sub show_changes_families { my($cgi,$dataDF,$html,$node) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; my %func = map { ($_ =~ /^(\d+)\t(\S[^\t]*\S)/) ? ($1 => $2) : () } `cut -f1,2 $dataDF/families.all`; my $col_hdrs = ['Show Where','Show PEGs','Family','Function','Clusters','Coupling']; my @tmp = grep { (($_ =~ /^\S+\t(\S+)/) && ($1 eq $node)) } `cat /$dataDF/where.shifts.occurred`; my @tabG = sort { ($a->[4] cmp $b->[4]) or ($a->[3] <=> $b->[3]) } map { ($_ =~ /^(\S+)\t\S+\t(\S+)\t0\t1/) ? [&show_fam_links($dataDF,$2),$1,$2,$func{$2}] : () } @tmp; # tabG entries are [linkT,linkP,ancestor,fam,func] # try to pick up the ancestor node from the first entry in @tabG # If you cannot get it, try to take it from @tabL my $anc = (@tabG > 0) ? $tabG[0]->[-3] : undef; foreach $_ (@tabG) { splice(@$_,2,1) } ### get rid of ancestor ## tabG entries are [linkT,linkP,fam,func] my @tabL = sort { ($a->[4] cmp $b->[4]) or ($a->[3] <=> $b->[3]) } map { ($_ =~ /^(\S+)\t\S+\t(\S+)\t1\t0/) ? [&show_fam_links($dataDF,$2),$1,$2,$func{$2}] : () } @tmp; if (! $anc) { $anc = (@tabL > 0) ? $tabL[0]->[-3] : ''; } foreach $_ (@tabL) { splice(@$_,2,1) } ### get rid of ancestor ## @tabG and @tabL are of the form [link-to-tree,link-to-peg-display,family,function]] ## we now add coupling data. my $with_couplingL = &build_table(\@tabL,$dataDF); my $with_couplingG = &build_table(\@tabG,$dataDF); push(@$html,&HTML::make_table($col_hdrs,$with_couplingG,"Families Gained from Ancestor $anc"),$cgi->hr,"\n"); push(@$html,&HTML::make_table($col_hdrs,$with_couplingL,"Families Lost from Ancestor $anc"),$cgi->hr,"\n"); } sub coupling_data { my($dataDF,$famsH) = @_; my $coupled = {}; foreach $_ (`cat $dataDF/coupled.families`) { if (($_ =~ /(\S+)\t(\S+)\t(\d+)/) && $famsH->{$1} && $famsH->{$2} && ($1 ne $2)) { push(@{$coupled->{$1}},$2); } } return $coupled; } sub cluster_link_and_cluster_html { my($family,$coupledH,$fam_to_func,$dataD) = @_; my $cluster_link = ''; my $coupled = $coupledH->{$family}; my $coupled_html = ""; if ($coupled && (@$coupled > 0)) { $cluster_link = &show_clusters_link($dataD,$family,$coupled); $coupled_html = join("
",map { &show_fam_table_link($dataD,$_) . '(' . &show_func_link($dataD,$fam_to_func->{$_}) . ')' } @$coupled); } return ($cluster_link,$coupled_html); } sub build_table { my($tab,$dataDF) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; my %famH = map { ($_->[-2] => 1) } @$tab; my %fam_to_func = map { ($_->[2] => $_->[3]) } @$tab; my $coupledH = &coupling_data($dataDF,\%famH); my @with_coupling; foreach my $tuple (@$tab) { my($link1,$link2,$family,$function) = @$tuple; $tuple->[3] = &show_func_link($dataD,$function); my($cluster_link,$coupled_html) = &cluster_link_and_cluster_html($family,$coupledH,\%fam_to_func,$dataD); $tuple->[4] = $cluster_link; $tuple->[5] = $coupled_html; push(@with_coupling,$tuple); } return \@with_coupling; } sub show_fam_links { my($dataDF,$fam) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; return ("tree", "PEGs:$fam"); } sub show_fam_table_link { my($dataDF,$fam) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; return "$fam: PEGs"; } sub show_func { my($cgi,$dataDF,$html,$function) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; my %counts; my %fams = map { my $x; if (($_ =~ /^(\d+)\t(\S[^\t]*\S)/) && ($2 eq $function)) { $counts{$1}++; ($1 => 1); } else { () } } `cat $dataDF/families.all`; my $col_hdrs = ['Family','PEGs in Family','Distribution on Tree','Size']; my @tab = map { [$_, &show_fam_table_link($dataDF,$_), &show_fam_on_tree_link($dataDF,$_), $counts{$_} ] } sort { $a <=> $b } keys(%fams); push(@$html,"$function
", &HTML::make_table($col_hdrs,\@tab,"Families with Function"),"
"); my @fams = keys(%fams); if (@fams > 1) { push(@$html,"
",&show_fam_on_tree_link($dataDF,$fams[0],"union")," with union of families
"); } push(@$html,"
"); my @gained = map { (($_ =~ /^(\S+)\t(\S+)\t(\d+)\t0\t1/) && $fams{$3}) ? [$1,&show_node_link($dataD,$2,'families'),&show_fam_table_link($dataDF,$3)] : () } `cat $dataDF/where.shifts.occurred`; push(@$html,&HTML::make_table(['Ancestor','Descendant','Family'],\@gained,"Where $function was GAINED")); push(@$html,"
"); my @lost = map { (($_ =~ /^(\S+)\t(\S+)\t(\d+)\t1\t0/) && $fams{$3}) ? [$1,&show_node_link($dataD,$2,'families'),&show_fam_table_link($dataDF,$3)] : () } `cat $dataDF/where.shifts.occurred`; push(@$html,&HTML::make_table(['Ancestor','Descendant','Family'],\@lost,"Where $function was LOST")); } sub show_indexed_funcs { my($cgi,$dataDF,$html,$keywords) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; my $functions_in_fams = &functions_in_at_least_one_family($dataDF); # $keywords = "$dataD " . $keywords; ### tell the user to add it,if necessary my %funcs_to_show; foreach my $func (`svr_sphinx_indexing -k \'$keywords\' | cut -f1 | svr_function_of | cut -f2`) { chomp $func; $func =~ s/\s*\#.*$//; if ($functions_in_fams->{$func}) { $funcs_to_show{$func}++; } } my @funcs = sort { $funcs_to_show{$b} <=> $funcs_to_show{$a} } keys(%funcs_to_show); if (@funcs == 0) { push(@$html,"Sorry, no functions matched
\n"); } else { my @links = map { [&show_func_link($dataD,$_)] } @funcs; push(@$html,&HTML::make_table(['Possible Functions'],\@links,"Possible functions - Select to find nodes where shifts occurred")); } } sub show_virulence_functions { my($cgi,$dataDF,$html) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; my $functions_in_fams = &functions_in_at_least_one_family($dataDF); my @virulence_functions = map { chomp; $functions_in_fams->{$_} ? $_ : () } `cat $dataDF/virulence.functions`; my @links = map { [&show_func_link($dataD,$_)] } sort @virulence_functions; push(@$html,&HTML::make_table(['Function Sometimes Associated with Virulence'], \@links, 'Functions Known to Be Associated with Virulence in Some Organisms')); } sub functions_in_at_least_one_family { my($dataDF) = @_; my %functions_in_fams = map { chomp; ($_ => 1) } `cut -f2 $dataDF/families.all`; return \%functions_in_fams; } sub subsystems_of { my($fig,$reaction_to_roles,$reaction) = @_; my $roles_and_pegs = $reaction_to_roles->{$reaction}; my @roles = map { my $x = $_; $x =~ s/^[^:]+://; $x } @{$reaction_to_roles->{$reaction}}; my $printable_roles = ""; if (@roles > 0) { my %tmp = map { $_ =~ /^([^:]+):(\S.*\S)/; ($2 => $1) } @$roles_and_pegs; my @tmp = map { &peg_link($tmp{$_}) . "
" . $_ } sort keys(%tmp); $printable_roles = join(",",@tmp); } my %subsys; foreach my $role (@roles) { foreach my $s ($fig->role_to_subsystems($role)) { $subsys{$s} = 1; } } return join("\t",sort keys(%subsys)) . "
" . $printable_roles; } sub show_ali { my($cgi) = @_; my @checked = map { ($_ =~ s/^check.//) ? $_ : () } $cgi->param(); if (@checked > 1) { my %checked = map { $_ => 1 } @checked; my $dataD = $cgi->param('dataD'); my $dataDir = "/homes/overbeek/Ross/MakeCS/Data/CS"; my $dataDF = "$dataDir/$dataD"; my $tmp_seqs_in = "$FIG_Config::temp/tmp$$.seqs.in"; my $tmp_seqs_ali = "$FIG_Config::temp/tmp$$.ali"; open(SEQS,">$tmp_seqs_in") || die "could not open $tmp_seqs_in"; my $dna = $cgi->param('dna'); opendir(GENOMES,"$dataDF/Seqs") || die "could not open $dataDF/Seqs"; my @genomes = grep { $_ !~ /^\./ } readdir(GENOMES); closedir(GENOMES); foreach my $g (@genomes) { my @seqs; if($dna) { @seqs = grep { $checked{$_->[0]} } &gjoseqlib::read_fasta("$dataDF/PegDNA/$g"); } else { @seqs = grep { $checked{$_->[0]} } &gjoseqlib::read_fasta("$dataDF/Seqs/$g"); } foreach my $tuple(@seqs) { my($peg,undef,$seq) = @$tuple; print SEQS ">$peg\n$seq\n"; } } close(SEQS); &SeedUtils::run("svr_align_seqs < $tmp_seqs_in | svr_ali_to_html > $tmp_seqs_ali"); open(SEQS,"<$tmp_seqs_ali") || die "could not open $tmp_seqs_ali"; print $cgi->header; while (defined($_ =)) { $_ =~ s/\b(fig\|\d+\.\d+\.peg\.\d+)/$1<\/a>/g; print $_; } close(SEQS); unlink($tmp_seqs_in,$tmp_seqs_ali); } } sub show_occurs_tree { my($cgi,$dataDF) = @_; my @checked = map { ($_ =~ s/^check.//) ? $_ : () } $cgi->param(); if (@checked > 3) { my %genome_name = map { ($_ =~ /^(\d+\.\d+)\t(\S.*\S)/) ? ($1 => $2) : () } `cat $dataDF/genome.names`; my $tmp_labels = "$FIG_Config::temp/tmp$$.labels"; open(LABELS,">$tmp_labels") || die "could not open $tmp_labels"; foreach my $peg (@checked) { if ($peg =~ /^fig\|(\d+\.\d+)/) { my $g = $genome_name{$1}; my $link = &peg_link($peg); print LABELS "$peg\t$link: $g\n"; } } close(LABELS); my $tmp_seqs = "$FIG_Config::temp/tmp$$.seqs"; open(SEQS,"| svr_fasta -fasta -protein | svr_align_seqs | svr_tree | sketch_tree -m -l $tmp_labels > $tmp_seqs") || die "could not open $tmp_seqs"; foreach $_ (@checked) { print SEQS "$_\n"; } close(SEQS); open(SEQS,"<$tmp_seqs") || die "could not open $tmp_seqs"; print $cgi->header; print " \n"; while (defined($_ =\n"; close(SEQS); unlink($tmp_seqs,$tmp_labels); } } sub show_options_for_otu { my($cgi,$g) = @_; print $cgi->header; print ")) { print $_; } print " $g
\n"; print "\n"; print "
\n"; push(@$html,"- Gains/losses of Gene Families\n"; print "
- Genes that Distinguishing Families (Genes as Signatures)\n"; print "
- Changes in Adjacency\n"; print "
", $cgi->start_form(-action => "./families_on_tree.cgi"), $cgi->textfield(-name => 'keywords',-size => 100), $cgi->hidden(-override => 1,-name => 'request',-value => 'show_indexed_funcs'), $cgi->hidden(-override => 1,-name => 'dataD',-value => $dataD), $cgi->submit('show functions matching keywords'),$cgi->end_form(), "
", &virulence_functions_link($cgi,$dataDF)); foreach $_ (@$html) { print $_ } my $txt = <
We think of changes as belonging to three broad classes: 1. Genes that are gained or lost, 2. Rearrangements, which we often think of as "changes in adjacency", and 3. SNPs Users concerned about issues relating to virulence will probably focus on the first two classes, unless that have high-quality sequences for a number of very close genomes (and they can separate these into virulent and nonvirulent). On the other hand users interested in what makes a production strain display desirable properties may well have a number of very, very close strains, and for them SNPs might well be the most interesting form of change. Lurking behind efforts to accurately display these three types of changes all depend heavily on our ability to match up genese between genomes. That is, there are (almost inevitably) a set of protein families that are the basis of comparison, and these are often of dubious quality. Be aware that we have attempted to build fairly accurate families of isofunctional homologs (i.e., genes that have the same function and a common ancestor), but they are far from perfect. END print $txt; } sub show_otus { my($cgi,$datadir) = @_; print $cgi->header; if (opendir(GENERA,$dataDir)) { my @genera = grep { $_ !~ /^\./ } readdir(GENERA); closedir(GENERA); print "What Changed?
\n"; print "Getting Started: a short Tutorial
\n"; print "Genera Available
\n"; foreach my $g (sort @genera) { print "$g\n"; } } else { print "
The dataD parameter is invalid\n"; } } sub peg_link { my($peg) = @_; return "$peg"; } sub compare_link { my($pegs) = @_; my @genomes = map { ($_ =~ /^fig\|(\d+\.\d+)/) ? $1 : () } @$pegs; my $args = join("&",map { "show_genome=$_" } @genomes); return "[0] . "&$args>Compare Regions"; } sub show_func_link { my($dataD,$function) = @_; if (! $function) { return '' } my $functionQ = uri_escape($function); return "$function" } sub virulence_functions_link { my($cgi,$dataDF) = @_; if ((-s "$dataDF/virulence.functions") && ($dataDF =~ /([^\/]+)$/)) { my $dataDQ = uri_escape($1); return "Some Posssible Functions Associated with Virulence"; } return ''; } sub show_fam_on_tree_link { my($dataDF,$fam,$union) = @_; $union = defined($union) ? $union : 0; if ($dataDF =~ /([^\/]+)$/) { my $dataDQ = uri_escape($1); return "Family on Tree"; } return ''; } sub show_node_link { my($dataD,$node,$type) = @_; if ($type eq "families") { return "$node"; } else { return "$node"; } } sub show_clusters_link { my($dataD,$family,$coupled) = @_; my %tmp = map { $_ => 1 } ($family,@$coupled); my $families = join(",",sort { $a <=> $b} keys(%tmp)); return "$family"; } sub show_clusters { my($cgi,$dataDF,$html) = @_; my $families = $cgi->param('families'); my @families = split(/,/,$families); my %families = map { $_ => 1 } @families; my %genome_names = map { ($_ =~ /^(\S+)\t(\S.*\S)/) ? ($1 => $2) : () } `cat $dataDF/genome.names`; my @genome_pegN_fam_func = sort { ($a->[0] <=> $b->[0]) or ($a->[1] <=> $b->[1]) } map { (($_ =~ /^(\S+)\t([^\t]*)\t[^\t]*\tfig\|(\d+\.\d+)\.peg\.(\d+)/) && $families{$1}) ? [$3,$4,$1,$2] : () } `cat $dataDF/families.all`; push(@$html,$cgi->h1('Relevant Clusters')); my $col_hdrs = ['Family','Function','PEG']; my $last = shift @genome_pegN_fam_func; while ($last) { my $last_g = $last->[0]; my $last_pegN = $last->[1]; my @set; while ($last && ($last_g == $last->[0]) && &close($last_pegN,$last->[1])) { $last_pegN = $last->[1]; push(@set,[$last->[2],$last->[3],&peg_link("fig|" . $last_g . ".peg." . $last_pegN)]); $last = shift @genome_pegN_fam_func; } if (@set > 1) { push(@$html,&HTML::make_table($col_hdrs,\@set,"Cluster for $last_g: $genome_names{$last_g}")); push(@$html,"
\n"); } } } sub close { my($pegN1,$pegN2) = @_; return abs($pegN2 - $pegN1) <= 7; } sub show_kovbassa_signatures { my($cgi,$dataDF,$html) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; my @tree; my @tmp = `cat $dataDF/readable.tree`; foreach $_ (@tmp) { if ($_ =~ /^(.*\+ )(n\d+)$/) { my $start = $1; my $node = $2; my $link = $cgi->radio_group(-nolabels => 1, -name => "radio_$node", -override => 1, -values => [1,0,2], -default => 0); push(@tree,"$start$node$link\n"); } elsif ($_ =~ /^(.*\- )(\d+\.\d+)(:.*)$/) { my $start = $1; my $node = $2; my $end = $3; my $link = $cgi->radio_group(-nolabels => 1, -name => "radio_$node", -override => 1, -values => [1,0,2], -default => 0); push(@tree,"$start",$link,"$end\n"); } else { push(@tree,$_); } } my $tree = join("",@tree); push(@$html,"
", $cgi->start_form(-action => "./families_on_tree.cgi"), $cgi->hidden(-override => 1,-name => 'request',-value => 'compute_kovbassa_sigs'), $cgi->hidden(-override => 1,-name => 'dataD',-value => $dataD), "\n$tree\n\n", $cgi->submit('Compute Gene Signatures')); } sub compute_kovbassa_signatures { my($cgi,$dataDF,$html) = @_; $dataDF =~ /([^\/]+)$/; my $dataD = $1; my @out; my @in; my @params = grep { $_ =~ /^radio/ } $cgi->param(); foreach my $param (@params) { if ($param =~ /^radio_(\S+)/) { my $node = $1; if ($cgi->param($param) == 1) { push(@out,$node); } elsif ($cgi->param($param) == 2) { push(@in,$node); } } } my $families = "$dataDF/families.all"; my $tree = &parse_newick_tree(join("",`cat $dataDF/labeled.tree`)); my $indexP = &tree_utilities::tree_index_tables($tree); my $genomes = {}; &in_set(\@out,$tree,$indexP,1,$genomes); &in_set(\@in,$tree,$indexP,2,$genomes); my $s1N = grep { $genomes->{$_} == 1 } keys(%$genomes); my $s2N = grep { $genomes->{$_} == 2 } keys(%$genomes); my %by_family; my %fam2func; foreach $_ (map { (($_ =~ /^(\S+)\t([^\t]*)\tfig\|(\d+\.\d+)/) && $genomes->{$3}) ? [$1,$2,$3] : () } `cut -f1,2,4 $families`) { my($fam,$func,$g) = @$_; $by_family{$fam}->{$g} = 1; $fam2func{$fam} = $func; } my @scored_families; my %famH; foreach my $f (keys(%by_family)) { my $fH = $by_family{$f}; my @hits = keys(%$fH); my $s1Hits = grep { $genomes->{$_} == 1 } @hits; my $s2Hits = grep { $genomes->{$_} == 2 } @hits; my $counts1 = [$s1Hits,$s1N-$s1Hits]; my $counts2 = [$s2Hits,$s2N-$s2Hits]; my($din,$dout) = &discriminates($counts2,$counts1); my $disc = $din+$dout; if ($disc > 1.5) { $famH{$f} = 1; push(@scored_families,[sprintf("%0.3f",$disc),$f]); } } my $coupledH = &coupling_data($dataDF,\%famH); my $col_hdrs = ['Score','Tree','Family','Mean Length','Function','Clusters','Coupling']; my %mean_len = map { ($_ =~ /^(\S+)\t(\S+)/) ? ($1 => $2) : () } `cut -f1,6 $dataDF/families.all`; my $rows = []; foreach my $tuple (sort { $b->[0] <=> $a->[0] } @scored_families) { my($sc,$fam) = @$tuple; my $func = $fam2func{$fam}; my $func_link = &show_func_link($dataD,$func); my($cluster_link,$coupled_html) = &cluster_link_and_cluster_html($fam,$coupledH,\%fam2func,$dataD); push(@$rows,[$sc, &show_fam_links($dataDF,$fam), $mean_len{$fam}, $func_link, $cluster_link, $coupled_html ]); } push(@$html,&HTML::make_table($col_hdrs,$rows,"Families that Distinguish")); } sub in_set { my($nodes,$tree,$indexP,$which,$genomes) = @_; foreach my $x (@$nodes) { if ($x =~ /^(\d+\.\d+)/) { $genomes->{$1} = $which; } elsif (($x =~ /^(n\d+)/) && (my $node = &label_to_node($indexP,$1))) { my $tips = &tips_of_tree($node); foreach my $tip (@$tips) { $genomes->{$tip} = $which; } } } } sub discriminates { my($xin,$xout) = @_; my $sx = &vector_sum($xin); my $sy = &vector_sum($xout); my $xy = &scalar_product($xin,$xout); my $xx = &scalar_product($xin,$xin); my $yy = &scalar_product($xout,$xout); my $din = (($sx != 0) && ($yy != 0)) ? (1 - (($sy * $xy) / ($sx * $yy))) : 0; my $dout = (($sy != 0) && ($xx != 0)) ? (1 - (($sx * $xy) / ($sy * $xx))) : 0; return ($din,$dout); } sub vector_sum { my($v) = @_; my $sum = 0; foreach $_ (@$v) { $sum += $_; } return $sum; } sub scalar_product { my($x,$y) = @_; if (@$x != @$y) { print STDERR &Dumper($x,$y); die "incompatable"; } my $i; my $sum = 0; for ($i=0; ($i < @$x); $i++) { $sum += $x->[$i] * $y->[$i]; } return $sum; }