=pod
=head1
Create a summary of subsystems
=cut
use strict;
use FIG;
use HTML;
use raelib;
use CGI;
my $cgi=new CGI;
my $fig;
eval {
$fig = new FIG;
};
if ($@ ne "")
{
my $err = $@;
my(@html);
push(@html, $cgi->p("Error connecting to SEED database."));
if ($err =~ /Could not connect to DBI:.*could not connect to server/)
{
push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
}
else
{
push(@html, $cgi->pre($err));
}
&HTML::show_page($cgi, \@html, 1);
exit;
}
$ENV{"PATH"} = "$FIG_Config::bin:$FIG_Config::ext_bin:" . $ENV{"PATH"};
my $html = [];
if ($cgi->param('submit') eq "Summarize Subsystems")
{
summarize_ss();
}
else
{
show_initial();
}
unshift @$html, "
The SEED - Subsystem summarizer \n";
&HTML::show_page($cgi, $html, 1);
exit;
sub summarize_ss {
my @wants;
foreach my $w ($cgi->param('korgs')) {
$w =~ /\((\d+\.\d+)\)/;
push @wants, $1;
}
my @wantheaders;
foreach my $gen (@wants) {push @wantheaders, $fig->abbrev($fig->genus_species($gen))}
my @th;
if ($cgi->param('levels')) {push @th, ('Level 1', 'Level 2')}
push @th, 'Subsystem'; # we should have this, at least
if ($cgi->param('genomes')) {push @th, ('Organisms')}
if ($cgi->param('stats')) {push @th, ('Non-zero variants', 'Roles', 'Total Pegs', @wantheaders)}
elsif ($cgi->param('korgs')) {push @th, ('Roles', @wantheaders)}
my $tableheader="". (join " | ", @th) . " |
\n\n";
my $cols=scalar @th;
my $table;
foreach my $subsys ($fig->all_subsystems) {
# figure out how many roles we have, as this will determine the number of rows in the table
my @roles=$fig->subsystem_to_roles($subsys);
my $rows=1;
if (!$cgi->param('stats') && $cgi->param('korgs')) {$rows=scalar @roles}
# figure out which genomes are in this subsystem
my %genomes;
foreach my $gen ($fig->get_subsystem($subsys)->get_genomes()) {$genomes{$gen}=$fig->genus_species($gen)}
# start a new row for this data
# tablerows is a refence to an array.
# each element is an array of tablecells with the following values value colspan rowspan width
# if colspan or rowspan are undef they are believed to be 1
# if width is ommitted it is ignored
my $tablerows;
# get the classification if required
if ($cgi->param('levels')) {
# get the classification and make sure there are now 2 columns as andrei doesn't like #3.
my $class=$fig->subsystem_classification($subsys);
unless ($class) {@$class=(' ',' ')}
unless ($$class[0]) {$$class[0] = " "}
unless ($$class[1]) {$$class[1] = " "}
# remove anything more than $$class[1];
@$class=($$class[0], $$class[1]); # could have done this with splice. Ech.
foreach my $c (@$class) {
push @$tablerows, [$c, 1, $rows, 20];
}
}
# add the subsystem to the table
my $link=&HTML::sub_link($cgi,$subsys);
push @$tablerows, [$link, '1', $rows];
# add the genomes in the subsystem
if ($cgi->param('genomes') && $cgi->param('stats')) {push @$tablerows, [(scalar keys %genomes), '1', $rows]}
elsif ($cgi->param('genomes')) {
my $c=join "
\n", sort {$a cmp $b} values %genomes;
push @$tablerows, [$c, 1, $rows];
}
#
# This is convolvuted because we may want to skip some empty cells, but we won't know whether
# we want to skip them. Also, we are using rowspan/colspan to determine the size of the cells
# so we need everything defined
#
# I store the table initially in @$tablerows, and then push it into @$table. Later on we
# reconstruct @$table with etc....
#
# Now I am going to store the rows in @$toadd, and if we want them I will add them to @$table.
#
# For stats, there is only one row in @$toadd, but for other data there are several because
# of the use of rowspan
my $pegcount;
my $toadd; #this is what we will add if we need to
if ($cgi->param('stats')) {
my $totalpegs;
my $nzv=0; # non-zero variant codes
# there should be a quicker way of getting this ... ?
foreach my $role (@roles) {
foreach my $genome (keys %genomes) {
my $count = scalar ($fig->pegs_in_subsystem_cell($subsys, $genome, $role));
$pegcount->{$genome} += $count;
$totalpegs += $count;
my $vc=$fig->get_subsystem($subsys)->get_variant_code_for_genome($genome);
unless ($vc == 0) {$nzv++}
}
}
# add non-zero variant codes
push @$tablerows, [$nzv, 1, $rows];
# add roles
push @$tablerows, [(scalar @roles), '1', $rows];
# add total pegs
push @$tablerows, [$totalpegs, '1', $rows];
foreach my $w (@wants) {
push @$tablerows, [$pegcount->{$w}, '1', $rows]; # pegs for each of the chosen genomes
}
push @$toadd, $tablerows;
}
elsif ($cgi->param('korgs')) {
my $first=1;
foreach my $role (@roles) {
my $genomeroles;
foreach my $gen (@wants) {
my $cell = ' ';
if ($genomes{$gen}) {
foreach my $peg ($fig->pegs_in_subsystem_cell($subsys, $gen, $role)) {
$cell .= &HTML::fid_link($cgi,$peg, 'local') . "; ";
}
}
push @$genomeroles, [$cell, 1, 1];
}
# because I am using rowspan to make long columns, we don't want the tr on the first of these.
if ($first) {
push @$tablerows, [$role, 1, 1], @$genomeroles;
push @$toadd, $tablerows;
undef $first;
}
else {
my @tr=([$role, 1, 1], @$genomeroles);
push @$toadd, \@tr;
}
}
}
unless ($toadd) {
push @$toadd, $tablerows;
} # in case we didn't define it
# now decide if we want the table
if ($cgi->param('orgpegs')) {
my $mustadd;
foreach my $org (@wants) {if ($pegcount->{$org}) {$mustadd=1}} # we'll add it if one of the orgs has a role
if ($mustadd) {push @$table, @$toadd}
}
elsif ($cgi->param("haveroles")) {
if (scalar(@roles)) {push @$table, @$toadd}
}
else {
push @$table, @$toadd;
}
}
# now convert $table into a table
# sort the table
# if we have levels we want to sort on level 1 and then level 2 and then subsysname
# if not, we just sort on subsys name
if ($cgi->param("levels")) {
@$table = sort {
$a->[0]->[0] cmp $b->[0]->[0]
||
$a->[1]->[0] cmp $b->[1]->[0]
||
$a->[2]->[0] cmp $b->[2]->[0]
} @$table;
}
else {
@$table = sort {
$a->[0]->[0] cmp $b->[0]->[0]
} @$table;
}
my $tab;
foreach my $row (@$table) {
$tab .= "\n | ";
foreach my $cell (@$row) {
my $width='';
if ($cell->[3]) {$width="width=" . $cell->[3]}
$tab .= "\n\t[1] rowspan=$cell->[2]>$cell->[0] | ";
}
$tab .= "\n
\n";
}
push @$html, ("\n" . $tableheader . $tab . "
\n");
}
sub show_initial {
#############
#
# Stolen from index.cgi
#
#
my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );
#
# Canonical names must match the keywords used in the DBMS. They are
# defined in compute_genome_counts.pl
#
my %canonical = (
'All' => undef,
'Archaea' => 'Archaea',
'Bacteria' => 'Bacteria',
'Eucarya' => 'Eukaryota',
'Viruses' => 'Virus',
'Environmental samples' => 'Environmental Sample'
);
my $req_dom = $cgi->param( 'domain' ) || 'All';
my @domains = $cgi->radio_group( -name => 'domain',
-default => $req_dom,
-override => 1,
-values => [ @display ]
);
my $n_domain = 0;
my %dom_num = map { ( $_, $n_domain++ ) } @display;
my $req_dom_num = $dom_num{ $req_dom } || 0;
#
# Viruses and Environmental samples must have completeness = All (that is
# how they are in the database). Otherwise, default is Only "complete".
#
my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
: $cgi->param( 'complete' ) || 'Only "complete"';
my @complete = $cgi->radio_group( -name => 'complete',
-default => $req_comp,
-override => 1,
-values => [ 'All', 'Only "complete"' ]
);
#
# Use $fig->genomes( complete, restricted, domain ) to get org list:
#
my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
my @orgs = sort map { my $org = $_; my $gs = $fig->genus_species($org); my $gc=scalar $fig->all_contigs($org); "$gs ($org) [$gc contigs]" }
$fig->genomes( $complete, undef, $canonical{ $req_dom } );
my $n_genomes = @orgs;
push @$html, $cgi->start_form,
$cgi->h2("Please choose one or more organisms from the list below to see in the subsystems table"),
"\n",
" \n",
" ",
$cgi->scrolling_list( -name => 'korgs',
-size => 10,
-multiple => 1,
-values => \@orgs,
),
$cgi->br,
"$n_genomes genomes shown ",
$cgi->submit( 'Update List' ), $cgi->reset, $cgi->submit('submit', 'Summarize Subsystems'),
" | ",
" ",
join( " ", "Domain(s) to show:", @domains), " \n",
join( " ", "Completeness?", @complete), "\n",
" | ",
"
\n",
"
\n",
$cgi->p,
"Enter user: ", $cgi->textfield(-name=>'user', size=>20), $cgi->p,
$cgi->checkbox(-name=>"stats", -label=>"Only show statistics", -checked=>'on'), $cgi->p,
$cgi->checkbox(-name=>"levels", -label=>"Show subsystem classification", -checked=>'on'), $cgi->p,
$cgi->checkbox(-name=>"genomes", -label=>"Show genomes in subsystem", -checked=>'on'), $cgi->p,
$cgi->checkbox(-name=>"haveroles", -label=>"Show only subsystems that have roles defined"), $cgi->p,
$cgi->checkbox(-name=>"orgpegs", -label=>"Show only subsystems that have roles in the organism(s) you have chosen"), $cgi->p,
$cgi->end_form,
}