[Bio] / FigWebServices / heat_map.cgi Repository:
ViewVC logotype

View of /FigWebServices/heat_map.cgi

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.12 - (download) (annotate)
Mon Jun 19 21:19:22 2006 UTC (13 years, 9 months ago) by overbeek
Branch: MAIN
Changes since 1.11: +5 -28 lines
RAE: Simplifying making excel tables



=head1 heat_map.cgi

A simple "microarray" like program that I wanted. Just display a table with no borders, where the rows are the ss and the cols are the samples, and the cells are the intensity

A gui representation of data. I want to represent the connections to subsystems between different genomes. This allows us to compare the relative amounts of each metabolism occuring in each genome.

The connections to subsystems are stored as attributes, and are generated by the script:



use strict;
use FIG;
use HTML;
use CGI;
use FIG_CGI;
use CGI::Carp qw(fatalsToBrowser);
my $html=["<TITLE>Heat Map NQ</title>"];
use raelib;
use raedraw;
my $raedraw=new raedraw;
my $raelib=new raelib;

my ($fig, $cgi, $user);
eval {
    ($fig, $cgi, $user) = FIG_CGI::init(debug_save => 0,
                                        debug_load => 0,
                                        print_params => 0);

if ($@ ne ""){
    my $err = $@;


    push(@html, $cgi->p("Error connecting to SEED database."));
    if ($err =~ /Could not connect to DBI:.*could not connect to server/)
        push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
        push(@html, $cgi->pre($err));
    &HTML::show_page($cgi, \@html, 1, undef, {"default"=>"Html/css/heatmap.css"});

unless ($cgi->param("korgs"))
    my %options;
    map {$options{$_} = $fig->genus_species($_) . " ($_)"} &genomes_with_cnx();

    my %limit=(""=>1, "unclassified"=>1);
    foreach my $ssc ($fig->all_subsystem_classifications()) {$limit{$ssc->[0]}=1}
    unless ($cgi->param('complete')) {$cgi->param('complete', 'All')}

    # logo
    #  $cgi->p({style=>"text-align: center;"}, $cgi->a({href=>$cgi->url}, $cgi->img({alt=>"Heat Map NR", src=>"/heatmapnq.png"}))),

    push @$html, (
      $cgi->h2("Heat Map NQ"),
      $cgi->p("Heat Map NQ is designed to show relationships between subsystems in different environmental samples. Each subsystem that is present in a sample gets a score. The score is calculated by counting the number of sequences that are similar to a protein in each subsystem. This number is divided by the total number of sequences from the sample that are similar to any protein in a subsystem, so it is the fraction of sequences in subsystems. Therefore the size of the sample should not necessarily affect the number that you see. Please note that these numbers are only approximate and \"for entertainment purposes only\". We will integrate our statistical comparison package <a href='http://sourceforge.net/projects/xipe-totec' target='_new'>xipe-totec</a> into this analysis so that you can identify those subsystems that are present at unlikely levels."),
      $cgi->p("The raw numbers mean that if there are 10 sequences that hit all subsystems in total, then a subsystem that has two sequences that hit it will get a score of 0.2 (2/10). However, these numbers tend to be 2 and 100000, so the number is very small in most cases. Therefore, the multiplier allows you to multiply all scores by a number to make them 2 instead of 0.0000002. The non-quantitative analysis gets biased by one or two outliers, so you can also overcome the outlier effect by trimming off the maximums -- anything above your chosen value is set as the maximum. Note that the maximum value is from the unmodified raw score."),
     $cgi->p("The raw scores may not mean that 2 is twice as much as 1, just that 2 is more than one. Because of that, and because it is easier to visualize groups of data, you can aggregate all the data into chunks. This will take all scores and split them into however many groups you tell it to. That is the non-quantitative analysis."),
      $cgi->p("My reccommendation is that you display different areas of metabolism, with non-quantitative differences grouped in either 5 or 10 groups. You can also see the raw data by using the quantitative analysis checkbox, but I am not certain how much you can infer from these numbers - does 2 mean twice as much as 1?"),
       $cgi->br("Please choose some genomes: &nbsp; ",
      $raelib->scrolling_org_list($cgi, 1, 0, [&genomes_with_cnx()]),
      $cgi->br("Please choose a subset to show: &nbsp; ", $cgi->popup_menu(-name=>"limit", -values=>[sort {uc($a) cmp uc($b)}keys %limit], -default=>""), " &nbsp; (leave blank to see all of metabolism\n"),
      $cgi->h2("Non-quantitative Analysis"),
        $cgi->p("Non quantitive analysis groups the data into a set of groups and colors the boxes accordingly. This is the default that you should probably use.\n"),
        $cgi->br("Number of groups: &nbsp; ", $cgi->textfield(-name=>"ng", -default=>5, -size=>3)),
        $cgi->br("Effective raw score maximum: &nbsp; ", $cgi->textfield(-name=>"fmax", -size=>5), " (a good value for this is about .01)\n"),
      $cgi->h2("Quantitative Analysis"),
        $cgi->p("Quantitive analysis will show you the number of subsystems in each sample. This is the ratio of the number of times that subsystem is hit to the total number of subsystems that are found in the sample.\n The ratio is multiplied by a fiddle factor to normalize the data. Set the multiplier here, or use the default\n"),
        $cgi->br($cgi->checkbox(-name=>"quant", -label=>"Use quantitative analysis")," &nbsp; Multiplier: &nbsp; ", $cgi->textfield(-name=>"fiddle", -default=>5000, -size=>5)),
        "The default is to use a blue color as the extreme, but you can change that to red or green\n",
        $cgi->br($cgi->popup_menu(-name=>"color", -label=>"Default color scheme", -values=>['blue', 'red', 'green'], -default=>'blue')),
      $cgi->submit, $cgi->reset, $cgi->end_form());
      &HTML::show_page($cgi, $html, 1, undef, {"default"=>"Html/css/heatmap.css"});

my  @genomes=sort {lc($fig->genus_species($a)) cmp lc($fig->genus_species($b))} $cgi->param('korgs');
my $scores; my $max;
for (my $i=0; $i<=$#genomes; $i++)
    next unless ($fig->is_genome($genomes[$i]));
    foreach my $attr ($fig->get_attributes($genomes[$i], "ss_connections"))
        $attr->[2] =~ /^(.*):(([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?)/;
        my ($ss, $score)=($1, $2);
        unless ($ss && defined $score) {die "Can't parse a ss and a score from ".(join("\n", @$attr))}
        unless (defined $scores->{$ss}) {$#{$scores->{$ss}}=$#genomes}

my @data;
foreach my $ss (keys %$scores)
    my @class=@{$fig->subsystem_classification($ss)};
    if (
            $cgi->param('limit') && 
            ($cgi->param('limit') eq "unclassified" && !$class[0]) || 
            ($cgi->param('limit') eq $class[0])
        ) || 
        foreach my $sc (@{$scores->{$ss}}) {($sc > $max) ? ($max=$sc) : 1}
        push @data, [@class, $ss, @{$scores->{$ss}}];

#fix the effective maximum if we have set it
($cgi->param('fmax')) ? ($max=$cgi->param('fmax')) : 1;

unless ($data[0] && $max)
    push @$html, 
        $cgi->p({style=>'color: red; background-color: yellow; font-size: 1.2em; font-weight: bolder;'}, "Sorry, no subsystems matched your query. <br>Please use your back button to try again"),
        &HTML::show_page($cgi, $html, 1, undef, {"default"=>"Html/css/heatmap.css"});

# now we have the max, we need to figure out what the groups are.
# we want $ng groups, and so mapping will have the range from 0 to 100
# @mapping has all the data, in order
my $ng=$cgi->param('ng');
my @mapping=(0);
my %mapped = (0=>0);
my $count=int(100/$ng);
for (my $i=$max/$ng; $i<= ($max-($max/$ng)); $i+=$max/$ng)
   push @mapping, $i;
# define $max as 100
push @mapping, 100;

my $tab;
foreach my $a (@data)
    # delete this part to remove the links to the ss
    my $ssn=$a->[2];
    $ssn =~ s/ /_/g;
    $a->[2] = &HTML::sub_link($cgi, $a->[2]);

    my @row;
    foreach my $cell (@$a)
        if ($raelib->is_number($cell)) {
            if ($cgi->param('quant'))
                $cell *= $cgi->param('fiddle');
                my $changed;
                for (my $i=0; $i<=$#mapping; $i++)
                    last if ($changed);
                    if ($cell < $mapping[$i]) {$cell=$mapped{$mapping[$i]}; $changed=1}
                unless ($changed) {$cell=100}
            my @color=$raedraw->heat_map_color($cell, $cgi->param('color'));
            my $bgcolor;
            map {$_=int($_*255); $bgcolor.=sprintf("%x", $_)} @color;
            $cell =~ s/(\.\d\d)\d+/$1/;
            push @row, [" $cell ", "td bgcolor='#$bgcolor' align='center'"]
#push @row, [" $cell ($hue) ", "td"]
        elsif (!defined $cell) {push @row, [" &nbsp; ", "td"]}
        else {push @row, [$cell, "td"]}
    push @$tab, \@row;

# sort the table by column 1 then col 2 then col 3
@$tab=sort {$a->[0]->[0] cmp $b->[0]->[0] || $a->[1]->[0] cmp $b->[1]->[0] || $a->[2]->[0] cmp $b->[2]->[0]} @$tab;

# merge the table
# skip the data columns
my $skip;
map {$skip->{$_}=1} (2..10);
unless ($cgi->param('create_excel')) {$tab=&HTML::merge_table_rows($tab, $skip)}

# generate the table of significant differences;
my $sigtab=&significant_difference();

# finally make the HTML

my $border=0;
if ($cgi->param('border')) {$border=1}
my @headers=("Class 1", "Class 2", "Subsystem");
push @headers, map {$fig->genus_species($_) . "<br />$_"} @genomes;

my %options=("border"=>0);
if ($cgi->param('create_excel')) {$options{'excelfile'}="SubsystemConnections"}

push @$html, 
        &HTML::make_table([], &control_color_table(), ""),
        &HTML::make_table(\@headers, $tab, "", %options),
        $cgi->submit("create_excel", "Create excel file of this table"),
        $cgi->submit("create_excel", "Create excel file of this table"),

     &HTML::show_page($cgi, $html, 1, undef, {"default"=>"Html/css/heatmap.css"});


sub genomes_with_cnx {
    my %gcx;
    foreach my $attr ($fig->get_attributes(undef, "ss_connections"))
    return keys %gcx;

sub control_color_table {
# controltab is the table at the top that shows what the colors are.
    my $controltab;
        my $row;
        for (my $i=0; $i<=100; $i+=2)
            my @color=$raedraw->heat_map_color($i, $cgi->param('color'));
            my $bgcolor;
            map {$_=int($_*255); $bgcolor.=sprintf("%x", $_)} @color;
            push @$row, [" $i ", "td bgcolor='#$bgcolor' align='center'"];
#if ($i && !($i % 20)) {push @$controltab, $row; undef $row}
        push @$controltab, $row;
    return $controltab;

sub significant_difference {
    # identify those things with a significant difference and make a cool table of them

    # read the xipe attribute for significant differences
    my $xipe; my $seen;
    foreach my $i (0 .. $#genomes)
        foreach my $attr ($fig->get_attributes($genomes[$i], "xipe"))
            my @pieces=split /\:/, $attr->[2];

            # this is a hack to ignore things with > 1 entry. We should clean this up and show based on confidence
            next if ($seen->{$genomes[$i]}->{$pieces[0]}->{$pieces[1]});
            # note that pieces has lots of information about confidence etc that we are ignoring right now
            if (@pieces) 
                my $htmlstring=&HTML::sub_link($cgi, $pieces[1])." <small>(".(join(", ", @pieces[2,3,4])).")</small><br />\n";
                my $textstring=$pieces[1] . " (".(join(", ", @pieces[2,3,4]))."),\n";
                push @{$xipe->{$genomes[$i]}->{$pieces[0]}}, [$htmlstring, $textstring];
                push @{$xipe->{$genomes[$i]}->{$pieces[0]}}, [" &nbsp; ", ""];

    foreach my $g (keys %$xipe)
        foreach my $f (keys %{$xipe->{$g}})
            @{$xipe->{$g}->{$f}} = sort {lc($a->[0]) cmp lc($b->[0]) || lc($a->[1]) cmp lc($b->[1])} @{$xipe->{$g}->{$f}};
    my $hdrs=["UP IN", map {$fig->genus_species($_)."<br />($_)\n"} @genomes];
    map {$_=[$_, "th class=\"bordered\""]} @$hdrs;
    my $tab=[]; my $texttab=[]; # texttab is for the excel file and doesn't have links
    foreach my $i (0 .. $#genomes)
        my $hrow=[[$fig->genus_species($genomes[$i]) . "($genomes[$i])", "th class=\"bordered\""]];
        my $trow=[$fig->genus_species($genomes[$i]) . "($genomes[$i])"]; # t is text for excel
        foreach my $j (0 .. $#genomes)
            #$matches=join("<br />\n", map {&HTML::sub_link($cgi, $_)} @{$xipe->{$genomes[$i]}->{$genomes[$j]}});
            my ($hmatches, $tmatches)=(" &nbsp; ", "");
            if (defined $xipe->{$genomes[$i]}->{$genomes[$j]}) 
                $hmatches=join("", map {$_->[0]} @{$xipe->{$genomes[$i]}->{$genomes[$j]}}); # this is the html version for web
                $tmatches=join("", map {$_->[1]} @{$xipe->{$genomes[$i]}->{$genomes[$j]}}); # this is the text version for excel
            push @$hrow, [$hmatches, "td class=\"bordered\""];
            push @$trow, $tmatches;
        push @$tab, $hrow;
        push @$texttab, $trow;
    my %options=("border"=>1);
    if ($cgi->param('create_excel')) {$options{'excelfile'}="SubsystemConnections"}
    return &HTML::make_table($hdrs, $tab, "Subsystems with significant difference", %options);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3