[Bio] / FigWebServices / heat_map.cgi Repository:
ViewVC logotype

Annotation of /FigWebServices/heat_map.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (view) (download)

1 : overbeek 1.1 #__perl__
2 :    
3 :     =pod
4 :    
5 :     =head1 heat_map.cgi
6 :    
7 :     A simple "microarray" like program that I wanted. Just display a table with no borders, where the rows are the ss and the cols are the samples, and the cells are the intensity
8 :    
9 :     A gui representation of data. I want to represent the connections to subsystems between different genomes. This allows us to compare the relative amounts of each metabolism occuring in each genome.
10 :    
11 :     The connections to subsystems are stored as attributes, and are generated by the script:
12 :    
13 :     get_ss_connections
14 :    
15 :     =cut
16 :    
17 :     use strict;
18 :     use FIG;
19 :     use HTML;
20 :     use CGI;
21 :     use FIG_CGI;
22 :     use CGI::Carp qw(fatalsToBrowser);
23 :     my $html=["<TITLE>Heat Map NQ</title>"];
24 :     use raelib;
25 :     use raedraw;
26 :     my $raedraw=new raedraw;
27 :     my $raelib=new raelib;
28 :    
29 :     my ($fig, $cgi, $user);
30 :     eval {
31 :     ($fig, $cgi, $user) = FIG_CGI::init(debug_save => 0,
32 :     debug_load => 0,
33 :     print_params => 0);
34 :     };
35 :    
36 :     if ($@ ne ""){
37 :     my $err = $@;
38 :    
39 :     my(@html);
40 :    
41 :     push(@html, $cgi->p("Error connecting to SEED database."));
42 :     if ($err =~ /Could not connect to DBI:.*could not connect to server/)
43 :     {
44 :     push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
45 :     }
46 :     else
47 :     {
48 :     push(@html, $cgi->pre($err));
49 :     }
50 :     &HTML::show_page($cgi, \@html, 1, undef, {"default"=>"Html/css/heatmap.css"});
51 :     exit;
52 :     }
53 :    
54 :    
55 :    
56 :     unless ($cgi->param("korgs"))
57 :     {
58 :     my %options;
59 :     map {$options{$_} = $fig->genus_species($_) . " ($_)"} &genomes_with_cnx();
60 :    
61 : overbeek 1.3 my %limit=(""=>1, "unclassified"=>1);
62 : overbeek 1.1 foreach my $ssc ($fig->all_subsystem_classifications()) {$limit{$ssc->[0]}=1}
63 :    
64 : overbeek 1.2 unless ($cgi->param('complete')) {$cgi->param('complete', 'All')}
65 :    
66 : overbeek 1.1 # logo
67 :     # $cgi->p({style=>"text-align: center;"}, $cgi->a({href=>$cgi->url}, $cgi->img({alt=>"Heat Map NR", src=>"/heatmapnq.png"}))),
68 :    
69 :     push @$html, (
70 :     $cgi->start_form(),
71 :     $cgi->h2("Heat Map NQ"),
72 : overbeek 1.6 $cgi->p("Heat Map NQ is designed to show relationships between subsystems in different environmental samples. Each subsystem that is present in a sample gets a score. The score is calculated by counting the number of sequences that are similar to a protein in each subsystem. This number is divided by the total number of sequences from the sample that are similar to any protein in a subsystem, so it is the fraction of sequences in subsystems. Therefore the size of the sample should not necessarily affect the number that you see. Please note that these numbers are only approximate and \"for entertainment purposes only\". We will integrate our statistical comparison package <a href='http://sourceforge.net/projects/xipe-totec' target='_new'>xipe-totec</a> into this analysis so that you can identify those subsystems that are present at unlikely levels."),
73 :     $cgi->p("The raw numbers mean that if there are 10 sequences that hit all subsystems in total, then a subsystem that has two sequences that hit it will get a score of 0.2 (2/10). However, these numbers tend to be 2 and 100000, so the number is very small in most cases. Therefore, the multiplier allows you to multiply all scores by a number to make them 2 instead of 0.0000002. The non-quantitative analysis gets biased by one or two outliers, so you can also overcome the outlier effect by trimming off the maximums -- anything above your chosen value is set as the maximum. Note that the maximum value is from the unmodified raw score."),
74 :     $cgi->p("The raw scores may not mean that 2 is twice as much as 1, just that 2 is more than one. Because of that, and because it is easier to visualize groups of data, you can aggregate all the data into chunks. This will take all scores and split them into however many groups you tell it to. That is the non-quantitative analysis."),
75 :     $cgi->p("My reccommendation is that you display different areas of metabolism, with non-quantitative differences grouped in either 5 or 10 groups. You can also see the raw data by using the quantitative analysis checkbox, but I am not certain how much you can infer from these numbers - does 2 mean twice as much as 1?"),
76 : overbeek 1.1 $cgi->h2("Dataset"),
77 :     $cgi->p(
78 :     $cgi->br("Please choose some genomes: &nbsp; ",
79 :     $raelib->scrolling_org_list($cgi, 1, 0, [&genomes_with_cnx()]),
80 :     $cgi->br("Please choose a subset to show: &nbsp; ", $cgi->popup_menu(-name=>"limit", -values=>[sort {uc($a) cmp uc($b)}keys %limit], -default=>""), " &nbsp; (leave blank to see all of metabolism\n"),
81 :     )),
82 :     $cgi->h2("Non-quantitative Analysis"),
83 :     $cgi->p(
84 :     $cgi->p("Non quantitive analysis groups the data into a set of groups and colors the boxes accordingly. This is the default that you should probably use.\n"),
85 :     $cgi->br("Number of groups: &nbsp; ", $cgi->textfield(-name=>"ng", -default=>5, -size=>3)),
86 :     $cgi->br("Effective raw score maximum: &nbsp; ", $cgi->textfield(-name=>"fmax", -size=>5), " (a good value for this is about .01)\n"),
87 :     ),
88 :     $cgi->h2("Quantitative Analysis"),
89 :     $cgi->p(
90 :     $cgi->p("Quantitive analysis will show you the number of subsystems in each sample. This is the ratio of the number of times that subsystem is hit to the total number of subsystems that are found in the sample.\n The ratio is multiplied by a fiddle factor to normalize the data. Set the multiplier here, or use the default\n"),
91 :     $cgi->br($cgi->checkbox(-name=>"quant", -label=>"Use quantitative analysis")," &nbsp; Multiplier: &nbsp; ", $cgi->textfield(-name=>"fiddle", -default=>5000, -size=>5)),
92 :     ),
93 :     $cgi->h2("Display"),
94 :     $cgi->p(
95 :     "The default is to use a blue color as the extreme, but you can change that to red or green\n",
96 :     $cgi->br($cgi->popup_menu(-name=>"color", -label=>"Default color scheme", -values=>['blue', 'red', 'green'], -default=>'blue')),
97 :     ),
98 :     $cgi->submit, $cgi->reset, $cgi->end_form());
99 :    
100 :     &HTML::show_page($cgi, $html, 1, undef, {"default"=>"Html/css/heatmap.css"});
101 :     exit(0);
102 :     }
103 :    
104 :    
105 :    
106 :    
107 : overbeek 1.7 my @genomes=sort {lc($fig->genus_species($a)) cmp lc($fig->genus_species($b))} $cgi->param('korgs');
108 : overbeek 1.1 my $scores; my $max;
109 :     for (my $i=0; $i<=$#genomes; $i++)
110 :     {
111 :     next unless ($fig->is_genome($genomes[$i]));
112 :     foreach my $attr ($fig->get_attributes($genomes[$i], "ss_connections"))
113 :     {
114 : overbeek 1.4 $attr->[2] =~ /^(.*):(([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?)/;
115 : overbeek 1.1 my ($ss, $score)=($1, $2);
116 :     unless ($ss && defined $score) {die "Can't parse a ss and a score from ".(join("\n", @$attr))}
117 :     unless (defined $scores->{$ss}) {$#{$scores->{$ss}}=$#genomes}
118 :     $scores->{$ss}->[$i]=$score;
119 :     }
120 :     }
121 :    
122 :     my @data;
123 :     foreach my $ss (keys %$scores)
124 :     {
125 :     my @class=@{$fig->subsystem_classification($ss)};
126 : overbeek 1.3 if (
127 :     (
128 :     $cgi->param('limit') &&
129 :     ($cgi->param('limit') eq "unclassified" && !$class[0]) ||
130 :     ($cgi->param('limit') eq $class[0])
131 :     ) ||
132 :     !$cgi->param('limit')
133 :     )
134 : overbeek 1.1 {
135 : overbeek 1.5 foreach my $sc (@{$scores->{$ss}}) {($sc > $max) ? ($max=$sc) : 1}
136 : overbeek 1.1 push @data, [@class, $ss, @{$scores->{$ss}}];
137 :     }
138 :     }
139 :    
140 :     #fix the effective maximum if we have set it
141 :     ($cgi->param('fmax')) ? ($max=$cgi->param('fmax')) : 1;
142 :    
143 :     unless ($data[0] && $max)
144 :     {
145 :     push @$html,
146 :     (
147 :     $cgi->p({style=>'color: red; background-color: yellow; font-size: 1.2em; font-weight: bolder;'}, "Sorry, no subsystems matched your query. <br>Please use your back button to try again"),
148 :     );
149 :     &HTML::show_page($cgi, $html, 1, undef, {"default"=>"Html/css/heatmap.css"});
150 :     exit(0);
151 :     }
152 :    
153 :    
154 :     # now we have the max, we need to figure out what the groups are.
155 :     # we want $ng groups, and so mapping will have the range from 0 to 100
156 :     # @mapping has all the data, in order
157 :     my $ng=$cgi->param('ng');
158 :     my @mapping=(0);
159 :     my %mapped = (0=>0);
160 :     my $count=int(100/$ng);
161 :     for (my $i=$max/$ng; $i<= ($max-($max/$ng)); $i+=$max/$ng)
162 :     {
163 :     push @mapping, $i;
164 :     $mapped{$i}=$count;
165 :     $count+=int(100/$ng);
166 :     }
167 :     # define $max as 100
168 :     push @mapping, 100;
169 :     $mapped{100}=100;
170 :    
171 :     my $tab;
172 :     foreach my $a (@data)
173 :     {
174 :     # delete this part to remove the links to the ss
175 :     my $ssn=$a->[2];
176 :     $ssn =~ s/ /_/g;
177 :     $a->[2] = &HTML::sub_link($cgi, $a->[2]);
178 :    
179 :     my @row;
180 :     foreach my $cell (@$a)
181 :     {
182 :     if ($raelib->is_number($cell)) {
183 :     if ($cgi->param('quant'))
184 :     {
185 :     $cell *= $cgi->param('fiddle');
186 :     }
187 :     else
188 :     {
189 :     my $changed;
190 :     for (my $i=0; $i<=$#mapping; $i++)
191 :     {
192 :     last if ($changed);
193 :     if ($cell < $mapping[$i]) {$cell=$mapped{$mapping[$i]}; $changed=1}
194 :     }
195 :     unless ($changed) {$cell=100}
196 :     }
197 :     my @color=$raedraw->heat_map_color($cell, $cgi->param('color'));
198 :     my $bgcolor;
199 :     map {$_=int($_*255); $bgcolor.=sprintf("%x", $_)} @color;
200 :     $cell =~ s/(\.\d\d)\d+/$1/;
201 :     push @row, [" $cell ", "td bgcolor='#$bgcolor' align='center'"]
202 :     #push @row, [" $cell ($hue) ", "td"]
203 :     }
204 :     elsif (!defined $cell) {push @row, [" &nbsp; ", "td"]}
205 :     else {push @row, [$cell, "td"]}
206 :     }
207 :     push @$tab, \@row;
208 :     }
209 :    
210 : overbeek 1.6 # sort the table by column 1 then col 2 then col 3
211 :     @$tab=sort {$a->[0]->[0] cmp $b->[0]->[0] || $a->[1]->[0] cmp $b->[1]->[0] || $a->[2]->[0] cmp $b->[2]->[0]} @$tab;
212 : overbeek 1.1
213 :     # merge the table
214 :     # skip the data columns
215 :     my $skip;
216 :     map {$skip->{$_}=1} (2..10);
217 :     unless ($cgi->param('create_excel')) {$tab=&HTML::merge_table_rows($tab, $skip)}
218 :    
219 : overbeek 1.8 # generate the table of significant differences;
220 :     my $sigtab=&significant_difference();
221 :    
222 : overbeek 1.1 # finally make the HTML
223 :    
224 :    
225 :     my $border=0;
226 :     if ($cgi->param('border')) {$border=1}
227 :     my @headers=("Class 1", "Class 2", "Subsystem");
228 :     push @headers, map {$fig->genus_species($_) . "<br />$_"} @genomes;
229 :    
230 :     my %options=("border"=>0);
231 : overbeek 1.8
232 :     ## We need to make the HTML table before the excel file table. It is the only excel rule!
233 :     my $tablehtml=&HTML::make_table(\@headers, $tab, "", %options);
234 :    
235 :     my $excellink=" &nbsp; ";
236 :     if ($cgi->param('create_excel'))
237 :     {
238 :     $raelib->make_excel_workbook("SubsystemConnections", \%options);
239 :     $raelib->make_excel_worksheet(\@headers, $tab, "hits");
240 :     $excellink=$raelib->close_excel_file();
241 :     }
242 : overbeek 1.1
243 : overbeek 1.7
244 :    
245 :    
246 :    
247 : overbeek 1.1 # this is the link to the logo, which I removed
248 :     # $cgi->p({style=>"text-align: center;"}, $cgi->a({href=>$cgi->url}, $cgi->img({alt=>"Heat Map NR", src=>"/heatmapnq.png"}))),
249 :    
250 :     $cgi->submit("create_excel", "Create excel file");
251 :     push @$html,
252 :     (
253 :     $cgi->start_form,
254 :     $cgi->hidden('korgs'),
255 :     $cgi->hidden('border'),
256 :     $cgi->hidden('fiddle'),
257 :     $cgi->hidden('quant'),
258 :     $cgi->hidden('ng'),
259 :     $cgi->hidden('fmax'),
260 :     $cgi->hidden('limit'),
261 :     $cgi->hidden('color'),
262 :     &HTML::make_table([], &control_color_table(), ""),
263 : overbeek 1.8 $tablehtml,
264 :     $excellink,
265 :     $cgi->submit("create_excel", "Create excel file of this table"),
266 : overbeek 1.7 $cgi->hr,
267 :     $sigtab,
268 : overbeek 1.8 $excellink,
269 : overbeek 1.1 $cgi->submit("create_excel", "Create excel file of this table"),
270 :     );
271 :    
272 :     &HTML::show_page($cgi, $html, 1, undef, {"default"=>"Html/css/heatmap.css"});
273 :    
274 :     exit(0);
275 :    
276 :    
277 :     sub genomes_with_cnx {
278 :     my %gcx;
279 :     foreach my $attr ($fig->get_attributes(undef, "ss_connections"))
280 :     {
281 :     $gcx{$attr->[0]}=1;
282 :     }
283 :     return keys %gcx;
284 :     }
285 :    
286 :     sub control_color_table {
287 :     # controltab is the table at the top that shows what the colors are.
288 :     my $controltab;
289 :     {
290 :     my $row;
291 :     for (my $i=0; $i<=100; $i+=2)
292 :     {
293 :     my @color=$raedraw->heat_map_color($i, $cgi->param('color'));
294 :     my $bgcolor;
295 :     map {$_=int($_*255); $bgcolor.=sprintf("%x", $_)} @color;
296 :     push @$row, [" $i ", "td bgcolor='#$bgcolor' align='center'"];
297 :     #if ($i && !($i % 20)) {push @$controltab, $row; undef $row}
298 :     }
299 :     push @$controltab, $row;
300 :     }
301 :     return $controltab;
302 :     }
303 :    
304 : overbeek 1.7
305 :     sub significant_difference {
306 :     # identify those things with a significant difference and make a cool table of them
307 :    
308 :     # read the xipe attribute for significant differences
309 :     my $xipe;
310 :     foreach my $i (0 .. $#genomes)
311 :     {
312 :     foreach my $attr ($fig->get_attributes($genomes[$i], "xipe"))
313 :     {
314 :     my @pieces=split /\:/, $attr->[2];
315 :     # note that pieces has lots of information about confidence etc that we are ignoring right now
316 :     if (@pieces)
317 :     {
318 : overbeek 1.8 my $htmlstring=&HTML::sub_link($cgi, $pieces[1])." <small>(".(join(", ", @pieces[2,3,4])).")</small><br />\n";
319 :     my $textstring=$pieces[1] . " (".(join(", ", @pieces[2,3,4]))."),\n";
320 :     push @{$xipe->{$genomes[$i]}->{$pieces[0]}}, [$htmlstring, $textstring];
321 : overbeek 1.7 }
322 :     else
323 :     {
324 :     push @{$xipe->{$genomes[$i]}->{$pieces[0]}}, " &nbsp; ";
325 :     }
326 :     }
327 :     }
328 :     my $hdrs=["UP IN", map {$fig->genus_species($_)."<br />($_)\n"} @genomes];
329 : overbeek 1.8 my $tab=[]; my $texttab=[]; # texttab is for the excel file and doesn't have links
330 : overbeek 1.7 foreach my $i (0 .. $#genomes)
331 :     {
332 : overbeek 1.8 my $hrow=[$fig->genus_species($genomes[$i]) . "($_)"];
333 :     my $trow=[$fig->genus_species($genomes[$i]) . "($_)"]; # t is text for excel
334 : overbeek 1.7 foreach my $j (0 .. $#genomes)
335 :     {
336 :     #$matches=join("<br />\n", map {&HTML::sub_link($cgi, $_)} @{$xipe->{$genomes[$i]}->{$genomes[$j]}});
337 : overbeek 1.8 my ($hmatches, $tmatches)=(" &nbsp; ", "");
338 :     if (defined $xipe->{$genomes[$i]}->{$genomes[$j]})
339 :     {
340 :     $hmatches=join("", map {$_->[0]} @{$xipe->{$genomes[$i]}->{$genomes[$j]}}); # this is the html version for web
341 :     $tmatches=join("", map {$_->[1]} @{$xipe->{$genomes[$i]}->{$genomes[$j]}}); # this is the text version for excel
342 :     }
343 :     push @$hrow, $hmatches;
344 :     push @$trow, $tmatches;
345 : overbeek 1.7 }
346 : overbeek 1.8 push @$tab, $hrow;
347 :     push @$texttab, $trow;
348 : overbeek 1.7 }
349 : overbeek 1.8 my %options=("border"=>1);
350 :     my $tablehtml=&HTML::make_table($hdrs, $tab, "Subsystems with significant difference", %options);
351 :    
352 :     if ($cgi->param('create_excel')) {$raelib->make_excel_workbook("SubsystemConnections", \%options); $raelib->make_excel_worksheet($hdrs, $texttab, "Signif. Diff")}
353 :    
354 :     return $tablehtml;
355 : overbeek 1.7 }
356 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3