[Bio] / FigMetagenomeTools / 16s_taxa2html.pl Repository:
ViewVC logotype

View of /FigMetagenomeTools/16s_taxa2html.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.1 - (download) (as text) (annotate)
Tue Apr 10 16:37:39 2007 UTC (12 years, 10 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, mgrast_dev_05262011, mgrast_dev_04082011, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, mgrast_rel_2008_0806, mgrast_dev_10262011, mgrast_dev_02212011, mgrast_rel_2008_0923, mgrast_release_3_0, mgrast_dev_03252011, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, mgrast_rel_2008_0919, mgrast_rel_2008_1110, myrast_33, mgrast_rel_2008_0917, mgrast_dev_04052011, mgrast_dev_02222011, HEAD
new stuff that didn't make it in before

#!/usr/bin/perl -w

# make an html page with the 16S best hits

use strict;
use CGI;
use FIG_Config;

my $cgi=new CGI;

my $usage=<<EOF;
-t taxonomy file (probably $FIG_Config::metagenome_data/rel9taxa.txt)
-b blast file (can use multiple -b's)
-c cutoff e value default = 0.01
-m max number of taxonomy columns (default=5)
-h html outputfile
-n max number of hits to show 
-l minimum length of the hit (default=25 nt)
-p minimum percent identity (default=90%)

Note: It seems to me that you need at least 90% identity and 25 nt to really call something, so those are the defaults. If you set either -p or -l (or both) to zero they will be ignored.


my ($taxaf, $htmlf, @blastf);
my $cutoff=0.01;
my $short;
my $maxtaxcol=5;
my $maxhits;
my $minper=90;
my $minlen=25;

while (@ARGV) {
 my $t=shift;
 if ($t eq "-t") {$taxaf=shift}
 elsif ($t eq "-b") {push @blastf, shift}
 elsif ($t eq "-c") {$cutoff=shift}
 elsif ($t eq "-m") {$maxtaxcol=shift}
 elsif ($t eq "-h") {$htmlf=shift}
 elsif ($t eq "-n") {$maxhits=shift}
 elsif ($t eq "-l") {$minlen=shift}
 elsif ($t eq "-p") {$minper=shift}

die $usage unless ($taxaf && $blastf[0] && $htmlf);

my $species; my $genus;
if ($taxaf =~ /.gz$/) {open(IN, "gunzip -c $taxaf|") || die "can't open a pipe to $taxaf"}
else {open(IN, $taxaf) || die "Can't open $taxaf"}
while (<IN>) 
 s/_/ /g;
 my @line=split /\t/;
 my @geni=split /\;\s+/, $line[2];
 $species->{$line[0]}=shift @geni;
close IN;

my $allbest; my %best;
foreach my $blastf (@blastf) {
	if ($blastf =~ /.gz$/) {open(IN, "gunzip -c $blastf|") || die "can't open a pipe to $blastf"}
	else {open(IN, $blastf) || die "Can't open $blastf"}
	while (<IN>)
		my @line=split /\t/;
		next unless ($line[10] < $cutoff);
		next if ($minper && $line[2] < $minper);
		next if ($minlen && $line[3] < $minlen);
		unless (exists $best{$line[0]}) {$best{$line[0]}=11000}
		next unless ($line[10] <= $best{$line[0]});
		if ($line[10] < $best{$line[0]})
		elsif ($line[10] == $best{$line[0]})  
			push @{$allbest->{$line[0]}}, \@line;

my $filename=`pwd`;
($filename =~ s/\.html$/.chosen.html/) ? 1 : ($filename .= "chosen.html");
die "crap. filename is $filename and htmlf is $htmlf" if ($filename eq $htmlf);

open (HTM, ">$htmlf") || die "Can't open $htmlf";
print HTM <<EOF;
<title>Choice of 16S hits</title>
<link rel='stylesheet' title='BIGBlue' href='/~rob/css/bigblue.css' type='text/css'>

<div id="header">
<span id="headerimg">
<img src="/~rob/images/BIGblue.gif" height=110 width=110 />
Microbial Informatics Group</h1>
<p id="headerlink">
<a href="http://phage.sdsu.edu/">Rohwer Lab</a>
<a href="http://phage.sdsu.edu/~rob/">MIG</a>
<a href="http://seed.sdsu.edu/FIG/index.cgi">SEED</a>
<a href="http://phage.sdsu.edu/~rob/open_source.html">Free Data</a>


<h1>16S hits found</h1>
<p>Please choose the 16S hit that describes the sequence, or enter a name for it. If you enter something in the Descriptor box that will be used as the species name. If you check a box that will be used for the taxonomy. If you enter a name and check a box the name you entered will be used instead of the species name.</p>
<form name='choose16s' method='Post' action='/~rob/cgi-bin/choose16s.cgi'>
<input type="hidden" name="filename" value="$filename">
<input type="submit" name="Submit"><input type="reset">


my $maxtax=0; my $count=0;
my $allcounts=scalar(keys %$allbest);
foreach my $hit (keys %$allbest)
	my @results=sort {$b->[2] <=> $a->[2]} @{$allbest->{$hit}};
	print HTM "<div class='choice' id='$hit'>\n";
	print HTM "<h2>$count of $allcounts &nbsp; $hit (hits: ", $#results+1, ") &nbsp; Descriptor: ", $cgi->textfield(-name=>"desc$hit", -size=>20), "</h2>\n";
	print HTM $cgi->hidden(-name=>"hit", -value=>$hit), "\n";
	print HTM "<table>\n<tr><th>";
	my @header; map {push @header, " &nbsp; "} (0 .. $maxtaxcol-1);
	print HTM join("</th><th>", qw[Chse Query Hit Species Taxonomy], @header, '% ID', 'Len', 'E'), "</th></tr>\n";
	if ($maxhits && $#results >= $maxhits) {splice(@results, $maxhits)}
	my $i=0; 
	my $first="checked";
	#my $first="";
	foreach my $choice (@results)
		my $gen =$species->{$choice->[1]};
		my @tax =('');
		if ($genus->{$choice->[1]}) {@tax=@{$genus->{$choice->[1]}}}
		if ($#tax>$maxtax) {$maxtax=$#tax}
		while ($#tax<$maxtaxcol) {push @tax, " &nbsp; "}
		if ($#tax>$maxtaxcol) {print STDERR "More than $maxtaxcol columns: max is $#tax\n"}
		print HTM "<tr><td>", join("</td><td>", "<input type='checkbox' name='$hit' value='$i' $first />", $choice->[0], $choice->[1], $gen, @tax, $choice->[2], $choice->[3], $choice->[10]), "</td></tr>\n";
		my $hidden=join("\t", $choice->[0], $gen, @tax);
		print HTM "\n", $cgi->hidden(-name=>"hidden$hit$i", -value=>$hidden), "\n";
	print HTM "</table>\n</div>\n";
print HTM join("\n", $cgi->submit(), $cgi->reset, $cgi->end_form(), $cgi->end_html);

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3