[Bio] / FigWebServices / sigs.cgi Repository:
ViewVC logotype

View of /FigWebServices/sigs.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.33 - (download) (annotate)
Thu Jul 20 03:51:24 2006 UTC (13 years, 8 months ago) by parrello
Branch: MAIN
Changes since 1.32: +1 -1 lines
Fixed improper signature on the bbhs call.

##################################################################
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
##################################################################

use FIG;
require SproutFIG;

use HTML;
use CGI;
use Tracer;
my $cgi = new CGI;

if (0) {
    my $VAR1;
    eval(join("",`cat /tmp/sig_parms`));
    $cgi = $VAR1;
#   print STDERR &Dumper($cgi);
}

if (0) {
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params) {
		print "$_\t:",join(",",$cgi->param($_)),":\n";
    }

    if (0) {
        if (open(TMP,">/tmp/sig_parms")) {
	    	print TMP &Dumper($cgi);
	    	close(TMP);
		}
    }
    exit;
}

my $html = [];

my($fig_or_sprout);
if ($cgi->param('SPROUT')) {
    $is_sprout = 1;
    $fig_or_sprout = new SproutFIG($FIG_Config::sproutDB, $FIG_Config::sproutData);
    unshift @$html, "<TITLE>The NMPDR Signature Genes Page v2</TITLE>\n";
} else {
    $is_sprout = 0;
    $fig_or_sprout = new FIG;
    unshift @$html, "<TITLE>The SEED Signature Genes Page</TITLE>\n";
}
my $tracing = $cgi->param('TRACE');
if ($tracing) {
    TSetup($tracing, $cgi->param('TTYPE'));
}

my @tmp = grep { $_ =~ /^\d+\.\d+$/ } $cgi->param;
my @set1 = grep { $cgi->param($_) eq "set1" } @tmp;
my @set2 = grep { $cgi->param($_) eq "set2" } @tmp;

my $given = $cgi->param('given');

if (((@set1 == 0) && (@set2 == 0)) || (! $given))
{

    $col_hdrs = ["Given","Set 1","","Set 2","genome","Genus/Species"];
    $tab = [];

    @orgs = $fig_or_sprout->genomes("complete");

    foreach $org (@orgs)
    {
	$full{$org} = $fig_or_sprout->genus_species($org);
    }
    @orgs = sort { $full{$a} cmp $full{$b} } @orgs;

    @given = $cgi->radio_group(-name => 'given',
			       -values => [@orgs],
			       -nolabels => 1);

    foreach $org (@orgs)
    {
	push(@$tab,[shift @given,
                    $cgi->radio_group(-name => $org,
				      -default => 'neither',
				      -values => ['set1','neither','set2'],
				      -nolabels => 1
				      ),
		    $org,
		    $full{$org}]);
    }
    my $sprout = $cgi->param('SPROUT') ? 1 : 0;
    push(@$html,$cgi->start_form(-action => 'sigs.cgi'),
	        $cgi->hidden(-name => 'SPROUT', -value => $sprout),
	        $cgi->h1("Find Proteins that Discriminate Two Sets of Organisms or Are Common to a Set of Organisms"),
	        &HTML::make_table($col_hdrs,$tab,"Pick organisms for Set 1 and Set 2"),
	        $cgi->br, "Similarity Cutoff: ",$cgi->textfield(-name => "cutoff", -size => 10, -value => 1.0e-10),
	        $cgi->br,
	        $cgi->checkbox( -name => 'sort_by_func', -value => 1, -override => 1, -checked => 0, -label => 'Sort by Function'),
	        $cgi->br,
	        $cgi->checkbox( -name => 'write_tab', -value => 1, -override => 1, -checked => 0, -label => 'Export Tab Delimited Table'),
	        $cgi->br,
	        $cgi->br,
	        $cgi->submit("Find the Discriminating Proteins from Given Organism"),$cgi->reset,
	        $cgi->br,
	        $cgi->br,
	        $cgi->submit("Find Genes from Checked Organism and in Organisms from Set 1"),
	        " (minimum matches from Set 1: )", $cgi->textfield(-name => minN, -size => 5));
    if ($cgi->param('TTYPE')) {
        push @$html, 
                $cgi->br,
                $cgi->br, "Tracing: ",$cgi->textfield(-name => "TRACE", -size => 30, -value => ""),
                $cgi->hidden(-name => 'TTYPE', -value => $cgi->param('TTYPE'));
    }
    push @$html, 
	        $cgi->end_form,
	        "\n";
}
else
{
    my $sim_cutoff = $cgi->param('sim_cutoff');
    if (! $sim_cutoff) { $sim_cutoff = 1.0e-10 }

    if (@set1 > 0)
    {
	my @hits;
	if (@set2 > 0)
	{
	    @hits = &differentiating_genes(\@set1,\@set2,$given,$sim_cutoff,$is_sprout);
            Trace(scalar(@hits) . " hits found by differentiating_genes.") if T(3);
	    if ($cgi->param('sort_by_func'))
	    {
		@hits    = sort { ($a->[2] cmp $b->[2]) or ($b->[1] <=> $a->[1]) or (&FIG::by_fig_id($a->[0],$b->[0])) } @hits;
	    }
	    else
	    {
		@hits    = sort { ($b->[1] <=> $a->[1]) || (&FIG::by_fig_id($a->[0],$b->[0])) } @hits;
	    }

	    if ($cgi->param('write_tab'))
	    {
		push(@$html,"<pre>\n");
		foreach $_ (@hits)
		{
		    push(@$html,join("\t",@$_) . "\n");
		}
		push(@$html,"</pre>\n");
	    }
	    else
	    {
		$col_hdrs = ["","Gene","Score","Function"];
		$tab      = [];
		my $gs = $fig_or_sprout->genus_species($given);
		$title    = "Genes in $gs that Discriminate";
		my $subscript = 1;
		foreach $_ (@hits)
		{
		    my($peg,$score,$function) = @$_;
		    push(@$tab,[$subscript,&HTML::fid_link($cgi,$peg,"local"),$score,$function]);
		    $subscript++;
		}
		
		push(@$html,&HTML::make_table($col_hdrs,$tab,$title));
	    }
	}
	else
	{
	    my($i,$j,%which_col,$peg1,$func1,$link,$genome1,$hit);
	    my $minN = $cgi->param('minN');
	    $minN = $minN ? $minN : @set1;
	    @hits = &common_genes(\@set1,$given,$sim_cutoff,$minN,$is_sprout);
	    $col_hdrs = ["",&FIG::abbrev($fig_or_sprout->genus_species($given)),"Score","Function"];
	    for ($i=0; ($i < @set1); $i++)
	    {
		$which_col{$set1[$i]} = $i+4;
		push(@$col_hdrs,&FIG::abbrev($fig_or_sprout->genus_species($set1[$i])));
	    }

	    $tab      = [];
	    $title    = "Genes in Common";
    
	    for ($j=0; ($j < @hits); $j++)
	    {
		my($peg,$score,$hits) = @{$hits[$j]};
		my $func = scalar $fig_or_sprout->function_of($peg,$cgi->param('user'));
		my $row = [$j+1,&HTML::fid_link($cgi,$peg,"local"),$score,$func];
		for ($i=0; ($i < @set1); $i++)
		{
		    push(@$row,"");
		}
		foreach $hit (@$hits)
		{
		    ($peg1,$func1) = @$hit;
		    $genome1 = &FIG::genome_of($peg1);
		    $col     = $which_col{$genome1};
		    $link = &HTML::fid_link($cgi,$peg1,"local");
		    $row->[$col] = ($func eq $func1) ? $link : "*$link";
		}
		push(@$tab,$row);
	    }
	    push(@$html,&HTML::make_table($col_hdrs,$tab,$title));
	}
    }
    else
    {
	push(@$html,$cgi->h1("You need to fill in at least Set1"));
    }
}
&HTML::show_page($cgi,$html);

sub common_genes {
    my($set1,$given,$sim_cutoff,$minN,$is_sprout) = @_;
    warn "sig: common genes.\n";
    my %set1  = map { $_ => 1 } @$set1;
    if ($set1{$given})
    {
	$minN--;
	$set1{$given} = 0;
    }

    foreach $peg ($fig_or_sprout->all_features($given,"peg"))
    {
	undef %hits_set1;
	foreach $sim ($is_sprout ? $fig_or_sprout->bbhs($peg, $sim_cutoff) : $fig_or_sprout->sims($peg, 1000, $sim_cutoff, "fig"))
	{
	    $id2          = $is_sprout ? $sim->[0] : $sim->[1]; # id2
	    if ($id2 =~ /^fig\|(\d+\.\d+)/)
	    {
		my $org1 = $1;
		if ($set1{$org1})
		{
		    if (! $hits_set1{$org1})
		    {
			$hits_set1{$org1} = $id2;
		    }
		}
	    }
	}
	$sc = keys(%hits_set1);
	if ($sc >= $minN)
	{
	    push(@hits,[$peg,$sc,[map { [$hits_set1{$_}, scalar $fig_or_sprout->function_of($hits_set1{$_})] } keys(%hits_set1)]]);
	}
    }
    return @hits;
}
	    
sub differentiating_genes {
    my($set1,$set2,$given,$sim_cutoff,$is_sprout) = @_;
    my %set1  = map { $_ => 1 } @$set1;
    my %set2  = map { $_ => 1 } @$set2;

    my(%hits_set1,%hits_set2,@hits,$sim,$id2,$peg);

    foreach $peg ($fig_or_sprout->all_features($given,"peg"))
    {
	undef %hits_set1; undef %hits_set2;
	$hits_set1{&FIG::genome_of($peg)} = 1;
        Trace("Processing $peg.") if T(3);
	foreach $sim ($is_sprout ? $fig_or_sprout->bbhs($peg) : $fig_or_sprout->sims($peg, 1000, $sim_cutoff, "fig"))
	{
	    $id2          = $is_sprout ? $sim->[0] : $sim->[1];
            Trace("SIG tool sim check for $peg vs. $id2.") if T(4);
	    if ($id2 =~ /^fig\|(\d+\.\d+)/)
	    {
		my $org1 = $1;
		if ($set1{$org1})
		{
		    $hits_set1{$org1} = 1;
		}
		elsif ($set2{$org1})
		{
		    $hits_set2{$org1} = 1;
		}
	    }
	}
	my $n_set1 = keys(%hits_set1);
	my $n_set2 = keys(%hits_set2);

#	my $sc = sprintf "%.3f", ($n_set1 / @set1) - (($n_set2 / @set2) * $fudge);
	my $sc = sprintf "%.3f", &sig($n_set1,(@set1 - $n_set1),$n_set2,(@set2 - $n_set2));
	if ($sc >= 1)
	{
	    push(@hits,[$peg,$sc,scalar $fig_or_sprout->function_of($peg)]);
	}
    }
    Trace(scalar(@hits) . " hits found by differentiator.") if T(3);
    return @hits;
}

sub sig {
    my($in_has,$in_has_not,$out_has,$out_has_not) = @_;
    my($sx,$sy,$xx,$xy,$yy,$din,$dout);
    $sx = $in_has + $in_has_not;
    $sy = $out_has + $out_has_not;
    $xx = ($in_has * $in_has) + ($in_has_not * $in_has_not);
    $xy = ($in_has * $out_has) + ($in_has_not * $out_has_not);
    $yy = ($out_has * $out_has) + ($out_has_not * $out_has_not);
    (($sx > 0) && ($yy > 0) && ($sy > 0) && ($xx > 0)) || return 0;
    $din   = 1 - (($sy * $xy) / ($sx * $yy));
    $dout  = 1 - (($sx * $xy) / ($sy * $xx));
    return $din+$dout;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3