[Bio] / FigWebServices / sigs.cgi Repository:
ViewVC logotype

View of /FigWebServices/sigs.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.53 - (download) (annotate)
Wed Jun 8 19:42:57 2011 UTC (8 years, 5 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2014_0729, mgrast_release_3_1_2, mgrast_release_3_1_1, rast_rel_2011_0928, mgrast_dev_10262011, HEAD
Changes since 1.52: +3 -2 lines
fix links for seedviwer

##################################################################
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#
##################################################################

require SproutFIG;
use FIG;
use FIGV;

use HTML;
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use Tracer;
use PageBuilder;
my $cgi = new CGI;

if (0) {
    my $VAR1;
    eval(join("",`cat /tmp/sig_parms`));
    $cgi = $VAR1;
#   print STDERR &Dumper($cgi);
}

if (0) {
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params) {
                print "$_\t:",join(",",$cgi->param($_)),":\n";
    }

    if (0) {
        if (open(TMP,">/tmp/sig_parms")) {
                print TMP &Dumper($cgi);
                close(TMP);
                }
    }
    exit;
}

my $org_dir;
$org_dir = $cgi->param('new_genome');

my $html = [];

my($fig_or_sprout);
if (FIGRules::nmpdr_mode($cgi)) {
    $is_sprout = 1;
    $fig_or_sprout = new SproutFIG($FIG_Config::sproutDB, $FIG_Config::sproutData);
} else {
    $is_sprout = 0;
    $fig_or_sprout = new FIGV($org_dir);;
    unshift @$html, "<TITLE>The SEED Signature Genes Page</TITLE>\n";
}

# Initialize tracing.
ETracing($cgi);

my $bbhs = $cgi->param('bbhs');

my @tmp = grep { $_ =~ /^\d+\.\d+$/ } $cgi->param;
my @set1 = grep { $cgi->param($_) eq "set1" } @tmp;
my @set2 = grep { $cgi->param($_) eq "set2" } @tmp;

my $given = $cgi->param('given');

# Determine the operating mode.
my $submit1 = $cgi->param("Find Common Proteins");
my $submit2 = $cgi->param("Find Discriminating Proteins");
# This will be set to 1 for common proteins and 2 for discriminating proteins.
my $mode = 0;

if (! $given && ($submit1 || $submit2)) {
    push @$html, $cgi->h3("You must select a given organism.");
} else {
    # Here the user wantsd to process data. Insure the given organism is in
    # set 1.
    my @found = grep { $_ eq $given } @set1;
    if (! @found) {
        push @set1, $given;
    }
    if ($submit1) {
        $mode = 1;
        if (@set2 > 0) {
            push @$html, $cgi->h3("Organisms from set 2 were ignored.");
        }
    } elsif ($submit2) {
        if (@set2 == 0) {
            push @$html, $cgi->h3("Could not process: set 2 was empty.");
        } else {
            $mode = 2;
        }
    }
}

if ($mode == 0) {

    $col_hdrs = ["Given","Set 1","","Set 2","genome","Genus/Species"];
    $tab = [];
    Trace("Retrieving genomes.") if T(2);
    @orgs = $fig_or_sprout->genomes("complete");
    $cgi->param('Environmental') ? (@orgs=$fig_or_sprout->genomes(undef, undef, "Environmental Sample")) : 1;
    if ($cgi->param("limittoorgs")) 
    {
    	my $sometext=$cgi->param("limittoorgs");
	@orgs = grep {$fig_or_sprout->genus_species($_) =~ /$sometext/i} @orgs;
    }
    Trace("Processing genomes.") if T(2);
    foreach $org (@orgs)
    {
        $full{$org} = $fig_or_sprout->genus_species($org);
    }
    @orgs = sort { uc($full{$a}) cmp uc($full{$b}) } @orgs;

    @given = $cgi->radio_group(-name => 'given',
                               -values => [@orgs],
                               -nolabels => 1);
    # We're going to create a navigation bar here that gets you automatically
    # to the first organism beginning with a specified letter.
    Trace("Creating navigation bar.") if T(2);
    my $bar = $cgi->a({ name => "bar" },"Jump to: ");
    my $currLetter = "";
    foreach $org (@orgs)
    {
        my $name = $full{$org};
        my $backToTop = "";
        if (uc(substr($name, 0, 1)) ne $currLetter) {
            # Here we have a navigation target: the first organism whose name begins
            # with a new letter. We put in the name label
            $currLetter = uc(substr($name, 0, 1));
            $bar .= $cgi->a({ href => "#$currLetter" }, $currLetter) . " ";
            $name = $cgi->a({ name => $currLetter },"") . $name;
            $backToTop = $cgi->a({ href => "#bar"}, "TOP");
        }
        push(@$tab,[shift @given,
                    $cgi->radio_group(-name => $org,
                                      -default => 'neither',
                                      -values => ['set1','neither','set2'],
                                      -nolabels => 1
                                      ),
                    $org,
                    $name, $backToTop]);
    }
    my $sprout = $cgi->param('SPROUT') ? 1 : 0;
    # Create the navigation bar.
    push(@$html,$cgi->start_form(-action => 'sigs.cgi'),
                $cgi->hidden(-name => 'SPROUT', -value => $sprout));
    if (! $sprout)
    {
    	push(@$html,"<br>",$cgi->checkbox( -name => 'bbhs', -value => 1, -override => 1, -label => 'Use BBHs' ),"<br>");
    }
    Trace("Creating main form.") if T(2);
    push(@$html,$cgi->h3("Find Proteins from Given Organism that Discriminate Set 1 from Set 2"),
                $cgi->br, "Similarity Cutoff: ",$cgi->textfield(-name => "cutoff", -size => 10, -value => 1.0e-10),
                $cgi->br,
                $cgi->checkbox( -name => 'sort_by_func', -value => 1, -override => 1, -checked => 0, -label => 'Sort by Function'),
                $cgi->br,
                $cgi->checkbox( -name => 'write_tab', -value => 1, -override => 1, -checked => 0, -label => 'Export Tab Delimited Table'),
                $cgi->br,
	        $cgi->br,
                $cgi->submit("Find Discriminating Proteins"),$cgi->reset,
                $cgi->br,
                $cgi->br,
                $cgi->h3("Find Proteins from Given Organism that are Common to Set 1"),
                $cgi->br,
                "Minimum matches from Set 1: ", $cgi->textfield(-name => minN, -size => 5),
                $cgi->br,
                $cgi->submit("Find Common Proteins"),$cgi->reset,
                $cgi->br, $cgi->br,
                $cgi->h3("Select Given, Set 1, and Set 2"),
                $cgi->p($bar),
    		$cgi->p("Limit table to some organisms with some text: ", $cgi->textfield(-name=>"limittoorgs", -size=>20, -label=>"")),
		$cgi->submit("Refresh Table"), $cgi->reset,
                &HTML::make_table($col_hdrs,$tab,"Pick organisms for Set 1 and Set 2"),
                $cgi->br);
    if ($cgi->param('TTYPE')) {
        push @$html, 
                $cgi->br,
                $cgi->br, "Tracing: ",$cgi->textfield(-name => "TRACE", -size => 30, -value => ""),
                $cgi->hidden(-name => 'TTYPE', -value => $cgi->param('TTYPE'));
    }
    push @$html, 
                $cgi->end_form,
                "\n";
} else {
    # Mode 1 or 2
    my $sim_cutoff = $cgi->param('cutoff');
    if (! $sim_cutoff) { $sim_cutoff = 1.0e-10 }
    my @hits;
    if ($mode == 2) {
        Trace("Computing discriminants.") if T(2);
        @hits = &differentiating_genes(\@set1,\@set2,$given,$sim_cutoff,$is_sprout,$bbhs);
        Trace(scalar(@hits) . " hits found by differentiating_genes.") if T(3);
        if ($cgi->param('sort_by_func')) {
            @hits    = sort { ($a->[2] cmp $b->[2]) or ($b->[1] <=> $a->[1]) or (&FIG::by_fig_id($a->[0],$b->[0])) } @hits;
        } else {
            @hits    = sort { ($b->[1] <=> $a->[1]) || (&FIG::by_fig_id($a->[0],$b->[0])) } @hits;
        }

        if ($cgi->param('write_tab')) {
            push(@$html,"<pre>\n");
            foreach $_ (@hits) {
                push(@$html,join("\t",@$_) . "\n");
            }
            push(@$html,"</pre>\n");
        } else {
            $col_hdrs = ["","Gene","Score","Function","Subsystems"];
            $tab      = [];
            my $gs = $fig_or_sprout->genus_species($given);
            $title    = "There are ", scalar(@hits), " genes in $gs that discriminate these sets";
            my $subscript = 1;
            foreach $_ (@hits) {
                my($peg,$score,$function) = @$_;
                my @sub = grep { $fig_or_sprout->usable_subsystem($_) } $fig_or_sprout->peg_to_subsystems($peg);
                my $sprout_param = ($cgi->param('SPROUT') ? "&SPROUT=1" : "");
                my $user = $cgi->param('user') || "";
		my $sub_url = "SubsysEditor.cgi?page=ShowSubsystem&subsystem=";
		# ./display_subsys.cgi?ssa_name=$_$sprout_param&request=show_ssa&user=$user&focus=$peg&show_clusters=1
                my @linkedSubs = map { my $a = $_; $a =~ s/_/ /g; $cgi->a({href => $sub_url . $_}, $a) } @sub;
                push(@$tab,[$subscript,&HTML::fid_link($cgi,$peg,"local"),$score,$function,join("<br>",@linkedSubs)]);
                $subscript++;
            }
            push(@$html,&HTML::make_table($col_hdrs,$tab,$title));
        }
    } else {
        # Mode 1
        my($i,$j,%which_col,$peg1,$func1,$link,$genome1,$hit);
        my $minN = $cgi->param('minN');
        $minN = $minN ? $minN : @set1;
        @hits = &common_genes(\@set1,$given,$sim_cutoff,$minN,$is_sprout,$bbhs);
        $col_hdrs = ["",&FIG::abbrev($fig_or_sprout->genus_species($given)),"Score","Function"];
        for ($i=0; ($i < @set1); $i++) {
            $which_col{$set1[$i]} = $i+4;
            push(@$col_hdrs,&FIG::abbrev($fig_or_sprout->genus_species($set1[$i])));
        }

        $tab      = [];
        $title    = "Genes in Common";

        for ($j=0; ($j < @hits); $j++) {
            my($peg,$score,$hits) = @{$hits[$j]};
            my $func = scalar $fig_or_sprout->function_of($peg,$cgi->param('user'));
            my $row = [$j+1,&HTML::fid_link($cgi,$peg,"local"),$score,$func];
            for ($i=0; ($i < @set1); $i++) {
                push(@$row,"");
            }
            foreach $hit (@$hits) {
                ($peg1,$func1) = @$hit;
                $genome1 = &FIG::genome_of($peg1);
                $col     = $which_col{$genome1};
                $link = &HTML::fid_link($cgi,$peg1,"local");
                $row->[$col] = ($func eq $func1) ? $link : "*$link";
            }
            push(@$tab,$row);
        }
        push(@$html,&HTML::make_table($col_hdrs,$tab,$title));
    }
}
if ($is_sprout) {
    # For sprout, we use a template.
    my $template = "$FIG_Config::template_url/Sigs_tmpl.php";
    my $traceList = QTrace("html");
    my $result = PageBuilder::Build($template, { data => join("\n", @$html),
                                                 tracing => $traceList }, "Html");
    print "CONTENT-TYPE: text/html\n\n";
    print $result;
} else {
    # For SEED, a normal SEED page.
    push @$html, QTrace("html");
    &HTML::show_page($cgi,$html);
}

sub common_genes {
    my($set1,$given,$sim_cutoff,$minN,$is_sprout,$bbhs) = @_;
    my %set1  = map { $_ => 1 } @$set1;
    if ($set1{$given})
    {
        $minN--;
        $set1{$given} = 0;
    }

    foreach $peg ($fig_or_sprout->all_features($given,"peg"))
    {
        undef %hits_set1;
        foreach $sim (($is_sprout || $bbhs) ? $fig_or_sprout->bbhs($peg, $sim_cutoff) : $fig_or_sprout->sims($peg, 1000, $sim_cutoff, "fig"))
        {
            $id2          = ($is_sprout || $bbhs) ? $sim->[0] : $sim->[1]; # id2
            if ($id2 =~ /^fig\|(\d+\.\d+)/)
            {
                my $org1 = $1;
                if ($set1{$org1})
                {
                    if (! $hits_set1{$org1})
                    {
                        $hits_set1{$org1} = $id2;
                    }
                }
            }
        }
        $sc = keys(%hits_set1);
        if ($sc >= $minN)
        {
            push(@hits,[$peg,$sc,[map { [$hits_set1{$_}, scalar $fig_or_sprout->function_of($hits_set1{$_})] } keys(%hits_set1)]]);
        }
    }
    return @hits;
}
            
sub differentiating_genes {
    my($set1,$set2,$given,$sim_cutoff,$is_sprout,$bbhs) = @_;
    my %set1  = map { $_ => 1 } @$set1;
    my %set2  = map { $_ => 1 } @$set2;

    my(%hits_set1,%hits_set2,@hits,$sim,$id2,$peg);
    my $pegCount = 0;
    my $simCount = 0;
    foreach $peg ($fig_or_sprout->all_features($given,"peg"))
    {
        $pegCount++;
        undef %hits_set1; undef %hits_set2;
        $hits_set1{&FIG::genome_of($peg)} = 1;
        Trace("Processing $peg.") if T(4);
        foreach $sim (($is_sprout || $bbhs) ? $fig_or_sprout->bbhs($peg, $sim_cutoff) : $fig_or_sprout->sims($peg, 1000, $sim_cutoff, "fig"))
        {
            $simCount++;
            $id2          = ($is_sprout || $bbhs) ? $sim->[0] : $sim->[1];
            Trace("SIG tool sim check for $peg vs. $id2.") if T(4);
            if ($id2 =~ /^fig\|(\d+\.\d+)/)
            {
                my $org1 = $1;
                if ($set1{$org1})
                {
                    $hits_set1{$org1} = 1;
                }
                elsif ($set2{$org1})
                {
                    $hits_set2{$org1} = 1;
                }
            }
        }
        my $n_set1 = keys(%hits_set1);
        my $n_set2 = keys(%hits_set2);

#       my $sc = sprintf "%.3f", ($n_set1 / @set1) - (($n_set2 / @set2) * $fudge);
        my $sc = sprintf "%.3f", &sig($n_set1,(@set1 - $n_set1),$n_set2,(@set2 - $n_set2));
        if ($sc >= 1)
        {
            push(@hits,[$peg,$sc,scalar $fig_or_sprout->function_of($peg)]);
        }
    }
    Trace(scalar(@hits) . " hits found by differentiator off of $simCount sims for $pegCount pegs with cutoff $sim_cutoff.") if T(3);
    return @hits;
}

sub sig {
    my($in_has,$in_has_not,$out_has,$out_has_not) = @_;
    my($sx,$sy,$xx,$xy,$yy,$din,$dout);
    $sx = $in_has + $in_has_not;
    $sy = $out_has + $out_has_not;
    $xx = ($in_has * $in_has) + ($in_has_not * $in_has_not);
    $xy = ($in_has * $out_has) + ($in_has_not * $out_has_not);
    $yy = ($out_has * $out_has) + ($out_has_not * $out_has_not);
    (($sx > 0) && ($yy > 0) && ($sy > 0) && ($xx > 0)) || return 0;
    $din   = 1 - (($sy * $xy) / ($sx * $yy));
    $dout  = 1 - (($sx * $xy) / ($sy * $xx));
    return $din+$dout;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3