[Bio] / FigWebServices / ex_assertions.cgi Repository:
ViewVC logotype

View of /FigWebServices/ex_assertions.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11 - (download) (annotate)
Wed Feb 4 17:47:24 2009 UTC (10 years, 11 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, HEAD
Changes since 1.10: +1 -1 lines
add check for cluster-based

# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use FIG;
my $fig = new FIG;

use Sim;
use HTML;
use strict;
use URI::Escape;

use CGI;
my $cgi = new CGI;

if (0)
{
    my $VAR1;
    eval(join("",`cat /tmp/expert_assertions_parms`));
    $cgi = $VAR1;
}

if (0)
{
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params)
    {
	print "$_\t:",join(",",$cgi->param($_)),":\n";
    }

    if (0)
    {
	if (open(TMP,">/tmp/expert_assertions_parms"))
	{
	    print TMP &Dumper($cgi);
	    close(TMP);
	}
    }
    exit;
}

my $html = [];
unshift @$html, "<TITLE>Expert Assertions</TITLE>\n";

my $user       = $cgi->param('user');
my $request    = $cgi->param('request');

if (! -d "$FIG_Config::data/ExpertAssertions")
{
    mkdir("$FIG_Config::data/ExpertAssertions",0777) || die "could not make ExpertAssertions";
}

if (! $user)
{
    &get_user($fig,$cgi,$html);
}
elsif ($cgi->param("Show SS"))
{
    &process_show($fig,$cgi,$html,$user);
}
elsif ($cgi->param('Update Your Assertions from Saved Sets'))
{
    push(@$html,$cgi->h1("updated assertions for $user"));
}
elsif (($request eq "process_assertions") && $cgi->param('Save Assertions'))
{
    &check_assertions($fig,$cgi,$html,$user,"assert");
}
elsif (($request eq "process_assertions") && $cgi->param('Show Unreliable PEGs'))
{
    &check_assertions($fig,$cgi,$html,$user,"show_unreliable");
}
else
{
    push(@$html,$cgi->h1("invalid request"));
}
&HTML::show_page($cgi,$html, 1, undef, undef, undef, undef, { no_fig_search => 1 });

sub get_user {
    my($fig,$cgi,$html) = @_;

    push(@$html, $cgi->start_form(-action => "ex_assertions.cgi",
				  -method => 'post'),
	         '<br><br>User: ',
	         $cgi->hidden(-name => 'request', -value => 'show_ss'),
	         $cgi->textfield(-name => "user", -size => 10, -value => ''),
	         $cgi->br,
	         $cgi->submit( 'Show SS' ),
	         $cgi->submit( 'Update Your Assertions from Saved Sets'),
	         $cgi->end_form
	 );

}
    
sub process_show {
    my($fig,$cgi,$html,$user) = @_;

    push(@$html, $cgi->start_form(-action => "ex_assertions.cgi",
				  -method => 'post'),
	         $cgi->hidden(-name => 'user', -value => $user),
	         $cgi->hidden(-name => 'request', -value => 'process_assertions', -override => 1),
	         $cgi->br,
	         &show_ss($fig,$user,$cgi),
	         $cgi->submit( 'Save Assertions' ),
	         $cgi->submit( 'Show Unreliable PEGs' ),
	         $cgi->end_form
	 );
}

sub show_ss {
    my($fig,$user,$cgi) = @_;

    my @html = ();
    my @subsys = &get_subsys($fig,$user);
    my($iS,$iR);
    for ($iS=0; ($iS < @subsys); $iS++)
    {
	push(@html,$cgi->h2($subsys[$iS]));
	push(@html,"ignore subsystem: <input type=checkbox name=ignore_subsys value=$iS checked=1>\n");
	push(@html,"<ul>\n");
	my @roles = grep { ! $fig->is_aux_role_in_subsystem($subsys[$iS],$_) } $fig->subsystem_to_roles($subsys[$iS]);
	for ($iR=0; ($iR < @roles); $iR++)
	{
	    push(@html,"<li> <b>$roles[$iR]</b><br>\n");
	    push(@html,"ignore role: <input type=checkbox name=ignore_role value=$iS:$iR selected=0>\n");
	    push(@html,"&nbsp; &nbsp; allow dups: <input type=checkbox name=allow_dups value=$iS:$iR selected=0>\n");
	    push(@html,"&nbsp; &nbsp; check similarity and lengrth: <input type=checkbox name=check_sims value=$iS:$iR selected=0>\n");
	    push(@html,"&nbsp; &nbsp; required close:");
	    push(@html,$cgi->textfield(-name => "rc:$iS:$iR", -value => "1", -size => 3),"<br>");
	}
	push(@html,"</ul><br>\n");
    }
    return @html;
}

sub get_subsys {
    my($fig,$user) = @_;

    my @subsys = ();
    foreach my $ss ($fig->all_subsystems)
    {
	
	if (&ok_subsys($ss,$user) && ($fig->subsystem_curator($ss) eq $user))
	{
	    push(@subsys,$ss);
	}
    }
    return @subsys;
}


sub ok_subsys {
    my($ss,$user) = @_;

    if (! $fig->usable_subsystem($ss))         { return 0 }
    if ($fig->subsystem_curator($ss) ne $user) { return 0 }
    if ($fig->is_cluster_based_subsystem($ss)) { return 0 }
    return 1;
}

sub check_assertions {
    my($fig,$cgi,$html,$user,$flag) = @_;
    
    my %ignore_ss   = map { $_ => 1 } $cgi->param('ignore_subsys');
    my %ignore_role = map { $_ => 1 } $cgi->param('ignore_role');
    my %allow_dups  = map { $_ => 1 } $cgi->param('allow_dups');
    my %check_sims  = map { $_ => 1 } $cgi->param('check_sims');

    my @subsys = &get_subsys($fig,$user);

    my($iS);
    for ($iS=0; ($iS < @subsys); $iS++)
    {
	if (! defined($ignore_ss{$iS}))
	{
	    my($good,$unreliable) = &check_subsys($fig,$cgi,$user,\%ignore_role,\%allow_dups,\%check_sims,\@subsys,$iS);
	    
	    if ($flag eq "assert")
	    {
		my $col_hdrs = ['PEG','Function'];
		my $tab = [];

		&FIG::verify_dir("$FIG_Config::data/ExpertAssertions/$user");
		open(ASSERTIONS,">$FIG_Config::data/ExpertAssertions/$user/$iS")
		    || die "could not open $FIG_Config::data/ExpertAssertions/$user/$iS";

		foreach my $peg (@$good)
		{
		    my $link = &HTML::fid_link($cgi,$peg);
		    my $func = $fig->function_of($peg,1);
		    push(@$tab,[$link,$func]);
		    print ASSERTIONS join("\t",($peg,$func,$user)),"\n";
		}
		close(ASSERTIONS);
		push(@$html,&HTML::make_table($col_hdrs,$tab,"Reliable Assertions"));
	    }
	    else
	    {
		my $col_hdrs = ['PEG','Function','Why'];
		my $tab = [];
		foreach my $tuple (@$unreliable)
		{
		    my($peg,$why) = @$tuple;
		    my $link = &HTML::fid_link($cgi,$peg);
		    my $func = $fig->function_of($peg,1);
		    push(@$tab,[$link,$func,$why]);
		}
		push(@$html,&HTML::make_table($col_hdrs,$tab,"Unreliable"));
	    }
	}
    }
}    

sub check_subsys {
    my($fig,$cgi,$user,$ignore_role,$allow_dups,$check_sims,$subsys,$iS) = @_;

    my $good = [];
    my $unreliable = [];

    my @genomes = map { $_->[0] } @{$fig->subsystem_genomes($subsys->[$iS])};
    my @roles   = grep { ! $fig->is_aux_role_in_subsystem($subsys->[$iS],$_) } $fig->subsystem_to_roles($subsys->[$iS]);
    my($genome,$role,$peg,$iR);
    my (%in_cell,%in_cells);
    foreach $genome (@genomes)
    {
	for ($iR=0; ($iR < @roles); $iR++)
	{
	    if (! defined($in_cell{"$genome:$iR"}))
	    {
		&cache_cell($subsys,$iS,$genome,\@roles,$iR,\%in_cell,\%in_cells);
	    }
	}
    }

    for ($iR=0; ($iR < @roles); $iR++)
    {
	if (! $ignore_role->{"$iS:$iR"})
	{
	    my $passed_sims;
	    if ($check_sims->{"$iS:$iR"})
	    {
		$passed_sims = &compute_sims($fig,\%in_cell,$iR,\@genomes);
	    }
	    foreach $genome (@genomes)
	    {
		my $pegs = $in_cell{"$genome:$iR"};
		my $req_close = $cgi->param("rc:$iS:$iR");
		foreach $peg (@$pegs)
		{
		    if ((! $allow_dups->{"$iS:$iR"}) && (@$pegs > 1))
		    {
			push(@$unreliable,[$peg,'duplicate']);
		    }
		    else
		    {
			my @close = ();
			if ($req_close)
			{
			    @close = grep { ($_ ne $peg) && $in_cells{$_} }
			             $fig->close_genes($peg,5000);
			}
			if ($fig->possible_frameshift($peg))
			{
			    push(@$unreliable,[$peg,'possible frameshift']);
			}
			elsif ($req_close && ($req_close > @close))
			{
			    push(@$unreliable,[$peg,'too few close']);
			}
			elsif ($fig->possibly_truncated($peg))
			{
			    push(@$unreliable,[$peg,'possibly truncated']);
			}
			elsif ($check_sims->{"$iS:$iR"} && (! $passed_sims->{$peg}))
			{
			    push(@$unreliable,[$peg,'failed similarity/length test']);
			}
			else
			{
			    push(@$good,$peg);
			}
		    }
		}
	    }
	}
    }
    return ($good,$unreliable);
}

sub cache_cell {
    my($subsys,$iS,$genome,$roles,$iR,$in_cell,$in_cells) = @_;

    my $role = $roles->[$iR];
    my @pegs = $fig->pegs_in_subsystem_cell($subsys->[$iS],$genome,$role,1);
    $in_cell->{"$genome:$iR"} = [@pegs];

    foreach my $peg (@pegs)
    {
	$in_cells->{$peg} = 1;
    }
}

sub compute_sims {
    my($fig,$in_cell,$iR,$genomes) = @_;

    my $lens = {};
    my $passed_sims = {};

    my(%pegs_in_col);
    foreach my $genome (@$genomes)
    {
	foreach my $peg (@{$in_cell->{"$genome:$iR"}})
	{
	    $pegs_in_col{$peg} = 1;
	}
    }
    my @pegs = keys(%pegs_in_col);
    foreach my $peg (@pegs)
    {
	if (! $passed_sims->{$peg})
	{
	    my @sims = $fig->sims($peg,500,1.0e-10,"fig");
	    foreach my $sim (@sims)
	    {
		if (($sim->id1 ne $sim->id2) && $pegs_in_col{$sim->id2})
		{
		    my $cov1 = ($sim->e1 + 1 - $sim->b1) / $sim->ln1;
		    my $cov2 = ($sim->e2 + 1 - $sim->b2) / $sim->ln2;
		    if (($cov1 >= 0.8) && ($cov2 >= 0.8))
		    {
			$passed_sims->{$peg} = $passed_sims->{$sim->id2} = 1;
		    }
		}
	    }
	}
    }

#    my $tmpF = "$FIG_Config::temp/blast.$$.fasta";
#    open(TMP,">$tmpF") || die "could noty open $tmpF";
#    foreach my $genome (@$genomes)
#    {
#	foreach my $peg (@{$in_cell->{"$genome:$iR"}})
#	{
#	    my $prot_seq = $fig->get_translation($peg);
#	    if ($prot_seq)
#	    {
#		$lens->{$peg} = length($prot_seq);
#		print TMP ">$peg\n$prot_seq\n";
#	    }
#	}
#    }
#    close(TMP);
#    system "formatdb -i $tmpF -p T";
#
#    open(BLAST,"blastall -FF -i $tmpF -d $tmpF -e 1.0e-10 -m 8 -p blastp |")
#	|| die "could not open blastout";
#    while (defined($_ = <BLAST>))
#    {
#	chop;
#	my @flds = split(/\t/,$_);
#	if ($flds[0] ne $flds[1])
#	{
#	    my $cov1 = ($flds[7] + 1 - $flds[6]) / $lens->{$flds[0]};
#	    my $cov2 = ($flds[9] + 1 - $flds[8]) / $lens->{$flds[1]};
#	    if (($cov1 >= 0.8) && ($cov2 >= 0.8))
#	    {
#		$passed_sims->{$flds[0]} = $passed_sims->{$flds[1]} = 1;
#	    }
#	}
#    }
#    close(BLAST);
#    unlink($tmpF);
    return $passed_sims;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3