[Bio] / FigWebServices / rssv.cgi Repository:
ViewVC logotype

View of /FigWebServices/rssv.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (download) (annotate)
Mon Dec 14 02:10:48 2009 UTC (9 years, 11 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.4: +23 -11 lines
a basic genome directory viewer

# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use HTML;
use strict;
use SeedEnv;

use CGI;
my $cgi = new CGI;

if (0)
{
    my $VAR1;
    eval(join("",`cat /tmp/rssv_parms`));
    $cgi = $VAR1;
#   print STDERR &Dumper($cgi);
}

if (0)
{
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params)
    {
	print "$_\t:",join(",",$cgi->param($_)),":\n";
    }

    if (0)
    {
	if (open(TMP,">/tmp/rssv_parms"))
	{
	    print TMP &Dumper($cgi);
	    close(TMP);
	}
    }
    exit;
}


my $html = [];
unshift @$html, "<TITLE>Simple SEED Viewer</TITLE>\n";

my $request = $cgi->param('request');
if (! $request)
{
    &build_initial_page($cgi,$html);
}
elsif ($request eq 'initial')
{
    if ($cgi->param('cache') ||  &build_cache($cgi,$html))
    {
	&basic_query($cgi,$html);
    }
}
elsif ($request eq 'id')
{
    &process_id($cgi,$html);
}
elsif ($request eq 'features')
{
    &process_feature_search($cgi,$html);
}
elsif ($request eq 'subsystems')
{
    &process_subsystems_search($cgi,$html);
}
elsif ($request eq 'peg2subsystems')
{
    &process_peg2subsystems_search($cgi,$html);
}
elsif ($request eq 'compare')
{
    &comparison($cgi,$html);
}
elsif ($request eq 'corresponding')
{
    &process_corr_search($cgi,$html);
}
elsif ($request =~ 'query_only')
{
    &process_query_only_search($cgi,$html);
}
elsif ($request eq 'reference_only')
{
    &process_ref_only_search($cgi,$html);
}

&HTML::show_page($cgi,$html);

sub id_search_form {
    my($cgi,$html) = @_;

    my $queryG = $cgi->param('genome');
    my $refG   = $cgi->param('reference');
    my $cache  = $cgi->param('cache');

    push(@$html,$cgi->start_form(-action => "rssv.cgi"),
	        '<br><b>Get Protein Page for FIG ID, or ACH Page for non-FIG IDs</b><br><br> ',
	        $cgi->textfield(-name => 'id', -size => 20),
	        $cgi->submit('go'),
	        $cgi->hidden(-name => 'request', -value => 'id', -override => 1),
	        $cgi->hidden(-name => 'reference', -value => $refG, -override => 1),
	        $cgi->hidden(-name => 'genome', -value => $queryG, -override => 1),
	        $cgi->hidden(-name => 'cache',   -value => $cache, -override => 1),
	        $cgi->end_form,
	        $cgi->hr,$cgi->hr);
}

sub build_initial_page {
    my($cgi,$html) = @_;
	
    &id_search_form($cgi,$html);

    my $sapObject = SAPserver->new();
    my $genomesH  = $sapObject->all_genomes(-complete => 1);
    my @genomes   = keys(%$genomesH);
    my $metricsH  = $sapObject->genome_metrics(-ids => \@genomes);
    my($genomes,$labels) = &build_labels(\@genomes,$sapObject);
    push(@$html,$cgi->start_form(-action => "rssv.cgi"),
	        $cgi->h2('Genome to Query'),
	        $cgi->scrolling_list( -name     => 'genome',
				      -values   => $genomes,
				      -labels   => $labels,
				      -size     => 4),
	        $cgi->br,
	        $cgi->h2('Reference Genomes'),
	        $cgi->scrolling_list( -name     => 'reference',
				      -values   => $genomes,
				      -labels   => $labels,
				      -multiple => 1,
				      -size     => 4),
	        $cgi->br,
	        $cgi->submit('go'),
	        $cgi->hidden(-name => 'request', -value => 'initial'),
	        $cgi->end_form,
	        $cgi->hr
        );
}

sub build_cache {
    my($cgi,$html) = @_;

    my $queryG = $cgi->param('genome');
    my @refG   = $cgi->param('reference');
    if ((! $queryG) || (! @refG))
    {
	push(@$html,$cgi->h2('You need to pick a genome to study and at least one genome in the reference set'));
	return 0;
    }
    elsif (@refG > 30) 
    {
	push(@$html,$cgi->h2('Sorry, but we limit you to 30 reference genomes'));
	return 0;
    }
    my $cache = "$FIG_Config::temp/Cache.$$";
#   print STDERR "cache=$cache\n";
    $cgi->param(-name => 'cache', -value => $cache);
    &SeedUtils::verify_dir($cache);
    my $cmd = join("; ", map { "$FIG_Config::bin/svr_corresponding_genes $queryG $_ > $cache/$queryG-$_" } @refG);
    system "( $cmd ) &";

    my $sapObject = SAPserver->new();
    my $genomeH   = $sapObject->all_features(-ids => [$queryG]);
    my $query_features = $genomeH->{$queryG};
    my $functionsH     = $sapObject->ids_to_functions(-ids => $query_features);
    my $sub_infoH      = $sapObject->ids_to_subsystems(-ids => $query_features);
    my $sub_infoH2     = $sapObject->genomes_to_subsystems(-ids => [$queryG]);
    my %sub2variant    = map { $_->[0] => $_->[1] } @{$sub_infoH2->{$queryG}};

    my $ssObject = SSserver->new();
    my $subsys_dataH = $ssObject->all_subsystems;

    open(CACHED,">$cache/functions") || die "could not open $cache/functions";
    foreach my $fid (@$query_features)
    {
	my $func = $functionsH->{$fid};
	if (! $func) { $func = "" }
	print CACHED "$fid\t$func\n";
    }
    close(CACHED);

    open(CACHED,"| sort > $cache/subsystems") || die "could not open $cache/subsystems";
    foreach my $fid (@$query_features)
    {
	my $tuples = $sub_infoH->{$fid};
	next if (@$tuples < 1);
	my @with_var = map { my($role,$sub) = @$_; 
			     my $cat = $subsys_dataH->{$sub}->[1];
			     my @cat = ($cat && (@$cat > 0)) ? @$cat : ();
			     [join("; ",@cat),$sub,$role,$sub2variant{$sub},$fid] } 
	                     @$tuples;

	foreach my $tuple (@with_var)
	{
	    print CACHED join("\t",@$tuple),"\n";
	}
    }
    close(CACHED);
    return 1;
}

sub basic_query {
    my($cgi,$html) = @_;

    my $queryG = $cgi->param('genome');
    my $refG   = $cgi->param('reference');
    my $cache  = $cgi->param('cache');

    &id_search_form($cgi,$html);

    push(@$html,$cgi->start_form(-action => "rssv.cgi"),
	        '<b>Query Features in Focus Genome</b>: ',
	        $cgi->textfield(-name => 'pattern', -size => 30),
	        $cgi->submit('go'),
	        $cgi->hidden(-name => 'request', -value => 'features', -override => 1),
	        $cgi->hidden(-name => 'reference', -value => $refG, -override => 1),
	        $cgi->hidden(-name => 'genome', -value => $queryG, -override => 1),
	        $cgi->hidden(-name => 'cache',   -value => $cache, -override => 1),
	        $cgi->end_form,
	        $cgi->hr,

	        $cgi->start_form(-action => "rssv.cgi"),
	        '<b>Query Subsystems in Focus Genome</b>: ',
	        $cgi->textfield(-name => 'pattern', -size => 30),
	        $cgi->submit('go'),
	        $cgi->hidden(-name => 'request', -value => 'subsystems', -override => 1),
	        $cgi->hidden(-name => 'reference', -value => $refG, -override => 1),
	        $cgi->hidden(-name => 'genome', -value => $queryG, -override => 1),
	        $cgi->hidden(-name => 'cache',   -value => $cache, -override => 1),
	        $cgi->end_form,
	        $cgi->hr
	);

    opendir(CACHE,$cache) || die "could not open $cache";
    my @refG = map { ((-s "$cache/$_") && ($_ =~ /(\d+\.\d+$)/)) ? $1 : () } readdir(CACHE);
    closedir(CACHE);

    if (@refG > 0)
    {
	my $sapObject = SAPserver->new();
	my($refG,$labels) = &build_labels(\@refG,$sapObject);

	push(@$html,$cgi->start_form(-action => "rssv.cgi"),
	            '<b>Compare Focus Genome Against Reference Genome</b>: ',
	            $cgi->scrolling_list( -name     => 'reference',
					  -values   => $refG,
					  -labels   => $labels,
					  -size     => 4),
	            $cgi->submit('go'),
	            $cgi->hidden(-name => 'request', -value => "compare", -override => 1),
	            $cgi->hidden(-name => 'cache',   -value => $cache, -override => 1),
	            $cgi->hidden(-name => 'genome',   -value => $queryG, -override => 1),
	            $cgi->end_form
            );
    }
}

sub build_labels {
    my($genomes,$sapObject) = @_;

    my $genomesH  = $sapObject->all_genomes(-complete => 1);
    my $metricsH  = $sapObject->genome_metrics(-ids => $genomes);
    my %labels    = map { my($contigs,$sz) = @{$metricsH->{$_}}; 
			  my $lab = $genomesH->{$_} . " ($_): $sz bp, $contigs contigs";
			  $_ => $lab
                        } 
                    @$genomes;

    my @genomes = sort { lc($labels{$a}) cmp lc($labels{$b}) } @$genomes;
    
    return (\@genomes,\%labels);
}

sub process_feature_search {
    my($cgi,$html) = @_;

    my $pattern = $cgi->param('pattern');
    my $cache   = $cgi->param('cache');
    my $file    = "$cache/functions";
    my @hits    = &process_index($file,$pattern);
    &format_function_table($cgi,$html,\@hits);
}

sub process_subsystems_search {
    my($cgi,$html) = @_;

    my $pattern = $cgi->param('pattern');
    my $cache   = $cgi->param('cache');
    my $file    = "$cache/subsystems";
    my @hits    = &process_index($file,$pattern);
    &format_subsystems_table($cgi,$html,\@hits);
}

sub process_peg2subsystems_search {
    my($cgi,$html) = @_;

    my $peg     = $cgi->param('peg');
    my $cache   = $cgi->param('cache');
    my $file    = "$cache/subsystems";
    my %subs    = map { $_->[0] => 1 } &process_index($file,$peg);
    my @hits    = sort { ($a->[0] cmp $b->[0]) or &SeedUtils::by_fig_id($a->[3],$b->[3]) }
	          map { chop; [split(/\t/,$_)] }
	          grep { ($_ =~ /^([^\t]+)/) && $subs{$1} } 
                  `cat $cache/subsystems`;

    &format_subsystems_table($cgi,$html,\@hits);
}

sub comparison {
    my($cgi,$html) = @_;

    my $refG   = $cgi->param('reference');
    my $queryG = $cgi->param('genome');
    my $cache  = $cgi->param('cache');
    push(@$html,$cgi->start_form(-action => "rssv.cgi"),
	        '<b>Corresponding Features</b>: ',
	        $cgi->textfield(-name => 'pattern', -size => 30),
	        $cgi->submit('go'),
	        $cgi->hidden(-name => 'request', -value => 'corresponding', -override => 1),
	        $cgi->hidden(-name => 'cache',   -value => $cache, -override => 1),
	        $cgi->hidden(-name => 'reference',   -value => $refG, -override => 1),
	        $cgi->hidden(-name => 'genome',   -value => $queryG, -override => 1),
	        $cgi->end_form,
	        $cgi->hr,

                $cgi->start_form(-action => "rssv.cgi"),
	        '<b>Features in Query, but Not Reference</b>: ',
	        $cgi->textfield(-name => 'pattern', -size => 30),
	        $cgi->submit('go'),
	        $cgi->hidden(-name => 'request', -value => 'query_only', -override => 1),
	        $cgi->hidden(-name => 'cache',   -value => $cache, -override => 1),
	        $cgi->hidden(-name => 'reference',   -value => $refG, -override => 1),
	        $cgi->hidden(-name => 'genome',   -value => $queryG, -override => 1),
	        $cgi->end_form,
	        $cgi->hr,

                $cgi->start_form(-action => "rssv.cgi"),
	        '<b>Features in Reference, but Not Query</b>: ',
	        $cgi->textfield(-name => 'pattern', -size => 30),
	        $cgi->submit('go'),
	        $cgi->hidden(-name => 'request', -value => 'reference_only', -override => 1),
	        $cgi->hidden(-name => 'cache',   -value => $cache, -override => 1),
	        $cgi->hidden(-name => 'reference',   -value => $refG, -override => 1),
	        $cgi->hidden(-name => 'genome',   -value => $queryG, -override => 1),
	        $cgi->end_form,
	        $cgi->hr
	);
}	 

sub process_corr_search {
    my($cgi,$html) = @_;

    my $pattern = $cgi->param('pattern');
    my $cache   = $cgi->param('cache');
    my $refG    = $cgi->param('reference');
    my $genome  = $cgi->param('genome');
    my $file    = "$cache/$genome-$refG";
    my @corr    = sort { &SeedUtils::by_fig_id($a->[0],$b->[0]) }
                  map { chomp; [split(/\t/,$_)] } `cat $file`;

    my @genes   = map { ($_->[0],$_->[1]) } @corr;
    my $col_headings = ['PEG','Function','reference','Reference Function','P-sc',
			'BBH','Context-Matches','AliasesG','AliasesR'];
    my $tab = [];
    foreach my $entry (@corr)
    {
	my($pegG,$pegR,$n_context,undef,$funcG,$funcR,$aliasesG,$aliasesR,$bbh,undef,$psc) = @$entry;

	push(@$tab,[&peg_link($cgi,$pegG),$funcG,&ref_peg_link($cgi,$pegR),$funcR,
		    $psc,$bbh,$n_context,$aliasesG,$aliasesR]);
    }
    my $filtered = &filter_tab_entries($tab,$pattern);
    push(@$html,&HTML::make_table($col_headings,$filtered,'Correspondences'));
}

sub process_query_only_search {
    my($cgi,$html) = @_;

    my $pattern = $cgi->param('pattern');
    my $cache   = $cgi->param('cache');
    my $refG    = $cgi->param('reference');
    my $queryG  = $cgi->param('genome');
    my $file    = "$cache/$queryG-$refG";
    my %in_corr = map { $_ =~ /^(\S+)/; $1 => 1 } `cat $file`;
    
    my $sapObject  = SAPserver->new();
    my $genomeH    = $sapObject->all_features(-ids => [$queryG], -type => 'peg');
    my @to_show    = grep { ! $in_corr{$_} } @{$genomeH->{$queryG}};
    
    my $functionsH   = $sapObject->ids_to_functions(-ids => \@to_show);
    my $col_hdrs     = ["PEG","Function"];
    my @tab          = map { [&peg_link($cgi,$_),$functionsH->{$_} ? $functionsH->{$_} : ""] }
                       sort { &SeedUtils::by_fig_id($a,$b) } @to_show;
    my $filtered = &filter_tab_entries(\@tab,$pattern);
    push(@$html,&HTML::make_table($col_hdrs,$filtered,"Genes Missing in Reference Genome"));
}

sub process_ref_only_search {
    my($cgi,$html) = @_;

    my $pattern = $cgi->param('pattern');
    my $cache   = $cgi->param('cache');
    my $refG    = $cgi->param('reference');
    my $queryG  = $cgi->param('genome');
    my $file    = "$cache/$queryG-$refG";

    my %in_ref = map { $_ =~ /^\S+\t(\S+)/; $1 => 1 } `cat $file`;
    
    my $sapObject  = SAPserver->new();
    my $genomeH    = $sapObject->all_features(-ids => [$refG], -type => 'peg');
    my @to_show    = grep { ! $in_ref{$_} } @{$genomeH->{$refG}};
    
    my $functionsH   = $sapObject->ids_to_functions(-ids => \@to_show);
    my $col_hdrs     = ["PEG","Function"];
    my @tab          = map { [&ref_peg_link($cgi,$_),$functionsH->{$_} ? $functionsH->{$_} : ""] }
                       sort { &SeedUtils::by_fig_id($a,$b) } @to_show;
    my $filtered = &filter_tab_entries(\@tab,$pattern);
    push(@$html,&HTML::make_table($col_hdrs,$filtered,"Genes Present Only in Reference Genome"));
}

sub process_index {
    my($file,$pattern) = @_;

    my @lines = `cat $file`;
    if ( ! $pattern)
    {
	return map { chop; [split(/\t/,$_)] } @lines;
    }
    elsif ($pattern =~ /^\s*\/(.*)\/\s*$/)
    {
	return &perl_patmatch(\@lines,$1);
    }
    else
    {
	return &substr_match(\@lines,$pattern);
    }
}

sub perl_patmatch {
    my($lines,$pat) = @_;

    my @lines = grep { $_ =~ /$pat/i } @$lines;
    return map { chop; [split(/\t/,$_)] } @lines;
}

sub substr_match {
    my($lines,$pat) = @_;

    $pat =~ s/^\s+//;
    $pat =~ s/\s+$//;
    my @words = split(/\s+/,$pat);
    my @lines = @$lines;
    foreach my $word (@words)
    {
	@lines = grep { &matchword($word,$_) } @lines;
    }
    return map { chop; [split(/\t/,$_)] } @lines;
}

sub matchword {
    my($word,$str) = @_;

    my $wordL = lc $word;
    my $strL  = lc $str;
    if (index($strL,$wordL) >= 0)
    {
	if  ($wordL =~ /^fig\|\d+\.\d+\.peg\.\d+$/)
	{
	    my $wordQ = quotemeta $wordL;
	    return ($strL =~ /$wordQ\b/);
	}
	return 1;
    }
    return 0;
}

sub format_function_table {
    my($cgi,$html,$entries) = @_;

    my $col_hdrs = ['ID','Type','Function','Psi-blast','Subsystems','ACH'];
    my $tab = [];

    foreach my $entry (@$entries)
    {
	my($fid,$function) = @$entry;
	$fid =~ /fig\|\d+\.\d+\.([^\.]+)\.\d+$/;
	my $type = $1;
	if ($type eq "peg")
	{
	    push(@$tab,[ 			 
		         &comp_reg_link($fid,$cgi), 
			 'peg',
			 $function,
			 &psi_blast_link($fid),
			 &subsys_link($cgi,$fid),
		         &ach_link($cgi,$fid)
		       ]);
	}
	else
	{
	    push(@$tab,[$fid,$type,$function,"","",""]);
	}
    }

    if (@$tab > 0)
    {
	push(@$html,&HTML::make_table($col_hdrs,$tab,"Features"));
    }
    else
    {
	push(@$html,$cgi->h3('no matches'));
    }
    push(@$html,$cgi->hr,&query_link($cgi));
}

sub format_subsystems_table {
    my($cgi,$html,$entries) = @_;

    my $col_hdrs = ['Classification','Subsystem','Role','Variant','PEG'];
    my $tab = [];

    foreach my $entry (@$entries)
    {
	my($class,$subsys,$role,$variant,$peg) = @$entry;
	push(@$tab,[
		    $class,
		    $subsys,
		    $role,
		    $variant,
		    &peg_link($cgi,$peg)
		  ]);
    }
    if (@$tab > 0)
    {
	push(@$html,&HTML::make_table($col_hdrs,$tab,"Subsystems"));
    }
    else
    {
	push(@$html,$cgi->h3('no matches'));
    }
    push(@$html,$cgi->hr,&query_link($cgi));
}


sub comp_reg_link {
    my($peg,$cgi) = @_;

    my $target = "target.$$";
    my $url = "http://seed-viewer.theseed.org/seedviewer.cgi?page=Annotation&feature=$peg";
    return "<a target=$target href=$url>$peg</a>";
}

sub psi_blast_link {
    my($peg) = @_;

    my $url = "http://seed-viewer.theseed.org/protein.cgi?prot=$peg&request=use_protein_tool&tool=Psi-Blast";
    my $target = "target.$$";
    return "<a target=$target href=$url>Psi</a>";
}

sub ach_link {
    my($cgi,$peg) = @_;
    my $url = "http://seed-viewer.theseed.org/seedviewer.cgi?page=ACHresults&query=$peg";
    my $target = "target.$$";
    return "<a target=$target href=$url>ACH</a>";
}

sub subsys_link {
    my($cgi,$peg) = @_;

    my $cache = $cgi->param('cache');
    my $genome = $cgi->param('genome');
    my $refG   = $cgi->param('reference');
    my $url   = $cgi->url() . "?request=peg2subsystems&cache=$cache&peg=$peg&genome=$genome&reference=$refG";
    my $target = "target.$$";
    return "<a target=$target href=$url>sub</a>";
}

sub peg_link {
    my($cgi,$peg) = @_;

    my $genome = $cgi->param('genome');
    my $refG   = $cgi->param('reference');
    my $cache = $cgi->param('cache');
    my $url   = $cgi->url() . "?request=features&cache=$cache&pattern=$peg&genome=$genome&reference=$refG";
    my $target = "target.$$";
    return "<a target=$target href=$url>$peg</a>";
}

sub ref_peg_link {
    my($cgi,$peg) = @_;


    my $target = "target.$$";
    my $url = "http://seed-viewer.theseed.org/seedviewer.cgi?page=Annotation&feature=$peg";
    return "<a target=$target href=$url>$peg</a>";
}

sub query_link {
    my($cgi) = @_;

    my $cache     = $cgi->param('cache');
    my $genome    = $cgi->param('genome');
    my $reference = $cgi->param('reference');
    my $url  = $cgi->url() . "?request=initial&cache=$cache&genome=$genome&reference=$reference";
    return "<a href=$url>Basic Query Form</a>";
}

sub filter_tab_entries {
    my($tab,$pattern) = @_;

    if (! $pattern)  { return $tab }
    
    my $filtered = [];
    foreach my $entry (@$tab)
    {
	my @tmp = &substr_match([join("\t",@$entry)],$pattern);
	if (@tmp > 0)
	{
	    push(@$filtered,$entry);
	}
    }
    return $filtered;
}

sub process_id {
    my($cgi,$html) = @_;

    my $id = $cgi->param('id');
    if ($id =~ /^\s*(fig\|\d+\.\d+\.peg\.\d+)\s*$/)
    {
	my $peg = $1;
	print $cgi->redirect("http://seed-viewer.theseed.org/seedviewer.cgi?page=Annotation&feature=$peg");
	exit;
    }
    elsif ($id =~ /^\s*(\S+)\s*$/)
    {
	print $cgi->redirect("http://seed-viewer.theseed.org/seedviewer.cgi?page=ACHresults&query=$1");
	exit;
    }
    else
    {
	push(@$html,$cgi->h2('Invalid request'));
    }
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3