[Bio] / FigWebServices / index.cgi Repository:
ViewVC logotype

View of /FigWebServices/index.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (download) (annotate)
Sat Jan 3 06:03:43 2004 UTC (16 years, 3 months ago) by overbeek
Branch: MAIN
Changes since 1.6: +104 -1 lines
changes to annotations to allow extraction of assignments

use FIG;
my $fig = new FIG;

use HTML;
use strict;
use CGI;
my $cgi = new CGI;

my($map,@orgs,$user,$map,$org,$made_by);

if (0)
{
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params)
    {
	print "$_\t:",join(",",$cgi->param($_)),":\n";
    }
    print "=======\n";
    foreach $_ (sort keys(%ENV))
    {
	print "$_\t$ENV{$_}\n";
    }
    exit;
}

$ENV{"PATH"} = "$FIG_Config::bin:$FIG_Config::ext_bin:" . $ENV{"PATH"};

my $html = [];
my @ver = `cat $FIG_Config::fig_disk/CURRENT_RELEASE`;
chop $ver[0];
push(@$html,
	"You are currently running SEED version <b>$ver[0]</b><br>",
	"To start a peer-to-peer update, <a href=$FIG_Config::cgi_url/seed_update_page.cgi>click here</a><br>\n"
     );


my($pattern,$seq_pat,$tool);
my $user = $cgi->param('user');
if (! $user) { $user = "" }

if ($cgi->param('Search for Genes Matching an Occurrence Profile'))
{

    my $url = $cgi->url;
    $ENV{"REQUEST_METHOD"} = "GET";
    $ENV{"QUERY_STRING"} = "user=$user";
    my @out = `./sigs.cgi`;
    &HTML::trim_output(\@out);
    push(@$html,@out);
}
elsif (($pattern = $cgi->param('pattern')) && ($cgi->param('Search') || $cgi->param('Find Genes in Org that Might Play the Role')))
{
    if ($cgi->param('Find Genes in Org that Might Play the Role') &&
	(@orgs = $cgi->param('korgs')) && (@orgs == 1))
    {
	@orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
	$ENV{"REQUEST_METHOD"} = "GET";
	$ENV{"QUERY_STRING"} = "user=$user&request=find_in_org&role=$pattern&org=$orgs[0]";
	my @out = `./pom.cgi`;
	print join("",@out);
	exit;
    }
    else
    {
	&show_indexed_objects($fig,$cgi,$html,$pattern);
    }
}
elsif (($map = $cgi->param('kmap')) && $cgi->param('Metabolic Overview'))
{
    $map =~ s/^.*\((MAP\d+)\).*$/$1/;
    @orgs = $cgi->param('korgs');
    @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
    $ENV{"REQUEST_METHOD"} = "GET";
    if (@orgs > 0)
    {
	$ENV{"QUERY_STRING"} = "user=$user&map=$map&org=$orgs[0]";
    }
    else
    {
	$ENV{"QUERY_STRING"} = "user=$user&map=$map";
    }

    my @out = `./show_kegg_map.cgi`;
    &HTML::trim_output(\@out);
    push(@$html,@out);
}
elsif (($seq_pat = $cgi->param('seq_pat')) && 
       (@orgs = $cgi->param('korgs')) && 
       ($tool = $cgi->param('Tool')) &&
       $cgi->param('Search for Matches'))
{
    @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
    if ($tool =~ /blast/)
    {
	&run_blast($fig,$cgi,$html,$orgs[0],$tool,$seq_pat);
    }
    elsif ($tool =~ /Protein scan_for_matches/)
    {
	&run_prot_scan_for_matches($fig,$cgi,$html,$orgs[0],$seq_pat);
    }
    elsif ($tool =~ /DNA scan_for_matches/)
    {
	&run_dna_scan_for_matches($fig,$cgi,$html,$orgs[0],$seq_pat);
    }
}
elsif (($made_by = $cgi->param('made_by')) && $cgi->param('Extract Assignments'))
{
    &export_assignments($fig,$cgi,$html,$made_by);
}
else
{
    &show_initial($fig,$cgi,$html);
}
&HTML::show_page($cgi,$html,1);

sub show_initial {
    my($fig,$cgi,$html) = @_;
    my($map,$name,$olrg,$gs);

    my($a,$b,$e,$v) = $fig->genome_counts;
    push(@$html,$cgi->h2("Contains $a archaeal, $b bacterial, $e eukaryotic, and $v viral genomes"));
    my($a,$b,$e,$v) = $fig->genome_counts("complete");
    push(@$html,$cgi->h2("Of these, $a archaeal, $b bacterial, and $e eukaryotic genomes are more-or-less complete"),$cgi->hr);
    
    my @maps = sort map { $map = $_; $name = $fig->map_name($map); "$name ($map)" } $fig->all_maps;
    my @orgs = sort map { $org = $_; $gs = $fig->genus_species($org); "$gs ($org)" } $fig->genomes("complete",undef);

    push(@$html,
	        $cgi->start_form(-action => "index.cgi"),
	        $cgi->h1('Searching for Genes or Functional Roles Using Text'),
                "Search Pattern: ",
	        $cgi->textfield(-name => "pattern", -size => 50),
	        $cgi->br,
	        $cgi->br,
	        "User ID: &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; ", 
	        $cgi->textfield(-name => "user", -size => 20),
	        "&nbsp; [optional]",
	        $cgi->br,
	        $cgi->br,
	        $cgi->submit('Search'),
	        $cgi->reset('Clear'),
	        $cgi->hr,
	        $cgi->h1('If You Need to Pick an Organism for Options Below'),
	        $cgi->scrolling_list(-name => 'korgs',
                                     -values => [@orgs],
				     -size => 10
                                    ),
	        $cgi->hr,
	        $cgi->h1('Finding Candidates for a Functional Role'),
	        "Make sure that you type the functional role you want to search for in the Search Pattern above",
	        $cgi->br,
	        $cgi->submit('Find Genes in Org that Might Play the Role'),
	        $cgi->hr,
	        $cgi->h1('Metabolic Overviews (via KEGG) - Choose KEGG Map'),
	        $cgi->submit('Metabolic Overview'),
	        $cgi->br,
	        $cgi->br,
	        $cgi->scrolling_list(-name => 'kmap',
                                     -values => [@maps],
				     -size => 10
                                    ),
	        $cgi->hr,
	        $cgi->h1('Searching DNA or Protein Sequences (in a selected organism)'),
	        "Sequence/Pattern: ",
	        $cgi->textarea(-name => 'seq_pat', -rows => 20, -cols => 70),
	        $cgi->popup_menu(-name => 'Tool', -values => ['blastp','blastx','blastn','tblastn','Protein scan_for_matches','DNA scan_for_matches'], -default => 'blastp'),
	        $cgi->submit('Search for Matches'),
	        $cgi->hr,
	        $cgi->h1('Exporting Assignments'),
	        "Extract assignments made by ",
	        $cgi->textfield(-name => "made_by", -size => 50),
	        $cgi->br,
	        $cgi->checkbox(-label => 'tab-delimited Spreadsheet', -name => 'tabs', -value => 1),
	        $cgi->br,
	        $cgi->submit('Extract Assignments'),
	        $cgi->hr,
	        $cgi->h1('Searching for Interesting Genes'),
	        $cgi->submit('Search for Genes Matching an Occurrence Profile'),
                $cgi->end_form,
	        $cgi->end_html
	 );

}

sub show_indexed_objects {
    my($fig,$cgi,$html,$pattern) = @_;
    my($msg,$i);

    if ($pattern =~ /^\s*(fig\|\d+\.\d+\.peg\.\d+)\s*$/)
    {
	my $peg = $1;
	my $user = $cgi->param('user');
	$user = $user ? $user : "";
	$ENV{'REQUEST_METHOD'} = "GET";
	$ENV{"QUERY_STRING"} = "prot=$peg\&user=$user";
	$ENV{"REQUEST_URI"} =~ s/index.cgi/protein.cgi/;
	my @prot_out = `./protein.cgi`;
	&HTML::trim_output(\@prot_out);
	push(@$html,@prot_out);
	return;
    }

    my($peg_index_data,$role_index_data) = $fig->search_index($pattern);
    my $n = @$peg_index_data;
    if ($n > 100)
    {
	$msg = "Showing First 100 Out of $n PEGs";
	$#{$peg_index_data} = 99;
    }
    else
    {
	$msg = "Showing $n PEGs";
    }

    my $col_hdrs = ["PEG","Organism","Aliases","Function","Who"];
    my $tab = [ map { &format_peg_entry($fig,$cgi,$_) } @$peg_index_data ];
    push(@$html,&HTML::make_table($col_hdrs,$tab,$msg), $cgi->hr);

    $n = @$role_index_data;
    if ($n > 100)
    {
	$msg = "Showing First 100 Out of $n Roles";
	$#{$role_index_data} = 99;
    }
    else
    {
	$msg = "Showing $n Roles";
    }

    $col_hdrs = ["Role"];
    $tab = [ map { &format_role_entry($fig,$cgi,$_) } @$role_index_data ];
    push(@$html,&HTML::make_table($col_hdrs,$tab,$msg));
}

sub format_peg_entry {
    my($fig,$cgi,$entry) = @_;
    my($i,$function,$who);

    my($peg,$gs,$aliases,@funcs) = @$entry;
    my $user = $cgi->param('user');
    $user = $user ? $user : "";

    @funcs = map { $_ =~ s/^function:\s*//; $_ } @funcs;

    if ($aliases)
    {
	$aliases =~ s/^aliases://;
    }
    else
    {
	$aliases = "";
    }

    if ($user)
    {
	for ($i=0; ($i < @funcs) && ($funcs[$i] !~ /\#$user/); $i++) {}
	if ($i < @funcs)
	{
	    ($function,$who) = split(/\#/,$funcs[$i]);
	}
    }
    if (! $function)
    {
	for ($i=0; ($i < @funcs) && ($funcs[$i] !~ /\#master/); $i++) {}
	if ($i < @funcs)
	{
	    ($function,$who) = split(/\#/,$funcs[$i]);
	}
    }

    if ((! $function) && (@funcs > 0))
    {
	($function,$who) = split(/\#/,$funcs[0]);
    }
    return [&HTML::fid_link($cgi,$peg),$gs,$aliases,$function,$who];
}

sub format_role_entry {
    my($fig,$cgi,$entry) = @_;

    return [&HTML::role_link($cgi,$entry)];
}

sub run_prot_scan_for_matches {
    my($fig,$cgi,$html,$org,$pat) = @_;
    my($string,$peg,$beg,$end,$user,$col_hdrs,$tab,$i);

    my $tmp_pat = "$FIG_Config::temp/tmp$$.pat";
    open(PAT,">$tmp_pat")
	|| die "could not open $tmp_pat";
    $pat =~ s/[\s\012\015]+/ /g;
    print PAT "$pat\n";
    close(PAT);
    my @out = `$FIG_Config::ext_bin/scan_for_matches -p $tmp_pat < $FIG_Config::organisms/$org/Features/peg/fasta`;
    if (@out < 1)
    {
	push(@$html,$cgi->h1("Sorry, no hits"));
    }
    else
    {
	if (@out > 2000)
	{
	    push(@$html,$cgi->h1("truncating to the first 1000 hits"));
	    $#out = 1999;
	}

	push(@$html,$cgi->pre);
	$user = $cgi->param('user');
	$col_hdrs = ["peg","begin","end","string","function of peg"];
	for ($i=0; ($i < @out); $i += 2)
	{
	    if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/)
	    {
		$peg = $1;
		$beg = $2;
		$end = $3;
		$string = $out[$i+1];
		chop $string;
		push(@$tab,[&HTML::fid_link($cgi,$peg,1),$beg,$end,$string,scalar $fig->function_of($peg,$user)]);
	    }
	}
	push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
	push(@$html,$cgi->end_pre);
    }
    unlink($tmp_pat);
}

sub run_dna_scan_for_matches {
    my($fig,$cgi,$html,$org,$pat) = @_;
    my($string,$contig,$beg,$end,$col_hdrs,$tab,$i);

    my $tmp_pat = "$FIG_Config::temp/tmp$$.pat";
    open(PAT,">$tmp_pat")
	|| die "could not open $tmp_pat";
    $pat =~ s/[\s\012\015]+/ /g;
    print PAT "$pat\n";
    close(PAT);
    my @out = `cat $FIG_Config::organisms/$org/contigs | $FIG_Config::ext_bin/scan_for_matches -c $tmp_pat`;
    if (@out < 1)
    {
	push(@$html,$cgi->h1("Sorry, no hits"));
    }
    else
    {
	if (@out > 2000)
	{
	    push(@$html,$cgi->h1("truncating to the first 1000 hits"));
	    $#out = 1999;
	}

	push(@$html,$cgi->pre);
	$col_hdrs = ["contig","begin","end","string"];
	for ($i=0; ($i < @out); $i += 2)
	{
	    if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/)
	    {
		$contig = $1;
		$beg = $2;
		$end = $3;
		$string = $out[$i+1];
		chop $string;
		push(@$tab,[$contig,$beg,$end,$string]);
	    }
	}
	push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
	push(@$html,$cgi->end_pre);
    }
    unlink($tmp_pat);
}

sub run_blast {
    my($fig,$cgi,$html,$org,$tool,$seq) = @_;
    my($query,@out);

    my $tmp_seq = "$FIG_Config::temp/tmp$$.seq";

    if ($seq =~ /^\s*([a-zA-Z]{2,4}\|\S+)/)
    {
	my $id = $1;
	$seq = "";
	if (($tool eq "blastp") || ($tool eq "tblastn"))
	{
	    $seq = $fig->get_translation($id);
	}
	elsif ($id =~ /^fig/)
	{
	    my @locs;
	    if ((@locs = $fig->feature_location($id)) && (@locs > 0))
	    {
		$seq = $fig->dna_seq($fig->genome_of($id),@locs);
	    }
	}
	if (! $seq)
	{
	    push(@$html,$cgi->h1("Sorry, could not get sequence for $id"));
	    return;
	}
    }
    elsif ($seq =~ s/^>(\S+)[^\n\012\015]*//)
    {
	$query = $1;
    }
    else
    {
	$query = "query";
    }
    $seq =~ s/\s//g;
    open(SEQ,">$tmp_seq")
	|| die "could not open $tmp_seq";
    print SEQ ">$query\n$seq\n";
    close(SEQ);

    if (! $ENV{"BLASTMAT"}) { $ENV{"BLASTMAT"} = "$FIG_Config::blastmat" }

    if ($tool eq "blastp")
    {
	&verify_db("$FIG_Config::organisms/$org/Features/peg/fasta","p");
	@out = map { &HTML::set_prot_links($cgi,$_) } `$FIG_Config::ext_bin/blastall -i $tmp_seq -d $FIG_Config::organisms/$org/Features/peg/fasta -p blastp`;
    }
    elsif ($tool eq "blastx")
    {
	&verify_db("$FIG_Config::organisms/$org/Features/peg/fasta","p");
	@out = map { &HTML::set_prot_links($cgi,$_) } `$FIG_Config::ext_bin/blastall -i $tmp_seq -d $FIG_Config::organisms/$org/Features/peg/fasta -p blastx`;
    }
    elsif ($tool eq "blastn")
    {
	&verify_db("$FIG_Config::organisms/$org/contigs","n");  ### fix to get all contigs
	@out = `$FIG_Config::ext_bin/blastall -i $tmp_seq -d $FIG_Config::organisms/$org/contigs -p blastn`;
    }
    elsif ($tool eq "tblastn")
    {
	&verify_db("$FIG_Config::organisms/$org/contigs","n");  ### fix to get all contigs
	@out = `$FIG_Config::ext_bin/blastall -i $tmp_seq -d $FIG_Config::organisms/$org/contigs -p tblastn`;
    }

    if (@out < 1)
    {
	push(@$html,$cgi->h1("Sorry, no hits"));
    }
    else
    {
	push(@$html,$cgi->pre);
	push(@$html,@out);
	push(@$html,$cgi->end_pre);
    }
    unlink($tmp_seq);
}

sub verify_db {
    my($db,$type) = @_;

    if ($type =~ /p/i)
    {
	if ((! -s "$db.psq") || (-M "$db.psq" > -M $db))
	{
	    system "$FIG_Config::ext_bin/formatdb -p T -i $db";
	}
    }
    else
    {
	if ((! -s "$db.nsq") || (-M "$db.nsq" > -M $db))
	{
	    system "$FIG_Config::ext_bin/formatdb -p F -i $db";
	}
    }
}	

sub export_assignments {
    my($fig,$cgi,$html,$who) = @_;
    my($genome,$x);

    my @genomes = map { $_ =~ /\((\d+\.\d+)\)/; $1 } $cgi->param('korgs');

    if (@genomes == 0)
    {
	@genomes = $fig->genomes;
    }
    
    my @assignments = &assignments_made($fig,\@genomes,$who,$cgi->param('date'));
    if (@assignments == 0)
    {
	push(@$html,$cgi->h1("Sorry, no assignments where made by $who"));
    }
    else
    {
	my $col_hdrs = ["FIG id", "External ID", "Genus/Species","Assignment"];
	my $tab = [];
	my($x,$peg,$func);
	foreach $x (@assignments)
	{
	    ($peg,$func) = @$x;
	    push(@$tab,[$peg,&ext_id($fig,$peg),$fig->genus_species($fig->genome_of($peg)),$func]);
	}
	if ($cgi->param('tabs'))
	{
	    print $cgi->header;
	    print "<pre>\n";
	    print join("",map { join("\t",@$_) . "\n" } @$tab);
	    print "</pre>\n";
	    exit;
	}
	else
	{
	    push(@$html,&HTML::make_table($col_hdrs,$tab,"Assignments Made by $who"));
	}
    }
}

sub ext_id {
    my($fig,$peg) = @_;

    my @mapped = grep { $_ !~ /^fig/ } map { $_->[0] } $fig->mapped_prot_ids($peg);
    if (@mapped == 0)
    {
	return $peg;
    }

    my @tmp = ();
    if ((@tmp = grep { $_ =~ /^sp/ }   @mapped) && (@tmp > 0))  { return $tmp[0] }
    if ((@tmp = grep { $_ =~ /^pir/ }  @mapped) && (@tmp > 0))  { return $tmp[0] }
    if ((@tmp = grep { $_ =~ /^gi/ }   @mapped) && (@tmp > 0))  { return $tmp[0] }
    if ((@tmp = grep { $_ =~ /^tr/ }   @mapped) && (@tmp > 0))  { return $tmp[0] }
    if ((@tmp = grep { $_ =~ /^tn/ }   @mapped) && (@tmp > 0))  { return $tmp[0] }
    if ((@tmp = grep { $_ =~ /^kegg/ } @mapped) && (@tmp > 0))  { return $tmp[0] }
    
    return $peg;
}

sub assignments_made {
    my($fig,$genomes,$who,$date) = @_;
    my($relational_db_response,$entry,$fid,$fileno,$seek,$len,$ann);

    my %genomes = map { $_ => 1 } @$genomes;
    $date = defined($date) ? $date-1 : 0;
    my @assignments = ();
    my $rdbH = $fig->db_handle;
    if (($relational_db_response = $rdbH->SQL("SELECT fid, fileno, seek, len  FROM annotation_seeks WHERE (( who = \'$who\' ) AND (dateof > $date))")) &&
	(@$relational_db_response > 0))
    {
	foreach $entry (@$relational_db_response)
	{
	    ($fid,$fileno,$seek,$len) = @$entry;
	    if (($fid =~ /^fig\|(\d+\.\d+)/) && $genomes{$1})
	    {
		$ann = $fig->read_annotation($fileno,$seek,$len);

		if (($ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\nSet ([^\n]*)function[^\n]*\n(\S[^\n]+\S)/s) &&
		    (($who eq $3) || (($4 eq "master ") && ($who eq "master"))) && 
		    ($2 >= $date))
		{
		    push(@assignments,[$1,$5]);
		}
	    }
	}
    }
    return @assignments;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3