[Bio] / FigWebServices / index.cgi Repository:
ViewVC logotype

View of /FigWebServices/index.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.111 - (download) (annotate)
Wed Jul 19 21:59:16 2006 UTC (13 years, 8 months ago) by overbeek
Branch: MAIN
Changes since 1.110: +1 -1 lines
RAE: Getting rid of this is the new code

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

### start

use FIG;
use FIG_CGI;

#use strict;
use Tracer;
use FIGjs          qw( toolTipScript );
use GenoGraphics   qw( render );
use gjoparseblast  qw( next_blast_hsp );

use URI::Escape;  # uri_escape
use POSIX;
use HTML;

my($fig, $cgi, $user);
my $this_script = "index.cgi";

eval {
    ($fig, $cgi, $user) = FIG_CGI::init(debug_save => 0,
					debug_load => 0,
					print_params => 0);
};

if ($@ ne "")
{
    my $err = $@;

    my(@html);

    push(@html, $cgi->p("Error connecting to SEED database."));
    if ($err =~ /Could not connect to DBI:.*could not connect to server/)
    {
        push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
    }
    else
    {
        push(@html, $cgi->pre($err));
    }
    &HTML::show_page($cgi, \@html, 1);
    exit;
}

Trace("Connected to FIG.") if T(2);
my($map,@orgs,$user,$map,$org,$made_by,$from_func,$to_func);

#for my $k (sort keys %ENV)
#{
#    warn "$k=$ENV{$k}\n";
#}

$ENV{"PATH"} = "$FIG_Config::bin:$FIG_Config::ext_bin:" . $ENV{"PATH"};

my $html = [];

my($pattern,$seq_pat,$tool,$ids,$subsearch);

my $user = $cgi->param('user');

if ($cgi->param('Search for Genes Matching an Occurrence Profile or Common to a Set of Organisms'))
{
    Trace("Gene search chosen.") if T(2);
    unshift @$html, "<TITLE>The SEED: Phylogenetic Signatures</TITLE>\n";
    $ENV{"REQUEST_METHOD"} = "GET";
    $ENV{"QUERY_STRING"} = "user=$user";
    my @out = `./sigs.cgi`;
    print @out;
    exit;
}
#-----------------------------------------------------------------------
#  Statistics for a single organism
#-----------------------------------------------------------------------
elsif ($cgi->param('statistics'))
{
    Trace("Statistics chosen.") if T(2);
    @orgs = $cgi->param('korgs');
    @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
    if (@orgs != 1)
    {
	unshift @$html, "<TITLE>The SEED Statistics Page</TITLE>\n";
	push(@$html,$cgi->h1('Please select a single organism to get statistcs'));
    }
    else
    {
        $ENV{"REQUEST_METHOD"} = "GET";
        $ENV{"QUERY_STRING"} = "user=$user&genome=$orgs[0]";
        my @out = `./genome_statistics.cgi`;
        print @out;
        exit;
    }
}
#-----------------------------------------------------------------------
#  Locate PEGs in Subsystems
#-----------------------------------------------------------------------
elsif ($cgi->param('Find PEGs') && ($subsearch = $cgi->param('subsearch')))
{
    Trace("PEG find chosen.") if T(2);
    my $genome = $cgi->param('genome');
    my (@pegs,$peg);

    my @poss = $fig->by_alias($subsearch);
    if (@poss > 0)    { $subsearch = $poss[0] }

    if ($subsearch =~ /(fig\|\d+\.\d+\.peg\.\d+)/)
    {
#       handle searching for homologs that occur in subsystems
        $peg = $1;
        @pegs = ($peg);
        push(@pegs,map { $_->id2 } $fig->sims( $peg, 500, 1.0e-10, "fig"));
        if ($genome) 
        {
            my $genomeQ = quotemeta $genome;
            @pegs = grep { $_ =~ /^fig\|$genomeQ/ } @pegs;
        }
    }
    else
    {
#       handle searching for PEGs with functional role in subsystems
        @pegs = $fig->seqs_with_role($subsearch,"master",$genome);
    }
    
    print $cgi->header;
    if (@pegs == 0)
    {
        print $cgi->h1("Sorry, could not even find PEGs to check");
    }
    else
    {
        my(@pairs,$pair,@sub);
        @pairs = map { $peg = $_; 
                      @sub = $fig->peg_to_subsystems($peg);
                      map { [$peg,$_] } @sub } @pegs;
        if (@pairs == 0)
        {
            print $cgi->h1("Sorry, could not map any PEGs to subsystems");
        }
        else
        {
            my($uni,$uni_func);
            my $col_hdrs = ["PEG","Genome","Function","UniProt","UniProt Function","Subsystem"];
            my $tab = [ map { $pair = $_; $uni = $fig->to_alias($pair->[0],"uni");
                              ($uni,$uni_func) = $uni ? (&HTML::uni_link($cgi,$uni),scalar $fig->function_of($uni)) : ("","");
                              [&HTML::fid_link($cgi,$pair->[0]),
                               $fig->org_of($pair->[0]),
                               scalar $fig->function_of($pair->[0]),
                               $uni,$uni_func,
                               &HTML::sub_link($cgi,$pair->[1])] } @pairs];
            print &HTML::make_table($col_hdrs,$tab,"PEGs that Occur in Subsystems");
        }
    }
    exit;
}
#-----------------------------------------------------------------------
#  Align Sequences
#-----------------------------------------------------------------------
elsif ($cgi->param('Align Sequences'))
{
    Trace("Sequence alignment chosen.");
    my $seqs = $cgi->param('seqids');
    $seqs =~ s/^\s+//;
    $seqs =~ s/\s+$//;
    my @seq_ids = split(/[ \t,;]+/,$seqs);
    if (@seq_ids < 2)
    {
        print $cgi->header;
        print $cgi->h1("Sorry, you need to specify at least two sequence IDs");
    }
    else
    {
        $ENV{"REQUEST_METHOD"} = "GET";
        $_ = join('&checked=',@seq_ids);
        $ENV{"QUERY_STRING"} = "user=$user&align=1&checked=" . $_;
        my @out = `./fid_checked.cgi`;
        print join("",@out);
    }
    exit;
}
#-----------------------------------------------------------------------
#  Search (text) || Find Genes in Org that Might Play the Role
#-----------------------------------------------------------------------
elsif ( ( $pattern = $cgi->param('pattern') )
     && ( $cgi->param('Search')
       || $cgi->param('Search genome selected below')
       || $cgi->param('Search Selected Organisms')
       || $cgi->param('Find Genes in Org that Might Play the Role')
        )
      )
{
    Trace("Pattern search chosen.") if T(2);
    #  Remove leading and trailing spaces from pattern -- GJO:
    $pattern =~ s/^\s+//;
    $pattern =~ s/\s+$//;
    if ($cgi->param('Find Genes in Org that Might Play the Role') &&
        (@orgs = $cgi->param('korgs')) && (@orgs == 1))
    {
        unshift @$html, "<TITLE>The SEED: Genes in that Might Play Specific Role</TITLE>\n";
        @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
        $ENV{"REQUEST_METHOD"} = "GET";
        $ENV{"QUERY_STRING"} = "user=$user&request=find_in_org&role=$pattern&org=$orgs[0]";
        my @out = `./pom.cgi`;
        print join("",@out);
        exit;
    }
    else
    {
        unshift @$html, "<TITLE>The SEED: Search Results</TITLE>\n";
        &show_indexed_objects($fig, $cgi, $html, $pattern);
    }
}
#-----------------------------------------------------------------------
#  Metabolic Overview
#-----------------------------------------------------------------------
elsif (($map = $cgi->param('kmap')) && $cgi->param('Metabolic Overview'))
{
    Trace("Metabolic overview chosen.") if T(2);
    if ($map =~ /\(([^)]*)\)$/)
    {
        $map = $1;
    }
    else
    {
        # ??? Gary ???
    }

    #$map =~ s/^.*\((MAP\d+)\).*$/$1/;
    @orgs = $cgi->param('korgs');
    @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
    $ENV{"REQUEST_METHOD"} = "GET";
    if (@orgs > 0)
    {
        $ENV{"QUERY_STRING"} = "user=$user&map=$map&org=$orgs[0]";
    }
    else
    {
        $ENV{"QUERY_STRING"} = "user=$user&map=$map";
    }

    unshift @$html, "<TITLE>The SEED: Metabolic Overview</TITLE>\n";
    my @out = `./show_map.cgi`;
    &HTML::trim_output(\@out);
    push( @$html, "<br>\n", @out );
}

#-----------------------------------------------------------------------
#  Search for Matches (sequence or pattern)
#-----------------------------------------------------------------------
elsif (($seq_pat = $cgi->param('seq_pat')) && 
       ($tool = $cgi->param('Tool')) &&
       $cgi->param('Search for Matches'))
{
    Trace("Match search chosen.") if T(2);
    @orgs = $cgi->param('korgs');
    if (@orgs > 0)
    {
        @orgs = map { $_ =~ /\((\d+\.\d+)\)/; $1 } @orgs;
    }
    else
    {
        @orgs = ("");
    }

    if ($tool =~ /blast/)
    {
        unshift @$html, "<TITLE>The SEED: BLAST Search Results</TITLE>\n";
        &run_blast($fig,$cgi,$html,$orgs[0],$tool,$seq_pat);
    }
    elsif ($tool =~ /Protein scan_for_matches/)
    {
        unshift @$html, "<TITLE>The SEED: Protein Pattern Match Results</TITLE>\n";
        &run_prot_scan_for_matches($fig,$cgi,$html,$orgs[0],$seq_pat);
    }
    elsif ($tool =~ /DNA scan_for_matches/)
    {
        unshift @$html, "<TITLE>The SEED: Nucleotide Pattern Match Results</TITLE>\n";
        &run_dna_scan_for_matches($fig,$cgi,$html,$orgs[0],$seq_pat);
    }
}
elsif (($made_by = $cgi->param('made_by')) && $cgi->param('Extract Assignments'))
{
    Trace("Assignment export chosen.") if T(2);
    &export_assignments($fig,$cgi,$html,$made_by);
}
elsif ($cgi->param('Generate Assignments via Translation') &&
       ($from_func = $cgi->param('from_func')) &&
       ($to_func = $cgi->param('to_func')))
{
    Trace("Assignment translate chosen.") if T(2);
    &translate_assignments($fig,$cgi,$html,$from_func,$to_func);
}

elsif ($cgi->param('Extract Matched Sequences') && ($ids = $cgi->param('ids')))
{
    Trace("Matched sequence extract chosen.") if T(2);
    my @ids = split(/,/,$ids);

    #  Truncate the list if requested:

    my($list_to,$i);
    if ($list_to = $cgi->param('list_to'))
    {
        for ($i=0; ($i < @ids) && ($ids[$i] ne $list_to); $i++) {}
        if ($i < @ids)
        {
            $#ids = $i;
        }
    }

    #  Print the sequences:
    #     Add organisms -- GJO

    my( $id, $seq, $desc, $func, $org );
    push( @$html, $cgi->pre );
    foreach $id (@ids)
    {
        if ($seq = $fig->get_translation($id))
        {
            $desc  = $id;
            if ( $func = $fig->function_of( $id ) )
            {
                $desc .= " $func";
            }
            if ( $org  = $fig->genus_species( $fig->genome_of( $id ) ) )
            {
                $desc .= " [$org]" if $org;
            }
            push( @$html, ">$desc\n" );
            for ($i=0; ($i < length($seq)); $i += 60)
            {
                #  substr does not mind a request for more than length
                push( @$html, substr( $seq, $i, 60 ) . "\n" );
            }
        }
    }
    push(@$html,$cgi->end_pre);
}

#-----------------------------------------------------------------------
#  Initial search page
#-----------------------------------------------------------------------
else
{
    Trace("SEED Entry page chosen.") if T(2);
    unshift @$html, "<TITLE>The SEED: Entry Page</TITLE>\n";
    &show_initial($fig,$cgi,$html);
}
&HTML::show_page($cgi,$html,1);
exit;


#==============================================================================
#  Initial page (alias search)
#==============================================================================

sub show_initial {
    my($fig,$cgi,$html) = @_;
    my($map,$name,$olrg,$gs);


    #
    # Display the message of the day, if present.
    #

    show_motd($fig, $cgi, $html);

    my( $a, $b, $e, $v, $env ) = $fig->genome_counts;
    push(@$html,$cgi->h2("Contains $a archaeal, $b bacterial, $e eukaryal, $v viral and $env environmental genomes"));
    my( $a, $b, $e ) = $fig->genome_counts("complete");
    push(@$html,$cgi->h2("Of these, $a archaeal, $b bacterial and $e eukaryal genomes are more-or-less complete"),$cgi->hr);

    push(@$html,
         $cgi->h2('Work on Subsystems'),

#        $cgi->start_form(-action => "ssa2.cgi"),
#        "Enter user: ",
#        $cgi->textfield(-name => "user", -size => 20),
#        $cgi->submit('Work on Subsystems'),
#        $cgi->end_form,

#        $cgi->h2('Work on Subsystems Using New, Experimental Code'),
#         "This is the <i>new</i> subsystems code, and is now officially released.",
         $cgi->start_form(-action => "subsys.cgi"),
         "Enter user: ",
         $cgi->textfield(-name => "user", -size => 20),
         $cgi->submit('Work on Subsystems'),
         $cgi->end_form,
         $cgi->hr,
        );

    push( @$html,
          $cgi->start_form(-action => $this_script),
          "<table>\n",
          "<tr>",
              "<td colspan=2>", $cgi->h2('Searching for Genes or Functional Roles Using Text'), "</td>",
              "<td align=right><a href='sdk_uniprot_search.cgi'>UniProt WebService Search</a></td>", 
          "</tr>\n",
          "<tr>",
              "<td>Search Pattern: </td>",
              "<td>", $cgi->textfield(-name => "pattern", -size => 65), "</td>",
              "<td>", "Search <select name=search_kind>
		           <option value=DIRECT  >Directly</option>
		           <option value=GO  >Via Gene Ontology</option>
		           <option value=HUGO  >Via HUGO Gene Nomenclature Committee</option>
	               </select></td>",
          "</tr>\n",
          "<tr>",
              "<td>User ID:</td>",
              "<td>",
                  $cgi->textfield(-name => "user", -size => 20), " [optional] &nbsp; &nbsp; ",
                  "Max Genes: ", $cgi->textfield(-name => "maxpeg",  -size => 6, -value => 100), "&nbsp; &nbsp; ",
                  "Max Roles: ", $cgi->textfield(-name => "maxrole", -size => 6, -value => 100), "</td>",
              "<td>", $cgi->checkbox(-name => "substring_match", -label => 'Allow substring match'), "</td>",
          "</tr>\n",
          "</table>\n",
          $cgi->submit('Search'),
          $cgi->submit('Search genome selected below'),
          $cgi->reset('Clear'),
          $cgi->hr
        );

    my @display = ( 'All', 'Archaea', 'Bacteria', 'Eucarya', 'Viruses', 'Environmental samples' );

    #
    #  Canonical names must match the keywords used in the DBMS.  They are
    #  defined in compute_genome_counts.pl
    #
    my %canonical = (
        'All'                   =>  undef,
        'Archaea'               => 'Archaea',
        'Bacteria'              => 'Bacteria',
        'Eucarya'               => 'Eukaryota',
        'Viruses'               => 'Virus',
        'Environmental samples' => 'Environmental Sample'
        );

    my $req_dom = $cgi->param( 'domain' ) || 'All';
    my @domains = $cgi->radio_group( -name     => 'domain',
                                     -default  => $req_dom,
                                     -override => 1,
                                     -values   => [ @display ]
                                   );

    my $n_domain = 0;
    my %dom_num = map { ( $_, $n_domain++ ) } @display;
    my $req_dom_num = $dom_num{ $req_dom } || 0;

    #
    #  Viruses and Environmental samples must have completeness = All (that is
    #  how they are in the database).  Otherwise, default is Only "complete".
    #
    my $req_comp = ( $req_dom_num > $dom_num{ 'Eucarya' } ) ? 'All'
                 : $cgi->param( 'complete' ) || 'Only "complete"';
    my @complete = $cgi->radio_group( -name     => 'complete',
                                      -default  => $req_comp,
                                      -override => 1,
                                      -values   => [ 'All', 'Only "complete"' ]
                        );
    #
    #  Use $fig->genomes( complete, restricted, domain ) to get org list:
    #
    my $complete = ( $req_comp =~ /^all$/i ) ? undef : "complete";
    
    my @orgs;
    foreach my $org ($fig->genomes( $complete, undef, $canonical{ $req_dom } ))
    {
        my $gs = $fig->genus_species($org);
        if ($fig->genome_domain($org) ne "Environmental Sample")
        {
            my $gc=$fig->number_of_contigs($org);
            push @orgs, "$gs ($org) [$gc contigs]";
        }
        else 
        {
            push @orgs, "$gs ($org)";
        }
    }
    @orgs=sort @orgs;
            


    my $n_genomes = @orgs;

    my $link;
    ( $link = $cgi->url(-relative => 1) ) =~ s/index\.cgi/show_log.cgi/;

    push( @$html, $cgi->h2('If You Need to Pick a Genome for Options Below'),"&nbsp;[<a href=$link>Log</a>]",
                  "<TABLE>\n",
                  "   <TR>\n",
                  "      <TD>",
                  $cgi->scrolling_list( -name   => 'korgs',
                                        -values => [ @orgs ],
                                        -size   => 10,
                                      ), $cgi->br,
                  "$n_genomes genomes shown ",
                  $cgi->submit( 'Update List' ), $cgi->reset, $cgi->br,
                  "Show some ", $cgi->submit('statistics')," of the selected genome",
                  "      </TD>");
 
    push(@$html,
                  "      <TD>",
                  join( "<br>", "<b>Domain(s) to show:</b>", @domains), "<br>\n",
                  join( "<br>", "<b>Completeness?</b>", @complete), "\n",
                  "</TD>",
                  "   </TR>\n",
                  "</TABLE>\n",
                  $cgi->hr
        );

    push( @$html, $cgi->h2('Finding Candidates for a Functional Role'),
                "Make sure that you type the functional role you want to search for in the Search Pattern above",
                $cgi->br,
                $cgi->submit('Find Genes in Org that Might Play the Role'),
                $cgi->hr);

    my @maps = sort map { $map = $_; $name = $fig->map_name($map); "$name ($map)" } $fig->all_maps;

    push( @$html, $cgi->h2('Metabolic Overviews and Subsystem Maps (via KEGG & SEED) - Choose Map'),
                $cgi->submit('Metabolic Overview'),
                $cgi->br,
                $cgi->br,
                $cgi->scrolling_list(-name => 'kmap',
                                     -values => [@maps],
                                     -size => 10
                                    ),
                $cgi->hr);

    push( @$html, $cgi->h2('Searching DNA or Protein Sequences (in a selected organism)'),
                "<TABLE>\n",
                "    <TR>\n",
                "        <TD>Sequence/Pattern: </TD>",
                "        <TD Colspan=3>", $cgi->textarea(-name => 'seq_pat', -rows => 10, -cols => 70), "</TD>\n",
                "    </TR>\n",
                "    <TR>\n",
                "        <TD>Search Program: </TD>",
                "        <TD>", $cgi->popup_menu(-name => 'Tool', -values => ['blastp', 'blastx', 'blastn', 'tblastn', 'blastp against complete genomes', 'Protein scan_for_matches', 'DNA scan_for_matches'], -default => 'blastp'), " </TD>",
                "        <TD> Program Options:</TD>",
                "        <TD>", $cgi->textfield( -name => "blast_options", -size => 27 ), "</TD>",
                "    </TR>\n",
                "</TABLE>\n",
                $cgi->submit('Search for Matches'),
                $cgi->hr);

    #
    # Make assignment export tbl.
    #

    my @atbl;
    push(@atbl, [ "Extract assignments made by ",
                  $cgi->textfield(-name => "made_by", -size => 25) . " (do not prefix with <b>master:</b>)" ]);
    push(@atbl, [ "Save as user: ",
                  $cgi->textfield(-name => "save_user", -size => 25) . " (do not prefix with <b>master:</b>)" ] );
    push(@atbl, [ "After date (MM/DD/YYYY) ",
                  $cgi->textfield(-name => "after_date", -size => 15)]);

    push(@$html, 
         $cgi->h2($cgi->a({name => "exporting_assignments"}, 'Exporting Assignments')),
         &HTML::make_table(undef, \@atbl, '', border => 0),
                $cgi->checkbox(-label => 'Tab-delimited Spreadsheet', -name => 'tabs', -value => 1),
                $cgi->br,
                $cgi->checkbox(-label => 'Save Assignments', -name => 'save_assignments', -value => 1),
                $cgi->br,
                $cgi->submit('Extract Assignments'),
                $cgi->br, $cgi->br, $cgi->br,
                "Alternatively, you can generate a set of assignments as translations of existing assignments.  ",
                "To do so, you need to make sure that you fill in the <b>Save as user</b> field just above.  You ",
                "should use something like <b>RossO</b> (leave out the <b>master:</b>).  When you look at the assignments (and decide which ",
                "to actually install), they will be made available under that name (but, when you access them, ",
                "you will normally be using something like <b>master:RossO</b>)",
                $cgi->br,$cgi->br,
                "From: ",
                $cgi->textarea(-name => 'from_func', -rows => 4, -cols => 100),
                $cgi->br,$cgi->br,
                "To:&nbsp;&nbsp;&nbsp;&nbsp; ",$cgi->textfield(-name => "to_func", -size => 100), 
                $cgi->br,
                "<TABLE Width=100%><TR><TD>",
                $cgi->submit('Generate Assignments via Translation'),
                "</TD><TD NoWrap Width=1%>",
                $cgi->a({class=>"help", target=>"help", href=>"Html/seedtips.html#replace_names"}, "Help with generate assignments via translation"),
                "</TD></TR></TABLE>\n"
         );

    push(@$html, 
                $cgi->hr,
                $cgi->h2('Searching for Interesting Genes'),
                $cgi->submit('Search for Genes Matching an Occurrence Profile or Common to a Set of Organisms'),
                $cgi->end_form
         );

    push(@$html,
                $cgi->hr,
                $cgi->h2('Process Saved Assignments Sets'),
                $cgi->start_form(-action => "assignments.cgi"),
                "Here you should include the <b>master:</b>.  Thus use something like <b>master:RossO</b>",$cgi->br,
                $cgi->br,
                "Enter user: ",
                $cgi->textfield(-name => "user", -size => 20),
                $cgi->submit('Process Assignment Sets'),
                $cgi->end_form
         );

    push(@$html,
                $cgi->hr,
                $cgi->h2('Align Sequences'),
                $cgi->start_form(-action => $this_script),
                "Enter user: ",
                $cgi->textfield(-name => "user", -size => 20), $cgi->br,
                $cgi->submit('Align Sequences'),": ",
                $cgi->textfield(-name => "seqids", -size => 100),
                $cgi->end_form
         );

    push(@$html,
                $cgi->hr,
                $cgi->h2('Locate PEGs in Subsystems'),
                "If you wish to locate PEGs in subsystems, you have two approaches supported.  You can
give a FIG id, and you will get a list of all homologs in the designated genome that occur in subsystems.
Alternatively, you can specify a functional role, and all PEGs in the genome that match that role will be shown.",
                $cgi->start_form(-action => $this_script),
                "Enter user: ",
                $cgi->textfield(-name => "user", -size => 20), $cgi->br,
                $cgi->br,"Genome: ",$cgi->textfield(-name => "genome", -size => 15),$cgi->br,
                "Search: ",$cgi->textfield(-name => "subsearch", -size => 100),$cgi->br,
                $cgi->submit('Find PEGs'),": ",
                $cgi->end_form
         );
}

#
# Show a message of the day if it's present.
#
sub show_motd
{
    my($fig, $cgi, $html) = @_;

    my $motd_file = "$FIG_Config::fig_disk/config/motd";

    if (open(F, "<$motd_file"))
    {
	push(@$html, "<p>\n");
	while (<F>)
	{
	    push(@$html, $_);
	}
	close(F);
	push(@$html, "<hr>\n");
    }
}

#==============================================================================
#  Indexed objects (text search)
#==============================================================================

sub show_indexed_objects {
    my($fig, $cgi, $html, $pattern) = @_;
    my($msg, $i);

    if ($pattern =~ /^\s*(fig\|\d+\.\d+\.peg\.\d+)\s*$/)
    {
        my $peg = $1;
        my $user = $cgi->param('user');
        $user = $user ? $user : "";
	my @prot_out;
	if (defined($cgi->param('fromframe'))) {
	  $ENV{'REQUEST_METHOD'} = "GET";
	  $ENV{"QUERY_STRING"} = "prot=$peg\&user=$user\&action=proteinpage";
	  $ENV{"REQUEST_URI"} =~ s/$this_script/frame.cgi/;
	  @prot_out = TICK("./frame.cgi");
	} else {
	  $ENV{'REQUEST_METHOD'} = "GET";
	  $ENV{"QUERY_STRING"} = "prot=$peg\&user=$user";
	  $ENV{"REQUEST_URI"} =~ s/$this_script/protein.cgi/;
	  @prot_out = TICK("./protein.cgi");
	}
        print @prot_out;
        exit;
    }
    $pattern =~ s/([a-zA-Z0-9])\|([a-zA-Z0-9])/$1\\\|$2/ig;

    my $search_kind = $cgi->param("search_kind");
    if ( $search_kind && ! ($search_kind eq "DIRECT") ) {
	#otherwise $search_kind is name of controlled vocab
	find_pegs_by_cv($fig, $cgi, $html, $user, $pattern, $search_kind);
	return;
    }

    push( @$html, $cgi->br );
    my( $peg_index_data, $role_index_data ) = $fig->search_index($pattern, $cgi->param("substring_match") eq "on");
    my $maxpeg  = defined( $cgi->param("maxpeg")  ) ? $cgi->param("maxpeg")  : 100;
    my $maxrole = defined( $cgi->param("maxrole") ) ? $cgi->param("maxrole") : 100;
    
    my $output_file = "$FIG_Config::temp/search_results.txt"; 	
    open(OUT,">$output_file");

    # RAE added lines to allow searching within a single organism
    # if ($cgi->param('korgs'))
    # {
    #  $cgi->param('korgs') =~ /\((\d+\.*\d*)\)/;
    #  $org=$1; # this should be undef if korgs is not defined

    #  push (@$html, $cgi->br, "Matches found in ",$cgi->param('korgs'), $cgi->p);
    #  my @clean_data; my @clean_index;
    #  while (@$peg_index_data)
    #  {
    #   my ($data, $index)=(shift @$peg_index_data, shift @$role_index_data);
    #   next unless (${$data}[0] =~ /^fig\|$org\.peg/);
    #   push @clean_data, $data;
    #   push @clean_index, $index;
    #  }

    #  @$peg_index_data=@clean_data;
    #  @$role_index_data=@clean_index;
    # }
    ## End of added lines

    # RAE version with separate submit buttoxns and more than one org in korg
    # this is used by organisms.cgi for group specific searches
    if ( $cgi->param('korgs') && $cgi->param('Search Selected Organisms') 
       )
    {
      my @temp;
      foreach my $org ($cgi->param('korgs')) 
      {
         push @temp, grep { $_->[0] =~ /^fig\|$org/ } @$peg_index_data;
      }
      @$peg_index_data = @temp;
    }

    # GJO version with separate submit buttons

    if ( $cgi->param('korgs') && $cgi->param('korgs') =~ /\((\d+\.*\d*)\)/
                              && $cgi->param('Search genome selected below')
       )
    {
        my $org = $1;
        push @$html, $cgi->br, "Matches found in ",$cgi->param('korgs'), $cgi->p;
        @$peg_index_data = grep { $_->[0] =~ /^fig\|$org\.*/ } @$peg_index_data;
    }

    if ( ( $maxpeg > 0 ) && @$peg_index_data )
    {
        # RAE: Added javascript buttons see below. Only two things are needed.
        # The form must have a name parameter, and the one line of code for the
        # buttons. Everything else is automatic

        push( @$html, $cgi->start_form( -method => 'post',
                                        -target => "window$$",
                                        -action => 'fid_checked.cgi',
                                        -name   => 'found_pegs'
                                      ),
                      $cgi->hidden(-name => 'user', -value => $user),
                      "For Selected (checked) sequences: ",
                      $cgi->submit('get sequences'),
                      $cgi->submit('view annotations'),
                      $cgi->submit('assign/annotate'),
                      $cgi->param('SPROUT') ? () : $cgi->submit('view similarities'),
                      $cgi->br, $cgi->br
            );

        # RAE Add the check all/uncheck all boxes.
        push (@$html, $cgi->br, &HTML::java_buttons("found_pegs", "checked"), $cgi->br);
        
        my $n = @$peg_index_data;
        if ($n > $maxpeg)
        {
            $msg = "Showing first $maxpeg out of $n protein genes";
            $#{$peg_index_data} = $maxpeg-1;
        }
        else
        {
            $msg = "Showing $n FEATURES";
        }

        my $col_hdrs = ["Sel","FEATURE","Organism","Aliases","Function","Who"];
        my $tab = [ map { format_peg_entry( $fig, $cgi, $_ ) } sort {$a->[1] cmp $b->[1]} @$peg_index_data ];

	my $tab2 = [ sort {$a->[1] cmp $b->[1]} @$peg_index_data ];
        
	push( @$html,$cgi->br, 
		      "<a href=$FIG_Config::temp_url/search_results.txt>Download_Search_Results</a>",
		      &HTML::make_table($col_hdrs,$tab,$msg),
                      $cgi->br,
                      "For SELECTed (checked) sequences: ",
                      $cgi->submit('get sequences'),
                      $cgi->submit('view annotations'),
                      $cgi->submit('assign/annotate'),
                      $cgi->param('SPROUT') ? () : $cgi->submit('view similarities'),
                      $cgi->br,
                      $cgi->end_form
         );
     
	foreach my $t (@$tab2){
		my $string = join("\t",@$t); 
		print OUT "$string\n";
	}
        
    }
    elsif ( $maxpeg > 0 )
    {
        push @$html, $cgi->h3('No matching protein genes');
    }

    if ( ( $maxrole > 0 ) && @$role_index_data )
    {
        my $n = @$role_index_data;
        if ($n > $maxrole)
        {
            $msg = "Showing first $maxrole out of $n Roles";
            $#{$role_index_data} = $maxrole - 1;
        }
        else
        {
            $msg = "Showing $n Roles";
        }

        if ( $maxpeg > 0 ) { push( @$html, $cgi->hr ) }
        my $col_hdrs = ["Role"];
        my $tab = [ map { &format_role_entry($fig,$cgi,$_) } @$role_index_data ];
        push( @$html, &HTML::make_table($col_hdrs,$tab,$msg) );
    }
    elsif ( $maxrole > 0 )
    {
        push @$html, $cgi->h3('No matching roles');
    }
}


sub format_peg_entry {
    my( $fig, $cgi, $entry ) = @_;
    my($i,$function,$who);

    my($peg,$gs,$aliases,@funcs) = @$entry;

    $gs =~ s/\s+\d+$//;   # Org name comes with taxon_id appended (why?) -- GJO

    @funcs = map { $_ =~ s/^function:\s*//; $_ } @funcs;

    if ($aliases)
    {
        $aliases =~ s/^aliases://;
    }
    else
    {
        $aliases = "";
    }

    my $user = $cgi->param('user');
    $user = $user ? $user : "";

    if ($user)
    {
        for ($i=0; ($i < @funcs) && ($funcs[$i] !~ /\#$user/); $i++) {}
        if ($i < @funcs)
        {
            ($function,$who) = split(/\#/,$funcs[$i]);
        }
    }

    if (! $function)
    {
        for ($i=0; ($i < @funcs) && ($funcs[$i] !~ /\#master/); $i++) {}
        if ($i < @funcs)
        {
            ($function,$who) = split(/\#/,$funcs[$i]);
        }
    }

    if ((! $function) && (@funcs > 0))
    {
        ($function,$who) = split(/\#/,$funcs[0]);
    }
    my $box = "<input type=checkbox name=checked value=\"$peg\">";
    return [ $box, &HTML::fid_link($cgi,$peg), $gs, $aliases, $function, $who ];
}

sub format_role_entry {
    my($fig,$cgi,$entry) = @_;

    return [&HTML::role_link($cgi,$entry)];
}

sub run_prot_scan_for_matches {
    my($fig,$cgi,$html,$org,$pat) = @_;
    my($string,$peg,$beg,$end,$user,$col_hdrs,$tab,$i);

    my $tmp_pat = "$FIG_Config::temp/tmp$$.pat";
    open(PAT,">$tmp_pat")
        || die "could not open $tmp_pat";
    $pat =~ s/[\s\012\015]+/ /g;
    print PAT "$pat\n";
    close(PAT);
    my @out = `$FIG_Config::ext_bin/scan_for_matches -p $tmp_pat < $FIG_Config::organisms/$org/Features/peg/fasta`;
    if (@out < 1)
    {
        push(@$html,$cgi->h1("Sorry, no hits"));
    }
    else
    {
        if (@out > 2000)
        {
            push(@$html,$cgi->h1("truncating to the first 1000 hits"));
            $#out = 1999;
        }

        push(@$html,$cgi->pre);
        $user = $cgi->param('user');
        $col_hdrs = ["peg","begin","end","string","function of peg"];
        for ($i=0; ($i < @out); $i += 2)
        {
            if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/)
            {
                $peg = $1;
                $beg = $2;
                $end = $3;
                $string = $out[$i+1];
                chomp $string;
                push( @$tab, [ &HTML::fid_link($cgi,$peg,1),
                               $beg,
                               $end,
                               $string,
                               scalar $fig->function_of( $peg, $user )
                             ]
                    );
            }
        }
        push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
        push(@$html,$cgi->end_pre);
    }
    unlink($tmp_pat);
}

#==============================================================================
#  Scan for matches
#==============================================================================

sub run_dna_scan_for_matches {
    my($fig,$cgi,$html,$org,$pat) = @_;
    my($string,$contig,$beg,$end,$col_hdrs,$tab,$i);

    my $tmp_pat = "$FIG_Config::temp/tmp$$.pat";
    open(PAT,">$tmp_pat")
        || die "could not open $tmp_pat";
    $pat =~ s/[\s\012\015]+/ /g;
    print PAT "$pat\n";
    close(PAT);
    my @out = `cat $FIG_Config::organisms/$org/contigs | $FIG_Config::ext_bin/scan_for_matches -c $tmp_pat`;
    if (@out < 1)
    {
        push(@$html,$cgi->h1("Sorry, no hits"));
    }
    else
    {
        if (@out > 2000)
        {
            push(@$html,$cgi->h1("truncating to the first 1000 hits"));
            $#out = 1999;
        }

        push(@$html,$cgi->pre);
        $col_hdrs = ["contig","begin","end","string"];
        for ($i=0; ($i < @out); $i += 2)
        {
            if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/)
            {
                $contig = $1;
                $beg = $2;
                $end = $3;
                $string = $out[$i+1];
                chomp $string;
                push(@$tab,[$contig,$beg,$end,$string]);
            }
        }
        push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
        push(@$html,$cgi->end_pre);
    }
    unlink($tmp_pat);
}

#==============================================================================
#  BLAST search
#==============================================================================

sub run_blast {
    my( $fig, $cgi, $html, $org, $tool, $seq ) = @_;
    my( $query, @out );

    my $tmp_seq = "$FIG_Config::temp/run_blast_tmp$$.seq";

    #--------------------------------------------------------------------------
    #  Does the request require a defined genome?  We never check that the
    #  database build works, so the least we can do is some up-front tests.
    #  -- GJO
    #--------------------------------------------------------------------------

    if ( $tool !~ /complete genomes/ )
    {
        if ( ! $org || ! -d "$FIG_Config::organisms/$org" )
        {
            push @$html, $cgi->h2("Sorry, $tool requires selecting a genome." );
            return;
        }

        if ( ( $tool =~ /blastn/ ) || ( $tool =~ /tblastx/ ) )
        {
            if ( ! -f "$FIG_Config::organisms/$org/contigs" )
            {
                push @$html, $cgi->h2("Sorry, cannot find DNA data for genome $org." );
                return;
            }
        } 
        else
        {
            if ( ! -f "$FIG_Config::organisms/$org/Features/peg/fasta" )
            {
                push @$html, $cgi->h2("Sorry, cannot find protein data for genome $org." );
                return;
            }
        } 
    }

    #--------------------------------------------------------------------------
    #  Is the request for an id?  Get the sequence
    #--------------------------------------------------------------------------

    if ( ( $query ) = $seq =~ /^\s*([a-zA-Z]{2,4}\|\S+)/ )
    {
        # Replaced $id with $query so that output inherits label -- GJO
        # Found ugly fairure to build correct query sequence for
        #     'blastp against complete genomes'.  Can't figure out
        #     why it ever worked with and id -- GJO

        $seq = "";
        if ( ($tool eq "blastp") || ($tool eq "tblastn")
                                 || ($tool eq 'blastp against complete genomes')
           )
        {
            $seq = $fig->get_translation($query);
            my $func = $fig->function_of( $query, $user );
            $query .= " $func"  if $func;
        }
        elsif ($query =~ /^fig/)
        {
            my @locs;
            if ((@locs = $fig->feature_location($query)) && (@locs > 0))
            {
                $seq = $fig->dna_seq($fig->genome_of($query),@locs);
            }
        }
        if (! $seq)
        {
            push(@$html,$cgi->h1("Sorry, could not get sequence for $query"));
            return;
        }
    }

    #--------------------------------------------------------------------------
    #  Is it a fasta format?  Get the query name
    #--------------------------------------------------------------------------

    elsif ( $seq =~ s/^>\s*(\S+[^\n\012\015]*)// )  #  more flexible match -- GJO
    {
        $query = $1;
    }

    #--------------------------------------------------------------------------
    #  Take it as plain text
    #--------------------------------------------------------------------------

    else
    {
        $query = "query";
    }

    #
    #  The rest is taken as the sequence
    #

    $seq =~ s/\s+//g;
    open( SEQ, ">$tmp_seq" ) || die "run_blast could not open $tmp_seq";
    print SEQ ">$query\n$seq\n";
    close( SEQ );

    if (! $ENV{"BLASTMAT"}) { $ENV{"BLASTMAT"} = "$FIG_Config::blastmat" }
    my $blast_opt = $cgi->param( 'blast_options' ) || '';

    if ( $tool eq "blastp" )
    {
        my $db = "$FIG_Config::organisms/$org/Features/peg/fasta";
        &verify_db( $db, "p" );
        @out = map { &HTML::set_prot_links($cgi,$_) } execute_blastall( 'blastp', $tmp_seq, $db, $blast_opt );
    }

    elsif ( $tool eq "blastx" )
    {
        my $db = "$FIG_Config::organisms/$org/Features/peg/fasta";
        &verify_db( $db, "p" );
        @out = map { &HTML::set_prot_links($cgi,$_) } execute_blastall( 'blastx', $tmp_seq, $db, $blast_opt );
    }

    elsif ( $tool eq "blastn" )
    {
        my $db = "$FIG_Config::organisms/$org/contigs";
        &verify_db( $db, "n" );                               ### fix to get all contigs
        @out = execute_blastall( 'blastn', $tmp_seq, $db, "-r 1 -q -1 " . $blast_opt );
        push @$html, blast_graphics( $fig, $cgi, $org, \@out, $tool );
    }

    elsif ( $tool eq "tblastn" )
    {
        my $db = "$FIG_Config::organisms/$org/contigs";
        &verify_db( $db, "n" );                               ### fix to get all contigs
        @out = execute_blastall( 'tblastn', $tmp_seq, $db, $blast_opt );
        push @$html, blast_graphics( $fig, $cgi, $org, \@out, $tool );
    }

    elsif ( $tool eq 'blastp against complete genomes' )     ### this tool gets nonstandard treatment: RAO
    {
	&blast_complete( $fig, $cgi, $html, $tmp_seq, $query, $seq );
	unlink( $tmp_seq );
	return;
    }

    if (@out < 1)          #  This is really a bigger problem than no hits (GJO)
    {
        push @$html, $cgi->h1( "Sorry, no blast output" );
    }
    else
    {
        push @$html, $cgi->pre, @out, $cgi->end_pre;
    }
    unlink( $tmp_seq );
}


# `$blastall -p $prog -i $tmp_seq -d $db $blast_opt`
# execute_blastall( $prog, $input_file, $db, $options )

sub execute_blastall
{
    my( $prog, $input, $db, $options ) = @_;

    my $blastall = "$FIG_Config::ext_bin/blastall";
    my @args = ( '-p', $prog, '-i', $input, '-d', $db, split(/\s+/, $options) );

    my $bfh;
    my $pid = open( $bfh, "-|" );
    if ( $pid == 0 )
    {
        exec( $blastall,  @args );
        die join( " ", $blastall, @args, "failed: $!" );
    }

    <$bfh>
}


#  Changed to:
#     Include low complexity filter in blast search.
#     Remove all but first match to a given database sequence.
#     Sort by bit-score, not E-value (which becomes equal for all strong matches).
#     Limit to 1000 matches.
#  -- GJO

sub blast_complete
{
    my( $fig, $cgi, $html, $seqfile, $query, $seq ) = @_;
    my( $genome, @sims );

    @sims = ();
    foreach $genome ( $fig->genomes("complete") )
    {
        my $db = "$FIG_Config::organisms/$genome/Features/peg/fasta";
        next if (! -s $db);

        &verify_db($db,"p");
        my $sim;
        my %seen = ();
        push @sims, map { chomp;
                          $sim = [ split /\t/ ];
                          $sim->[10] =~ s/^e-/1.0e-/;
                          $seen{ $sim->[1] }++ ? () : $sim
                        }
                    execute_blastall( 'blastp', $seqfile, $db, '-m 8 -F T -e 1e-5' );
    }

    @sims = sort { $b->[11] <=> $a->[11] } @sims;
    if ( @sims > 1000 ) { @sims = @sims[0 .. 999] }
    &format_sims( $fig, $cgi, $html, \@sims, $query, $seq );
}


#------------------------------------------------------------------------------
#  Graphically display search results on contigs
#
#  use FIGjs        qw( toolTipScript );
#  use GenoGraphics qw( render );
#------------------------------------------------------------------------------
#
#  Fields produced by next_blast_hsp:
#
#  0   1    2    3   4    5    6    7    8    9    10    11   12    13   14  15 16  17  18 19  20
# qid qdef qlen sid sdef slen scr e_val p_n p_val n_mat n_id n_pos n_gap dir q1 q2 qseq s1 s2 sseq
#------------------------------------------------------------------------------

sub blast_graphics {
    my ( $fig_or_sprout, $cgi, $genome, $out, $tool ) = @_;

    my $e_min = 0.1;
    my $gg = [];
    my @html = ();

    #  Changed to use standalone parsing function, not shell script -- GJO

    my $outcopy = [ @$out ];
    while ( $_ = &gjoparseblast::next_blast_hsp( $outcopy ) )
    {
        my ( $qid, $qlen, $contig, $slen ) = @$_[0, 2, 3, 5 ];
        my ( $e_val, $n_mat, $n_id, $q1, $q2, $s1, $s2 ) = @$_[ 7, 10, 11, 15, 16, 18, 19 ];
	next if $e_val > $e_min;
	my ( $genes, $min, $max ) = hsp_context( $fig_or_sprout, $cgi, $genome,
	                                         $e_val, 100 * $n_id / $n_mat,
	                                         $qid,    $q1, $q2, $qlen,
	                                         $contig, $s1, $s2, $slen
	                                       );
	if ($min && $max)
	{
	    push @$gg, [ substr( $contig, 0, 18 ), $min, $max, $genes ];
	}
    }

    # $gene  = [ $beg, $end, $shape, $color, $text, $url, $pop-up, $alt_action, $pop-up_title ];
    # $genes = [ $gene, $gene, ... ];
    # $map   = [ $label, $min_coord, $max_coord, $genes ];
    # $gg    = [ $map, $map, ... ];
    # render( $gg, $width, $obj_half_heigth, $save, $img_index_number )

    if ( @$gg )
    {
	# print STDERR Dumper( $gg );
	my $gs = $fig_or_sprout->genus_species( $genome );
	my $space = "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;";
	my $legend = "<TABLE>\n"
	           . "    <TR>\n"
	           . "        <TD>Q = Query sequence$space</TD>\n"
	           . "        <TD Bgcolor='#FF0000'>$space</TD><TD>Frame 1 translation$space</TD>\n"
	           . "        <TD Bgcolor='#00FF00'>$space</TD><TD>Frame 2 translation$space</TD>\n"
	           . "        <TD Bgcolor='#0000FF'>$space</TD><TD>Frame 3 translation$space</TD>\n"
	           . "        <TD Bgcolor='#808080'>$space</TD><TD>Untranslated feature</TD>\n"
	           . "    </TR>\n"
	           . "</TABLE><P />";

	push @html, "\n", FIGjs::toolTipScript(), "\n",
	            $cgi->h2( "Results of $tool search of contigs from $gs\n"),
	            $legend,
	            @{ GenoGraphics::render( $gg, 600, 4, 0, 1 ) },
	            $cgi->hr, "\n";
    }

    return @html;
}


sub hsp_context {
    my( $fig_or_sprout, $cgi, $genome, $e_val, $pct_id,
        $qid,    $q1, $q2, $qlen,
        $contig, $s1, $s2, $slen ) = @_;
    my $half_sz = 5000;

    my( $from, $to, $features, $fid, $beg, $end );
    my( $link, $lbl, $isprot, $function, $uniprot, $info, $prot_query );

    my $user   = $cgi->param( 'user' ) || "";
    my $sprout = $cgi->param( 'SPROUT' ) ? '&SPROUT=1' : '';

    my @genes  = ();

    #  Based on the match position of the query, select the context region:

    ( $from, $to ) = ( $s1 <= $s2 ) ? ( $s1 - $half_sz, $s2 + $half_sz )
                                    : ( $s2 - $half_sz, $s1 + $half_sz );
    $from = 1      if ( $from < 1 );
    $to   = $slen  if ( $to > $slen );

    #  Get the genes in the region, and adjust the ends to include whole genes:

    ( $features, $from, $to ) = genes_in_region( $fig_or_sprout, $cgi, $genome, $contig, $from, $to );

    #  Fix the end points if features have moved them to exclude query:

    if ( $s1 < $s2 ) { $from = $s1 if $s1 < $from; $to = $s2 if $s2 > $to }
    else             { $from = $s2 if $s2 < $from; $to = $s1 if $s1 > $to }

    #  Add the other features:

    foreach $fid ( @$features )
    {
        my $contig1;
        ( $contig1, $beg, $end ) = boundaries_of( $fig_or_sprout, feature_locationS( $fig_or_sprout, $fid ) );
        next if $contig1 ne $contig;

        $link = "";
        if ( ( $lbl ) = $fid =~ /peg\.(\d+)$/ ) {
            ( $link = $cgi->url(-relative => 1) ) =~ s/index\.cgi/protein.cgi/;
            $link .= "?prot=$fid&user=$user$sprout";
            $isprot = 1;
        } elsif ( ( $lbl ) = $fid =~ /\.([a-z]+)\.\d+$/ ) {
            $lbl = uc $lbl;
            $isprot = 0;
        } else {
            $lbl = "";
            $isprot = 0;
        }

        $function = function_ofS( $fig_or_sprout, $fid );

        $uniprot = join ", ", grep { /^uni\|/ } feature_aliasesL( $fig_or_sprout, $fid);

        $info = join( '<br />', "<b>Feature:</b> $fid",
                                "<b>Contig:</b> $contig",
                                "<b>Begin:</b> $beg",
                                "<b>End:</b> $end",
                                $function ? "<b>Function:</b> $function" : '',
                                $uniprot ? "<b>Uniprot ID:</b> $uniprot" : ''
                    );

	# $gene  = [ $beg, $end, $shape, $color, $text, $url, $pop-up, $alt_action, $pop-up_title ];

        push @genes, [ feature_graphic( $beg, $end, $isprot ),
                       $lbl, $link, $info,
                       $isprot ? () : ( undef, "Feature information" )
                     ];
    }

    #  Draw the query.  The subject coordinates are always DNA.  If the query
    #  is protein, it is about 3 times shorter than the matching contig DNA.
    #  Splitting the difference, if 1.7 times the query length is still less
    #  than the subject length, we will call it a protein query (and reading
    #  frame in the contig coordinates has meaning).  If it is nucleotides,
    #  there is no defined frame.

    $info = join( '<br />', $qid ne 'query ' ? "<b>Query:</b> $qid" : (),
                            "<b>Length:</b> $qlen",
                            "<b>E-value:</b> $e_val",
                            "<b>% identity:</b> " . sprintf( "%.1f", $pct_id ),
                            "<b>Region of similarity:</b> $q1 &#150; $q2"
                );
    $prot_query = ( 1.7 * abs( $q2 - $q1 ) < abs( $s2 - $s1 ) ) ? 1 : 0;

    if ( $user && $prot_query )
    {
        $link  = $cgi->url(-relative => 1);
        $link  =~ s/index\.cgi/propose_new_peg.cgi/;
        $link .= "?user=$user&genome=$genome&covering=${contig}_${s1}_${s2}";
    }
    else
    {
        $link = undef;
    }

    push @genes, [ feature_graphic( $s1, $s2, $prot_query ),
                   'Q', $link, $info, undef, 'Query and match information'
                 ];

    return \@genes, $from, $to;
}


sub feature_graphic {
    my ( $beg, $end, $isprot ) = @_;
    my ( $min, $max, $symb, $color );

    ( $min, $max, $symb ) = ( $beg <= $end ) ? ( $beg, $end, "rightArrow" )
                                             : ( $end, $beg, "leftArrow" );

    #  Color proteins by translation frame

    $color = $isprot ? qw( blue red green )[ $beg % 3 ] : 'grey';

    ( $min, $max, $symb, $color );
}


sub genes_in_region {
    my( $fig_or_sprout, $cgi, $genome, $contig, $min, $max ) = @_;

    if ( $cgi->param( 'SPROUT' ) )
    {
        my( $x, $feature_id );
        my( $feat, $min, $max ) = $fig_or_sprout->genes_in_region( $genome, $contig, $min, $max );
        my @tmp = sort { ($a->[1] cmp $b->[1]) or
                         (($a->[2]+$a->[3]) <=> ($b->[2]+$b->[3]))
                       }
                  map  { $feature_id = $_;
                         $x = feature_locationS( $fig_or_sprout, $feature_id );
                         $x ? [ $feature_id, boundaries_of( $fig_or_sprout, $x )]  : ()
                       }
                  @$feat;
        return ( [map { $_->[0] } @tmp ], $min, $max );
    }
    else
    {
        return $fig_or_sprout->genes_in_region( $genome, $contig, $min, $max );
    }
}


sub feature_locationS {
    my ( $fig_or_sprout, $peg ) = @_;
    scalar $fig_or_sprout->feature_location( $peg );
}


sub boundaries_of {
    my( $fig_or_sprout, $loc ) = @_;
    $fig_or_sprout->boundaries_of( $loc );
}


sub function_ofS {
    my( $fig_or_sprout, $peg, $user ) = @_;
    scalar $fig_or_sprout->function_of( $peg, $user );
}


sub feature_aliasesL {
    my( $fig_or_sprout, $fid ) = @_;
    my @tmp = $fig_or_sprout->feature_aliases( $fid );
    @tmp
}


sub format_sims {
    my( $fig, $cgi, $html, $sims, $query, $seq ) = @_;
    my( $col_hdrs, $table, @ids, $ids, $sim, %seen, $n, $fid );

    $col_hdrs = [ "Select up to here",
                  "Similar sequence",
                  "E-val",
                  "Function",
                  "Organism",
                  "Aliases"
                ];

    $table = [];
    @ids = ();
    $n = 0;        # Count reported sequences
    foreach $sim ( @$sims )
    {
        $fid = $sim->[1];
        next if $seen{ $fid }++;                #  One hit per sequence
        next if $fig->is_deleted_fid( $fid );   #  Hide deleted sequences
        my $alii = scalar $fig->feature_aliases( $fid );
        $alii =~ s/,/, /g;
        push( @$table, [ $cgi->checkbox( -name => 'list_to',
                                         -value => $fid,
                                         -override => 1,
                                         -checked => 0,
                                         -label => ""
                                       ),
                         &HTML::fid_link( $cgi, $fid ),
                         [ $sim->[10], "TD NoWrap" ],
                         scalar $fig->function_of( $fid ),
                         $fig->genus_species( $fig->genome_of( $fid ) ),
                         $alii
                       ]
            );
        push( @ids, $fid );
        last if ++$n >= 1000;    #  Stop after 1000
    }

    $ids = join(",",@ids);
    my $target = "window$$";
    push( @$html, $cgi->start_form( -method => 'post',
                                    -target => $target,
                                    -action => $this_script
                                  ),
                  $cgi->hidden(-name => 'ids',  -value => $ids),
                  $cgi->hidden(-name => 'qid',  -value => $query),
                  $cgi->hidden(-name => 'qseq', -value => $seq),
                  $cgi->submit('Extract Matched Sequences'),
                # $cgi->submit('Align Matched Sequences'),
                  &HTML::make_table($col_hdrs,$table,"Best Hits"),
                  $cgi->submit('Extract Matched Sequences'),
                # $cgi->submit('Align Matched Sequences'),
                  $cgi->end_form
        );
}


sub verify_db {
    my($db,$type) = @_;

    if ($type =~ /^p/i)
    {
        if ((! -s "$db.psq") || (-M "$db.psq" > -M $db))
        {
            system "$FIG_Config::ext_bin/formatdb -p T -i $db";
        }
    }
    else
    {
        if ((! -s "$db.nsq") || (-M "$db.nsq" > -M $db))
        {
            system "$FIG_Config::ext_bin/formatdb -p F -i $db";
        }
    }
}       

sub export_assignments {
    my($fig,$cgi,$html,$who) = @_;
    my($genome,$x);

    my @genomes = map { $_ =~ /\((\d+\.\d+)\)/; $1 } $cgi->param('korgs');

    if (@genomes == 0)
    {
        @genomes = $fig->genomes;
    }
    
    my @assignments = $fig->assignments_made(\@genomes,$who,$cgi->param('after_date'));
    if (@assignments == 0)
    {
        push(@$html,$cgi->h1("Sorry, no assignments where made by $who"));
    }
    else
    {
        my $col_hdrs = ["FIG id", "External ID", "Genus/Species","Assignment"];
        my $tab = [];
        my($x,$peg,$func);
        foreach $x (@assignments)
        {
            ( $peg, $func ) = @$x;
            push( @$tab,[ HTML::set_prot_links( $cgi, $peg ),
                          HTML::set_prot_links( $cgi, ext_id( $fig, $peg ) ),
                          $fig->genus_species($fig->genome_of($peg)),
                          $func
                        ] );
        }
        
        if ($cgi->param('save_assignments'))
        {
            my $user = $cgi->param('save_user');
            if ($user)
            {
                &FIG::verify_dir("$FIG_Config::data/Assignments/$user");
                my $file = &FIG::epoch_to_readable(time) . ":$who:exported_from_local_SEED";
                if (open(TMP,">$FIG_Config::data/Assignments/$user/$file"))
                {
                    print TMP join("",map { join("\t",@$_) . "\n" } map { [$_->[0],$_->[3]] } @$tab);
                    close(TMP);
                }
                push(@$html,$cgi->h1("Saved Assignment Set $file"));
            }
            else
            {
                push(@$html,$cgi->h1("You need to specify a user to save an assignment set"));
            }
        }

        if ($cgi->param('tabs'))
        {
            print $cgi->header;
            print "<pre>\n";
            print join("",map { join("\t",@$_) . "\n" } @$tab);
            print "</pre>\n";
            exit;
        }
        else
        {
            push(@$html,&HTML::make_table($col_hdrs,$tab,"Assignments Made by $who"));
        }
    }
}

sub ext_id {
    my($fig,$peg) = @_;

    my @mapped = grep { $_ !~ /^fig/ } map { $_->[0] } $fig->mapped_prot_ids($peg);
    if (@mapped == 0)
    {
        return $peg;
    }

    my @tmp = ();
    if ((@tmp = grep { $_ =~ /^sp/ }   @mapped) && (@tmp > 0))  { return $tmp[0] }
    if ((@tmp = grep { $_ =~ /^pir/ }  @mapped) && (@tmp > 0))  { return $tmp[0] }
    if ((@tmp = grep { $_ =~ /^gi/ }   @mapped) && (@tmp > 0))  { return $tmp[0] }
    if ((@tmp = grep { $_ =~ /^tr/ }   @mapped) && (@tmp > 0))  { return $tmp[0] }
    if ((@tmp = grep { $_ =~ /^tn/ }   @mapped) && (@tmp > 0))  { return $tmp[0] }
    if ((@tmp = grep { $_ =~ /^kegg/ } @mapped) && (@tmp > 0))  { return $tmp[0] }
    
    return $peg;
}

sub translate_assignments {
    my($fig,$cgi,$html,$from_func,$to_func) = @_;

    my @funcs = grep { $_ =~ /^\S.*\S$/ } split(/[\012\015]+/,$from_func);

    my $user = $cgi->param('save_user');
    if ($user)
    {
	&FIG::verify_dir("$FIG_Config::data/Assignments/$user");
	my $file = &FIG::epoch_to_readable(time) . ":$user:translation";
	if (open(TMP,">$FIG_Config::data/Assignments/$user/$file"))
	{
	    my($peg,$func);

	    foreach $from_func (@funcs)
	    {
		my $from_funcQ = quotemeta $from_func;

		foreach $peg ($fig->seqs_with_role($from_func))
		{
		    if ($peg =~ /^fig\|/)
		    {
			$func = $fig->function_of($peg);
			if ($func eq $from_func)
			{
			    print TMP "$peg\t$to_func\n";
			}
			else
			{
			    my @pieces = grep { $_ } split(/(\s+[\/@]\s+)|(\s*;\s+)/,$func);
			    if (@pieces > 1)
			    {
				my $func1 = join("",map { $_ =~ s/^$from_funcQ$/$to_func/; $_ } @pieces);
				if ($func ne $func1)
				{
				    print TMP "$peg\t$func1\n";
				}
			    }
			}
		    }
		}
	    }
	    close(TMP);
	}
	push(@$html,$cgi->h1("Saved Assignment Set $file"));
    }
    else
    {
        push(@$html,$cgi->h1("You need to specify a user to save an assignment set"));
    }
}

sub find_pegs_by_cv1 {
    my ($fig, $cgi, $html, $user, $pattern, $cv) = @_;

    # Remember kind of search that got us hear so we can call back
    # with same kind
    my $search = "Search";
    if ($cgi->param('Search genome selected below')) {
	$search=uri_escape('Search genome selected below');
    } elsif ( $cgi->param('Search Selected Organisms') )  {
	$search = uri_escape('Search Selected Organisms');
    } elsif ( $cgi->param('Find Genes in Org that Might Play the Role') ) {
        $search = uri_escape('Find Genes in Org that Might Play the Role');
    }

    my $search_results = $fig->search_cv_file($cv, $pattern);

    my $find_col_hdrs = ["Find","Vocab. Name","ID; Term"];
    my $find_table_rows;
    my $counter = 0; 
    for my $r (@$search_results)
    {
	my @temp = split("\t",$r);
	my $row = [];
	my $id= $temp[1];
	my $term = $temp[2];
	my $id_and_term = $id."; ".$term;
	my $pattern=uri_escape("$id; $term");

	my $link = "index.cgi?pattern=$pattern&Search=1&user=$user";
        my $cb = "<a href=$link>Find PEGs</a>";

	#feh my $cb = $cgi->submit(-name=>'$search', -value=>'Find PEGs');
	#my $cb_value = $cv."split_here".$id."; ".$term;
	#my $cb ="<input type=checkbox name=find_checked_$counter value='$cb_value'>" ;
	push(@$row,$cb);
	push(@$row,$cv);
	push(@$row,$id_and_term);
	push(@$find_table_rows,$row);
	$counter = $counter + 1; 
    } 

    my $find_terms_button="";
    if ($counter > 0) {
	$find_terms_button= $cgi->submit(-name=>'$search', -value=>'$search');
    }

    # build the page
    push @$html, 
    $cgi->start_form(),
    $cgi->hidden(-name=>'user', -value=>'$user'),
    $cgi->br,
    "<h2>Search for PEGs annotated with Contrlled Vocabulary Terms</h2>",
    $cgi->hr,
    "<h4>Terms Matching Your Criteria </h4>\n",
    $cgi->br,
    &HTML::make_table($find_col_hdrs,$find_table_rows),
    $cgi->br,
    $find_terms_button,
    $cgi->end_form;

    return $html;
}

sub find_pegs_by_cv {
    my ($fig, $cgi, $html, $user, $pattern, $cv) = @_;

    # Remember kind of search that got us hear so we can call back
    # with same kind  (not working so force to simple Search)

    my $search = "Search";

    #if ($cgi->param('Search genome selected below')) {
    #	$search='Search genome selected below';
    #} elsif ( $cgi->param('Search Selected Organisms') )  {
    #	$search = 'Search Selected Organisms';
    #} elsif ( $cgi->param('Find Genes in Org that Might Play the Role') ) {
    #    $search = 'Find Genes in Org that Might Play the Role';
    #}

    my $search_results = $fig->search_cv_file($cv, $pattern);

    my $find_col_hdrs = ["Find","Vocab. Name","ID; Term"];
    my @patterns=();
    for my $r (@$search_results)
    {
	my @temp = split("\t",$r);
	my $id= $temp[1];
	my $term = $temp[2];
	my $pattern="$id; $term";

	push(@patterns,$pattern);
    } 

    my @pattern_radio;
    if ($#patterns + 1) {
	@pattern_radio = $cgi->radio_group( -name     => 'pattern',
					       -values   => [ @patterns ]
					       );
    } else {
	@pattern_radio = ("Nothing found");
    }

    my $find_terms_button= $cgi->submit(-name=>"Search", -value=>"Search");

    # build the page
    push @$html, 
    $cgi->start_form(),
    $cgi->hidden(-name=>'user', -value=>'$user'),
    $cgi->br,
    "<h2>Search for PEGs annotated with Contrlled Vocabulary Terms</h2>",
    $cgi->hr,
    "<h4>$cv Terms Matching Your Criteria </h4>\n",
    $cgi->br,
    $find_terms_button,
    $cgi->br,
    $cgi->br,
    join( "<br>", @pattern_radio),
#    &HTML::make_table($find_col_hdrs,$find_table_rows),
    $cgi->br,
    $find_terms_button,
    $cgi->end_form;

    return $html;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3