[Bio] / RegSubMa / SVStart.pm Repository:
ViewVC logotype

View of /RegSubMa/SVStart.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (download) (as text) (annotate) (vendor branch)
Wed Apr 4 17:08:51 2007 UTC (13 years, 2 months ago) by mkubal
Branch: MAIN, fig
CVS Tags: v_1_0_0, HEAD
Changes since 1.1: +0 -0 lines
initial checkin

package SVStart;

use URI::Escape;
use HTML;
use FIG_Config;
use Table;
use SeedViewer;

our @ISA = qw( Exporter );
our @EXPORT = qw( get_blastresult execute_blastall get_textsearch_result get_overview get_toolpage get_organisms_list get_subsystems_list get_blastsearch get_textsearch);

1;

sub get_blastresult {
  my ($fig, $cgi) = @_;

  # get options from params hash
  my $fig     = $FIG_Config::fig . "/";
  my $tmpdir  = $FIG_Config::temp . "/";
  my $orgdir  = $FIG_Config::organisms . "/";
  my $ext_bin = $FIG_Config::ext_bin . "/";
  my $seq     = $cgi->param("sequence");

  my $org = $cgi->param("genome");

  # initialize filename for sequence
  my $tmp_seq = "";
  my $html;

  # check for blast
  if ($cgi->param("blast_tool") =~ /blast/) {
    
    # initialize additional options
    my $add_options = "";

    # construct filename for sequence
    $tmp_seq = $tmpdir . "run_blast_tmp$$.seq";

    # create database filename string
    my $db = $orgdir;

    # check which blast was selected
    if (($cgi->param("blast_tool") eq "blastn") || ($cgi->param("blast_tool") eq "tblastn")) {
      $db .= "$org/contigs";
      
      # execute a format db
      if ((! -s "$db.nsq") || (-M "$db.nsq" > -M $db)) {
	system $ext_bin . "formatdb -p F -i $db";
      }

      if ($cgi->param("blast_tool") eq "blastn") {
	$add_options = "-r 1 -q -1 ";
      }
    } else {
      $db .= "$org/Features/peg/fasta";
      
      # execute a format db
      if ((! -s "$db.psq") || (-M "$db.psq" > -M $db)) {
	system $ext_bin . "formatdb -p T -i $db";
      }
    }

    # extend params hash with blast options
    $params->{program}       = $cgi->param("blast_tool");
    $params->{sequence_file} = $tmp_seq;
    $params->{database}      = $db;
    $params->{options}       = $cgi->param("blast_options");

    # strip sequence from whitespaces
    $seq =~ s/\s+//g;

    # write sequence to file
    open(SEQ, ">$tmp_seq") || die "run_blast could not open $tmp_seq";
    print SEQ ">query\n$seq\n";
    close(SEQ);
  
    # call blast
    if (($cgi->param("blast_tool") eq "blastn") or ($cgi->param("blast_tool") eq "tblastn")) {
      $params->{options} .= $add_options;
      @$html = execute_blastall($params);
    } else {
      @$html = execute_blastall($params);
    }

  # the tool must be either protein or dna scan
  } else {

    if ($org eq "") {
      return "You must select an organism.";
    }

    # construct filename for sequence
    $tmp_seq = $tmpdir . "tmp$$.pat";

    # initialize header variable
    my @out;
    my $col_hdrs;

    open(PAT,">$tmp_seq") || die "could not open $tmp_seq";
    $seq =~ s/[\s\012\015]+/ /g;
    print PAT "$seq\n";
    close(PAT);

    # check for protein or dna scan
    if ($params->{program} eq "Protein scan_for_matches") {
      @out = `$ext_bin/scan_for_matches -p $tmp_seq < $orgdir/$org/Features/peg/fasta`;
      $col_hdrs = ["peg","begin","end","string","function of peg"];
    } else {
      @out = `cat $orgdir/$org/contigs | $ext_bin/scan_for_matches -c $tmp_seq`;
      $col_hdrs = ["contig","begin","end","string"];
    }

    if (@out < 1) {
      push(@$html,$cgi->h1("Sorry, no hits"));
    } else {
      if (@out > 2000) {
	push(@$html,$cgi->h1("truncating to the first 1000 hits"));
	$#out = 1999;
      }
      for ($i=0; ($i < @out); $i += 2) {
	if ($out[$i] =~ /^>([^:]+):\[(\d+),(\d+)\]/) {
	 
	  $a = $1;
	  $b = $2;
	  $c = $3;
	  $d = $out[$i+1];
	  chomp $d;
	  if ($params->{program} eq "Protein scan_for_matches") {
	    push(@$tab, [ &HTML::fid_link($cgi,$a,1), $b, $c, $d, scalar $fig->function_of($a, $params->{user}) ]);
	  } else {
	    push(@$tab,[$a,$b,$c,$d]);
	  }
	}
      }
      push(@$html,&HTML::make_table($col_hdrs,$tab,"Matches"));
    }
  }

  # delete temporary file
  unlink($tmp_seq);

  # initialize content variable
  my $content = "<div class='blast'><pre>";

  # write result to content variable
  foreach my $line (@$html) {
    $line =~ s/(fig\|\d+\.\d+\.peg\.\d+)/<a href=index\.cgi\?action=ShowAnnotation\&prot=$1>$1<\/a>/g;
    $content .= $line;
  }
  
  # close content div
  $content .= "</pre></div>";

  # return html content string
  return $content;
}

sub execute_blastall {
    my($params) = @_;

    # get options from params hash
    my $prog    = $params->{program};
    my $input   = $params->{sequence_file};
    my $db      = $params->{database};
    my $options = $params->{options};
    my $ext_bin = $FIG_Config::ext_bin . "/";

    # initialize command line string
    my $blastall = $ext_bin . "blastall";
    my @args = ( '-p', $prog, '-i', $input, '-d', $db, split(/\s+/, $options) );

    # create filehandle
    my $bfh;
    my $pid = open( $bfh, "-|" );
    if ($pid == 0) {
      
      # execute blast
      exec( $blastall,  @args );
      die join(" ", $blastall, @args, "failed: $!");
    }

    # return blast output
    <$bfh>
}

sub get_textsearch_result {
  my($fig, $cgi) = @_;

  # get the search pattern
  my $pattern = $cgi->param('pattern');
  unless (defined($pattern)) {
    return undef;
  }
  
  $pattern =~ s/'//g;
  $pattern =~ s/"//g;

  my $return_value = "<div style='font-size: 13px; text-align: justify; margin: 20px; width: 70%;'><h2>Your search for '$pattern' produced the following results</h2><p>The results are ordered in four categories: <i>Organisms</i>, <i>Subsystems</i>, <i>Functional Roles</i> and <i>Features</i>. If a category table does not appear, there were no results for the according category. The search results for the protein category will be limited to 100 entries. You can browse through the result by using each tables navigation. You can sort each table by a column, by clicking the column header.</p><p> If you do not find the information you were looking for within these results, try refining your search below.</p></div>";

      $return_value .= "<div style='margin-left: 40px;'>" . &get_textsearch($pattern, 1) . "</div><div style='margin-left: 20px;'>";;
  my $result = $fig->search_database($pattern, $cgi->param('extended_search'));
  if (defined($result)) {

    # there is a result, check it's format
    if (ref($result) eq 'HASH') {
      
      # the result is an id, return it along with it's type
      return $result;
      
    } else {
      # the result has multiple types, create a table for each one

      foreach my $result_type (@$result) {
	my $title = "";
	if ($result_type->{type} eq 'organism') {
	  $title = 'Organisms';
	  my $explain_link = &create_help_link(
					       { string    => '',
						 text      => 'Click on an <b>Organism</b> in the following table to get to its Overview Page',
						 help_page => 'Organism',
					       } );
	  $title .= $explain_link;
	} elsif ($result_type->{type} eq 'subsystem') {
	  $title = 'Subsystems';
	  my $explain_link = &create_help_link(
					       { string    => '',
						 text      => 'Click on a <b>Subsystem</b> in the following table to get to its Overview Page',
						 help_page => 'Subsystem',
					       } );
	  $title .= $explain_link;
	} elsif ($result_type->{type} eq 'functional_role') {
	  $title = 'Functional Roles';
	  my $explain_link = &create_help_link(
					       { string    => '',
						 text      => 'Click on a <b>Functional Role</b> in the following table to get to its Overview Page.<br/><br/>You can also click on the <b>Subsystem</b> this role is in, to see its Overview.',
						 help_page => 'Functional Role',
					       } );
	  $title .= $explain_link;
	} elsif ($result_type->{type} eq 'proteins') {
	  $title = 'Features';
	  my $explain_link = &create_help_link(
					       { string    => '',
						 text      => 'Click on a <b>Feature</b> in the following table to get to its Overview Page',
						 help_page => 'Feature',
					       } );
	  $title .= $explain_link;
	}
	$return_value .= "<div><h2>$title</h2>" . get_result_table($result_type->{result}, $result_type->{type}, $fig) . "</div>";
      }

      $return_value .= "</div>";
      
    }
  } else {
    $return_value .= "<div style='font-size: 13px; text-align: justify; margin: 20px; width: 70%;'><i>The phrase '$pattern' could not be found in the database.</i>";
    if ($cgi->param('quick_search')) {
      $return_value .= "<p>You have searched using the 'quick search' option, this will only find items in subsystems. Try searching again, without this option to get more results.</p>";
    }
    $return_value .= "</div>";
  }
  
  return $return_value;
}

sub get_result_table {
  my ($data, $type, $fig) = @_;

  my $col_hdrs;
  if ($type eq 'organism') {
    $col_hdrs = ['Genome ID', 'Genome Name', 'Domain'];
    for (my $i=0; $i<scalar(@$data); $i++) {
      $data->[$i]->[1] = "<a href='index.cgi?action=ShowOrganism&genome=" . $data->[$i]->[1] . "'>" . $data->[$i]->[1] . "</a>";
    }
  } elsif ($type eq 'subsystem') {
    $col_hdrs = ['Subsystem'];
    for (my $i=0; $i<scalar(@$data); $i++) {
      $data->[$i]->[0] = "<a href='index.cgi?action=ShowSubsystem&subsystem_name=" . $data->[$i]->[0] . "'>" . $data->[$i]->[0] . "</a>";
    }
  } elsif ($type eq 'functional_role') {
    $col_hdrs = ['Functional Role', 'Subsystem'];
    for (my $i=0; $i<scalar(@$data); $i++) {
      $data->[$i]->[0] = "<a href='index.cgi?action=ShowFunctionalRole&role_name=" . $data->[$i]->[0] . "&subsystem_name=" . $data->[$i]->[1] . "'>" . $data->[$i]->[0] . "</a>";
      $data->[$i]->[1] = "<a href='index.cgi?action=ShowSubsystem&subsystem_name=" . $data->[$i]->[1] . "'>" . $data->[$i]->[1] . "</a>";
    }
  } elsif ($type eq 'proteins') {
    $col_hdrs = ['ID', 'Function', 'Organism'];
    for (my $i=0; $i<scalar(@$data); $i++) {
      $data->[$i]->[0] = "<a href='index.cgi?action=ShowAnnotation&prot=" . $data->[$i]->[0] . "'>" . $data->[$i]->[0] . "</a>";
      $data->[$i]->[2] = "<a href='index.cgi?action=ShowOrganism&genome=" . $data->[$i]->[2] . "'>" . $fig->orgname_of_orgid($data->[$i]->[2]) . "</a>";
    }
  }

  my $show_browse = 1;
  if (scalar(@$data) < 11) {
    $show_browse = 0;
  }
  return &Table::new({ data              => $data,
		       columns           => $col_hdrs,
		       perpage           => 10,
		       show_perpage      => 0,
		       show_topbrowse    => $show_browse,
		       show_bottombrowse => 0,
		       sortable          => 1,
		       table_width       => 800,
		       id                => "result_" . $type
		     });
}

sub get_toolpage {
  my($fig_or_sprout,$peg_id) = @_;

  # initialize some variables
  my($url,$method,@args,$line,$name,$val);
  
  # get the cgi object
  my $cgi = new CGI();
  
  # initialize html variable
  my $html = "";
  
  my $seq = &SVAnnotation::get_translation($fig_or_sprout,$peg_id);
  my $tool = $cgi->param('tool');

  $/ = "\n//\n";
  my @tools = grep { $_ =~ /^$tool\n/ } `cat $FIG_Config::global/LinksToTools`;
  if (@tools == 1) {
    chomp $tools[0];
    (undef,undef,$url,$method,@args) = split(/\n/,$tools[0]);
    my $args = [];
    foreach $line (@args) {
	next if ($line =~ /^\#/); # ignore comments
	($name,$val) = split(/\t/,$line);
	$val =~ s/FIGID/$peg_id/;
	$val =~ s/FIGSEQ/$seq/;
	$val =~ s/\\n/\n/g;
	push(@$args,[$name,$val]);
      }
    
    my @result;
    
    if ($method =~/internal/i) {
      my $pegid;
      #If method is internal, then the url is actually a  perl script
      my $script = $url;
      $script=~ s/\.pl//g;
      
      my @script_array = &SVAnnotation::flat_array(@$args);
      return &FIG::run_gathering_output("$FIG_Config::bin/$script", @script_array);

    } else {
      return &HTML::get_html($url,$method,$args);
    }
  }
}

sub call_tool {
  my($fig_or_sprout,$peg_id) = @_;
  
  # initialize some variables
  my($url,$method,@args,$line,$name,$val);
  
  # get the cgi object
  my $cgi = new CGI();
  
  # initialize html variable
  my $html = "";
  
  my $seq = &SVAnnotation::get_translation($fig_or_sprout,$peg_id);
  if (! $seq) {
    $html = $cgi->h1("Sorry, $peg_id does not have a translation");
    return $html;
  }
  my $protQ = quotemeta $peg_id;
  
  my $tool = $cgi->param('tool');
  $/ = "\n//\n";
  my @tools = grep { $_ =~ /^$tool\n/ } `cat $FIG_Config::global/LinksToTools`;
  if (@tools == 1) {
    chomp $tools[0];
    (undef,undef,$url,$method,@args) = split(/\n/,$tools[0]);
    my $args = [];
    foreach $line (@args) {
	next if ($line =~ /^\#/); # ignore comments
	($name,$val) = split(/\t/,$line);
	$val =~ s/FIGID/$peg_id/;
	$val =~ s/FIGSEQ/$seq/;
	$val =~ s/\\n/\n/g;
	push(@$args,[$name,$val]);
      }
    
    my @result;
    
    if ($method =~/internal/i) {
      my $pegid;
      #If method is internal, then the url is actually a  perl script
      my $script = $url;
      $script=~ s/\.pl//g;
      
      my @script_array = &SVAnnotation::flat_array(@$args);
      my $out = &FIG::run_gathering_output("$FIG_Config::bin/$script", @script_array);
      @result = split(/[\012\015]+/,$out);
      
    } else {
      @result = &HTML::get_html($url,$method,$args);
    }
    
    # some pages are setting the base
    #@result = grep {$_ !~ /base href/} @result;
    
    # and some pages have the audactiy to add <head> and <body tags>
    # first remove them by regexp:
    map {$_ =~ s/^.*<\/head>//i; $_ =~ s/^.*<body>//i} @result;
    map {$_ =~ s/<\/body>.*$//i; $_ =~ s/<\/html>.*$//i} @result;
    
    # now try looping through
    my $splice=0; my $splast=0;
    foreach my $i (0..$#result) {
      if ($result[$i] =~ /<body>/i || $result[$i] =~ /<\/head>/i) {$splice=$i}
      if ($result[$i] =~ /<\/body>/i) {$splast=$i}
    }
    if ($splast) {
      splice(@result, -$splast);
    }
    if ($splice) {
      splice(@result, 0, $splice);
    }
    
    #
    # The extra form in the Sprout header causes some problems with javascript-containing
    # pages. So if we're in sprout, and there's javascript in the page, just show the
    # output and exit.
    #
    
    if ($is_sprout) {
      for my $rl (@result) {
	if ($rl =~ /javascript/i) {
	  print $cgi->header;
	  print join("", @result);
	  exit 0;
	}
      }
    }
    
    foreach (@result) {
      $html .= $_;
    }
  }
  
  return $html;
}

sub get_overview {
  my ($fig) = @_;

  # get counts from database
  my( $at, $bt, $et, $vt, $envt ) = $fig->genome_counts;
  my( $ac, $bc, $ec ) = $fig->genome_counts("complete");

  my $complete_link = &create_help_link( { string    => 'complete',
					   text      => 'A genome is considered <b>complete</b> when 80% of the sequence data are in contigs over 20 kb and at least 90% of the genome is covered',
					   help_page => 'Glossary',
					 } );
  
  # create table containing the values
  my $genome_table .= qq~
<table>
   <tr><td><b>total</b></td><td colspan=2><b>$complete_link</b></td></tr>
   <tr><td>$at</td><td>$ac</td><td><b><a href='index.cgi?action=TextSearch&pattern=Archaea'>archaeal</a></b></td></tr>
   <tr><td>$bt</td><td>$bc</td><td><b><a href='index.cgi?action=TextSearch&pattern=Bacteria'>bacterial</a></b></td></tr>
   <tr><td>$et</td><td>$ec</td><td><b><a href='index.cgi?action=TextSearch&pattern=Eukaryota'>eukaryal</a></b></td></tr>
   <tr><td>$vt</td><td></td><td><b><a href='index.cgi?action=TextSearch&pattern=Virus'>viral</a></b></td></tr>
   <tr><td>$envt</td><td></td><td><b><a href='index.cgi?action=TextSearch&pattern=Environmental%20Sample'>environmental</a></b></td></tr>
</table>~;

  # return content html string
  return { title => 'Genomes', color => 'brown', content => $genome_table };
}

sub get_organisms_list {
  my ($fig, $cgi) = @_;
  
  # get the organisms sorted
  my %sort=(
	    "Virus"               => '4',
	    "Eukaryota"           => '3',
	    "Bacteria"            => '1',
	    "Archaea"             => '2',
	    "unknown"             => '5',
	    "Environmental Sample"=> '9',
	   );

  my %domains;
  my @g = $fig->genomes(1);
  map { $domains{$_} = $fig->genome_domain($_) } @g;
  
  my @sorted = sort {$sort{$domains{$a}} <=> $sort{$domains{$b}}
		       || uc($fig->genus_species($a)) cmp uc($fig->genus_species($b))} @g;

  my $labels;
  my $values;

  foreach my $genome (@sorted) {
    push(@$values, $genome);
    push(@$labels, $fig->genus_species($genome));
  }

  return { names => $labels,
	   ids   => $values };
}

sub get_subsystems_list {
  my ($fig, $cgi) = @_;
  
  my @all_subs = sort $fig->all_subsystems();
  my @values;
  foreach my $sub (@all_subs) {
    if ($fig->usable_subsystem($sub)) {
      push(@values, $sub);
    }
  }

  my @labels;
  foreach my $v (@values) {
    my $l = $v;
    $l =~ s/_/ /g;
    push(@labels, $l);
  }

  return { ids => \@values, labels => \@labels };
}

sub get_textsearch {
  my ($preselected_value, $plain) = @_;

  my $quick_link = &create_help_link( { string    => 'quick search',
					text      => 'The <b>quick search</b> will only cover features in subsystems',
					help_page => 'Subsystems',
					 } );
  
  unless ($preselected_value) {
    $preselected_value = "Enter search term";
  }

  my $extended = "<tr><td><i>(Example search: 'dnaK', 'EC 1.9.3.1' or 'histidine biosynthesis')</i></td><td><input type=checkbox name=quick_search>$quick_link</td></tr>";

  if ($plain) {
    $extended = "";
  }

  return qq~
<form action="index.cgi" method="post"><table><tr><td>
   <input type="text" name="pattern" value="$preselected_value" style="width: 440px;" onfocus="if (document.getElementById('pattern').value=='Enter search term') { document.getElementById('pattern').value=''; }" id="pattern"></td>
      <td><input type="submit" value="Text Search"></td></tr>
   $extended</table>
<input type='hidden' name='action' value='TextSearch'>
</form>
~;
}

sub get_blastsearch {
  my ($fig, $cgi) = @_;

  my $id = "org_select1";

  my $content = qq~
<form action="index.cgi" method="post" name="blast_form"><table><tr><td>
   ~ . &get_organisms($fig, $cgi, $id, 1) . qq~</td><td><input type="button" value="Blast Search" onclick="check_sequence('org_select1');"></td></tr>
   <tr><td><textarea style="width: 440px;" name="sequence" cols=55 rows=8 id="sequence" onfocus="if (document.getElementById('sequence').value=='paste in Protein or DNA Sequence') { document.getElementById('sequence').value=''; }">paste in Protein or DNA Sequence</textarea></td></tr></table>
<span style="display: none;">
  <select name="blast_tool" id="blast_tool">
    <option selected="selected" value="blastp">blastp</option>
    <option value="blastx">blastx</option>
  </select>
</span>
   <input type="hidden" name="tool" id="tool" value="blastp">
   <input type="hidden" name="action" value="BlastSearch">
</form>
~;
  
  return $content;
}

sub get_organisms {
  my ($fig, $cgi, $id, $simple) = @_;

  my @values;
  my $label;
  my $attribute;
  
  # this is just a hash that will put the bacteria first, then the euks, then the archs, and so on
  my %sort=(
	    "Virus"               => '4',
	    "Eukaryota"           => '3',
	    "Bacteria"            => '1',
	    "Archaea"             => '2',
	    "unknown"             => '5',
	    "Environmental Sample"=> '9',
	   );
  

  if (exists($params->{sorted_organisms_list})) {
    @values = @{$params->{sorted_organisms_list}->{vals}};
    $label = $params->{sorted_organisms_list}->{labels};
  } else {
    my %domains;
    my @g = $fig->genomes(1);
    map { $domains->{$_} = $fig->genome_domain($_) } @g;
    
    my @sorted = sort {$sort{$domains{$a}} <=> $sort{$domains{$b}}
			 || uc($fig->genus_species($a)) cmp uc($fig->genus_species($b))} @g;
    foreach my $genome (@sorted) {
      push @values, $genome;
      $label->{$genome}=$fig->genus_species($genome), " ($genome)";
      $attribute->{$genome}={class=>$fig->genome_domain($genome)};
      $attribute->{$genome}=~ s/\s+//g;
    }
    $params->{sorted_organism_list} = { labels => $label, vals => \@values };
    
    unshift(@values, "_choose_org");
    $label->{_choose_org} = "Pick an organism";

    $params->{sorted_organisms_list}->{vals} = \@values;
    $params->{sorted_organisms_list}->{labels} = $label;
  }

  my $content = qq~
<form action="index.cgi" method="post" name="organism_form">
~ . $cgi->popup_menu(-name       => 'genome',
		     -id         => $id,
		     -style      => "width: 400px",
		     -values     => \@values,
		     -labels     => $label,
		     -attributes => $attribute ) . qq~<input type="submit" value="Show Organism">
   <input type="hidden" name="action" value="ShowOrganism">
</form>
~;

  if (defined($simple)) {
    $content = $cgi->popup_menu(-name       => 'genome',
				-id         => $id,
				-style      => "width: 400px",
				-values     => \@values,
				-labels     => $label,
				-attributes => $attribute );
    delete($params->{simple});
  }

  return $content;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3