[Bio] / FigWebServices / protein_info.cgi Repository:
ViewVC logotype

View of /FigWebServices/protein_info.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (download) (annotate)
Mon Dec 5 19:12:12 2005 UTC (13 years, 11 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, caBIG-05Apr06-00, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, caBIG-13Feb06-00, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.9: +17 -0 lines
add license words

# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


=pod

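=head1 protein_info.cgi

Report information about a set of proteins. The IDs may come from a pasted list, an uploaded file, and/or all the pegs of the selected organisms; they are mapped to FIG IDs and shown either as a per-protein table or as a per-subsystem summary.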

=cut

use strict;
use FIG;
use HTML;
use raelib;
my $raelib=new raelib;
use CGI;
my $cgi=new CGI;
use LWP::Simple qw(!head); # see the caveat in perldoc LWP about importing two head methods.

# Connect to the SEED database. If the FIG constructor dies, $fig is left
# undefined and the exception text is captured so that an error page can be
# shown instead of the normal form.
my $fig = eval { FIG->new };
my $connect_error = $@;

if ($connect_error ne "")
{
    # Every failure gets the generic headline first.
    my @html = ($cgi->p("Error connecting to SEED database."));

    # A refused database connection gets a readable explanation built from
    # the configured connection settings; anything else is shown verbatim.
    push @html,
        ($connect_error =~ /Could not connect to DBI:.*could not connect to server/)
            ? $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport.")
            : $cgi->pre($connect_error);

    &HTML::show_page($cgi, \@html, 1);
    exit;
}
    
# $html collects the page fragments that are handed to HTML::show_page below.
# $user is a file-scoped lexical: the report subs further down read it when
# they build their links.
my $html = [];
my $user = $cgi->param('user');

unshift(@$html, "<TITLE>The SEED - Protein Information</TITLE>\n");

# When a request button was pressed, gather the candidate IDs from all three
# inputs (pasted text, uploaded file, selected organisms) and map them.
my $ids;
if ($cgi->param('request')) {
    my @proteins = $cgi->param('proteins');

    # Every line of an uploaded file is treated as more ID text.
    my $upload = $cgi->upload('fileupload');
    if ($upload) {
        push @proteins, <$upload>;
    }

    # Each selected organism contributes all of its pegs.
    if ($cgi->param('korgs')) {
        foreach my $genome ($cgi->param('korgs')) {
            push @proteins, $fig->pegs_of($genome);
        }
    }

    # This does it all in one call but does not allow error checking.
    $ids = parse_ids(@proteins);
}

# Pick the report that matches the button that was pressed. Without usable
# IDs, or with an unrecognised request value, the blank form is shown.
my %report_for = (
    "Protein Information"   => \&protein_info,
    "Subsystem Information" => \&subsystem_info,
);

my $report;
if ($ids) {
    my $request = $cgi->param('request');
    $report = $report_for{$request};
}

if ($report) {
    $report->($fig, $cgi, $html, $ids);
}
else {
    show_initial($fig, $cgi, $html);
}

&HTML::show_page($cgi,$html,1);
exit;


# show_initial($fig, $cgi, $html)
#
# Appends the empty input form to the page: instructions, the organism list,
# a textarea for pasted IDs, a file-upload field, and a row of request
# buttons above and below the inputs.
#
#   $fig   FIG object (accepted for symmetry with the report subs; unused)
#   $cgi   CGI object used to render the form elements
#   $html  array reference of page fragments; appended to in place
#
# Returns $html.
sub show_initial {
    my ($fig, $cgi, $html) = @_;

    # The same three buttons are rendered twice. The CGI methods are called
    # afresh each time, in list context, exactly where the row is emitted.
    my $button_row = sub {
        return (
            $cgi->submit(-name => 'request', -value => 'Protein Information'),
            $cgi->submit(-name => 'request', -value => 'Subsystem Information'),
            $cgi->reset(),
        );
    };

    # Open the form and explain what can be entered.
    push @$html, $cgi->start_multipart_form();
    push @$html,
        "<h2>Generate information and links about a series of proteins</h2>\n",
        "<p>Please generate a list of protein IDs. There are several methods provided. You can choose one or more organisms from the scrolling list, you can paste some gene or protein IDs into the box or you can upload a file of IDs. Or you can do all three. We will then try and map the IDs that you find onto FIG IDs. If we are able to map them you will see a table of results. If we are unable to map some we'll let you know which ones. You can separate your accessions with spaces, returns, or commas.</p>\n",
        "<p>Typical IDs are in the following format:</p>\n",
        "<ol>\n<li><b>FIG</b>: &nbsp; fig|83333.1.peg.1697</li>\n<li><b>Genbank</b><ul><li>Refseq: &nbsp; begin with NP_ or NC_</li>\n",
        "<li>gi numbers &nbsp; These are just numeric, please add the characters 'gi|' to make a number like gi|16129669</li>\n",
        "<li>GenBank Accessions &nbsp; numbers and letters such as AAF12034</li>\n</ul>\n",
        "<li><b>SwissProt, PIR, Trembl, Uniprot</b> &nbsp; a single letter and some digits</li></ol>\n";

    # Upper button row.
    push @$html, "<p>", $button_row->(), "</p>\n";

    # Input 1: organism selection.
    push @$html,
        "<br><b>Choose one or more organisms from this list:</b><br>\n",
        $raelib->scrolling_org_list($cgi, "1");

    # Input 2: pasted IDs.
    push @$html,
        "<b>Or paste some IDs here:</b><br>\n",
        $cgi->textarea(-name => "proteins", -rows => 10, -columns => 40),
        "<br>\n";

    # Input 3: uploaded file of IDs.
    push @$html,
        "<br><b>Or choose a file here:</b><br>\n",
        $cgi->filefield(-name => "fileupload", -size => 50),
        "<br>\n";

    # Lower button row, then close the form.
    push @$html, $button_row->(), $cgi->end_form;

    return $html;
}

# protein_info($fig, $cgi, $html, $ids)
#
# Appends a table with one row per mapped FIG peg: the ID as entered, a link
# to the protein page, genus/species, function, subsystem links and protein
# family links. Input IDs that mapped to nothing are listed under the table
# and also written to a per-process file in $FIG_Config::temp.
#
#   $fig   FIG object used for all lookups
#   $cgi   CGI object (HTML escaping, remote host name)
#   $html  array reference of page fragments; appended to in place
#   $ids   hash reference: input ID => array reference of matching FIG IDs
#
# Reads the file-scoped $user. Returns $html.
#
# User-supplied text ($user and the IDs as entered) is escaped before it is
# placed in the page. Values that come back from FIG lookups are inserted
# as they are, as before.
sub protein_info {
    my ($fig, $cgi, $html, $ids) = @_;

    # $user comes straight from the request, so URL-escape it for links.
    my $user_q = defined $user ? CGI::Util::escape($user) : '';

    # predefine the color section for the subsys link: every mapped peg is
    # passed as a color= parameter on each subsystem link
    my $color = "&color=" . join("&color=", map { @{ $ids->{$_} } } keys %$ids);

    # Start with an empty table so make_table always receives an array
    # reference, even when no ID could be mapped.
    my $tab = [];
    my @unknowns;

    # Sorted so that the row order is the same on every request.
    foreach my $key (sort keys %$ids) {
        my @pegs = @{ $ids->{$key} };
        unless (@pegs) {
            push @unknowns, $key;
            next;
        }

        # The entered ID is shown once, in a cell spanning all of its pegs.
        # The second element is the tag text passed along to make_table.
        my $first = [ $cgi->escapeHTML($key), "td rowspan=" . scalar(@pegs) ];

        foreach my $peg (@pegs) {

            # subsystems_for_peg returns array refs whose first element is
            # the subsystem name. Sort on that name (sorting the refs
            # themselves orders by memory address) and list each subsystem
            # once -- presumably a peg can appear in one subsystem under
            # several roles; TODO confirm.
            my %seen;
            my @subsystems = grep { !$seen{$_}++ }
                             map  { $_->[0] }
                             $fig->subsystems_for_peg($peg);
            my $ss = @subsystems
                ? join("<br>\n",
                       map { "<a href='display_subsys.cgi?user=$user_q&ssa_name=" . $_ . "$color'>" . $_ . "</a>" }
                       sort @subsystems)
                : " None ";

            my $ffp = join "",
                      map { "<a href='proteinfamilies.cgi?user=$user_q&family=$_'>" . $fig->family_function($_) . "</a><br>\n" }
                      $fig->families_for_protein($peg);
            $ffp = " None " unless $ffp;

            my @row = (
                "<a href='protein.cgi?user=$user_q&prot=$peg'>$peg</a>\n",
                $fig->genus_species($fig->genome_of($peg)),
                scalar($fig->function_of($peg, $user)),
                $ss,
                $ffp,
            );

            # Only the first row of a group carries the spanning ID cell.
            if ($first) {
                unshift @row, $first;
                undef $first;
            }
            push @$tab, \@row;
        }
    }

    push @$html, &HTML::make_table(["ID", "FIG ID<br><small>Link goes to protein page</small>", "Genus Species", "Functional Role", "Subsystems<br><small>Link will color subsystem with all pegs</small>", "Protein Families<br><small>Link will explore Protein Family</small>"], $tab, "IDs"), "\n";

    if (@unknowns) {
        # Keep a record of what could not be found. This is diagnostic
        # only, so a failure to write it is reported with warn and the
        # page is still delivered.
        my $log_file = "$FIG_Config::temp/protein_info_not_found.$$.txt";
        if (open(my $out, '>', $log_file)) {
            print {$out} join("\n", "For request from ", $cgi->remote_host, " couldn't find the following IDs", @unknowns, '');
            close($out) or warn "Can't close $log_file: $!";
        }
        else {
            warn "Can't open $log_file: $!";
        }

        # These are the user's own strings echoed back: escape them.
        my $list = join "</li>\n<li>", map { $cgi->escapeHTML($_) } @unknowns;
        push @$html, "<p>We do not know about the following IDs. Sorry.</p><ul><li>$list</li></ul>\n";
    }

    return $html;
}


# subsystem_info($fig, $cgi, $html, $ids)
#
# Appends two lists to the page: the subsystems that contain any of the
# mapped pegs (most pegs first, each linked with its pegs as color=
# parameters), and the mapped pegs that are in no subsystem at all.
# Input IDs that mapped to nothing are not reported here.
#
#   $fig   FIG object used for the subsystem lookups
#   $cgi   CGI object (accepted for symmetry with protein_info; unused)
#   $html  array reference of page fragments; appended to in place
#   $ids   hash reference: input ID => array reference of matching FIG IDs
#
# Reads the file-scoped $user. Returns $html.
sub subsystem_info {
    my ($fig, $cgi, $html, $ids) = @_;

    # $user comes straight from the request, so URL-escape it for links.
    my $user_q = defined $user ? CGI::Util::escape($user) : '';

    # Every mapped peg once, following the sorted input IDs. Several input
    # IDs can map to the same peg; it should be looked up and listed once.
    my (%seen_peg, @pegs);
    foreach my $id (sort { $a cmp $b } keys %$ids) {
        push @pegs, grep { !$seen_peg{$_}++ } @{ $ids->{$id} };
    }

    # %pegs_in:      subsystem name => { peg => 1 }
    # %in_subsystem: peg => 1 when the peg is in at least one subsystem
    my (%pegs_in, %in_subsystem);
    foreach my $peg (@pegs) {
        foreach my $entry ($fig->subsystems_for_peg($peg)) {
            $pegs_in{ $entry->[0] }{$peg} = 1;
            $in_subsystem{$peg}           = 1;
        }
    }

    # generate the links to subsystems: most pegs first, ties by name so
    # the order is the same on every request
    my @by_size = sort {
        scalar(keys %{ $pegs_in{$b} }) <=> scalar(keys %{ $pegs_in{$a} })
            || $a cmp $b
    } keys %pegs_in;

    push @$html, "<h3>Subsystems: # of pegs with matching expression level</h3>\n<ul>\n";
    foreach my $name (@by_size) {
        my @members = sort keys %{ $pegs_in{$name} };
        my $color   = "&color=" . join("&color=", @members);
        push @$html, "<li><a href='display_subsys.cgi?user=$user_q&ssa_name=$name$color'>$name</a> (" . scalar(@members) . ")</li>\n";
    }
    push @$html, "</ul>\n";

    # now generate the ones that don't have links
    push @$html, "<hr>\n<h3>Pegs Not in Subsystem</h3>\n<ul>";
    foreach my $peg (grep { !$in_subsystem{$_} } @pegs) {
        push @$html, "<li><a href='protein.cgi?user=$user_q&prot=$peg'>$peg</a></li>\n";
    }
    push @$html, "</ul>\n";

    return $html;
}




=head2 parse_ids

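Given a list of strings, each holding one or more IDs in any supported format separated by whitespace or commas, this returns a reference to a hash. Each key is an ID and each value is a reference to an array of the FIG IDs that match it. A purely numeric ID is stored with a "gi|" prefix, and a single capital letter followed by digits is stored with a "uni|" prefix.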

=cut

# parse_ids(@given)
#
# Splits every string in @given on runs of whitespace and/or commas,
# normalises bare IDs, and looks each distinct ID up with $fig->by_alias
# (using the file-scoped $fig).
#
#   all digits            -> "gi|<digits>"
#   one capital + digits  -> "uni|<id>"
#   anything else         -> used as given
#
# Returns a hash reference (ID => array reference of matching FIG IDs, which
# is empty when nothing matched), or undef when @given contained no IDs at
# all. The caller relies on that undef to fall back to the blank form.
sub parse_ids {
    my @given = @_;

    my %want;
    foreach my $chunk (@given) {
        next unless defined $chunk;

        # Split on whitespace and commas only. A run of separators counts
        # as one, so "a, b" yields two IDs; the empty field produced by a
        # leading separator is skipped below.
        foreach my $id (split /[\s,]+/, $chunk) {
            next if $id eq '';

            if    ($id =~ /\A\d+\z/)      { $id = "gi|$id" }
            elsif ($id =~ /\A[A-Z]\d+\z/) { $id = "uni|$id" }

            # The same ID can arrive more than once (pasted and uploaded,
            # say); one lookup is enough.
            next if exists $want{$id};
            $want{$id} = [ $fig->by_alias($id) ];
        }
    }

    return %want ? \%want : undef;
}
 

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3