[Bio] / FigWebServices / pir.cgi Repository:
ViewVC logotype

View of /FigWebServices/pir.cgi

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.29 - (download) (annotate)
Fri Apr 8 20:43:32 2005 UTC (15 years ago) by redwards
Branch: MAIN
Changes since 1.28: +4 -3 lines
Added creation of the download file from pir.cgi

# -*- perl -*-



Compare some data between SEED and PIR. We probably need to add things like p2p or automatic ftp gets or something like that


use strict;
use FIG;
use HTML;
use raelib;
use CGI;
my $cgi=new CGI;
use LWP::Simple qw(!head); # see the caveat in perldoc LWP about importing two head methods.

my $fig;
eval {
    $fig = new FIG;

if ($@ ne "")
    my $err = $@;
    push(@html, $cgi->p("Error connecting to SEED database."));
    if ($err =~ /Could not connect to DBI:.*could not connect to server/)
        push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
        push(@html, $cgi->pre($err));
    &HTML::show_page($cgi, \@html, 1);

$ENV{"PATH"} = "$FIG_Config::bin:$FIG_Config::ext_bin:" . $ENV{"PATH"};

# these should probably be in FIG::Config. Oh well.
my $pir_file_from_pir="$FIG_Config::data/Global/pirsfinfo.dat";
my $pir_correspondence_file="$FIG_Config::data/Global/pirsfcorrespondence.txt";
my $pir_source_file="ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat";

# ha ha ha ha ha
# I am an idiot, and can't spell. so we need to correct that and make sure we always use an e
if (-e "$FIG_Config::data/Global/pirsfcorrespondance.txt") {rename("$FIG_Config::data/Global/pirsfcorrespondance.txt", $pir_correspondence_file)}

my $html = [];
my $user = $cgi->param('user');

unshift(@$html, "<TITLE>The SEED - PIR/SEED comparisons </TITLE>\n");
# make sure that we read the file at the beginning

unless (-e $pir_file_from_pir) {$html=&check_updates($html); &HTML::show_page($cgi,$html,1); exit(0)}
my ($pegbypir, $pirid) =&read_pir_file("initial");

if ($cgi->param('submit') eq "Tabulate summary") {
 $html=&table_annotations($html, $pegbypir, $pirid);
 push @$html, $cgi->p({class=>"diagnostic"}, ("\n<small>Generating this table took approximately " . (time-$^T) . " seconds\n</small>"));
elsif ($cgi->param('submit') =~ m/^Update/) {
 # this should catch Update Data and Updata Anyway
elsif ($cgi->param('submit') eq "Create Download File") {
 print STDERR "Creating download file\n";
 push @$html, $cgi->p({class=>"diagnostic"}, ("\n<small>Generating this table took approximately " . (time-$^T) . " seconds\n</small>"));
elsif ($cgi->param('submit') eq "Check for updates") {
 push @$html, $cgi->p({class=>"diagnostic"}, ("\n<small>Checking for updates took approximately " . (time-$^T) . " seconds\n</small>"));
elsif ($cgi->param('pirsf') && !($cgi->param('submit') eq "Update view")) {
 # we want to display one of the correspondences
 my $selfurl=$cgi->url;
 push @$html, $cgi->p, "<a href=\"/FIG/Html/pir.html#correspondence\" class=\"help\" target=\"pirhelp\">Help on this correspondence table</a>";
 if ($cgi->param('ssonly')) {push @$html, "<a href=\"$selfurl?pirsf=" . $cgi->param("pirsf") . "&user=$user\">Show All Matches</a>\n"}
 else {push @$html, "<a href=\"$selfurl?pirsf=" . $cgi->param("pirsf") . "&ssonly='1'&user=$user\">Show only matches with a subsystem</a>\n"}
 my $col_hdrs = ["PIR Superfamily<br><small>Link goes to PIR<small>", "Genome", "UniProt","PEG", "FIG Function", "FIG Subsystem"];
 my $tab = [];
 # figure out the colors
 my %color;
  my %function_cnt; my $max;
  foreach my $peg (@{$pegbypir->{$cgi->param('pirsf')}}) {
   $function_cnt{scalar $fig->function_of($peg)}++;
  my @all_functions=sort {$function_cnt{$b} <=> $function_cnt{$a}} keys %function_cnt;
  my @color=&cool_colors();
  for (my $i=0; $i<=$#all_functions; $i++) {
   if ($i > $#color) {$color{$all_functions[$i]}="#FFFFFF"}
   else {$color{$all_functions[$i]}=$color[$i]}
 foreach my $peg (@{$pegbypir->{$cgi->param('pirsf')}}) {
  my @sslinks;
  foreach my $subsys ($fig->subsystems_for_peg($peg)) {
   my $spaced=$$subsys[0];
   $spaced =~ s/_/ /g;
   push @sslinks, $cgi->a({href => "subsys.cgi?&user=$user&ssa_name=" . $$subsys[0] . "&request=show_ssa"}, $spaced);
  my $pirlink=$cgi->param('pirsf');
  $pirlink =~ /^PIR(SF\d+)/;
  $pirlink="<a href='http://pir.georgetown.edu/sfcs-cgi/new/pirclassif.pl?id=$1'>PIR$1</a>" . $pirid->{$cgi->param('pirsf')};
  next if ($cgi->param("ssonly") && !(scalar @sslinks));
  push (@$tab, 
		&HTML::fid_link($cgi, $peg, 1), 
		[scalar $fig->function_of($peg), "td style=\"background: $color{scalar $fig->function_of($peg)}\""],
		(join ", ", @sslinks)]
 push(@$html,&HTML::make_table($col_hdrs,$tab,"\nCorrespondence between SEED and PIR"));
else {
 unshift @$html, "<TITLE>The SEED - PIR comparison page</TITLE>\n";


sub show_initial {
 my ($fig,$cgi,$html)=@_;
 # generate a blank page
 # we want a list of all functions that have >= 1 peg unless we want all
 my $min=10;
 if ($cgi->param("min")) {$min=$cgi->param("min")}
 if ($cgi->param("showall")) {$min=0}
 my $full=1;
 if ($cgi->param("preliminary")) {$full=0}
 # count different subsystems per sf
 my $textlimit;
 $cgi->param('textlimit') && ($textlimit = $cgi->param('textlimit'));
 my $ss; my @pirsf;
 foreach my $sf (keys %$pegbypir) {
  next unless ($pegbypir->{$sf});
  next unless (scalar @{$pegbypir->{$sf}} >= $min);
  next if ($full && $pirid->{$sf} =~ /\(preliminary\)/i);
  next if (!$full && $pirid->{$sf} =~ /\(full/i);
  next if ($textlimit && !($pirid->{$sf} =~ m/$textlimit/i || $sf =~ m/$textlimit/i));
  push @pirsf, $sf;
  if ($cgi->param('showsubsys')) {
   foreach my $peg (@{$pegbypir->{$sf}}) {
    foreach my $subsys ($fig->subsystems_for_peg($peg)) {

 # now generate the labels
 my $display;
 foreach my $sf (@pirsf) {
  next unless ($sf);
  my $displayname=$sf . " " . $pirid->{$sf};
  if (length($displayname) > 50) {$displayname=substr($displayname, 0, 50)}
  if ($cgi->param('showsubsys')) {$display->{$sf}=$displayname . " [". scalar @{$pegbypir->{$sf}} . "/". (scalar keys %{$ss->{$sf}}) . "]"}
  else {$display->{$sf}=$displayname . " [". scalar @{$pegbypir->{$sf}} . "]"}
 my $numberpirfams=scalar keys %$display;
 unshift @pirsf, '';  $display->{''}='';
 my $size=10;
 if ($numberpirfams < $size) {$size=$numberpirfams}

 # info about the file that they can download
 my $ftpfile='<p>The comparison file has not been built yet. Please proceed.</p>';
 if (-e "$FIG_Config::temp/seed2pir.txt") {
  my @stat=stat("$FIG_Config::temp/seed2pir.txt");
  $ftpfile="<p>The comparison file was last built at ". localtime($stat[8]) . "</p>\n".
    "<p>You may download the file from <a href=\"" . $FIG_Config::temp_base . "seed2pir.txt\">seed2pir.txt</a></p>\n";

 push (@$html, $cgi->start_form(-action => "pir.cgi"),
  $cgi->h2("Please choose your super family"),
  "\nFirst, please enter a username: ", $cgi->textfield(-name=>"user", -value=>$user), $cgi->p,
  "\n<a href=\"/FIG/Html/pir.html\" class=\"help\" target=\"pirhelp\">Help on SEED/PIR Correspondence</a>",
  "\nThe list shows the PIR superfamilies. If only one number is shown (default) this is the number of PEGs that map to that superfamily. ",
  "\nIf you choose to show subsystem counts in this menu, you will get two numbers. The first of the two numbers in parenthesis is the number ",
  "\nof PEGs that map to that superfamily, and the second number in parenthesis is the number of <em>different</em> ",
  "\nsubsystems that those PEGs are in.\n", $cgi->p,"\n",
  $cgi->scrolling_list(-name=>'pirsf', -values=>[keys %$display], -labels=>$display, -size=>$size+1), $cgi->p,"\n",
  "\nNumber of superfamilies shown: ", $numberpirfams, $cgi->p,"\n",
  "\n<a href=\"/FIG/Html/pir.html#menu\" class=\"help\" target=\"pirhelp\">Help on the menu contents</a>","\n",
  "\nMinimum number of pegs per PIR superfamily shown in list &nbsp; <input type='text' name='min' value='$min' size=3 />","\n",
  "\nor show all PIR superfamilies: ", $cgi->checkbox(-name=>"showall", -label=>""), $cgi->p,"\n",
  $cgi->checkbox(-name=>"showsubsys", -label=>"Show subsystem counts in list"), $cgi->p,"\n",
  $cgi->checkbox(-name=>"preliminary", -label=>"Show only preliminary PIR superfamilies"), $cgi->p,"\n",
  "Limit the list to some text: ", $cgi->textfield(-name=>'textlimit', -size=>20), $cgi->p, "\n",
  $cgi->submit('submit', 'Recreate List'),"\n",
  $cgi->submit('submit', 'Show Correspondence'),"\n",
  $cgi->p, $cgi->hr, $cgi->p,"\n",
  $cgi->p("<strong>Generate Data Tables</strong>"),
  "<a href=\"/FIG/Html/pir.html#datatables\" class=\"help\" target=\"pirhelp\">Help on Generating Data Tables</a>",
  $cgi->p("The data tables are an alternative way to view the data in summary form. For the summary of the comparisons between PIR and SEED annotations, please check here:\n"),
  $cgi->checkbox(-name=>"onlyss", -label=>"Show only those PEGs that are in a subsystem and a superfamily", -checked=>"on"), $cgi->p,"\n",
  $cgi->checkbox(-name=>"totalsort", -label=>"Sort by the total number of different annotations"), $cgi->p,"\n",
  $cgi->submit('submit', 'Tabulate summary'),"\n",
  $cgi->p, $cgi->hr, "\n",
  $cgi->p("<strong>Check for Updates</strong>"),"\n",
  "<a href=\"/FIG/Html/pir.html#updates\" class=\"help\" target=\"pirhelp\">Help on updates</a>", "\n",
  $cgi->p("Please click the button to check the PIR site and see whether there is a new version of the PIR superfamily data you should use"),"\n",
  $cgi->submit('submit', 'Check for updates'),"\n",

  $cgi->p, $cgi->hr, $cgi->p,"\n",
  $cgi->p("<strong>Data for PIR</strong>"),
  "<a href=\"/FIG/Html/pir.html#ftpdown\" class=\"help\" target=\"pirhelp\">Help on Data for PIR</a>\n",
  $cgi->p("Please click the button to create a new file.\n"),
  $cgi->submit('submit', 'Create Download File'),"\n",



=head1 read_pir_file

 Read the PIR data file that describes superfamilies and the PIR proteins that have those families.
 The PIR data file is from ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat and
 contains the family name beginning with a > and then a list of PIR ids.

 I split this using the PIRSF\d+ and return two hashes. One that correlates PIRSF\d+ to fig id and one
 that correlates it to superfamily names.


sub read_pir_file {
 unless (-e $pir_file_from_pir) {
  # background the job here
 # just read the file, convert it to an HTML table and return it
 unless (-e $pir_correspondence_file) {
  #print STDERR "Can't find the correspondence file pirsfcorrespondence.txt so we are going to make it\n";
  raelib->pirsfcorrespondence($pir_file_from_pir, $pir_correspondence_file);
  push @$html, "<h2 style=\"background: red\">The PIR correspondence file was rebuilt for you</h2>";
 open (IN, $pir_correspondence_file) || die "Can't open $pir_correspondence_file";
 my $pir;
 my $functions;
 my $id;
 my $added;
 while (<IN>) {
  if (s/^>//) {
   unless ($added) {
    # we didn't find anything that maps here
   undef $added;
  else {
   my ($pirid, $peg)=split /\t/;
   next unless ($peg);
   push @{$pir->{$id}}, $peg;
 return $pir, $functions;

=head1 table_annotations

Return a table sorted by the number of annotations, and list a bunch of stuff


sub table_annotations {
 my ($html, $pegbypir, $pirid)=@_;
 my $count; my $subsystems; my $countinss;
 foreach my $sf (keys %$pegbypir) {
  my $function; my $ss; my $functionandss;
  foreach my $peg (@{$pegbypir->{$sf}}) {
   my $fn=scalar $fig->function_of($peg);
   my $newss=0; # this is a boolean to see whether this peg has a subsys associated with it.
   foreach my $subsys ($fig->subsystems_for_peg($peg)) {$ss->{$$subsys[0]}++; $newss++}
   $functionandss->{$fn}++ if ($newss);
  $subsystems->{$sf}=join "; ", keys %$ss;
  $count->{$sf}=scalar keys %$function;
  $countinss->{$sf}=scalar keys %$functionandss;

 my $col_hdrs = ["Number of annotations in subsystems", "Number of SEED annotations", "PIRSF<br><small>(Link goes to SEED/PIR comparison)</small>", 
                 "Superfamily name", "Subsystems in superfamily"];
 my $tab; my $lastcount; my $row;
 # note we are going to make a new table every 200 or so rows because that way the browsers don't get messed up. We actually increment between two identical counts
 my @superfamilies=sort {$countinss->{$b} <=> $countinss->{$a}} keys %$count;
 if ($cgi->param('totalsort')) {@superfamilies=sort {$count->{$b} <=> $count->{$a}} keys %$count}
 foreach my $sf (@superfamilies) {
  next if ($cgi->param('onlyss') && !($countinss->{$sf}));
  if ($lastcount ne $count->{$sf}) {
   if ($tab && ($row > 200)) {
    my $et=time-$^T;
    print STDERR "pir.cgi: Added to table at $lastcount after $et total seconds\n";
    undef $tab;
  push @$tab, [
   "<a href=\"/FIG/pir.cgi?pirsf=$sf&ssonly='1'&user='$user'\">$sf</a>",
 return $html;


=head2 check_updates()

Check the PIR remote site and see whether there are updates that we need to get


sub check_updates {
 my $html=shift;
 # first lets see if we can get the times of the updates to see if we have a newer file
 # using LWP::Simple. this is straight from the perldoc 
 my ($content_type, $document_length, $remotemtime, $expires, $server)=LWP::Simple::head($pir_source_file);
 my $localmtime=0;
 # if we don't have the file yet, we don't want to panic!
 if (-e $pir_file_from_pir) {
  my @stat=stat($pir_file_from_pir);
 # now we are going to translate the difference in times into some English for the website
 if (!$remotemtime) {
   push @$html, $cgi->p({class=>"error"}, "Could not connect to PIR to check the status of the PIR file. Please check the location of $pir_source_file");
 elsif ($remotemtime > $localmtime) {
  # the remote version is newer
  push @$html, $cgi->p("\nThe remote file $pir_source_file is newer than your current file. You should proceed with the update."),
  	$cgi->p("\nThe remote file was modified on ". scalar(localtime($remotemtime))),
  	$cgi->p("\nThe local file was modified on ". scalar(localtime($localmtime))),
	$cgi->start_form(), $cgi->hidden(-name=>"user", -value=>$user), $cgi->submit('submit', 'Update Data'), $cgi->end_form();
 else {
  # the local version is as new as the remote one.
  push @$html, $cgi->p("\nThe local file is up to date and there is no need to update your source PIR superfamilies."),
  	$cgi->p("\nThe remote file was modified on ". scalar(localtime($remotemtime))),
	$cgi->p("\nThe local file was modified on ". scalar(localtime($localmtime))),
  	$cgi->start_form(), $cgi->hidden(-name=>"user", -value=>$user), $cgi->submit('submit', 'Update Anyway'), $cgi->end_form();

 return $html;


=head2 update_data()

Download the new data file from pir, and generate the correspondence


sub update_data {
 # rename the old correspondence file. We are going to add a number to the old file so that we keep it, but this should allow us
 # to save the information for a while in case something happens. At somepoint we should probably move these to /tmp or delete them
 # or something.
 my $html=shift;
 # here we need to background downloading and installing the data.
 my $bkj=$fig->run_in_background(sub 
     my $time=time;
     print "Loading new PIR superfamily data started at ", scalar(localtime($time)), "\n";
     system("load_pirsf 1");
     print "Complete at ", scalar(localtime(time)), ". Running took ", (time-$time)/60, " minutes\n";
 push @$html, "<h2>Downloading and installing new data has started</h2>",
    "<p>The job has started in the background but will likely take some time.<br />\n",
    "The job has an ID of $bkj, and you can check it out from the <A href=\"/FIG/seed_ctl.cgi\">SEED Control Panel</a></p>\n",
    "<p>You should not use the PIR superfamilies while the job is runnning</p>\n";
 return $html;

sub create_download_file {
 my $html=shift;
 my $bkj=$fig->run_in_background(sub
    	my $time=time;
	print "Generating PIR/SEED correspondance from SEED side began at ", scalar(localtime($time)), "\n";
	system("seed2pir > $FIG_Config::temp/seed2pir.txt");
	print "Complete at ", scalar(localtime(time)), ". Running took ", (time-$time)/60, " minutes\n";

 push @$html, "<h2>Creating your data has started</h2>",
    "<p>The job has started in the background but will likely take some time.<br />\n",
    "The job has an ID of $bkj, and you can check it out from the <A href=\"/FIG/seed_ctl.cgi\">SEED Control Panel</a></p>\n";
 return $html;


sub cool_colors {
 # just an array of "websafe" colors or whatever colors we want to use. Feel free to remove bad colors (hence the lines not being equal length!)
 return (
 '#C0C0C0', '#FF40C0', '#FF8040', '#FF0080', '#FFC040', '#40C0FF', '#40FFC0', '#C08080', '#C0FF00', '#00FF80', '#00C040',
 "#6B8E23", "#483D8B", "#2E8B57", "#008000", "#006400", "#800000", "#00FF00", "#7FFFD4", 
 "#87CEEB", "#A9A9A9", "#90EE90", "#D2B48C", "#8DBC8F", "#D2691E", "#87CEFA", "#E9967A", "#FFE4C4", "#FFB6C1",
 "#E0FFFF", "#FFA07A", "#DB7093", "#9370DB", "#008B8B", "#FFDEAD", "#DA70D6", "#DCDCDC", "#FF00FF", "#6A5ACD",
 "#00FA9A", "#228B22", "#1E90FF", "#FA8072", "#CD853F", "#DC143C", "#FF6347", "#98FB98", "#4682B4",
 "#D3D3D3", "#7B68EE", "#FF7F50", "#FF69B4", "#BC8F8F", "#A0522D", "#DEB887", "#00DED1",
 "#6495ED", "#800080", "#FFD700", "#F5DEB3", "#66CDAA", "#FF4500", "#4B0082", "#CD5C5C",
 "#EE82EE", "#7CFC00", "#FFFF00", "#191970", "#FFFFE0", "#DDA0DD", "#00BFFF", "#DAA520",
 "#00FF7F", "#9400D3", "#BA55D3", "#D8BFD8", "#8B4513", "#3CB371", "#5F9EA0", "#4169E1", "#20B2AA", "#8A2BE2", "#ADFF2F",
 "#F0FFFF", "#B0E0E6", "#FF1493", "#B8860B", "#FF0000", "#F08080", "#7FFF00", "#8B0000",
 "#40E0D0", "#0000CD", "#48D1CC", "#8B008B", "#696969", "#AFEEEE", "#FF8C00", "#EEE8AA", "#A52A2A",
 "#FFE4B5", "#B0C4DE", "#FAF0E6", "#9ACD32", "#B22222", "#FAFAD2", "#808080", "#0000FF",
 "#000080", "#32CD32", "#FFFACD", "#9932CC", "#FFA500", "#F0E68C", "#E6E6FA", "#F4A460", "#C71585",
 "#BDB76B", "#00FFFF", "#FFDAB9", "#ADD8E6", "#778899",

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3