[Bio] / FigWebServices / pir.cgi Repository:
ViewVC logotype

View of /FigWebServices/pir.cgi

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.14 - (download) (annotate)
Sat Feb 12 02:33:06 2005 UTC (15 years, 1 month ago) by redwards
Branch: MAIN
Changes since 1.13: +1 -0 lines
more more changes to pir.cgi

# -*- perl -*-



Compare some data between SEED and PIR. We probably need to add things like p2p or automatic ftp gets or something like that


use strict;
use FIG;
use HTML;
use raelib;
use CGI;
my $cgi=new CGI;

my $fig;
eval {
    $fig = new FIG;

if ($@ ne "")
    my $err = $@;
    push(@html, $cgi->p("Error connecting to SEED database."));
    if ($err =~ /Could not connect to DBI:.*could not connect to server/)
        push(@html, $cgi->p("Could not connect to relational database of type $FIG_Config::dbms named $FIG_Config::db on port $FIG_Config::dbport."));
        push(@html, $cgi->pre($err));
    &HTML::show_page($cgi, \@html, 1);

$ENV{"PATH"} = "$FIG_Config::bin:$FIG_Config::ext_bin:" . $ENV{"PATH"};

my $html = [];
my $user = $cgi->param('user');

# make sure that we read the file at the beginning
my ($pegbypir, $pirid) =&read_pir_file();

if ($cgi->param('tabulate')) {
 $html=&table_annotations($html, $pegbypir, $pirid);
 push @$html, $cgi->p({class=>"diagnostic"}, ("<small>Generating this table took approximately " . (time-$^T) . " seconds\n</small>"));
elsif ($cgi->param('pirsf')) {
 # we want to display one of the correspondances
 my $selfurl=$cgi->url;
 push @$html, $cgi->p;
 if ($cgi->param('ssonly')) {push @$html, "<a href=\"$selfurl?pirsf=" . $cgi->param("pirsf") . "&user=" . $cgi->param('user') . "\">Show All Matches</a>\n"}
 else {push @$html, "<a href=\"$selfurl?pirsf=" . $cgi->param("pirsf") . "&ssonly='1'&user=" . $cgi->param('user') . "\">Show only matches with a subsystem</a>\n"}
 my $col_hdrs = ["PIR Superfamily<br><small>Link goes to PIR<small>", "Genome", "PEG", "FIG Function", "FIG Subsystem"];
 my $tab = [];
 foreach my $peg (@{$pegbypir->{$cgi->param('pirsf')}}) {
  my @sslinks;
  foreach my $subsys ($fig->subsystems_for_peg($peg)) {
   push @sslinks, $cgi->a({href => "subsys.cgi?&user=$user&ssa_name=" . $$subsys[0] . "&request=show_ssa"}, $$subsys[0]);
  my $pirlink=$cgi->param('pirsf');
  $pirlink =~ /^PIR(SF\d+)/;
  $pirlink="<a href='http://pir.georgetown.edu/sfcs-cgi/new/pirclassif.pl?id=$1'>PIR$1</a>" . $pirid->{$cgi->param('pirsf')};
  next if ($cgi->param("ssonly") && !(scalar @sslinks));
  push (@$tab, [$pirlink, $fig->genus_species($fig->genome_of($peg)), &HTML::fid_link($cgi, $peg, 1), (scalar $fig->function_of($peg)), (join ", ", @sslinks)]);
 push(@$html,&HTML::make_table($col_hdrs,$tab,"Correspondance between SEED and PIR"));
else {
 unshift @$html, "<TITLE>The SEED - PIR comparison page</TITLE>\n";


sub show_initial {
 my ($fig,$cgi,$html)=@_;
 # generate a blank page
 # we want a list of all functions that have >= 1 peg unless we want all
 my $min=10;
 if ($cgi->param("min")) {$min=$cgi->param("min")}
 if ($cgi->param("showall")) {$min=0}
 my $full=1;
 if ($cgi->param("preliminary")) {$full=0}
 # count different subsystems per sf
 my $ss; my @pirsf;
 foreach my $sf (keys %$pegbypir) {
  next unless ($pegbypir->{$sf});
  next unless (scalar @{$pegbypir->{$sf}} >= $min);
  next if ($full && $pirid->{$sf} =~ /\(preliminary\)/i);
  next if (!$full && $pirid->{$sf} =~ /\(full/i);
  push @pirsf, $sf;
  if ($cgi->param('showsubsys')) {
   foreach my $peg (@{$pegbypir->{$sf}}) {
    foreach my $subsys ($fig->subsystems_for_peg($peg)) {
 # now generate the labels
 my $display;
 foreach my $sf (@pirsf) {
  next unless ($sf);
  my $displayname=$pirid->{$sf};
  if (length($displayname) > 50) {$displayname=substr($displayname, 0, 50)}
  if ($cgi->param('showsubsys')) {$display->{$sf}=$displayname . " [". scalar @{$pegbypir->{$sf}} . "/". (scalar keys %{$ss->{$sf}}) . "]"}
  else {$display->{$sf}=$displayname . " [". scalar @{$pegbypir->{$sf}} . "]"}
 unshift @pirsf, ''; $display->{''}='';
 push (@$html, $cgi->start_form(-action => "pir.cgi"),
  $cgi->h2("Please choose your super family"),
  "First, please enter a username: ", $cgi->textfield(-name=>"user"), $cgi->p,
  "The pull down list shows the PIR superfamilies. If only one number is shown (default) this is the number of PEGs that map to that superfamily. ",
  "If you choose to show subsystem counts in this menu, you will get two numbers. The first of the two numbers in parenthesis is the number ",
  "of PEGs that map to that superfamily, and the second number in parenthesis is the number of <em>different</em> ",
  "subsystems that those PEGs are in.\n", $cgi->p,
  $cgi->popup_menu(-name=>'pirsf', -values=>[keys %$display], -labels=>$display), $cgi->p,
  "\nMinimum number of pegs per PIR superfamily shown in list &nbsp; <input type='text' name='min' value='$min' size=3 />",
  "or show all PIR superfamilies: ", $cgi->checkbox(-name=>"showall", -label=>""), $cgi->p,
  $cgi->checkbox(-name=>"showsubsys", -label=>"Show subsystem counts in pull down menu (this will slow things down!)"), $cgi->p,
  $cgi->checkbox(-name=>"preliminary", -label=>"Show only preliminary PIR superfamilies"), $cgi->p,
  $cgi->submit('submit', 'Update view'),
  $cgi->submit('submit', 'Show Correspondance'),



=head1 read_pir_file

 Read the PIR data file that describes superfamilies and the PIR proteins that have those families.
 The PIR data file is from ftp://ftp.pir.georgetown.edu/pir_databases/pirsf/data/pirsfinfo.dat and
 contains the family name beginning with a > and then a list of PIR ids.

 I split this using the PIRSF\d+ and return two hashes. One that correlates PIRSF\d+ to fig id and one
 that correlates it to superfamily names.


sub read_pir_file {

# just read the file, convert it to an HTML table and return it
 unless (-e "$FIG_Config::data/Global/pirsfcorrespondance.txt") {
  print STDERR "Can't find the correspondance file pirsfcorrespondance.txt so we are going to make it\n";
  raelib->pirsfcorrespondance("$FIG_Config::data/Global/pirsfinfo.dat", "$FIG_Config::data/Global/pirsfcorrespondance.txt");
 open (IN, "$FIG_Config::data/Global/pirsfcorrespondance.txt") || die "Can't open $FIG_Config::data/Global/pirsfcorrespondance.txt";
 my $pir;
 my $functions;
 my $id;
 my $added;
 while (<IN>) {
  if (s/^>//) {
   unless ($added) {
    # we didn't find anything that maps here
   undef $added;
  else {
   my ($pirid, $peg)=split /\t/;
   next unless ($peg);
   push @{$pir->{$id}}, $peg;
 return $pir, $functions;

=head1 table_annotations

Return a table sorted by the number of annotations, and list a bunch of stuff


sub table_annotations {
 my ($html, $pegbypir, $pirid)=@_;
 my $count; my $subsystems; my $countinss;
 foreach my $sf (keys %$pegbypir) {
  my $function; my $ss; my $functionandss;
  foreach my $peg (@{$pegbypir->{$sf}}) {
   my $fn=scalar $fig->function_of($peg);
   foreach my $subsys ($fig->subsystems_for_peg($peg)) {$ss->{$$subsys[0]}++}
   $functionandss->{$fn}++ if (scalar keys %$ss);
  $subsystems->{$sf}=join "; ", keys %$ss;
  $count->{$sf}=scalar keys %$function;
  $countinss->{$sf}=scalar keys %$functionandss;

 my $col_hdrs = ["Number of annotations in subsystems", "Number of SEED annotations", "PIRSF<br><small>(Link goes to SEED/PIR comparison)</small>", 
                 "Superfamily name", "Subsystems in superfamily"];
 my $tab; my $lastcount; my $row;
 # note we are going to make a new table every 200 or so rows because that way the browsers don't get messed up. We actually increment between two identical counts
 my @superfamilies=sort {$countinss->{$b} <=> $countinss->{$a}} keys %$count;
 if ($cgi->param('totalsort')) {@superfamilies=sort {$count->{$b} <=> $count->{$a}} keys %$count}
 foreach my $sf (@superfamilies) {
  next if ($cgi->param('onlyss') && !($countinss->{$sf}));
  if ($lastcount ne $count->{$sf}) {
   if ($tab && ($row > 200)) {
    print STDERR "Added to table at $lastcount\n";
    undef $tab;
  push @$tab, [
   "<a href=\"http://seed-linux-2.uchicago.edu/FIG/pir.cgi?pirsf=$sf&ssonly='1'&user=''\">$sf</a>",
 return $html;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3