[Bio] / FigKernelPackages / HTML.pm Repository:
ViewVC logotype

View of /FigKernelPackages/HTML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (download) (as text) (annotate)
Fri Mar 26 21:54:11 2004 UTC (15 years, 10 months ago) by olson
Branch: MAIN
Changes since 1.7: +1 -1 lines
Fix baseurl stuff some more.

package HTML;

use Carp;
use Data::Dumper;
use LWP::UserAgent;
use LWP::Simple;
use URI::URL;
use HTTP::Request::Common;

sub show_page {
    my($cgi,$html,$no_home) = @_;
    my $i;

    #
    # Find the HTML header
    #

    my $html_hdr_file = "./Html/html.hdr";
    if (! -f $html_hdr_file)
    {
	$html_hdr_file = "$FIG_Config::fig/CGI/Html/html.hdr";
    }

    my $html_tail_file = "./Html/html.tail";
    if (! -f $html_tail_file)
    {
	$html_tail_file = "$FIG_Config::fig/CGI/Html/html.tail";
    }


    print $cgi->header;

    #
    #  The SEED header file goes immediately after <BODY>.  Figure out
    #  what parts of the HTML document skeleton are there, and fill in
    #  missing ones.
    #
    #  This list should be as comprehensive as feasible:
    #

    my %head_tag = ( base     => 1,
                     basefont => 1,
                     html     => 1,
                     isindex  => 1,
                     link     => 1,
                     meta     => 1,
                     nextid   => 1,
                     style    => 1,
                     title    => 1
                   );

    #
    #  This list need not be comprehensive; it is just stopping conditions:
    #

    my %body_tag = ( a      => 1,
                     br     => 1,
                     center => 1,
                     form   => 1,
                     h1     => 1,
                     h2     => 1,
                     h3     => 1,
                     hr     => 1,
                     img    => 1,
                     p      => 1,
                     pre    => 1,
                     table  => 1
                   );

    my $html_line = -1;
    my $head_line = -1;
    my $base_line = -1;
    my $head_end_line = -1;
    my $body_line = -1;
    my $last_head_line = -1;  #  If no head tags are found, text goes at top.
    my $done = 0;

    for ( $i = 0; $i < @$html; $i++ )
    {
	#  Some special cases:

	if ( $html->[$i] =~ /\<html[^0-9a-z]/i ) { $html_line = $i }
	if ( $html->[$i] =~ /\<head[^0-9a-z]/i ) { $head_line = $i }
	if ( $html->[$i] =~ /\<base[^0-9a-z]/i ) { $base_line = $i }
	if ( $html->[$i] =~ /\<\/head\>/i )      { $head_end_line = $i }

	#  The content goes after this line:

	if ( $html->[$i] =~ /\<body[^0-9a-z]/i )
	{
	    $body_line = $i;
	    $last;
	}

	#  Now the general case.
	#  Analyze all the html tags on the line:

	foreach ( $html->[$i] =~ /\<\/?([0-9a-z]+)/ig )
	{
	    #  At first body tag, we stop the search and put the text
	    #  after the last line with a head tag:

	    if ( $body_tag{ lc $_ } )
	    {
		$done = 1;
		last;
	    }

	    #  If this is a head tag, then move the marker forward

	    elsif ( $head_tag{ lc $_ } )
	    {
		$last_head_line = $i;
	    }
	}
	last if $done;      # When done, break loop to avoid increment
    }

    #  Some sanity checks on structure:

    if ( 1 )
    {
	if ( $html_line >= 0 )
	{
	    if ( ( $head_line >= 0 ) && ( $html_line > $head_line ) )
	    {
		print STDERR "<HTML> tag follows <HEAD> tag\n";
	    }
	    if ( ( $head_end_line >= 0 ) && ( $html_line > $head_end_line ) )
	    {
		print STDERR "<HTML> tag follows </HEAD> tag\n";
	    }
	}
	if ( $head_line >= 0 )
	{
	    if ( ( $head_end_line >= 0 ) && ( $head_line > $head_end_line ) )
	    {
		print STDERR "<HEAD> tag follows </HEAD> tag\n";
	    }
	}
    }

    #
    #  Okay.  Let's put in the html header file, and missing tags:
    #
    #  <BODY> goes after last head line
    #

    if ( $body_line < 0 )
    {
	$body_line = $last_head_line + 1;
	splice( @$html, $body_line, 0, "<BODY>\n" );
    }

    #
    #  Seed page header (if it exists) goes after <BODY>
    #

    if ( -f $html_hdr_file )
    {
	splice( @$html, $body_line + 1, 0, `cat $html_hdr_file` );
    }

    #
    #  </HEAD> goes before <BODY>
    #

    if ( $head_end_line < 0 )
    {
	$head_end_line = $body_line;
	splice( @$html, $body_line, 0, "</HEAD>\n" );
    }

    #
    #  <BASE ...> goes before </HEAD>
    #

    if ( $base_line < 0 )
    {
	#
	#  Use a relative base address for pages.  Also, because I am
	#  worried about when FIG_config.pm gets updated (clean installs
	#  only, or every update?), I provide an alternative derivation
	#  from $cgi_url. -- GJO
	#
	# BASE href needs to be absolute. RDO.
	#
	# 
	$base_url = &FIG::cgi_url;
# 	my $base_url = $FIG_Config::cgi_base;
# 	if ( ! $base_url )                      # if cgi_base was not defined
# 	{
# 	    $base_url = $FIG_Config::cgi_url;   # get the full cgi url
# 	    $base_url =~ s~^http://[^/]*~~;     # remove protocol and host
# 	    $base_url =~ m~/$~ || $base_url =~ s~$~/~; # check trailing slash
# 	} 

	$base_line = $head_end_line;
	splice( @$html, $base_line, 0, "<BASE href=\"$base_url/\">\n" );
    }

    #
    #  <HTML> goes at the top of the output
    #

    if ( $html_line < 0 )
    {
	$html_line = 0;
	splice( @$html, $html_line, 0, "<HTML>\n" );
    }

    #
    #  <HEAD> goes after <HTML>
    #

    if ( $head_line < 0 )
    {
	$head_line = $html_line + 1;
	splice( @$html, $head_line, 0, "<HEAD>\n" );
    }

    #
    #  Place FIG search link at bottom of page
    #

    my @tail = -f $html_tail_file ? `cat $html_tail_file` : ();
    if (! $no_home)
    {
	my $user = $cgi->param('user') || "";
	push( @tail, "<hr><a href=\"index.cgi?user=$user\">FIG search</a>\n" );
    }

    #
    #  Figure out where to insert The SEED tail.  Before </body>,
    #  or before </html>, or at end of page.
    #

    my @tags = ();

    for ($i=0; ($i < @$html) && ($html->[$i] !~ /\<\/body\>/i); $i++) {}
    if ($i >= @$html)        # </body> not found; look for </html>
    {
	push @tags, "\n</BODY>\n";
	# Even if tag is not found, index points to correct place for splice
	for ($i=0; ($i < @$html) && ($html->[$i] !~ /\<\/html\>/i); $i++) {}
	if ($i >= @$html)    # </html> not found; add it
	{
	    push @tags, "</HTML>\n";
	}
    }

    if ( @tail )
    {
	splice( @$html, $i, 0, @tail, @tags );
    }
    elsif ( @tags )
    {
	splice( @$html, $i, 0, @tags );
    }

    print @$html;
}

sub make_table {
    my($col_hdrs,$tab,$title,$instr) = @_;
    my(@tab);

    push( @tab, "\n<table border>\n",
                "\t<caption><b>$title</b></caption>\n",
                "\t<tr>\n\t\t<th>"
              . join( "</th>\n\t\t<th>", @$col_hdrs )
              . "</th>\n\t</tr>\n"
        );
    my($i,$nowrap);

    for ($i=0; ($i < @$instr) && ($instr->[$i] !~ /nowrap/); $i++) {}
    $nowrap = ($i == @$instr) ? "" : " nowrap";

    my $row;
    foreach $row (@$tab)
    {
	push( @tab, "\t<tr>\n"
	          . join( "\n", map { &expand($_,$nowrap) } @$row )
	          . "\n\t</tr>\n"
	    );
    }
    push(@tab,"</table>\n");
    return join("",@tab);
}

sub expand {
    my($x,$nowrap) = @_;

    if ($x =~ /^\@([^:]+)\:([^:]+)\:(.*)$/)
    {
	return "\t\t<td$nowrap $1=\"$2\">$3</td>";
    }
    else
    {
	return "\t\t<td$nowrap>$x</td>";
    }
}

sub ec_link {
    my($role) = @_;

    if ($role =~ /(\d+\.\d+\.\d+\.\d+)/)
    {
	return "<a href=\"http://www.genome.ad.jp/dbget-bin/www_bget?ec:$1\">$role</a>";
    }
    else
    {
	return $role;
    }
}

sub role_link {
    my($cgi,$role) = @_;

    my $roleR = ($role =~ /^(\d+\.\d+\.\d+\.\d+)\s+-\s+/) ? $1 : $role;
    my $user = $cgi->param('user');
    if (! $user) { $user = "" }
    my $link = $cgi->url() . "?role=$roleR&user=$user";
    $link =~ s/[a-z]+\.cgi\?/pom.cgi?/;
    return "<a href=$link>$role</a>";
}

sub fid_link {
    my($cgi,$fid,$local,$just_url) = @_;
    my($n);

    if ($fid =~ /^fig\|\d+\.\d+\.([a-z]+)\.(\d+)/)
    {
	if ($local)
	{
	    if ($1 eq "peg")
	    {
		$n = $2;
	    }
	    else
	    {
		$n = "$1.$2";
	    }
	}
	else
	{
	    $n = $fid;
	}
	if ($1 ne "peg") { return $n }
	my $user = $cgi->param('user');
	if (! $user) { $user = "" }
	my $trans = $cgi->param('translate') ? "&translate=1" : "";
	my $link = $cgi->url() . "?prot=$fid&user=$user$trans";
	$link =~ s/[a-z_A-Z]+\.cgi\?/protein.cgi?/;
	if ($just_url)
	{
	    return $link;
	}
	else
	{
	    return "<a href=$link>$n</a>";
	}
    }
    return $fid;
}

sub family_link {
    my($family,$user) = @_;

    return $family;
}

use URI::Escape;

sub get_html {
    my( $url, $type, $kv_pairs) = @_;
    my( $encoded, $ua, $args, @args, $out, @output, $x );
    
    $ua = new LWP::UserAgent;
    $ua->timeout( 900 );

    if ($type =~/post/i)
    {
	$args = [];
	foreach $x (@$kv_pairs)
	{
	    push(@$args, ( $x->[0], $x->[1]) );
	}
	my $request  = POST $url, $args;
	my $response = $ua->request($request);
	$out = $response->content;
    }
    else
    {
	@args = ();
	foreach $x (@$kv_pairs)
	{
	    push( @args, "$x->[0]=" . uri_escape($x->[1]) );
	}
	
	if (@args > 0)
	{
	    $url .= "?" . join("&",@args);
	}
	$request = new HTTP::Request('GET', $url);
	my $response = $ua->request($request);

	if ($response->is_success) 
	{
	    $out = $response->content;
	} 
	else 
	{
	    $out = "<H1>Error: " . $response->code . "</H1>" . $response->message;
	}
    }
#   set up a document with proper eol characters
    @output = split(/[\012\015]+/,$out);
    foreach $out (@output) { $out .= "\n"; }

#   Now splice in a line of the form <base href=URL> to cause all relative links to work 
#   properly.  Remove the header.

    for ($i=0; ($i < @output) && ($output[$i] !~ /^\s*\</); $i++) {}
    if ($i < @output)
    {
	
	splice(@output,0,$i);
    }

    for ($i=0; ($i < @output) && ($output[$i] !~ /\<body\>/i); $i++) {}
    if ($i == @output)
    {
	$i = -1;
    }
    splice(@output,$i+1,0,"<base href=\"$url\">\n");
    return @output;
}

sub trim_output {
    my($out) = @_;
    my $i;

    for ($i=0; ($i < @$out) && ($out->[$i] !~ /^\</); $i++) {}
    splice(@$out,0,$i);

    for ($i=0; ($i < @$out) && ($out->[$i] !~ /\<body\>/i); $i++) {}
    if ($i == @$out)
    {
	for ($i=0; ($i < @$out) && ($out->[$i] !~ /\<html\>/i); $i++) {}
	if ($i == @$out)
	{
	    $i = -1;
	}
    }
    for ($j=$i+1; ($j < @$out) && ($out->[$j] !~ /^\<hr\>$/); $j++) {}
    if ($j < @$out)
    {
	splice(@$out,$i+1,($j-$i));
    }

    for ($i=0; ($i < @$out) && ($out->[$i] !~ /\<\/body\>/i); $i++) {}
    if ($i == @$out)
    {
	for ($i=0; ($i < @$out) && ($out->[$i] !~ /\<\/html\>/i); $i++) {}
    }

    for ($j=$i-1; ($j > 0) && ($out->[$j] !~ /FIG search/); $j--) {}
    if ($j > 0)
    {
	my @tmp = `cat $html_tail_file`;
	my $n = @tmp;
	splice(@$out,$j-$n,$n+1);
    }
}

sub set_prot_links {
    my($cgi,$x) = @_;
    my($before,$match,$after);

    if ($x =~ /^(.*)(fig\|\d+\.\d+\.peg\.\d+)(.*)$/)
    {
	$before = $1;
	$match = $2;
	$after = $3;
	return &set_prot_links($cgi,$before) . &HTML::fid_link($cgi,$match) . &set_prot_links($cgi,$after) . "\n";
    }
    elsif ($x =~ /^(.*)(gi\|\d+)(.*)$/)
    {
	$before = $1;
	$match = $2;
	$after = $3;
	return &set_prot_links($cgi,$before) . &HTML::gi_link($cgi,$match) . &set_prot_links($cgi,$after) . "\n";
    }
    elsif ($x =~ /^(.*)(sp\|[A-Z0-9]{6})(.*)$/)
    {
	$before = $1;
	$match = $2;
	$after = $3;
	return &set_prot_links($cgi,$before) . &HTML::sp_link($cgi,$match) . &set_prot_links($cgi,$after) . "\n";
    }
    elsif ($x =~ /^(.*)(pirnr\|NF\d+)(.*)$/)
    {
	$before = $1;
	$match = $2;
	$after = $3;
	return &set_prot_links($cgi,$before) . &HTML::pir_link($cgi,$match) . &set_prot_links($cgi,$after) . "\n";
    }
    return $x;
}

sub gi_link {
    my($cgi,$gi) = @_;

    if ($gi =~ /^gi\|(\d+)$/)
    {
	return "<a href=http://www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?cmd=Retrieve&db=Protein&list_uids=$1&dopt=GenPept>$gi</a>";
    }
    return $gi;
}

sub sp_link {
    my($cgi,$sp) = @_;

    if ($sp =~ /^sp\|(\S+)$/)
    {
	return "<a href=http://us.expasy.org/cgi-bin/get-sprot-entry?$1>$sp</a>";
    }
    return $sp;
}

sub pir_link {
    my($cgi,$pir) = @_;

    if ($pir =~ /^pirnr\|(NF\d+)$/)
    {
	return "<a href=http://pir.georgetown.edu/cgi-bin/nfEntry.pl?id=$1>$pir</a>";
    }
    return $pir;
}


1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3