[Bio] / FigWebServices / comp_genomes.cgi Repository:
ViewVC logotype

View of /FigWebServices/comp_genomes.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.12 - (download) (annotate)
Sat Nov 9 00:01:10 2013 UTC (6 years ago) by golsen
Branch: MAIN
CVS Tags: rast_rel_2014_0912, rast_rel_2014_0729, HEAD
Changes since 1.11: +22 -3 lines
Add a test on modification dates to force an update of the cached data.

# -*- perl -*-
#
# Copyright (c) 2003-2013 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use HTML;
use strict;
use ButtonArray;
use display_related_genomes;
use Carp;

use CGI;
my $cgi = new CGI;

if (0)
{
    my $VAR1;
    eval(join("",`cat /tmp/comp_genomes_parms`));
    $cgi = $VAR1;
#   print STDERR &Dumper($cgi);
}

if (0)
{
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params)
    {
        print "$_\t:",join(",",$cgi->param($_)),":\n";
    }

    if (0)
    {
        if (open(TMP,">/tmp/comp_genomes_parms"))
        {
            print TMP &Dumper($cgi);
            close(TMP);
        }
    }
    exit;
}

use FIG;
use FIGV;

my $fig = new FIG;
my $ref = &type_of_genome($cgi->param('reference_genome'));
if ($ref && ($ref->[0] =~ /^\//))
{
    $fig = new FIGV($ref->[0],$fig);
}

my $html = [];

unshift @$html, "<TITLE>Compare Genomes</TITLE>\n";

my @full_text = $cgi->param('comp_orgs');
if (@full_text == 0)
{
    #   RAO: I added this to allow calling it with either no arguments or with just a user.
    #        This is probably how it will be called from within the NMPDR
    my @orgs;
    my %org_labels;
    foreach my $org ($fig->genomes('complete'))
    {
        my $label = compute_genome_label($fig, $org);
        $org_labels{$org} = $label;
        push(@orgs, $org);
    }

    #  Make the sort case independent -- GJO

    #  @orgs = sort { $a cmp $b } @orgs;
    @orgs = sort { lc( $org_labels{$a} ) cmp lc( $org_labels{$b} ) } @orgs;

    my $n_genomes = @orgs;

    #
    # Make a list of the org names for the code that doesn't use the
    # name/value separation in the scrolling list.
    #

    my @org_names = map { $org_labels{$_} } @orgs;

    my $user = $cgi->param('user');

    push(@$html,$cgi->br,
                "If you wish to compare the contents of several genomes, you can use this tool.
Choose a set of genomes (at least two).<br><br> ",
	$cgi->start_form(-action => 'comp_genomes.cgi'));
    if ($user) { push(@$html,$cgi->hidden(-name => 'user', -value => $user)) }
    push(@$html,
                $cgi->scrolling_list( -name   => 'comp_orgs',
                                      -values => [ @org_names ],
                                      -size   => 10,
                                      -multiple => 1,
                                    ), $cgi->br,$cgi->br,
                 "<br><br>",
                $cgi->submit('Compare Genomes'),
                $cgi->end_form
	);
    &HTML::show_page($cgi,$html);
    exit;
}
if ($cgi->param('Update Functions in MouseOvers') && (@full_text > 0))
{
    &update_functions($fig,\@full_text);
    push(@$html,"<h1>Done</h1>");
    &HTML::show_page($cgi,$html);
    exit;
}

my $user = $cgi->param('user') || '';

if (@full_text < 2)
{
    push(@$html,"<h1>You need to specify at least 2 genomes</h1>");
}
else
{
    my $nGenome = @full_text;
    my $peg = $cgi->param('peg');
    my $sz = $cgi->param('sz');
    if ((! $ref) && $peg)  { $ref = &type_of_genome(&FIG::genome_of($peg)) }

    if ( ! $ref )
    {
        push( @$html, $cgi->h2('Pick a reference genome and display order:') );

        #  The following code builds an HTML form that allows the user to
        #  select the reference genome and order the remaining genomes as
        #  desired. -- GJO

        #  Names displayed and used as selection parameter values
        my $genomeNames = [ map { &compute_genome_label($fig,$_) } @full_text ];
        #  Information for the <FORM> tag.  A default Name will be added.
        my $formData = { Method  => 'post',
                         Action  => 'comp_genomes.cgi',
                         EncType => 'multipart/form-data'
                       };
        #  Hidden <INPUT Type=hidden> data
        my $hiddenData = { user => $user,
                           comp_orgs => \@full_text
                         };
        $hiddenData->{ sz } = $sz if $sz;
        #  These are the parameter names of the selected reference and
        #  subsequently selected genomes
        my $paramNames = [ 'reference_genome', map { "comp_$_" } ( 1 .. ($nGenome-1) ) ];
        #  Column headings for reference and subsequently selected genomes
        my $colHeads = [ 'ref', 1 .. ($nGenome-1) ];
        #  Set the label of the submit button.
        my $submitData = { Value => 'Compare Genomes' };

        push @$html, ButtonArray::buttonArrayScript(),   # The JavaScript
                     ButtonArray::buttonArrayForm( $formData, $hiddenData, $submitData,
                                                    $paramNames, $colHeads, $genomeNames, \@full_text );
    }
    else
    {
        my %used_id = ( $ref->[0] => 1 );
        my @other_genomes;
        for ( my $i = 1; $i < $nGenome; $i++ )
        {
            my $genome;
            if ( $genome = $cgi->param( "comp_$i" ) )
            {
                my $gen = &type_of_genome( $genome );
                push @other_genomes, $gen;
                $used_id{ $gen->[0] } = 1;
            }
        }

        push @other_genomes, grep { ! $used_id{ $_->[0] } } map { &type_of_genome($_) } @full_text;

        my $ok = &cache_data($ref,\@other_genomes);

        if ($ok == 0)
        {
            ### At this point, the data is cached, and we need to build the parameters
            ### to invoke display_related_genomes
            my ( $genome_mouseovers, $desc_table ) = &genome_mouseovers( $fig, $ref, @other_genomes );
            my $contig_entries = &contig_entries($fig,$cgi,$peg,$sz,$ref,@other_genomes);
            push @$html, "<BR /><P>\n",
                         $desc_table,
                         "<P><P>\n",
                         &display_related_genomes::display_related_genomes( $contig_entries, $genome_mouseovers );
        }
        else
        {
            push(@$html,"<h2>Computing Required Data: Try Again in $ok Minutes</h2>");
        }
    }
}

&HTML::show_page($cgi,$html);
exit;


sub contig_entries {
    my($fig,$cgi,$peg,$sz,$ref,@others) = @_;

#   my $ref_cache_dir = "$ref->[0]/GenomeComparisonCache";
    my $ref_cache_dir = &GenomeComparisonCache($ref->[1]);

    my %in_window;
    if ($peg && $sz)
    {
        my($contig,$beg,$end) = &FIG::boundaries_of(scalar $fig->feature_location($peg));
        my $min = &FIG::min($beg,$end);
        my $max = &FIG::max($beg,$end);
        if ($contig)
        {
            my($features,undef,undef) = $fig->genes_in_region(&FIG::genome_of($peg),$contig,$min-($sz/2),$max+($sz/2));
            foreach my $fid (@$features)
            {
                if ($fid =~ /peg/)
                {
                    $in_window{$fid} = 1;
                }
            }
        }
    }
    my(@against,@pegs);
    foreach my $other (map { $_->[1] } @others)
    {
        open(OTHER,"<$ref_cache_dir/$other")
            || die "could not open $ref_cache_dir/$other";
        while (defined($_ = <OTHER>))
        {
            chomp;
            my($peg1I,$peg1,$type,$contig2I,$peg2I,$peg2,$iden,$mousetext) = split(/\t/,$_);
            if ((! $peg) || $in_window{$peg1})
            {
                push(@{$against[$peg1I]},[$type,$contig2I,$peg2I,$iden,[$peg2,$mousetext,&link($cgi,$peg2)]]);
            }
        }
        close(OTHER);
    }
    open(REF,"<$ref_cache_dir/reference_genome")
        || confess "could not open $ref_cache_dir/reference_genome";

    while (defined($_ = <REF>))
    {
        chomp;
        my($peg1I,$peg1,$contig1I,$contig1,$beg1,$end1,$func1) = split(/\t/,$_);
        next if ($peg && (! $in_window{$peg1}));
        my $mousetext = join("<br>",("location: $contig1 $beg1 $end1",
                                     "function: " . &html_escape($func1)
                            ));

        $pegs[$peg1I] = [$contig1I,$peg1I,int((abs($end1-$beg1)+1)/3),[$peg1,$mousetext,&link($cgi,$peg1)],$against[$peg1I]];
    }
    close(REF);
    
    my $contig_entries = [];
    while (@pegs > 0)
    {
        my($first);
        if (($first = shift @pegs) && defined($first))
        {
            my $contig_entry = [$first];
            my $curr         = $first->[0];
            while ((@pegs > 0) && (! (defined($pegs[0]) && ($pegs[0]->[0] != $curr))))
            {
                my $next = shift @pegs;
                if (defined($next))
                {
                    push(@$contig_entry,$next);
                }
            }
            push(@$contig_entries,$contig_entry);
        }
    }
    return $contig_entries;
}

sub link {
    my($cgi,$peg) = @_;

    if ($cgi->param('rast') && defined $FIG_Config::rast_cgi_url)
    {
        return "$FIG_Config::rast_cgi_url/index.cgi?action=ShowAnnotation&prot=$peg";
    }
    else
    {
        return &HTML::fid_link($cgi,$peg,0,1);
    }
}

sub genome_mouseovers {
    my ( $fig, $ref, @genomes ) = @_;

    my $mouseovers = [];
    my $descripts = "\n<TABLE>\n"
                  . "  <TR>\n"
                  . "    <TH VAlign=bottom>Genome<BR />ID</TH>\n"
                  . "    <TH VAlign=bottom>Organism</TH>\n"
                  . "    <TH VAlign=bottom>Contigs</TH>\n"
                  . "    <TH>Protein-<BR />encoding<BR />genes</TH>\n"
                  . "  </TR>\n";
    foreach my $genome ( map { $_->[1] } ( $ref, @genomes ) )
    {
        my $gs = $fig->genus_species($genome);
        my $gc = $fig->number_of_contigs($genome);
        my $num_pegs = $fig->genome_pegs($genome);
        push ( @$mouseovers, [ $genome,
                               join("<br>",($gs,
                                            "contigs: $gc",
                                            "protein-encoding genes: $num_pegs")),
                               "genome_statistics.cgi?genome=$genome",
                              ]);
        $descripts .= "  <TR>"
                   .  "    <TD Align=right>$genome</TD>\n"
                   .  "    <TD>$gs</TD>\n"
                   .  "    <TD Align=center>$gc</TD>\n"
                   .  "    <TD Align=center>$num_pegs</TD>\n"
                   .  "  </TR>\n";
    }
    $descripts .= "</TABLE>\n\n";

    return wantarray ? ( $mouseovers, $descripts ) : $mouseovers;
}

sub type_of_genome {
    my($text) = @_;

    if (($text =~ /^(\/.*\/(\d+\.\d+))$/) && (-d $text))
    {
        return [$1,$2,"rast"];
    }
    #  Original regexp was giving false matches, which made the directory
    #  search fail on genomes with a dotted decimal-containing strain.
    elsif (( $text =~ /^(\d+\.\d+)$/) || ( $text =~ /\((\d+\.\d+)\)\s+\[/))
    {
        return [ "$FIG_Config::organisms/$1", $1, "seed" ];
    }
    else
    {
        return undef;
    }
}

sub cache_data {
    my($ref,$other_genomes) = @_;
    my($other);

#   my $cache_dir = "$ref->[0]/GenomeComparisonCache";
    my $cache_dir = &GenomeComparisonCache($ref->[1]);

    &FIG::verify_dir($cache_dir);

    my $cost = int((-s "$ref->[0]/contigs") / 1000000) + 1;
    $cost    = $cost * $cost;    # square of megs of data

    my $bad = 0;
    my $ref_data  = "$cache_dir/reference_genome";
    my $ref_fasta = "$ref->[0]/Features/peg/fasta";
    if    ( ! -s $ref_data ) { $bad += $cost }
    #  Check for stale data (this is really not the correct check, but it helps)
    elsif ( -f $ref_fasta && ( -M $ref_data > -M $ref_fasta ) )
    {
        unlink $ref_data;
        $bad += $cost;
    }

    foreach $other (@$other_genomes)
    {
        my $other_data  = "$cache_dir/$other->[1]";
        my $other_fasta = "$other->[0]/Features/peg/fasta";
        if ( ! -s $other_data )
        {
            $bad += $cost
        }
        #  Check for stale data (this is really not the correct check, but it helps)
        elsif ( -f $other_fasta && ( -M $other_data > -M $other_fasta )
             || -f $ref_fasta   && ( -M $other_data > -M $ref_fasta   )
              )
        {
            unlink $other_data;
            $bad += $cost;
        }
    }
    if ($bad == 0)
    {
        return 0;
    }

    my $to_cache_parms = join(" ",(map { join("::",@$_) } ($ref,@$other_genomes)));
    system "$FIG_Config::bin/cache_comparison_data $to_cache_parms < /dev/null > /dev/null 2> /dev/null &";
    return $bad;
}

#-------------------------------------------------------------------------------
#  Escape special characters in text for use as html:
#
#     $html = html_escape( $text )
#
#-------------------------------------------------------------------------------
sub html_escape { local $_ = $_[0]; s/\&/&amp;/g; s/>/&gt;/g; s/</&lt;/g; $_ }


sub update_functions {
    my($fig,$full_text) = @_;

    my @genomes = map { $_->[1] } grep { $_->[2] eq "seed" } map { &type_of_genome($_) } @$full_text;
    foreach my $genome (@genomes)
    {
	my($fixes,$genomes) = &fix_reference_genome($fig,$genome);
	if ($fixes)
	{
	    foreach my $genome1 (@$genomes)
	    {
		if (-d "$FIG_Config::organisms/$genome1")
		{
		    &fix_to_file($fig,$genome,$genome1,$fixes);
		}
	    }
	}
    }
}

sub fix_reference_genome {
    my($fig,$genome) = @_;

    my $fixes = {};
#   my $dir = "$FIG_Config::organisms/$genome/GenomeComparisonCache";
    my $dir = &GenomeComparisonCache($genome);

    if ((-d $dir) && (-s "$dir/reference_genome") && open(REF,"<$dir/reference_genome"))
    {
	my %funcs;
	if (open(FUNCS,"<$FIG_Config::organisms/$genome/assigned_functions"))
	{
	    while (defined($_ = <FUNCS>))
	    {
		chomp;
		my($peg,$func) = split(/\t/,$_);
		$funcs{$peg} = $func ? $func : '';
	    }
	    close(FUNCS);
	    my @ref = map { chomp; [split(/\t/,$_)] } <REF>;
	    close(REF);
	    my $changed = 0;
	    foreach my $ref (@ref)
	    {
		if ($ref->[6] ne $funcs{$ref->[1]})
		{
		    $changed++;
		    $fixes->{$ref->[1]} = $ref->[6] = $funcs{$ref->[1]};
		}
	    }

	    if ($changed)
	    {
		open(REF,">$dir/reference_genome.fixed") || confess "could not open $dir/reference_genome.fixed";
		foreach my $ref (@ref)
		{
		    print REF join("\t",@$ref),"\n";
		}
		close(REF);
		if (-s "$dir/reference_genome.old") { unlink "$dir/reference_genome.old" }
		rename("$dir/reference_genome","$dir/reference_genome.old");
		rename("$dir/reference_genome.fixed","$dir/reference_genome");

		if (opendir(DIR,$dir))
		{
		    my @to_fix = grep { -d "$FIG_Config::organisms/$_" }
		                 grep { $_ =~ /^\d+\.\d+/ }
		                 readdir(DIR);
		    closedir(DIR);
		    return ($fixes,\@to_fix);
		}
	    }
	}
    }
    return undef;
}

sub fix_to_file {
    my($fig,$genome1,$genome2,$fixes) = @_;

#   my $file = "$FIG_Config::organisms/$genome2/GenomeComparisonCache/$genome1";
    my $cache_dir = &GenomeComparisonCache($genome2);
    my $file = "$cache_dir/$genome1";

    if (open(TOFIX,"<$file") && open(FIXED,">$file.fixed"))
    {
	while (defined($_ = <TOFIX>))
	{
	    chomp;
	    my @flds = split(/\t/,$_);
	    my @pieces = split(/\<br\>/,$flds[7]);
	    my $func;
	    if ($func = $fixes->{$flds[5]})
	    {
		$pieces[$#pieces] = "function: $func";
	    }
	    if (($pieces[3] =~ /^coverage\[ref\]: (\S+)/) && ($1 > 0.8) &&
		($pieces[4] =~ /^coverage\[other\]: (\S+)/) && ($1 > 0.8))
	    {
		splice(@pieces,3,2);
	    }
	    $flds[7] = join("<br>",@pieces);
	    print FIXED join("\t",@flds),"\n";
	}
	close(FIXED);
	close(TOFIX);
	if (-s "$file.old") { unlink("$file.old") }
	rename($file,"$file.old");
	rename("$file.fixed",$file);
    }
}

sub compute_genome_label
{
    my($fig, $org) = @_;
    my $label;

    if ($org =~ /\/\d+\.\d+$/) { return $org }

    my $gs = $fig->genus_species($org);
    if ($fig->genome_domain($org) ne "Environmental Sample")
    {
	my $gc=$fig->number_of_contigs($org);
	$label = "$gs ($org) [$gc contigs]";
    }
    else 
    {
	$label = "$gs ($org)";
    }
    return $label;
}

sub GenomeComparisonCache {
    my($genome) = @_;

    my $compDir       = $FIG_Config::GenomeComparisonCache ? $FIG_Config::GenomeComparisonCache : 
	                                                     "$FIG_Config::temp/GenomeComparisonCache";
    my $ref_cache_dir = "$compDir/$genome";
    return $ref_cache_dir;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3