[Bio] / FigWebServices / diffsF.cgi Repository:
ViewVC logotype

View of /FigWebServices/diffsF.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (annotate)
Wed Oct 18 07:24:04 2006 UTC (13 years, 7 months ago) by overbeek
Branch: MAIN
Changes since 1.3: +90 -23 lines
First attempt at providing alignments. -- /gdp

# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

use GenoGraphics;

use FIG;
my $fig = new FIG;

use FigFam;
my $default_dir = "$FIG_Config::data/FigfamsData";

use HTML;

use CGI;
my $cgi = new CGI;

if (0)
{
    my $VAR1;
    eval(join("",`cat /tmp/diffsF_parms`));
    $cgi = $VAR1;
#   print STDERR &Dumper($cgi);
}

if (0)
{
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params)
    {
	print "$_\t:",join(",",$cgi->param($_)),":\n";
    }

    if (0)
    {
	if (open(TMP,">/tmp/diffsF_parms"))
	{
	    print TMP &Dumper($cgi);
	    close(TMP);
	}
    }
    exit;
}

use constant FID    =>  0;
use constant LOCUS  =>  1;
use constant CONTIG =>  2;
use constant START  =>  3;
use constant STOP   =>  4;
use constant LEN    =>  5;
use constant STRAND =>  6;
use constant TYPE   =>  7;
use constant TAXID  =>  8;

my $html = [];
unshift @$html, "<TITLE>Feature Differences</TITLE>\n";
$org1   = $cgi->param('org1');
$org2   = $cgi->param('org2');
$gene   = $cgi->param('gene');
$align1 = $cgi->param('align1');
$align2 = $cgi->param('align2');

if (!((-d $org1) || (-d $org2)))
{
    push(@$html,$cgi->h1("bad parms: gene=$gene align1=$align1 align2=$align2 org1=$org1 org2=$org2"));
}
elsif ($gene = $cgi->param('gene'))
{
    &show_region($fig,$cgi,$html,$org1,$gene);
}
elsif (($gene1 = $cgi->param('align1')) && ($gene2 = $cgi->param('align2')))
{
    &show_alignment($fig, $cgi, $html, $gene1, $gene2, $org1, $org2);
}
else
{
    &list_diffs($fig, $cgi, $html, $org1, $org2);
}
&HTML::show_page($cgi,$html);

sub show_region {
    my($fig,$cgi,$html,$org,$gene) = @_;
    my @feat  = &load_tbl2($org,$gene);
    for ($i=0; ($i < @feat) && ($feat[$i]->[FID] ne $gene); $i++) {}
    if ($i < @feat)
    {
	$first = ($i > 4) ? $i-5 : 0;
	$last  = ($i < (@feat - 5)) ? $i+5 : @feat - 1;
	my $gg = [];
	my $genes = [];
	$min = 10000000;
	$max = 0;
	while ($first <= $last)
	{
	    $f = $feat[$first];
	    $beg = $f->[START];
	    $end = $f->[STOP];
	    $min = &FIG::min($min,$beg);
	    $min = &FIG::min($min,$end);
	    $max = &FIG::max($max,$beg);
	    $max = &FIG::max($max,$end);
	    $fid = $f->[FID];
#	    $fid =~ /\.([a-z]+\.\d+)$/;
#	    $info = $1;
	    push(@$genes,[ &FIG::min($beg,$end),
			   &FIG::max($beg,$end),
			   ($beg < $end) ? "rightArrow" : "leftArrow",
			   ($fid eq $gene) ? "red" : "blue",
			   "",
			   "",
			   "$fid: $beg to $end",
			   ""
			   ] );
	    $first++;
	}
	my $map = ['Region',0,$max+1-$min,&decr_coords($genes,$min)];
	push(@$gg,$map);
	push( @$html, @{ &GenoGraphics::render( $gg, 1000, 4, 1 ) } );
    }
}
	    
sub decr_coords {
    my($genes,$min) = @_;
    my($gene);

    foreach $gene (@$genes)
    {
	$gene->[0] -= $min;
	$gene->[1] -= $min;
    }
    return $genes;
}

sub list_diffs {
    my ($fig, $cgi, $html, $org1, $org2) = @_;
    
    $org1T = &load_tbl1($org1);
    $org2T = &load_tbl1($org2);
    
    my @both;
    my @just1;
    foreach $key (sort { by_locus($org1T->{$a}, $org1T->{$b}) } keys %$org1T)
    {
	$type1  = $org1T->{$key}->[TYPE];
	$locus1 = $org1T->{$key}->[LOCUS];

	if ($org2T->{$key})
	{
	    $type2  = $org2T->{$key}->[TYPE];
	    $locus2 = $org2T->{$key}->[LOCUS];
	    if (($type1 ne $type2) || ($locus1 ne $locus2))
	    {
		$fid1 = $org1T->{$key}->[FID];
		$fid2 = $org2T->{$key}->[FID];
		$link1 = &link($org1,$fid1);
		$link2 = &link($org2,$fid2);
		
		if (($len1 = $org1T->{$key}->[LEN]) < ($len2 = $org2T->{$key}->[LEN])) {
		    $changed = qq(lengthened);
		}
		else {
		    $changed = qq(shortened);
		}
		
		push @both, [ $changed, $link1, $len1, $link2, $len2, ($len2 - $len1) ];
	    }
	}
	else
	{
	    $fid1 = $org1T->{$key}->[FID];
	    $link1 = &link($org1,$fid1);
	    push @just1, [ $link1, $org1T->{$key}->[LEN] ];
	}
    }
    
    my @just2;
    foreach $key (sort { by_locus($org2T->{$a},$org2T->{$b}) } keys %$org2T)
    {
	if (! $org1T->{$key})
	{
	    $type2  = $org2T->{$key}->[TYPE];
	    $locus2 = $org2T->{$key}->[LOCUS];
	    $fid2 = $org2T->{$key}->[FID];
	    $link2 = &link($org2,$fid2);
	    push @just2, [ $link2, $org2T->{$key}->[LEN] ];
	}
    }
    
    push @$html, map {
	$align = qq(<A HREF="diffsF.cgi?align1=$_->[1]&align2=$_->[3]&org1=$org1&org2=$org2">align</A>);
	$cgi->h2("$align $_->[0]: $_->[1] ($_->[2] bp) => $_->[3] ($_->[4] bp), diff=$_->[5]")
	}
    sort { 
	abs($b->[5]) <=> abs($a->[5])
	} @both;
    
    
    push @$html, map {
	$cgi->h2("just2: $_->[0] ($_->[1] bp)")
	}
    sort { 
	$b->[1] <=> $a->[1]
	} @just2;
    
    
    push @$html, map {
	$cgi->h2("just1: $_->[0] ($_->[1] bp)")
	}
    sort { 
	$b->[1] <=> $a->[1]
	} @just1;
}

sub show_alignment {
    my ($fig, $cgi, $html, $gene1, $gene2, $org1, $org2) = @_;
    
    my %neighbors = map { m/^(\d+\.\d+)/o ? ($1 => 1) : () } `cat $org2/neighbors`;
    
    my (@tmp, $tmp);
    if (@tmp = grep { m/^(\S+)/o && ($1 eq $gene2) } `cat $org2/found`) {
	if ($tmp[0] =~ m/^\S+\t(FIG\d{6})/o) {
	    $figfam = new FigFam($fig, $1, $default_dir);
	    my @members = grep { $neighbors{$fig->genome_of($_)} } $figfam->list_members();
	    
	    my $tmp_seqs = "$FIG_Config::temp/tmp.$$";
	    open(TMP, ">$tmp_seqs")
		|| ((push @$html, $cgi->h2("Could not write-open $tmp_seqs")) && return);
	    
	    foreach my $id (@members) {
		my $seq = $fig->get_translation($id);
		&FIG::display_id_and_seq($id, \$seq, \*TMP);
	    }
	    close(TMP)
		|| ((push @$html, $cgi->h2("Could not close $tmp_seqs")) && return);
	    
	    $tmp = $gene1;
	    $tmp =~ s{\|}{\\\|}o;
	    system("echo $tmp1 | pull_fasta_entries $org1/Features/peg/fasta >> $tmp_seqs")
		&& ((push @$html, $cgi->h2("Could not append seq of $gene1 to $tmp_seqs")) && return);
	    
	    $tmp = $gene2;
	    $tmp =~ s{\|}{\\\|}o;
	    system("echo $tmp1 | pull_fasta_entries $org2/Features/peg/fasta >> $tmp_seqs")
		&& ((push @$html, $cgi->h2("Could not append seq of $gene2 to $tmp_seqs")) && return);
	    
	    system("clustalw -infile=$tmp_seqs -align -outorder=aligned > /dev/null")
		&& ((push @$html, $cgi->h2("clustalw failed on file $tmp_seqs")) && return);
	    
	    push @$html, qq(<pre>\n);
	    push @$html, `cat $tmp_seqs.aln`;
	    push @$html, qq(</pre>\n);
	}
	else {
	    $tmp[0] =~ s/\t/ /go;
	    push @$html, $cgi->h2("Could not parse family from entry: $tmp[0]");
	}
    }
    else {
	push @$html, $cgi->h2("No family found for gene2=$gene2");
    }
}

sub load_tbl1
{
    my ($dir) = @_;
    my ($entry, $id, $locus, $contig, $beg, $end, $len, $strand, $taxid, $type);

    open(TBL, "cat $dir/Features/*/tbl |") || die "Could not open $dir";
    
    my $tbl = {};
    while (defined($entry = <TBL>))
    {
	chomp $entry;
	
	($id, $locus) = split /\t/, $entry;
	$id =~ m/^[^\|]+\|(\d+\.\d+)\.([^\.]+)/;
	($taxid, $type) = ($1, $2);
	
	if ((($contig, $beg, $end, $len, $strand) = &from_locus($locus)) 
	   && defined($contig) && $contig
	   && defined($beg)    && $beg
	   && defined($end)    && $end
	   && defined($len)    && $len
           && defined($strand) && $strand
           )
	{
	    $tbl->{"$contig\t$strand$end"} = [ $id, $locus, $contig, $beg, $end, $len, $strand, $type, $taxid ];
	}
	else
	{
	    warn "INVALID ENTRY:\t$entry\n";
	}
    }
    
    return $tbl;
}

sub load_tbl2
{
    my ($dir,$gene) = @_;
    
    my ($entry, $id, $locus, $contig, $beg, $end, $len, $strand, $taxid, $type, $keep);

    open(TBL, "cat $dir/Features/*/tbl |") || die "Could not open $dir";
    
    my $tbl = {};
    while (defined($entry = <TBL>))
    {
	chomp $entry;
	
	($id, $locus) = split /\t/, $entry;
	$id =~ m/^[^\|]+\|(\d+\.\d+)\.([^\.]+)/;
	($taxid, $type) = ($1, $2);
	
	if ((($contig, $beg, $end, $len, $strand) = &from_locus($locus)) 
	   && defined($contig) && $contig
	   && defined($beg)    && $beg
	   && defined($end)    && $end
	   && defined($len)    && $len
           && defined($strand) && $strand
           )
	{
	    push(@{$tbl->{$contig}}, [ $id, $locus, $contig, $beg, $end, $len, $strand, $type, $taxid ]);
	    if ($id eq $gene) { $keep = $contig }
	}
	else
	{
	    warn "INVALID ENTRY:\t$entry\n";
	}
    }
    $keep || die "no contig";
    return sort { by_locus($a,$b) } @{$tbl->{$keep}};
}

sub from_locus
{
    my ($locus) = @_;
    my ($contig, $beg, $end);
    
    if (  ($locus =~ m/^([^,]+)_(\d+)_\d+/)  && ($contig = $1) && ($beg = $2)
       && ($locus =~ m/[^,]+_\d+_(\d+)$/) && ($end = $1)
       )
    {
	return ($contig, $beg, $end, (1+abs($end-$beg)), (($beg < $end) ? '+' : '-'));
    }
    else
    {
	die "Invalid locus $locus";
    }
    
    return ();
}

sub by_locus
{
    my ($a, $b) = @_;

    my (undef, undef, $A_contig, $A_beg, $A_end, $A_len, $A_strand) = @$a;
    my (undef, undef, $B_contig, $B_beg, $B_end, $B_len, $B_strand) = @$b;

    return (  ($A_contig cmp $B_contig) 
	   || (&FIG::min($A_beg, $A_end) <=> &FIG::min($B_beg, $B_end))
	   || ($B_len <=> $A_len)
	   || ($A_strand cmp $B_strand)
	   );
}

sub link {
    my($org,$fid) = @_;

    return "<a href=diffsF.cgi?org1=$org&gene=$fid>$fid</a>";
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3