[Bio] / FigWebServices / diffsF.cgi Repository:
ViewVC logotype

Diff of /FigWebServices/diffsF.cgi

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1, Thu Oct 5 00:28:58 2006 UTC revision 1.7, Wed Oct 18 23:12:25 2006 UTC
# Line 1  Line 1 
1    # -*- perl -*-
2    #
3    # Copyright (c) 2003-2006 University of Chicago and Fellowship
4    # for Interpretations of Genomes. All Rights Reserved.
5    #
6    # This file is part of the SEED Toolkit.
7    #
8    # The SEED Toolkit is free software. You can redistribute
9    # it and/or modify it under the terms of the SEED Toolkit
10    # Public License.
11    #
12    # You should have received a copy of the SEED Toolkit Public License
13    # along with this program; if not write to the University of Chicago
14    # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15    # Genomes at veronika@thefig.info or download a copy from
16    # http://www.theseed.org/LICENSE.TXT.
17    #
18    
19  use GenoGraphics;  use GenoGraphics;
20    
21  use FIG;  use FIG;
22  my $fig = new FIG;  my $fig = new FIG;
23    
24    use FigFam;
25    my $default_dir = "$FIG_Config::data/FigfamsData";
26    
27  use HTML;  use HTML;
28    
29  use CGI;  use CGI;
# Line 52  Line 73 
73  $org1 = $cgi->param('org1');  $org1 = $cgi->param('org1');
74  $org2 = $cgi->param('org2');  $org2 = $cgi->param('org2');
75  $gene = $cgi->param('gene');  $gene = $cgi->param('gene');
76    $align1 = $cgi->param('align1');
77    $align2 = $cgi->param('align2');
78    
79  if (!((-d $org1) || (-d $org2)))  if (!((-d $org1) || (-d $org2)))
80  {  {
81      push(@$html,$cgi->h1("bad parms: gene=$gene org1=$org1 org2=$org2"));      push(@$html,$cgi->h1("bad parms: gene=$gene align1=$align1 align2=$align2 org1=$org1 org2=$org2"));
82  }  }
83  elsif ($gene = $cgi->param('gene'))  elsif ($gene = $cgi->param('gene'))
84  {  {
85      &show_region($fig,$cgi,$html,$org1,$gene);      &show_region($fig,$cgi,$html,$org1,$gene);
86  }  }
87    elsif (($gene1 = $cgi->param('align1')) && ($gene2 = $cgi->param('align2')))
88    {
89        &show_alignment($fig, $cgi, $html, $gene1, $gene2, $org1, $org2);
90    }
91  else  else
92  {  {
93      &list_diffs($fig,$cgi,$html,$org1,$org2);      &list_diffs($fig,$cgi,$html,$org1,$org2);
# Line 89  Line 116 
116              $max = &FIG::max($max,$beg);              $max = &FIG::max($max,$beg);
117              $max = &FIG::max($max,$end);              $max = &FIG::max($max,$end);
118              $fid = $f->[FID];              $fid = $f->[FID];
119              $fid =~ /\.([a-z]+\.\d+)$/;  #           $fid =~ /\.([a-z]+\.\d+)$/;
120              $info = $1;  #           $info = $1;
121              push(@$genes,[ &FIG::min($beg,$end),              push(@$genes,[ &FIG::min($beg,$end),
122                             &FIG::max($beg,$end),                             &FIG::max($beg,$end),
123                             ($beg < $end) ? "rightArrow" : "leftArrow",                             ($beg < $end) ? "rightArrow" : "leftArrow",
# Line 126  Line 153 
153      $org1T = &load_tbl1($org1);      $org1T = &load_tbl1($org1);
154      $org2T = &load_tbl1($org2);      $org2T = &load_tbl1($org2);
155    
156        my @both;
157        my @just1;
158      foreach $key (sort { by_locus($org1T->{$a},$org1T->{$b}) } keys %$org1T)      foreach $key (sort { by_locus($org1T->{$a},$org1T->{$b}) } keys %$org1T)
159      {      {
160          $type1  = $org1T->{$key}->[TYPE];          $type1  = $org1T->{$key}->[TYPE];
# Line 141  Line 170 
170                  $fid2 = $org2T->{$key}->[FID];                  $fid2 = $org2T->{$key}->[FID];
171                  $link1 = &link($org1,$fid1);                  $link1 = &link($org1,$fid1);
172                  $link2 = &link($org2,$fid2);                  $link2 = &link($org2,$fid2);
173                  push(@$html,$cgi->h2("changed: $link1 => $link2"));  
174                    if (($len1 = $org1T->{$key}->[LEN]) < ($len2 = $org2T->{$key}->[LEN])) {
175                        $changed = qq(lengthened);
176                    }
177                    else {
178                        $changed = qq(shortened);
179                    }
180    
181                    push @both, [ $changed, $fid1, $link1, $len1, $fid2, $link2, $len2, ($len2 - $len1) ];
182              }              }
183          }          }
184          else          else
185          {          {
186              $fid1 = $org1T->{$key}->[FID];              $fid1 = $org1T->{$key}->[FID];
187              $link1 = &link($org1,$fid1);              $link1 = &link($org1,$fid1);
188              push(@$html,$cgi->h2("just1: $link1"));              push @just1, [ $fid1, $link1, $org1T->{$key}->[LEN] ];
189          }          }
190      }      }
191    
192        my @just2;
193      foreach $key (sort { by_locus($org2T->{$a},$org2T->{$b}) } keys %$org2T)      foreach $key (sort { by_locus($org2T->{$a},$org2T->{$b}) } keys %$org2T)
194      {      {
195          if (! $org1T->{$key})          if (! $org1T->{$key})
# Line 160  Line 198 
198              $locus2 = $org2T->{$key}->[LOCUS];              $locus2 = $org2T->{$key}->[LOCUS];
199              $fid2 = $org2T->{$key}->[FID];              $fid2 = $org2T->{$key}->[FID];
200              $link2 = &link($org2,$fid2);              $link2 = &link($org2,$fid2);
201              push(@$html,$cgi->h2("just2: $link2"));              push @just2, [ $fid2, $link2, $org2T->{$key}->[LEN] ];
202          }          }
203      }      }
204    
205        push @$html, map {
206            $align = qq(<A HREF="diffsF.cgi?align1=$_->[1]&align2=$_->[4]&org1=$org1&org2=$org2">align</A>);
207            $cgi->h2("$align $_->[0]: $_->[2] ($_->[3] bp) => $_->[5] ($_->[6] bp), diff=$_->[7]")
208            }
209        sort {
210            abs($b->[7]) <=> abs($a->[7])
211            } @both;
212    
213    
214        push @$html, map {
215            $cgi->h2("just2: $_->[1] ($_->[2] bp)")
216            }
217        sort {
218            $b->[2] <=> $a->[2]
219            } @just2;
220    
221    
222        push @$html, map {
223            $cgi->h2("just1: $_->[1] ($_->[2] bp)")
224            }
225        sort {
226            $b->[2] <=> $a->[2]
227            } @just1;
228    }
229    
230    sub show_alignment {
231        my ($fig, $cgi, $html, $gene1, $gene2, $org1, $org2) = @_;
232    
233        my @neighbors;
234        my %neighbors;
235        my $neighbors;
236        if (-s "$org2/neighbors") {
237            @neighbors = sort { $a <=> $b} map { m/^(\d+\.\d+)/o ? ($1) : () } `cat $org2/neighbors`;
238            %neighbors = map  { $_  => 1 } @neighbors;
239            $neighbors = join(" ", @neighbors);
240            @neighbors ? push(@$html, $cgi->h2("Close neighbors are: " . join(" ", @neighbors)))
241                       : push(@$html, $cgi->h2("No close nieghbors found")) && return
242                       ;
243        }
244        else {
245            push(@$html, $cgi->h2("Close neighbors file $org2/neighbors does not exist"))
246                && return;
247        }
248    
249    
250        my $md5exec   = (($_ = `which md5sum`) !~ m/^no/) ? "md5sum"
251                         : (($_ = `which md5`) !~ m/^no/) ? "md5"
252                         : push(@$html, $cgi->h2("Could not find an MD5 checksum tool")) && return
253                         ;
254        my $chksum = `echo $neighbors | $md5exec`;   chomp $chksum;
255        my $tmp_close = "$FIG_Config::temp/tmp_close_$chksum";
256    
257        my ($size, $mod);
258        if ((not ($size = (-s $tmp_close))) || ($mod = (-M $tmp_close > 0.1))) {
259            push @$html, $cgi->h2("Rebuilding DB $tmp_close, size=$size, mod=$mod");
260    
261            system("rm -f $tmp_close");
262            foreach my $org (@neighbors) {
263                push @$html, $cgi->h2("Appending fasta for $org to $tmp_close");
264                system("cat $FIG_Config::organisms/$org/Features/peg/fasta >> $tmp_close")
265                    && ((push @$html
266                         , $cgi->h2("Could not append $FIG_Config::organisms/$org/Features/peg/fasta to $tmp_close"))
267                        && return);
268            }
269            system("$FIG_Config::ext_bin/formatdb -i $tmp_close")
270                && ((push @$html, $cgi->h2("Could not run $FIG_Config::ext_bin/formatdb on $tmp_close, err=$?"))
271                    && return);
272        }
273    
274        my $tmp_fid  = "$FIG_Config::temp/tmp_fid_$$";
275    
276        open(TMP_FID, ">$tmp_fid")
277            || ((push @$html, $cgi->h2("Could not write-open $tmp_fid")) && &cleanup($tmp_fid, $tmp_seqs) && return);
278        print TMP_FID "$gene1\n";
279        $seq_gene1 =  `$FIG_Config::bin/pull_fasta_entries $org1/Features/peg/fasta < $tmp_fid`;
280        $seq_gene1 =~ s/^>fig/>old/;
281    
282        open(TMP_FID, ">$tmp_fid")
283            || ((push @$html, $cgi->h2("Could not write-open $tmp_fid")) && &cleanup($tmp_fid, $tmp_seqs) && return);
284        print TMP_FID "$gene2\n";
285        $seq_gene2 =  `$FIG_Config::bin/pull_fasta_entries $org2/Features/peg/fasta < $tmp_fid`;
286        $seq_gene2 =~ s/^>fig/>new/;
287    
288        my ($figfam, $fam_id, @members);
289        my @tmp = grep { m/^(\S+)/o && ($1 eq $gene2) } `cat $org2/found`;
290        if (not @tmp) {
291            push @$html, $cgi->h2("No family found for gene2=$gene2")
292        }
293        else {
294            if ($tmp[0] =~ m/^\S+\t(FIG\d{6})/o) {
295                $figfam = new FigFam($fig, $1, $default_dir);
296                $fam_id = $figfam->family_id();
297                push @$html, $cgi->h2("$gene2 is in Family $fam_id");
298                @members = $figfam->list_members();
299            }
300            else {
301                $tmp[0] =~ s/\t/ /go;
302                push(@$html, $cgi->h2("Could not parse family from entry: $tmp[0]"))
303                    && &cleanup($tmp_fid, $tmp_seqs) && return;
304            }
305        }
306    
307        if (@members) {
308            push @$html, $cgi->h2(qq($fam_id has ) . (scalar @members) . qq( members));
309            if (@members > 15) {
310                @members = grep { $neighbors{$fig->genome_of($_)} } @members;
311                my $s    = (@members == 1) ? qq() : qq(s);
312                my $were = (@members == 1) ? qq(was from a) : qq(were from);
313                push @$html, $cgi->h2((scalar @members) . qq( member$s of $fam_id $were close neighbor$s));
314            }
315        }
316        else {
317            push @$html, $cgi->h2("No family members found --- searching with BLASTP");
318            if (@members = &find_top_blast_hits($fig, $cgi, $html, $seq_gene2, $tmp_close)) {
319                if (@members > 15) {
320                    @members = grep { $neighbors{$fig->genome_of($_)} } @members;
321                    my $s    = (@members == 1) ? qq() : qq(s);
322                    my $were = (@members == 1) ? qq(was from a) : qq(were from);
323                    push @$html, $cgi->h2((scalar @members) . qq( BLASTP hit$s $were close neighbor$s));
324                }
325            }
326            else {
327                push( @$html, $cgi->h2("No BLAST sims for $gene2 against neighbors") )
328                    && &cleanup($tmp_fid, $tmp_seqs) && return;
329            }
330        }
331    
332        my $tmp_seqs = "$FIG_Config::temp/tmp_seqs_$$";
333        open(TMP_SEQS, ">$tmp_seqs")
334            || ((push @$html, $cgi->h2("Could not write-open $tmp_seqs")) && &cleanup($tmp_fid, $tmp_seqs) && return);
335        foreach my $id (@members) {
336            my $seq = $fig->get_translation($id);
337            if ($seq) {
338    #           push @$html, $cgi->h2("Got seq for $id");
339                &FIG::display_id_and_seq($id, \$seq, \*TMP_SEQS);
340            }
341            else {
342                push @$html, $cgi->h2("No seq for $id");
343            }
344        }
345    
346        print TMP_SEQS $seq_gene1;
347        print TMP_SEQS $seq_gene2;
348    
349        close(TMP_SEQS)
350            || ((push @$html, $cgi->h2("Could not close $tmp_seqs"))
351                && &cleanup($tmp_fid, $tmp_seqs) && return);
352    
353        system("$FIG_Config::ext_bin/clustalw -infile=$tmp_seqs -align -outorder=aligned > /dev/null")
354            && ((push @$html, $cgi->h2("clustalw failed on file $tmp_seqs"))
355                && &cleanup($tmp_fid, $tmp_seqs) && return);
356    
357        push @$html, qq(<DIV><FONT FACE="Courier">\n);
358        @tmp = `cat $tmp_seqs.aln`;
359        @tmp = map { &fid_link($_) } @tmp;
360        push @$html, (@tmp ? (@tmp) : ("No lines read from $tmp_seqs.aln"));
361        push @$html, qq(</FONT></DIV>\n);
362    
363        &cleanup($tmp_fid, $tmp_seqs);
364    }
365    
366    sub cleanup {
367        my ($tmp_seqs, $tmp_fid) =@_;
368        return if $ENV{DEBUG};
369    
370        system("rm -f $tmp_fid $tmp_seqs*")
371            && ((push @$html, $cgi->h2("Could not remove temporary files"))
372                && return);
373    }
374    
375    sub find_top_blast_hits {
376        my ($fig, $cgi, $html, $query_seq, $tmp_close) = @_;
377    
378        my $tmp_query = "$FIG_Config::temp/tmp_query_$$";
379        open( TMP_QUERY, ">$tmp_query" )
380            || ((push @$html, $cgi->h2("Could not write-open $tmp_query")) && return);
381        print TMP_QUERY $query_seq;
382        close(TMP_QUERY)
383            || ((push @$html, $cgi->h2("Could not close $tmp_query")) && return);
384    
385        my @hits = `$FIG_Config::ext_bin/blastall -i $tmp_query -d $tmp_close -p blastp -m8 -FF -e1.0e-10 | cut -f2`;
386        chomp @hits;
387    
388        push @$html, $cgi->h2(qq(BLAST found ) . (scalar @hits) . qq( hits\n));
389        return @hits;
390  }  }
391    
392  sub load_tbl1  sub load_tbl1
# Line 233  Line 457 
457              warn "INVALID ENTRY:\t$entry\n";              warn "INVALID ENTRY:\t$entry\n";
458          }          }
459      }      }
460      $keep || die "no contig";      $keep || die "no contig for gene=$gene";
461      return sort { by_locus($a,$b) } @{$tbl->{$keep}};      return sort { by_locus($a,$b) } @{$tbl->{$keep}};
462  }  }
463    
# Line 275  Line 499 
499    
500      return "<a href=diffsF.cgi?org1=$org&gene=$fid>$fid</a>";      return "<a href=diffsF.cgi?org1=$org&gene=$fid>$fid</a>";
501  }  }
502    
503    sub fid_link {
504        my($line) = @_;
505    
506        chomp $line;
507        if ($line =~ m/^(fig\S+)\b(.*)$/o) {
508            my ($id, $rest) = ($1, $2);
509            $rest =~ s/\s/\&nbsp\;/go;
510            $line =  qq(<A HREF="http://anno-3.nmpdr.org/anno/FIG/protein.cgi?prot=$id">$id</A>$rest);
511        }
512        else {
513            $line =~ s/\s/\&nbsp\;/go;
514        }
515    
516        return $line . qq(<BR>\n);
517    }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.7

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3