[Bio] / FigWebServices / check_role_in_ss.cgi Repository:
ViewVC logotype

View of /FigWebServices/check_role_in_ss.cgi

Parent Directory | Revision Log


Revision 1.20 - (download) (annotate)
Tue Jan 12 19:38:35 2010 UTC (9 years, 10 months ago) by overbeek
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.19: +222 -1 lines
Marks stuff

#########################################################################
# -*- perl -*-
#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#

use FIG;
my $fig = new FIG;

use SSserver;
use LWP::UserAgent;
use gjoalignment;
use gjonewicklib;
use protdist_neighbor;
use gjophylip;

use HTML;
use strict;

use CGI;
my $cgi = new CGI;

# Disabled debugging aid: the condition is the constant 0, so this never runs.
# When enabled it would replace $cgi with whatever object the code stored in
# $FIG_Config::temp/process_role evaluates to (see the second stub below).
# NOTE(review): this string-evals file contents; keep it disabled in production.
# NOTE(review): Dumper is not imported by any "use" line visible in this file;
# presumably one of the project modules exports it -- confirm before enabling.
if (0)
{
    my $VAR1;
    eval(join("",`cat $FIG_Config::temp/process_role`));
    $cgi = $VAR1;
    print STDERR &Dumper($cgi);
}

# Disabled debugging aid: dumps every request parameter as preformatted text
# and exits without running the dispatcher.
if (0)
{
    print $cgi->header;
    my @params = $cgi->param;
    print "<pre>\n";
    foreach $_ (@params)
    {
	print "$_\t:",join(",",$cgi->param($_)),":\n";
    }

    # Nested, also disabled: capture the CGI object to the file replayed by the
    # first stub above.
    if (0)
    {
	if (open(TMP,">$FIG_Config::temp/process_role"))
	{
	    print TMP &Dumper($cgi);
	    close(TMP);
	}
    }
    exit;
}
# Accumulator for page fragments; handed to &HTML::show_page after dispatch.
my $html = [];

# Request parameters that drive the dispatcher below.
my $roleI   = $cgi->param('roleI');
my $request = $cgi->param('request');
my $project = $cgi->param('Project');
my $set     = $cgi->param('set');
my $dynamic_table_file = $cgi->param('dynamic_table_file');

# Request dispatcher: picks a handler from the 'request', 'Project', 'roleI'
# and 'set' parameters.  Handlers append page fragments to @$html, which is
# rendered once at the end by &HTML::show_page.
if ($cgi->param('Delete Existing Project') && $project)
{
    # $project comes straight from the request.  The old code ran
    # "rm -r" through the shell on a quotemeta'd name, so a project of ".."
    # (or one containing "/") could remove directories outside
    # JensenAnalysis.  Only a plain, single-component name is accepted, and
    # the removal is done in-process without a shell.
    if (($project !~ m{/}) && ($project ne '.') && ($project ne '..'))
    {
        require File::Path;
        File::Path::rmtree("$FIG_Config::global/JensenAnalysis/$project");
    }
    else
    {
        push(@$html,$cgi->h2('invalid project name - nothing was deleted'));
    }
    &initial_page($fig,$cgi,$html);
}
elsif ( (! $request) && (! $project))
{
    &initial_page($fig,$cgi,$html);
}
elsif ( $request && ($request eq "show_plan"))
{
    &show_plan($fig,$cgi,$html);
}
elsif ( $project && (! $request))
{
    &picked_project($fig,$cgi,$html,$project);
}
# From here on $request is known to be set: the branches above have consumed
# every combination in which it is empty.
elsif ( ($request eq "pick_roles") && (! $project))
{
    &pick_roles($fig,$cgi,$html);
}
elsif ( ($request eq "pick_project") && (! $project))
{
    &pick_project($fig,$cgi,$html);
}
elsif ( ($request eq "picked_project") && $project)
{
    &picked_project($fig,$cgi,$html,$project);
}
elsif ( ($request eq "pick_reps") && $project)
{
    &pick_reps($fig,$cgi,$html,$project);
}
elsif ( ($request eq "set_reps") && $project)
{
    &set_reps($fig,$cgi,$html,$project);
}
elsif ( $request && ($request eq "process_role") && $project && $roleI)
{
    # process_role takes $html in both the 2nd and 4th positions.
    &process_role($fig,$html,$cgi,$html,$project,$roleI);
}
elsif ( $request && ($request eq "rebuild_role") && defined($roleI))
{
    &rebuild_role($fig,$project,$roleI);
    &process_role($fig,$html,$cgi,$html,$project,$roleI);
}
elsif ( $request && ($request eq "process_set") && $project && $set)
{
    &process_set($fig,$html,$cgi,$project,$roleI,$set);
}
elsif ($request && $project && $roleI && $set &&
       ($request eq "mark_frameshifts"))
{
    &mark_frameshifts($fig,$cgi,$html,$project,$roleI,$set);
}
elsif ($request && $project && $roleI && $set &&
       ($request eq "mark_truncations"))
{
    &mark_truncations($fig,$cgi,$html,$project,$roleI,$set);
}
elsif ($request && ($request eq 'merge') && $project)
{
    &merge($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'show_merge') && $project)
{
    &show_merge($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'show_bootstrapped_tree') && $project)
{
    &show_bootstrapped_tree($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'update_pdb') && $project)
{
    &update_pdb($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'show_pdb') && $project)
{
    &show_pdb($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'show_lit') && $project)
{
    &show_lit($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'get_names') && $project)
{
    &get_names($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'build_tree') && $project)
{
    &build_tree($fig,$cgi,$project,$html);
    push(@$html,$cgi->h2('built tree'));
}
elsif ($request && ($request eq 'build_ali') && $project)
{
    &build_ali($fig,$cgi,$project);
    push(@$html,$cgi->h2('built alignment from representatives'));
}
elsif ($request && ($request eq 'show_possible_collapses') && $project)
{
    &show_possible_collapses($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'collapse_reps1') && $project)
{
    &collapse_reps1($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'collapse_reps2') && $project)
{
    &collapse_reps2($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'show_aliases') && $project)
{
    &show_aliases($fig,$cgi,$html,$project);
}
elsif ($request && ($request eq 'show_dynamic_table') && $project)
{
    if ( $dynamic_table_file )
    {
        # use tab-separated file to populate table
        &show_dynamic_table_from_file($fig,$cgi,$html,$project,$dynamic_table_file);
    }
    else
    {
        # use groups and other files to create table
        &show_dynamic_table($fig,$cgi,$html,$project);
    }
}
else
{
    # Unrecognised request for a known project: fall back to the overview.
    &show_dir($fig,$project,$html);
}

&HTML::show_page($cgi,$html);
exit;

### Apply the representative picks submitted from the pick_reps form.
###
### For every group in Merged/Reps the request carries a parameter named
### after the group whose value is the chosen PEG.  When the choice differs
### from the current representative, the chosen sequence is moved to the
### front of the group file and replaces the old one in Merged/reps.fasta;
### the alignment and tree are then rebuilt.  A summary is appended to
### @$html, followed by the project overview from &show_dir.
###
### Parameters: $fig, $cgi, $html (array ref of page fragments), $project.
sub set_reps {
    my($fig,$cgi,$html,$project) = @_;

    push @$html, '<h1>show reps</h1>';

    my $dir = "$FIG_Config::global/JensenAnalysis/$project/Merged";

    my @groups = &groups($project);

    my %group_file;
    my %fid_to_group;

    foreach my $group ( @groups )
    {
        $group_file{$group} = [ &gjoalignment::read_fasta_file("$dir/Reps/$group") ];

        foreach my $rec ( @{ $group_file{$group} } )
        {
            $fid_to_group{ $rec->[0] } = $group;
        }
    }

    my @rep_file = &gjoalignment::read_fasta_file("$dir/reps.fasta");

    my %group_to_rep;

    foreach my $rec ( @rep_file )
    {
        my $rep   = $rec->[0];
        my $group = $fid_to_group{$rep};

        # A representative that belongs to no group file cannot be mapped;
        # skipping it avoids creating an entry under an undefined key.
        next if (! defined($group));
        $group_to_rep{$group} = $rep;
    }

    my @changes = ();
    foreach my $group ( @groups )
    {
        my $rep_new = $cgi->param($group);
        my $rep_old = $group_to_rep{$group};

        # Nothing was submitted for this group: leave it untouched.
        next if ((! defined($rep_new)) || ($rep_new eq ''));
        next if (defined($rep_old) && ($rep_new eq $rep_old));

        my $seq_file = $group_file{$group};
        my $rep_new_index;

        for (my $i = 0; $i < @$seq_file; $i++)
        {
            if ( $seq_file->[$i][0] eq $rep_new )
            {
                $rep_new_index = $i;
            }
        }

        # The submitted id is not a member of this group.  Previously the
        # undefined index made splice() silently act on element 0 and the
        # page reported a change that never happened.
        next if (! defined($rep_new_index));

        # Move the chosen sequence to the front of the group file.
        my($rep_new_seq) = splice(@$seq_file, $rep_new_index, 1);
        splice(@$seq_file, 0, 0, $rep_new_seq);

        my $compress = 0;
        &gjoalignment::write_fasta_file($seq_file, $compress, "$dir/Reps/$group");

        push @changes, [$group, $rep_old, $rep_new];

        if ( defined($rep_old) )
        {
            # $rec aliases the element of @rep_file, so assigning replaces
            # the old representative's record in place.
            foreach my $rec ( @rep_file )
            {
                if ( $rec->[0] eq $rep_old )
                {
                    $rec = $rep_new_seq;
                }
            }
        }
    }

    if ( @changes )
    {
        my $compress = 0;
        &gjoalignment::write_fasta_file(\@rep_file, $compress, "$dir/reps.fasta");

        foreach my $change ( sort {$a->[0] cmp $b->[0]} @changes )
        {
            my($group, $rep_old, $rep_new) = @$change;
            my $old_link = defined($rep_old) ? &link_to_peg($rep_old) : 'none';
            push @$html, "<h4>Changed respresentative for $group from " . $old_link . ' to ' . &link_to_peg($rep_new) . "</h4>\n";
        }
        &build_ali($fig,$cgi,$project);
        # NOTE(review): the dispatcher calls build_tree with a 4th argument
        # ($html); build_tree is not visible here -- confirm which form it expects.
        &build_tree($fig,$cgi,$project);
    }
    else
    {
        push @$html, "<h4>No representatives were changed</h4>\n";
    }

    &show_dir($fig,$project,$html);
}

### Build the "pick representatives" form.
###
### One table is emitted per group in Merged/Reps, with a radio button per
### PEG (the current representative, taken from Merged/reps.fasta, is listed
### first and pre-selected).  Submitting the form calls this script again
### with request=set_reps.
###
### Parameters: $fig, $cgi, $html (array ref of page fragments), $project.
sub pick_reps {
    my($fig,$cgi,$html,$project) = @_;

    my $dir = "$FIG_Config::global/JensenAnalysis/$project/Merged";

    my %is_a_rep = map {$_ => 1} &representatives($project);

    my $peg_to_pdb    = &_read_peg_table("$dir/pdb.entries");
    my $peg_to_pubmed = &_read_peg_table("$dir/lit.entries");

    push @$html, "<h2>Select representative PEGs</h2>";
    push @$html, $cgi->start_form(-method => 'POST', -action => "./check_role_in_ss.cgi");
    push @$html, $cgi->hidden( -name => "request", -value => 'set_reps', -override => 1);
    push @$html, $cgi->hidden( -name => "Project", -value => $project, -override => 1);

    foreach my $group ( &groups($project) )
    {
        my @pegs = map {$_->[0]} &gjoalignment::read_fasta_file("$dir/Reps/$group");

        # Put the representative PEG first.  A group without a known
        # representative yields undef here, which must not enter the list.
        my($rep_peg) = grep {$is_a_rep{$_}} @pegs;
        my %seen;
        @pegs = grep { defined($_) && (not $seen{$_}++) } ($rep_peg, @pegs);

        my $title = (@pegs == 1)? "Group: $group" : 'Group: ' . &show_comparison_link($fig,$cgi,\@pegs,$group);

        my $col_hdrs = ['Rep', 'FIG id', 'Function', 'Organism', 'PDB', 'Literature'];
        my $tab = [];

        foreach my $peg ( @pegs )
        {
            my $rep = $is_a_rep{$peg}? qq(<input type="radio" name="$group" value="$peg" checked="checked">) : qq(<input type="radio" name="$group" value="$peg">);
            my $func = $fig->function_of($peg);

            my $pdb = $peg_to_pdb->{$peg}? join(',<br>', map {&link_to_ncbi_gi($_)} @{ $peg_to_pdb->{$peg} }) : '';
            my $pubmed = $peg_to_pubmed->{$peg}? join(',<br>', map {&link_to_pubmed($_)} @{ $peg_to_pubmed->{$peg} }) : '';

            push @$tab, [$rep,
                         &link_to_peg($peg),
                         $func,
                         $fig->genus_species(&FIG::genome_of($peg)),
                         $pdb,
                         $pubmed,
                         ];
        }

        push @$html, &HTML::make_table($col_hdrs, $tab, $title), "<p>";
    }

    # Each button is its own statement.  A trailing comma here used to nest
    # the second push inside the first, which emitted the reset button before
    # the submit button and appended the array length to the page.
    push @$html, $cgi->submit('Submit Choices');
    push @$html, $cgi->reset;
    push @$html, $cgi->end_form;

    return;
}

# Read a tab-separated "peg<TAB>value" file into { peg => [values...] }.
# A missing or unreadable file gives an empty hash; lines without both
# columns are ignored.
sub _read_peg_table {
    my($file) = @_;

    my %table;
    if ( (-e $file) && open(my $fh, '<', $file) )
    {
        while ( defined(my $line = <$fh>) )
        {
            chomp $line;
            my($peg, $value) = split(/\t/, $line);
            next if ((! defined($peg)) || (! defined($value)));
            push @{ $table{$peg} }, $value;
        }
        close($fh);
    }

    return \%table;
}

# Return the sorted names of the group files (entries named "group<digits>")
# found in the project's Merged/Reps directory.  An absent directory gives an
# empty list; a directory that exists but cannot be opened is fatal.
sub groups {
    my($project) = @_;

    my $reps_dir = "$FIG_Config::global/JensenAnalysis/$project/Merged/Reps";
    my @names    = ();

    if ( -d $reps_dir )
    {
        opendir(DIR, $reps_dir) or die "could not open directory '$reps_dir': $!";
        my @entries = readdir(DIR);
        closedir(DIR);

        # Plain string sort, so "group10" comes before "group2".
        @names = sort grep { $_ =~ /^group\d+$/ } @entries;
    }

    return @names;
}

# Return a hash ref mapping each group name (see &groups) to an array ref of
# the fig|...peg... ids found on the FASTA header lines of that group's file
# in Merged/Reps.  Groups whose file holds no such header get no entry.
# Dies when a group file cannot be opened.
sub group_fids {
    my($project) = @_;
    my %group_fids;

    my $dir = "$FIG_Config::global/JensenAnalysis/$project/Merged/Reps";

    foreach my $group ( &groups($project) )
    {
        # Three-argument open with a lexical handle: the path contains the
        # request-supplied project name, which must never be read as a mode.
        open(my $grp_fh, '<', "$dir/$group") or die "could not open file '$dir/$group': $!";
        while ( defined(my $line = <$grp_fh>) )
        {
            if ( $line =~ /^>(fig\|\d+\.\d+\.peg\.\d+)/ )
            {
                push @{ $group_fids{$group} }, $1;
            }
        }
        close($grp_fh);
    }

    return \%group_fids;
}

# Return the ids recorded in the project's Merged/reps.fasta, or an empty
# list when that file does not exist.
#
# The list is flattened as (id, 1, id, 1, ...), i.e. it can be assigned
# straight to a hash.  NOTE(review): pick_reps feeds this list through
# "map {$_ => 1}", which also creates a key "1"; callers outside this part of
# the file may rely on the pair form, so it is left as is.
sub representatives {
    my($project) = @_;

    my $reps_fasta = "$FIG_Config::global/JensenAnalysis/$project/Merged/reps.fasta";
    my @reps       = ();

    if ( -e $reps_fasta )
    {
        foreach my $entry ( &gjoalignment::read_fasta_file($reps_fasta) )
        {
            push @reps, $entry->[0], 1;
        }
    }

    return @reps;
}

# Path of the working directory for role number $roleI inside $project:
# <global>/JensenAnalysis/<project>/Roles/<roleI>.
sub role_dir {
    my($project,$roleI) = @_;

    return join('/', $FIG_Config::global, 'JensenAnalysis', $project, 'Roles', $roleI);
}

# HTML anchor to the seedviewer Annotation page for $peg, labelled with the
# PEG id itself.  (An earlier variant, kept out of use, opened the page in a
# per-process window named "PEG.$$".)
sub link_to_peg {
    my($peg) = @_;

    my $url = 'seedviewer.cgi?page=Annotation&feature=' . $peg;
    return '<a href=' . $url . '>' . $peg . '</a>';
}

# HTML anchor to the NCBI protein page for $gi, labelled with $gi.
sub link_to_ncbi_gi {
    my($gi) = @_;

    return sprintf('<a href="http://www.ncbi.nlm.nih.gov/protein/%s">%s</a>', $gi, $gi);
}

# HTML anchor to an Entrez PubMed search for $pubmed, labelled with $pubmed.
sub link_to_pubmed {
    my($pubmed) = @_;

    return sprintf('<a href="http://www.ncbi.nlm.nih.gov/sites/entrez?db=pubmed&cmd=search&term=%s">%s</a>',
                   $pubmed, $pubmed);
}

# HTML anchor that opens role $tuple->[0] of $project with
# request=process_role; the link text is the role name in $tuple->[1].
# $fig is accepted but not used.
sub role_link {
    my($tuple,$project,$fig) = @_;

    my $roleI = $tuple->[0];
    my $role  = $tuple->[1];
    return sprintf('<a href=./check_role_in_ss.cgi?Project=%s&roleI=%s&request=process_role>%s</a>',
                   $project, $roleI, $role);
}

# Show the table of sequence sets for one role of a project.
#
# The sets are listed in <role dir>/split_info/set.sizes ("<set> <size>" per
# line); when that file is missing or empty the role is rebuilt first.  Each
# row shows the set, a comparison link, one PEG of the set with its length
# and organism, and, for singleton sets, links to mark the PEG as
# frameshifted or truncated.
#
# Parameters (positional): $fig, <unused>, $cgi, $html, $project, $roleI.
# Callers pass $html in both the 2nd and 4th position; only the 4th is used.
# The old signature declared $html twice in one "my" list.
sub process_role {
    my($fig,undef,$cgi,$html,$project,$roleI) = @_;

    if (! defined($roleI))
    {
        push(@$html,$cgi->h3('badly formed parameters - missing role'));
    }
    else
    {
        my $role = &index_to_role($project,$roleI);
        my $dir = "$FIG_Config::global/JensenAnalysis/$project/Roles/$roleI/split_info";
        &FIG::verify_dir($dir);
        if (! -s "$dir/set.sizes")
        {
            &rebuild_role($fig,$project,$roleI);
        }

        if (-s "$dir/set.sizes")
        {
            # Only well-formed "<set> <size>" lines are kept; a non-matching
            # line used to reuse the captures of the previous line.
            my @sizes;
            if (open(my $sizes_fh, '<', "$dir/set.sizes"))
            {
                while (defined(my $line = <$sizes_fh>))
                {
                    push(@sizes,[$1,$2]) if ($line =~ /^(\d+)\s+(\d+)/);
                }
                close($sizes_fh);
            }

            my $col_hdrs = ['subset number','size of set','One PEG','length','Organism','',''];
            my $tab = [];
            my @rep_pegs = ();
            foreach my $entry (@sizes)
            {
                my($set,$sz) = @$entry;
                my $peg = &one_peg($project,$roleI,$set);

                # one_peg returns undef when the set file is unreadable or
                # does not start with a fig id; show blank cells instead of
                # passing undef on to $fig.
                my $have_peg = defined($peg);
                push(@rep_pegs,$peg) if ($have_peg);
                push(@$tab,[&show_set_link($project,$roleI,$set,$cgi,$fig),
                            &compare_set_link($fig,$cgi,$project,$roleI,$set),
                            $have_peg ? &link_to_peg($peg) : '',
                            $have_peg ? length($fig->get_translation($peg)) : '',
                            $have_peg ? scalar($fig->genus_species(&FIG::genome_of($peg))) : '',
                            ($sz == 1) ? &mark_frameshifts_link($cgi,$project,$roleI,$set) : "",
                            ($sz == 1) ? &mark_truncations_link($cgi,$project,$roleI,$set) : "",
                           ]);
            }
            push(@$html,"<br><br>",&HTML::make_table($col_hdrs,$tab,"$role: Sets to Validate"));
            push(@$html,"<br><br>",&show_comparison_link($fig,$cgi,\@rep_pegs,'compare representatives'));
            push(@$html,"<br><br>",&rebuild_role_link($fig,$project,$roleI,$role));
        }
        else
        {
            push(@$html,$cgi->h3("malformed directory: $dir,$roleI has no split_info/set.sizes"));
        }
    }
}

# Comparison link for a set, labelled with the number of sequences in it.
#
# At most about 40 PEGs are passed to the comparison page: the set is sorted
# by decreasing sequence length and sampled at a fixed stride, and the
# shortest sequence is always included.
sub compare_set_link {
    my($fig,$cgi,$project,$roleI,$set) = @_;

    my @tuples    = &pegs_in_set($project,$roleI,$set);
    my @by_length = sort { $b->[1] <=> $a->[1] } @tuples;
    my @pegs;

    # An empty or unreadable set has nothing to sample; the old code went on
    # to index element -1 of the empty arrays.
    if (@by_length)
    {
        my $gap = int((@by_length + 39) / 40);
        for (my $i = 0; $i < @by_length; $i += $gap)
        {
            push(@pegs,$by_length[$i]->[0]);
        }
        if ($pegs[-1] ne $by_length[-1]->[0]) { push(@pegs,$by_length[-1]->[0]) }
    }
    return &show_comparison_link($fig,$cgi,\@pegs,scalar @by_length);
}

# Return a list of [id, length] pairs for the FASTA records of one set file
# (<role dir>/split_info/<set>).  The length is that of the first sequence
# line of the record; records whose first sequence line is shorter than two
# characters are dropped.  An unreadable file gives an empty list.
sub pegs_in_set {
    my($project,$roleI,$set) = @_;
    my $roleD = &role_dir($project,$roleI);

    my @tuples;
    if (open(my $set_fh, '<', "$roleD/split_info/$set"))
    {
        # Read one FASTA record at a time.  "local" restores the separator on
        # every exit path; the old code assigned the global and reset it by hand.
        local $/ = "\n>";
        while (defined(my $record = <$set_fh>))
        {
            if ($record =~ /^>?(\S+)[^\n]*\n(\S[^\n]*\S)/)
            {
                push(@tuples,[$1,length($2)]);
            }
        }
        close($set_fh);
    }
    return @tuples;
}

# HTML anchor to the seedviewer Regions page for the PEGs in @$pegs (one
# "feature=" argument each), with $display as the link text.  $fig and $cgi
# are accepted but not used.  (An earlier variant, kept out of use, opened
# the page in a per-process window named "compare_pegs.$$".)
sub show_comparison_link {
    my($fig,$cgi,$pegs,$display) = @_;

    my @features;
    foreach my $peg (@$pegs)
    {
        push(@features,"feature=$peg");
    }
    my $argL = join("&",@features);

    return '<a href=seedviewer.cgi?page=Regions&' . $argL . '>' . $display . '</a>';
}

# HTML anchor that re-invokes this script with request=mark_frameshifts for
# the given project, role and set.  The 'user' request parameter is passed
# along (empty when absent).
sub mark_frameshifts_link {
    my($cgi,$project,$roleI,$set) = @_;

    my $user = $cgi->param('user') || "";
    return sprintf('<a href="./check_role_in_ss.cgi?user=%s&Project=%s&roleI=%s&set=%s&request=mark_frameshifts">Frameshifts</a>',
                   $user, $project, $roleI, $set);
}

# HTML anchor that re-invokes this script with request=mark_truncations for
# the given project, role and set.  The 'user' request parameter is passed
# along (empty when absent).
sub mark_truncations_link {
    my($cgi,$project,$roleI,$set) = @_;

    my $user = $cgi->param('user') || "";
    return sprintf('<a href="./check_role_in_ss.cgi?user=%s&Project=%s&roleI=%s&set=%s&request=mark_truncations">Truncations</a>',
                   $user, $project, $roleI, $set);
}

# Tag every PEG of a set as 'frameshift'; the arguments
# ($fig,$cgi,$html,$project,$roleI,$set) are forwarded to comment_pegs.
sub mark_frameshifts {
    my @forwarded = @_;

    return &comment_pegs('frameshift',@forwarded);
}

# Tag every PEG of a set as 'truncation'; the arguments
# ($fig,$cgi,$html,$project,$roleI,$set) are forwarded to comment_pegs.
sub mark_truncations {
    my @forwarded = @_;

    return &comment_pegs('truncation',@forwarded);
}

# Append " # <comment>" to the function of every PEG in a set whose function
# does not already contain <comment>, and report each change in @$html.
#
# The PEGs are the ids on the FASTA header lines of
# <project>/Roles/<roleI>/split_info/<set>.  Assignments are made as the
# 'user' request parameter, or 'RoleAnalysis' when it is absent.
sub comment_pegs {
    my($comment,$fig,$cgi,$html,$project,$roleI,$set) = @_;

    my $user = $cgi->param('user') || 'RoleAnalysis';
    my $setF = "$FIG_Config::global/JensenAnalysis/$project/Roles/$roleI/split_info/$set";

    # Read the headers in-process.  The path is built from request
    # parameters, so it must not be handed to a shell (the old code ran
    # grep on it through backticks).
    my @pegs;
    if (open(my $set_fh, '<', $setF))
    {
        while (defined(my $line = <$set_fh>))
        {
            push(@pegs,$1) if ($line =~ /^>(\S+)/);
        }
        close($set_fh);
    }

    foreach my $peg (@pegs)
    {
        my $func = $fig->function_of($peg);
        $func = '' if (! defined($func));
        if (index($func,$comment) < 0)
        {
            $fig->assign_function($peg,$user,"$func # $comment");
            push(@$html,$cgi->h3("marked $peg as $comment"));
        }
    }
    return;
}

# Look up the role name for index $roleI in the project's "index" file
# (lines of "<index><TAB><role>").  Only the first line for that index is
# considered; undef is returned when there is none, when the file cannot be
# read, or when the role text on that line is shorter than two characters.
sub index_to_role {
    my($project,$roleI) = @_;

    my $index_file = "$FIG_Config::global/JensenAnalysis/$project/index";
    my $role;

    if (open(my $index_fh, '<', $index_file))
    {
        while (defined(my $line = <$index_fh>))
        {
            # \Q..\E: $roleI is a request parameter and must be matched
            # literally, not interpreted as a pattern.
            next if ($line !~ /^\Q$roleI\E\t/);
            $role = $1 if ($line =~ /^\Q$roleI\E\t(\S.*\S)/);
            last;
        }
        close($index_fh);
    }
    return $role;
}

# Return the fig|...peg... id from the first line of a set file
# (<project>/Roles/<roleI>/split_info/<set>), or undef when the file cannot
# be opened or its first line is not a FASTA header carrying such an id.
sub one_peg {
    my($project,$roleI,$set) = @_;
    my $setF = "$FIG_Config::global/JensenAnalysis/$project/Roles/$roleI/split_info/$set";

    my $peg;
    # Three-argument open with a lexical handle: the path is built from
    # request parameters and must never be interpreted as an open mode.
    if (open(my $set_fh, '<', $setF))
    {
        my $first = <$set_fh>;
        if (defined($first) && ($first =~ /^>(fig\|\d+\.\d+\.peg\.\d+)/))
        {
            $peg = $1;
        }
        close($set_fh);
    }
    return $peg;
}

# HTML anchor that re-invokes this script with request=rebuild_role for the
# given project and role index; the link text is "rebuild <role>".
# $fig is accepted but not used.
sub rebuild_role_link {
    my($fig,$project,$roleI,$role) = @_;

    return qq(<a href="./check_role_in_ss.cgi?Project=$project&roleI=$roleI&request=rebuild_role">rebuild $role</a>);
}

# HTML form (returned as one string) that submits request=merge for
# $project, with "pad left" / "pad right" text fields defaulting to 0 and a
# 'Create Merged Alignment' button.  (A plain "merge into full alignment"
# anchor was used before the form and is no longer emitted.)
sub merge_link {
    my($cgi,$project) = @_;

    # Every CGI call is made in list context, as before.
    my @pieces;
    push(@pieces, $cgi->start_form(-action => "./check_role_in_ss.cgi"));
    push(@pieces, $cgi->hidden( -name => "request", -value => 'merge', -override => 1));
    push(@pieces, $cgi->hidden( -name => "Project", -value => $project, -override => 1));

    foreach my $pad ( ["pad left: ", "pad_left"], ["pad right: ", "pad_right"] )
    {
        my($label,$field) = @$pad;
        push(@pieces, $label, $cgi->textfield(-name => $field, -size => 6, -value => 0), "<br><br>");
    }

    push(@pieces, $cgi->submit('Create Merged Alignment'));
    push(@pieces, $cgi->end_form);

    return join("",@pieces);
}

# Anchor for request=show_merge when the project's Merged/ali.fasta is
# non-empty; otherwise a heading saying the alignment has not been built.
# Uses the file-level $cgi for the heading.
sub show_merge_link {
    my($project) = @_;

    my $alignment = "$FIG_Config::global/JensenAnalysis/$project/Merged/ali.fasta";
    if (! -s $alignment)
    {
        return $cgi->h2("Merged alignment has not yet been built");
    }
    return qq(<a href="./check_role_in_ss.cgi?Project=$project&request=show_merge">show merge into full alignment</a>);
}

# Anchor for request=show_bootstrapped_tree.  It is offered only when both
# Merged/TreeData/tree.nwk and Merged/boot.nwk are non-empty and tree.nwk was
# modified more recently than boot.nwk; otherwise a heading is returned.
# Uses the file-level $cgi for the heading.
sub show_bootstrapped_tree_link {
    my($project) = @_;

    my $merged = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    my $tree   = "$merged/TreeData/tree.nwk";
    my $boot   = "$merged/boot.nwk";

    # -M is the age of the file, so a smaller value means newer.
    my $tree_is_current = (-s $tree) && (-s $boot) && ((-M $tree) < (-M $boot));
    if (! $tree_is_current)
    {
        return $cgi->h2("The tree was not built after a bootstrap");
    }
    return qq(<a href="./check_role_in_ss.cgi?Project=$project&request=show_bootstrapped_tree">show bootstrapped tree</a>);
}

# Anchor for request=update_pdb when the project's Merged/seqs.fasta is
# non-empty; otherwise a heading saying the alignment has not been built.
# Uses the file-level $cgi for the heading.
sub update_pdb_link {
    my($project) = @_;

    my $seqs = "$FIG_Config::global/JensenAnalysis/$project/Merged/seqs.fasta";
    if (! -s $seqs)
    {
        return $cgi->h2("Merged alignment has not yet been built");
    }
    return qq(<a href="./check_role_in_ss.cgi?Project=$project&request=update_pdb">Update PDB entries</a>);
}

# Anchor for request=show_pdb when the project's Merged/seqs.fasta is
# non-empty; otherwise a heading saying the alignment has not been built.
# Uses the file-level $cgi for the heading.
sub show_pdb_link {
    my($project) = @_;

    my $seqs = "$FIG_Config::global/JensenAnalysis/$project/Merged/seqs.fasta";
    if (! -s $seqs)
    {
        return $cgi->h2("Merged alignment has not yet been built");
    }
    return qq(<a href="./check_role_in_ss.cgi?Project=$project&request=show_pdb">Show PDB entries</a>);
}

# Anchor for request=show_lit when the project's Merged/seqs.fasta is
# non-empty, otherwise the empty string.
#
# The empty string is returned explicitly: without it the sub fell off the
# end of the "if" and handed the failed file test (undef or "") to callers
# that push the result straight into the page.
sub show_lit_link {
    my($project) = @_;

    return '' if (! defined($project));

    my $dir = "$FIG_Config::global/JensenAnalysis/$project";
    if (-s "$dir/Merged/seqs.fasta")
    {
        return "<a href=\"./check_role_in_ss.cgi?Project=$project&request=show_lit\">Show literature entries</a>";
    }
    return '';
}

# Anchor for request=get_names when the project's Merged/seqs.fasta is
# non-empty; otherwise a heading saying the alignment has not been built.
# Uses the file-level $cgi for the heading.
sub get_names_link {
    my($project) = @_;

    my $seqs = "$FIG_Config::global/JensenAnalysis/$project/Merged/seqs.fasta";
    if (! -s $seqs)
    {
        return $cgi->h2("Merged alignment has not yet been built");
    }
    return qq(<a href="./check_role_in_ss.cgi?Project=$project&request=get_names">Get More Meaningful Names for Sequences</a>);
}

# Anchor for request=pick_reps when the project's Merged/Reps directory
# exists; otherwise a heading saying the alignment has not been built.
# Uses the file-level $cgi for the heading.
sub link_to_pick_reps {
    my($project) = @_;

    my $reps_dir = "$FIG_Config::global/JensenAnalysis/$project/Merged/Reps";
    if (! -d $reps_dir)
    {
        return $cgi->h2("Merged alignment has not yet been built");
    }
    return qq(<a href="./check_role_in_ss.cgi?Project=$project&request=pick_reps">Pick Representatives</a>);
}


# Anchor for request=build_tree when the project's Merged/ali.fasta is
# non-empty; otherwise a heading saying the alignment has not been built.
# Uses the file-level $cgi for the heading.
sub link_to_build_tree {
    my($project) = @_;

    my $alignment = "$FIG_Config::global/JensenAnalysis/$project/Merged/ali.fasta";
    if (! -s $alignment)
    {
        return $cgi->h2("Merged alignment has not yet been built");
    }
    return qq(<a href="./check_role_in_ss.cgi?Project=$project&request=build_tree">build_tree</a>);
}

# Anchor for request=build_ali when the project's Merged/reps.fasta is
# non-empty; otherwise a heading saying the alignment has not been built.
# Uses the file-level $cgi for the heading.
sub link_to_build_ali {
    my($project) = @_;

    my $reps = "$FIG_Config::global/JensenAnalysis/$project/Merged/reps.fasta";
    if (! -s $reps)
    {
        return $cgi->h2("Merged alignment has not yet been built");
    }
    return qq(<a href="./check_role_in_ss.cgi?Project=$project&request=build_ali">build_ali</a>);
}

# HTML form (returned as one string, framed by horizontal rules) that submits
# request=collapse_reps1 for $project with the bootstrap iteration count
# (default 20) and cutoff (default 0.85).  When Merged/TreeData/tree.nwk is
# missing or empty a heading is returned instead.  Uses the file-level $cgi.
sub link_to_show_possible_collapses {
    my($project) = @_;

    my $tree_file = "$FIG_Config::global/JensenAnalysis/$project/Merged/TreeData/tree.nwk";
    if (! -s $tree_file)
    {
        return $cgi->h2("Initial tree has not yet been built");
    }

    # Every CGI call is made in list context, as before; the pieces are
    # joined with newlines.
    my @form;
    push(@form, $cgi->hr);
    push(@form, $cgi->start_form(-action => "./check_role_in_ss.cgi"));
    push(@form, $cgi->hidden( -name => "request", -value => 'collapse_reps1', -override => 1));
    push(@form, $cgi->hidden( -name => "Project", -value => $project, -override => 1));
    push(@form, "# iterations for bootstrap: ",
                $cgi->textfield(-name => "bootstrapN", -size => 6, -value => 20),
                "<br><br>");
    push(@form, "bootstrap cutoff: ",
                $cgi->textfield(-name => "bootstrap_cutoff", -size => 6, -value => 0.85),
                "<br><br>");
    push(@form, $cgi->submit('Generate Collapse Data from Bootstrap'));
    push(@form, $cgi->end_form);
    push(@form, $cgi->hr);

    return join("\n",@form);
}

# HTML anchor that re-invokes this script with request=show_plan.
sub show_plan_link {

    return qq(<a href="./check_role_in_ss.cgi?request=show_plan">show_plan</a>);
}

# Append every remaining line of the script's DATA handle to @$html.
# $fig and $cgi are accepted but not used.
sub show_plan {
    my($fig,$cgi,$html) = @_;

    return push(@$html, <DATA>);
}


# Anchor for request=show_aliases when the project's Merged/aliases is
# non-empty; otherwise a heading saying the alignment has not been built.
# Uses the file-level $cgi for the heading.
sub link_to_show_aliases {
    my($project) = @_;

    my $aliases = "$FIG_Config::global/JensenAnalysis/$project/Merged/aliases";
    if (! -s $aliases)
    {
        return $cgi->h2("Merged alignment has not yet been built");
    }
    return qq(<a href="./check_role_in_ss.cgi?Project=$project&request=show_aliases">show_aliases</a>);
}

# Anchor for request=show_dynamic_table when the project's Merged/Reps
# directory exists, otherwise the empty string.
sub link_to_show_dynamic_table {
    my($project) = @_;

    my $reps_dir = "$FIG_Config::global/JensenAnalysis/$project/Merged/Reps";
    return '' if (! -d $reps_dir);

    return qq(<a href="./check_role_in_ss.cgi?Project=$project&request=show_dynamic_table">show dynamic table</a>);
}

# Rebuild the working directory of one role from scratch.
#
# The subsystem name is read from <project>/subsystem and the role name from
# the project index.  The role directory is removed and recreated; every PEG
# fulfilling the role is written either to bad.pegs ("<peg>\t<reason>", for
# PEGs flagged by is_it_bad or lacking a translation) or to good.pegs (FASTA
# with organism and function in the header).  Finally
# split_sequences_into_sets is run on good.pegs to populate split_info.
sub rebuild_role {
    my($fig,$project,$roleI) = @_;

    my $dir = "$FIG_Config::global/JensenAnalysis/$project";

    # First non-blank line of the subsystem file, trimmed.
    my $ss;
    if (open(my $ss_fh, '<', "$dir/subsystem"))
    {
        while (defined(my $line = <$ss_fh>))
        {
            if ($line =~ /(\S.*\S)/) { $ss = $1; last; }
        }
        close($ss_fh);
    }
    my $role = &index_to_role($project,$roleI);

    my @pegs = &pegs_fulfilling_role_in_subsystem($fig,$ss,$role);

    my $roleD = &role_dir($project,$roleI);

    # Remove the old directory in-process: $roleD contains request
    # parameters and was previously interpolated into a shell "rm -r".
    require File::Path;
    File::Path::rmtree($roleD) if (-e $roleD);
    &FIG::verify_dir($roleD);
    open(my $bad_fh, '>', "$roleD/bad.pegs")   or die "could not open $roleD/bad.pegs: $!";
    open(my $good_fh, '>', "$roleD/good.pegs") or die "could not open $roleD/good.pegs: $!";

    foreach my $peg (@pegs)
    {
        my $bad_code;
        if ($bad_code = &is_it_bad($fig,$peg))
        {
            print $bad_fh "$peg\t$bad_code\n";
        }
        else
        {
            if (my $pseq = $fig->get_translation($peg))
            {
                my $genome = $fig->genome_of($peg);
                my $func   = $fig->function_of($peg);
                my $gs = $fig->genus_species($genome);
                print $good_fh ">$peg [$gs] $func\n$pseq\n";
            }
            else
            {
                print $bad_fh "$peg\tno translation\n";
            }
        }
    }
    close($bad_fh);
    close($good_fh) or die "could not write $roleD/good.pegs: $!";

    # NOTE(review): this command line still interpolates $roleD (and thus the
    # request's project and role) into a shell command -- confirm that
    # callers only pass validated names, or quote the paths here.
    &FIG::run("$FIG_Config::bin/split_sequences_into_sets $roleD/split_info 1.0e-20 0.7 < $roleD/good.pegs");
}

# HTML anchor that re-invokes this script with request=process_set for the
# given project, role and set; the link text is the set itself.
# $cgi and $fig are accepted but not used.
sub show_set_link {
    my($project,$roleI,$set,$cgi,$fig) = @_;

    my $url = './check_role_in_ss.cgi?Project=' . $project
            . '&roleI=' . $roleI
            . '&request=process_set&set=' . $set;
    return '<a href=' . $url . '>' . $set . '</a>';
}

# Show the alignment of one set of a role.
#
# The set's sequences are copied from <role dir>/split_info/<set> into
# <role dir>/<set>/seqs.fasta when that file is missing or empty.  The
# alignment ali.fasta is (re)built when it is missing, empty, or older than
# seqs.fasta.  Ids are renamed through the project's Merged/aliases when that
# file is non-empty, and the result is rendered by alignment_to_html and
# appended to @$html.
#
# Parameters: $fig, $html, $cgi, $project, $roleI, $set.
sub process_set {
    my($fig,$html,$cgi,$project,$roleI,$set) = @_;

    if ((! defined($roleI)) || (! defined($set)))
    {
        push(@$html,$cgi->h3('badly formed parameters - missing role or set'));
    }
    else
    {
        # Files are copied in-process; the paths contain request parameters
        # and were previously interpolated into shell "cp" commands.
        require File::Copy;

        my $roleD = &role_dir($project,$roleI);
        my $setD  = "$roleD/$set";
        if ((! -d $setD) || (! -s "$setD/seqs.fasta"))
        {
            &FIG::verify_dir($setD);
            # A failed copy is reported below as an empty seqs.fasta.
            File::Copy::copy("$roleD/split_info/$set","$setD/seqs.fasta");
        }

        # -M is the age of the file: seqs.fasta being "younger" than
        # ali.fasta means the alignment is stale.
        if ((! -s "$setD/ali.fasta") ||
            ((-M "$setD/seqs.fasta") < (-M "$setD/ali.fasta")))
        {
            my @seqs  = &gjoalignment::read_fasta_file("$setD/seqs.fasta");
            if (@seqs == 1)
            {
                # A single sequence is its own alignment.
                File::Copy::copy("$setD/seqs.fasta","$setD/ali.fasta")
                    or die "could not copy $setD/seqs.fasta to $setD/ali.fasta: $!";
            }
            elsif (@seqs > 0)
            {
                # NOTE(review): $setD is still interpolated into this shell
                # command -- confirm the names are validated upstream.
                &FIG::run("$FIG_Config::bin/align_seqs < $setD/seqs.fasta > $setD/ali.fasta");
            }
            else
            {
                push(@$html,$cgi->h2("$setD/seqs.fasta is empty or invalid"));
                return;
            }
        }

        my $dir = "$FIG_Config::global/JensenAnalysis/$project";
        if (-s "$dir/Merged/aliases")
        {
            &rename_ids("$dir/Merged/aliases","$setD/ali.fasta","$setD/renamed.fasta");
        }
        else
        {
            File::Copy::copy("$setD/ali.fasta","$setD/renamed.fasta")
                or warn "could not copy $setD/ali.fasta to $setD/renamed.fasta: $!";
        }
        # NOTE(review): same shell-interpolation concern as align_seqs above.
        push(@$html,`$FIG_Config::bin/alignment_to_html -t < $setD/renamed.fasta`);
    }
}

# Copy the FASTA file $ali to $renamed, replacing the id on each header line
# by its alias when one is known; all other lines are copied unchanged.
#
# Aliases are read from $aliases: on each line the first column is the id
# and the alias is the text following the first "|" of the next column
# (e.g. "fig|1.1.peg.1<TAB>sp|P12345" maps the id to "P12345").  Lines that
# do not have this shape are ignored; previously they reused the captures of
# the preceding line.  Dies when $ali or $renamed cannot be opened.
sub rename_ids {
    my($aliases,$ali,$renamed) = @_;

    my %aliases_map;
    if (open(my $alias_fh, '<', $aliases))
    {
        while (defined(my $line = <$alias_fh>))
        {
            if ($line =~ /^(\S+)\s+[^\|]+\|(\S+)/)
            {
                $aliases_map{$1} = $2;
            }
        }
        close($alias_fh);
    }

    open(my $in_fh, '<', $ali)      or die "could not open $ali: $!";
    open(my $out_fh, '>', $renamed) or die "could not open $renamed: $!";

    while (defined(my $line = <$in_fh>))
    {
        my $to;
        if (($line =~ /^>(\S+)([^\n]*)/) && ($to = $aliases_map{$1}))
        {
            print $out_fh ">$to$2\n";
        }
        else
        {
            print $out_fh $line;
        }
    }
    close($in_fh);
    # Buffered write errors only surface when the handle is closed.
    close($out_fh) or die "could not write $renamed: $!";
}
	    

# Classify a PEG as unusable from the comment part of its function.
# Returns "truncated", "fragment", "frameshifted" or "incomplete" when the
# text after a "#" in the function contains (case-insensitively) "trunc",
# "fragment", "frame" or "incomplete" -- tested in that order -- and the
# empty string otherwise.
sub is_it_bad {
    my($fig,$peg) = @_;

    my $func = $fig->function_of($peg);

    my @checks = ( [ qr/\#.*trunc/i,      "truncated"    ],
                   [ qr/\#.*fragment/i,   "fragment"     ],
                   [ qr/\#.*frame/i,      "frameshifted" ],
                   [ qr/\#.*incomplete/i, "incomplete"   ],
                 );
    foreach my $check (@checks)
    {
        my($pattern,$code) = @$check;
        return $code if ($func =~ $pattern);
    }
    return '';
}


# Build the entry page: a link to the plan, a list of existing projects with
# "use" and "delete" buttons (only when at least one project exists), and a
# list of usable subsystems from which to start a new project
# (request=pick_roles).  Also makes sure the JensenAnalysis directory exists.
sub initial_page {
    my($fig,$cgi,$html) = @_;

    my $user       = $cgi->param('user') || 'RoleAnalysis';
    my @projects   = &existing_projects();
    my @usable     = grep { $fig->usable_subsystem($_) } $fig->all_subsystems;
    my @subsystems = sort @usable;

    push(@$html,"<br><br>",&show_plan_link(),"<br><br>");
    push(@$html,$cgi->start_form(-action => "./check_role_in_ss.cgi"));

    if (@projects > 0)
    {
        push(@$html,"Pick an existing project to work on, or go below to start a new project:<br>");
        push(@$html,$cgi->scrolling_list( -name => 'Project',
                                          -values => \@projects,
                                          -size => 5));
        push(@$html,"<br><br>");
        push(@$html,$cgi->submit('Use Existing Project'));
        push(@$html,$cgi->submit('Delete Existing Project'));
    }

    push(@$html,"<br><br>","Pick a subsystem: <br><br>");
    push(@$html,$cgi->scrolling_list( -name => 'subsys',
                                      -values => \@subsystems,
                                      -size => 10));
    push(@$html,"<br><br>");
    push(@$html,$cgi->hidden( -name => "user", -value => $user, -override => 1));
    push(@$html,$cgi->hidden( -name => "request", -value => 'pick_roles', -override => 1));
    push(@$html,$cgi->submit('Pick Subsystem'));
    push(@$html,$cgi->end_form);

    &FIG::verify_dir("$FIG_Config::global/JensenAnalysis");
}

# Return the sorted names of the entries of the JensenAnalysis directory,
# leaving out dot-files and the 'misc' directory.  An unreadable or missing
# directory gives an empty list.
sub existing_projects {

    my $projD = "$FIG_Config::global/JensenAnalysis";
    my %skip_dirs = ('misc' => 1);
    my @existing = ();
    if (opendir(my $proj_dh,$projD))
    {
        @existing = grep { ($_ !~ /^\./) && (! exists $skip_dirs{$_}) } readdir($proj_dh);
        # The handle used to be a bareword that was never closed.
        closedir($proj_dh);
    }
    return sort @existing;
}

# Build the form for choosing one or more roles of the subsystem named by the
# 'subsys' request parameter; submitting it issues request=pick_project.
# Without a subsystem only a heading asking for one is emitted.
sub pick_roles {
    my($fig,$cgi,$html) = @_;

    my $user   = $cgi->param('user') || 'RoleAnalysis';
    my $subsys = $cgi->param('subsys');

    if (! $subsys)
    {
        push(@$html,$cgi->h1('You need to select a subsystem'));
        return;
    }

    my @roles = $fig->subsystem_to_roles($subsys);

    my @form;
    push(@form,$cgi->start_form(-action => "./check_role_in_ss.cgi"));
    push(@form,"Pick one or more roles: <br><br>");
    push(@form,$cgi->scrolling_list( -name => 'role_to_process',
                                     -values => \@roles,
                                     -multiple => 1,
                                     -size => 10));
    push(@form,"<br><br>");
    push(@form,$cgi->hidden( -name => "user", -value => $user, -override => 1));
    push(@form,$cgi->hidden( -name => "request", -value => 'pick_project', -override => 1));
    push(@form,$cgi->hidden( -name => "subsys", -value => $subsys, -override => 1));
    push(@form,$cgi->submit('Picked Roles'));
    push(@form,$cgi->end_form);

    push(@$html,@form);
}

# Build the form asking for a project name for the chosen subsystem and
# roles; submitting it issues request=picked_project.  The suggested name is
# the first run of 3-100 letters, spaces, "_" or "-" in the first role, with
# spaces turned into "_" and trailing "_" removed.  Without a subsystem or
# any role only a heading is emitted.
sub pick_project {
    my($fig,$cgi,$html) = @_;

    my $user   = $cgi->param('user') || 'RoleAnalysis';
    my $subsys = $cgi->param('subsys');
    my @roles  = $cgi->param('role_to_process');

    if ((! $subsys) || (@roles < 1))
    {
        push(@$html,$cgi->h1('You need to select a subsystem and roles'));
        return;
    }

    my $default_project = "";
    if ($roles[0] =~ /([a-z\_\-\ A-Z]{3,100})/)
    {
        $default_project = $1;
    }
    $default_project =~ s/ /_/g;
    $default_project =~ s/_+$//;

    push(@$html,$cgi->start_form(-action => "./check_role_in_ss.cgi"));
    push(@$html,"Define Project Name: <br><br>");
    push(@$html,$cgi->hidden( -name => "user", -value => $user, -override => 1));
    push(@$html,$cgi->hidden( -name => "request", -value => 'picked_project', -override => 1));
    push(@$html,$cgi->hidden( -name => "subsys", -value => $subsys, -override => 1));

    # Carry every selected role along as its own hidden field.
    foreach my $role (@roles)
    {
        push(@$html,$cgi->hidden( -name => "role_to_process", -value => $role, -override => 1));
    }

    push(@$html,
                "<br>You need to specify a project name that you will use whenever you work on these roles:<br><br>",
                $cgi->textfield( -name => "Project", -size => 50, -value => $default_project ),
                $cgi->submit('Decided on Project Name'),
                $cgi->end_form);

}

# Show the overview of $project, first creating its directory (via
# initialize_dir) when it does not exist yet.
sub picked_project {
    my($fig,$cgi,$html,$project) = @_;

    my $project_dir = "$FIG_Config::global/JensenAnalysis/$project";

    &initialize_dir($fig,$cgi,$html) unless (-d $project_dir);

    return &show_dir($fig,$project,$html);
}

# Append the overview of a project to @$html: a heading with the curator and
# subsystem, a numbered list of role links taken from the project index, and
# the links/forms for every follow-up action (merge, tree, representatives,
# PDB, literature, aliases, plan, dynamic table).
#
# Side effect: when Merged/seqs.fasta exists but Merged/lit.entries is
# missing or empty, the literature entries are generated first.
# Uses the file-level $cgi for the heading.
sub show_dir {
    my($fig,$project,$html) = @_;

    my $projD = "$FIG_Config::global/JensenAnalysis/$project";

    # First non-blank line of the subsystem file, trimmed.
    my $ss;
    if (open(my $ss_fh, '<', "$projD/subsystem"))
    {
        while (defined(my $line = <$ss_fh>))
        {
            if ($line =~ /(\S.*\S)/) { $ss = $1; last; }
        }
        close($ss_fh);
    }
    my $curator = $fig->subsystem_curator($ss);
    push(@$html,$cgi->h2("$curator: $ss"));

    # Only well-formed "<index> <role>" lines are listed; a non-matching line
    # used to repeat the captures of the preceding one.
    my @index;
    if (open(my $index_fh, '<', "$projD/index"))
    {
        while (defined(my $line = <$index_fh>))
        {
            push(@index,[$1,$2]) if ($line =~ /^(\d+)\s+(\S.*\S)/);
        }
        close($index_fh);
    }
    push(@$html,"<ol>\n", (map { "<li> " . &role_link($_,$project,$fig) . "\n" } @index), "</ol>\n");

    push(@$html,"<br>",&merge_link($cgi,$project),"<br>");
    push(@$html,"<br>",&show_merge_link($project),"<br>");
    push(@$html,"<br>",&show_bootstrapped_tree_link($project),"<br>");
    push(@$html,"<br>",&link_to_build_tree($project),"<br>");
    push(@$html,"<br>",&link_to_build_ali($project),"<br>");
    push(@$html,"<br>",&link_to_show_possible_collapses($project),"<br>");
    push(@$html,"<br>",&link_to_pick_reps($project),"<br>");

    if (-s "$projD/Merged/pdb.entries")
    {
        push(@$html,"<br>",&show_pdb_link($project),"<br>");
    }
    else
    {
        push(@$html,"<br>",&update_pdb_link($project),"<br>");
    }
    if ((! -s "$projD/Merged/lit.entries") && (-s "$projD/Merged/seqs.fasta"))
    {
        &get_lit_entries($fig,$project);
    }

    if (-s "$projD/Merged/lit.entries")
    {
        push(@$html,"<br>",&show_lit_link($project),"<br>");
    }

    if (-s "$projD/Merged/aliases")
    {
        push(@$html,"<br>",&link_to_show_aliases($project),"<br>");
    }
    else
    {
        push(@$html,"<br>",&get_names_link($project),"<br>");
    }
    push(@$html,"<br>",&show_plan_link(),"<br>");

    # A second, argument-less "&show_lit_link" call used to follow here.
    # Called that way it inherited this sub's @_, so it treated $fig as the
    # project name, never produced a link, and could push an undefined value
    # into the page.  The literature link is already emitted above.

    if (-d "$projD/Merged/Reps")
    {
        push(@$html,"<br>",&link_to_show_dynamic_table($project),"<br>");
    }
}

#
# Create Merged/lit.entries for a project if it does not exist yet (or is
# empty) and Merged/seqs.fasta is available.  One line "peg<TAB>pubmed-id"
# is written for every attribute of a PEG whose value (third tuple element)
# starts with "dlit(<digits>)".
#
#   $fig      object providing get_attributes($peg)
#   $project  project name under $FIG_Config::global/JensenAnalysis
#
# Dies if the sequence file cannot be read or the output cannot be written.
#
sub get_lit_entries {
    my($fig,$project) = @_;

    my $dir      = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    my $lit_file = "$dir/lit.entries";
    my $seq_file = "$dir/seqs.fasta";

    return if (-s $lit_file);        # already collected
    return if (! -s $seq_file);      # nothing to collect from yet

    # Read the FASTA headers directly instead of shelling out to grep.
    open(my $seq_fh,'<',$seq_file) || die "could not open $seq_file: $!";
    my @pegs = map { ($_ =~ /^>(\S+)/) ? $1 : () } <$seq_fh>;
    close($seq_fh);

    open(my $lit_fh,'>',$lit_file) || die "could not open $lit_file: $!";
    foreach my $peg (@pegs)
    {
        my @dlits = map { ($_->[2] =~ /^dlit\((\d+)\)/) ? $1 : () }
                    $fig->get_attributes($peg);
        foreach my $dlit (@dlits)
        {
            print $lit_fh "$peg\t$dlit\n";
        }
    }
    close($lit_fh) || die "could not close $lit_file: $!";
    return;
}

#
# Load Merged/lit.entries for a project.
#
# Returns a reference to a hash mapping each feature id to an array
# reference of the PubMed ids listed for it (file order preserved).  A
# missing file yields an empty hash.  Lines that do not contain both a
# feature id and a tab-separated second field are ignored.
#
# Dies if the file exists but cannot be opened.
#
sub read_lit_entries {
    my($project) = @_;

    my $lit_file = "$FIG_Config::global/JensenAnalysis/$project/Merged/lit.entries";

    my %lit_entries = ();

    if ( ! -e $lit_file ) {
        return \%lit_entries;
    }

    open(my $lit_fh, '<', $lit_file) or die "could not open file '$lit_file':$!";
    while ( defined(my $line = <$lit_fh>) )
    {
        chomp $line;
        my($fid, $pubmed) = split(/\t/, $line);

        # Skip blank or truncated lines rather than storing '' keys or
        # undefined ids.
        next unless (defined($fid) && length($fid) && defined($pubmed));

        push @{ $lit_entries{$fid} }, $pubmed;
    }
    close($lit_fh);

    return \%lit_entries;
}

#
# Load Merged/pdb.entries for a project.
#
# Returns a reference to a hash mapping each feature id to an array
# reference holding the second tab-separated field of every line for that
# feature (file order preserved).  A missing file yields an empty hash.
# Lines without both a feature id and a second field are ignored.
#
# Dies if the file exists but cannot be opened.
#
sub read_pdb_entries {
    my($project) = @_;

    my $pdb_file = "$FIG_Config::global/JensenAnalysis/$project/Merged/pdb.entries";

    my %pdb_entries = ();

    if ( ! -e $pdb_file ) {
        return \%pdb_entries;
    }

    open(my $pdb_fh, '<', $pdb_file) or die "could not open file '$pdb_file':$!";
    while ( defined(my $line = <$pdb_fh>) )
    {
        chomp $line;
        my($fid, $pdb) = split(/\t/, $line);

        # Skip blank or truncated lines rather than storing '' keys or
        # undefined ids.
        next unless (defined($fid) && length($fid) && defined($pdb));

        push @{ $pdb_entries{$fid} }, $pdb;
    }
    close($pdb_fh);

    return \%pdb_entries;
}

#
# Create the directory for a new project and record which subsystem and
# which roles it covers.  The project name ('Project'), the subsystem
# ('subsys') and the roles ('role_to_process', possibly several) are all
# taken from the CGI parameters.
#
sub initialize_dir {
    my($fig,$cgi,$html) = @_;

    my @roles  = $cgi->param('role_to_process');
    my $proj   = $cgi->param('Project');
    my $subsys = $cgi->param('subsys');

    my $project_dir = $FIG_Config::global . "/JensenAnalysis/" . $proj;

    &FIG::verify_dir($project_dir);
    &record_subsystem($project_dir,$subsys);
    &record_roles($project_dir,\@roles);
}
    
#
# Write the subsystem name, followed by a newline, to the file "subsystem"
# in the project directory, replacing any previous content.
#
#   $projD   project directory (must already exist)
#   $subsys  subsystem name
#
# Dies if the file cannot be opened or written.  Returns a true value.
#
sub record_subsystem {
    my($projD,$subsys) = @_;

    my $file = "$projD/subsystem";

    # Lexical handle and three-argument open: no clash with the other subs
    # that use a TMP handle, and no mode characters read from the path.
    open(my $fh,'>',$file)
        || die "could not open $file: $!";
    print $fh "$subsys\n";

    # Buffered write errors only surface at close.
    close($fh)
        || die "could not close $file: $!";
    return 1;
}

#
# Make sure the project's Roles directory exists and write the role index:
# one line "N<TAB>role" per role, numbered from 1 in the order given.
#
#   $projD  project directory
#   $roles  array reference of role names
#
# Dies if the index file cannot be opened or written.  Returns a true value.
#
sub record_roles {
    my($projD,$roles) = @_;

    &FIG::verify_dir("$projD/Roles");

    my $file = "$projD/index";
    open(my $fh,'>',$file)
        || die "could not open $file: $!";

    my $roleI = 0;
    foreach my $role (@$roles)
    {
        $roleI++;
        print $fh "$roleI\t$role\n";
    }

    # Buffered write errors only surface at close.
    close($fh)
        || die "could not close $file: $!";
    return 1;
}

#
# Return the PEGs that occupy the cell for $role in subsystem $ss, taken
# over every genome of the subsystem, keeping only those whose currently
# assigned function contains $role as a substring.
#
sub pegs_fulfilling_role_in_subsystem {
    my($fig,$ss,$role) = @_;

    my $genome_rows = $fig->subsystem_genomes($ss);

    # Gather the candidates from every genome first (the genome id is the
    # first element of each row) ...
    my @candidates = ();
    foreach my $row (@$genome_rows)
    {
        push(@candidates,$fig->pegs_in_subsystem_cell($ss,$row->[0],$role));
    }

    # ... then keep those whose function still mentions the role.
    my @kept = ();
    foreach my $peg (@candidates)
    {
        my $function = $fig->function_of($peg);
        push(@kept,$peg) if (index($function,$role) >= 0);
    }
    return @kept;
}

####################  temporary code to get PDB entries ###############
use DBI;
use AnnotationClearingHouse::ACH;

#
# Collect the PDB entries for the merged sequences of a project (unless a
# non-empty Merged/pdb.entries already exists) and then display them.
#
# For every sequence id in Merged/seqs.fasta the Annotation Clearing House
# is asked for the id's alias set; aliases whose fifth field is 'pdb' and
# whose id is 4-20 digits long are written to pdb.entries as
# "peg<TAB>id<TAB>third alias field".
#
# If the database cannot be reached, a message is added to @$html and
# nothing is written.  Dies if pdb.entries cannot be written.
#
sub update_pdb {
    my($fig,$cgi,$html,$project) = @_;

    my $dir = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    if (! -d $dir)
    {
        push(@$html,$cgi->h2("something is wrong - $dir does not exist or has wrong permissions"));
        return;
    }

    my $pdb_file = "$dir/pdb.entries";
    if (! -s $pdb_file)
    {
        my $db     = "ACH_TEST";
        my $dbuser = "ach"; # "root";  root gets read-write, ach only read
        my $dbhost = "bio-data-1.mcs.anl.gov";
        my $dbpass = '';

        # Connect before touching pdb.entries, so a failed connection does
        # not leave an empty file behind.
        my $dbh = DBI->connect("DBI:mysql:dbname=$db;host=$dbhost", $dbuser, $dbpass);
        if (! $dbh)
        {
            my $why = defined($DBI::errstr) ? $DBI::errstr : 'unknown error';
            push(@$html,$cgi->h2("could not connect to database $db on $dbhost: $why"));
            return;
        }
        my $ach = AnnotationClearingHouse::ACH->new( $dbh );

        # Read the FASTA headers directly instead of shelling out to grep;
        # an unreadable file simply yields no ids, as before.
        my @pegs = ();
        if (open(my $seq_fh,'<',"$dir/seqs.fasta"))
        {
            @pegs = map { ($_ =~ /^>(\S+)/) ? $1 : () } <$seq_fh>;
            close($seq_fh);
        }

        open(my $pdb_fh,'>',$pdb_file) || die "could not open $pdb_file: $!";
        foreach my $peg (@pegs)
        {
            my $aliases = $ach->id2set($peg);

            # NOTE(review): what id2set returns for an unknown id is not
            # visible here; guard against an undefined result.
            next unless ($aliases);

            foreach my $alias (@$aliases)
            {
                next unless (defined($alias->[4]) && ($alias->[4] eq 'pdb'));
                next unless ($alias->[0] =~ /^\d{4,20}$/);
                print $pdb_fh join("\t",($peg,$alias->[0],$alias->[2])),"\n";
            }
        }
        close($pdb_fh) || die "could not close $pdb_file: $!";
        $dbh->disconnect;
    }
    &show_pdb($fig,$cgi,$html,$project);
}

#
# Return an HTML link to the PubMed abstract for the given PubMed id, with
# the id itself as the link text.
#
# The href is quoted and its ampersands are written as &amp;, so the link is
# also well-formed inside the XHTML page printed by show_dynamic_table.
#
sub dlit_link {
    my($dlit) = @_;

    return qq(<a href="http://www.ncbi.nlm.nih.gov/sites/entrez?cmd=Retrieve&amp;db=PubMed&amp;list_uids=$dlit&amp;dopt=AbstractPlus">$dlit</a>);
}

#
# Return an HTML link to the NCBI protein page for the given id, with the
# id itself as the link text.
#
sub pdb_link {
    my($pdb) = @_;

    my $url = "http://www.ncbi.nlm.nih.gov/protein/" . $pdb;
    return '<a href="' . $url . '">' . $pdb . '</a>';
}

#
# Add a table of the literature attached to the project's merged sequences
# to @$html: one row per (PEG, PubMed id) pair from Merged/lit.entries,
# with the genome name and the title recorded for that PubMed id.
#
# Rows are ordered by PubMed id, then by PEG.  A missing or unreadable
# lit.entries file gives an empty table.
#
sub show_lit {
    my($fig,$cgi,$html,$project) = @_;

    my $all = $fig->all_titles;
    my %titles = map { $_->[0] => $_->[1] } @$all;
    my $dir = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    my $col_hdrs = ['PEG','Genome','pubmed','Title'];
    my $tab      = [];

    my @lines = ();
    if (open(my $lit_fh,'<',"$dir/lit.entries"))
    {
        @lines = <$lit_fh>;
        close($lit_fh);
    }

    # Each tuple is [peg, pubmed id]; the id is element 1.  Lines that do
    # not match are dropped instead of reusing stale captures.
    my @dlits = sort { ($a->[1] <=> $b->[1]) || ($a->[0] cmp $b->[0]) }
                map  { ($_ =~ /^(\S+)\s+(\d+)/) ? [$1,$2] : () }
                @lines;

    foreach my $tuple (@dlits)
    {
        my($peg,$dlit) = @$tuple;
        my $title = $titles{$dlit};
        push(@$tab,[&link_to_peg($peg),$fig->genus_species($fig->genome_of($peg)),&dlit_link($dlit),$title]);
    }
    push(@$html,&HTML::make_table($col_hdrs,$tab,"Relevant Literature"));
}

#
# Add a table of the PDB entries recorded in Merged/pdb.entries to @$html.
# Each line of the file holds "peg<TAB>id<TAB>description"; the table title
# reports the number of distinct descriptions.
#
# Dies if pdb.entries cannot be opened.
#
sub show_pdb {
    my($fig,$cgi,$html,$project) = @_;

    my $dir = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    my $col_hdrs = ['PEG','Genome','PDB entry','Description'];
    my $tab      = [];

    my $pdb_file = "$dir/pdb.entries";
    open(my $pdb_fh,'<',$pdb_file) || die "could not open $pdb_file: $!";

    my %str;
    while (defined(my $line = <$pdb_fh>))
    {
        chomp $line;
        my($peg,$gi,$desc) = split(/\t/,$line);
        next unless (defined($peg) && defined($gi));   # blank or truncated line
        $desc = '' unless (defined($desc));

        $str{$desc} = 1;

        # Link straight to NCBI via pdb_link, like the dynamic table does,
        # rather than through a site-specific library proxy host.
        push(@$tab,[&link_to_peg($peg),$fig->genus_species(&FIG::genome_of($peg)),&pdb_link($gi),$desc]);
    }
    close($pdb_fh);

    my $N = keys(%str);
    push(@$html,&HTML::make_table($col_hdrs,$tab,"Links to $N distinct PDB Structures"));
}

#
# Build Merged/aliases for a project: one line "fid<TAB>source|alias" per
# sequence, where the source is "aro" for acronyms supplied by the AroPath
# service and "seed" for aliases built locally by seed_alias.
#
# The merged FASTA file is rewritten with organism and function in the
# headers and posted to AroPath.  In the reply, the lines between the
# "Please wait..." line and the first "<meta" line are scanned.  If the
# request fails, every sequence gets a locally built alias.
#
# Dies if the FASTA file cannot be rewritten or the aliases file cannot be
# written.
#
sub get_names {
    my($fig,$cgi,$html,$project) = @_;

    my $dir = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    if (! -d $dir)
    {
        push(@$html,$cgi->h2("something is wrong - $dir does not exist or has wrong permissions"));
        return;
    }

    my $fasta_file     = "$dir/seqs.fasta";
    my $fasta_tmp_file = $FIG_Config::temp . "/seq_file.$$.fasta";

    # rewrite to a temp file with organism name and function in AroPath input format
    &rewrite_fasta_file($fig, $fasta_file, $fasta_tmp_file);

    my $url  = 'http://aropath.lanl.gov/cgi-bin/Convert_org_to_acronym.cgi';
    my $ua   = LWP::UserAgent->new();
    my $resp = $ua->post($url, { infile => [$fasta_tmp_file], suggestion => 'no'}, 'Content-type' => 'multipart/form-data');

    unlink($fasta_tmp_file);

    # Open (and thereby truncate) the aliases file only now, after the steps
    # that can die, so an early failure does not wipe existing aliases.
    my $alias_file = "$dir/aliases";
    open(my $rename_fh,'>',$alias_file) || die "could not open $alias_file: $!";

    if ($resp->is_success)
    {
        # Here we get the response from aropath
        my @lines = split(/\n/, $resp->content);

        # Skip everything up to and including the "Please wait..." line.
        my $i = 0;
        $i++ while (($i < @lines) && ($lines[$i] !~ /^Please wait\.\.\./));
        $i++;

        for (; (($i < @lines) and ($lines[$i] !~ /^<meta/)); $i++)
        {
            if ( $lines[$i] =~ /^>\[No match/ )
            {
                if ( $lines[$i] =~ /(fig\|\d+\.\d+\.peg\.\d+)\s+\[([^\]]+)\]/ )
                {
                    my($fid, $genome) = ($1, $2);
                    my $alias = &seed_alias($fid, $genome);
                    next unless (defined($alias));   # seed_alias already reported it
                    print $rename_fh "$fid\tseed|$alias\n";
                }
                else
                {
                    print STDERR "could not get fig id and genome name from aropath output\n$lines[$i]\n";
                }
            }
            elsif ( $lines[$i] =~ /^>(\S+)\s+(\S+)/ )
            {
                my($aropath_alias, $fid) = ($1, $2);
                print $rename_fh "$fid\taro|$aropath_alias\n";
            }
        }
    }
    else
    {
        # Here we failed to get a response from aropath
        my @pegs = ();
        if (open(my $fasta_fh,'<',$fasta_file))
        {
            @pegs = map { ($_ =~ /^>(\S+)/) ? $1 : () } <$fasta_fh>;
            close($fasta_fh);
        }

        foreach my $peg (@pegs)
        {
            # seed_alias works on the words of an organism name, the same
            # text rewrite_fasta_file puts in the headers, not a genome id.
            my $genome = $fig->org_of($peg);
            my $alias  = &seed_alias($peg, $genome);
            next unless (defined($alias));
            print $rename_fh "$peg\tseed|$alias\n";
        }
    }
    close($rename_fh) || die "could not close $alias_file: $!";
    push(@$html,$cgi->h2("Finished getting the renaming acronyms"));
}

#
# Copy a FASTA file, replacing every header line with
# ">fid [organism] [function]".  The organism and function are looked up
# through $fig; a function comment (text from " #" on) is removed.
# Sequence lines are copied unchanged.
#
#   $fig    object providing org_of($fid) and function_of($fid)
#   $fasta  input file
#   $tmp    output file (overwritten)
#
# Dies if either file cannot be opened or the output cannot be written.
#
sub rewrite_fasta_file {
    my($fig, $fasta, $tmp) = @_;

    open(my $in_fh,  '<', $fasta) or die "could not open file: '$fasta'; $!";
    open(my $out_fh, '>', $tmp)   or die "could not open file: '$tmp'; $!";

    while ( defined(my $line = <$in_fh>) )
    {
        chomp $line;
        if ( $line =~ /^>(\S+)/ )
        {
            my $fid  = $1;
            my $org  = $fig->org_of($fid);
            my $func = $fig->function_of($fid);

            # Keep only the part of the function before a " #" comment.
            if ( $func =~ /([^\#]+)\s\#/ )
            {
                $func = $1;
            }

            print $out_fh ">$fid [$org] [$func]\n";
        }
        else
        {
            print $out_fh "$line\n";
        }
    }

    # Buffered write errors only surface at close.
    close($out_fh) or die "could not close file: '$tmp'; $!";
    close($in_fh);
}

#
# Build a short alias for a feature from its organism name:
#
#     <first letter of word 1><first 4 letters of word 2>[<last word>].<peg number>
#
# Words containing "sp.", "ACTT" or "str." are ignored.  When the four
# letters taken from the second word end in '-' or '.', five letters are
# taken instead.  The last word is appended only when more than two words
# remain.  With exactly one word left no alias can be built: a message goes
# to STDERR and undef is returned.
#
# NOTE(review): "ACTT" may have been meant as "ATCC" -- confirm before
# changing, since existing aliases depend on the current behaviour.
#
sub seed_alias {
    my($fid, $genome) = @_;

    my($pegnum) = ($fid=~ /(\d+)$/);

    my @words = grep { $_ !~ /sp\.|ACTT|str\./ } split(/\s+/, $genome);

    my $alias;

    if ( @words == 1 )
    {
        print STDERR "you need to modify the subroutine that creates the alias (short name) for genome '$genome'\n";
    }
    else
    {
        my $initial = substr($words[0],0,1);

        my $species = substr($words[1],0,4);
        $species = substr($words[1],0,5) if ( $species =~ /[-\.]$/ );

        # Only names with something beyond genus and species carry a suffix.
        my $suffix = ( @words == 2 ) ? '' : $words[-1];

        $alias = $initial . $species . $suffix . '.' . $pegnum;
    }

    return $alias;
}

#
# Display the project's tree (Merged/TreeData/tree.nwk), rerooted near its
# midpoint and put in aesthetic order, with its nodes labelled from the
# bootstrap samples stored in Merged/boot.nwk.
#
sub show_bootstrapped_tree {
    my($fig,$cgi,$html,$project) = @_;

    my $merged = "$FIG_Config::global/JensenAnalysis/$project/Merged";

    my $main_tree = gjonewicklib::read_newick_tree( "$merged/TreeData/tree.nwk" );
    $main_tree = reroot_newick_to_approx_midpoint( $main_tree );
    $main_tree = aesthetic_newick_tree( $main_tree );

    my @boot_trees = gjonewicklib::read_newick_trees( "$merged/boot.nwk" );
    my $labelled   = gjophylip::bootstrap_label_nodes( $main_tree, \@boot_trees );

    &show_tree($fig,$cgi,$html,$project,$labelled);
}

#
# Show the merged alignment of a project as HTML, followed by links to the
# follow-up steps.  If the aliases file is non-empty and newer than
# Merged/renamed.fasta, the renamed alignment is regenerated from
# Merged/ali.fasta first.
#
sub show_merge {
    my($fig,$cgi,$html,$project) = @_;

    my $merged  = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    my $aliases = "$merged/aliases";
    my $renamed = "$merged/renamed.fasta";

    # -M is the file's age, so "smaller" means "modified more recently".
    if ((-s $aliases) && ((-M $aliases) < (-M $renamed)))
    {
        &rename_ids($aliases,"$merged/ali.fasta",$renamed);
    }

    push(@$html,`$FIG_Config::bin/alignment_to_html -t < $merged/renamed.fasta`);
    push(@$html,
         "<br><br>",&link_to_build_tree($project),
         "<br><br>",&link_to_build_ali($project),
         "<br>",&link_to_show_possible_collapses($project),"<br>",
         "<br><br>",&link_to_show_aliases($project));
}

#
# Build the tree of the representative sequences and store it in
# Merged/TreeData/tree.nwk.  Only aligned sequences whose ids appear in
# Merged/reps.fasta are used.
#
# Trimming the alignment may have dropped some sequences.  The dropped
# representatives are reported in @$html, but the tree is built anyway.
#
sub build_tree {
    my($fig,$cgi,$project,$html) = @_;

    my $merged = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    &FIG::verify_dir("$merged/TreeData");

    my %is_rep = ();
    foreach my $header (`grep "^>" $merged/reps.fasta`)
    {
        $is_rep{$1} = 1 if ($header =~ /^>(\S+)/);
    }

    my @align = grep { $is_rep{$_->[0]} } &gjoalignment::read_fasta("$merged/ali.fasta");

    unless (scalar(@align) == scalar(keys(%is_rep)))
    {
        my %aligned = map { $_->[0] => 1 } @align;
        my $missing = join(",",grep { ! $aligned{$_} } keys(%is_rep));
        push(@$html,$cgi->h2("sequence in reps not in alignment: $missing"));
    }

    my $tree = &build_a_tree(\@align);
    &write_tree($tree,"$merged/TreeData/tree.nwk");
}

#
# Rebuild the alignment of the representative sequences: trim
# Merged/reps.fasta into Merged/trimmed.fasta, align that into
# Merged/ali.fasta, and then rewrite reps.fasta so that it only contains
# sequences that made it into the alignment.
#
# The CGI parameters 'pad_left' and 'pad_right' are passed to
# trim_sequences.  They must be integers; anything else (including a
# missing parameter) is treated as 0, because the values end up on a shell
# command line.
#
sub build_ali {
    my($fig,$cgi,$project) = @_;

    my($pad_left,$pad_right) = map { my $value = $cgi->param($_);
                                     (defined($value) && ($value =~ /^\s*(-?\d+)\s*$/)) ? $1 : 0
                                   } ('pad_left','pad_right');

    my $dir = "$FIG_Config::global/JensenAnalysis/$project";
    &FIG::verify_dir("$dir/Merged");
    my $aliF  = "$dir/Merged/ali.fasta";
    my $repsF = "$dir/Merged/reps.fasta";
    &FIG::run("$FIG_Config::bin/trim_sequences -pad_left=$pad_left -pad_right=$pad_right 1.0e-5 0.7 < $repsF  > $dir/Merged/trimmed.fasta; $FIG_Config::bin/align_seqs < $dir/Merged/trimmed.fasta > $aliF");

#
# The set of sequences may have shrunk in the trimming.  Hence, you need to contract the set of representative
# sequences.
#
    my @align  = &gjoalignment::read_fasta($aliF);
    my %in_ali = map { $_->[0] => 1 } @align;
    my @kept   = grep { $in_ali{$_->[0]} } &gjoalignment::read_fasta($repsF);
    &gjoalignment::write_fasta_file(\@kept,0,$repsF);
}

#
# Write $tree to the file $treeF in Newick format.  Returns nothing.
#
sub write_tree {
    my($tree,$treeF) = @_;

    gjonewicklib::writeNewickTree($tree,$treeF);

    return;
}

#
# Rebuild the alignment (via build_ali) if any representative listed in
# Merged/reps.fasta is missing from Merged/ali.fasta.
#
sub check_alignment {
    my($fig,$cgi,$project) = @_;

    my $merged = "$FIG_Config::global/JensenAnalysis/$project/Merged";

    my @rep_ids = ();
    foreach my $header (`grep "^>" $merged/reps.fasta`)
    {
        push(@rep_ids,$1) if ($header =~ /^>(\S+)/);
    }

    my %aligned = map { $_->[0] => 1 } &gjoalignment::read_fasta("$merged/ali.fasta");

    my @unaligned = grep { ! $aligned{$_} } @rep_ids;
    if (@unaligned)
    {
        &build_ali($fig,$cgi,$project);
    }
}

#
# Rebuild the tree (via build_tree) if the number of tips in
# Merged/TreeData/tree.nwk differs from the number of representatives in
# Merged/reps.fasta.
#
sub check_tree {
    my($fig,$cgi,$project) = @_;

    my $merged = "$FIG_Config::global/JensenAnalysis/$project/Merged";

    my $tree = gjonewicklib::read_newick_tree( "$merged/TreeData/tree.nwk" );
    my @tips = &gjonewicklib::newick_tip_list($tree);

    my %rep_ids = ();
    foreach my $header (`grep "^>" $merged/reps.fasta`)
    {
        $rep_ids{$1} = 1 if ($header =~ /^>(\S+)/);
    }

    my $n_tips = @tips;
    my $n_reps = keys(%rep_ids);
    unless ($n_tips == $n_reps)
    {
        &build_tree($fig,$cgi,$project);
    }
}

#
# Show the bootstrap-labelled tree of the representative sequences together
# with the sets of sequences that could be collapsed.
#
# CGI parameters:
#   bootstrapN        number of bootstrap samples wanted
#   bootstrap_cutoff  minimum node label for a split to be offered
#
# The alignment and tree are brought up to date first.  Bootstrap trees are
# cached in Merged/boot.nwk; the cache is discarded when it is empty or not
# newer than reps.fasta or the tree, and is otherwise extended until it
# holds bootstrapN samples.  If the cache cannot be opened for appending,
# the samples are still computed and used, but not saved.
#
# Every problem (no tree, bad parameters, missing alignment, tree out of
# date) is reported in @$html and ends the request.
#
sub collapse_reps1 {
    my($fig,$cgi,$html,$project) = @_;

    &check_alignment($fig,$cgi,$project);
    &check_tree($fig,$cgi,$project);

    my $dir    = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    my $treeF  = "$dir/TreeData/tree.nwk";
    if (! -s $treeF)
    {
        push(@$html,$cgi->h2('sorry, no tree exists to collapse'));
        return;
    }

    my $bootstrapN = $cgi->param('bootstrapN');
    my $cutoff     = $cgi->param('bootstrap_cutoff');
    if (! ($bootstrapN && $cutoff))
    {
        # The values come straight from the request; escape them before
        # echoing them into the page.
        my $shownN      = $cgi->escapeHTML(defined($bootstrapN) ? $bootstrapN : '');
        my $shownCutoff = $cgi->escapeHTML(defined($cutoff)     ? $cutoff     : '');
        push(@$html,$cgi->h2("bad bootstrap values: N=$shownN cutoff=$shownCutoff"));
        return;
    }

    my $aliF = "$dir/ali.fasta";
    if (! -s $aliF)
    {
        push(@$html,$cgi->h2("missing alignment"));
        return;
    }

    my $tree = gjonewicklib::read_newick_tree(  $treeF );
    my @tips = &gjonewicklib::newick_tip_list($tree);
    my %reps = map { ($_ =~ /^>(\S+)/) ? ($1 => 1) : () } `grep "^>" $dir/reps.fasta`;
    if (@tips != keys(%reps))
    {
        push(@$html,$cgi->h2("you need to rebuild the tree before proceeding"));
        return;
    }

    $tree = reroot_newick_to_approx_midpoint( $tree );
    $tree = aesthetic_newick_tree( $tree );

    my @align   = grep { $reps{$_->[0]}}  &gjoalignment::read_fasta($aliF);
    my @samples = ();

    # -M is the file's age: a cache that is not strictly newer than both the
    # representatives and the tree is stale.
    my $boot_file = "$dir/boot.nwk";
    if ((! -s $boot_file) || ((-M "$dir/reps.fasta") <= (-M $boot_file)) ||
        ((-M "$dir/TreeData/tree.nwk") <= (-M $boot_file)))
    {
        unlink($boot_file);
    }
    else
    {
        @samples = gjonewicklib::read_newick_trees( $boot_file );
    }

    my $trees_fh;
    my $can_save = open($trees_fh,'>>',$boot_file);
    if (! $can_save)
    {
        print STDERR "could not open $boot_file for appending: $!\n";
    }

    # Top the samples up to the requested number.
    for (my $i = @samples; ($i < $bootstrapN); $i++)
    {
        my $aliT  = &gjoalignment::bootstrap_sample(\@align);
        my $treeB = &build_a_tree($aliT);
        push(@samples,$treeB);
        gjonewicklib::writeNewickTree( $treeB, $trees_fh ) if ($can_save);
    }
    close($trees_fh) if ($can_save);

    my $treeL = gjophylip::bootstrap_label_nodes( $tree, \@samples );
    &show_tree($fig,$cgi,$html,$project,$treeL);
    &show_splits($fig,$cgi,$html,$project,$treeL);
}

#
# Add a text plot of $tree to @$html inside a <pre> block.  The plot is made
# from a copy of the tree, so the caller's tree is not relabelled.  Every
# tip that is a representative in Merged/reps.fasta is labelled
# "<alias>: <genus species>", or "<peg>: <genus species>" when
# Merged/aliases has no alias for it.
#
sub show_tree {
    my($fig,$cgi,$html,$project,$tree) = @_;

    my $dir     = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    my $tree1   = &gjonewicklib::copy_newick_tree($tree);
    $tree1      = gjonewicklib::aesthetic_newick_tree( $tree1, 1 );
    my $relabel = {};

    # Alias lines look like "peg<whitespace>source|alias".  Lines that do
    # not match are skipped, so no stale captures get into the map.
    my %aliases_map;
    my $aliases = "$dir/aliases";
    if (-s $aliases)
    {
        foreach my $line (`cat $aliases`)
        {
            $aliases_map{$1} = $2 if ($line =~ /^(\S+)\s+[^\|]+\|(\S+)/);
        }
    }

    foreach my $peg (map { ($_ =~ />(\S+)/) ? $1 : () } `grep "^>" $dir/reps.fasta`)
    {
        my $gs    = $fig->genus_species($fig->genome_of($peg));
        my $alias = $aliases_map{$peg};
        $relabel->{$peg} = $alias ? "$alias: $gs" : "$peg: $gs";
    }

    my $labeled = &gjonewicklib::newick_relabel_tips($tree1,$relabel);
    push(@$html,"<pre>\n");
    push(@$html,join("\n",@{&gjonewicklib::text_plot_newick($labeled,200,2,2)}));
    push(@$html,"</pre>\n");
}

#
# Report the possible collapses of a bootstrap-labelled tree, starting the
# hierarchical numbering of the splits at level 1.
#
sub show_splits {
    my($fig,$cgi,$html,$project,$tree) = @_;

    my $top_level = 1;
    &show_splits1($fig,$cgi,$html,$project,$tree,$top_level);
}

#
# Recursive worker for show_splits.  For an internal node whose label is
# set and at least the 'bootstrap_cutoff' CGI parameter, the node is shown
# as a possible collapse and its children are numbered "$level.1",
# "$level.2", ...; otherwise the children inherit $level unchanged.  Tips
# (nodes without descendants) are ignored.
#
sub show_splits1 {
    my($fig,$cgi,$html,$project,$node,$level) = @_;

    my @children = &gjonewicklib::newick_desc_list($node);
    return unless (@children);

    my $label  = &gjonewicklib::newick_lbl($node);
    my $cutoff = $cgi->param('bootstrap_cutoff');

    my $is_split = ($label && ($label >= $cutoff)) ? 1 : 0;
    if ($is_split)
    {
        &show_a_split($fig,$cgi,$html,$project,$node,$level,$label);
    }

    my $child_no = 0;
    foreach my $child (@children)
    {
        $child_no++;
        my $child_level = $is_split ? "$level.$child_no" : $level;
        &show_splits1($fig,$cgi,$html,$project,$child,$child_level);
    }
}


#
# Add one "possible collapse" to @$html: a form, framed by horizontal
# rules, with a table of the tips below $node (PEG link, region comparison
# link, genus/species) and a submit button.  The form sends the request
# 'collapse_reps2' with the project name and one 'collapse' value per tip.
#
sub show_a_split {
    my($fig,$cgi,$html,$project,$node,$level,$label) = @_;

    my @tips = &gjonewicklib::newick_tip_list($node);

    my @rows = map { [&link_to_peg($_),
                      &show_comparison_link($fig,$cgi,\@tips,'show regions'),
                      $fig->genus_species($fig->genome_of($_))]
                   } @tips;

    my $headings = ['PEG','Compare Regions for Collapsed','Genus/species'];

    push(@$html,
         $cgi->hr,
         $cgi->start_form(-action => "./check_role_in_ss.cgi"),
         $cgi->hidden( -name => "request", -value => 'collapse_reps2', -override => 1),
         $cgi->hidden( -name => "Project", -value => $project, -override => 1),
         (map { $cgi->hidden( -name => "collapse", -value => $_, -override => 1) } @tips),
         &HTML::make_table($headings,\@rows,"$level: One Possible Collapse - bootstrap=$label"),
         $cgi->submit('Collapse this set'),
         $cgi->end_form,
         $cgi->hr);
}

#
# Collapse the representatives named by the 'collapse' CGI parameter into
# one group.  The group files under Merged/Reps that contain any of those
# sequences are concatenated into one of them and the others are removed;
# reps.fasta is rewritten to keep, of the collapsed sequences, only the
# first one listed.  Nothing happens unless at least two sequences are
# given.
#
sub collapse_reps2 {
    my($fig,$cgi,$html,$project) = @_;

    my @pegs = $cgi->param('collapse');
    return unless (@pegs > 1);

    my $merged  = "$FIG_Config::global/JensenAnalysis/$project/Merged";
    my $repsF   = "$merged/reps.fasta";
    my $repD    = "$merged/Reps";
    my $keeper  = $pegs[0];
    my %to_collapse = map { $_ => 1 } @pegs;

    # grep reports "<file>:><id>"; note which group files hold a sequence
    # that is to be collapsed.
    my %hit_groups = ();
    foreach my $hit (`grep "^>" $repD/*`)
    {
        if (($hit =~ /(group\d+):>(\S+)/) && $to_collapse{$2})
        {
            $hit_groups{$1} = 1;
        }
    }
    my @groups = keys(%hit_groups);

    if (@groups <= 1)
    {
        push(@$html,$cgi->h2("the collapse has probably already been done"));
        return;
    }

    # Fold every other group file into the first one.
    my $target = $groups[0];
    foreach my $group (@groups[1 .. $#groups])
    {
        &FIG::run("cat $repD/$group >> $repD/$target; rm -f $repD/$group");
    }

    my $reps = &gjoalignment::read_fasta_file($repsF);
    my @to_keep = grep { (! $to_collapse{$_->[0]}) || ($_->[0] eq $keeper) } @$reps;
    &gjoalignment::write_fasta_file(\@to_keep,1,$repsF);
    push(@$html,$cgi->h2("collapsed to $pegs[0]"));
}

#
# Build a tree from the alignment $ali with protdist_neighbor, asking for
# the 'gjo' tree format, and return it.
#
sub build_a_tree {
    my($ali) = @_;

    my %options = ( tree_format => 'gjo' );
    my $tree = &protdist_neighbor::protdist_neighbor($ali,\%options);
    return $tree;
}

#
# Add a table of the project's aliases to @$html: alias, genus/species,
# PEG link and assigned function, one row per usable line of
# Merged/aliases.  Lines that are not of the form
# "peg<whitespace>source|alias" are skipped.
#
sub show_aliases {
    my($fig,$cgi,$html,$project) = @_;
    my $dir = "$FIG_Config::global/JensenAnalysis/$project/Merged";

    my $col_hdrs = ['Alias','Genus/Species','PEG','Function'];
    my $tab = [];
    foreach my $line (`cat $dir/aliases`)
    {
        # Take the captures only from a successful match; otherwise an
        # unmatched line would repeat the previous row.
        next unless ($line =~ /^(\S+)\s+[^\|]*\|(\S+)/);
        my($peg,$alias) = ($1,$2);

        my $gs = $fig->genus_species($fig->genome_of($peg));
        push(@$tab,[$alias,$gs,&link_to_peg($peg),scalar $fig->function_of($peg)]);
    }
    push(@$html,&HTML::make_table($col_hdrs,$tab,'Aliases'));
}

#
# Print a complete, self-contained HTML page holding the project's dynamic
# table and exit.  The page is written directly to STDOUT (with its own
# HTTP header), not added to @$html.
#
# There is one row per feature, grouped by cohesion group: group name, a
# gene-neighbourhood link covering the whole group, genome name, SEED id,
# PubMed links and PDB links.  Rows of every second group are shaded.
#
sub show_dynamic_table {
    my($fig,$cgi,$html,$project) = @_;

    my $group_fids = &group_fids($project);
    my $lit        = &read_lit_entries($project);
    my $pdb        = &read_pdb_entries($project);

    # for now get javascript, CSS and images from other site
    my $misc_url = "http://www.theseed.org/Papers/MMBR-Aspartokinase";
    my $title    = "$project, dynamic table";

    print $cgi->header;

    print <<END;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>$title</title>
<script type="text/javascript" src="$misc_url/js/dynamicTable_c.js"></script>
<link rel="stylesheet" href="$misc_url/css/dynamicTable.css" type="text/css" />
</head>
<body>

<table class="dynamicTable">
<thead>
<tr>
END

    # Column headers; the CSS class carries the 1-based column number.
    my @column_titles = ('Cohesion Group', 'Gene Nbd', 'Genome Name', 'SEED ID', 'PubMed', 'PDB');
    foreach my $col (1 .. scalar(@column_titles))
    {
        print qq(<th class="column_1_$col">$column_titles[$col-1]</th>\n);
    }

    print <<END;
</tr>
</thead>

<tbody>
END

    my $group_no = 0;
    foreach my $group ( sort keys %$group_fids )
    {
        $group_no++;

        my $fids = $group_fids->{$group};

        # Odd-numbered groups are plain, even-numbered ones are shaded.
        my $row_start = ($group_no % 2) ? "<tr>\n" : qq(<tr bgcolor="#D6FFAF">\n);

        # One neighbourhood link per group, repeated in each of its rows.
        my $gn_link = join('',
                           qq(<a href="http://seed-viewer.theseed.org/?page=Regions&feature=),
                           join('&feature=', @$fids),
                           qq(" title="gene neighborhoods" target="_blank"><img src="$misc_url/images/gene_nbd.png" border=0 height=25></a>));

        foreach my $fid ( sort @$fids )
        {
            my $fid_link = &link_to_peg($fid);

            my $org = $fig->genus_species(&FIG::genome_of($fid));

            my $pubmed_link = exists($lit->{$fid})
                            ? join(', ', map {&dlit_link($_)} @{ $lit->{$fid} })
                            : '';

            my $pdb_link = exists($pdb->{$fid})
                         ? join(', ', map {&pdb_link($_)} @{ $pdb->{$fid} })
                         : '';

            print $row_start;

            my @cells = ($group, $gn_link, $org, $fid_link, $pubmed_link, $pdb_link);
            foreach my $col (1 .. scalar(@cells))
            {
                print qq(<td class="column_1_$col">$cells[$col-1]</td>\n);
            }
            print "</tr>\n\n";
        }
    }

    print "</tbody>\n",
          "</table>\n",
          "<br>\n",
          qq(<a title="Dynamic Table - A javascript table sort widget." href="http://dynamictable.com">Quick and easy table sorting powered by Dynamic Table</a>\n),
          "</body>\n",
          "</html>\n";

    exit;
}

#
# Merge the sequences of all roles of a project and build the initial
# alignment.
#
# The sequences of every set of every role in the project index are
# gathered into Merged/seqs.fasta.  If any set has not been aligned yet the
# merge is abandoned with a message.  Otherwise representative sequences
# are picked, trimmed and aligned, ids are replaced by aliases where
# aliases exist, and the alignment plus a table of the sequences that were
# dropped are added to @$html.
#
# The CGI parameters 'pad_left' and 'pad_right' are passed to
# trim_sequences.  They must be integers; anything else (including a
# missing parameter) is treated as 0, because the values end up on a shell
# command line.
#
sub merge {
    my($fig,$cgi,$html,$project) = @_;

    my($pad_left,$pad_right) = map { my $value = $cgi->param($_);
                                     (defined($value) && ($value =~ /^\s*(-?\d+)\s*$/)) ? $1 : 0
                                   } ('pad_left','pad_right');

    my $dir = "$FIG_Config::global/JensenAnalysis/$project";

    if (! -s "$dir/index")
    {
        push(@$html,$cgi->h2("This project has not reached the point where a merge makes sense"));
        return;
    }

    my @roles = map { chomp; [split(/\t/,$_)] } `cat $dir/index`;
    &FIG::verify_dir("$dir/Merged");
    open(my $seqs_fh,'>',"$dir/Merged/seqs.fasta")
        || die "could not open $dir/Merged/seqs.fasta: $!";

    # Look at every role even after a failure, so that all the sets still
    # lacking an alignment are reported in one pass.
    my $err = 0;
    foreach my $tuple (@roles)
    {
        my($roleI,$role) = @$tuple;
        $err = 1 if (&gather_role($cgi,$html,$project,$roleI,$seqs_fh));
    }
    close($seqs_fh) || die "could not close $dir/Merged/seqs.fasta: $!";

    if ($err)
    {
        push(@$html,$cgi->h2("You have errors that prevented the merge from being done"));
    }
    else
    {
        # -f: on a first merge there is nothing to remove yet.
        system "rm -rf $dir/Merged/Reps $dir/Merged/reps.fasta";
        &FIG::run("$FIG_Config::bin/representative_sequences -s 0.9 -d $dir/Merged/Reps < $dir/Merged/seqs.fasta > $dir/Merged/reps.fasta");
        &FIG::run("$FIG_Config::bin/trim_sequences -pad_left=$pad_left -pad_right=$pad_right 1.0e-5 0.7 < $dir/Merged/reps.fasta > $dir/Merged/trimmed.fasta; $FIG_Config::bin/align_seqs < $dir/Merged/trimmed.fasta > $dir/Merged/ali.fasta");

        if (-s "$dir/Merged/aliases")
        {
            &rename_ids("$dir/Merged/aliases","$dir/Merged/ali.fasta","$dir/Merged/renamed.fasta");
        }
        else
        {
            system "cp $dir/Merged/ali.fasta $dir/Merged/renamed.fasta";
        }
        push(@$html,`$FIG_Config::bin/alignment_to_html -t < $dir/Merged/renamed.fasta`);
        push(@$html,"<br><br>",&table_of_dropped($fig,$cgi,"$dir/Merged/seqs.fasta","$dir/Merged/ali.fasta"));
    }
}

#
# Return an HTML table (PEG link, genome name, function) of the sequences
# whose ids appear in the FASTA file $seqsF but not in the alignment file
# $aliF.
#
sub table_of_dropped {
    my($fig,$cgi,$seqsF,$aliF) = @_;

    # Ids that made it into the alignment.
    my %aligned = ();
    foreach my $line (`cat $aliF`)
    {
        $aligned{$1} = 1 if ($line =~ /^>(\S+)/);
    }

    my @rows = ();
    foreach my $line (`cat $seqsF`)
    {
        next unless ($line =~ /^>(\S+)/);
        my $peg = $1;
        next if ($aligned{$peg});

        my $genome = &FIG::genome_of($peg);
        push(@rows,[&link_to_peg($peg),$fig->genus_species($genome),scalar $fig->function_of($peg)]);
    }

    my $headings = ['PEG that got dropped','Genome','Function'];
    return &HTML::make_table($headings,\@rows,'PEGs that got Dropped');
}

#
# Append the sequences of every set of role number $roleI to the open
# filehandle $fh.  The sets are those listed (first column) in the role's
# split_info/set.sizes file.  A set without a non-empty renamed.fasta has
# not been aligned yet: this is reported in @$html and counted as an error.
#
# Returns a true value if any error occurred, 0 otherwise.
#
sub gather_role {
    my($cgi,$html,$project,$roleI,$fh) = @_;

    my $err = 0;
    my $roleD = &role_dir($project,$roleI);
    my $dir = "$roleD/split_info";

    # Only lines of the form "<number> <number>" name a set; other lines
    # are skipped so that no stale capture is used.
    my @sets = map { ($_ =~ /^(\d+)\s+(\d+)/) ? $1 : () } `cat $dir/set.sizes`;

    foreach my $set (@sets)
    {
        my $setD  = "$roleD/$set";
        if (-s "$setD/renamed.fasta")
        {
            $err = $err || &gather_set($cgi,$html,$setD,$fh);
        }
        else
        {
            push(@$html,$cgi->h2("$set in Role $roleI has not been aligned yet"));
            $err = 1;
        }
    }
    return $err;
}

#
# Append every sequence of $setD/seqs.fasta to the open filehandle $fh as
# ">id comment" followed by the sequence on a single line.
#
# Returns 0 (no error).  The caller uses the result as an error flag, and
# the value of a sub that ends in a foreach loop is unspecified, so the
# return is explicit.
#
sub gather_set {
    my($cgi,$html,$setD,$fh) = @_;

    my @seqs = &gjoalignment::read_fasta("$setD/seqs.fasta");
    foreach my $tuple (@seqs)
    {
        my($id,$comment,$seq) = @$tuple;
        print $fh ">$id $comment\n$seq\n";
    }
    return 0;
}


__DATA__
<h1> The Plan</h1>

The overall plan is to provide support for clarifying the evolutionary history
of sets of proteins that all implement a common functional role (sometimes fused
with other domains that play distinct roles).  At this point in time, we are just learning
the details of what can be learned -- we are working with sequences from a fraction of a percent of
what might be called distinct species (or distinct "operational taxonomic unit", or whatever).
We simply have not got a lot of data, but a great deal more will be arriving in the next few
years.  Our goal here is to help offer a framework where people can study individual functional roles
in increasing detail.  Our belief is that this detailed study will set the stage for drawing
the insights needed to move forward.  We will try to provide a framework for supporting
detailed reviews covering specific functional roles.

<h2>The Deliverables</h2>

What does someone studying a functional role need to try to unravel the evolutionary
history of the genes and proteins that implement it?
Minimally, we are trying to provide the following:

<ol>
<li>
First, we need to gather a set of sequences that we believe implement the functional role.
We plan on excluding truncated and frameshifted sequences.

<li>
We will split the set of sequences into groups that will be represented in the overall analysis 
by a single <i>representative sequence</i>.

<li>
We will supply a <i>trimmed alignment of the representative sequences</i>
in which extra domains have been deleted and the highly variable
parts of the ends have been removed.

<li>
For each group containing more than a single sequence, we will provide a trimmed alignment
of the set of sequences.  Each such group represents what is believed to be a distinguishable,
clearly separable, subtree.  That is, they are a phylogenetically related group of sequences
that (through some unspecified means) are separable from other sequences in the collection,
and we choose to represent the group by a single sequence.  Jensen calls these groups <i>cohesion groups</i>. 

<li>
We will provide phylogenetic trees for the overall set of representative sequences and for each cohesion group.

<li>
We will support the identification of which sequences have associated crystal structures.

<li>

We will support the collection of detailed sets of literature references attached to individual sequences.

<li>
We will provide a table that integrates the data: a <i>dynamic table</i> that supports a very simple, fairly powerful
user interface to study detailed events.

</ol>

<h2>The Steps </h2>

<ol>
<li>
We begin by gathering the sequences that are attached to specific functional roles within subsystems
maintained in the SEED.  Individual sequences that are believed to contain frameshifts or truncations
should be marked by a comment attached to the function.  We provide a tool that breaks the overall set
into subsets of sequences that all look more-or-less similar.  Unusually short (or long) sequences are
broken into separate sets for examination.  The first stage of the analysis involves just doing the manual
effort needed to weed out the bad sequences.

<li>
The next step is to construct abbreviations or acronyms to be used for each sequence.  The SEED
identifiers are not useful to a biologist who needs clues as to genus and species.  We construct
identifiers that can be used as reasonable aliases of the SEED identifiers.

<li>
We collect the literature references and the links to known crystal structures.

<li>
Then, we build an initial alignment by
<ul>
<li>
collapsing sets of very close sequences into groups that will be represented by single representatives,
<li> gathering and trimming the representative sequences,
<li> forming an initial alignment, and
<li> doing a visual inspection to see if we got the correct domains (trimming off extra domains).
</ul>
<li>The representative sequences, groups that are represented, and the initial alignment represent the starting point
for forming the actual <i>cohesion groups</i>.
We now begin the iterative process that produces the final cohesion groups.
<li>
Having created the cohesion groups, we proceed to support manual selection of the representative sequences 
(allowing the user to pick sequences that have unusually large amounts of associated literature, crystal structures, or whatever).
<li>
We then create the <i>dynamic table</i> which supports the visual analysis of sets of sequences,
their chromosomal contexts, fusion events, and so forth.
<li>We construct the overall tree of the representative sequences.
<li>We construct alignments for all cohesion groups containing two or more sequences, and we construct
trees for those containing four or more sequences.
</ol>

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3