[Bio] / FigKernelPackages / P2Pupdate.pm Repository:
ViewVC logotype

View of /FigKernelPackages/P2Pupdate.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.36 - (download) (as text) (annotate)
Mon Aug 9 14:38:55 2004 UTC (15 years, 7 months ago) by overbeek
Branch: MAIN
Changes since 1.35: +9 -4 lines
export only current assignments (not all that were made)

package P2Pupdate;

use strict;

use FIG_Config;
use FIG;
use Carp;
use Data::Dumper;
use Cwd;

=pod

=head1 updating code

This routine packages what is needed to upgrade an older system to the 
current code.  Code releases are numered

     p1n1.p2n2.p3n3...

where "." is added at the point the code moved to another branch of
the tree.  FIG, who provided the initial release of the SEED, will
number all of their code releases as 

       FIGn  

where n is an integer. Suppose that between releases 13 and 14 a
second group (which we will term "Idiots" for convenience) took
release 13 and wished to branch the code tree.  At that point, they
would name their first release as

	FIG13.Idiots1

We are, of course, being both cavalier and nasty when we make such a
reference.  We do, however, wish to express the view that it will
benefit everyone to attempt to reconcile differences and maintain a
single code progression as long as possible.  There are often good
reasons to part ways, but we urge people to think carefully before
taking such a step.

Two code releases 

    i1.i2.i3...in
and j1.j2.j3...jm with m <= n

are compatible iff 

    ip == jp  for p < m, and
    jm and im have the same "source" and
    jm <= im

A new code release must have the property that it can bring any
"older" compatible release up to its release.

Note that there is an issue relating to the code to build/install packages.
Since a system may be radically restructured between releases of code, the
code to build a "package" and the code to "install" a package are radically 
separated.  For example, the code in P2Pupdate.pm for building an assignment
package and the code for installing an assignment package both apply to the
release of code current on the system containing P2Pupdate.pm.  In fact, the
code releases may be quite different on two synchronizing systems.

To make things work the following rules must be observed:

    1. a code release is a tar file containing VERSION, Packages,
       bin/ToolTemplates, and CGI/ToolTemplates.  The installing system needs
       to place these at the appropriate spots, and then run bring_system_up_to_date,
       which is supposed to do any required restructuring.

    2. an assignments package is a tar file containing a single directory.  The directory
       contains subdirectories -- one per genome.  Each genome subdirectory contains zero
       or more files.  The name of the file is the "user" and the contents will be the
       assignments made by that user.

    3. an annotations package is a tar file containing a single directory.  The files in 
       the directory are named by genome. They contain the annotations for the genome.

=cut

=pod

=head1 what_code_do_I_have

usage: &what_code_do_I_have($fig_base)

This just returns the current version of the code.

=cut

sub what_code_do_I_have {
    my($fig_base) = @_;

    my $version = &FIG::file_read("$fig_base/VERSION");
    chomp $version;
    return $version;
}

=pod

=head1 updatable_code

usage: &updatable_code_code($v1,$v2)

    This just returns true iff the two versions of code are compatible and $v1
    is "more recent".

=cut

sub updatable_code {
    my($v1,$v2) = @_;
    my($i,$v1p,$v1n,$v2p,$v2n);

    my @v1 = split(/\./,$v1);
    my @v2 = split(/\./,$v2);
    if (@v1 < @v2) { return 0 }

    for ($i=0; ($i < $#v2) && ($v1[$i] eq $v2[$i]); $i++) {}
    if ($i == $#v2)
    {
	$v1[$i] =~ /^(.*[^\d])(\d+)$/;
	$v1p = $1;
	$v1n = $2;

	$v2[$i] =~ /^(.*[^\d])(\d+)$/;
	$v2p = $1;
	$v2n = $2;

	return (($v2p eq $v1p) && ($v2n < $v1n));
    }
    return 0;
}

=pod

=head1 package_code

usage: &package_code($fig_disk,$file)

$fig_base must be an absolute filename (begins with "/") giving the FIG from which
   the updated code release will be taken.

$file must be an absolute filename where the "code package" will be built.

=cut

sub package_code {
    my($fig_disk,$file) = @_;

    &force_absolute($fig_disk);
    &force_absolute($file);
    my @tmp = &FIG::file_head("$fig_disk/CURRENT_RELEASE", 1);
    my $current_release = $tmp[0];
    chomp $current_release;

    &FIG::run("cd $fig_disk/dist/releases; tar czf $file $current_release");
}

sub force_absolute {
    my($file) = @_;

    if (substr($file,0,1) ne "/")
    {
	print "Error: Please use absolute file names (i.e., /Users/fig/... or /home/fig/...)\n";
	exit;
    }
}

=pod

=head1 install_code

usage: &install_code($fig_disk,$package)

$fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.

$package must be an absolute filename where the "code package" from which to make
    the update exists.

Note that this routine does not check that the updated code is compatible, or even less
current.  It is assumed that upper level logic is doing that.

=cut

sub install_code {
    my($fig_disk,$package) = @_;
    my $fig_base = "$fig_disk/FIG";
    &force_absolute($fig_base);
    &force_absolute($package);

    if (getcwd() !~ /FIGdisk$/) { print die "Sorry, you must run this while in $FIG_Config::fig_disk" }
	
    
    (! -d "$fig_disk/BackupCode") || &FIG::run("rm -rf $fig_disk/BackupCode");
    mkdir("$fig_disk/BackupCode",0777) || die "Could not make the BackupCode directory";
    (! -d "$fig_disk/BackupEnv") || &FIG::run("rm -rf $fig_disk/BackupEnv");
    mkdir("$fig_disk/BackupEnv",0777) || die "Could not make the BackupEnv directory";

    my $version = &what_code_do_I_have($fig_base);
    &FIG::run("cd $fig_disk; mv README install lib man env src $fig_disk/BackupEnv");
    &FIG::run("cd $fig_base; mv VERSION Packages CGI $fig_disk/BackupCode");
    print STDERR "made backups\n";

    &FIG::run("cd $fig_disk; tar xzf $package");
    print STDERR "untarred new code\n";

    &fix_config("$fig_base/Packages/FIG_Config.pm","$fig_disk/BackupCode/Packages/FIG_Config.pm");
    &FIG::run("cd $fig_base/bin; touch ToolTemplates/*/*; make all");
    &FIG::run("cd $fig_base/CGI; touch ToolTemplates/*/*; make all");
    print STDERR "installed new bin and CGI\n";

    &FIG::run("bring_system_up_to_date $version");
}

=pod

=head1 package_lightweight_code

usage: &package_lightweight_code($fig_disk,$file)

$fig_base must be an absolute filename (begins with "/") giving the FIG from which
   the updated code release will be taken.

$file must be an absolute filename where the "code package" will be built.

=cut

sub package_lightweight_code {
    my($fig_disk,$file) = @_;

    &force_absolute($fig_disk);
    &force_absolute($file);
    my @tmp = &FIG::file_head("$fig_disk/CURRENT_RELEASE", 1);
    my $current_release = $tmp[0];
    chomp $current_release;

    &FIG::run("cd $fig_disk/dist/releases; tar czf $file $current_release");
}

=pod

=head1 install_lightweight_code

usage: &install_lightweight_code($fig_disk,$package)

$fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.

$package must be an absolute filename where the "code package" from which to make
    the update exists.

Note that this routine does not check that the updated code is compatible, or even less
current.  It is assumed that upper level logic is doing that.

=cut

sub install_lightweight_code {
    my($fig_disk,$package) = @_;
    my $fig_base = "$fig_disk/FIG";
    &force_absolute($fig_base);
    &force_absolute($package);

    if (! mkdir("$fig_disk/Tmp$$",0777))
    {
	print "Error: could not make $fig_disk/Tmp$$\n";
	exit;
    }

    &FIG::run("cd $fig_disk/Tmp$$; tar xzf $package");
    if (! opendir(TMP,"$fig_disk/Tmp$$"))
    {
	print "Error: could not open $fig_disk/Tmp$$\n";
	exit;
    }

    my @rels = grep { $_ !~ /^\./ } readdir(TMP);
    closedir(TMP);
    if (@rels != 1)
    {
	print "Error: Bad code package: $package\n";
	exit;
    }

    my $new_release = $rels[0];
    if (-d "$fig_disk/dist/releases/$new_release")
    {
	print "Error: $new_release already exists; we are doing nothing\n";
	exit;
    }

    &FIG::run("mv $fig_disk/Tmp$$/$new_release $fig_disk/dist/releases");
    &FIG::run("rm -rf $fig_disk/Tmp$$");

    #
    # Ugh. For now, find the arch in the fig config file $fig_disk/config/fig-user-env.sh"
    #

    my $arch;
    open(FH, "<$fig_disk/config/fig-user-env.sh");
    while (<FH>)
    {
	if (/RTARCH="(.*)"/)
	{
	    $arch = $1;
	    last;
	}
    }
    close(FH);

    if ($arch eq "")
    {
	die "Couldn't determine SEED install architecture, not switching to release.";
    }
    
    $ENV{RTARCH} = $arch;

    #
    # Need to put the ext_bin in the path.
    #

    $ENV{PATH} .= ":$FIG_Config::ext_bin";
	
    &FIG::run("$FIG_Config::bin/switch_to_release $new_release");
}

    
sub fix_config {
    my($new,$old) = @_;
    my($line,$i);

    my @new = &FIG::file_read($new);
    foreach $line (&FIG::file_read($old))
    {
	if ($line =~ /^(\S+)\s+\=/)
	{
	    my $var = $1;
	    my $varQ = quotemeta $var;

	    for ($i=0; ($i < $#new) && ($new[$i] !~ /^$varQ\s+\=/); $i++) {}
	    if ($i == $#new)
	    {
		splice(@new,$i,0,$line);
	    }
	    else
	    {
		splice(@new,$i,1,$line);
	    }
	}
    }
    open(NEW,">$new") || confess "could not overwrite $new";
    foreach $line (@new)
    {
	print NEW $line;
    }
    close(NEW);
}

=pod

=head1 what_genomes_will_I_sync 

usage: &what_genomes_will_I_sync($fig_base,$who)

This routine returns the list of genome IDs that you are willing to sync with $who.

=cut

sub what_genomes_will_I_sync {
    my($fig_base,$who) = @_;

# This is the promiscuous version - it will sync all genomes with anyone.

    opendir(GENOMES,"$fig_base/Data/Organisms") || die "could not open $fig_base/Data/Organisms";
    my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(GENOMES);
    closedir(GENOMES);
    return @genomes;
}

=pod

=head1 package_annotations

usage: &package_annotations($fig,$genomes,$file)

$genomes is a pointer to a list of genome IDs that will be exchanged.

$file must be an absolute filename where the "annotation package" will be built.

=cut

sub package_annotations {
    my($fig,$who,$date,$genomes,$file) = @_;
    my $fig_base = "$FIG_Config::fig_disk/FIG";

    if (open(ANNOTATIONS,">$file"))
    {
	my @annotations = sort { $a->[0] cmp $b->[0] } $fig->annotations_made($genomes,$who,$date);
	foreach my $x (@annotations)
	{
	    print ANNOTATIONS join("\n",@$x),"\n///\n";
	}
	print ANNOTATIONS "//\n";

	foreach my $x (@annotations)
	{
	    my $peg = $x->[0];
	    my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);
	    print ANNOTATIONS join("\t",($peg,join(",",@aliases),$fig->genus_species($fig->genome_of($peg)),scalar $fig->function_of($peg))) . "\n";
	}
	print ANNOTATIONS "//\n";

	foreach my $x (@annotations)
	{
	    my $peg;
	    ($peg,undef) = @$x;
	    my $seq = $fig->get_translation($peg);
	    &FIG::display_id_and_seq($peg,\$seq,\*ANNOTATIONS);
	}
	close(ANNOTATIONS);
    }
}


=pod

=head1 install_annotations

usage: &install_annotations($fig_disk,$package)

$fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.

$package must be an absolute filename where the "annotations package" from which to make
    the update exists.

=cut

sub install_annotations {
    my($fig,$package) = @_;
    my($user,$who,$date,$userR,@assignments,$peg,$aliases,$org,$func);
    my(%pegs,%seq_of,@seq,$peg_to,$trans_pegs,$seq,$line,@ann,$ann);
    my($genome);

    my $fig_disk = $FIG_Config::fig_disk;
    open(IN,"<$package") || die "could not open $package";
    $/ = "\n//\n";
    if (defined($line = <IN>))
    {
	my(@annotations);
	
	$line =~ s/\n\/\/\n/\n/s;
	$line =~ s/\n\/\/\/\n//s;
	@ann = split(/\n\/\/\/\n/,$line);
	foreach $ann (@ann)
	{
	    if ($ann =~ /^(fig\|\d+\.\d+\.peg\.\d+)\n(\d+)\n(\S+)\n(.*)/s)
	    {
		push(@annotations,[$1,$2,$3,$4]);
	    }
	}
	$/ = "\n";
	while ($line && defined($line = <IN>) && ($line !~ /^\/\//))
	{
	    chomp $line;
	    ($peg,$aliases,$org,$func) = split(/\t/,$line);
	    $pegs{$peg} = [$aliases,$org,$func];
	}
    
	if ($line) { $line = <IN> }
	while (defined($line) && ($line !~ /^\/\//))
	{
	    if ($line =~ /^>(\S+)/)
	    {
		$peg = $1;
		@seq = ();
		while (defined($line = <IN>) && ($line !~ /^[>\/]/) && ($line !~ /^\/\//))
		{
		    push(@seq,$line);
		    $line = <IN>;
		}
		$seq = join("",@seq);
		$seq =~ s/[ \n\t]//gs;
		$seq_of{$peg} = uc $seq;
	    }
	}
	close(IN);
	$trans_pegs = $fig->translate_pegs(\%pegs,\%seq_of);
	@annotations = sort { ($a->[0] cmp $b->[0]) or ($a->[1] <=> $b->[1]) }
                       map { ($peg = $trans_pegs->{$_->[0]}) ? [$peg,$_->[1],$_->[2],$_->[3]] : () }
	               @annotations;

	if (-d "$fig_disk/BackupAnnotations") { system "rm -rf $fig_disk/BackupAnnotations" }
	mkdir("$fig_disk/BackupAnnotations",0777);
	mkdir("$fig_disk/BackupAnnotations/New",0777);
	my $i;
	for ($i=0; ($i < @annotations); $i++)
	{
	    if (($i == 0) || ($fig->genome_of($annotations[$i]->[0]) ne $fig->genome_of($annotations[$i-1]->[0])))
	    {
		if ($i != 0)
		{
		    close(OUT);
		}
		$genome = $fig->genome_of($annotations[$i]->[0]);
		open(OUT,">$fig_disk/BackupAnnotations/New/$genome")
		    || die "could not open $fig_disk/BackupAnnotations/New/$genome";
	    }
	    print OUT join("\n",@{$annotations[$i]}),"\n//\n";
	}
	if ($i > 0) { close(OUT) }
    }

    opendir(TMP,"$fig_disk/BackupAnnotations/New") || die "could not open $fig_disk/BackupAnnotations/New";
    my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(TMP);
    closedir(TMP);
    foreach $genome (@genomes)
    {
	next if (! -d "$fig_disk/FIG/Data/organisms/$genome");

	print STDERR "installing $fig_disk/FIG/Data/Organisms/$genome/annotations\n";
	if (-s "$fig_disk/FIG/Data/Organisms/$genome/annotations")
	{
	    &FIG::run("cp -p $fig_disk/FIG/Data/Organisms/$genome/annotations $fig_disk/BackupAnnotations/$genome");
	    &FIG::run("$FIG_Config::bin/merge_annotations $fig_disk/BackupAnnotations/$genome $fig_disk/BackupAnnotations/New/$genome > $fig_disk/FIG/Data/Organisms/$genome/annotations");
	}
	else
	{
	    &FIG::run("cp $fig_disk/BackupAnnotations/New/$genome $fig_disk/FIG/Data/Organisms/$genome/annotations");
	}
	chmod 02777,"$fig_disk/FIG/Data/Organisms/$genome/annotations";
    }
    &FIG::run("$FIG_Config::bin/index_annotations");
}

=pod

=head1 restore_annotations

usage: &restore_annotations($fig_disk);

$fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.

=cut

sub restore_annotations {
    my($fig_disk) = @_;

    &force_absolute($fig_disk);
    (-d "$fig_disk/BackupAnnotations") || die "could not find an active backup";
    opendir(TMP,"$fig_disk/BackupAnnotations") || die "could not open $fig_disk/BackupAnnotations";
    my @genomes = grep { $_ =~ /^\d+\.\d+$/ } readdir(TMP);
    closedir(TMP);
    foreach my $genome (@genomes)
    {
	unlink("$fig_disk/FIG/Data/Organisms/$genome/annotations");
	&FIG::run("cp $fig_disk/BackupAnnotations/$genome $fig_disk/FIG/Data/Organisms/$genome/annotations");
	system "chmod 2777 $fig_disk/FIG/Data/Organisms/$genome/annotations";
    }
    &FIG::run("$FIG_Config::bin/index_annotations");
}

=pod

=head1 package_aassignments

usage: package_assignments($fig,$user,$who,$date,$genomes,$file)

$user designates the user wishing to get the assignments

$who designates whose assignments you want (defaults to "master")

$date if given indicates a point in time (get assignments after that point)

$genomes is a pointer to a list of genome IDs that will be exchanged.

$file must be an absolute filename where the "assignment package" will be built.

=cut

sub package_assignments {
    my($fig,$user,$who,$date,$genomes,$file) = @_;
    my($genome,$x,$org,$curr,$peg);
    $who   = $who ? $who : "master";
    $date  = $date ? $date : 0;

    if (open(ASSIGNMENTS,">$file"))
    {
	print ASSIGNMENTS "$user\t$who\t$date\n";
	my @assignments = sort { $a->[0] cmp $b->[0] } $fig->assignments_made_full($genomes,$who,$date);
	my @curr_assignments = ();
	foreach $x (@assignments)
	{
	    my($peg, $function, undef, undef) = @$x;
	    if ($function eq $fig->function_of($peg,$who))
	    {
		print ASSIGNMENTS join("\t", $peg, $function),"\n";
		push(@curr_assignments,$x);
	    }
	}
	print ASSIGNMENTS "//\n";

	foreach $x (@curr_assignments)
	{
	    ($peg,undef) = @$x;
	    my @aliases = grep { $_ =~ /^(sp\||gi\||pirnr\||kegg\||N[PGZ]_)/ } $fig->feature_aliases($peg);

	    my $alias_txt = join(",",@aliases);
	    my $gs_txt = $fig->genus_species($fig->genome_of($peg));
	    my $func_txt = scalar $fig->function_of($peg);
	    
	    print ASSIGNMENTS join("\t",($peg,
					 $alias_txt,
					 $gs_txt,
					 $func_txt)) . "\n";
	}
	print ASSIGNMENTS "//\n";

	foreach $x (@curr_assignments)
	{
	    ($peg,undef) = @$x;
	    my $seq = $fig->get_translation($peg);
	    &FIG::display_id_and_seq($peg,\$seq,\*ASSIGNMENTS);
	}
	close(ASSIGNMENTS);
    }
}

=pod

=head1 install_assignments

usage: &install_assignments($package)

$package must be a filename where the "assignments package" from which to make
    the assignment set exists

=cut

sub install_assignments {
    my($fig,$package,$make_assignments) = @_;
    my($user,$who,$date,$userR,@assignments,$peg,$aliases,$org,$func);
    my(%pegs,%seq_of,@seq,$peg_to,$trans_pegs,$seq);

    open(IN,"<$package") || die "could not open $package";
    my $line = <IN>;
    chomp $line;
    ($user,$who,$date) = split(/\t/,$line);
    $userR = $user;
    $userR =~ s/^master://;

    while (defined($line = <IN>) && ($line !~ /^\/\//))
    {
	if ($line =~ /^(fig\|\d+\.\d+\.peg\.\d+)\t(\S.*\S)/)
	{
	    push(@assignments,[$1,$2]);
	}
    }
    while ($line && defined($line = <IN>) && ($line !~ /^\/\//))
    {
	chomp $line;
	($peg,$aliases,$org,$func) = split(/\t/,$line);
	$pegs{$peg} = [$aliases,$org,$func];
    }
    
    if ($line) { $line = <IN> }
    while (defined($line) && ($line !~ /^\/\//))
    {
	if ($line =~ /^>(\S+)/)
	{
	    $peg = $1;
	    @seq = ();
	    while (defined($line = <IN>) && ($line !~ /^[>\/]/) && ($line !~ /^\/\//))
	    {
		push(@seq,$line);
		$line = <IN>;
	    }
	    $seq = join("",@seq);
	    $seq =~ s/[ \n\t]//gs;
	    $seq_of{$peg} = uc $seq;
	}
    }
    close(IN);
    $trans_pegs = $fig->translate_pegs(\%pegs,\%seq_of);

    &FIG::verify_dir("$FIG_Config::data/Assignments/$userR");
    my $file = &FIG::epoch_to_readable($date) . ":$who:imported";
    $file =~ s/\//-/g;

    if (! $make_assignments)
    {
	open(OUT,">$FIG_Config::data/Assignments/$userR/$file") 
	    || die "could not open $FIG_Config::data/Assignments/$userR/$file";
    }

    foreach $peg (keys(%$trans_pegs))
    {
	$peg_to = $trans_pegs->{$peg};
	$func   = $pegs{$peg}->[2];
	if ($fig->function_of($peg_to) ne $func)
	{
	    if ($make_assignments)
	    {
		if ($user =~ /master:(.*)/)
		{
		    $userR = $1;
		    $fig->assign_function($peg_to,"master",$func,"");
		    if ($userR ne "none")
		    {
			$fig->add_annotation($peg_to,$userR,"Set master function to\n$func\n");
		    }
		}
		else
		{
		    $fig->assign_function($peg_to,$user,$func,"");
		    if ($user ne "none")
		    {
			$fig->add_annotation($peg_to,$user,"Set function to\n$func\n");
		    }
		}
	    }
	    else
	    {
		print OUT "$peg_to\t$func\n";
	    }
	}
    }
    if (! $make_assignments)
    {
	close(OUT);
	if (! -s "$FIG_Config::data/Assignments/$userR/$file") { unlink("$FIG_Config::data/Assignments/$userR/$file") }
    }
}

=pod

=head1 package_translation_rules

usage: &package_translation_rules($fig_base,$file)

$fig_base must be an absolute filename (begins with "/") giving the FIG from which
   the updated code release will be taken.

$file must be an absolute filename where the "translation_rules package" will be built.

=cut

sub package_translation_rules {
    my($fig_base,$file) = @_;

    &FIG::run("cp $fig_base/Data/Global/function.synonyms $file");
}

=pod

=head1 install_translation_rules

usage: &install_translation_rules($fig_disk,$from,$package)

$fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.

$package must be an absolute filename where the "translation_rules package" from which to make
    the update exists.

=cut

sub install_translation_rules {
    my($fig_disk,$from,$package) = @_;

    my $file = "$fig_disk/FIG/Data/Global/function.synonyms";
    &force_absolute($fig_disk);
    if (-d "$fig_disk/BackupTranslation_Rules") { system "rm -rf $fig_disk/BackupTranslation_Rules" }
    mkdir("$fig_disk/BackupTranslation_Rules",0777);
    chmod 02777,"$fig_disk/BackupTranslation_Rules";
    if (-s $file)
    {
	&FIG::run("cp $file $fig_disk/BackupTranslation_Rules");
    }
    &FIG::run("$FIG_Config::bin/merge_translation_rules $fig_disk/BackupTranslation_Rules/function.synonyms $package $from > $file");
    chmod 02777,$file;
}

=pod

=head1 restore_translation_rules

usage: &restore_translation_rules($fig_disk);

$fig_disk must be an absolute filename (begins with "/") giving the FIG to be updated.

=cut

sub restore_translation_rules {
    my($fig_disk) = @_;

    &force_absolute($fig_disk);

    my $file = "$fig_disk/FIG/Data/Global/function.synonyms";
    (-s "$fig_disk/BackupTranslation_Rules/function.synonyms") || die "could not find an active backup";
    if (-s "$fig_disk/BackupTranslation_Rules/function.synonyms")
    {
	&FIG::run("cp $fig_disk/BackupTranslation_Rules/function.synonyms $file");
	chmod 02777, $file;
    }
}

sub package_subsystems {
    my($fig,$file,$just_exchangable) = @_;
    my($ssa);

    $just_exchangable = defined($just_exchangable) ? $just_exchangable : 1;
    my @exchangable = grep { (! $just_exchangable) || $fig->is_exchangable_subsystem($_) }
                      $fig->all_subsystems;

    my $fig = new FIG;
    if ((@exchangable > 0) && open(SUB,">$file"))
    {
	foreach $ssa (@exchangable)
	{
#	    print STDERR "writing $ssa to $file\n";
	    my($spreadsheet,$notes) = $fig->exportable_subsystem($ssa);
	    print SUB join("",@$spreadsheet),join("",@$notes),"########################\n";
	}
	close(SUB);
    }
    else
    {
#	print STDERR &Dumper(\@exchangable,$file);
    }
}

sub install_subsystems {
    my($fig,$package) = @_;

    &FIG::run("$FIG_Config::bin/import_subsystems master last_release < $package");
}

package SubsystemFile;

use Data::Dumper;
use strict;

sub new
{
    my($class, $qdir, $file, $fig) = @_;
    my(@info);

    @info = FIG::file_head($file, 4);
    if (!@info)
    {
	warn "Cannot open $file\n";
	return undef;
    }

    chomp(@info);

    my $name = $info[0];
    my $version = $info[1];
    my $exc = $info[2];

    my @c = split(/\t/, $info[3]);

    my $curator = $c[1];

    my $self = {
	qdir => $qdir,
	file => $file,
	name => $name,
	version => $version,
	exchangable => $exc,
	curator => $curator,
	fig => $fig,
    };

    return bless($self, $class);
		  
}

#
# Load the export file into internal data structures.
#
# It's structured as
#
# name
# version
# exchangable
# creation date <tab> curator <tab> "started"
# //
# roles
# //
# subsets
# //
# spreadsheet
# //
# assignments
# //
# sequences
# //
# notes
# //
#
# Subsections:
#
# roles:
#
#    abbr <tab> role-name
#
# subsets has meaning to the acutal subsystems, but we'll use it as a string.
#
# spreadsheet:
#
#    genome <tab> variant <tab> items
#
# Where items is tab-separated columns, each of which is comma-separated peg number in the genome
#
# assignments:
#
#  fid <tab> aliases <tab> organism <tab> function
#
# sequences:
#
#  list of fasta's
#
# notes:
#
#  plain text
#
sub load
{
    my($self) = @_;

    my $fig = $self->{fig};

    my($fh);

    open($fh, "<$self->{file}") or die "Cannot open $self->{file}: $!\n";

    #
    # Skip intro section
    #

    while (<$fh>)
    {
	chomp;
	last if m,^//,;
    }

    #
    # Read the roles.
    #


    my $nroles;
    
    while (<$fh>)
    {
	last if m,^//,;
	
	$self->{role_text} .= $_;
	chomp $_;

	my($abbr, $role) = split(/\t/);

	warn "Have role $role\n";
	
	push(@{$self->{roles}}, $role);
	push(@{$self->{abbrs}}, $abbr);

	$nroles++;
    }

    #
    # Read in subsets as a string.
    #

    while (<$fh>)
    {
	last if m,^//,;

	$self->{subsets_text} .= $_;
    }

    #
    # Read the spreadsheet.
    #

    while (<$fh>)
    {
	last if m,^//,;

	$self->{spreadsheet_text} .= $_;

	chomp;

	my($genome, $variant, @items) = split(/\t/, $_, $nroles + 2);

	push(@{$self->{genomes}}, $genome);

	my $gobj = GenomeObj->new($self, $fig, $genome, $variant, [@items]);

	$self->{genome_objs}->{$genome} = $gobj;
    }

    #
    # Read PEG info
    #

    while (<$fh>)
    {
	last if m,^//,;

	chomp;

	my ($peg, $aliases, $org, $func) = split(/\t/);

	push(@{$self->{pegs}}, [$peg, $aliases, $org, $func]);
    }

    #
    # Read sequence info
    #

    my($cur, $cur_peg);

    while (<$fh>)
    {
	if (/^>(fig\|\d+\.\d+\.peg\.\d+)/)
	{
	    if ($cur)
	    {
		$cur =~ s/\s+//gs;
		$self->{peg_seq}->{$cur_peg} = $cur;
	    }
	    $cur_peg = $1;
	    $cur = '';
	}
	elsif (m,^//,)
	{
	    $cur =~ s/\s+//gs;
	    $self->{peg_seq}->{$cur_peg} = $cur;
	    last;
	}
	else
	{
	    $cur .= $_;
	}
    }

    #
    # Read notes as a string
    #

    while (<$fh>)
    {
	last if m,^//,;

	$self->{notes_txt} .= $_;
    }
	    
}

#
# Analyze this subsystem for compatibility with this SEED install.
#
# Returns three lists:
#
# A major conflict list, consisting of tuples
# [$ss_peg, $ss_func, $loc_peg, $loc_func, $subs] where $ss_peg
# is the peg in the subsystem being analyzied, and $ss_func is
# its assigned function in that subsystem. $loc_peg is the peg
# in the local SEED, and $loc_func its local assignment. $subs is
# the list of pairs [$subsystem_name, $role] denoting the subsystem(s)
# that $loc_peg particpates in.
#
# A conflict is flagged if the local function is different than
# the one being imported, and if the local peg is in a subsystem.
#
# A minor conflict list, consisting of tuples [$ss_peg, $ss_func, $loc_peg, $loc_func].
#
#
# The second list is a list of subsystem pegs that do not have
# a local equivalent. Each entry is a triple
# [peg, orgname, function].
#

sub analyze
{
    my($self) = @_;
    my $fig = $self->{fig};

    #
    # First we map the PEGs in this subsystem to PEGs in the
    # local SEED.
    #
    # translate_pegs requires a hash of peg->[aliases] as the first argument,
    # and a hash of peg->sequence as the second argument.
    #

    my %pegs;
    my %seqs_of;

    for my $pegent (@{$self->{pegs}})
    {
	my($peg, $aliases, $org, $func) = @$pegent;
	$pegs{$peg} = [$aliases, $org, $func];
	$seqs_of{$peg} = $self->{peg_seq}->{$peg};
    }

    my $tran_peg = $fig->translate_pegs(\%pegs, \%seqs_of);

    #
    # tran_peg is now a hash from subsystem_peg->local_peg
    #

    #
    # Write the translations out to a file in the queue directory
    # for use during installation.
    #

    {
	open(my $fh, ">$self->{qdir}/peg_translation");
	for my $p (keys(%$tran_peg))
	{
	    my $tp = $tran_peg->{$p};
	    print $fh "$p\t$tp\n";
	}
	close($fh);
    }
    
    #
    # Now we walk the PEGs, determining a) which are missing
    # in the local SEED, and b) which have a conflicting assignment.
    #

    my($conflict, $minor_conflict, $missing);
    $conflict = [];
    $missing = [];

    for my $pegent (@{$self->{pegs}})
    {
	my($ss_peg, undef, $ss_org, $ss_func) = @$pegent;
	
	if (my $loc_peg = $tran_peg->{$ss_peg})
	{
	    my $loc_func = $fig->function_of($loc_peg);
	    my @subs = $fig->subsystems_for_peg($loc_peg);

	    #
	    # If the functions don't match, it's a conflict.
	    # If the local function is in a subsystem, it's a major
	    # conflict. If it's not, it's a minor conflict.
	    #
	    
	    if ($loc_func ne $ss_func)
	    {
		push(@$conflict, [$ss_peg, $ss_func, $loc_peg, $loc_func, [@subs]]);
	    }
	}
	else
	{
	    push(@$missing, [$ss_peg, $ss_org, $ss_func]);
	}
    }

    return ($conflict, $missing);
}


sub name
{
    my($self) = @_;
    return $self->{name};
}


sub version
{
    my($self) = @_;
    return $self->{version};
}

sub exchangable
{
    my($self) = @_;
    return $self->{exchangable};
}

sub curator
{
    my($self) = @_;
    return $self->{curator};
}

package GenomeObj;

sub new
{
    my($class, $subfile, $fig, $genome, $variant, $items) = @_;

    my $self = {
	fig => $fig,
	subfile => $subfile, 
	genome => $genome,
	variant => $variant,
	items => $items,
    };
    return bless($self, $class);
	
}


1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3