[Bio] / FigKernelScripts / import_similarity_bundle.pl Repository:
ViewVC logotype

View of /FigKernelScripts/import_similarity_bundle.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Tue Feb 5 02:34:33 2008 UTC (11 years, 9 months ago) by parrello
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, rast_rel_2008_06_18, myrast_rel40, rast_rel_2008_06_16, mgrast_dev_05262011, rast_rel_2008_12_18, mgrast_dev_04082011, rast_rel_2008_07_21, rast_rel_2010_0928, rast_2008_0924, mgrast_version_3_2, mgrast_dev_12152011, rast_rel_2008_04_23, mgrast_dev_06072011, rast_rel_2008_09_30, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, rast_rel_2011_0119, mgrast_rel_2008_0625, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, rast_rel_2008_10_09, mgrast_dev_04012011, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, rast_rel_2010_0827, mgrast_rel_2008_1110, myrast_33, rast_rel_2011_0928, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, mgrast_dev_04052011, mgrast_dev_02222011, rast_rel_2009_03_26, mgrast_dev_10262011, rast_rel_2008_11_24, rast_rel_2008_08_07, HEAD
Changes since 1.2: +5 -3 lines
Fixed a POD error.

#
# Copyright (c) 2003-2006 University of Chicago and Fellowship
# for Interpretations of Genomes. All Rights Reserved.
#
# This file is part of the SEED Toolkit.
# 
# The SEED Toolkit is free software. You can redistribute
# it and/or modify it under the terms of the SEED Toolkit
# Public License. 
#
# You should have received a copy of the SEED Toolkit Public License
# along with this program; if not write to the University of Chicago
# at info@ci.uchicago.edu or the Fellowship for Interpretation of
# Genomes at veronika@thefig.info or download a copy from
# http://www.theseed.org/LICENSE.TXT.
#


use strict;
use FIG;
use File::Copy;
use FileLocking;
use Fcntl ':flock';
use FileHandle;

=pod 

=head2 NAME

import_similarity_bundle - import computed sim bundle

=head2 SYNOPSIS

import_similarity_bundle bundle-dir

=head2 DESCRIPTION

B<import_similarity_bundle> imports the result of a sims computation. These results are
structured as a set of files containing augmented similarities as created by 
process_new_sims(1).

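The full import process takes the following actions (note that in this revision
the main program only checks the bundle and performs the last step; the calls
for the other steps are commented out):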

=over 4

=item * 

Copies the similarity files to FIG/Data/NewSims.

=item *

Removes the entries in FIG/Data/Global/pending_similarities for the sims we are installing.

=item *

Invokes insert_dynamic_sims to load the sims into the relational database.

=item *

Creates entries in the SEED postprocessing job queues for all the ids in the sims installed.

=back

=cut

#
# Main program.
#
# $fig and @postprocess_job_queues are file-level lexicals that the
# subroutines below read (insert_dynamic and update_postprocess_list
# respectively), so they must be set up before any of the steps run.
#

my $fig = FIG->new;

my $usage = "import_similarity_bundle bundle-dir";

# Queue files (under the postproc_queues directory) that receive the bundle ids.
my @postprocess_job_queues = qw(auto_assign pchs pins ifc);

# Exactly one argument is expected: the bundle directory.
die $usage unless @ARGV == 1;

my $sim_dir = shift(@ARGV);

-d $sim_dir
    or die "Sim bundle directory $sim_dir not found\n";

#
# Import steps. Only the bundle check and the postprocessing-queue update
# are currently active; the remaining steps are left disabled.
#
check_bundle($sim_dir);
#update_pending_list($sim_dir);
#copy_to_new_sims($sim_dir);
#insert_dynamic($sim_dir);
update_postprocess_list($sim_dir);

#
# check_bundle($sim_dir)
#
# Sanity-check a similarity bundle before anything is imported from it.
#
# $sim_dir is the bundle directory. The bundle is accepted when
#   - $sim_dir/ids is a plain file,
#   - $sim_dir/sims contains at least one entry whose name starts with "sim",
#   - every such entry is a plain file that can be opened for reading.
#
# Returns nothing useful; dies with a descriptive message on the first
# problem found.
#
sub check_bundle
{
    my($sim_dir) = @_;

    if (! -f "$sim_dir/ids")
    {
        die "Bundle $sim_dir missing ids file\n";
    }

    #
    # Sims files are in the sims subdirectory of the bundle we're given.
    #
    # The directory is listed with opendir/readdir instead of glob():
    # glob() splits its pattern on whitespace and expands metacharacters,
    # so a bundle path containing a space was previously misread and
    # rejected. A missing or unreadable sims directory simply yields no
    # sim files and is reported as such below.
    #

    my $sims_dir = "$sim_dir/sims";
    my @sims;
    if (opendir(my $dh, $sims_dir))
    {
        @sims = map { "$sims_dir/$_" } sort grep { /^sim/ } readdir($dh);
        closedir($dh);
    }

    if (@sims == 0)
    {
        die "Bundle $sim_dir missing sim files\n";
    }

    for my $sim_file (@sims)
    {
        if (! -f $sim_file)
        {
            die "Sim file $sim_file not a plain file\n";
        }

        # Open and immediately close: we only want to prove readability.
        open(my $sim_fh, '<', $sim_file)
            or die "Cannot open $sim_file for reading: $!\n";
        close($sim_fh);
    }
}

#
# copy_to_new_sims($sim_dir)
#
# Copy every plain file named sim* from $sim_dir/sims into
# $FIG_Config::data/NewSims.
#
# Dies if the sims directory cannot be read or if any copy fails.
#
sub copy_to_new_sims
{
    my($sim_dir) = @_;

    #
    # Sims files are in the sims subdirectory of the bundle we're given.
    #
    my $src_dir  = "$sim_dir/sims";
    my $dest_dir = "$FIG_Config::data/NewSims";

    # Presumably creates the destination directory when it is missing --
    # confirm against FIG::verify_dir.
    &FIG::verify_dir($dest_dir);

    # Fail loudly: an unreadable sims directory used to result in an
    # empty readdir() and therefore a silent "nothing copied".
    opendir(my $dh, $src_dir)
        or die "Cannot open sims directory $src_dir: $!\n";
    my @entries = readdir($dh);
    closedir($dh);

    for my $sim_file (@entries)
    {
        my $path = "$src_dir/$sim_file";

        next unless $sim_file =~ /^sim/ and -f $path;

        copy($path, "$dest_dir/$sim_file")
            or die "Could not copy $path to $dest_dir/$sim_file: $!";
    }
}

#
# insert_dynamic($sim_dir)
#
# Pass every plain file named sim* in $sim_dir/sims to the
# insert_dynamic_sims_file method of the file-level $fig object, printing
# the file name before the call and the returned value after it.
#
# Dies if the sims directory cannot be read. The value returned by
# insert_dynamic_sims_file is only reported, not interpreted.
#
sub insert_dynamic
{
    my($sim_dir) = @_;

    #
    # Sims files are in the sims subdirectory of the bundle we're given.
    #
    my $src_dir = "$sim_dir/sims";

    # Check the opendir and release the handle once the listing is read;
    # previously a failure was silent and the handle was never closed.
    opendir(my $dh, $src_dir)
        or die "Cannot open sims directory $src_dir: $!\n";
    my @entries = readdir($dh);
    closedir($dh);

    for my $sim_file (@entries)
    {
        my $path = "$src_dir/$sim_file";

        next unless $sim_file =~ /^sim/ and -f $path;

        print "Loading sims file: $path\n";
        my $rc = $fig->insert_dynamic_sims_file($path);
        print "Insert returns rc=$rc\n";
    }
}

#
# update_pending_list($sim_dir)
#
# Remove the ids listed in $sim_dir/ids from
# $FIG_Config::global/pending_similarities.
#
# An id is the first whitespace-delimited token on a line, both in the ids
# file and in the pending list. The pending list is rewritten in place
# under an exclusive flock, via a scratch copy named
# pending_similarities.<pid> in the same directory. The scratch copy is
# left behind afterwards, as before (NOTE(review): possibly intended as a
# backup -- confirm before removing it).
#
# If the pending list cannot be opened for update the function returns
# without doing anything. Dies if the ids file cannot be read, or if
# locking, the scratch copy, or the rewrite fails.
#
sub update_pending_list
{
    my($sim_dir) = @_;

    open(my $ids_fh, '<', "$sim_dir/ids")
        or die "Cannot open ids list: $!\n";

    my %ids;
    while (my $line = <$ids_fh>)
    {
        $ids{$1}++ if $line =~ /(\S+)/;
    }
    close($ids_fh);

    #
    # Remove them from the pending_similarities file.
    #

    my $pending = "$FIG_Config::global/pending_similarities";
    my $scratch = "$pending.$$";

    # No pending list that we can update: nothing to remove.
    open(my $pend_fh, '+<', $pending) or return;

    flock($pend_fh, LOCK_EX) or die "Flock $pending failed: $!\n";

    #
    # First pass: write the normalized pending ids to the scratch file.
    # Every step is checked, because the pending list is truncated below
    # and must not be emptied unless the scratch copy is known to be good.
    #
    open(my $scratch_out, '>', $scratch)
        or die "Cannot open $scratch for writing: $!\n";
    seek($pend_fh, 0, 0);

    while (my $line = <$pend_fh>)
    {
        if ($line =~ /(\S+)/)
        {
            print {$scratch_out} "$1\n"
                or die "Cannot write $scratch: $!\n";
        }
    }
    close($scratch_out) or die "Cannot write $scratch: $!\n";

    open(my $scratch_in, '<', $scratch)
        or die "Cannot open $scratch for reading: $!\n";

    #
    # Second pass: rebuild the pending list from the scratch copy,
    # skipping the ids that are being installed.
    #
    seek($pend_fh, 0, 0);
    truncate($pend_fh, 0) or die "Cannot truncate $pending: $!\n";

    while (my $id = <$scratch_in>)
    {
        chomp $id;
        next if $ids{$id};
        print {$pend_fh} "$id\n" or die "Cannot write $pending: $!\n";
    }

    # Closing the pending list also releases the lock.
    close($pend_fh) or die "Cannot write $pending: $!\n";
    close($scratch_in);
    return;
}

=head3 update_postprocess_list

Append the ids listed in the sim bundle's ids file to each of the postprocessing queues.

=cut

#
# update_postprocess_list($sim_dir)
#
# Append every line of $sim_dir/ids to each queue file named in the
# file-level @postprocess_job_queues list. The queue files live in
# $FIG_Config::global/postproc_queues and are appended to while an
# exclusive flock is held on the qlock file in that directory.
#
# Dies if the lock file, the ids file, or any queue file cannot be opened,
# if the lock cannot be taken, or if writing a queue file fails.
#
sub update_postprocess_list
{
    my($sim_dir) = @_;

    my $qdir = "$FIG_Config::global/postproc_queues";

    # Presumably creates the queue directory when it is missing --
    # confirm against FIG::verify_dir.
    &FIG::verify_dir($qdir);

    my $lockfile = "$qdir/qlock";

    open(my $lock_fh, '>', $lockfile) or die "Cannot open $lockfile: $!\n";

    flock($lock_fh, LOCK_EX) or die "Flock $lockfile failed: $!\n";

    open(my $ids_fh, '<', "$sim_dir/ids")
        or die "Cannot open ids list: $!\n";

    # One [ path, handle ] pair per queue, opened for append.
    my @queues;
    for my $queue (@postprocess_job_queues)
    {
        my $qfile = "$qdir/$queue";
        open(my $qfh, '>>', $qfile)
            or die "Cannot open $qfile for append: $!\n";
        push(@queues, [$qfile, $qfh]);
    }

    while (my $line = <$ids_fh>)
    {
        # Guarantee newline termination: an ids file whose last line has
        # no newline would otherwise glue that id onto whatever is
        # appended to the queue next.
        $line .= "\n" unless $line =~ /\n\z/;

        for my $q (@queues)
        {
            print { $q->[1] } $line
                or die "Cannot write $q->[0]: $!\n";
        }
    }
    close($ids_fh);

    # Buffered write errors surface at close, so check each queue file.
    for my $q (@queues)
    {
        close($q->[1]) or die "Cannot write $q->[0]: $!\n";
    }

    # Closing the lock file releases the lock.
    close($lock_fh);
}
	    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3