[Bio] / Clearinghouse / clearinghouse_services.pl Repository:
ViewVC logotype

View of /Clearinghouse/clearinghouse_services.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.34 - (download) (as text) (annotate)
Wed Feb 25 23:15:41 2009 UTC (10 years, 7 months ago) by olson
Branch: MAIN
CVS Tags: myrast_33, HEAD
Changes since 1.33: +68 -39 lines
Rework genome and metagenome registration code to make transactions work properly.

use strict;
use SOAP::Lite;

use SOAP::Transport::HTTP;


# We do this so that the "intelligent" code in cgi_url doesn't
# fall over when we are invoked as a SOAP call to localhost.
#

delete $ENV{'HTTP_HOST'};

#print Scripts::add_feature("83333.1", "peg", "C1_1_100", '', []);
#exit;

SOAP::Transport::HTTP::CGI   
    -> dispatch_to('Scripts')     
-> handle;

package Scripts;

use strict;
use IPC::Open3;
use File::Temp qw/ :mktemp /;
use FIG_Config;
use CH_Config;
use FIG;
use Clearinghouse::ch_database;
use Clearinghouse::utils;

sub startup {

    #
    # Don't do this!
    #
    return "Not wiping database!";

    #drop and then create all tables.

    my $dbh = db_init();

    eval {$dbh->SQL ("DROP table clearing_house")};	
    eval {$dbh->SQL ("DROP table accession")};
    eval {$dbh->SQL ("DROP table meta_data")};	
    eval {$dbh->SQL ("DROP table package_location")};	
    eval {$dbh->SQL ("DROP table next_genome_id")};	
    eval {$dbh->SQL ("DROP table next_feature_id")};	
    eval {$dbh->SQL ("DROP table locks")};	
    eval {$dbh->SQL ("DROP table types")};	
    eval {$dbh->SQL ("DROP table feature")};	
    eval {$dbh->SQL ("DROP table genome_checksum")};	
    eval {$dbh->SQL ("DROP table contig_checksum")};	

    $dbh->SQL("CREATE table clearing_house (id serial, accession_id int, type text, meta_data text, description text)");
    $dbh->SQL("CREATE table meta_data (ch_id int, tag text, value text)");
    $dbh->SQL("CREATE table accession (id serial, who text, accession_date int, comments text, source text)");
    $dbh->SQL("CREATE table package_location (id serial, ch_id int, pathname text)");
    $dbh->SQL("CREATE table next_genome_id (taxon text, next_id int)");
    $dbh->SQL("CREATE table next_feature_id (genome_id text, type text, next_id int)");
    $dbh->SQL("CREATE table locks (name text, lock int)");
    $dbh->SQL("CREATE table types (type text)");
    $dbh->SQL("create table feature (id serial, genome varchar(255), type varchar(255), location text, feature_id integer)");
    $dbh->SQL("create table genome_checksum(genome varchar(255), checksum varchar(255))");
    $dbh->SQL("create table contig_checksum(genome varchar(255), contig varchar(255), checksum varchar(255))");

    lock_init($dbh);	
    type_init($dbh);
}

#
# check to see if service is alive.
#
sub ping
{
    my $ok;
    eval {
	my $db = db_init();
	my $dbh = $db->{_dbh};
	local($dbh->{RaiseError})=1;

	$dbh->selectall_arrayref(qq(SELECT 1));

	$ok = 1;
    };
    return $ok;
}

#
#check to see if a genome is registered
#
sub is_genome_registered {
    my ($class, $genome) = @_;
    my $db = db_init();
    my $dbh = $db->{_dbh};
    my ($taxon, $id) = split(/\./, $genome);

    my $result = $db->SQL(q(SELECT next_id FROM next_genome_id 
		WHERE (taxon = ? AND next_id > ?)), undef, $taxon, $id);

    if (@$result == 0) {
	return 0;
    }
    return 1;
}


#
#get a new genome ID
# um, the "die's" in here will leave the lock held.
#
sub register_genome {
    my ($class, $taxon) = @_;
    my $db = db_init();
    my $dbh = $db->{_dbh};

    $dbh->{RaiseError} = 1;

    $dbh->begin_work();

    my $next_id;
    eval {
	
	#if this genome is not in table, insert it
	#get the next value, then update the next value
	
	my $res = $dbh->selectall_arrayref(q(SELECT next_id from next_genome_id  where taxon = ? FOR UPDATE), undef, $taxon);

	if ($res && @$res)
	{
	    $next_id = $res->[0]->[0];
	    $dbh->do(q{UPDATE next_genome_id set next_id = ? where taxon=? }, undef, $next_id + 1, $taxon);
	}
	else
	{
	    #print STDERR "sth-rows = 0";
	    $dbh->do(q(INSERT into next_genome_id (taxon, next_id)
			    VALUES (?, ?)), undef,
			    $taxon, 4);
	    $next_id = 3;
	}	
	
	ch_log(join("\t", "register_genome", $ENV{REMOTE_ADDR},
		    $taxon, $next_id) . "\n");
    };
    if ($@)
    {
	die "error executing queries: $@\n";
	$dbh->rollback()
    }
    else
    {
	$dbh->commit();
    }
    
    
    return($next_id);

}

sub register_genome_checksums
{
}

#
#get a new feature ID
#
sub register_feature {
    my ($class, $genome, $type, $number ) = @_;
    my $db = db_init();
    my $dbh = $db->{_dbh};
    my ($taxon, $id) = split(/\./, $genome);

    my $result = $db->SQL(q(SELECT next_id FROM next_genome_id 
			    WHERE (taxon = ? AND next_id > ?)), undef, $taxon, $id);
    if (@$result == 0) {
	die "Genome ID is not registered";
    }

    my $next_id = allocate_feature_id($dbh, $genome, $type, $number);

    ch_log(join("\t", "register_feature", $ENV{REMOTE_ADDR},
		$genome, $type, $number, $next_id) . "\n");
    return($next_id);
}

sub get_next_feature_id
{
    my ($class, $genome, $type) = @_;
    my $db = db_init();
    my $dbh = $db->{_dbh};
    my ($taxon, $id) = split(/\./, $genome);

    my $result = $db->SQL(q(SELECT next_id FROM next_genome_id 
			    WHERE (taxon = ? AND next_id > ?)), undef, $taxon, $id);
    if (@$result == 0) {
	die "Genome ID is not registered";
    }

    my $next_id = lookup_next_feature_id($dbh, $genome, $type);

    return($next_id);
}

#
# Add a new feature to the clearinghouse.
#
# The location is of the form
#
#   contigid_start_end,contigid_start_end,...
# 
# We rewrite these to be fo the form
#
#  genomeid:contigid_start_end,genomeid:contig_start_end, etc
#
#
# When we get a request to add a feature, we query the database to determine if  that
# feature has already been added somewhere (it may have been added due to propoagation of
# subsystems from machines that were offline at the time the peg was created, or due to
# independent creation of the same feature).
#
#
# Checksums is [[contig_id, checksum], ..]
#
sub add_feature
{
    my ($class, $genome, $type, $raw_location, $checksums, $translation) = @_;
    my $result;
    my $next_id;

    my $db = db_init();
    my $dbh = $db->{_dbh};

    #
    # Rewrite the location to include genome ids.
    #

    my @location;
    for my $elt (split(/,/, $raw_location))
    {
	if ($elt =~ /^.*_\d+_\d+$/)
	{
	    push(@location, "$genome:$elt");
	}
	else
	{
	    die "Invalid location format $raw_location\n";
	}
    }
    my $location = join(",", @location);

    #
    # See if we already have this feature in the database, but with a different name.
    #

    $result = $db->SQL(q(SELECT id, feature_id
			 FROM feature
			 WHERE genome = ? and type = ? and location = ?), undef,
		       $genome, $type, $location);

    if (@$result == 0)
    {
	#
	# We don't already have this feature, so allocate a new id.
	#
	my ($taxon, $id) = split(/\./, $genome);

	$result = $db->SQL(q(SELECT next_id FROM next_genome_id 
			     WHERE (taxon = ? AND next_id > ?)), undef, $taxon, $id);
	if (@$result == 0) {
	    die "Genome ID is not registered";
	}
	
	$next_id = allocate_feature_id($dbh, $genome, $type, 1);

	#
	# And update the database with this new id.
	#

	$result = $db->SQL(q(INSERT INTO feature (genome, type, location, feature_id)
			     VALUES (?, ?, ?, ?)),  undef,
			   $genome, $type, $location, $next_id);
    }
    else
    {
	#
	# Looks like we have a match.
	#
	if (@$result > 1)
	{
	    warn "Multiple results found for feature?! $genome $type $location\n";
	}

	my($db_id, $db_feature_id) = @{$result->[0]};

	warn "Found id=$db_id fid=$db_feature_id for $genome $type $location\n";

	$next_id = $db_feature_id;
    }

    my $new_id = "fig|$genome.$type.$next_id";
    return $new_id;
}

sub get_download_url {
    my $rv = &FIG::cgi_url() . "/pkg_download.cgi";
    return($rv);
}

sub get_upload_url {
    #
    # make the placeholder entry for this ID
    #
    my $id;
    my $dbh = db_init();
    
    my $sth = $dbh->{_dbh}->prepare(q(INSERT into package_location (pathname)
				      VALUES (?)
				     )) or die $dbh->{_dbh}->errstr;
    my $res = $sth->execute("EMPTY");

    #get the id to pass back

    my $id = $dbh->get_inserted_id('package_location', $sth);
    my $rv = &FIG::cgi_url() . "/pkg_upload.cgi";
    return([$rv, $id]);
}

#
#file has been pushed to this server
#
sub finish_upload {
    my $class = shift;
    my $id_list = shift;
    my $type = shift;
    my $who = shift;
    my $meta_data = shift;
    my $description = shift;
    my $source = shift;
    
    #get file name for this id from the DB
    my $db = db_init();
    my $dbh = $db->{_dbh};
    
    #
    # insert the accession, the record and the metadata
    #
    
    if (check_for_duplicates($type, $meta_data)) {
	die "Duplicate record";
    }

    my $accession_id = make_accession($db, $who, $source);
    my $ch_id = insert_ch_record($db, $accession_id, $type, $meta_data, $description);
    insert_meta_data($dbh, $ch_id, $meta_data);

    #insert package location into location table
    my $id_list_str = join(", ", @$id_list);
    my $id_path_list = $db->SQL(qq(SELECT id, pathname FROM package_location WHERE id in ( $id_list_str )));
    
    my $sth = $dbh->prepare(q{UPDATE package_location set ch_id = ?, pathname=? where id=? }) or die $dbh->errstr;
    for my $id_entry (@$id_path_list) {
	my($file_id, $temp_file) = @$id_entry;
	my $path = "$CH_Config::dir/$type.$ch_id.$file_id";
	rename $temp_file, $path;
	$sth->execute($ch_id, $path,$file_id );
    }
}

sub how_many_are_there {
	my ($class, $type_list) = @_;
	my @count_list;

	my $db = db_init();
	my $dbh = $db->{_dbh};
	my $sth = $dbh->prepare(q{SELECT COUNT(*) from clearing_house WHERE type = ?}) or die $dbh->errstr;
	my $result;
	my $count;

	for my $type (@$type_list)
	{
		$sth->execute($type);		
		$sth->bind_columns(undef, \$count);
		$sth->fetch;
		print STDERR "Count for $type = $count\n";
		push (@count_list, $count);	
	}
	return(\@count_list);
}
#
#query by key value from the meta data table
#this won't work right now, make it look like the below
#
sub what_is_there_by_key {
    my ($class, $key, $value) = @_;
    my $db = db_init();
    
    my $result = $db->SQL(q(SELECT DISTINCT c.id, c.type, c.meta_data, c.description, a.who, a.source
			    FROM meta_data m, clearing_house c, accession a, package_location pl
			    WHERE (m.ch_id = c.id AND
				   m.tag = ? AND
				   m.value = ? AND
				   c.accession_id = a.id AND
				   pl.ch_id = c.id
				  )
			   ),

			  undef, $key, $value);
    
    return($result);
}

#
#returns all selected entries sorted by the sort argument
#
sub what_is_there {
    # argument - sort column, type
    my %columns = ("ID", "c.id", "Type", "c.type", "Who", "a.who", "Source", "a.source", "Date", "a.accession_date");
    my ($class, $column, $type_list) = @_;
    my $dbcolumn = $columns{$column};
    if (!defined($dbcolumn)) {
	die "Invalid column name $column";
    }

    my $where_clause;
    if (@$type_list > 0) {
	my @n = map {"'$_'"} @$type_list;
	my $nlist = join(",", @n); 
 	$where_clause = "AND c.type IN ($nlist)";
    }

    my $db = db_init();
    my $dbh = $db->{_dbh};
    #
    # Need  SELECT DISTINCT because we're now joining on the package_location table, and it can have
    # multiple entries.
    #
    my $result = $db->SQL(qq(SELECT DISTINCT c.id, c.type, c.meta_data, c.description, a.who, a.source, a.accession_date
			     FROM clearing_house c, accession a, package_location pl
			     WHERE (c.accession_id = a.id AND
				    c.id = pl.ch_id
				    $where_clause
				    )
			     ORDER BY $dbcolumn));
    
    for my $row (@$result)
    {
	my $ch_id = $row->[0];
	
	my $id_list = $dbh->selectcol_arrayref(q(SELECT id from package_location WHERE ch_id = ?), undef, $ch_id);
	push(@$row, $id_list);
    }
    return($result);
}


#
#call to have the server insert a file that is local. This is used when the web server is colocated with the CH and the file has somehow been made available locally
#
sub take_this_file {
    # full file path is the arg instead of url. The file is assumed to be local and in the right place
    
    my $class = shift;
    my $type = shift;
    my $who = shift;
    my $meta_data = shift;
    my $description = shift;
    my $source = shift;
    my $temp_file = shift;
    
    if (!-f $temp_file) {
	die "No such file $temp_file";
    }

    if (! check_types($type)) {
	die "Invalid type $type";
    }
    if (check_for_duplicates($type, $meta_data)) {
	die "Duplicate record";
    }
    
    #validate the package now
    #FIG validate_package (type, meta_data, temp_file) (this must check the meta data for type-apprporiate requirements
    #die if it is bogus
    
    my $db = db_init();
    my $dbh = $db->{_dbh};
    
    #
    # insert accession, the record, the meta data and the location
    #
    my $accession_id = make_accession($db, $who, $source);
    my $ch_id = insert_ch_record($db, $accession_id, $type, $meta_data, $description);
    insert_meta_data($dbh, $ch_id, $meta_data);
    my $path = "$CH_Config::dir/$type.$ch_id";
    system ("cp  $temp_file $path");
    insert_package_location($dbh, $ch_id, $path);
}

#
# the ch server takes a package from the caller. Caller must not be behind firewall, NAT, etc.
#
sub take_this_pkg {
    
    my $class = shift;
    my $type = shift;
    my $who = shift;
    my $meta_data = shift;
    my $description = shift;
    my $source = shift;
    my $url = shift;
    
    if (! check_types($type)) {
	die "Invalid type $type";
    }
    
    if (check_for_duplicates($type, $meta_data)) {
	die "Duplicate record";
    }

    # retrieve file
    my $temp_file = mktemp("$CH_Config::dir/clgXXXX");
    my $result = system("curl -L $url > $temp_file");
    if ($result) {
	die "Transfer of $url failed with result=$result\n";
    }
    
    #validate the package now
    #FIG validate_package (type, meta_data, temp_file) (this must check the meta data for type-apprporiate requirements
    #die if it is bogus
    
    my $db = db_init();
    my $dbh = $db->{_dbh};
    
    #
    # insert accession, the record, meta data and location
    #
    my $accession_id = make_accession($db, $who, $source);
    my $ch_id = insert_ch_record($db, $accession_id, $type, $meta_data, $description);
    insert_meta_data($dbh, $ch_id, $meta_data);
    my $path = "$CH_Config::dir/$type.$ch_id";
    insert_package_location($dbh, $ch_id, $path);
    rename $temp_file, $path;
}

sub register_metagenome_taxon_id
{
    my($class, $user, $genome_name) = @_;

    my $db = db_init();
    my $dbh = $db->{_dbh};

    $dbh->begin_work();


    my $tax;
    eval {
	my $res = $dbh->selectall_arrayref(qq(SELECT next_id FROM next_metagenome_id FOR UPDATE));
	if (@$res == 0)
	{
	    die "next_metagenome_id table not initialized\n";
	}
	elsif (@$res > 1)
	{
	    die "next_metagenome_id table has more than one entry\n";
	}
	my $id = $res->[0]->[0];
	$id++;
	$dbh->do(qq(UPDATE next_metagenome_id SET next_id = ?), undef, $id);

	$tax = 4440000 + $id;
	$dbh->do(qq(INSERT INTO metagenome_registry (tax_id, name, user)
		    VALUES (?, ?, ?)), undef, $tax, $genome_name, $user);

    };

    if ($@)
    {
	$dbh->rollback();
	die "register metagenome eval failed: $@";
    }
    $dbh->commit();

    return $tax;

}

sub register_subsystem_id
{
    my($class, $ss_name) = @_;

    my $db = db_init();
    my $dbh = $db->{_dbh};

    $dbh->begin_work();

    my $res = $dbh->selectcol_arrayref(qq(SELECT id
					  FROM subsystem_accession
					  WHERE ss_name = ? FOR UPDATE), undef, $ss_name);
    if (@$res)
    {
	$dbh->commit();
	return $res->[0];
    }

    my $id;
    
    my $res = $dbh->selectcol_arrayref(qq(SELECT MAX(id) FROM subsystem_accession));
    if (@$res == 0)
    {
	$id = 1;
    }
    else
    {
	$id = $res->[0] + 1;
    }

    $dbh->do(qq(INSERT INTO subsystem_accession (id, ss_name, creation_time)
		VALUES (?, ?, CURRENT_TIMESTAMP)), undef, $id, $ss_name);
    $dbh->commit();
    return $id;
}

sub lookup_subsystem_by_id
{
    my($class, $id) = @_;

    my $db = db_init();
    my $dbh = $db->{_dbh};

    my $res = $dbh->selectcol_arrayref(qq(SELECT ss_name
					  FROM subsystem_accession
					  WHERE id = ?), undef, $id);
    if (@$res)
    {
	return $res->[0];
    }
    else
    {
	return undef;
    }
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3