[Bio] / FigWebServices / webservices_mgrast.cgi Repository:
ViewVC logotype

View of /FigWebServices/webservices_mgrast.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (annotate)
Sat Jul 5 16:37:44 2008 UTC (11 years, 7 months ago) by redwards
Branch: MAIN
CVS Tags: rast_rel_2009_05_18, rast_rel_2008_12_18, rast_2008_0924, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, rast_rel_2009_02_05, mgrast_rel_2008_0625, rast_rel_2008_10_09, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, rast_rel_2009_07_09, mgrast_rel_2008_1110, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, rast_rel_2009_03_26, rast_rel_2008_11_24, rast_rel_2008_08_07
Changes since 1.1: +88 -55 lines
Updating user access control to use the new backend stuff

#__perl__

use strict;
use Carp;
use CGI::Carp qw(fatalsToBrowser); # this makes debugging a lot easier by throwing errors out to the browser
use SOAP::Lite;
use SOAP::Transport::HTTP;

SOAP::Transport::HTTP::CGI   
-> dispatch_to('MgRastWebServices')     
-> handle;


package MgRastWebServices;

use FIGV;
use DBMaster;
use Job48;
use Data::Dumper;
use GenomeMeta;
use Data::Dumper;
use raelib;



# This is a series of methods I added for accessing the mg-RAST. User authentication is currently based only on username (not password), and is handled
# by a separate routine towards the end of the file. If you want to change the user access control, just change that method.
# 

my $dbmaster; # a globally scoped dbmaster so only need to construct it once


=begin WSDL
_IN username $string
_RETURN $string
_DOC Find all of a users jobs. Input is a username and output is a comma separated list of the users jobs. Note that this list includes both the users jobs and jobs of their organization
=cut
sub mg_rast_user_jobs {
	my ($class, $login_name)  = @_;
	my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
	return join(",", @$userj, @$orgj);
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Find information about a job. Input is a job id and a username, output is a tuple of [id, user, genome id, genome name, size, number of contigs, number of pegs]
=cut
sub mg_rast_job_info {
	my ($class, $job_id, $login_name)  = @_;
	
	my $job=$class->_validate_mg_rast_user($job_id, $login_name, 'getjob');
	if ($job =~ /Access Error/) {return $job}
	
	my ($sz, $numcontigs, $npegs)=(0,0,0);
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	my $figv=FIGV->new($orgdir);
	$npegs=$figv->genome_pegs($job->genome_id);
	
	my $contig_lens = $figv->contig_lengths($job->genome_id);
	while ( my($contig,$len) = each %$contig_lens)
	{
		$sz += $len;
		$numcontigs++
	}
	
	return join(", ", $job->id, $job->user, $job->genome_id, $job->genome_name, $sz, $numcontigs, $npegs);
}

=begin WSDL
_IN username $string
_RETURN $string
_DOC Find information about all the jobs a user has access to. Input is a username and output is a list of tuples of [job id, user, genome id, and genome name]
=cut
sub mg_rast_users_job_info {
	my ($class, $login_name)  = @_;
	my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
	my $output;
	foreach my $jobid (@$userj, @$orgj)
	{
		my $job=$class->_validate_mg_rast_user($jobid, $login_name, 'getjob');
		next if ($job =~ /Access Error/);
		
		my ($coords, $time, $meta);
		eval {$meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$jobid/meta.xml");};
		if ($@) {$coords = $time = "Error with xml"}
		else {
			eval {$coords = $class->mg_rast_lat_lon($jobid, $login_name)};
			eval {$time   = $meta->get_metadata("optional_info.time");};
			$coords =~ s/^\s+//; $coords =~ s/\s+$//; $coords =~ s/^\s*\,\s*$//;
			$time =~ s/^\s+//; $time =~ s/\s+$//;
		}
		
		$output .= join("\t", $job->id, $job->user, $job->genome_id, 
		$job->genome_name,  $coords, $time). "\n";
	}
	chomp($output);
	return $output;
}


=begin WSDL
_IN job $string
_IN username $string
_IN searchterm $string
_RETURN $string search results
_DOC Search for text in a job. Input is a job, a username, and a term to search for, and the results are the search results from querying that metagenome
=cut
sub mg_search_metagenome {
	my ($class, $job_id, $login_name, $search)  = @_;
	return undef unless ($search);
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $figv=FIGV->new($orgdir);
	my $res=$figv->search_features_by_annotation($search, 1);
	my $return;
	
	foreach my $fn (keys %$res)
	{
		$return .= join("\n", map {join("\t", $_, $fn)} @{$res->{$fn}});
	}
	return  $return;
	
}

=begin WSDL
_IN username $string
_IN searchterm $string
_RETURN $string search results
_DOC Search for text in all jobs a user has access to. Input is a username, and a term to search for, and the results are the search results from querying all metagenomes that user has access to
=cut
sub mg_search_users_metagenomes {
	my ($class, $login_name, $search)  = @_;
	return undef unless ($search);
	
	my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
	my $return;
	foreach my $job_id (@$userj, @$orgj)
	{
		
		my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
		if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
		
		my $figv=FIGV->new($orgdir);
		my $res=$figv->search_features_by_annotation($search, 1);
		
		foreach my $fn (keys %$res)
		{
			$return .= join("\n", map {join("\t", $_, $fn)} @{$res->{$fn}})."\n";
		}
	}
	chomp($return);
	return  $return;
	
}


=begin WSDL
_IN job $string
_IN username $string
_RETURN $string bindings
_DOC Retrieve the data that binds a job to a set of subsystems. Input is a RAST job id and a username, output is the bindings that join that job to the subsystems. This is a tple of subsystem name, protein function, and sequence within the metagenome.
=cut
sub mg_rast_subsystems {
	my ($class, $job_id, $login_name)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	open(IN, "$orgdir/Subsystems/bindings") || return "No subsystems found\n";
	return join("", <IN>);
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string counts
_DOC Retrieve the counts of each subsystem in a metagenome. Input is a RAST job id and a username, output is a list of tuples of subsystem name and number of occurences
=cut
sub mg_rast_subsystem_counts {
	my ($class, $job_id, $login_name)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	open(IN, "$orgdir/Subsystems/bindings") || return "No subsystems found\n";
	my %count;
	while (<IN>)
	{
		my @a=split /\t/;
		$count{$a[0]}++;
	}
	
	return map {"$_\t$count{$_}\n"} sort {$a cmp $b} keys %count;
}

=begin WSDL
_IN job $string
_IN username $string
_IN number $string
_RETURN $string bindings
_DOC Retrieve the sequences from a job. Input is a RAST job id and a username, output is the DNA sequences in the job in fasta format. The number is a limit in how many sequences will be returned.
=cut
sub mg_rast_sequences {
	my ($class, $job_id, $login_name, $number)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $fasta;
	eval {$fasta = raelib->read_fasta("$orgdir/contigs")};
	if ($@) {return "Error: $@"}
	my $keys = raelib->rand([keys %$fasta]); # randomize the order of the sequences returned. 
	if ($number) {@$keys = splice(@$keys, 0, $number)}
	
	#return join("\n", "keys", @$keys);	
	return join("", map {$_ = ">$_\n".$fasta->{$_}."\n"} @$keys);
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Get the location (GPS coordinates) where a sample was taken (if available). This is a single point for every sample, and if there is more than one location for a sample it is the average of all locations. Input is a RAST job id and a username, output is a tuple of latitude and longitude where the sample was taken, if it is known. 
=cut
sub mg_rast_lat_lon {
	my ($class, $job_id, $login_name)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
	my $ret = $meta->get_metadata("optional_info.latitude") .",".$meta->get_metadata("optional_info.longitude");
	$ret =~ s/^\s*\,\s*$//;
	return $ret;
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Get the location (GPS coordinates) where a sample was taken (if available). These are semi-colon separated tuples of lat-lon. Input is a RAST job id and a username, output is the coordinates where the sample was taken. 
=cut
sub mg_rast_coordinates {
	my ($class, $job_id, $login_name)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
	return $meta->get_metadata("optional_info.coordinates");
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Get the time that a sample was taken (if available). Input is a RAST job id and a username, output is the date or time that the sample was taken.
=cut
sub mg_rast_time {
	my ($class, $job_id, $login_name)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
	return $meta->get_metadata("optional_info.time");
}




sub _validate_mg_rast_user {
# a common method for validating a user. Please note that at the moment this does not use the password (but it should!)
# this is shared by several things above.

# If the user is valid will return a path to the job directory, otherwise will return "Access Error"

# by default returns the org dir, but if the boolean job is set will return the reference to the job

	my ($class, $dir, $login_name, $getjob)=@_;
		
	my $dbm = $class->_get_dbmaster();

	my $user = $dbm->User->init({ login => $login_name });
	my $ids;
	map {$ids->{$_->_id}=1} @{$user->scopes};


	my $job_id=$FIG_Config::fortyeight_jobs.$dir; # not sure whether config dirs always end in /, but they should!
		next unless (-d $job_id && -e "$job_id/DONE" && !-e "$job_id/DELETE");

	my $job;
	eval {$job = Job48->new($job_id)};
	die $@ if ($@);
	next if ($job->to_be_deleted);

	my $rights=$dbm->Rights->get_objects( { name => 'view',
			data_type => 'genome',
			data_id => $job->genome_id,
			});

	foreach my $r (@$rights) {
		if ($ids->{$r->scope->_id})
		{
			return $job if ($getjob);
			return $job->orgdir;
		}
	}
	return "Access Error";
}



sub _user_mg_rast_jobs {
	# a method to extract all the jobs for a user. Returns references to two arrays. 
	# The first is user jobs, and the second is jobs of the users organization
	my ($class, $login_name)=@_;
		
	my $dbm = $class->_get_dbmaster();

	my $user = $dbm->User->init({ login => $login_name });
	my $ownid = $user->get_user_scope->_id;
	my $ids;
	map {$ids->{$_->_id}=1} @{$user->scopes};

	my $ownjobs; my $otherjobs;

	opendir(DIR, $FIG_Config::fortyeight_jobs) || die "Can't open ".$FIG_Config::fortyeight_jobs;
	foreach my $dir (grep {m/^\d+$/} readdir(DIR))
	{

		my $job_id=$FIG_Config::fortyeight_jobs.$dir; # not sure whether config dirs always end in /, but they should!
		next unless (-d $job_id && -e "$job_id/DONE" && !-e "$job_id/DELETE");

		my $job;
		eval {$job = Job48->new($job_id)};
		die $@ if ($@);
		next if ($job->to_be_deleted);

		my $rights=$dbm->Rights->get_objects( { name => 'view',
				data_type => 'genome',
				data_id => $job->genome_id,
				});

		foreach my $r (@$rights) {
			$ownjobs->{$dir}=$job if ($r->scope->_id eq $ownid);
			$otherjobs->{$dir}=$job if ($ids->{$r->scope->_id});
		}


	}
	
	# delete own jobs from list of all jobs
	map {delete $otherjobs->{$_}} keys %$ownjobs;
	return ([keys %$ownjobs], [keys %$otherjobs]);

}

sub _get_dbmaster {
	# an internal method to connect to the database and return the dbmster
	my ($class)=@_;
	return $dbmaster if (defined $dbmaster);
	
	$ENV{DBHOST} = 'bioseed.mcs.anl.gov';

	$dbmaster = DBMaster->new(-database => $FIG_Config::webapplication_db,
			-backend => $FIG_Config::webapplication_backend,
			-host => $FIG_Config::webapplication_host,
			-user => $FIG_Config::webapplication_user || "rast",
			);
	
	return $dbmaster;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3