[Bio] / FigWebServices / webservices_mgrast.cgi Repository:
ViewVC logotype

View of /FigWebServices/webservices_mgrast.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (annotate)
Thu May 29 23:58:31 2008 UTC (11 years, 8 months ago) by redwards
Branch: MAIN
CVS Tags: rast_rel_2008_06_18, rast_rel_2008_06_16
Adding new wsdl/web service components

#__perl__

use strict;
use Carp;
use SOAP::Lite;
use SOAP::Transport::HTTP;

SOAP::Transport::HTTP::CGI   
-> dispatch_to('MgRastWebServices')     
-> handle;


package MgRastWebServices;

use FIGV;
use DBMaster;
use Job48;
use Data::Dumper;
use GenomeMeta;
use Data::Dumper;
use raelib;



# This is a series of methods I added for accessing the mg-RAST. User authentication is currently based only on username (not password), and is handled
# by a separate routine towards the end of the file. If you want to change the user access control, just change that method.
# 

=begin WSDL
_IN username $string
_RETURN $string
_DOC Find all of a users jobs. Input is a username and output is a comma separated list of the users jobs. Note that this list includes both the users jobs and jobs of their organization
=cut
sub mg_rast_user_jobs {
	my ($class, $login_name)  = @_;
	my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
	return join(",", @$userj, @$orgj);
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Find information about a job. Input is a job id and a username, output is a tuple of [id, user, genome id, genome name, size, number of contigs, number of pegs]
=cut
sub mg_rast_job_info {
	my ($class, $job_id, $login_name)  = @_;
	
	my $job=$class->_validate_mg_rast_user($job_id, $login_name, 'getjob');
	if ($job =~ /Access Error/) {return $job}
	
	my ($sz, $numcontigs, $npegs)=(0,0,0);
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	my $figv=FIGV->new($orgdir);
	$npegs=$figv->genome_pegs($job->genome_id);
	
	my $contig_lens = $figv->contig_lengths($job->genome_id);
	while ( my($contig,$len) = each %$contig_lens)
	{
		$sz += $len;
		$numcontigs++
	}
	
	return join(", ", $job->id, $job->user, $job->genome_id, $job->genome_name, $sz, $numcontigs, $npegs);
}

=begin WSDL
_IN username $string
_RETURN $string
_DOC Find information about all the jobs a user has access to. Input is a username and output is a list of tuples of [job id, user, genome id, and genome name]
=cut
sub mg_rast_users_job_info {
	my ($class, $login_name)  = @_;
	my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
	my $output;
	foreach my $jobid (@$userj, @$orgj)
	{
		my $job=$class->_validate_mg_rast_user($jobid, $login_name, 'getjob');
		next if ($job =~ /Access Error/);
		
		my ($coords, $time, $meta);
		eval {$meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$jobid/meta.xml");};
		if ($@) {$coords = $time = "Error with xml"}
		else {
			eval {$coords = $class->mg_rast_lat_lon($jobid, $login_name)};
			eval {$time   = $meta->get_metadata("optional_info.time");};
			$coords =~ s/^\s+//; $coords =~ s/\s+$//; $coords =~ s/^\s*\,\s*$//;
			$time =~ s/^\s+//; $time =~ s/\s+$//;
		}
		
		$output .= join("\t", $job->id, $job->user, $job->genome_id, 
		$job->genome_name,  $coords, $time). "\n";
	}
	chomp($output);
	return $output;
}


=begin WSDL
_IN job $string
_IN username $string
_IN searchterm $string
_RETURN $string search results
_DOC Search for text in a job. Input is a job, a username, and a term to search for, and the results are the search results from querying that metagenome
=cut
sub mg_search_metagenome {
	my ($class, $job_id, $login_name, $search)  = @_;
	return undef unless ($search);
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $figv=FIGV->new($orgdir);
	my $res=$figv->search_features_by_annotation($search, 1);
	my $return;
	
	foreach my $fn (keys %$res)
	{
		$return .= join("\n", map {join("\t", $_, $fn)} @{$res->{$fn}});
	}
	return  $return;
	
}

=begin WSDL
_IN username $string
_IN searchterm $string
_RETURN $string search results
_DOC Search for text in all jobs a user has access to. Input is a username, and a term to search for, and the results are the search results from querying all metagenomes that user has access to
=cut
sub mg_search_users_metagenomes {
	my ($class, $login_name, $search)  = @_;
	return undef unless ($search);
	
	my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
	my $return;
	foreach my $job_id (@$userj, @$orgj)
	{
		
		my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
		if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
		
		my $figv=FIGV->new($orgdir);
		my $res=$figv->search_features_by_annotation($search, 1);
		
		foreach my $fn (keys %$res)
		{
			$return .= join("\n", map {join("\t", $_, $fn)} @{$res->{$fn}})."\n";
		}
	}
	chomp($return);
	return  $return;
	
}


=begin WSDL
_IN job $string
_IN username $string
_RETURN $string bindings
_DOC Retrieve the data that binds a job to a set of subsystems. Input is a RAST job id and a username, output is the bindings that join that job to the subsystems. This is a tple of subsystem name, protein function, and sequence within the metagenome.
=cut
sub mg_rast_subsystems {
	my ($class, $job_id, $login_name)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	open(IN, "$orgdir/Subsystems/bindings") || return "No subsystems found\n";
	return join("", <IN>);
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string counts
_DOC Retrieve the counts of each subsystem in a metagenome. Input is a RAST job id and a username, output is a list of tuples of subsystem name and number of occurences
=cut
sub mg_rast_subsystem_counts {
	my ($class, $job_id, $login_name)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	open(IN, "$orgdir/Subsystems/bindings") || return "No subsystems found\n";
	my %count;
	while (<IN>)
	{
		my @a=split /\t/;
		$count{$a[0]}++;
	}
	
	return map {"$_\t$count{$_}\n"} sort {$a cmp $b} keys %count;
}

=begin WSDL
_IN job $string
_IN username $string
_IN number $string
_RETURN $string bindings
_DOC Retrieve the sequences from a job. Input is a RAST job id and a username, output is the DNA sequences in the job in fasta format. The number is a limit in how many sequences will be returned.
=cut
sub mg_rast_sequences {
	my ($class, $job_id, $login_name, $number)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $fasta;
	eval {$fasta = raelib->read_fasta("$orgdir/contigs")};
	if ($@) {return "Error: $@"}
	my $keys = raelib->rand([keys %$fasta]); # randomize the order of the sequences returned. 
	if ($number) {@$keys = splice(@$keys, 0, $number)}
	
	#return join("\n", "keys", @$keys);	
	return join("", map {$_ = ">$_\n".$fasta->{$_}."\n"} @$keys);
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Get the location (GPS coordinates) where a sample was taken (if available). This is a single point for every sample, and if there is more than one location for a sample it is the average of all locations. Input is a RAST job id and a username, output is a tuple of latitude and longitude where the sample was taken, if it is known. 
=cut
sub mg_rast_lat_lon {
	my ($class, $job_id, $login_name)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
	my $ret = $meta->get_metadata("optional_info.latitude") .",".$meta->get_metadata("optional_info.longitude");
	$ret =~ s/^\s*\,\s*$//;
	return $ret;
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Get the location (GPS coordinates) where a sample was taken (if available). These are semi-colon separated tuples of lat-lon. Input is a RAST job id and a username, output is the coordinates where the sample was taken. 
=cut
sub mg_rast_coordinates {
	my ($class, $job_id, $login_name)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
	return $meta->get_metadata("optional_info.coordinates");
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Get the time that a sample was taken (if available). Input is a RAST job id and a username, output is the date or time that the sample was taken.
=cut
sub mg_rast_time {
	my ($class, $job_id, $login_name)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
	return $meta->get_metadata("optional_info.time");
}




sub _validate_mg_rast_user {
	# a common method for validating a user. Please note that at the moment this does not use the password (but it should!)
	# this is shared by several things above.
	
	# If the user is valid will return a path to the job directory, otherwise will return "Access Error"
	
	# by default returns the org dir, but if the boolean job is set will return the reference to the job
	
	my ($self, $job_id, $login_name, $getjob)=@_;
	$ENV{DBHOST} = 'bioseed.mcs.anl.gov';
	my $dbm = DBMaster->new(-database => 'FortyEight_WebApplication');
	my $users = $dbm->User->get_objects({ login => $login_name });
	my $org=$users->[0]->{organisation};
	my $user;
	
	# get the metagenome job directory. This is hard wired at the moment, but
	# shouldn't be. Not sure if the job directory is in FIG_Config
	# Also, I return "Access Error" regardless of whether the job is there or not to stop people 
	# hacking.
	$job_id="/vol/metagenome-48-hour/Jobs.prod/$job_id";
	return "Access Error 1 and $job_id" unless (-e $job_id); # simple check that we have that id
	
	# now make sure we are a valid user. If we are not a valid user this should throw an error
	
	my $job;
	eval {$job = Job48->new($job_id, $user->[0])};
	if ($@ || !$job) {return "Access Error 2"}
	
	# another user check -- this is just a simple check and doesn't allow for public data or common
	# organizations, so it is not as good. But it works
	return "Access Error 3" unless ($job->user eq $login_name || $job->getUserObject->{organisation} == $org); 
	
	return "Access Error: job deleted" if ($job->to_be_deleted);
	
	return $job if ($getjob);
	
	my $orgdir=$job->orgdir;
	
	return $orgdir;
}

sub _user_mg_rast_jobs {
	# a method to extract all the jobs for a user. Returns references to two arrays. 
	# The first is user jobs, and the second is jobs of the users organization
	my ($self, $login_name)=@_;
	$ENV{DBHOST} = 'bioseed.mcs.anl.gov';
	my $org;
	my $dbm = DBMaster->new(-database => 'FortyEight_WebApplication');
	my $users = $dbm->User->get_objects({ login => $login_name });
	my $org=$users->[0]->{organisation};
	
	my @user; my @org;
	
	opendir(DIR, "/vol/metagenome-48-hour/Jobs.prod/") || die "Can't open /vol/metagenome-48-hour/Jobs.prod/";
	foreach my $dir (grep {m/^\d+$/} readdir(DIR))
	{
		my $job_id="/vol/metagenome-48-hour/Jobs.prod/$dir";
		next unless (-d $job_id && -e "$job_id/DONE" && !-e "$job_id/DELETE");
		# print STDERR "$job_id\n";
		my $job;
		eval {$job = Job48->new($job_id)};
		die $@ if ($@);
		die "No job for $job\n" if (!$job);
		next if ($job->to_be_deleted);
		($job->user eq $login_name) ? push @user, $job->id :
		($job->getUserObject->{organisation} == $org) ? push @org, $job->id : 1;
	}
	@user = sort {$a <=> $b} @user;
	@org  = sort {$a <=> $b} @org;
	
	return \@user, \@org;
}



MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3