[Bio] / FigWebServices / fig_scripts_service.cgi Repository:
ViewVC logotype

View of /FigWebServices/fig_scripts_service.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.13 - (download) (annotate)
Thu May 29 23:56:55 2008 UTC (11 years, 8 months ago) by redwards
Branch: MAIN
CVS Tags: rast_rel_2008_06_18, rast_rel_2008_06_16
Changes since 1.12: +23 -0 lines
adding helpful comment

#__perl__



############################### PLEASE NOTE #####################
# 
#
#   Please don't add methods to this web service any more
# 
#   We have a new organization of the web services approach:
#   
#   1. For the scripts that do the work
#          webservices_* have the code that does all the heavy lifting. 
#
#   2. To generate the wsdl files
#	   wsdl_* have a very simple call to POD2WSDL that takes your POD and makes it
#          a web service. 
#
#
#   Note that we have one webservices_* and one wsdl_* file for each of rast, mg-rast, seed, and nmpdr.
#   Hopefully this will make it a lot easier to add new features over time.




use strict;
use Carp;
use SOAP::Lite;
use IPC::Open3;
use FIGO;
use SOAP::Transport::HTTP;
use DBMaster;
use Job48;
use Data::Dumper;
use GenomeMeta;
use PinnedRegions;

SOAP::Transport::HTTP::CGI   
-> dispatch_to('Scripts')     
-> handle;

package Scripts;

use FIG;
use FIGV;
use FIG_Config;
use Data::Dumper;
use FigFams;
use FigFam;
use IPC::Open3;
use raelib;

my $dbm;
BEGIN {
	eval { $dbm = DBMaster->new(-database => 'FortyEight_WebApplication') };
	die $@ if $@;
};
	


=begin WSDL
_IN auth $string
_IN args $string
_RETURN $string jobid
_DOC Input is a hash with lots of parms and a fasta file, output is a job id | error
=cut
sub load_fasta_to_rast {
	my ($class, %args) = @_;
	open FILE,  ">/tmp/foo";
	print FILE $args{file};
	#print STDERR &Dumper(\%args);
	#print STDERR "PWD ", $args{password}, "\n";
	#print STDERR "Dumped\n";


	# We have to authenticate this user first. Check with Daniel how to do that
	my $organism = {name => $args{genome_name},
			tax_id => $args{tax_id},
			lineage => $args{lineage},
			genetic_code => $args{genetic_code}
			};

	#print STDERR &Dumper($organism);
	my $msg;
	my $user = $args{user};
	my $seed_id;
	my $project_id = "Test Project";
	my $filename = "/tmp/foo";

	 my $data = prepare_data_for_create_job( $msg , $seed_id, $organism, $project_id, $filename, $user);
	print STDERR &Dumper($data), "Data\n";
	my ($job_number,$err) = Job48->create_new_job($data);

	print STDERR "job, err = $job_number, $err\n";
	return(join ":", $err, $job_number); 

}

sub prepare_data_for_create_job{
  my ( $msg , $seed_id, $organism, $project_desc, $filename, $user) = @_;

  my $meta = { "upload.timestamp" => time,
	       "status.uploaded" => "complete",
	       "keep_genecalls" => "0",
	       "correction.automatic" => "1" ,
#	       "submit.candidate" =>  $msg,
#	       "replace.seedID" => $seed_id, 
	     };
  
  	print STDERR &Dumper($meta), "meta\n";
  open(SEQFILE , "$filename") or die "Can't open file $filename!\n";
  my $data = { taxonomy_id => $organism->{tax_id},
	       metagenome => 0,
	       genome => $organism->{name},
	       user => $user,
	       project => $project_desc,
	       taxonomy => $organism->{lineage},
	       genetic_code => $organism->{genetic_code},
	       sequence_file => \*SEQFILE,
	       meta => $meta
	     };

# print STDERR "made data \n"; 
  
#  	print STDERR &Dumper($data), "data\n";
  
  
  return $data;
}


=begin WSDL
_IN in $string
_IN in $string
_RETURN $string status
_DOC Input is a RAST job id, and username  output is a status
=cut
sub check_rast_job {
	my ($class, $job_id, $login_name)  = @_;

	#print STDERR "Check $job_id, $login_name";
	$ENV{DBHOST} = 'bioseed.mcs.anl.gov';
		# my $dbm = DBMaster->new(-database => 'FortyEight_WebApplication');
	my $users = $dbm->User->get_objects({ login => $login_name });
	my $user;
	if ($users && @$users) {
		$user = $users->[0];
		my ($stage, $status) = Job48->get_status_of_job($job_id, $user);
		print STDERR "stage $stage, status $status\n";
		return(join ":", $stage, $status); 
	} else {
	#	print STDERR "no users\n";
		return("No users");
	}
}


=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a RAST jobid file, output is a big file
=cut
sub get_rast_results {
	my ($class, $jobid) = @_;
	return ("this should be a big file");
}

#
# mg-RAST methods by Rob
#
# This is a series of methods I added for accessing the mg-RAST. User authentication is currently based only on username (not password), and is handled
# by a separate routine towards the end of the file. If you want to change the user access control, just change that method.
# 

=begin WSDL
_IN username $string
_RETURN $string
_DOC Input is a username and output is a comma separated list of the users jobs. Note that this list includes both the users jobs and jobs of their organization
=cut
sub mg_rast_user_jobs {
	my ($class, $login_name)  = @_;
	my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
	return join(",", @$userj, @$orgj);
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Input is a job id and a username, output is a tuple of [id, user, genome id, genome name, size, number of contigs, number of pegs]
=cut
sub mg_rast_job_info {
	my ($class, $job_id, $login_name)  = @_;

	my $job=$class->_validate_mg_rast_user($job_id, $login_name, 'getjob');
	if ($job =~ /Access Error/) {return $job}

	my ($sz, $numcontigs, $npegs)=(0,0,0);
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	my $figv=FIGV->new($orgdir);
	$npegs=$figv->genome_pegs($job->genome_id);

	my $contig_lens = $figv->contig_lengths($job->genome_id);
	while ( my($contig,$len) = each %$contig_lens)
	{
		$sz += $len;
		$numcontigs++
	}

	return join(", ", $job->id, $job->user, $job->genome_id, $job->genome_name, $sz, $numcontigs, $npegs);
}

=begin WSDL
_IN username $string
_RETURN $string
_DOC Input is a username and output is a list of tuples of [job id, user, genome id, and genome name]
=cut
sub mg_rast_users_job_info {
	my ($class, $login_name)  = @_;
	my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
	my $output;
	foreach my $jobid (@$userj, @$orgj)
	{
		my $job=$class->_validate_mg_rast_user($jobid, $login_name, 'getjob');
		next if ($job =~ /Access Error/);

		my ($coords, $time, $meta);
		eval {$meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$jobid/meta.xml");};
		if ($@) {$coords = $time = "Error with xml"}
		else {
			eval {$coords = $class->mg_rast_lat_lon($jobid, $login_name)};
			eval {$time   = $meta->get_metadata("optional_info.time");};
			$coords =~ s/^\s+//; $coords =~ s/\s+$//; $coords =~ s/^\s*\,\s*$//;
			$time =~ s/^\s+//; $time =~ s/\s+$//;
		}

		$output .= join("\t", $job->id, $job->user, $job->genome_id, 
				$job->genome_name,  $coords, $time). "\n";
	}
	chomp($output);
	return $output;
}


=begin WSDL
_IN job $string
_IN username $string
_IN searchterm $string
_RETURN $string search results
_DOC Input is a job, a username, and a term to search for, and the results are the search results from querying that metagenome
=cut
sub mg_search_metagenome {
	my ($class, $job_id, $login_name, $search)  = @_;
	return undef unless ($search);

	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $figv=FIGV->new($orgdir);
	my $res=$figv->search_features_by_annotation($search, 1);
	my $return;

	foreach my $fn (keys %$res)
	{
		$return .= join("\n", map {join("\t", $_, $fn)} @{$res->{$fn}});
	}
	return  $return;

}
	
=begin WSDL
_IN username $string
_IN searchterm $string
_RETURN $string search results
_DOC Input is a username, and a term to search for, and the results are the search results from querying all metagenomes that user has access to
=cut
sub mg_search_users_metagenomes {
	my ($class, $login_name, $search)  = @_;
	return undef unless ($search);

	my ($userj, $orgj)=$class->_user_mg_rast_jobs($login_name);
	my $return;
	foreach my $job_id (@$userj, @$orgj)
	{

		my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
		if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}

		my $figv=FIGV->new($orgdir);
		my $res=$figv->search_features_by_annotation($search, 1);

		foreach my $fn (keys %$res)
		{
			$return .= join("\n", map {join("\t", $_, $fn)} @{$res->{$fn}})."\n";
		}
	}
	chomp($return);
	return  $return;

}
	

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string bindings
_DOC Input is a RAST job id and a username, output is the bindings that join that job to the subsystems. This is a tple of subsystem name, protein function, and sequence within the metagenome.
=cut
sub mg_rast_subsystems {
	my ($class, $job_id, $login_name)  = @_;

	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	open(IN, "$orgdir/Subsystems/bindings") || return "No subsystems found\n";
	return join("", <IN>);
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string counts
_DOC Input is a RAST job id and a username, output is a list of tuples of subsystem name and number of occurences
=cut
sub mg_rast_subsystem_counts {
	my ($class, $job_id, $login_name)  = @_;

	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}

	open(IN, "$orgdir/Subsystems/bindings") || return "No subsystems found\n";
	my %count;
	while (<IN>)
	{
		my @a=split /\t/;
		$count{$a[0]}++;
	}

	return map {"$_\t$count{$_}\n"} sort {$a cmp $b} keys %count;
}

=begin WSDL
_IN job $string
_IN username $string
_IN number $string
_RETURN $string bindings
_DOC Input is a RAST job id and a username, output is the DNA sequences in the job. The number is a limit in how many sequences will be returned.
=cut
sub mg_rast_sequences {
	my ($class, $job_id, $login_name, $number)  = @_;
	
	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $fasta;
	eval {$fasta = raelib->read_fasta("$orgdir/contigs")};
	if ($@) {return "Error: $@"}
	my $keys = raelib->rand([keys %$fasta]); # randomize the order of the sequences returned. 
	if ($number) {@$keys = splice(@$keys, 0, $number)}
	
	#return join("\n", "keys", @$keys);	
	return join("", map {$_ = ">$_\n".$fasta->{$_}."\n"} @$keys);
}

=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Input is a RAST job id and a username, output is a tuple of latitude and longitude where the sample was taken, if it is known. This is a single point for every sample, and if there is more than one location for a sample it is the average of all locations.
=cut
sub mg_rast_lat_lon {
	my ($class, $job_id, $login_name)  = @_;

	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
	my $ret = $meta->get_metadata("optional_info.latitude") .",".$meta->get_metadata("optional_info.longitude");
	$ret =~ s/^\s*\,\s*$//;
	return $ret;
}
	
=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Input is a RAST job id and a username, output is the coordinates where the sample was taken. These are semi-colon separated tuples of lat-lon
=cut
sub mg_rast_coordinates {
	my ($class, $job_id, $login_name)  = @_;

	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
	return $meta->get_metadata("optional_info.coordinates");
}
	
=begin WSDL
_IN job $string
_IN username $string
_RETURN $string
_DOC Input is a RAST job id and a username, output is the date or time that the sample was taken.
=cut
sub mg_rast_time {
	my ($class, $job_id, $login_name)  = @_;

	my $orgdir=$class->_validate_mg_rast_user($job_id, $login_name);
	if ($orgdir =~ /Access Error/ || !(-e $orgdir)) {return $orgdir}
	
	my $meta = GenomeMeta->new(undef, "/vol/metagenome-48-hour/Jobs.prod/$job_id/meta.xml");
	return $meta->get_metadata("optional_info.time");
}
	
	
=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is an alias, output is a sequence
=cut
sub ali_to_seq {
	my ($class, $arg) = @_;
	return stdin_caller($class, "ali_to_seq", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a comma separated list of pegs, and output is the pegs in order along the genome.
=cut
sub adjacent {
	my ($class, $arg) = @_;
	$arg =~ s/\,\s*/\n/g;
	return stdin_caller($class, "adjacent", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a peg, output is two column table of peg\tcluster
=cut
sub cluster_by_bbhs {
	my ($class, $arg) = @_;
	return stdin_caller($class, "cluster_by_bbhs", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a peg, output is two column table of peg\cluster
=cut
sub cluster_by_sim {
	my ($class, $arg) = @_;
	return stdin_caller($class, "cluster_by_sim", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a peg, output is two column table of peg\text func
=cut
sub external_calls {
	my ($class, $arg) = @_;
	return stdin_caller($class, "external_calls", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a peg, output is a function
=cut
sub function_of {
	my ($class, $arg) = @_;
	my $fig=new FIG;
	return scalar($fig->function_of($arg));
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a peg, output is a single column table of genomes
=cut
sub genomes_of {
	my ($class, $arg) = @_;
	return stdin_caller($class, "genomes_of", $arg);
}


=begin WSDL
_IN genome $string
_RETURN $string
_DOC Input is a genome ID, output is the genus and species of the genome
=cut
sub genus_species {
	my ($class, $arg) = @_;
	my $fig=new FIG;
	return $fig->genus_species($arg);
}


=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a single column table of pegs, output is a single column table of fasta.DNA
=cut
sub fid2dna {
	my ($class, $arg) = @_;
	return stdin_caller($class, "fid2dna", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a genome id, output is a list of pegs in that genome
=cut
sub pegs_of {
	my ($class, $arg) = @_;
	my $fig = new FIG;
        return (join ",", $fig->pegs_of($arg));
}

=begin WSDL
_IN genome $string
_IN contig $string
_RETURN $string
_DOC Input is a genome id and a contig name, return is the length of the contig
=cut
sub contig_ln {
	my $class = shift();
	my $fig = new FIG;
	return $fig->contig_ln(@_);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a single column table of pegs, output is a single column table of archaeal pegs
=cut
sub is_archaeal {
	my ($class, $arg) = @_;
	return stdin_caller($class, "is_archaeal", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a single column table of pegs, output is a single column table of bacterial pegs
=cut
sub is_bacterial {
	my ($class, $arg) = @_;
	return stdin_caller($class, "is_bacterial", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a single column table of pegs, output is a single column table of eukaryotic pegs
=cut
sub is_eukaryotic {
	my ($class, $arg) = @_;
	return stdin_caller($class, "is_eukaryotic", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a single column table of pegs, output is a single column table of prokaryotic pegs
=cut
sub is_prokaryotic {
	my ($class, $arg) = @_;
	return stdin_caller($class, "is_prokaryotic", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a peg, output is peg-Translation
=cut
sub translation_of {
	my ($class, $arg) = @_;
	return stdin_caller($class, "translation_of", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a peg, output is tab separated string of aliases
=cut
sub aliases_of {
	my ($class, $arg) = @_;
	return stdin_caller($class, "aliases_of", $arg);
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is an alias, output is a peg
=cut
sub alias2fig {
	my ($class, $arg) = @_;
	return stdin_caller($class, "alias2fig", $arg);
}



=begin WSDL
_IN in $string ec code
_RETURN $string ec name
_DOC Input is aec code , output is ec name
=cut
sub ec_name {
	my $class = shift();
	my $fig = new FIG;
	my $result = $fig->ec_name(@_);
	return $result;
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a peg, output is list of [protein, score] for things that are coupled to this peg
=cut
sub coupled_to {
	my $class = shift();
	my $fig = new FIG;
	my $return=undef;
	my @result = $fig->coupled_to(@_);
	if (@result)
	{
		$return = join("\n", map {$_->[0].",".$_->[1]} @result);
	}
	return $return;
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a peg, output is list of [protein, score] for things that are coupled to this peg
=cut
sub abstract_coupled_to {
	my $class = shift();
	my $fig = new FIG;
	my $return=undef;
	my @result = $fig->abstract_coupled_to(@_);
	if (@result)
	{
		$return = join("\n", map {$_->[0].",".$_->[1]} @result);
	}
	return $return;
}

=begin WSDL
_IN complete $string
_IN restrictions $string
_IN domain $string
_RETURN @string
_DOC Input is constraints, output is a comma separated list of genomes
=cut
sub all_genomes {
	my $class = shift();
	my $fig=new FIG;
	my @genomes=$fig->genomes(@_);
	return join(",", @genomes);
}

=begin WSDL
_IN complete $string
_IN restrictions $string
_IN domain $string
_RETURN @string
_DOC Input is constraints, output is list of genomes
=cut
sub genomes {
	my $class = shift();
	my $fig = new FIG;
	my @result = $fig->genomes(@_);
	my @genomes;  
	 foreach my $genome (@result)       
	 { 
	         print STDERR "Genome is -$genome-\n";

		  my $genus_species = $fig->genus_species($genome);
	          push @genomes,  join("\t",$genome,$genus_species);
	         # print STDERR join("\t",$genome,$genus_species);
	 }

	#print STDERR @genomes;
	return @genomes;
	#return @result;
}

=begin WSDL
_IN in $string
_RETURN @string
_DOC Input is a peg, output is list of loc on contig
=cut
sub feature_location {
	my ($class, $arg1) = @_;
	my $fig = new FIG;
	my @result = ($fig->feature_location($arg1));
	return @result;
}

=begin WSDL
_IN in $string
_RETURN $string
_DOC Input is a peg, output is translation
=cut
sub get_translation {
	my ($class, $arg1) = @_;
	my $fig = new FIG;
	my $result = $fig->get_translation($arg1);
	return $result;
}

=begin WSDL
_IN pat1 $string
_IN pat2 $string
_RETURN @string
_DOC Input is two patterns, first one is used in search_index, second used to grep the results
=cut
sub search_and_grep {
	my ($class, $arg1, $arg2) = @_;

	my $fig = new FIG;

	my ($pegs, $roles) =  $fig->search_index($arg1);
	
	my (@result_list, $entry);

	for $entry (@$pegs) {
		push (@result_list, grep(/$arg2/, @$entry));
	}	
	push (@result_list, grep(/$arg2/, @$roles));
	chomp @result_list;
	my $return_value = join ("\n", @result_list); 
	return $return_value;
}


=begin WSDL
_IN pat1 $string
_RETURN @string
_DOC Input is a pattern to search for, output is tab separated list of pegs and roles
=cut
sub simple_search {
	my ($class, $arg1)=@_;
	
	my $fig = new FIG;

	my ($pegs, $roles) =  $fig->search_index($arg1);

	my (@result_list, $entry);

	for $entry (@$pegs) {
		push (@result_list, (join("\t", @$entry)));
	}

	# push (@result_list, (join("\t", @$roles)));
	chomp @result_list;
	my $return_value = join ("\n", @result_list);
	return $return_value;
}        

	
=begin WSDL
_RETURN $string list of families
_DOC No Input, output is list of all families
=cut
sub all_families {
	my ($class) = @_;
	my $fig = new FIG;
	my $figfams = new FigFams($fig);

        my @out = $figfams->all_families;
	print STDERR Dumper(@out);
	return @out;
}

=begin WSDL
_RETURN $string list of subsystems and their classifications
_DOC No Input, output is a list of all the subsystems and their classifications
=cut
sub all_subsystem_classifications {
	my ($class) = @_;
	my $fig = new FIG;
	my $output;

	my %found;
	map {   
		my @classification=@{$fig->subsystem_classification($_)};
		$#classification=1;
		push @classification, $_;
		$output.= join("\t", @classification)."\n";
	} sort {$a cmp $b} ($fig->all_subsystems());
	return $output;
}
			

=begin WSDL
_RETURN $string list of families and funcs
_DOC No Input, output is list of all families
=cut
sub all_families_with_funcs {
	my ($class) = @_;
	my $fig = new FIG;
	my $figfams = new FigFams($fig);

        my @out =$figfams->all_families_with_funcs;
	return @out;
}

=begin WSDL
_IN in $string list of famids
_RETURN $string 2 col table, famid, peg
_DOC Input is list of families, outoput is 2 col table of famid, peg
=cut
sub list_members {
	my ($class, $famids) = @_;
	my $fig = new FIG;
	my $figfams = new FigFams($fig);
	my @in = split(/\t/, $famids);
	warn("Starting 2 list members $famids\n");
    my @out = ();
    foreach my $famid (@in)
    {
	my $famO = new FigFam($fig,$famid);
	foreach my $peg ($famO->list_members)
	{
	    push(@out,[$famid,$peg]);
	}
    }
    return @out;
}

=begin WSDL
_IN in $string list of pegs
_RETURN $string returns a 3-column table [PEG,Function,AliasesCommaSeparated]
_DOC Input is list of families,returns a 3-column table [PEG,Function,AliasesCommaSeparated]
=cut
sub CDS_data {
	my ($class, $pegs) = @_;
	my $fig = new FIG;
	my $figfams = new FigFams($fig);
	my @in = split(/\t/, $pegs);

 #warn("Starting CDS data $pegs\n");
 #print STDERR &Dumper($pegs);

    my @out = ();
    foreach my $peg (@in)
    {
	my @famids = $figfams->families_containing_peg($peg);
	foreach my $famid (@famids)
	{
	    push(@out,[$peg,scalar $fig->function_of($peg),[$fig->feature_aliases($peg)]]);
	}
    }
    return @out;
}

=begin WSDL
_IN in $string list of pegs
_RETURN $string a 2-column table [PEG,Sequence]
_DOC Input is list of families,returns a 2-column table [PEG,Sequence] 
=cut
sub CDS_sequences {
	my ($class, $pegs) = @_;
	my $fig = new FIG;
	my $figfams = new FigFams($fig);
	my @in = split(/\t/, $pegs);

 #warn("Starting CDS seq $pegs\n");
 #print STDERR &Dumper($pegs);
    my @out = ();
    foreach my $peg (@in)
    {
	push(@out,[$peg,$fig->get_translation($peg)]);
    }
    return @out;
}

=begin WSDL
_IN in $string list of id seq pairs
_RETURN $string returns a 2-column table [Id,FamilyID]
_DOC Input is list of families,returns a 2-column table [Id,FamilyID]
=cut
sub is_member_of {
	my ($class, $id_seqs) = @_;
	my $fig = new FIG;
	my $figfams = new FigFams($fig);
	#warn("Doing is member $id_seqs\n");
 #print STDERR &Dumper($id_seqs);

	my @in = split(/\n/, $id_seqs);
    my @out = ();
    foreach my $pair (@in)
    {
	my($id,$seq) = split(/\t/, $pair);
	my($famO,undef) = $figfams->place_in_family($seq);
	if ($famO)
	{
	    push(@out,[$id,$famO->family_id]);
	}
    }
    return @out;
}

=begin WSDL
_IN peg $string
_IN maxN $string
_IN maxP $string
_RETURN $string
_DOC Input is a peg, an optional maximum number of hits (default=50), and an optional maximum E value (default=1e-5), return is a list of sims
=cut
sub sims {
	my ($class, $peg, $maxN, $maxP)=@_;
	unless (defined $maxN) {$maxN=50}
	unless (defined $maxP) {$maxP=1e-5}
	my $fig=new FIG;
	my $return=undef;
	foreach my $sim ($fig->sims($peg, $maxN, $maxP, 'figx'))
	{
		$return .= join("\t", @$sim). "\n";
	}
	
	return $return;
}
	

sub stdin_caller {
	my ($class, $name, $arg) = @_;
	my($rd, $wr, $err, $pid, $std_err, $return_value, @std_out);
	if (!($pid = open3($wr, $rd, $err, "$FIG_Config::bin/$name")))
	{
	die "Cannot run open3 $name: $!";
	}

	$wr->write($arg);
	close($wr);

	@std_out= <$rd>;
	close($rd);
	waitpid $pid, 0;
	$return_value = join ("", @std_out); 
	return $return_value;
}



=begin WSDL
_IN genome $string
_IN location1 $string
_IN location2 $string
_RETURN @string
_DOC Input is a genome ID and one or more locations in the form contig_start_stop, output is the DNA sequence
=cut
sub dna_sequence {
	my ($class, $genome, @locations)=@_;
	my $fig = new FIG;
	my $seq=$fig->dna_seq($genome,@locations);
	return $seq;
}



sub _validate_mg_rast_user {
	# a common method for validating a user. Please note that at the moment this does not use the password (but it should!)
	# this is shared by several things above.

	# If the user is valid will return a path to the job directory, otherwise will return "Access Error"

	# by default returns the org dir, but if the boolean job is set will return the reference to the job

	my ($self, $job_id, $login_name, $getjob)=@_;
	$ENV{DBHOST} = 'bioseed.mcs.anl.gov';
# my $dbm = DBMaster->new(-database => 'FortyEight_WebApplication');
	my $users = $dbm->User->get_objects({ login => $login_name });
	my $org=$users->[0]->{organisation};
	my $user;

# get the metagenome job directory. This is hard wired at the moment, but
# shouldn't be. Not sure if the job directory is in FIG_Config
# Also, I return "Access Error" regardless of whether the job is there or not to stop people 
# hacking.
	$job_id="/vol/metagenome-48-hour/Jobs.prod/$job_id";
	return "Access Error 1 and $job_id" unless (-e $job_id); # simple check that we have that id

# now make sure we are a valid user. If we are not a valid user this should throw an error
	
	my $job;
	eval {$job = Job48->new($job_id, $user->[0])};
	if ($@ || !$job) {return "Access Error 2"}

# another user check -- this is just a simple check and doesn't allow for public data or common
# organizations, so it is not as good. But it works
	return "Access Error 3" unless ($job->user eq $login_name || $job->getUserObject->{organisation} == $org); 
	
	return "Access Error: job deleted" if ($job->to_be_deleted);
	
	return $job if ($getjob);
	
	my $orgdir=$job->orgdir;
	
	return $orgdir;
}

sub _user_mg_rast_jobs {
	# a method to extract all the jobs for a user. Returns references to two arrays. 
	# The first is user jobs, and the second is jobs of the users organization
	my ($self, $login_name)=@_;
	$ENV{DBHOST} = 'bioseed.mcs.anl.gov';
	my $org;
# my $dbm = DBMaster->new(-database => 'FortyEight_WebApplication');
	my $users = $dbm->User->get_objects({ login => $login_name });
	my $org=$users->[0]->{organisation};

	my @user; my @org;

	opendir(DIR, "/vol/metagenome-48-hour/Jobs.prod/") || die "Can't open /vol/metagenome-48-hour/Jobs.prod/";
	foreach my $dir (grep {m/^\d+$/} readdir(DIR))
	{
		my $job_id="/vol/metagenome-48-hour/Jobs.prod/$dir";
		next unless (-d $job_id && -e "$job_id/DONE" && !-e "$job_id/DELETE");
		# print STDERR "$job_id\n";
		my $job;
		eval {$job = Job48->new($job_id)};
		die $@ if ($@);
		die "No job for $job\n" if (!$job);
		next if ($job->to_be_deleted);
		($job->user eq $login_name) ? push @user, $job->id :
			($job->getUserObject->{organisation} == $org) ? push @org, $job->id : 1;
	}
	@user = sort {$a <=> $b} @user;
	@org  = sort {$a <=> $b} @org;

	return \@user, \@org;
}

=begin WSDL
_IN peg_id $string
_IN n_pch_pins $string
_IN n_sims $string
_IN sim_cutoff $string
_IN color_sim_cutoff $string
_IN sort_by $string
_RETURN $string
_DOC Input is a FIG (PEG) ID and ..., output is the pinned regions data
=cut
sub pinned_region_data {
    my ($class, $peg, $n_pch_pins, $n_sims, $sim_cutoff, $color_sim_cutoff, $sort_by, $fast_color, $sims_from, $region_size) = @_;

    my $fig = new FIG;
    
    defined($n_pch_pins)       or $n_pch_pins = 5;
    defined($n_sims)           or $n_sims = 0;
    defined($sim_cutoff)       or $sim_cutoff = 1e-20;
    defined($color_sim_cutoff) or $color_sim_cutoff = 1e-20;
    defined($sort_by)          or $sort_by = '';

    defined($fast_color)       or $fast_color = 0;
    defined($sims_from)        or $sims_from = 'blast';
    defined($region_size)      or $region_size = 16000;
    
     my $pin_desc = {
 	             'pegs'                   => [$peg],
 		     'collapse_close_genomes' => 0,
 		     'n_pch_pins'             => $n_pch_pins, 
 		     'n_sims'                 => $n_sims, 
 		     'show_genomes'           => '',
 		     'sim_cutoff'             => $sim_cutoff, 
 		     'color_sim_cutoff'       => $color_sim_cutoff, 
 		     'sort_by'                => $sort_by,
		     'show_genomes'           => [],
 		   };

    my $maps = &PinnedRegions::pinned_regions($fig, $pin_desc, $fast_color, $sims_from, $region_size);
    my $txt  = Dumper($maps);
    return $txt;
}



MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3