[Bio] / FigWebServices / webservices_seed.cgi Repository:
ViewVC logotype

View of /FigWebServices/webservices_seed.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (download) (annotate)
Wed Aug 18 20:04:41 2010 UTC (9 years, 3 months ago) by redwards
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.7: +53 -23 lines
Changing FigFams/FigFam to FFs/FF

#__perl__

use strict;
use Carp;
use SOAP::Lite;
use SOAP::Transport::HTTP;
use Data::Dumper;

# Hand the incoming CGI request to SOAP::Lite. Every SOAP call is dispatched
# to a method of the SeedWebServices package defined below.
SOAP::Transport::HTTP::CGI->dispatch_to('SeedWebServices')->handle;

package SeedWebServices;

use FIG;
use FIG_Config;
use Data::Dumper;
use FFs;
use FF;
use IPC::Open3;
use PinnedRegions;
use URI::Escape;



=begin WSDL
_IN alias $string
_RETURN $string
_DOC Retrieve the protein sequence for a given identifier. Input is an alias, output is a sequence
=cut
sub ali_to_seq {
	# Pipe the alias to the external "ali_to_seq" program (via stdin_caller)
	# and hand back whatever it prints as a SOAP string.
	my ($class, $alias) = @_;
	my $sequence = stdin_caller($class, "ali_to_seq", $alias);
	my $reply = SOAP::Data->type('string')->name('ali_to_seqReturn');
	return $reply->value($sequence);
}

=begin WSDL
_IN pegs $string
_RETURN $string
_DOC Retrieve the set of pegs in order along the chromosome. Input is a comma separated list of pegs, and output is the pegs in order along the genome.
=cut
sub adjacent {
	# Input is a comma separated list of pegs. It is rewritten to one peg per
	# line, fed to the external "adjacent" program, and the program's output
	# is returned as a SOAP string.
	my ($class, $pegs) = @_;

	# A comma plus any whitespace after it becomes a newline.
	$pegs =~ s/\,\s*/\n/g;

	my $ordered = stdin_caller($class, "adjacent", $pegs);
	my $reply = SOAP::Data->type('string')->name('adjacentReturn');
	return $reply->value($ordered);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the clusters for a peg by bidirectional best hits. Input is a peg, output is two column table of [peg, cluster]
=cut
sub cluster_by_bbhs {
	# Send the peg to the external "cluster_by_bbhs" program and return its
	# output unchanged as a SOAP string.
	my ($class, $peg) = @_;
	my $clusters = stdin_caller($class, "cluster_by_bbhs", $peg);
	my $reply = SOAP::Data->type('string')->name('cluster_by_bbhsReturn');
	return $reply->value($clusters);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the clusters for a peg by similarity. Input is a peg, output is two column table of [peg, cluster]
=cut
sub cluster_by_sim {
	# Send the peg to the external "cluster_by_sim" program and return its
	# output unchanged as a SOAP string.
	my ($class, $peg) = @_;
	my $clusters = stdin_caller($class, "cluster_by_sim", $peg);
	my $reply = SOAP::Data->type('string')->name('cluster_by_simReturn');
	return $reply->value($clusters);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the annotations for a peg from all other known sources. Input is a peg, output is two column table of [peg, other function]
=cut
sub external_calls {
	# Ask FIG for the external calls of a single peg and flatten the answer
	# into text: one row per line, fields separated by tabs.
	# (An earlier version went through stdin_caller("external_calls") instead.)
	my ($class, $peg) = @_;
	my $fig = FIG->new;

	my @rows = map { join("\t", @$_) } $fig->external_calls([$peg]);
	my $table = join("\n", @rows);

	my $reply = SOAP::Data->type('string')->name('external_callsReturn');
	return $reply->value($table);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the functional annotation of a given protein identifier. Input is a peg, output is a function
=cut
sub function_of {
	# Return the function FIG reports for one peg, as a SOAP string.
	my ($class, $peg) = @_;
	my $fig = FIG->new;

	# function_of is called in scalar context on purpose.
	my $function = scalar($fig->function_of($peg));

	my $reply = SOAP::Data->type('string')->name('function_ofReturn');
	return $reply->value($function);
}

=begin WSDL
_IN pegs $string
_RETURN @string
_DOC Get the functional annotation of a comma-joined list of protein identifiers. Input is a list of pegs, output is  an array of hashes, with two keys - id is the peg, function is the function of that peg.
=cut
sub functions_of {
	# Annotate several pegs at once. $arg is a comma-joined list of pegs; the
	# reply is an "ArrayOf_Hashes" whose items each carry the peg ("id") and
	# the function FIG reports for it ("function").
	my ($class, $arg) = @_;
	my $fig = FIG->new;

	my @records;
	for my $peg (split /\,/, $arg) {
		my $function = scalar($fig->function_of($peg));
		my $record = SOAP::Data->value(
			SOAP::Data->name("id" => $peg),
			SOAP::Data->name("function" => $function),
		);
		push @records, \$record;
	}

	my $items = SOAP::Data->name("item" => @records);
	my $array = SOAP::Data->value($items)->type("ArrayOf_Hashes");
	return SOAP::Data->name("functions_ofReturn" => \$array);
}


=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the genome(s) that a given protein identifier refers to. Input is a peg, output is a single column table of genomes
=cut
sub genomes_of {
	# Send the peg to the external "genomes_of" program and return its
	# output unchanged as a SOAP string.
	my ($class, $peg) = @_;
	my $genomes = stdin_caller($class, "genomes_of", $peg);
	my $reply = SOAP::Data->type('string')->name('genomes_ofReturn');
	return $reply->value($genomes);
}


=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Get the genus and species of a genome identifier. Input is a genome ID, output is the genus and species of the genome
=cut
sub genus_species {
	# Return what FIG::genus_species reports for the genome id, as a SOAP
	# string.
	my ($class, $genome_id) = @_;
	my $fig = FIG->new;
	my $name = $fig->genus_species($genome_id);
	my $reply = SOAP::Data->type('string')->name('genus_speciesReturn');
	return $reply->value($name);
}


=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the DNA sequence for a given protein identifier. Input is a peg, output is the DNA sequence in fasta format.
=cut
sub fid2dna {
	# Send the peg to the external "fid2dna" program and return its output
	# unchanged as a SOAP string.
	my ($class, $peg) = @_;
	my $dna = stdin_caller($class, "fid2dna", $peg);
	my $reply = SOAP::Data->type('string')->name('fid2dnaReturn');
	return $reply->value($dna);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the DNA sequence for a set of protein identifiers. Input is a comma-joined list of pegs, output is the DNA sequence in fasta format.
=cut
sub fids2dna {
	# Run the external "fid2dna" program once per peg in the comma-joined
	# input and concatenate the outputs in input order.
	my ($class, $arg) = @_;

	# Deliberately left undefined: with no pegs the reply carries no value.
	my $combined;
	for my $peg (split /\,/, $arg) {
		my $dna = stdin_caller($class, "fid2dna", $peg);
		$combined .= $dna;
	}

	my $reply = SOAP::Data->type('string')->name('fids2dnaReturn');
	return $reply->value($combined);
}


=begin WSDL
_IN genomeid $string
_RETURN @string
_DOC Get a list of all the contigs in a genome. Input is a genome ID, output is an array of the contigs in that genome
=cut
sub contigs_of {
	# Return everything FIG::contigs_of lists for the genome, wrapped as an
	# "ArrayOf_string" with one string item per contig.
	my ($class, $genome) = @_;
	my $fig = FIG->new;
	my @contigs = $fig->contigs_of($genome);

	my $items = SOAP::Data->name("item" => @contigs)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("contigs_ofReturn" => \$array);
}


=begin WSDL
_IN genomeid $string
_IN location1 $string
_RETURN $string
_DOC Please note, dna_sequence is deprecated. Please use dna_seq instead. The methods return the same thing.
=cut
sub dna_sequence {
	# Deprecated twin of dna_seq: the same FIG::dna_seq call, with only the
	# name of the reply element differing.
	my ($class, $genome, $locations) = @_;
	my $fig = FIG->new;
	my $dna = $fig->dna_seq($genome, $locations);
	my $reply = SOAP::Data->type('string')->name('dna_sequenceReturn');
	return $reply->value($dna);
}

=begin WSDL
_IN genomeid $string
_IN location1 $string
_RETURN $string
_DOC Get the DNA sequence for a region in a genome. Input is a genome ID and a location in the form contig_start_stop, output is the DNA sequence in fasta format.
=cut
sub dna_seq {
	# Return what FIG::dna_seq gives for the genome and location(s), as a
	# SOAP string.
	my ($class, $genome, $locations) = @_;
	my $fig = FIG->new;
	my $dna = $fig->dna_seq($genome, $locations);
	my $reply = SOAP::Data->type('string')->name('dna_seqReturn');
	return $reply->value($dna);
}



=begin WSDL
_IN genomeid $string
_RETURN @string
_DOC Get all the protein identifiers associated with a genome. Input is a genome id, output is a list of pegs in that genome
=cut
sub pegs_of {
	# Return everything FIG::pegs_of lists for the genome, wrapped as an
	# "ArrayOf_string" with one string item per peg.
	my ($class, $genome_id) = @_;
	my $fig = FIG->new;
	my @pegs = $fig->pegs_of($genome_id);

	my $items = SOAP::Data->name("item" => @pegs)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("pegs_ofReturn" => \$array);
}

=begin WSDL
_IN genomeid $string
_RETURN @string
_DOC Get all the RNA identifiers associated with a genome. Input is a genome ID, and output is a list (an array) of the RNAs in that genome
=cut
sub rnas_of {
	# Return everything FIG::rnas_of lists for the genome, wrapped as an
	# "ArrayOf_string" with one string item per RNA.
	my ($class, $genome_id) = @_;
	my $fig = FIG->new;
	my @rnas = $fig->rnas_of($genome_id);

	my $items = SOAP::Data->name("item" => @rnas)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("rnas_ofReturn" => \$array);
}

=begin WSDL
_IN genomeid $string
_IN contig $string
_RETURN $int
_DOC Get the length of the DNA sequence in a contig in a genome. Input is a genome id and a contig name, return is the length of the contig
=cut
sub contig_ln {
	# Pass every argument after the class straight to FIG::contig_ln and
	# return the result typed as a SOAP int.
	my ($class, @args) = @_;
	my $fig = FIG->new;
	my $length = $fig->contig_ln(@args);
	my $reply = SOAP::Data->name('contig_lnReturn')->value($length);
	return $reply->type('int');
}

=begin WSDL
_IN featureid $string
_RETURN $string
_DOC Retrieve the DNA sequence for a particular feature. Note that this will take a feature id (peg, rna, etc), and return the DNA sequence for that id. There is also a separate method to get the DNA sequence for an arbitrary location on a genome
=cut
sub get_dna_seq {
	# Return what FIG::get_dna_seq gives for the feature id, as a SOAP
	# string.
	my ($class, $fid) = @_;
	my $fig = FIG->new;
	my $dna = $fig->get_dna_seq($fid);
	my $reply = SOAP::Data->type('string')->name('get_dna_seqReturn');
	return $reply->value($dna);
}

=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Test whether an organism is Archaeal. Input is a genome identifier, and output is true or false (or 1 or 0)
=cut
sub is_archaeal {
	# Send the genome id to the external "is_archaeal" program (via
	# stdin_caller) and return its output as a SOAP string.
	#
	# Parameters: $class - class the SOAP call was dispatched to
	#             $arg   - genome identifier
	# Returns:    SOAP::Data string named "is_archaealReturn".
	#
	# The POD block above must end with an unindented "=cut": perl only
	# recognises POD directives at the start of a line, so an indented one
	# leaves this sub inside the documentation and it is never compiled.
	my ($class, $arg) = @_;
	my $result = stdin_caller($class, "is_archaeal", $arg);
	return SOAP::Data->type('string')->name('is_archaealReturn')->value($result);
}

=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Test whether an organism is Bacterial. Input is a genome identifier, and output is true or false (or 1 or 0)
=cut
sub is_bacterial {
	# Send the genome id to the external "is_bacterial" program (via
	# stdin_caller) and return its output as a SOAP string.
	#
	# Parameters: $class - class the SOAP call was dispatched to
	#             $arg   - genome identifier
	# Returns:    SOAP::Data string named "is_bacterialReturn".
	#
	# The POD block above must end with an unindented "=cut": perl only
	# recognises POD directives at the start of a line, so an indented one
	# leaves this sub inside the documentation and it is never compiled.
	my ($class, $arg) = @_;
	my $result = stdin_caller($class, "is_bacterial", $arg);
	return SOAP::Data->type('string')->name('is_bacterialReturn')->value($result);
}

=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Test whether an organism is Eukaryotic. Input is a genome identifier, and output is true or false (or 1 or 0)
=cut
sub is_eukaryotic {
	# Send the genome id to the external "is_eukaryotic" program and return
	# its output unchanged as a SOAP string.
	my ($class, $genome_id) = @_;
	my $answer = stdin_caller($class, "is_eukaryotic", $genome_id);
	my $reply = SOAP::Data->type('string')->name('is_eukaryoticReturn');
	return $reply->value($answer);
}

=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Test whether an organism is a Prokaryote. Input is a genome identifier, and output is true or false (or 1 or 0)
=cut
sub is_prokaryotic {
	# Send the genome id to the external "is_prokaryotic" program and return
	# its output unchanged as a SOAP string.
	my ($class, $genome_id) = @_;
	my $answer = stdin_caller($class, "is_prokaryotic", $genome_id);
	my $reply = SOAP::Data->type('string')->name('is_prokaryoticReturn');
	return $reply->value($answer);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the translation (protein sequence) of a peg. Input is a peg, output is the protein sequence. (Note that this is a synonym of get_translation).
=cut
sub translation_of {
	# Send the peg to the external "translation_of" program and return its
	# output unchanged as a SOAP string.
	my ($class, $peg) = @_;
	my $protein = stdin_caller($class, "translation_of", $peg);
	my $reply = SOAP::Data->type('string')->name('translation_ofReturn');
	return $reply->value($protein);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the translation (protein sequence) of a peg. Input is a peg, output is translation. (Note that this is a synonym of translation_of);
=cut
sub get_translation {
	# Return what FIG::get_translation gives for the peg, as a SOAP string.
	# Unlike translation_of, this calls FIG directly rather than an external
	# program.
	my ($class, $peg) = @_;
	my $fig = FIG->new;
	my $protein = $fig->get_translation($peg);
	my $reply = SOAP::Data->type('string')->name('get_translationReturn');
	return $reply->value($protein);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the location of a peg on its contig. Input is a peg, output is list of locations on contigs. Usually this will be a single location, but sometimes it can either be more than one region on a contig, or even on multiple contigs. For convenience it is a comma joined list, often you will want to pass that to boundaries_of
=cut
sub feature_location {
	# Return FIG::feature_location for the peg, evaluated in scalar context,
	# as a SOAP string.
	my ($class, $peg) = @_;
	my $fig = FIG->new;
	my $location = scalar($fig->feature_location($peg));
	my $reply = SOAP::Data->type('string')->name('feature_locationReturn');
	return $reply->value($location);
}

=begin WSDL
_IN locations $string
_RETURN @string
_DOC Get the boundaries of a feature location. A feature can have multiple locations on a contig (e.g. split locations, introns, etc). This just returns an array of [contig, beginning, end]. You can pass it the output from feature_location directly
=cut
sub boundaries_of {
	# Return the list FIG::boundaries_of yields for the location string,
	# wrapped as an "ArrayOf_string" with one string item per element.
	my ($class, $locations) = @_;
	my $fig = FIG->new;
	my @boundaries = $fig->boundaries_of($locations);

	my $items = SOAP::Data->name("item" => @boundaries)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("boundaries_ofReturn" => \$array);
}

=begin WSDL
_IN md5 $string
_RETURN @string
_DOC Get the FIG IDs associated with the MD5 sum of a protein sequence. Input is the md5 checksum, output is an array of strings of FIG ids. This should be faster, and more complete, than using  aliases or other ways to match protein sequences.
=cut
sub pegs_with_md5 {
	# Return the list FIG::pegs_with_md5 yields for the checksum, wrapped as
	# an "ArrayOf_string" with one string item per id.
	my ($class, $md5) = @_;
	my $fig = FIG->new;
	my @pegs = $fig->pegs_with_md5($md5);

	my $items = SOAP::Data->name("item" => @pegs)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("pegs_with_md5Return" => \$array);
}

=begin WSDL
_IN md5 $string
_RETURN $string
_DOC Get the FIG IDs associated with the MD5 sum of a protein sequence. Input is the md5 checksum, output is a comma separated list of FIG ids as a single string. This should be faster, and more complete, than using  aliases or other ways to match protein sequences.
=cut
sub pegs_with_md5_string {
	# Same lookup as pegs_with_md5, but the ids are joined with commas and
	# returned as a single SOAP string.
	my ($class, $md5) = @_;
	my $fig = FIG->new;
	my $joined = join(",", $fig->pegs_with_md5($md5));
	my $reply = SOAP::Data->type('string')->name('pegs_with_md5_stringReturn');
	return $reply->value($joined);
}

=begin WSDL
_IN peg $string
_RETURN @string
_DOC Get the aliases of a peg. These are the identifiers that other databases use. Input is a peg, output is an array of aliases
=cut
sub aliases_of {
	# Return the list FIG::feature_aliases yields for the peg, wrapped as an
	# "ArrayOf_string" with one string item per alias.
	my ($class, $peg) = @_;
	my $fig = FIG->new;
	my @aliases = $fig->feature_aliases($peg);

	my $items = SOAP::Data->name("item" => @aliases)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("aliases_ofReturn" => \$array);
}

=begin WSDL
_IN peg $string
_RETURN @string
_DOC Get the corresponding ids of a peg. These are the identifiers that other databases use. Input is a peg, output is an array of aliases
=cut
sub get_corresponding_ids {
	# Return the list FIG::get_corresponding_ids yields for the peg (the
	# second argument is always 1), wrapped as an "ArrayOf_string".
	my ($class, $peg) = @_;
	my $fig = FIG->new;
	my @ids = $fig->get_corresponding_ids($peg, 1);

	my $items = SOAP::Data->name("item" => @ids)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("get_corresponding_idsReturn" => \$array);
}

=begin WSDL
_IN alias $string
_RETURN @string
_DOC Get the FIG ID(s) (peg) for a given external identifier. Input is an identifier used by another database, output is a list of our identifiers. Note that an alias can refer to more than one protein since the mapping is done via protein sequence.
=cut
sub alias2fig {
	# Return the list FIG::by_all_aliases yields for the external
	# identifier, wrapped as an "ArrayOf_string".
	my ($class, $alias) = @_;
	my $fig = FIG->new;
	my @pegs = $fig->by_all_aliases($alias);

	my $items = SOAP::Data->name("item" => @pegs)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("alias2figReturn" => \$array);
}

=begin WSDL
_IN EC_number $string ec code
_RETURN $string ec name
_DOC Get the name for a given E.C. number. Input is an EC number, output is the name
=cut
sub ec_name {
	# Pass every argument after the class straight to FIG::ec_name and
	# return the result as a SOAP string.
	my ($class, @args) = @_;
	my $fig = FIG->new;
	my $name = $fig->ec_name(@args);
	my $reply = SOAP::Data->type('string')->name('ec_nameReturn');
	return $reply->value($name);
}

=begin WSDL
_IN Reaction_number $string reaction code number
_IN genomeid $string
_RETURN $string
_DOC Get a tab-separated list of [subsystem name, functional role, peg, subsystem variant code for that genome] for any given reaction id and genome id. Maps the reaction id to peg, peg to genome, and genome to variant code
=cut
sub reaction_to_role {
	# For every EC that catalyzes the reaction, look up the subsystems and
	# roles tied to that EC and report, per subsystem/role, the pegs the
	# genome has in that cell together with the genome's variant code.
	# Output is text: one line per subsystem/role, with the tab-separated
	# fields subsystem, role, pegs (joined by ", ") and variant code.
	my ($class, $rxn, $genomeid) = @_;
	my $fig = FIG->new;

	my @lines;
	for my $ec ($fig->catalyzed_by($rxn)) {
		# Each tuple is [subsystem, role, protein]; group the proteins by
		# subsystem and then by role.
		my $by_subsystem;
		for my $tuple ($fig->subsystems_for_ec($ec)) {
			push @{ $by_subsystem->{ $tuple->[0] }->{ $tuple->[1] } }, $tuple->[2];
		}

		for my $ss (keys %$by_subsystem) {
			my $subsystem = $fig->get_subsystem($ss);
			my $variant = $subsystem->get_variant_code_for_genome($genomeid);

			for my $role (keys %{ $by_subsystem->{$ss} }) {
				# A failing lookup is treated like an empty cell.
				my @cell;
				eval { @cell = $subsystem->get_pegs_from_cell($genomeid, $role) };

				# An empty cell leaves the peg column blank.
				my $pegs = @cell ? join(", ", @cell) : undef;
				push @lines, join("\t", $ss, $role, $pegs, $variant);
			}
		}
	}

	my $reply = SOAP::Data->type('string')->name('reaction_to_roleReturn');
	return $reply->value(join("\n", @lines));
}


=begin WSDL
_IN peg $string
_RETURN @string
_DOC Get the pegs that are coupled to any given peg. Input is a peg, output is list of [protein, score] for things that are coupled to this peg
=cut
sub coupled_to {
	# Pass every argument after the class to FIG::coupled_to. Each returned
	# pair is flattened to "first,second" and the strings are returned as an
	# "ArrayOf_string".
	my ($class, @args) = @_;
	my $fig = FIG->new;

	my @pairs = map { $_->[0] . "," . $_->[1] } $fig->coupled_to(@args);

	my $items = SOAP::Data->name("item" => @pairs)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("coupled_toReturn" => \$array);
}

=begin WSDL
_IN peg $string
_RETURN @string
_DOC Get the pegs that may be coupled to this peg through abstract coupling. Input is a peg, output is list of [protein, score] for things that are coupled to this peg
=cut
sub abstract_coupled_to {
	# Pass every argument after the class to FIG::abstract_coupled_to. Each
	# returned pair is flattened to "first,second" and the strings are
	# returned as an "ArrayOf_string".
	my ($class, @args) = @_;
	my $fig = FIG->new;

	my @pairs = map { $_->[0] . "," . $_->[1] } $fig->abstract_coupled_to(@args);

	my $items = SOAP::Data->name("item" => @pairs)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("abstract_coupled_toReturn" => \$array);
}

=begin WSDL
_IN peg_id $string
_IN n_pch_pins $string
_IN n_sims $string
_IN sim_cutoff $string
_IN color_sim_cutoff $string
_IN sort_by $string
_RETURN $string
_DOC Input is a FIG (PEG) ID and ..., output is the pinned regions data
=cut 
sub pinned_region_data {
	# Build a pin description for one peg, run PinnedRegions::pinned_regions
	# on it, and return the Data::Dumper text of the result. Unlike the other
	# methods the text is returned as a plain scalar, not as a SOAP::Data
	# object.
	my ($class, $peg, $n_pch_pins, $n_sims, $sim_cutoff, $color_sim_cutoff, $sort_by, $fast_color, $sims_from, $region_size) = @_;

	my $fig = FIG->new;

	# Defaults for anything the caller left undefined.
	$n_pch_pins       = 5       unless defined($n_pch_pins);
	$n_sims           = 0       unless defined($n_sims);
	$sim_cutoff       = 1e-20   unless defined($sim_cutoff);
	$color_sim_cutoff = 1e-20   unless defined($color_sim_cutoff);
	$sort_by          = ''      unless defined($sort_by);
	$fast_color       = 0       unless defined($fast_color);
	$sims_from        = 'blast' unless defined($sims_from);
	$region_size      = 16000   unless defined($region_size);

	# 'show_genomes' is given once, as an empty array reference.
	my %pin_desc = (
		'pegs'                   => [$peg],
		'collapse_close_genomes' => 0,
		'n_pch_pins'             => $n_pch_pins,
		'n_sims'                 => $n_sims,
		'sim_cutoff'             => $sim_cutoff,
		'color_sim_cutoff'       => $color_sim_cutoff,
		'sort_by'                => $sort_by,
		'show_genomes'           => [],
	);

	my $maps = &PinnedRegions::pinned_regions($fig, \%pin_desc, $fast_color, $sims_from, $region_size);
	my $dump = Dumper($maps);
	return $dump;
}   


=begin WSDL
_IN complete $string
_IN restrictions $string
_IN domain $string
_RETURN @string
_DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a list of genome ids. An example use is with the parameters ("complete", undef, "Bacteria") that will return all complete bacterial genomes.
=cut
sub all_genomes {
	# Pass the constraints (everything after the class) to FIG::genomes and
	# return the resulting ids as an "ArrayOf_string".
	my ($class, @constraints) = @_;
	my $fig = FIG->new;
	my @ids = $fig->genomes(@constraints);

	my $items = SOAP::Data->name("item" => @ids)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("all_genomesReturn" => \$array);
}

=begin WSDL
_IN complete $string
_IN restrictions $string
_IN domain $string
_RETURN @string
_DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a list of genome ids with the genus species appended. An example use is with the parameters ("complete", undef, "Bacteria") that will return all complete bacterial genomes.
=cut
sub genomes {
	# Same selection as all_genomes, but each item is the genome id, a tab,
	# and what FIG::genus_species reports for that genome.
	my ($class, @constraints) = @_;
	my $fig = FIG->new;

	my @rows = map { join("\t", $_, scalar($fig->genus_species($_))) }
		$fig->genomes(@constraints);

	my $items = SOAP::Data->name("item" => @rows)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("genomesReturn" => \$array);
}

=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC If this genome replaces another one (it is a more up-to-date version), what is the ID of the older genome?
=cut
sub replaces {
	# Return what FIG::replaces reports for the genome id, as a SOAP string.
	my ($class, $genomeid) = @_;
	my $fig = FIG->new;
	my $older = $fig->replaces($genomeid);
	my $reply = SOAP::Data->type('string')->name('replacesReturn');
	return $reply->value($older);
}

=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Returns the taxonomy of a given genomeid
=cut
sub taxonomy_of {
	# Return what FIG::taxonomy_of reports for the genome id, as a SOAP
	# string.
	my ($class, $genomeid) = @_;
	my $fig = FIG->new;
	my $taxonomy = $fig->taxonomy_of($genomeid);
	my $reply = SOAP::Data->type('string')->name('taxonomy_ofReturn');
	return $reply->value($taxonomy);
}


=begin WSDL
_IN pattern1 $string
_IN pattern2 $string
_RETURN @string
_DOC Search and grep through the database. Input is two patterns, first one is used in search_index, second used to grep the results to restrict to a smaller set. Output is an array of hashes with keys id, organism, otherIds, functionalAssignment, and annotator.
=cut
sub search_and_grep {
	# Search the index for $arg1, then keep only the hits in which at least
	# one field matches the pattern $arg2.
	#
	# Parameters:
	#   $class - class the SOAP call was dispatched to
	#   $arg1  - pattern handed to FIG::search_index
	#   $arg2  - Perl regular expression used to filter the hits; an empty
	#            or missing pattern keeps every hit
	# Returns a SOAP "ArrayOf_Hashes" named search_and_grepReturn. Each item
	# carries id, organism, otherIds, functionalAssignment and annotator,
	# taken from fields 0-4 of the hit.
	# Dies if $arg2 is not a valid regular expression.
	my ($class, $arg1, $arg2) = @_;

	my $fig = FIG->new;

	my ($pegs, $roles) = $fig->search_index($arg1);

	# Compile the filter once instead of once per hit. Wrapping it in
	# (?:...) keeps the pattern from ever being empty: Perl treats an empty
	# pattern as "reuse the last successfully matched regex", which would
	# filter on something unrelated to the request.
	# NOTE(review): $arg2 is caller-supplied and deliberately interpreted as
	# a regular expression, so a pathological pattern can be slow.
	$arg2 = '' unless defined $arg2;
	my $filter = qr/(?:$arg2)/;

	my @result;
	for my $entry (@$pegs) {
		next unless grep { $_ =~ $filter } @$entry;
		my $data = \SOAP::Data->value(
				SOAP::Data->name("id" => $entry->[0]),
				SOAP::Data->name("organism" => $entry->[1]),
				SOAP::Data->name("otherIds" => $entry->[2]),
				SOAP::Data->name("functionalAssignment" => $entry->[3]),
				SOAP::Data->name("annotator" => $entry->[4])
				);
		push @result, $data;
	}

	return SOAP::Data->name("search_and_grepReturn" =>
			\SOAP::Data->value(
				SOAP::Data->name("item" => @result)
				)->type("ArrayOf_Hashes")
			);
}


=begin WSDL
_IN pattern $string
_RETURN @string
_DOC Search the database. Input is a pattern to search for, output is list of pegs and roles
=cut
sub simple_search {
	# Run FIG::search_index on the pattern and return the peg hits, each hit
	# flattened to one tab-joined string, as an "ArrayOf_string". The second
	# value search_index returns is not used.
	my ($class, $pattern) = @_;
	my $fig = FIG->new;

	my ($hits) = $fig->search_index($pattern);

	my @rows;
	for my $hit (@$hits) {
		push @rows, join("\t", @$hit);
	}
	chomp @rows;

	my $items = SOAP::Data->name("item" => @rows)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("simple_searchReturn" => \$array);
}        


=begin WSDL
_RETURN @string list of subsystems and their classifications
_DOC Get a list of all the subsystems and their classifications. No input needed, it just returns a list of all the subsystems and their classifications
=cut
sub all_subsystem_classifications {
	# For every subsystem, in string-sorted order, emit one tab-joined row:
	# the first two classification fields followed by the subsystem name.
	my ($class) = @_;
	my $fig = FIG->new;

	my @rows;
	for my $subsystem (sort { $a cmp $b } ($fig->all_subsystems())) {
		my @fields = @{ $fig->subsystem_classification($subsystem) };

		# Force exactly two classification columns (truncate or pad).
		$#fields = 1;

		push @rows, join("\t", @fields, $subsystem);
	}

	my $items = SOAP::Data->name("item" => @rows)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("all_subsystem_classificationsReturn" => \$array);
}


=begin WSDL
_RETURN @string list of families
_DOC Get all the FIG protein families (FIGfams). No input needed, it just returns a list of all families
=cut
sub all_families {
	# Return the list FFs::all_families yields, wrapped as an
	# "ArrayOf_string".
	my ($class) = @_;
	my $fig = FIG->new;
	my $figfams = FFs->new($fig);

	my @families = $figfams->all_families;

	my $items = SOAP::Data->name("item" => @families)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("all_familiesReturn" => \$array);
}

=begin WSDL
_RETURN @string
_DOC Get all the FIG protein families (FIGfams) with their assigned functions. No input needed, it just returns a list of all the families and their functions.
=cut
sub all_families_with_funcs {
	# Return the list FFs::all_families_with_funcs yields, wrapped as an
	# "ArrayOf_string".
	my ($class) = @_;
	my $fig = FIG->new;
	my $figfams = FFs->new($fig);

	my @families = $figfams->all_families_with_funcs;

	my $items = SOAP::Data->name("item" => @families)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("all_families_with_funcsReturn" => \$array);
}

=begin WSDL
_IN families $string list of famids
_RETURN @string
_DOC Get all the pegs in some FIGfams. The input is a tab-separated list of family IDs, and the output is a two column table of [family id, peg]
=cut
sub list_members {
	# List the members of one or more families.
	#
	# Parameters:
	#   $class  - class the SOAP call was dispatched to
	#   $famids - tab-separated list of family ids
	# Returns a SOAP "ArrayOf_string" named list_membersReturn; each item is
	# one "family id<TAB>peg" row, in input order.
	my ($class, $famids) = @_;
	my $fig = FIG->new;

	# NOTE(review): this object is not used below; the constructor call is
	# kept in case it has side effects - confirm and remove if not.
	my $figfams = FFs->new($fig);

	my @out = ();
	foreach my $famid (split(/\t/, $famids))
	{
		my $famO = FF->new($fig, $famid);
		foreach my $peg ($famO->list_members)
		{
			# The items are declared as strings, so each row must be a
			# string rather than an array reference.
			push(@out, join("\t", $famid, $peg));
		}
	}
	return SOAP::Data->name("list_membersReturn" =>
			\SOAP::Data->value(
				SOAP::Data->name("item" => @out)->type('string')
				)->type("ArrayOf_string")
			);
}

=begin WSDL
_IN families $string list of pegs
_RETURN @string returns a 3-column table [PEG,Function,AliasesCommaSeparated]
_DOC Get all the pegs in some FIGfams, their functions, and aliases. Input is a tab-separated list of pegs, returns a 3-column tab-separated table [PEG, Function, Aliases], with the aliases joined by commas
=cut
sub CDS_data {
	# For each peg in the tab-separated input, emit the tab-joined row
	# "peg, function, comma-joined aliases" once per family containing the
	# peg. The rows are returned as an "ArrayOf_string".
	# NOTE(review): the family id is not part of the row, so a peg in
	# several families yields repeated rows and a peg in no family yields
	# none - confirm this is intended.
	my ($class, $pegs) = @_;
	my $fig = FIG->new;
	my $figfams = FFs->new($fig);

	my @rows;
	for my $peg (split(/\t/, $pegs)) {
		for my $famid ($figfams->families_containing_peg($peg)) {
			my $function = scalar($fig->function_of($peg));
			my $aliases = join(",", $fig->feature_aliases($peg));
			push @rows, join("\t", $peg, $function, $aliases);
		}
	}

	my $items = SOAP::Data->name("item" => @rows)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("CDS_dataReturn" => \$array);
}

=begin WSDL
_IN families $string list of pegs
_RETURN @string a 2-column table [PEG,Sequence]
_DOC Get the protein sequences for a list of proteins. Input is a tab-separated list of peg, returns a 2-column comma separated table of [PEG, sequence] 
=cut
sub CDS_sequences {
	# For each peg in the tab-separated input, emit the peg and what
	# FIG::get_translation returns for it, joined by commas. The rows are
	# returned as an "ArrayOf_string".
	my ($class, $pegs) = @_;
	my $fig = FIG->new;

	# Constructed as before, although nothing below reads it.
	my $figfams = FFs->new($fig);

	my @rows = map { join(",", $_, $fig->get_translation($_)) } split(/\t/, $pegs);

	my $items = SOAP::Data->name("item" => @rows)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("CDS_sequencesReturn" => \$array);
}

=begin WSDL
_IN sequences $string list of id seq pairs
_RETURN @string returns a 2-column table [Id,FamilyID]
_DOC Tries to put a protein sequence in a family. Input is a tab-separated id and sequence, delimited by new lines. The output is a comma-separated 2-column table [your sequence id, FamilyID] if the sequence is placed in a family.
=cut
sub is_member_of {
	# Try to place each submitted sequence in a family.
	#
	# Parameters:
	#   $class   - class the SOAP call was dispatched to
	#   $id_seqs - newline-delimited records, each "id<TAB>sequence"
	# Returns a SOAP "ArrayOf_string" named is_member_ofReturn; each item is
	# "id,family id" for a sequence FFs::place_in_family could place.
	# Sequences that are not placed produce no item.
	my ($class, $id_seqs) = @_;
	my $fig = FIG->new;
	my $figfams = FFs->new($fig);

	my @out = ();
	foreach my $pair (split(/\n/, $id_seqs))
	{
		my ($id, $seq) = split(/\t/, $pair);

		# Blank lines and records without a sequence cannot be placed.
		next unless defined $seq && length $seq;

		my ($famO, undef) = $figfams->place_in_family($seq);
		if ($famO)
		{
			push(@out, $id . "," . $famO->family_id);
		}
	}

	# The reply element is named after this method, like every other method
	# in this package.
	return SOAP::Data->name("is_member_ofReturn" =>
			\SOAP::Data->value(
				SOAP::Data->name("item" => @out)->type('string')
				)->type("ArrayOf_string")
			);
}

=begin WSDL
_IN peg $string
_IN maxN $string
_IN maxP $string
_RETURN @string
_DOC Retrieve the sims (precomputed BLAST hits) for a given protein sequence. Input is a peg, an optional maximum number of hits (default=50), and an optional maximum E value (default=1e-5). The output is a list of sims in modified tab separated (-m 8) format. Additional columns include length of query and database sequences, and method used.
=cut
sub sims {
	# Fetch the sims of a peg from FIG (always with the 'figx' selector) and
	# return each sim as one tab-joined string in an "ArrayOf_string".
	# Defaults: at most 50 hits, maximum E value 1e-5.
	my ($class, $peg, $maxN, $maxP) = @_;
	$maxN = 50   unless defined $maxN;
	$maxP = 1e-5 unless defined $maxP;

	my $fig = FIG->new;
	my @rows = map { join("\t", @$_) } $fig->sims($peg, $maxN, $maxP, 'figx');

	my $items = SOAP::Data->name("item" => @rows)->type('string');
	my $array = SOAP::Data->value($items)->type("ArrayOf_string");
	return SOAP::Data->name("simsReturn" => \$array);
}


	

##### INTERNAL METHODS

sub stdin_caller {
	# Run the program "$FIG_Config::bin/$name", write $arg to its standard
	# input, and return everything it prints as one string.
	# No separate error handle is supplied; per the IPC::Open3 documentation
	# the child's STDERR then arrives on the same handle as its STDOUT.
	my ($class, $name, $arg) = @_;
	my ($to_child, $from_child, $child_err);

	my $pid = open3($to_child, $from_child, $child_err, "$FIG_Config::bin/$name")
		or die "Cannot run open3 $name: $!";

	# Send the whole input, then close so the child sees end-of-file.
	$to_child->write($arg);
	close($to_child);

	my $output = join("", <$from_child>);
	close($from_child);

	# Reap the child so it does not linger as a zombie.
	waitpid $pid, 0;
	return $output;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3