[Bio] / FigWebServices / webservices_seed.cgi Repository:
ViewVC logotype

View of /FigWebServices/webservices_seed.cgi

Parent Directory | Revision Log


Revision 1.1 - (download) (annotate)
Thu May 29 23:58:31 2008 UTC (11 years, 8 months ago) by redwards
Branch: MAIN
CVS Tags: rast_rel_2008_06_18, rast_rel_2008_06_16, rast_2008_0924, rast_rel_2008_09_30, mgrast_rel_2008_0924, mgrast_rel_2008_1110_v2, mgrast_rel_2008_0625, rast_rel_2008_10_09, rast_release_2008_09_29, mgrast_rel_2008_0806, mgrast_rel_2008_0923, mgrast_rel_2008_0919, mgrast_rel_2008_1110, rast_rel_2008_09_29, mgrast_rel_2008_0917, rast_rel_2008_10_29, rast_rel_2008_11_24, rast_rel_2008_08_07
Adding new wsdl/web service components

#__perl__

use strict;
use Carp;
use SOAP::Lite;
use SOAP::Transport::HTTP;
use Data::Dumper;

# Entry point of the CGI: hand the incoming request to SOAP::Lite's CGI
# transport, which dispatches every SOAP call to a method of the
# SeedWebServices package defined further down in this file.
SOAP::Transport::HTTP::CGI->dispatch_to('SeedWebServices')->handle;

package SeedWebServices;

use FIG;
use FIG_Config;
use Data::Dumper;
use FigFams;
use FigFam;
use IPC::Open3;
use PinnedRegions;



=begin WSDL
_IN alias $string
_RETURN $string
_DOC Retrieve the protein sequence for a given identifier. Input is an alias, output is a sequence
=cut
sub ali_to_seq {
	# Thin SOAP wrapper: feeds the alias to the external "ali_to_seq"
	# program through stdin_caller() and returns whatever that program
	# prints, as a single string.
	my $class = shift;
	my $alias = shift;
	return stdin_caller($class, 'ali_to_seq', $alias);
}

=begin WSDL
_IN pegs $string
_RETURN $string
_DOC Retrieve the set of pegs in order along the chromosome. Input is a comma separated list of pegs, and output is the pegs in order along the genome.
=cut
sub adjacent {
	# SOAP wrapper around the external "adjacent" program.
	#   $arg - comma separated list of pegs.
	# Every comma (together with any whitespace following it) is turned into
	# a newline before the list is written to the program via stdin_caller().
	# Returns the program's output as one string.
	my ($class, $arg) = @_;
	(my $peg_lines = $arg) =~ s/\,\s*/\n/g;
	return stdin_caller($class, 'adjacent', $peg_lines);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the clusters for a peg by bidirectional best hits. Input is a peg, output is two column table of [peg, cluster]
=cut
sub cluster_by_bbhs {
	# Thin SOAP wrapper: passes the peg to the external "cluster_by_bbhs"
	# program through stdin_caller() and returns its output unchanged.
	my $class = shift;
	my $peg   = shift;
	return stdin_caller($class, 'cluster_by_bbhs', $peg);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the clusters for a peg by similarity. Input is a peg, output is two column table of [peg, cluster]
=cut
sub cluster_by_sim {
	# Thin SOAP wrapper: passes the peg to the external "cluster_by_sim"
	# program through stdin_caller() and returns its output unchanged.
	my $class = shift;
	my $peg   = shift;
	return stdin_caller($class, 'cluster_by_sim', $peg);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the annotations for a peg from all other known sources. Input is a peg, output is two column table of [peg, other function]
=cut
sub external_calls {
	# Thin SOAP wrapper: passes the peg to the external "external_calls"
	# program through stdin_caller() and returns its output unchanged.
	my $class = shift;
	my $peg   = shift;
	return stdin_caller($class, 'external_calls', $peg);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the functional annotation of a given protein identifier. Input is a peg, output is a function
=cut
sub function_of {
	# Returns the result of FIG's function_of() for the given peg.
	# The call is made in scalar context, so exactly one value comes back.
	my (undef, $peg) = @_;
	my $function = FIG->new->function_of($peg);
	return $function;
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the genome(s) that a given protein identifier refers to. Input is a peg, output is a single column table of genomes
=cut
sub genomes_of {
	# Thin SOAP wrapper: passes the peg to the external "genomes_of"
	# program through stdin_caller() and returns its output unchanged.
	my $class = shift;
	my $peg   = shift;
	return stdin_caller($class, 'genomes_of', $peg);
}


=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Get the genus and species of a genome identifier. Input is a genome ID, output is the genus and species of the genome
=cut
sub genus_species {
	# Returns whatever FIG's genus_species() yields for the genome id.
	# The caller's context is passed straight through to the FIG method.
	my (undef, $genome_id) = @_;
	return FIG->new->genus_species($genome_id);
}


=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the DNA sequence for a given protein identifier. Input is a peg, output is the DNA sequence in fasta format.
=cut
sub fid2dna {
	# Thin SOAP wrapper: passes the peg to the external "fid2dna"
	# program through stdin_caller() and returns its output unchanged.
	my $class = shift;
	my $peg   = shift;
	return stdin_caller($class, 'fid2dna', $peg);
}


=begin WSDL
_IN genomeid $string
_IN location1 $string
_IN location2 $string
_RETURN @string
_DOC Get the DNA sequence for a region in a genome. Input is a genome ID and one or more locations in the form contig_start_stop, output is the DNA sequence in fasta format.
=cut
sub dna_sequence {
	# Returns the result of FIG's dna_seq() for a genome and any number of
	# locations.
	#   $genome    - genome id
	#   @locations - every remaining argument, passed on unchanged
	# dna_seq() is called in scalar context, so a single value is returned.
	my $class     = shift;
	my $genome    = shift;
	my @locations = @_;
	return scalar(FIG->new->dna_seq($genome, @locations));
}



=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Get all the protein identifiers associated with a genome. Input is a genome id, output is a list of pegs in that genome
=cut
sub pegs_of {
	# Returns the list produced by FIG's pegs_of() for the genome id,
	# joined into one comma separated string.
	my (undef, $genome_id) = @_;
	my @pegs = FIG->new->pegs_of($genome_id);
	return join(',', @pegs);
}

=begin WSDL
_IN genomeid $string
_IN contig $string
_RETURN $string
_DOC Get the length of the DNA sequence in a contig in a genome. Input is a genome id and a contig name, return is the length of the contig
=cut
sub contig_ln {
	# Forwards all arguments after the class name to FIG's contig_ln() and
	# returns its result. Per the WSDL block above these are a genome id
	# and a contig name.
	shift;    # drop the class name; the rest of @_ goes to FIG as-is
	return FIG->new->contig_ln(@_);
}

=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Test whether an organism is Archaeal. Input is a genome identifier, and output is true or false (or 1 or 0)
=cut
sub is_archaeal {
	# Thin SOAP wrapper: passes the genome id to the external "is_archaeal"
	# program through stdin_caller() and returns its output unchanged.
	my $class     = shift;
	my $genome_id = shift;
	return stdin_caller($class, 'is_archaeal', $genome_id);
}

=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Test whether an organism is Bacterial. Input is a genome identifier, and output is true or false (or 1 or 0)
=cut
sub is_bacterial {
	# Thin SOAP wrapper: passes the genome id to the external "is_bacterial"
	# program through stdin_caller() and returns its output unchanged.
	my $class     = shift;
	my $genome_id = shift;
	return stdin_caller($class, 'is_bacterial', $genome_id);
}

=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Test whether an organism is Eukaryotic. Input is a genome identifier, and output is true or false (or 1 or 0)
=cut
sub is_eukaryotic {
	# Thin SOAP wrapper: passes the genome id to the external
	# "is_eukaryotic" program through stdin_caller() and returns its output
	# unchanged.
	my $class     = shift;
	my $genome_id = shift;
	return stdin_caller($class, 'is_eukaryotic', $genome_id);
}

=begin WSDL
_IN genomeid $string
_RETURN $string
_DOC Test whether an organism is a Prokaryote. Input is a genome identifier, and output is true or false (or 1 or 0)
=cut
sub is_prokaryotic {
	# Thin SOAP wrapper: passes the genome id to the external
	# "is_prokaryotic" program through stdin_caller() and returns its output
	# unchanged.
	my $class     = shift;
	my $genome_id = shift;
	return stdin_caller($class, 'is_prokaryotic', $genome_id);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the translation (protein sequence) of a peg. Input is a peg, output is the protein sequence
=cut
sub translation_of {
	# Thin SOAP wrapper: passes the peg to the external "translation_of"
	# program through stdin_caller() and returns its output unchanged.
	my $class = shift;
	my $peg   = shift;
	return stdin_caller($class, 'translation_of', $peg);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the translation (protein sequence) of a peg. Input is a peg, output is translation
=cut
sub get_translation {
	# Returns the result of FIG's get_translation() for the peg. Unlike
	# translation_of(), this queries the FIG object directly instead of
	# running an external program. The FIG call is made in scalar context.
	my (undef, $peg) = @_;
	return scalar(FIG->new->get_translation($peg));
}

=begin WSDL
_IN peg $string
_RETURN @string
_DOC Get the location of a peg on its contig. Input is a peg, output is list of loc on contig
=cut
sub feature_location {
	# Returns the list produced by FIG's feature_location() for the peg.
	# The FIG method is always called in list context; the collected list is
	# then returned to the caller.
	my (undef, $peg) = @_;
	my @locations = FIG->new->feature_location($peg);
	return @locations;
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the aliases of a peg. These are the identifiers that other databases use. Input is a peg, output is tab separated string of aliases
=cut
sub aliases_of {
	# Thin SOAP wrapper: passes the peg to the external "aliases_of"
	# program through stdin_caller() and returns its output unchanged.
	my $class = shift;
	my $peg   = shift;
	return stdin_caller($class, 'aliases_of', $peg);
}

=begin WSDL
_IN alias $string
_RETURN $string
_DOC Get the FIG ID (peg) for a given external identifier. Input is an identifier used by another database, output is our identifier
=cut
sub alias2fig {
	# Thin SOAP wrapper: passes the external identifier to the "alias2fig"
	# program through stdin_caller() and returns its output unchanged.
	my $class = shift;
	my $alias = shift;
	return stdin_caller($class, 'alias2fig', $alias);
}



=begin WSDL
_IN EC_number $string ec code
_RETURN $string ec name
_DOC Get the name for a given E.C. number. Input is an EC number, output is the name
=cut
sub ec_name {
	# Forwards all arguments after the class name to FIG's ec_name() and
	# returns its result. The FIG call is made in scalar context.
	shift;    # drop the class name; the rest of @_ goes to FIG as-is
	return scalar(FIG->new->ec_name(@_));
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the pegs that are coupled to any given peg. Input is a peg, output is list of [protein, score] for things that are coupled to this peg
=cut
sub coupled_to {
	# Formats the result of FIG's coupled_to() as text: one line per
	# returned entry, holding the entry's first two elements separated by a
	# comma. Lines are joined with newlines (no trailing newline).
	# Returns undef when FIG reports nothing.
	shift;    # drop the class name; the rest of @_ goes to FIG as-is
	my @couplings = FIG->new->coupled_to(@_);

	# Explicit undef rather than a bare return, so a list-context caller
	# still receives exactly one (undefined) element.
	return undef unless @couplings;

	my @lines;
	foreach my $pair (@couplings) {
		push @lines, join(',', $pair->[0], $pair->[1]);
	}
	return join("\n", @lines);
}

=begin WSDL
_IN peg $string
_RETURN $string
_DOC Get the pegs that may be coupled to this peg through abstract coupling. Input is a peg, output is list of [protein, score] for things that are coupled to this peg
=cut
sub abstract_coupled_to {
	# Formats the result of FIG's abstract_coupled_to() as text: one
	# "first,second" line per returned entry, newline separated, or undef
	# when FIG returns an empty list.
	shift;    # drop the class name; the rest of @_ goes to FIG as-is
	my @hits = FIG->new->abstract_coupled_to(@_);
	return @hits
		? join("\n", map { "$_->[0],$_->[1]" } @hits)
		: undef;
}

=begin WSDL
_IN peg_id $string
_IN n_pch_pins $string
_IN n_sims $string
_IN sim_cutoff $string
_IN color_sim_cutoff $string
_IN sort_by $string
_RETURN $string
_DOC Input is a FIG (PEG) ID and ..., output is the pinned regions data
=cut 
sub pinned_region_data {
    # Computes pinned-region data for one peg and returns it as text.
    #
    # Arguments after the class name (all optional except $peg):
    #   $peg               - peg the regions are pinned on
    #   $n_pch_pins        - default 5
    #   $n_sims            - default 0
    #   $sim_cutoff        - default 1e-20
    #   $color_sim_cutoff  - default 1e-20
    #   $sort_by           - default ''
    #   $fast_color        - default 0
    #   $sims_from         - default 'blast'
    #   $region_size       - default 16000
    # The WSDL block above only advertises the first six; the last three
    # are accepted positionally as well.
    #
    # Returns the Data::Dumper rendering of whatever
    # PinnedRegions::pinned_regions() produces.
    my ($class, $peg, $n_pch_pins, $n_sims, $sim_cutoff, $color_sim_cutoff, $sort_by, $fast_color, $sims_from, $region_size) = @_;

    my $fig = FIG->new;

    # Only undefined arguments are defaulted; explicit 0 or '' is respected.
    $n_pch_pins       = 5       unless defined $n_pch_pins;
    $n_sims           = 0       unless defined $n_sims;
    $sim_cutoff       = 1e-20   unless defined $sim_cutoff;
    $color_sim_cutoff = 1e-20   unless defined $color_sim_cutoff;
    $sort_by          = ''      unless defined $sort_by;

    $fast_color       = 0       unless defined $fast_color;
    $sims_from        = 'blast' unless defined $sims_from;
    $region_size      = 16000   unless defined $region_size;

    # 'show_genomes' appears exactly once here. It used to be listed twice
    # ('' and then []); in a hash constructor the later value wins, so the
    # empty array reference is the value that was always in effect.
    my $pin_desc = {
                    'pegs'                   => [$peg],
                    'collapse_close_genomes' => 0,
                    'n_pch_pins'             => $n_pch_pins,
                    'n_sims'                 => $n_sims,
                    'sim_cutoff'             => $sim_cutoff,
                    'color_sim_cutoff'       => $color_sim_cutoff,
                    'sort_by'                => $sort_by,
                    'show_genomes'           => [],
                   };

    my $maps = PinnedRegions::pinned_regions($fig, $pin_desc, $fast_color, $sims_from, $region_size);
    my $txt  = Dumper($maps);
    return $txt;
}


=begin WSDL
_IN complete $string
_IN restrictions $string
_IN domain $string
_RETURN @string
_DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a comma separated list of genomes
=cut
sub all_genomes {
	# Forwards all arguments after the class name to FIG's genomes() and
	# returns the resulting genome list as one comma separated string.
	shift;    # drop the class name; the rest of @_ goes to FIG as-is
	return join(',', FIG->new->genomes(@_));
}

=begin WSDL
_IN complete $string
_IN restrictions $string
_IN domain $string
_RETURN @string
_DOC Get a set of genomes. The inputs are a series of constraints - whether the sequence is complete, other restrictions, and a domain of life (Bacteria, Archaea, Eukarya, Viral, Environmental Genome). Output is a comma separated list of genomes.
=cut
sub genomes {
	# Forwards all arguments after the class name to FIG's genomes() and
	# returns one string per genome, of the form
	#   "<genome id>\t<result of FIG genus_species() for that id>"
	# Unlike all_genomes(), the result is a list, not a joined string.
	my $class = shift();
	my $fig = FIG->new;

	my @genomes;
	foreach my $genome ($fig->genomes(@_))
	{
		# genus_species() is called in scalar context so that exactly one
		# value is appended after the genome id.
		my $genus_species = $fig->genus_species($genome);
		push @genomes, join("\t", $genome, $genus_species);
	}

	return @genomes;
}

=begin WSDL
_IN pattern1 $string
_IN pattern2 $string
_RETURN @string
_DOC Search and grep through the database. Input is two patterns, first one is used in search_index, second used to grep the results to restrict to a smaller set.
=cut
sub search_and_grep {
	# Runs FIG's search_index() on $arg1 and keeps only the result strings
	# that match the regular expression $arg2.
	#
	#   $arg1 - pattern handed to search_index()
	#   $arg2 - Perl regular expression used to filter the results; when it
	#           is undefined or empty, nothing is filtered out
	#
	# search_index() is expected to return two array references: the first
	# holds array references (every element of every inner array is a
	# candidate), the second holds plain candidate strings.
	# Returns the surviving candidates joined with newlines.
	# Dies (reported to the client as a SOAP fault) when $arg2 is not a
	# valid regular expression.
	my ($class, $arg1, $arg2) = @_;

	my $fig = FIG->new;
	my ($pegs, $roles) = $fig->search_index($arg1);

	# Flatten everything into one candidate list, peg entries first and
	# roles last, so the output order is the same as before. A missing
	# (undefined) result set is treated as empty instead of being fatal.
	my @candidates = map { @$_ } @{ $pegs || [] };
	push @candidates, @{ $roles || [] };

	my @result_list;
	if (defined $arg2 && length $arg2)
	{
		# Compile the pattern once rather than once per entry.
		my $filter = eval { qr/$arg2/ };
		die "search_and_grep: invalid pattern '$arg2': $@" unless defined $filter;
		@result_list = grep { $_ =~ $filter } @candidates;
	}
	else
	{
		# An empty pattern must never reach the match operator: Perl would
		# silently reuse the last successfully matched regex instead.
		@result_list = @candidates;
	}

	chomp @result_list;
	return join("\n", @result_list);
}


=begin WSDL
_IN pattern $string
_RETURN @string
_DOC Search the database. Input is a pattern to search for, output is tab separated list of pegs and roles
=cut
sub simple_search {
	# Runs FIG's search_index() on the pattern and renders the first of its
	# results as text: each entry becomes one tab separated line, and the
	# lines are joined with newlines. The second result of search_index()
	# (the roles) is not included in the output.
	my (undef, $pattern) = @_;

	my ($peg_hits) = FIG->new->search_index($pattern);

	my @rows;
	foreach my $hit (@$peg_hits) {
		my $row = join("\t", @$hit);
		push @rows, $row;
	}

	chomp(@rows);    # strip a trailing newline from each row, if any
	return join("\n", @rows);
}


=begin WSDL
_RETURN $string list of subsystems and their classifications
_DOC Get a list of all the subsystems and their classifications. No input needed, it just returns a list of all the subsystems and their classifications
=cut
sub all_subsystem_classifications {
	# Builds a three column, tab separated table with one line per
	# subsystem: the first two elements of the array returned by FIG's
	# subsystem_classification(), followed by the subsystem name.
	# Subsystems are listed in string-sorted order and every line ends with
	# a newline. Returns undef when FIG reports no subsystems.
	my ($class) = @_;
	my $fig = FIG->new;
	my $output;

	foreach my $subsystem (sort { $a cmp $b } $fig->all_subsystems())
	{
		my @classification = @{ $fig->subsystem_classification($subsystem) };
		# Force exactly two classification columns: longer lists are
		# truncated, shorter ones are padded with empty fields.
		$#classification = 1;
		$output .= join("\t", @classification, $subsystem) . "\n";
	}
	return $output;
}


=begin WSDL
_RETURN $string list of families
_DOC Get all the FIG protein families (FIGfams). No input needed, it just returns a list of all families
=cut
sub all_families {
	# Returns the list produced by FigFams' all_families(), called in list
	# context. Takes no arguments besides the class name.
	my ($class) = @_;
	my $fig     = FIG->new;
	my $figfams = FigFams->new($fig);

	my @out = $figfams->all_families;
	return @out;
}

=begin WSDL
_RETURN $string list of families and funcs
_DOC Get all the FIG protein families (FIGfams) with their assigned functions. No input needed, it just returns a list of all the families and their functions.
=cut
sub all_families_with_funcs {
	# Returns the list produced by FigFams' all_families_with_funcs(),
	# called in list context. Takes no arguments besides the class name.
	my $figfams  = FigFams->new(FIG->new);
	my @families = $figfams->all_families_with_funcs;
	return @families;
}

=begin WSDL
_IN families $string list of famids
_RETURN $string 2 col table, famid, peg
_DOC Get all the pegs in some FIGfams. The input is a tab-separated list of family IDs, and the output is a two column table of [family id, peg]
=cut
sub list_members {
	# Lists the members of one or more FIGfams.
	#   $famids - tab separated family ids
	# Returns a list of [family id, peg] array references, one per member
	# reported by FigFam's list_members(), in input order.
	my ($class, $famids) = @_;
	my $fig     = FIG->new;
	# NOTE(review): $figfams is never used below; it is still constructed
	# in case the constructor has side effects - confirm before removing.
	my $figfams = FigFams->new($fig);
	my @family_ids = split(/\t/, $famids);
	warn("Starting 2 list members $famids\n");

	my @rows;
	for my $famid (@family_ids) {
		my $family = FigFam->new($fig, $famid);
		push @rows, map { [$famid, $_] } $family->list_members;
	}
	return @rows;
}

=begin WSDL
_IN families $string list of pegs
_RETURN $string returns a 3-column table [PEG,Function,AliasesCommaSeparated]
_DOC Get all the pegs in some FIGfams, their functions, and aliases. Input is a tab-separated list of families, returns a 3-column comma separated table [PEG, Function, Aliases]
=cut
sub CDS_data {
	# Reports function and aliases for pegs that belong to a FIGfam.
	#   $pegs - tab separated list of pegs
	# Returns a list of [peg, function, [aliases]] array references. A peg
	# that FigFams' families_containing_peg() places in no family produces
	# no row.
	my ($class, $pegs) = @_;
	my $fig     = FIG->new;
	my $figfams = FigFams->new($fig);

	my @out = ();
	foreach my $peg (split(/\t/, $pegs))
	{
		my @famids = $figfams->families_containing_peg($peg);
		next unless @famids;

		# The function and aliases depend only on the peg, so they are
		# looked up once per peg rather than once per containing family.
		my $function = $fig->function_of($peg);
		my @aliases  = $fig->feature_aliases($peg);

		# NOTE(review): one identical row is emitted per containing family,
		# which is what this routine has always done; if a peg can be in
		# several families this yields duplicate rows - confirm whether a
		# single row per peg is what clients actually want.
		foreach my $famid (@famids)
		{
			push(@out, [$peg, $function, [@aliases]]);
		}
	}
	return @out;
}

=begin WSDL
_IN families $string list of pegs
_RETURN $string a 2-column table [PEG,Sequence]
_DOC Get the protein sequences for a list of FIGfams. Input is a tab-separated list of families, returns a 2-column comma separated table of [PEG, sequence] 
=cut
sub CDS_sequences {
	# Looks up the translation of each peg.
	#   $pegs - tab separated list of pegs
	# Returns a list of array references, one per input peg and in input
	# order, each holding the peg followed by whatever FIG's
	# get_translation() returns for it in list context.
	my ($class, $pegs) = @_;
	my $fig     = FIG->new;
	# NOTE(review): $figfams is never used below; it is still constructed
	# in case the constructor has side effects - confirm before removing.
	my $figfams = FigFams->new($fig);

	my @rows;
	for my $peg (split(/\t/, $pegs)) {
		my @row = ($peg, $fig->get_translation($peg));
		push @rows, \@row;
	}
	return @rows;
}

=begin WSDL
_IN sequences $string list of id seq pairs
_RETURN $string returns a 2-column table [Id,FamilyID]
_DOC Tries to put a protein sequence in a family. Input is a tab-separated id and sequence, delimited by new lines. The output is a comma-separated 2-column table [your sequence id, FamilyID] if the sequence is placed in a family.
=cut
sub is_member_of {
	# Tries to place each submitted sequence in a FIGfam.
	#   $id_seqs - newline separated records, each "<id>\t<sequence>"
	# Returns a list of [id, family id] array references, only for the
	# records for which FigFams' place_in_family() returned a true family
	# object as its first result.
	my ($class, $id_seqs) = @_;
	my $fig     = FIG->new;
	my $figfams = FigFams->new($fig);

	my @placed;
	for my $record (split(/\n/, $id_seqs)) {
		my ($id, $seq) = split(/\t/, $record);
		# Only the first value returned by place_in_family() is needed.
		my ($family) = $figfams->place_in_family($seq);
		next unless $family;
		push @placed, [$id, $family->family_id];
	}
	return @placed;
}

=begin WSDL
_IN peg $string
_IN maxN $string
_IN maxP $string
_RETURN $string
_DOC Retrieve the sims (precomputed BLAST hits) for a given protein sequence. Input is a peg, an optional maximum number of hits (default=50), and an optional maximum E value (default=1e-5). The output is a list of sims in modified tab separated (-m 8) format. Additional columns include length of query and database sequences, and method used.
=cut
sub sims {
	# Returns the result of FIG's sims() for a peg as text.
	#   $peg  - query peg
	#   $maxN - second argument to FIG's sims(); defaults to 50 when undefined
	#   $maxP - third argument to FIG's sims(); defaults to 1e-5 when undefined
	# FIG's sims() is always called with 'figx' as its last argument.
	# Each returned entry becomes one tab separated, newline terminated
	# line. Returns undef when there are no entries.
	my ($class, $peg, $maxN, $maxP) = @_;
	$maxN = 50   unless defined $maxN;
	$maxP = 1e-5 unless defined $maxP;

	my $table;
	for my $sim (FIG->new->sims($peg, $maxN, $maxP, 'figx')) {
		$table .= join("\t", @$sim) . "\n";
	}
	return $table;
}



##### INTERNAL METHODS

sub stdin_caller {
	# Runs the program "$FIG_Config::bin/$name", writes $arg to its standard
	# input, and returns everything the program prints as one string.
	#   $class - unused; accepted so SOAP methods can pass it straight on
	#   $name  - file name of the program inside $FIG_Config::bin
	#   $arg   - text written to the program's standard input
	# All input is written before any output is read.
	my ($class, $name, $arg) = @_;

	# $child_err is deliberately left undefined: IPC::Open3 then sends the
	# child's STDERR to the same handle as its STDOUT.
	my ($to_child, $from_child, $child_err);
	my $pid = open3($to_child, $from_child, $child_err, "$FIG_Config::bin/$name");
	die "Cannot run open3 $name: $!" unless $pid;

	$to_child->write($arg);
	close($to_child);    # signals end of input to the child

	my $output = join("", <$from_child>);
	close($from_child);
	waitpid $pid, 0;     # reap the child so no zombie is left behind
	return $output;
}


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3