[Bio] / FigWebServices / figfam_server.cgi Repository:
ViewVC logotype

View of /FigWebServices/figfam_server.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (annotate)
Fri May 1 18:44:59 2009 UTC (10 years, 6 months ago) by disz
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Simple version of figfam server

use strict;
use FIG;
use CGI;
use Data::Dumper;
use FFs;
use FF;
use Kmers;

my $kmerDB = "/home/overbeek/Proj/MotifFF/Rel9/10mers/kmer.db";
my $friDB = "/home/overbeek/Proj/MotifFF/Rel9/10mers/FRI.db";
my $binDB = "/scratch/olson/table.rel9+sims.Apr25";
my $kmers = Kmers->new_using_C($binDB, $friDB);
my $kmers_ross = new Kmers($kmerDB, $friDB);

my $cgi = new CGI;
my $fig = new FIG;
my $ffs = new FFs("$FIG_Config::FigfamsData");
my $function = $cgi->param('function');
$function or myerror($cgi, "500 missing argument", "missing function argument");

#
#The FIGfam server processes requests of the form:
#
#    1. PLACE-IN-FAMILY takes as input a list of protein sequences.  It
#       returns a list where each element describes the outcome of
#       trying to place the corresponding input sequence into a
#       FIGfam.  Each output can be either
#
#                COULD-NOT-PLACE-IN-FAMILY
#       or
#                ID FUNCTION
#
#       where ID is of the form FIGxxxxxx and FUNCTION is the family
#       function. 
#
#    2. MEMBERS-OF-FAMILIES takes as input a list of FIGfam IDs.  The
#       output is a list of functions for those families
#       (INVALID-FAMILY will be returned for IDs that do not correspond
#       to an active family), as well as a list of the IDs in each family.
#
#    3. SHOULD-BE-MEMBER takes as input a list of 2-tuples
#
#             [FIGfam-ID,protein sequence]
#
#       It returns a list of boolean values indicating whether or not
#       the indicated protein sequence can be placed in the designated
#       family.
#
#    4. ALL-FAMILIES returns a list of [FIGfam-ID,function] tuples.
#
#
#    5. ASSIGN-FUNCTION-TO-PROT is similar to PLACE-IN-FAMILY, except
#       that the returned list contains either
#
#                COULD-NOT-PLACE-IN-FAMILY
#       or
#                ID FUNCTION
#
#       That is, it does not indicate which FIGfam was used to
#       determine the function.  This allows higher-performance
#       alternatives for cases in which multiple FIGfams implement the
#       same function.  The algorithm supported utilizes the underlying
#       FIGfams, but characterizes sets that implement the same
#       function and does not support distinguishing which FIGfam
#       is actually the right subgrouping.
#
#	6. ASSIGN-FUNCTIONS-TO-DNA takes as input a list of DNA
#       sequences.  It returns a list where each element describes 
#       a region of DNA that is believed to be part of a gene encoding
#       a protein sequence that would be placed into a FIGfam
#       successfully, if the whole protein sequence could be
#       determined.  That is, the returned list will contain entrties
#       of either the form
#
#                COULD-NOT-PLACE-ANY-REGIONS-IN-FAMILIES
#       or
#                BEGIN1 END1 FUNCTION1 BEGIN2 END2 FUNCTION2 ...
#
#       where BEGIN and END specify a region (if BEGIN is greater than
#       END, the region described is on the reverse strand) and
#       FUNCTION is the family function of the protein sequence that is
#       believed to be encoded by DNA including the embedded region.
#       Each input sequence can produce an arbitrary number of matched
#       regions, there will be 3 fields for each matched region.  Note
#       that the described region may include frameshifts and embedded
#       stop codons.  The algorithm seeking meaningful sections of DNA
#       assumes that it may have an incomplete, low-quality sequence
#       (and uses an algorithm that attempts to locate meaningful
#       matches even so).


print $cgi->header();

if ($function eq "members_of_families") {
	my @id = $cgi->param('id');
	@id or myerror($cgi, "500 missing id", "figfam server missing id argument");
	foreach my $famid (@id) {
		my $fam = new FF($famid, $ffs->{dir});
		if ($fam) {
			print $famid, " ",  $fam->family_function(), "\n";
			print join("\n", $fam->list_members()), "\n";
		} else {
			print "$famid INVALID-FAMILY\n";
		}
	}

} elsif ($function eq "should_be_member") {
	my @id = $cgi->param('id_seq');
	@id or myerror($cgi, "500 missing id_seq", "figfam server missing id_seq argument");
	foreach my $parm (@id) {
		my ($famid, $seq) = split /,/, $parm; 
		my $fam = new FF($famid, $ffs->{dir});
		if ($fam) {
			my ($bool, $sims) = $fam->should_be_member($seq), "\n";
			print $bool?1:0, "\n";
		} else  {
			print "INVALID-FAMILY\t$famid\n";
		}
	}

} elsif ($function eq "all_families") {
	print join("\n", $ffs->all_families(1)), "\n";

} elsif ($function eq "place_in_family") {
	my @id = $cgi->param('id_seq');
	@id or myerror($cgi, "500 missing seq", "figfam server missing seq argument");
	foreach my $parm (@id) {
		my ($id, $seq) = split /,/, $parm; 
print "\n$seq\n";
		my ($fam) = $ffs->place_in_family($seq);
		if ($fam) {
			print "$id\t$fam->{id}\t$fam->{function}\n";
		} else {
			print "COULD-NOT-PLACE-IN_FAMILY\n";
		}
	}

} elsif ($function eq "assign_function_to_prot") {
	my @id = $cgi->param('id_seq');
	@id or myerror($cgi, "500 missing id_seq", "figfam server missing id_seq argument");
	foreach my $parm (@id) {
		my ($id, $seq) = split /,/, $parm; 
		my $func = $kmers->assign_function_to_prot($seq);
		if ($func) {
			print "$id\t$func\n";
		} else {
			print "$id\tCOULD-NOT-PLACE\n";
		}
	}
} elsif ($function eq "assign_function_to_DNA") {
	my @id = $cgi->param('id_seq');
	@id or myerror($cgi, "500 missing id_seq", "figfam server missing id_seq argument");
	foreach my $parm (@id) {
		my ($id, $seq) = split /,/, $parm; 
		my @hits = $kmers->assign_functions_to_DNA($seq);
		foreach my $hit (@hits) {
			my ($b,$e,$func) = @$hit;
			print join("\t",($id,join("_",($id,$b,$e)),$func)),"\n";
		}
#		print "Ross\n";
#		my @hits = $kmers_ross->assign_functions_to_DNA($seq);
#		foreach my $hit (@hits) {
#			my ($b,$e,$func) = @$hit;
#			print join("\t",($id,join("_",($id,$b,$e)),$func)),"\n";
#		}
	}
} else { 
	myerror($cgi, "500 bad function argument $function", "usage:figfam function=[place_in_family | members_of_families | should_be_member | all_families");
}

sub myerror
{
    my($cgi, $stat, $msg) = @_;
    print $cgi->header(-status =>  $stat);
    
    print "$msg\n";
    exit;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3