[Bio] / FigWebServices / figfam_server_1.cgi Repository:
ViewVC logotype

View of /FigWebServices/figfam_server_1.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (download) (annotate)
Fri May 1 20:41:33 2009 UTC (10 years, 11 months ago) by disz
Branch: MAIN
CVS Tags: mgrast_dev_08112011, rast_rel_2009_05_18, mgrast_dev_08022011, rast_rel_2014_0912, myrast_rel40, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Changes since 1.1: +2 -1 lines
fixed some error handling

use strict;
use FIG;
use CGI;
use Data::Dumper;
use FFs;
use FF;
use Kmers;
use FIG_Config;


my $fig = new FIG;
my $ffs = new FFs("$FIG_Config::FigfamsData");

my $kmerDB = "/home/overbeek/Proj/MotifFF/Rel9/10mers/kmer.db";
my $friDB = "/home/overbeek/Proj/MotifFF/Rel9/10mers/FRI.db";
my $binDB = "/scratch/olson/table.rel9+sims.Apr25";
my $kmers = Kmers->new_using_C($binDB, $friDB);
my $kmers_ross = new Kmers($kmerDB, $friDB);

$| = 1;
my $use_tempfile = 0;
my $hook_called;
my $proc_buffer;
my $clean_up = 0;

my $header = "Content-type: text/plain\n\n";

my ($func, $function) = split(/=/, $ENV{QUERY_STRING});
if ($func ne "function") {
	myerror(CGI->new(),  "500 missing function", "Unknown argument $func\n");
}
$function or myerror(CGI->new(), "500 missing argument", "missing function argument");


#print STDERR "$function\n";
if ($function eq "members_of_families") {
	my $cgi = new CGI;
	my @id = $cgi->param('id');
	print $cgi->header();
	@id or myerror($cgi, "500 missing id", "figfam server $function missing id argument");
	foreach my $famid (@id) {
		my $fam = new FF($famid, $ffs->{dir});
		if ($fam) {
			print $famid, " ",  $fam->family_function(), "\n";
			print join("\n", $fam->list_members()), "\n";
		} else {
			print "$famid INVALID-FAMILY\n";
		}
	}
} elsif ($function eq "should_be_member") {
	my $cgi = new CGI;
	print $cgi->header();
	my @id = $cgi->param('id_seq');
	@id or myerror($cgi, "500 missing id_seq", "figfam server $function missing id_seq argument");
	foreach my $parm (@id) {
		my ($famid, $seq) = split /,/, $parm; 
		my $fam = new FF($famid, $ffs->{dir});
		if ($fam) {
			my ($bool, $sims) = $fam->should_be_member($seq), "\n";
			print $bool?1:0, "\n";
		} else  {
			print "INVALID-FAMILY\t$famid\n";
		}
	}
} elsif ($function eq "all_families") {
	my $cgi = new CGI;
	print $cgi->header();
	print join("\n", $ffs->all_families(1)), "\n";
} elsif ($function eq "place_in_family" || $function eq "assign_functions_to_DNA" ||
	$function eq "assign_function_to_prot") {
	$clean_up = 1;
	$proc_buffer = new ProcBufferFASTA();
	#print STDERR "Setting hook\n";
	CGI::upload_hook(\&hook, $proc_buffer, $use_tempfile);
} else {
	myerror(CGI->new(),  "500 invalid function", "invalid function $function\n");
}

if ($clean_up) {
	$proc_buffer->parse_fasta();
}


sub hook {
	my ($filename, $buffer, $bytes_read, $data) = @_;
	
	if (not $hook_called) {
		print $header;
#		print "BYTES = $bytes_read\n$buffer<br>";
		#print "ENV String ", $ENV{QUERY_STRING}, "\n";
		#print "Filename $filename\n";
	} else {
#		print "Read $bytes_read of $filename\n$buffer<br>";
	}
	$data->process_block($buffer);
	$hook_called += $bytes_read;
	
}

if ($hook_called) {
	#print "All done $hook_called\n";
}

exit;

#
#The FIGfam server processes requests of the form:
#
#    1. PLACE-IN-FAMILY takes as input a list of protein sequences.  It
#       returns a list where each element describes the outcome of
#       trying to place the corresponding input sequence into a
#       FIGfam.  Each output can be either
#
#                COULD-NOT-PLACE-IN-FAMILY
#       or
#                ID FUNCTION
#
#       where ID is of the form FIGxxxxxx and FUNCTION is the family
#       function. 
#
#    2. MEMBERS-OF-FAMILIES takes as input a list of FIGfam IDs.  The
#       output is a list of functions for those families
#       (INVALID-FAMILY will be returned for IDs that do not correspond
#       to an active family), as well as a list of the IDs in each family.
#
#    3. SHOULD-BE-MEMBER takes as input a list of 2-tuples
#
#             [FIGfam-ID,protein sequence]
#
#       It returns a list of boolean values indicating whether or not
#       the indicated protein sequence can be placed in the designated
#       family.
#
#    4. ALL-FAMILIES returns a list of [FIGfam-ID,function] tuples.
#
#
#    5. ASSIGN-FUNCTION-TO-PROT is similar to PLACE-IN-FAMILY, except
#       that the returned list contains either
#
#                COULD-NOT-PLACE-IN-FAMILY
#       or
#                ID FUNCTION
#
#       That is, it does not indicate which FIGfam was used to
#       determine the function.  This allows higher-performance
#       alternatives for cases in which multiple FIGfams implement the
#       same function.  The algorithm supported utilizes the underlying
#       FIGfams, but characterizes sets that implement the same
#       function and does not support distinguishing which FIGfam
#       is actually the right subgrouping.
#
#	6. ASSIGN-FUNCTIONS-TO-DNA takes as input a list of DNA
#       sequences.  It returns a list where each element describes 
#       a region of DNA that is believed to be part of a gene encoding
#       a protein sequence that would be placed into a FIGfam
#       successfully, if the whole protein sequence could be
#       determined.  That is, the returned list will contain entrties
#       of either the form
#
#                COULD-NOT-PLACE-ANY-REGIONS-IN-FAMILIES
#       or
#                BEGIN1 END1 FUNCTION1 BEGIN2 END2 FUNCTION2 ...
#
#       where BEGIN and END specify a region (if BEGIN is greater than
#       END, the region described is on the reverse strand) and
#       FUNCTION is the family function of the protein sequence that is
#       believed to be encoded by DNA including the embedded region.
#       Each input sequence can produce an arbitrary number of matched
#       regions, there will be 3 fields for each matched region.  Note
#       that the described region may include frameshifts and embedded
#       stop codons.  The algorithm seeking meaningful sections of DNA
#       assumes that it may have an incomplete, low-quality sequence
#       (and uses an algorithm that attempts to locate meaningful
#       matches even so).

sub myerror
{
    my($cgi, $stat, $msg) = @_;
    print $cgi->header(-status =>  $stat);
    print "$msg\n";
    exit;
}


package ProcBufferFASTA;
use strict;
sub new {
	my($class) = @_;
	my $self = { 
		buf => '',
		fasta=> ''
	} ;
	return bless $self, $class;
}

sub process_block
{
	my($self, $block) = @_;
	$self->{buf} .= $block;
	#print "$block, DNA Processing\n";
	
	while ($self->{buf} =~ s/^([^\n]*\n)//go) {
		if (substr($1, 0, 1) eq ">") {
			$self->parse_fasta();
			$self->{fasta} = $1;
		} else {
			$self->{fasta} .= $1;
		}
	}
	
}
sub parse_fasta {
    my ($self) = @_;
    my $state = 'expect_header';
    my $cur_id;
    my $seq = '';
    #print STDERR "FASTA =$self->{fasta}\n";
    while ($self->{fasta} =~ /([^\n]*)\n/go) {
	    $_  = $1;
            if ($state eq 'expect_header')
            {
                if (/^>(\S+)/)
                {
                    $cur_id = $1;
                    $state = 'expect_data';
                    #print $clean_fh ">$cur_id\n";
                    next;
                }
                else
                {
                    die "Invalid fasta: Expected header at line $.\n";
                }
            }
            elsif ($state eq 'expect_data')
            {
                if (/^>(\S+)/)
                {
                    $cur_id = $1;
                    $state = 'expect_data';
                    #print $clean_fh ">$cur_id\n";
                    next;
                }
	        elsif (/^([acgtumrwsykbdhvn]*)\s*$/i)
                #elsif (/^([*abcdefghijklmnopqrstuvwxyz]*)\s*$/i)
                {
	  	    $seq .= lc($1);
                    #print $clean_fh lc($1) . "\n";
                    next;
                }
                elsif (/^([*abcdefghijklmnopqrstuvwxyz]*)\s*$/i)
                {
	  	    $seq .= $1;
                    ##print $clean_fh lc($1) . "\n";
			#print "SEQ 2 = $seq\n";
                    next;
                    ##die "Invalid fasta: Bad data (appears to be protein translation data) at line $.\n";
                }
                else
                {
                    my $str = $_;
                    if (length($_) > 100)
                    {
                        $str = substr($_, 0, 50) . " [...] " . substr($_, -50);
                    }
                    print "Invalid fasta: Bad data at line $.\n$str\n";
			exit;
                }
            }
            else
            {
                die "Internal error: invalid state $state\n";
            }
        }


	if ($seq) {
		#print "XXX$cur_id, $seq\n";
		#print $function;
		my $id = $cur_id;
		if ($function eq "assign_functions_to_DNA") {
			my @hits = $kmers->assign_functions_to_DNA($seq);
			foreach my $hit (@hits) {
				my ($b,$e,$func) = @$hit;
				print join("\t",($id,join("_",($id,$b,$e)),$func)),"\n";
			}
		} elsif ($function eq "assign_function_to_prot") {
			my $func = $kmers->assign_function_to_prot($seq);
			if ($func) {
				print "$id\t$func\n";
			} else {
				print "$id\tCOULD-NOT-PLACE\n";
			}
		} elsif ($function eq "place_in_family") {
			my ($fam) = $ffs->place_in_family($seq);
			if ($fam) {
				print "$fam->{id}\t$fam->{function}\n";
			} else {
				print "COULD-NOT-PLACE-IN_FAMILY\n";
			}
		}
	}
}
	

package ProcBuffer;
use strict;
sub new {
	my($class) = @_;
	my $self = { 
		buf => '',
	} ;
	return bless $self, $class;
}

sub process_block
{
	my($self, $block) = @_;
	$self->{buf} .= $block;
	print $block;
}



MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3