[Bio] / FigWebServices / rest_rtmg.cgi Repository:
ViewVC logotype

View of /FigWebServices/rest_rtmg.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (annotate)
Fri May 3 17:26:20 2013 UTC (6 years, 6 months ago) by redwards
Branch: MAIN
CVS Tags: rast_rel_2014_0912, rast_rel_2014_0729, HEAD
Changes since 1.2: +178 -96 lines
more real time metagenomies

#__perl__

# NOTE THAT use strict will break this!!
use lib '/home/redwards/perl/lib/perl5/site_perl/5.8.7/i686-linux/';
use CGI;
use CGI::Carp qw/fatalsToBrowser/;
use JSON::XS;
use Data::Dumper;
use Fcntl qw(:DEFAULT :flock);


use ANNOserver;
use SAPserver;
$|=1;

=pod 

=head1 rest_rtmg.cgi

YAWS - Yet another web service!

Why: We're using rpc encoding which is basically URL encoding. In this, I call something like 

http://bioseed.mcs.anl.gov/~redwards/FIG/rest_seed.cgi/multiply/2/3/4/5 

and get a response. Why do we need another web service? Mainly because of the Google work. Google pretty much exclusively deals with http requests, and eschews SOAP and other encodings as being too complex. 

The data returned is all in JSON format (http://www.json.org/) which is the Javascript object notation format. JSON is a really light weight markup language that cna handle complex objects quite easily. 

I am also aiming for lightweight code. In this case, we're not going to instantiate anything until we need it. Hopefully.

This code is almost exclusivvely used with the real time metagenomics site, so that is what I'm going to use it for.

=cut

# a really simple RESTful web service that returns seed data

my $cgi=new CGI qw/:standard/;
my $json= new JSON::XS;

print $cgi->header('text/plain');

# get the query with path so we get the RESTful information
my $abs = $cgi->url(-absolute=>1); # this will be the cgi-bin/rest.cgi stuff
my $rest = $cgi->url(-path_info=>1);
$rest =~ s/^.*$abs\///;
my @rest=split m#/#, $rest;

my $method = shift @rest;

# there is no good way of passing a null value!!
map {undef $rest[$_] if ($rest[$_] eq "undef" || $rest[$_] eq "null")} (0..$#rest);

my $result =  $json->encode({result => &{$method}(@rest) });

print $result, "\n";



=pod

=head1 multiply.

This is a really simple method that just multiplies two numbers! It's great for testing things out

=cut

sub multiply {
        
	my $x = 1;
        map {$x = $x * $_} @_;
        return $x;
}

=head1 annotate

This takes a dna sequence and returns the bare bones annotation. This is for first pass, kind of what's there look.

=cut

sub annotate {
	my %sequences = @_;
	my $sequences; 
	map {push @$sequences, [$_, "no comment", $sequences{$_}]} keys %sequences;

    	# Create a FIGfam server object.
    	my $ffServer = ANNOserver->new();
    	# Pass the input file to the FIGfam server to get assignments.
	my $reliability = 2;
	my $maxGap = 1000;
	my $kmer = 8;
	my $resultH = $ffServer->assign_functions_to_dna({-input=>$sequences, -minHits=>$reliability, -maxGap=>$maxGap, -kmer=>$kmer});

# Loop through the results. We send good results to the standard output,
# and failures to the standard error file.
	my $allresults;
	while (my $result = $resultH->get_next()) {push @$allresults, $result;}
	return $allresults;

};



=head1 annotate_fasta_file

This takes a LOCAL fasta file and returns the bare bones annotation. This is for first pass, kind of what's there look. Italso takes an optional reliability. If not provided, defaults to 2.

=cut

sub annotate_fasta_file {
	my ($file, $reliability, $kmer, $maxGap) = @_;
	if (!defined $reliability) {$reliability = 2}
	if (!defined $maxGap) {$maxGap = 600}
	if (!defined $kmer) {$kmer = 8}
	use raelib;
	use FIG_Config;
	$file = $FIG_Config::temp . "/rtmg/" . $file;
	unless (-e $file) {return "$file not found";}
	##return ("using $file");
	my $fa=raelib->read_fasta($file);
	map {push @$sequences, [$_, "no comment", $fa->{$_}]} keys %$fa;

    	# Create a FIGfam server object.
    	my $ffServer = ANNOserver->new();
    	# Pass the input file to the FIGfam server to get assignments.
	my $maxGap = 1000;
	my $resultH = $ffServer->assign_functions_to_dna({-input=>$sequences, -minHits=>$reliability, -maxGap=>$maxGap, -kmer=>$kmer});

	my $allresults=[];
	open(OUT, ">$file.annotations") || die "can't write to annotations file";
	while (my $result = $resultH->get_next()) { 
		push @{$allresults}, $result; 
		my $seqid = $result->[0];
		foreach my $line (@{$result}[1..$#$result]) {
			print OUT $seqid;
			map {ref($_) eq "ARRAY" ? print OUT "\t", join("\t", @$_) : print OUT "\t$_"} @$line;
			print OUT "\n";
		}
	}
	close OUT;
	#unlink $file;
	return $allresults;
};


=head1 annotate_fasta_file_lite

This takes a LOCAL fasta file and returns the bare bones annotation. This is for first pass, kind of what's there look. Italso takes an optional reliability. If not provided, defaults to 2.

## NOT anymore This only returns true that the file has been processed and does not return any data.

This updates the tables and returns their locations

=cut

sub annotate_fasta_file_lite {
	my %params=@_;
	my $file = $params{'file'};
	my $reliability = $params{'reliability'};
	my $kmer = $params{'kmer'};
	my $maxGap = $params{'maxgap'};
	my $jobid = $params{'jobID'};
	my $level = $params{'level'};
	my $kmerDataset = $params{'kmerDataset'};
	if (!defined $reliability) {$reliability = 2}
	if (!defined $maxGap) {$maxGap = 600}
	if (!defined $kmer) {$kmer = 8}
	use raelib;
	use FIG_Config;
	my $outputdir = $FIG_Config::temp . "/rtmg/" . $jobid;

	$file = $outputdir. "/" . $file;
	open(PROGRESS, ">$file.progress") || die "can't open $file.progress";
	print PROGRESS time, ": Trying to read $file\n";
	unless (-e $file) {return "$file not found";}
	##return ("using $file");
	my $fa=raelib->read_fasta($file);
	map {push @$sequences, [$_, "no comment", $fa->{$_}]} keys %$fa;

	print PROGRESS time, ": Read ", $#$sequences, " sequences from $file\n";
    	# Create a FIGfam server object.
    	my $ffServer = ANNOserver->new();
    	# Pass the input file to the FIGfam server to get assignments.
	my $maxGap = 1000;
	my @kmerDataset = $kmerDataset ? (-kmerDataset => $kmerDataset) : ();
	print PROGRESS time, ": Annotating with minHits: $reliability; kmerDataset: $kmerDataset; maxGap: $maxGap; and kmers: $kmer\n";
	my $resultH = $ffServer->assign_functions_to_dna({
		-input=>$sequences,
		-minHits=>$reliability,
		@kmerDataset,
		-maxGap=>$maxGap,
		-kmer=>$kmer
	});

	# now add the data to the existing data
	my $htmldir = $FIG_Config::temp. "/rtmg/$jobid/web";
	unless (-e $htmldir) {mkdir $htmldir, 0755}

	print PROGRESS time, ": Counting results\n";
	my @allresults; my $c=0;
	while (my $result = $resultH->get_next()) { 
		my $seqid = $result->[0];
		foreach my $line (@{$result}[1..$#$result]) {
			$c++;
			print PROGRESS ".";
			#unless ($c % 100) {print PROGRESS "."}
			my $data = $seqid;
			foreach my $l (@$line) {
				if (ref($l) eq "ARRAY") {
					$data .= "\t" . join("\t", @$l);
				} else {
					$data .= "\t" . $l;
				}
			}
			push @allresults, $data;
		}
	}
	print PROGRESS "\n";

	sleep(rand(5));
	my $opentime=time;
	print PROGRESS time, ": Opening output files at $opentime\n";
	my $seqfile = $htmldir . "/total.txt";
	open(SEQ, ">>$seqfile") || die "can't open sums filein append mode";
	open(COU, ">$file.annotations") || die "can't open $file.annotations";
	flock(SEQ, 2) || die "can't get a flock on $seqfile"; # collect an exclusive lock on the file so we can append to it.
	print PROGRESS time, ": Writing output after ", (time-$opentime), " seconds\n";
	print SEQ join("\n", @allresults), "\n";
	print COU join("\n", @allresults), "\n";
	close SEQ;
	close COU;
	print PROGRESS time, ": Updating the counts\n";
	my %count;
	map {my @a=split /\t/; $count{$a[4]}++} @allresults;
	my $ret = &rewrite_table($jobid, undef, \%count);

	print PROGRESS time, ": Done and closing";
	#return ({jobId=>$jobid});
	return $ret;
}

=head1 update_table
	
	Rebuild the counts table

=cut

sub update_table {
	my ($jobid, $level)=@_;
	
	my $htmldir = $FIG_Config::temp. "/rtmg/$jobid/web";
	my $seqfile = $htmldir . "/total.txt";
	my %count;
	open(IN, $seqfile) || die "Can't open $seqfile";
	while (<IN>) {
		chomp;
		my @a=split  /\t/;
		$count{$a[4]}++;
	}
	close IN;
	
	return &rewrite_table($jobid, $level, \%count);
}

sub rewrite_table {
	my ($jobid, $level, $count)=@_;
	

# Levels:
#	0=>"Function", 1=>"One level of subsystems", 2=>"Two levels of subsystems", 3=>"Three levels of subsystems"

	my $htmldir = $FIG_Config::temp. "/rtmg/$jobid/web";
	unless (-e $htmldir) {die "Should have found a directory at $htmldir"}

	my $subsystems; my $params;
	sysopen(DATA, "$htmldir/data.pl", O_RDWR | O_CREAT) || die "can't open $htmldir/data.pl: $!";
	my $ofh = select(FH); $| = 1; select ($ofh);
	flock(DATA, LOCK_EX) || die "can't write-lock DATA";
	my $in = join("", (<DATA>));
	my $VAR1; my $VAR2;
	eval $in;
	$subsystems=$VAR1;
	$params=$VAR2;
	seek(DATA, 0, 0) or die "can't rewind DATA";
	
	# read the current counts etc
	if (!defined $level) {
		if ($params->{'level'}) {$level = $params->{'level'}}
		else {$level = 3}
	}

	if (!defined $jobid) {
		if ($params->{'jobid'}) {$jobid = $params->{'jobid'}}
		else {die "Huh? No job id"}
	}

	$params->{'level'}=$level;
	$params->{'jobid'}=$jobid;
	$params->{'received'}++;


# now add the subsystems
	my $sshash;
	my %allss;
		
# read the missing subsystems
	my $sap = SAPserver->new();
	# my @new = map {!defined {$subsystems->{'fn'}->{$_}}} keys %$count;
	#$sshash = $sap->subsystems_for_role({-ids=>\@new, -usable => 1, -exclude => ['cluster-based', 'experimental']});
	$sshash = $sap->subsystems_for_role({-ids=>[keys %$count], -usable => 1, -exclude => ['cluster-based', 'experimental']});
	map {@{$subsystems->{'fn'}->{$_}} = @{$sshash->{$_}}} keys %$sshash;  
# now get all the classifications
			
	my @missing;
	foreach my $ssarr (values %$sshash) {
		foreach my $ss (@$ssarr) {
			unless ($subsystems->{'classification'}->{$ss}) {
				push @missing, $ss;
			}
		}
	}

	my $class = $sap->classification_of({-ids=>\@missing});
	map {$#{$class->{$_}}=1; $subsystems->{'classification'}->{$_} = $class->{$_}} keys %$class;

	print DATA Dumper($subsystems, $params);
	truncate(DATA, tell(DATA)) || die "can't truncate the file";

	my $class = $subsystems->{'classification'};

	my $tl; my $ol; my $thr;
	foreach my $fn (keys %$count) {
		foreach my $ss (@{$sshash->{$fn}}) {
			my $three = join("\t", @{$class->{$ss}}, $fn);
			$thr->{$three} += $count->{$fn};
			my $two = join("\t", @{$class->{$ss}});
			$tl->{$two} += $count->{$fn};
			my $one = $class->{$ss}->[0];
			$ol->{$one} += $count->{$fn};
		}
	}

	open(TXT, ">$htmldir/${jobid}_function_counts.txt") || die "can't open $htmldir/${jobid}_function_counts.txt";
	open(HTML, ">$htmldir/counts.html") || die "can't open $htmldir/counts.html";
	open(ONE, ">$htmldir/one.html") || die "can't open $htmldir/one.html";
	open(ONETXT, ">$htmldir/${jobid}_level_one.txt") || die "can't open $htmldir/${jobid}_level_one.txt";
	open(TWO, ">$htmldir/two.html") || die "can't open $htmldir/two.html";
	open(TWOTXT, ">$htmldir/${jobid}_level_two.txt") || die "can't open $htmldir/${jobid}_level_two.txt";
	open(THR, ">$htmldir/three.html") || die "can't open $htmldir/three.html";
	open(THRTXT, ">$htmldir/${jobid}_level_three.txt") || die "can't open $htmldir/${jobid}_level_three.txt";
	open(ANN, ">$htmldir/${jobid}_annotations.txt") || die "can't open $htmldir/${jobid}_annotations.txt";
	flock(TXT, 2) || die "Can't get a flock on $htmldir/counts.txt";
	flock(HTML, 2) || die "Can't get a flock on $htmldir/counts.html";
	flock(ONE, 2) || die "Can't get a flock on ONE";
	flock(TWO, 2) || die "Can't get a flock on TWO";
	flock(THR, 2) || die "Can't get a flock on THR";
	flock(ANN, 2) || die "Can't get a flock on ANN";
	flock(ONETXT, 2) || die "Can't get a flock on ONETXT";
	flock(TWOTXT, 2) || die "Can't get a flock on TWOTXT";
	flock(THRTXT, 2) || die "Can't get a flock on THRTXT";
	print HTML "<TABLE>\n";
	print ONE "<TABLE>\n"; print TWO "<TABLE>\n"; print THR "<TABLE>\n";

	foreach my $fn (sort {$count->{$b} <=> $count->{$a}} keys %$count) {
		print TXT join("\t",  $fn, $count->{$fn}), "\n";
		print HTML "<tr><td>", join("</td><td>", $fn, $count->{$fn}), "</td></tr>\n";
	}

	map {
		print ONETXT join("\t", $_, $ol->{$_}), "\n";
		print ONE "<tr><td>", join("</td><td>", $_, $ol->{$_}), "</td></tr>\n";
	} sort {$ol->{$b} <=> $ol->{$a}} keys %$ol;

	map {
		my $cols = $_;
		$cols  =~ s/\t/<\/td><td>/g;
		print TWOTXT join("\t", $_, $tl->{$_}), "\n";
		print TWO "<tr><td>", join("</td><td>", $cols, $tl->{$_}), "</td></tr>\n";
	} sort {$tl->{$b} <=> $tl->{$a}} keys %$tl;
	
	map {
		my $cols = $_;
		$cols  =~ s/\t/<\/td><td>/g;
		print THRTXT join("\t", $_, $thr->{$_}), "\n";
		print THR "<tr><td>", join("</td><td>", $cols, $thr->{$_}), "</td></tr>\n";
	} sort  {$thr->{$b} <=> $thr->{$a}} keys %$thr;
	



	print ONE "</TABLE>\n";
	print TWO "</TABLE>\n";
	print THR "</TABLE>\n";
	print HTML "</TABLE>\n";
	close ONE;
	close TWO;
	close THR;
	close ANN;
	close HTML;
	close TXT;
	close ONETXT;
	close TWOTXT;
	close THRTXT;
	close DATA;
	return ({
			"oneLevel"  => $FIG_Config::temp_url."/rtmg/$jobid/web/one.html",
			"twoLevel"  => $FIG_Config::temp_url."/rtmg/$jobid/web/two.html",
			"threeLevel"  => $FIG_Config::temp_url."/rtmg/$jobid/web/three.html",
			"tableHtml" => $FIG_Config::temp_url."/rtmg/$jobid/web/counts.html",
			"tableTxt"  => { 
				'raw' => $FIG_Config::temp_url."/rtmg/$jobid/web/${jobid}_function_counts.txt", 
				'one'  => $FIG_Config::temp_url."/rtmg/$jobid/web/${jobid}_level_one.txt",
				'two'  => $FIG_Config::temp_url."/rtmg/$jobid/web/${jobid}_level_two.txt",
				'three'  => $FIG_Config::temp_url."/rtmg/$jobid/web/${jobid}_level_three.txt",
				},
			"jobId"     => $jobid,
			"received"  => $params->{'received'},
			});
}




1;


MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3