[Bio] / MGRASTBackend / mg_ff_server.pl Repository:
ViewVC logotype

View of /MGRASTBackend/mg_ff_server.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (download) (as text) (annotate)
Fri Jul 24 15:48:43 2009 UTC (10 years, 3 months ago) by arodri7
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +5 -2 lines
update

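#
# Run the FIGfam assignment stage of a job: assign the job's dereplicated
# FASTA sequences to FIGfams with the k-mer tool, then split the FASTA into
# assigned and non-assigned files.
#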

use DB_File;
use Data::Dumper;

use strict;
use FIG;
use FIG_Config;
use File::Basename;
use GenomeMeta;
use Job48;
use JobStage;
use SGE;
use FortyEightMeta::SimDB;
use FortyEightMeta::SimStatusDB;

# Name under which this stage is registered with JobStage.
my $STAGE = "figfam_server";

@ARGV == 1 or die "Usage: $0 job-dir\n";
my $jobdir = shift;

# Direct method-call syntax; the indirect-object form ("new JobStage(...)")
# is ambiguous to the parser and discouraged.
my $stage = JobStage->new('Job48', $STAGE, $jobdir);
$stage or die "Cannot create job for $jobdir\n";

my $job = $stage->job();
my $meta = $job->meta;

print "Running job! $jobdir\n";

# Mark the stage as started before doing any work.
$stage->set_status("in_progress");
$stage->set_running("yes");

# All outputs of this stage are written into the job's proc directory.
my $proc = "$jobdir/proc";
chdir($proc) or $stage->fatal("cannot chdir $proc: $!");

# The input FASTA path is read from the job metadata.
my $fasta = $meta->get_metadata("dereplication.fasta_file");
($fasta and -f $fasta) or $stage->fatal("fasta not found: '$fasta'");

# Step 1: run the assignment tool; it writes $proc/raw_assigned_ff_seqs.
run_assign_to_ff($fasta,$proc);

# Step 2: split the input FASTA into assigned / non-assigned sequences
# based on the assignment file produced in step 1.
my $assignedFile = "$proc/raw_assigned_ff_seqs";
write_fasta($fasta, $assignedFile, $proc);

# Record the locations of the generated files in the stage metadata.
$stage->set_qualified_metadata("assigned_file", $assignedFile);
$stage->set_qualified_metadata("assigned_fasta", "$proc/assigned.fasta");
$stage->set_qualified_metadata("non_assigned_fasta", "$proc/non_assigned.fasta");

$stage->set_status("complete");
$stage->set_running("no");

exit(0);


#
# Run the k-mer based FIGfam assignment tool over a FASTA file and save
# what it prints.
#
# Parameters:
#   $fasta   - path of the input FASTA file; fed to the tool on stdin.
#   $job_dir - directory in which the output file raw_assigned_ff_seqs
#              is created (overwritten if it already exists).
#
# Every failure (opening the output file, starting the command, writing,
# closing, or a non-zero exit of the command) is reported through the
# file-level $stage object's fatal() method.
#
sub run_assign_to_ff
{
    my ($fasta,$job_dir) = @_;

    my $outputAssignedF = "$job_dir/raw_assigned_ff_seqs";
    my $kmersdb = $FIG_Config::KmersData;

    # The command needs the shell for the "<" and "2>&1" redirections, so
    # it is passed to open() as a single string rather than as a list.
    my $cmd = "$FIG_Config::bin/assign_to_dna_using_kmers 0 $kmersdb < $fasta";

    # Open the output first so the tool is not started when its results
    # could not be stored anyway.
    open(my $out_fh, '>', $outputAssignedF)
        or $stage->fatal("Failed file open $!: $outputAssignedF");

    # NOTE(review): stderr is merged into stdout here, so any diagnostics
    # the tool prints end up in the same file that write_fasta later parses
    # as tab-separated assignments -- confirm this is intended.
    open(my $pipe_fh, '-|', "$cmd 2>&1")
        or $stage->fatal("Failed pipe open: $!: $cmd");

    while (my $line = <$pipe_fh>)
    {
        print {$out_fh} $line
            or $stage->fatal("Failed write $!: $outputAssignedF");
    }

    # Buffered write errors only surface at close, so check it.
    close($out_fh)
        or $stage->fatal("Failed file close $!: $outputAssignedF");

    # close() on a pipe returns false when the command exited non-zero.
    close($pipe_fh) or $stage->fatal("Error on close: \$!=$! \$?=$?: $cmd");
}

#
# Split a FASTA file into "assigned" and "non-assigned" sequences.
#
# Parameters:
#   $fasta        - path of the input FASTA file.
#   $assignedFile - path of the tab-separated assignment file; column 1 is
#                   the sequence id and column 3 the hit id. Other columns
#                   are ignored.
#   $jobdir       - directory that receives the two output files.
#
# Output files (overwritten if present):
#   $jobdir/assigned.fasta     - one record per hit id of an assigned
#                                sequence, with the hit id as the header.
#   $jobdir/non_assigned.fasta - sequences whose id has no hit, with their
#                                original id as the header.
#
# Sequence ids are the first whitespace-delimited token after ">".
# Sequence lines are joined with all whitespace removed. Reading stops at
# end of file or at the first record whose header has no id.
#
# Dies with a message naming the file if any file cannot be opened or an
# output file cannot be written or closed.
#
sub write_fasta
{
    my ($fasta, $assignedFile,$jobdir) = @_;

    my $outnonAssignedFasta = "$jobdir/non_assigned.fasta";
    my $outAssignedFasta = "$jobdir/assigned.fasta";

    # Collect, for every sequence id, the list of hit ids assigned to it.
    my %hits_for;
    open(my $assigned_fh, '<', $assignedFile)
        or die "Cannot open $assignedFile for reading: $!\n";
    while (my $line = <$assigned_fh>)
    {
        # Strip the newline so a hit id in the last column stays clean.
        chomp $line;
        my ($id, $hitID) = (split(/\t/, $line))[0, 2];

        # Skip blank or malformed lines; they would otherwise produce an
        # empty ">" header in the assigned FASTA.
        next unless defined($id) && length($id);
        next unless defined($hitID) && length($hitID);

        push @{$hits_for{$id}}, $hitID;
    }
    close($assigned_fh);

    open(my $full_fh, '<', $fasta)
        or die "Cannot open $fasta for reading: $!\n";
    open(my $non_fh, '>', $outnonAssignedFasta)
        or die "Cannot open $outnonAssignedFasta for writing: $!\n";
    open(my $plus_fh, '>', $outAssignedFasta)
        or die "Cannot open $outAssignedFasta for writing: $!\n";

    # $line always holds the next unprocessed line; the inner loop leaves
    # the following header (or undef at EOF) in it for the outer loop.
    my $line = <$full_fh>;
    while (defined($line) && ($line =~ /^>(\S+)/))
    {
        my $id = $1;
        my @seq = ();
        while (defined($line = <$full_fh>) && ($line !~ /^>/))
        {
            $line =~ s/\s//g;
            push(@seq, $line);
        }
        my $seq = join("", @seq);

        if (my $hit_ids = $hits_for{$id})
        {
            foreach my $sub_id (@$hit_ids)
            {
                print {$plus_fh} ">$sub_id\n$seq\n"
                    or die "Cannot write to $outAssignedFasta: $!\n";
            }
        }
        else
        {
            print {$non_fh} ">$id\n$seq\n"
                or die "Cannot write to $outnonAssignedFasta: $!\n";
        }
    }
    close($full_fh);

    # Buffered write errors only surface at close, so check both outputs.
    close($non_fh)
        or die "Cannot close $outnonAssignedFasta: $!\n";
    close($plus_fh)
        or die "Cannot close $outAssignedFasta: $!\n";
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3