[Bio] / FigKernelScripts / pg_generate_possible_assignments.pl Repository:
ViewVC logotype

View of /FigKernelScripts/pg_generate_possible_assignments.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (download) (as text) (annotate)
Sun Feb 24 21:52:10 2013 UTC (6 years, 8 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
pg pipeline

use strict;
use Data::Dumper;
use Getopt::Long;

# Driver: for every genome listed in <Data>/genomes.with.job.and.genomeID,
# compare the PubSEED copy of the genome with its RAST copy and write, for each
# PubSEED peg whose protein sequence also occurs in the RAST genome, the
# function RAST assigned to that sequence whenever it differs from the
# function PubSEED currently has.
#
# Input  (tab-separated, one genome per line):
#     name, PubSEED genome id, RAST job, RAST genome id
# Output (<Data>/proposed.assignments.for.pubseed, tab-separated):
#     PubSEED peg id, proposed function
my $usage = "usage: pg_generate_possible_assignments -d Data\n";
my $dataD;

my $rc  = GetOptions('d=s' => \$dataD);
# A missing -d leaves $dataD undefined, so test definedness before -d.
# Exit non-zero so a calling pipeline can detect the usage failure.
if ((! $rc) || (! defined($dataD)) || (! -d $dataD)) { print STDERR $usage; exit(1) }

my $in_file  = "$dataD/genomes.with.job.and.genomeID";
my $out_file = "$dataD/proposed.assignments.for.pubseed";

# Three-argument open keeps characters in $dataD from being read as a mode.
open(my $in,  '<', $in_file)  or die "could not open $in_file: $!";
open(my $out, '>', $out_file) or die "could not open $out_file: $!";

while (defined(my $line = <$in>))
{
    # chomp, not chop: chop would eat the last character of the genome id
    # when the final line of the table has no trailing newline.
    chomp $line;
    my($name,$pubseed,$rast_job,$rast_genome) = split(/\t/,$line);
    print STDERR "processing $rast_genome\n";
    my $pubseed_orgD = "/vol/public-pseed/FIGdisk/FIG/Data/Organisms/$pubseed";
    my $rast_orgD    = "/vol/rast-prod/jobs/$rast_job/rp/$rast_genome";

    # Only genomes with a well-formed PubSEED id and an existing PubSEED
    # organism directory are processed.
    next unless (($pubseed =~ /^\d+\.\d+$/) && (-d $pubseed_orgD));

    my $pubseed_funcs = load_seq_to_peg_func($pubseed_orgD);
    my $rast_funcs    = load_seq_to_peg_func($rast_orgD);

    foreach my $seq (keys(%$pubseed_funcs))
    {
        my $pub_tuples  = $pubseed_funcs->{$seq};
        my $rast_tuples = $rast_funcs->{$seq};
        next unless ($pub_tuples && $rast_tuples);

        # A proposal exists only when every RAST peg with this sequence
        # carries the same, non-empty function.
        my $proposed = common_func($rast_tuples);
        next unless $proposed;

        foreach my $tuple (@$pub_tuples)
        {
            my($pegPS,$funcPS) = @$tuple;
            if ($proposed ne $funcPS)
            {
                print $out join("\t",($pegPS,$proposed)),"\n";
            }
        }
    }
}
close($in);
# Buffered write errors (e.g. a full disk) only surface at close time.
close($out) or die "could not close $out_file: $!";

# load_seq_to_peg_func($orgD)
#
# Reads an organism directory and returns a hash reference that maps each
# protein sequence (all whitespace removed) found in
# $orgD/Features/peg/fasta to a list of [peg_id, function] pairs, one pair
# per FASTA entry carrying that sequence.  The function is '' when the peg
# has no entry in the function table.
#
# The function table comes from $orgD/assigned_functions when that file is
# non-empty; otherwise from every $orgD/proposed*functions file, read in
# sorted name order.  When a peg appears more than once, the last line read
# wins.
#
# Files are read directly rather than through `cat`, so directory names that
# contain spaces or shell metacharacters are handled and never reach a shell.
# An unreadable file is reported on STDERR and treated as empty.
sub load_seq_to_peg_func {
    my($orgD) = @_;

    my $seq_to_tuples = {};

    # Decide which function files to read.
    my @func_files;
    if (-s "$orgD/assigned_functions")
    {
        @func_files = ("$orgD/assigned_functions");
    }
    elsif (opendir(my $dh, $orgD))
    {
        # Equivalent of the shell pattern proposed*functions.
        @func_files = grep { -f $_ }
                      map  { "$orgD/$_" }
                      sort
                      grep { /^proposed.*functions\z/s } readdir($dh);
        closedir($dh);
    }

    my %peg_to_func;
    {
        # Function tables are line oriented regardless of the caller's $/.
        local $/ = "\n";
        foreach my $file (@func_files)
        {
            my $fh;
            if (! open($fh, '<', $file))
            {
                print STDERR "could not open $file: $!\n";
                next;
            }
            while (defined(my $line = <$fh>))
            {
                # peg id, a tab, then the function up to the next tab,
                # without surrounding whitespace.
                if ($line =~ /^(\S+)\t(\S[^\t]*\S)/)
                {
                    $peg_to_func{$1} = $2;
                }
            }
            close($fh);
        }
    }

    my $fasta = "$orgD/Features/peg/fasta";
    my $fasta_fh;
    if (! open($fasta_fh, '<', $fasta))
    {
        print STDERR "could not open $fasta: $!\n";
        return $seq_to_tuples;
    }

    {
        # Read one FASTA entry per record.  local restores the caller's
        # record separator automatically, even on an early exit.
        local $/ = "\n>";
        while (defined(my $entry = <$fasta_fh>))
        {
            chomp $entry;    # strips the trailing "\n>" separator
            # Only the very first entry still carries its leading '>'.
            if ($entry =~ /^>?(\S+)[^\n]*\n(.*)/s)
            {
                my $id  = $1;
                my $seq = $2;
                $seq =~ s/\s//g;
                my $func = $peg_to_func{$id};
                $func = '' unless $func;
                push(@{$seq_to_tuples->{$seq}},[$id,$func]);
            }
        }
    }
    close($fasta_fh);

    return $seq_to_tuples;
}

# common_func($tuples)
#
# $tuples is a reference to a list of [peg_id, function] pairs.  Returns the
# function when every pair carries the same function string, and undef when
# the functions differ or the list is empty.
#
# The empty list is handled up front: reading $tuples->[0]->[1] on an empty
# array would autovivify element 0 and silently modify the caller's data.
#
# undef is returned explicitly (rather than a bare return) so the result is
# still a single value when the sub is called in list context.
sub common_func {
    my($tuples) = @_;

    return undef unless @$tuples;

    my $first_func = $tuples->[0]->[1];
    foreach my $tuple (@$tuples)
    {
        return undef if ($tuple->[1] ne $first_func);
    }
    return $first_func;
}

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3