[Bio] / FigKernelScripts / get_exemplars_as_needed.pl Repository:
ViewVC logotype

View of /FigKernelScripts/get_exemplars_as_needed.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (download) (as text) (annotate)
Sat May 11 18:53:22 2013 UTC (6 years, 6 months ago) by overbeek
Branch: MAIN
CVS Tags: rast_rel_2014_0729, rast_rel_2014_0912, HEAD
Changes since 1.2: +1 -1 lines
minor fixes

use strict;
use Data::Dumper;
use FIG;
my $fig = new FIG;
use Getopt::Long;

my $usage = "usage: get_exemplars_as_needed [-dlits DlitsTab]\n";
my $dlits;
my $rc  = GetOptions('dlits=s' => \$dlits); 
if (! $rc) { print STDERR $usage; exit }
if (! $dlits)
{
    $dlits = "dlits.$$";
    &SeedUtils::run("get_dlit_data > $dlits");
}

my %preferred;
my $last = '';
foreach $_ (`cat /vol/public-pseed/FIGdisk/FIG/Data/Global/genome.sets`)
{
    chop;
    my($set,$genome,undef) = split(/\t/,$_);
    if ($set ne $last)
    {
	$preferred{$genome} = 1;
    }
    $last = $set;
}

my %md5_with_lit = map { ($_ =~ /^literature_ref\(\'([^']+)\',(\d+)/) ? ($1 => 1) : () } `cat $dlits`;

while (defined($_ = <STDIN>))
{
    if ($_ =~ /^(\S.*\S)/)
    {
	my $role = $1;
	my @exemplars = &get_exemplars($role,\%preferred,\%md5_with_lit);
	if (@exemplars > 0)
	{
	    foreach my $tuple (@exemplars)
	    {
		my($md5,$sc,$peg) = @$tuple;
		if ($md5)
		{
		    print join("\t",($sc,$md5,$peg,$role)),"\n";
		}
	    }
	}
	else
	{
	    print STDERR "none\t$role\n";
	}
    }
    else
    {
	print STDERR "bad\t$_";
    }
}

sub get_exemplars {
    my($role,$preferred,$md5s_with_lit) = @_;

    my %md5s;

    my @role_peg;
    my $roleC = $role;
    $roleC =~ s/([' ()])/\\$1/g;
    print STDERR "$roleC\n";
    my @tmp = `echo $roleC | svr_role_to_pegs | cut -f2`;
    chomp @tmp;
    foreach my $peg (@tmp)
    {
	push(@role_peg,[$role,$peg]);
    }
    my %dups;
    foreach $_ (@role_peg)
    {
	my($role,$peg) = @$_;
	my $g = $fig->genome_of($peg);
	$dups{$g}++;
    }

    my %bad;
    foreach my $g (keys(%dups))
    {
	if ($dups{$g} > 1)
	{
	    $bad{$g} = 1;
	}
    }

    my %bad_md5;
    my %role_to_peg;
    foreach $_ (@role_peg)
    {
	my($role,$peg) = @$_;
	my $g = $fig->genome_of($peg);
	if ($bad{$g})
	{
	    my $md5 = $fig->md5_of_peg($peg);
	    $bad_md5{$md5} = 1;
	}
	else
	{
	    $role_to_peg{$peg} = 1;
	}
    }

    my @pegs = keys(%role_to_peg);
    
    foreach my $peg (@pegs)
    {
	my $md5 = $fig->md5_of_peg($peg);
	my $lit = $md5s_with_lit->{$md5} ? 10 : 0;
	my $dup = $bad_md5{$md5} ? 0 : 3;
	my $pref = $preferred->{$fig->genome_of($peg)} ? 2 : 0;
	$md5s{$md5} = [($lit + $dup + $pref), $peg];
    }
    my @poss = sort { $md5s{$b}->[0] <=> $md5s{$a}->[0] } keys(%md5s);
    if (@poss > 5) { $#poss = 4 }
    return sort { $b->[1] <=> $a->[1] } map { [$_,sprintf("%0.3f",$md5s{$_}->[0]/15),$md5s{$_}->[1]] } @poss;
}
    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3