[Bio] / FortyEight / pull_sims_from_server.pl Repository:
ViewVC logotype

View of /FortyEight/pull_sims_from_server.pl

Parent Directory Parent Directory | Revision Log Revision Log

Revision 1.1 - (download) (as text) (annotate)
Wed Jul 8 20:18:46 2009 UTC (10 years, 7 months ago) by olson
Branch: MAIN
CVS Tags: mgrast_dev_08112011, mgrast_dev_08022011, rast_rel_2014_0912, mgrast_dev_05262011, mgrast_dev_04082011, rast_rel_2010_0928, mgrast_version_3_2, mgrast_dev_12152011, mgrast_dev_06072011, rast_rel_2009_0925, rast_rel_2010_0526, rast_rel_2014_0729, mgrast_dev_02212011, rast_rel_2010_1206, mgrast_release_3_0, mgrast_dev_03252011, rast_rel_2010_0118, rast_rel_2011_0119, mgrast_release_3_0_4, mgrast_release_3_0_2, mgrast_release_3_0_3, mgrast_release_3_0_1, mgrast_dev_03312011, mgrast_release_3_1_2, mgrast_release_3_1_1, mgrast_release_3_1_0, mgrast_dev_04132011, mgrast_dev_04012011, rast_rel_2009_07_09, rast_rel_2010_0827, myrast_33, rast_rel_2011_0928, mgrast_dev_04052011, mgrast_dev_02222011, mgrast_dev_10262011, HEAD
Add support for pulling sims from sims server and submitting only those that don't match to be computed.

Use $FIG_Config::rast_sims_data to point at the directory holding
nr & peg.synonyms; the directory is named explicitly so that switching
to a new sims database is easier.

# Given a fasta file, pull the sims from the sims server for the sequences.
# Write them to stdout, and write nonmatching IDs to stderr.

use FIG;
use Digest::MD5;
use Data::Dumper;

# NOTE: 'use warnings' is intentionally not enabled here. STDERR is this
# script's data channel for the list of non-matching ids, so any warning
# text would be mixed into that output.
use strict;

# Script body.
#
# 1. Read FASTA records from STDIN and key each one by the MD5 of its
#    upper-cased sequence, formatted as "gnl|md5|<hex digest>".
# 2. Ask the sims server (via $fig->sims) for similarities, one chunk of
#    keys at a time.
# 3. Print every returned sim to STDOUT as a tab-separated line, with the
#    first column rewritten from the md5 key back to the original FASTA id.
# 4. Print to STDERR, one per line, the original ids for which no sim
#    came back.

my $fhin = \*STDIN;
my $fig  = FIG->new;

# Number of keys sent to $fig->sims per call.
my $chunksize = 200;

# md5 key => original FASTA id.
# NOTE(review): if two input records have identical sequences they share one
# key; only the last id is remembered here, while the key is still queued
# once per record. Confirm whether duplicate sequences can occur in practice.
my %md5_to_id;

# md5 keys in input order; consumed chunk by chunk below.
my @md5s;

# The list assignment is evaluated in scalar context by while(), so the loop
# ends when read_fasta_record returns an empty list.
while ((my ($id, $seqp, undef) = FIG::read_fasta_record($fhin))) {
    my $md5 = Digest::MD5::md5_hex(uc($$seqp));
    my $mid = "gnl|md5|$md5";
    $md5_to_id{$mid} = $id;
    push(@md5s, $mid);
}

# Start with every key marked as "no sims yet"; keys are removed as soon as
# a sim whose id1 equals the key is returned. Whatever is left at the end is
# the set of sequences the server knew nothing about.
my %seen = %md5_to_id;

while (@md5s) {
    my @chunk = splice(@md5s, 0, $chunksize);

    # NOTE(review): the meaning of the arguments (300, undef, undef, 'raw')
    # is defined by FIG::sims, which is not visible here; they are passed
    # through unchanged.
    my @sims = $fig->sims(\@chunk, 300, undef, undef, 'raw');

    for my $sim (@sims) {
        # Capture the key before column 0 is overwritten below.
        my $key = $sim->id1;
        delete $seen{$key};

        # Report the sim under the caller's original id when we have one.
        my $orig_id = $md5_to_id{$key};
        $sim->[0] = $orig_id if defined $orig_id;

        print join("\t", @$sim), "\n";
    }
}

print STDERR "$_\n" for sort { FIG::by_fig_id($a, $b) } values %seen;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3