# gather_pg_sets
#
# Walks a list of precomputed output directories and feeds pairwise
# connections into two output pipelines:
#
#   * RepeatSets - built from every <dir>/<sub>/PG/repeat.clusters file
#   * PGsets     - built from every <dir>/<sub>/PG/pg.sets file, after
#                  dropping every peg that appeared in any repeat cluster
#
# Only sub-directories whose names start with a digit are visited, and only
# input files that exist with non-zero size are read.
#
# Each input file is read through the external `rel2tabs` command and each
# output is written through `cluster_objects | tabs2rel`; those programs must
# be on PATH.  NOTE(review): their exact formats are not visible from this
# script - each rel2tabs output line is simply treated as one tab-separated
# set of ids.

use strict;
use warnings;
use Data::Dumper;

my $usage = "usage: gather_pg_sets RepeatSets PGsets Precomputed1 Precomputed2...";

my ($repeat_clustersF, $pg_setsF);
(      ($repeat_clustersF = shift @ARGV)
    && ($pg_setsF         = shift @ARGV) )
    || die $usage;

my @dirs = @ARGV;    # Something like ('PubSEED.Output.1','PubSEED.Output.2','BrucellaPGs');
(@dirs > 0)
    || die "You need to give the precomputed correspondence sets (see build_PGs_for_OTUs)";

# The command strings contain shell metacharacters (| and >), so they are run
# through the shell exactly as in the two-argument form.
open(my $rp_fh, '|-', "cluster_objects | tabs2rel > $repeat_clustersF")
    || die "FAILED: $!";
open(my $pg_fh, '|-', "cluster_objects | tabs2rel > $pg_setsF")
    || die "FAILED: $!";

# Pass 1: every repeat cluster.  This pass must finish for ALL directories
# before pass 2 starts, because pass 2 filters against the complete set of
# pegs seen in repeats.
my %rep_peg;
for_each_set(
    \@dirs,
    'repeat.clusters',
    sub {
        my @pegs = @_;
        $rep_peg{$_} = 1 for @pegs;    #### mark pegs in repeats
        print_connections($rp_fh, @pegs);
    }
);

# Pass 2: every PG set, with pegs that occur in repeats removed first.
for_each_set(
    \@dirs,
    'pg.sets',
    sub {
        my @pegs = grep { !$rep_peg{$_} } @_;
        print_connections($pg_fh, @pegs);
    }
);

# Closing a pipe waits for the child pipeline and reports its status.  A
# failure is surfaced as a warning only, so the script's exit status stays
# what it was before this check existed.
close($rp_fh)
    || warn "closing pipeline for $repeat_clustersF failed (status $?): $!\n";
close($pg_fh)
    || warn "closing pipeline for $pg_setsF failed (status $?): $!\n";

# Invoke $callback once per line of every non-empty
# <dir>/<digit-prefixed sub-dir>/PG/<file_name>, passing the line's
# tab-separated fields as the argument list.
sub for_each_set {
    my ($dirs, $file_name, $callback) = @_;

    foreach my $d (@$dirs) {
        opendir(my $dh, $d) || die "$d: $!";
        my @sub = grep { /^\d/ } readdir($dh);
        closedir($dh);

        foreach my $subD (@sub) {
            my $path = "$d/$subD/PG/$file_name";
            next unless -s $path;

            open(my $in, '-|', "rel2tabs < $path") || die "$d/$subD: $!";
            while (defined(my $line = <$in>)) {
                # chomp, not chop: a final line without a newline must not
                # lose its last real character.
                chomp $line;
                $callback->(split(/\t/, $line));
            }
            close($in)
                || warn "rel2tabs < $path failed (status $?): $!\n";
        }
    }
    return;
}

# Print one "first<TAB>other" line for every member after the first, linking
# the whole set to its first member.  Sets with fewer than two members
# produce no output.
sub print_connections {
    my ($fh, @pegs) = @_;
    my $first = shift @pegs;
    print {$fh} join("\t", $first, $_), "\n" for @pegs;
    return;
}