Parent Directory
|
Revision Log
Modifications to pangenome code to move common code to PG.pm.
use strict; use Data::Dumper; use Getopt::Long; use SeedEnv; use PG; my $usage = "usage: pg_in_some_but_not_X -d Data\n"; my $dataD; my $rc = GetOptions('d=s' => \$dataD,); if ((! $rc) || (! -d $dataD)) { print STDERR $usage; exit } my $pg = new PG($dataD); my %anno; if (-s "$dataD/anno.seed") { %anno = map { chop; ($_ => 1) } `cat $dataD/anno.seed`; } else { print STDERR "NOTE: you have not included any ASEED genomes in a file $dataD/anno.seed\n"; } my$peg_to_func = $pg->load_funcs(); my($peg_to_seq,$seq_to_pegs) = $pg->load_seqs(); my($peg_to_fam,$fam_to_pegs) = &load_fams($dataD); open(OUT,">","$dataD/role.inconsistencies") || die "could not open $dataD/role.inconsistencies"; my %seen; foreach my $genome ($pg->genomes) { my $bindings = $pg->subsystem_bindings_for_genome($genome); &process_genome($peg_to_func,$peg_to_seq,$seq_to_pegs,$peg_to_fam,$fam_to_pegs,\%seen,\*OUT,$bindings,\%anno); } close(OUT); sub process_genome { my($peg_to_func,$peg_to_seq,$seq_to_pegs,$peg_to_fam,$fam_to_pegs,$seen,$fhO,$bindings,$anno) = @_; foreach my $binding (@$bindings) { my($subsys,$role,$peg) = @$binding; my $func1 = $peg_to_func->{$peg}; if ($func1 && (index($role,$func1) >= 0)) { if (my $fam = $peg_to_fam->{$peg}) { my $pegs2 = $fam_to_pegs->{$fam}; foreach my $peg2 (@$pegs2) { my $func2 = $peg_to_func->{$peg2}; if ((! $func2) || ($func1 ne $func2)) { if (! ($seen->{"$func1\t$func2"} || $seen->{"$func2\t$func1"})) { $seen->{"$func1\t$func2"} = 1; my @anno_pegs = &get_anno_pegs([$peg,$peg2],$anno,$peg_to_seq,$seq_to_pegs); print OUT join("\t",($peg,$func1,$peg2,$func2,join(",",@anno_pegs),$role,$subsys)),"\n"; } } } } my $seq = $peg_to_seq->{$peg}; if ($seq) { my $pegs2 = $seq_to_pegs->{$seq}; if ($pegs2) { foreach my $peg2 (@$pegs2) { my $func2 = $peg_to_func->{$peg2}; if ((! $func2) || ($func1 ne $func2)) { if (! ($seen->{"$func1\t$func2"} || $seen->{"$func2\t$func1"})) { $seen->{"$func1\t$func2"} = 1; my @anno_pegs = &get_anno_pegs([$peg,$peg2],$anno,$peg_to_seq,$seq_to_pegs); print OUT join("\t",($peg,$func1,$peg2,$func2,join(",",@anno_pegs),$role,$subsys)),"\n"; } } } } } } } } sub get_anno_pegs { my($initial_pegs,$anno,$peg_to_seq,$seq_to_pegs) = @_; my %anno_pegs; foreach my $peg (@$initial_pegs) { my $same_seq = $seq_to_pegs->{$peg_to_seq->{$peg}}; my @apegs = grep { $anno->{&SeedUtils::genome_of($_)} } @$same_seq; foreach $_ (@apegs) { $anno_pegs{$_} = 1 } } return keys(%anno_pegs); } sub load_funcs { my($dataD) = @_; my $to_func = {}; foreach my $job (`cut -f 3 $dataD/genomes.with.job`) { chop $job; foreach $_ (`cat /vol/rast-prod/jobs/$job/rp/*/proposed*functions`) { if ($_ =~ /^(\S+)\t(\S.*\S)/) { my $peg = $1; my $func = $2; $func =~ s/\s+\#.*$//; $to_func->{$peg} = $func; } } } if (-s "$dataD/anno.seed") { foreach my $g (`cat $dataD/anno.seed`) { chop $g; foreach $_ (`cat /vol/mirror-seed/Data.mirror/Organisms/$g/assigned_functions`) { if ($_ =~ /^(\S+)\t(\S[^\t]*\S)/) { my $peg = $1; my $func = $2; $func =~ s/\s+\#.*$//; $to_func->{$peg} = $func; } } } } return $to_func; } sub load_seqs { my($dataD) = @_; my $peg_to_seq = {}; my $seq_to_pegs = {}; foreach my $job (`cut -f 3 $dataD/genomes.with.job`) { chop $job; $/ = "\n>"; foreach $_ (`cat /vol/rast-prod/jobs/$job/rp/*/Features/peg/fasta`) { chomp; if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s) { my $peg = $1; my $seq = $2; $seq =~ s/\s//gs; $peg_to_seq->{$peg} = $seq; push(@{$seq_to_pegs->{$seq}},$peg); } } $/ = "\n"; } if (-s "$dataD/anno.seed") { foreach my $g (`cat $dataD/anno.seed`) { chop $g; $/ = "\n>"; foreach $_ (`cat /vol/mirror-seed/Data.mirror/Organisms/$g/Features/peg/fasta`) { chomp; if ($_ =~ /^>?(\S+)[^\n]*\n(.*)/s) { my $peg = $1; my $seq = $2; $seq =~ s/\s//gs; $peg_to_seq->{$peg} = $seq; push(@{$seq_to_pegs->{$seq}},$peg); } } $/ = "\n"; } } return ($peg_to_seq,$seq_to_pegs); } sub load_fams { my($dataD) = @_; my $peg_to_fam = {}; my $fam_to_pegs = {}; my $fam = 1; foreach $_ (`cat $dataD/protein.families`) { chop; my $pegs = [split(/\t/,$_)]; foreach my $peg (@$pegs) { $peg_to_fam->{$peg} = $fam; $fam_to_pegs->{$fam} = $pegs; } $fam++; } return ($peg_to_fam,$fam_to_pegs); }
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |