Parent Directory
|
Revision Log
Revision 1.2 - (view) (download) (as text)
1 : | overbeek | 1.1 | use SeedEnv; |
2 : | use strict; | ||
3 : | use Carp; | ||
4 : | use Data::Dumper; | ||
5 : | use Getopt::Long; | ||
6 : | |||
7 : | my $usage = "usage: ex_get_subsystem_based_estimates -d Data\n"; | ||
8 : | my $dataD; | ||
9 : | my $rc = GetOptions('d=s' => \$dataD); | ||
10 : | |||
11 : | if ((! $rc) || (! -d $dataD)) | ||
12 : | { | ||
13 : | print STDERR $usage; exit ; | ||
14 : | } | ||
15 : | |||
16 : | open(GENOME,"<$dataD/genome") || die "$dataD/genome does not exist"; | ||
17 : | my $genome = <GENOME>; | ||
18 : | chomp $genome; | ||
19 : | ($genome =~ /^(\d+\.\d+)/) || die "genome: $genome is invalid"; | ||
20 : | $genome = $1; | ||
21 : | |||
22 : | my $pccsF = "$dataD/locus.pccs"; | ||
23 : | my $aliasF = "$dataD/aliases"; | ||
24 : | open(ALIAS,"<$aliasF") || die "could not open $aliasF"; | ||
25 : | my %to_peg; | ||
26 : | while (defined($_ = <ALIAS>)) | ||
27 : | { | ||
28 : | chomp; | ||
29 : | my @tmp = split(/\t/,$_); | ||
30 : | my @fig = grep { $_ =~ /^fig\|/ } @tmp; | ||
31 : | overbeek | 1.2 | if (@fig == 1) |
32 : | overbeek | 1.1 | { |
33 : | overbeek | 1.2 | foreach my $alias (grep { $_ ne $fig[0] } @tmp) |
34 : | overbeek | 1.1 | { |
35 : | $to_peg{$alias} = $fig[0]; | ||
36 : | } | ||
37 : | } | ||
38 : | } | ||
39 : | close(ALIAS); | ||
40 : | |||
41 : | my $corrH = {}; | ||
42 : | open(PCC,"<$dataD/locus.pccs") || die "could not open $dataD/locus.pccs"; | ||
43 : | while (defined($_ = <PCC>)) | ||
44 : | { | ||
45 : | if (($_ =~ /^(\S+)\t(\S+)\t(\S+)/) && | ||
46 : | (my $peg1 = $to_peg{$1}) && | ||
47 : | (my $peg2 = $to_peg{$2})) | ||
48 : | { | ||
49 : | $corrH->{$peg1}->{$peg2} = $corrH->{$peg2}->{$peg1} = $3; | ||
50 : | } | ||
51 : | } | ||
52 : | close(PCC); | ||
53 : | |||
54 : | my $sapO = SAPserver->new; | ||
55 : | |||
56 : | my $genomeH = $sapO->genomes_to_subsystems( -ids => [$genome] ); | ||
57 : | my @subs = map { ($_->[1] =~ /^\*?(0|-1)$/) ? () : $_->[0] } @{$genomeH->{$genome}}; | ||
58 : | |||
59 : | my $subH = $sapO->ids_in_subsystems( -subsystems => \@subs, | ||
60 : | -genome => $genome); | ||
61 : | my @subs = sort keys %$subH; | ||
62 : | |||
63 : | my %bad; | ||
64 : | |||
65 : | foreach my $sub (@subs) | ||
66 : | { | ||
67 : | my %pegs; | ||
68 : | my @pegs; | ||
69 : | |||
70 : | my $sub_entry = $subH->{$sub}; | ||
71 : | @pegs = (); | ||
72 : | foreach my $role (keys(%$sub_entry)) | ||
73 : | { | ||
74 : | my $pegs = $sub_entry->{$role}; | ||
75 : | foreach $_ (@$pegs) { $pegs{$_} = 1 } | ||
76 : | } | ||
77 : | @pegs = sort { &SeedUtils::by_fig_id($a,$b) } keys(%pegs); | ||
78 : | |||
79 : | my @sets = grep { @$_ > 1 } split_on_pc(\@pegs,$corrH); | ||
80 : | |||
81 : | if (@sets > ((@pegs + 2) / 3)) | ||
82 : | { | ||
83 : | $bad{$sub} = 1; | ||
84 : | # print STDERR &Dumper([$sub,\@sets]); | ||
85 : | } | ||
86 : | else | ||
87 : | { | ||
88 : | foreach my $set (@sets) | ||
89 : | { | ||
90 : | if (@$set > 1) | ||
91 : | { | ||
92 : | print join(",",@$set),"\tInSubsystem:$sub\n"; | ||
93 : | } | ||
94 : | } | ||
95 : | } | ||
96 : | } | ||
97 : | foreach $_ (keys(%bad)) | ||
98 : | { | ||
99 : | print STDERR "bad subsystem\t$_\n"; | ||
100 : | } | ||
101 : | |||
102 : | sub split_on_pc { | ||
103 : | my($pegs,$corrH) = @_; | ||
104 : | |||
105 : | my @sets = (); | ||
106 : | my %used; | ||
107 : | my $i; | ||
108 : | for ($i=0; ($i < (@$pegs - 1)); $i++) | ||
109 : | { | ||
110 : | if (! $used{$pegs->[$i]}) | ||
111 : | { | ||
112 : | my @poss = ($pegs->[$i]); | ||
113 : | my $j; | ||
114 : | for ($j=$i+1; ($j < @$pegs); $j++) | ||
115 : | { | ||
116 : | if (&corr($pegs->[$j],\@poss,$corrH)) | ||
117 : | { | ||
118 : | push(@poss,$pegs->[$j]); | ||
119 : | $used{$pegs->[$j]} = 1; | ||
120 : | } | ||
121 : | } | ||
122 : | push(@sets,\@poss); | ||
123 : | } | ||
124 : | } | ||
125 : | return @sets; | ||
126 : | } | ||
127 : | |||
128 : | sub corr { | ||
129 : | my($peg1,$cluster,$corrH) = @_; | ||
130 : | |||
131 : | my $sum = 0; | ||
132 : | foreach my $peg2 (@$cluster) | ||
133 : | { | ||
134 : | my $v = $corrH->{$peg1}->{$peg2}; | ||
135 : | if ((! defined($v)) || ($v < 0.4)) { return 0 } | ||
136 : | $sum += $v; | ||
137 : | } | ||
138 : | return (($sum / @$cluster) >= 0.7); | ||
139 : | } |
MCS Webmaster | ViewVC Help |
Powered by ViewVC 1.0.3 |