[Bio] / FigKernelScripts / rapid_subsystem_inference.pl Repository:
ViewVC logotype

Annotation of /FigKernelScripts/rapid_subsystem_inference.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (view) (download) (as text)

1 : overbeek 1.1 ########################################################################
2 : olson 1.7 #
3 :    
4 : overbeek 1.1
5 : olson 1.4 use strict;
6 :    
7 : olson 1.18 use Data::Dumper;
8 : overbeek 1.1 use FIG;
9 : olson 1.20 use Subsystem;
10 : overbeek 1.1 my $fig = new FIG;
11 :    
12 : olson 1.7 my $usage = "usage: rapid_subsystem_inference SubsystemDescDir [evidence-log] < AssignedFunctions";
13 :    
14 :     @ARGV == 1 or @ARGV == 2 or die $usage;
15 : overbeek 1.1
16 : olson 1.7 my $subsysD = shift;
17 :     my $evlogFile = shift;
18 :    
19 :     my $evlogFH;
20 :    
21 :     if ($evlogFile)
22 :     {
23 :     $evlogFH = new FileHandle(">>$evlogFile");
24 :     $evlogFH or warn "Cannot open evidence log $evlogFile for append: $!";
25 :     }
26 : olson 1.8 else
27 :     {
28 :     $evlogFH = new FileHandle(">/dev/null");
29 :     }
30 : olson 1.4
31 : overbeek 1.1
32 :     &FIG::verify_dir($subsysD);
33 :     open(SUBS,">$subsysD/subsystems")
34 :     || die "could not open $subsysD/subsystems";
35 : overbeek 1.21 open(BINDINGS,"| sort -u > $subsysD/bindings")
36 : overbeek 1.1 || die "could not open $subsysD/bindings";
37 :    
38 : olson 1.7 #
39 :     # Read the proposed function annotations from stdin. Split
40 :     # assignments that are joined with ; or @ into separate roles.
41 :     #
42 :     # Store the role => peg bindings in the %bindings hash.
43 :     #
44 :    
45 : olson 1.4 my %bindings;
46 : overbeek 1.16 my %funcs;
47 :     while (defined($_ = <STDIN>)) ### Keep just the last assignment in the file
48 :     {
49 :     if ($_ =~ /^(\S+)\t(\S.*\S)/)
50 :     {
51 :     $funcs{$1} = $2;
52 :     }
53 :     }
54 :    
55 :     foreach my $peg (keys(%funcs))
56 :     {
57 :     my $func = $funcs{$peg};
58 :     $func =~ s/\s*[\#\!].*$//;
59 :     my @roles = split(/(; )|( [@\/] )/,$func);
60 :     foreach my $role (@roles)
61 :     {
62 :     $bindings{$role}->{$peg} = 1;
63 :     }
64 :     }
65 :    
66 : olson 1.18 foreach my $subsys_name (sort grep {$fig->usable_subsystem($_) } $fig->all_subsystems)
67 : overbeek 1.1 {
68 : olson 1.20 # my $sobj = $fig->get_subsystem($subsys_name);
69 :     my $sobj = Subsystem->new($subsys_name, $fig);
70 : overbeek 1.11 if (! $sobj)
71 :     {
72 :     print STDERR "Something is screwed up with $subsys_name\n";
73 :     next;
74 :     }
75 :    
76 : olson 1.19 # print STDERR "Process $subsys_name\n";
77 : olson 1.18
78 : overbeek 1.1 my @roles = $sobj->get_roles;
79 : overbeek 1.5 my @non_aux_roles = grep { ! $sobj->is_aux_role($_) } @roles;
80 :    
81 : overbeek 1.1 my(%vcodes);
82 :     foreach my $genome ($sobj->get_genomes)
83 :     {
84 :     my @roles_in_genome = ();
85 :     my $vcode = $sobj->get_variant_code($sobj->get_genome_index($genome));
86 : overbeek 1.12 next if (($vcode eq '0') || ($vcode =~ /\*/));
87 : overbeek 1.1
88 : overbeek 1.5 foreach my $role (@non_aux_roles)
89 : overbeek 1.1 {
90 :     my @pegs = $sobj->get_pegs_from_cell($genome,$role);
91 :     if (@pegs > 0)
92 :     {
93 :     push(@roles_in_genome,$role);
94 :     }
95 :     }
96 : olson 1.7
97 :     #
98 :     # @roles_in_genome contains all of the roles in this subsystem
99 :     # for which this genome has pegs.
100 :     #
101 :     # Construct $key from these; this is the signature of this
102 :     # genome in this subsystem. Save the variant code for this set.
103 :     #
104 : redwards 1.17 # There is an occasional bug where genomes have vc > 0 but no
105 :     # pegs in cells. In this case, the length of the key will be 0
106 : olson 1.7
107 : overbeek 1.1 my $key = join("\t",sort @roles_in_genome);
108 : redwards 1.17 $vcodes{$key}->{$vcode}++ if (length($key) > 0);
109 : overbeek 1.1 }
110 :    
111 : olson 1.19 # print STDERR Dumper($subsys_name, \%vcodes);
112 : olson 1.7 #
113 :     # Compute the signature for the genome in question based
114 :     # on the %bindings hash we computed ealier.
115 :     #
116 :    
117 : overbeek 1.1 my @roles_in_this_genome = ();
118 : overbeek 1.5 foreach my $role (@non_aux_roles)
119 : overbeek 1.1 {
120 :     if (defined($bindings{$role}))
121 :     {
122 :     push(@roles_in_this_genome,$role);
123 :     }
124 :     }
125 : olson 1.7
126 : overbeek 1.1 my $key = join("\t",sort @roles_in_this_genome);
127 : overbeek 1.6
128 : olson 1.7 #
129 :     # Attempt to match this signature with one already present in the subsystem.
130 :     #
131 :    
132 : overbeek 1.6 my $n;
133 :     my $bestN = 0;
134 :     my $bestK = undef;
135 : olson 1.7 my $matches = $vcodes{$key};
136 : olson 1.15
137 : olson 1.7 if ($matches)
138 :     {
139 :     #
140 :     # We found an exact match.
141 :     #
142 : olson 1.18 print $evlogFH join("\t", "exact_match", $subsys_name, join(",", sort keys %$matches), $key), "\n";
143 : olson 1.7 }
144 :     else
145 : overbeek 1.6 {
146 : olson 1.7 #
147 :     # No exact mactch
148 :     #
149 : olson 1.18 foreach my $key1 (sort keys(%vcodes))
150 : overbeek 1.6 {
151 : olson 1.7 #
152 :     # Recall: $key is the key we are trying to match
153 :     # $key1 is the key we are currently examining.
154 :    
155 : overbeek 1.13 if (&not_minus_1($vcodes{$key1}) &&
156 :     (length($key) > length($key1)) &&
157 :     ($n = &contains($key,$key1)) &&
158 :     ($n > $bestN))
159 : overbeek 1.6 {
160 :     $bestN = $n;
161 :     $bestK = $key1;
162 :     }
163 :     }
164 : overbeek 1.9 if ($bestK)
165 :     {
166 :     $matches = $vcodes{$bestK};
167 : olson 1.18 print $evlogFH join("\t", "partial_match", $subsys_name, join(",", sort keys %$matches), $key), "\n" if $evlogFH;
168 : overbeek 1.9 }
169 : overbeek 1.6 }
170 :    
171 : overbeek 1.1 if (defined($matches))
172 :     {
173 :     my @vcs = sort { ($vcodes{$b} <=> $vcodes{$a}) or ($b cmp $a) } keys(%$matches);
174 :     my $best = $vcs[0];
175 : overbeek 1.10
176 :     print $evlogFH join("\t", "best_subsys", $subsys_name, $best), "\n";
177 :     print SUBS join("\t",($subsys_name,$best)),"\n";
178 : olson 1.18 foreach my $role (sort @non_aux_roles)
179 : overbeek 1.1 {
180 : overbeek 1.10 if (defined(my $bhash = $bindings{$role}))
181 : overbeek 1.1 {
182 : olson 1.18 foreach my $peg (sort { &FIG::by_fig_id($a, $b) } keys(%{$bhash}))
183 : overbeek 1.1 {
184 : overbeek 1.10 print BINDINGS join("\t",($subsys_name,$role,$peg)),"\n";
185 : overbeek 1.1 }
186 :     }
187 :     }
188 :     }
189 :     }
190 :     close(SUBS);
191 :     close(BINDINGS);
192 : overbeek 1.6
193 : overbeek 1.9
194 :     # returns undef iff $k2 is not a subset of $k1. If it is, it returns the size of $k2
195 : overbeek 1.6 sub contains {
196 :     my($k1,$k2) = @_;
197 :    
198 :     my %s1 = map { $_ => 1 } split(/\t/,$k1);
199 :     my @s2 = split(/\t/,$k2);
200 :     my $i;
201 :     for ($i=0; ($i < @s2) && $s1{$s2[$i]}; $i++) {}
202 :     return ($i < @s2) ? undef : scalar @s2;
203 :     }
204 : overbeek 1.13
205 :     sub not_minus_1 {
206 :     my($hits) = @_;
207 :    
208 :     my @poss = keys(%$hits);
209 :     my $i;
210 :     for ($i=0; ($i < @poss) && ($poss[$i] eq "-1"); $i++) {}
211 :     return ($i < @poss);
212 :     }

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3