[Bio] / FigKernelPackages / ANNO.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/ANNO.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (view) (download) (as text)

1 : olson 1.1 #!/usr/bin/perl -w
2 :     use strict;
3 :    
4 :     #
5 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
6 :     # for Interpretations of Genomes. All Rights Reserved.
7 :     #
8 :     # This file is part of the SEED Toolkit.
9 :     #
10 :     # The SEED Toolkit is free software. You can redistribute
11 :     # it and/or modify it under the terms of the SEED Toolkit
12 :     # Public License.
13 :     #
14 :     # You should have received a copy of the SEED Toolkit Public License
15 :     # along with this program; if not write to the University of Chicago
16 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
17 :     # Genomes at veronika@thefig.info or download a copy from
18 :     # http://www.theseed.org/LICENSE.TXT.
19 :     #
20 :     package ANNO;
21 :    
22 :     use strict;
23 :     use ERDB;
24 :     use Tracer;
25 :     use SeedUtils;
26 :     use ServerThing;
27 :    
# Constructor: build an ANNO server object.
# The object is a blessed hash whose only slot, "db", holds whatever
# ERDB::GetDatabase returns for the 'Sapling' database name.
sub new {
    my ($class) = @_;
    # Fetch the database handle first (scalar context) ...
    my $database = ERDB::GetDatabase('Sapling');
    # ... then wrap it and bless the wrapper into the requested class.
    return bless { db => $database }, $class;
}
38 :    
39 :    
40 :     =head2 Primary Methods
41 :    
42 :     =head3 methods
43 :    
44 :     my $methodList = $ssObject->methods();
45 :    
46 :     Return a list of the methods allowed on this object.
47 :    
48 :     =cut
49 :    
# Names of the methods a client is allowed to invoke on this object.
# NOTE(review): only metabolic_reconstruction and find_special_proteins are
# defined in this package as shown; presumably the other names are served
# elsewhere -- confirm.
use constant METHODS => [qw(
    metabolic_reconstruction
    assign_function_to_prot
    call_genes
    find_rnas
    assign_functions_to_DNA
    find_special_proteins
)];

# Return a reference to a list of the method names allowed on this object.
# A fresh copy is handed out on every call: METHODS is a single shared
# array, so returning it directly would let one caller's push/splice
# change the list seen by every later caller.
sub methods {
    # Get the parameters.
    my ($self) = @_;
    # Return a private copy of the list.
    return [ @{ METHODS() } ];
}
64 :    
65 :     #
66 :     # Docs are in ANNOserver.pm.
67 :     #
68 :    
# Run find_special_proteins::find_selenoproteins on the caller's arguments.
#
# $args is a hash reference using dash-prefixed keys. The values of
# -contigs, -is_init, -is_alt, -is_term and -comment are passed through
# under the same names without the dash. -templates is optional:
#   * an array reference is passed through as "references";
#   * a string beginning with "pyr" sets the "pyrrolysine" flag;
#   * anything else (including an undefined value) is ignored.
#
# Returns a reference to the list produced by find_selenoproteins.
sub find_special_proteins {
    # Get the parameters.
    my ($self, $args) = @_;
    # Pull in the special protein finder only when it is actually needed.
    require find_special_proteins;
    # Convert the hash to the form expected by find_special_proteins.
    my %params = map { $_ => $args->{"-$_"} }
                     qw(contigs is_init is_alt is_term comment);
    # Test for definedness rather than existence: a -templates key that is
    # present but undefined must not reach the pattern match below, where
    # it would only raise an "uninitialized value" warning.
    my $templates = $args->{-templates};
    if (defined $templates) {
        if (ref $templates eq 'ARRAY') {
            $params{references} = $templates;
        } elsif ($templates =~ /^pyr/) {
            $params{pyrrolysine} = 1;
        }
    }
    # Process the input.
    my @retVal = find_special_proteins::find_selenoproteins(\%params);
    # Return the result.
    return \@retVal;
}
95 :    
# Match a list of functional roles against the subsystem variants stored
# in the database.
#
# $args is a hash reference; its -roles entry is a reference to a list in
# which each element is either a function string or a [function, id] pair.
# Elements without an ID receive a generated one (FR1001, FR1002, ...).
# Each function string is split into individual roles before matching.
# A missing -roles entry is treated as an empty list.
#
# Returns a reference to a list of [variant, role, id] triples: for every
# subsystem selected by the 'Subsystem(usable) = 1' query for which
# get_max_subset reports a variant, one triple per supplied ID of each
# matching role. Subsystems and roles are reported in the order in which
# the query delivered them, so the output is deterministic.
sub metabolic_reconstruction {
    # Get the parameters.
    my ($self, $args) = @_;

    my $sapling = $self->{db};
    my $retVal = [];

    # This counter will be used to generate user IDs for roles without them.
    my $next = 1000;

    my $id_roles = $args->{-roles} || [];

    # Map each individual role to the list of IDs it was supplied under.
    my %big;
    foreach my $entry (@$id_roles) {
        my ($function, $id) = ref($entry) ? @$entry : ($entry, "FR" . ++$next);
        # NOTE(review): this splits on "; ", " ] " and " @ ". Confirm that
        # " ] " is really the intended delimiter and not a typo for
        # another separator such as " / ".
        foreach my $role (split(/(?:; )|(?: [\]\@] )/, $function)) {
            push @{ $big{$role} }, $id;
        }
    }

    # With no roles nothing can match, so skip the subsystem query.
    return $retVal unless %big;

    my @resultRows = $sapling->GetAll("Subsystem Includes Role",
        'Subsystem(usable) = ? ORDER BY Subsystem(id), Includes(sequence)',
        [1], [qw(Subsystem(id) Role(id) Includes(abbreviation))]);

    # Record the abbreviation of every role in every subsystem, and remember
    # the order in which subsystems and roles first appeared. Iterating over
    # hash keys alone would throw the query's ordering away.
    my (%ss_roles, @sub_order, %role_order);
    foreach my $row (@resultRows) {
        my ($sub, $role, $abbr) = @$row;
        push @sub_order, $sub unless exists $ss_roles{$sub};
        push @{ $role_order{$sub} }, $role unless exists $ss_roles{$sub}{$role};
        $ss_roles{$sub}{$role} = $abbr;
    }

    foreach my $sub (@sub_order) {
        my $roles = $ss_roles{$sub};
        # Only the roles the caller supplied take part in the match.
        my @matched = grep { exists $big{$_} } @{ $role_order{$sub} };
        next unless @matched;
        my $set = join(" ", map { $roles->{$_} } @matched);
        my ($variant) = $self->get_max_subset($sub, $set);
        next unless $variant;
        foreach my $role (@matched) {
            foreach my $id (@{ $big{$role} }) {
                push @$retVal, [$variant, $role, $id];
            }
        }
    }
    # Return the result.
    return $retVal;
}
155 :    
156 :     =head2 Internal Utility Methods
157 :    
158 :     =head3 get_max_subset
159 :    
160 :     my ($max_variant, $max_size) = $ssObject->get_max_subset($sub, $setA);
161 :    
162 :     Given a subsystem ID and a role rule, return the ID of the variant for
163 :     the subsystem that matches the most roles in the rule and the number of
164 :     roles matched.
165 :    
166 :     =over 4
167 :    
168 :     =item sub
169 :    
170 :     Name (ID) of the subsystem whose variants are to be examined.
171 :    
172 :     =item setA
173 :    
174 :     A space-delimited list of role abbreviations, lexically ordered. This provides
175 :     a unique specification of the roles in the set.
176 :    
177 :     =item RETURN
178 :    
179 : parrello 1.2 Returns a 2-element list consisting of the name of the variant found (subsystem
180 :     name, colon, and variant code) and the number of roles matched.
181 : olson 1.1
182 :     =back
183 :    
184 :     =cut
185 :    
# Scan the 'normal' variants of subsystem $sub and pick the one with the
# largest role rule for which is_A_a_superset_of_B($setA, rule) is true.
# Returns (variant name, rule size); the name is "<subsystem>:<variant code>".
# When nothing matches the name is undefined and the size is 0.
sub get_max_subset {
    my ($self, $sub, $setA) = @_;
    my ($best_variant, $best_size) = (undef, 0);
    my $query = $self->{db}->Get("Subsystem Describes Variant",
                                 'Subsystem(id) = ? AND Variant(type) = ?',
                                 [$sub, 'normal']);
    while (my $record = $query->Fetch()) {
        # A variant may carry several role rules; each is tried in turn.
        my @rules = $record->Value('Variant(role-rule)');
        my ($code) = $record->Value('Variant(code)');
        my $candidate = $sub . ":" . $code;
        for my $rule (@rules) {
            my $size = is_A_a_superset_of_B($setA, $rule);
            # Keep only strictly larger matches, so the first rule of a
            # given size wins.
            next unless $size && $size > $best_size;
            ($best_variant, $best_size) = ($candidate, $size);
        }
    }
    return ($best_variant, $best_size);
}
212 :    
213 :    
214 :     =head3 is_A_a_superset_of_B
215 :    
216 :     my $size = SS::is_A_a_superset_of_B($a, $b);
217 :    
218 :     This method takes as input two role rules, and returns 0 if the first
219 :     role rule is NOT a superset of the second; otherwise, it returns the size
220 :     of the second rule. A role rule is a space-delimited list of role
221 :     abbreviations in lexical order. This provides a unique identifier for a
222 :     set of roles in a subsystem.
223 :    
224 :     =over 4
225 :    
226 :     =item a
227 :    
228 :     First role rule.
229 :    
230 :     =item b
231 :    
232 :     Second role rule.
233 :    
234 :     =item RETURN
235 :    
236 :     Returns 0 if the first rule is NOT a superset of the second and the size of the
237 :     second rule if it is. As a result, if the first rule IS a superset, this method
238 :     will evaluate to TRUE, and to FALSE otherwise.
239 :    
240 :     =back
241 :    
242 :     =cut
243 :    
# Compare two role rules (whitespace-delimited lists of role abbreviations).
# Returns the number of entries in the second rule when every one of them
# also occurs in the first rule, and 0 otherwise. An empty or undefined
# second rule therefore yields 0.
sub is_A_a_superset_of_B {
    # Avoid the names $a and $b for lexicals: they are the sort globals.
    my ($ruleA, $ruleB) = @_;
    # Undefined rules are treated as empty so they cannot raise warnings.
    my @rolesA = split(" ", defined $ruleA ? $ruleA : '');
    my @rolesB = split(" ", defined $ruleB ? $ruleB : '');
    # A longer list can never be contained in a shorter one.
    return 0 if @rolesB > @rolesA;
    my %given = map { $_ => 1 } @rolesA;
    foreach my $role (@rolesB) {
        return 0 unless $given{$role};
    }
    return scalar(@rolesB);
}
257 :    
258 :    
259 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3