[Bio] / FigKernelPackages / ACH.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/ACH.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :     use strict;
3 :    
4 :     #
5 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
6 :     # for Interpretations of Genomes. All Rights Reserved.
7 :     #
8 :     # This file is part of the SEED Toolkit.
9 :     #
10 :     # The SEED Toolkit is free software. You can redistribute
11 :     # it and/or modify it under the terms of the SEED Toolkit
12 :     # Public License.
13 :     #
14 :     # You should have received a copy of the SEED Toolkit Public License
15 :     # along with this program; if not write to the University of Chicago
16 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
17 :     # Genomes at veronika@thefig.info or download a copy from
18 :     # http://www.theseed.org/LICENSE.TXT.
19 :     #
20 :     package ACH;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use SeedUtils;
25 :     use ServerThing;
26 :     use ERDB;
27 :    
28 :     =head1 ACH Server Function Object
29 :    
30 :     This file contains the functions and utilities used by the Annotation
31 :     Clearinghouse Server (B<ach_server.cgi>). The L</Primary Methods> represent
32 :     function calls direct to the server. These all have a signature similar to the
33 :     following.
34 :    
35 :     my $document = $achObject->function_name($args);
36 :    
37 :     where C<$achObject> is an object created by this module,
38 :     C<$args> is a parameter structure, and C<function_name> is the Annotation
39 :     Clearinghouse Server function name. The output is a structure, generally a hash
40 :     reference, but sometimes a string or a list reference.
41 :    
42 :     This server is used to access assertions harvested from the Annotation
43 :     Clearinghouse and stored in the Sapling database. At the current time, it
44 :     is generally one to two weeks behind the latest server data.
45 :    
46 :     =head2 Special Methods
47 :    
48 :     =head3 new
49 :    
50 : disz 1.7 my $ffObject = ACHserver->new();
51 : parrello 1.1
52 :     Create a new Annotation Clearinghouse server function object. The server
53 :     function object contains a pointer to a L<Sapling> object, and is used to invoke
54 :     the server functions.
55 :    
56 :     =cut
57 :    
58 : disz 1.7 #
59 :     # Actually, if you are using ACH.pm, you should do ACH->new(), not ACHserver->new()
60 :     # That comment above is for the benefit of the pod doc stuff on how to use ACHserver
61 :     # that is generated from this file.
62 :     #
63 :    
# Construct an Annotation Clearinghouse server function object.
#
# Parameter:
#   $class - name of the package the new object is blessed into.
#
# Returns a blessed hash reference whose single member, "db", holds the
# value returned by ERDB::GetDatabase('Sapling').
sub new {
    my ($class) = @_;
    # Fetch the database handle first, in scalar context.
    my $database = ERDB::GetDatabase('Sapling');
    # Build the object and bless it in a single step.
    return bless { db => $database }, $class;
}
76 :    
77 :     =head2 Primary Methods
78 :    
79 : parrello 1.4 =head3 methods
80 :    
81 :     my $document = $achObject->methods();
82 :    
83 :     Return a list of the methods allowed on this object.
84 :    
85 :     =cut
86 :    
# Names of the server functions this object exposes, in reporting order.
use constant METHODS => [
    'equiv_sequence',
    'equiv_precise',
];

# Return a reference to the list of method names allowed on this object.
# The same list reference (the METHODS constant) is returned on every call;
# the invocant is not consulted.
sub methods {
    return METHODS;
}
97 :    
98 : parrello 1.1 =head3 equiv_sequence
99 :    
100 :     my $document = $achObject->equiv_sequence($args);
101 :    
102 :     Return the assertions for all genes in the database that match the
103 :     identified protein sequences. A protein sequence can be identified by a
104 :     prefixed MD5 code or any prefixed gene identifier (e.g. C<uni|AYQ44>,
105 :     C<gi|85841784>, or C<fig|360108.3.peg.1041>).
106 :    
107 :     =over 4
108 :    
109 :     =item args
110 :    
111 :     Reference to a list of protein identifiers, or reference to a hash
112 :     with the key C<-ids> whose value is a reference to a list of identifiers. Each
113 :     identifier should be a prefixed gene identifier or the C<md5|>-prefixed MD5 of a
114 :     protein sequence. If the parameter is a hash reference, then if the key C<-hash>
115 :     is provided, the return value will be in the form of a hash instead of a list.
116 :    
117 :     =item RETURN
118 :    
119 : parrello 1.3 Normally, returns a reference to a list of 5-tuples. Each 5-tuple contains an
120 : parrello 1.1 identifier that is sequence-equivalent to at least one of the input identifiers,
121 : parrello 1.3 the asserted function of that identifier, the source of the assertion, a
122 :     flag that is TRUE if the assertion is by an expert, and the name of the genome
123 :     relevant to the identifier (if any). If the C<-hash> flag is specified in the
124 :     parameter list, then the return value will be a hash of lists, keyed by incoming
125 :     protein identifier, mapping each protein identifier to a list of the relevant
126 :     5-tuples.
127 : parrello 1.1
128 :     =back
129 :    
130 :     =cut
131 :    
# Return a reference to a freshly built list of the supported method names.
# Takes no parameters; every call produces a new list.
sub show_methods {
    return [ 'equiv_precise', 'equiv_sequence' ];
}
136 :    
137 :    
# Return the assertions for all identifiers that are sequence-equivalent to
# each incoming identifier.
#
# Parameters:
#   $self - server object; its {db} member is the database handle used for
#           the GetFlat and GetAll queries.
#   $args - either a reference to a list of identifiers, or a hash reference
#           with the key -ids (the identifiers) and optionally -hash (TRUE to
#           get the results back as a hash keyed by incoming identifier).
#
# Returns a reference to a flat list of result rows or, when -hash is TRUE,
# a reference to a hash mapping each incoming identifier that produced
# results to a list of its rows. Identifiers with no results are omitted.
sub equiv_sequence {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the Sapling database.
    my $sap = $self->{db};
    # Convert a list to a hash.
    if (ref $args ne 'HASH') {
        $args = { -ids => $args };
    }
    # Find out if we're returning a hash.
    my $hashFlag = $args->{-hash} || 0;
    # Declare the return variable.
    my $retVal = ($hashFlag ? {} : []);
    # Get the list of IDs.
    my $ids = ServerThing::GetIdList(-ids => $args);
    # Loop through the IDs in the list.
    for my $id (@$ids) {
        # This hash will contain a list of the relevant protein sequence IDs.
        my %prots;
        # We'll put our assertions found in here.
        my @results;
        # Determine the ID type.
        if ($id =~ /^md5\|(.+)/) {
            # Here we have a protein sequence MD5 ID. In this case, we just
            # strip the prefix to get a Sapling protein sequence ID.
            $prots{$1} = 1;
        } else {
            # Here we have a gene ID. Start by asking for all of the
            # protein sequences it identifies directly.
            my @prots = $sap->GetFlat("Identifier Names ProteinSequence",
                                      'Identifier(id) = ?', [$id],
                                      'ProteinSequence(id)');
            # Add the ones it identifies through a feature.
            push @prots, $sap->GetFlat("Identifier Identifies Feature Produces ProteinSequence",
                                       'Identifier(id) = ?', [$id],
                                       'ProteinSequence(id)');
            # Put all the proteins found in the hash; this also removes
            # duplicates found by both queries.
            for my $prot (@prots) {
                $prots{$prot} = 1;
            }
        }
        # Loop through the protein sequences, finding assertions. For each
        # protein, we make two queries. Note that we expect the number of
        # protein sequences to be small, despite the large amount of work
        # performed above.
        for my $prot (sort keys %prots) {
            # Get the assertions on the protein's identifiers. These must be
            # appended, not assigned: assigning here would throw away the
            # assertions gathered for every previously processed protein.
            push @results, $sap->GetAll("ProteinSequence IsNamedBy Identifier HasAssertionFrom Source",
                                        "ProteinSequence(id) = ?", [$prot],
                                        [qw(Identifier(id) HasAssertionFrom(function)
                                            Source(id) HasAssertionFrom(expert))]);
            # Add the assertions on the identifiers for the protein's features.
            push @results, $sap->GetAll("ProteinSequence IsProteinFor Feature IsIdentifiedBy Identifier HasAssertionFrom Source AND Feature IsOwnedBy Genome",
                                        "ProteinSequence(id) = ?", [$prot],
                                        [qw(Identifier(id) HasAssertionFrom(function)
                                            Source(id) HasAssertionFrom(expert)
                                            Genome(scientific-name))]);
        }
        # If we found results, put them in the return object.
        Trace(scalar(@results) . " results found for $id.") if T(3);
        if (@results) {
            if ($hashFlag) {
                $retVal->{$id} = \@results;
            } else {
                push @$retVal, @results;
            }
        }
    }
    # Return the result.
    return $retVal;
}
209 :    
210 :     =head3 equiv_precise
211 :    
212 :     my $document = $achObject->equiv_precise($args);
213 :    
214 :     Return the assertions for all genes in the database that match the
215 :     identified gene. The gene can be specified by any prefixed gene
216 :     identifier (e.g. C<uni|AYQ44>, C<gi|85841784>, or
217 :     C<fig|360108.3.peg.1041>).
218 :    
219 :     =over 4
220 :    
221 :     =item args
222 :    
223 :     Reference to a list of gene identifiers, or reference to a hash
224 :     with the key C<-ids> whose value is a reference to a list of
225 :     identifiers. Each identifier should be a prefixed gene identifier
226 :     or the C<md5|>-prefixed MD5 of a protein sequence. If the parameter
227 :     is a hash reference, then if the key C<-hash> is provided, the return value will
228 :     be in the form of a hash instead of a list.
229 :    
230 :     =item RETURN
231 :    
232 :     Normally, returns a reference to a list of 2-tuples. Each 2-tuple consists
233 :     of an input identifier followed by a reference to a list of 4-tuples.
234 :     Each 4-tuple contains an identifier that is equivalent to the input identifier,
235 :     the asserted function of that identifier, the source of the assertion, and a
236 :     flag that is TRUE if the assertion is by an expert.
237 :    
238 :     =back
239 :    
240 :     =cut
241 :    
# Return the assertions attached directly to each incoming identifier.
#
# Parameters:
#   $self - server object; its {db} member is the database handle used for
#           the GetAll query.
#   $args - either a reference to a list of identifiers, or a hash reference
#           with the key -ids holding the identifiers.
#
# Returns a reference to a list of 2-tuples, one per incoming identifier, in
# input order: [identifier, reference to the list of rows GetAll returned].
# An identifier with no matching rows still gets a tuple with an empty list.
# Note that no -hash option is consulted here; the result is always a list.
sub equiv_precise {
    my ($self, $args) = @_;
    my $sap = $self->{db};
    # A bare list of identifiers is wrapped so it looks like the hash form.
    unless (ref $args eq 'HASH') {
        $args = { -ids => $args };
    }
    # Extract the identifiers to process.
    my $ids = ServerThing::GetIdList(-ids => $args);
    # Fields requested for every assertion row.
    my @fields = ('Identifier(id)', 'HasAssertionFrom(function)',
                  'Source(id)', 'HasAssertionFrom(expert)');
    my @pairs;
    for my $id (@$ids) {
        # Each query gets its own copy of the field list.
        my @rows = $sap->GetAll("Identifier HasAssertionFrom Source",
                                'Identifier(id) = ? ',
                                [$id], [@fields]);
        push @pairs, [$id, \@rows];
    }
    return \@pairs;
}
267 :    
268 :    
269 :    
270 :    
271 :    
272 : disz 1.7 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3