[Bio] / FigKernelPackages / ACH.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/ACH.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :     use strict;
3 :    
4 :     #
5 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
6 :     # for Interpretations of Genomes. All Rights Reserved.
7 :     #
8 :     # This file is part of the SEED Toolkit.
9 :     #
10 :     # The SEED Toolkit is free software. You can redistribute
11 :     # it and/or modify it under the terms of the SEED Toolkit
12 :     # Public License.
13 :     #
14 :     # You should have received a copy of the SEED Toolkit Public License
15 :     # along with this program; if not write to the University of Chicago
16 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
17 :     # Genomes at veronika@thefig.info or download a copy from
18 :     # http://www.theseed.org/LICENSE.TXT.
19 :     #
20 :     package ACH;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use SeedUtils;
25 :     use ServerThing;
26 :     use ERDB;
27 :    
28 :     =head1 ACH Server Function Object
29 :    
30 :     This file contains the functions and utilities used by the Annotation
31 :     Clearinghouse Server (B<ach_server.cgi>). The L</Primary Methods> represent
32 :     function calls direct to the server. These all have a signature similar to the
33 :     following.
34 :    
35 :     my $document = $achObject->function_name($args);
36 :    
37 :     where C<$achObject> is an object created by this module,
38 :     C<$args> is a parameter structure, and C<function_name> is the Annotation
39 :     Clearinghouse Server function name. The output is a structure, generally a hash
40 :     reference, but sometimes a string or a list reference.
41 :    
42 :     This server is used to access assertions harvested from the Annotation
43 :     Clearinghouse and stored in the Sapling database. At the current time, it
44 :     is generally one to two weeks behind the latest server data.
45 :    
46 :     =head2 Special Methods
47 :    
48 :     =head3 new
49 :    
50 :     my $ffObject = ACH->new();
51 :    
52 :     Create a new Annotation Clearinghouse server function object. The server
53 :     function object contains a pointer to a L<Sapling> object, and is used to invoke
54 :     the server functions.
55 :    
56 :     =cut
57 :    
sub new {
    my ($class) = @_;
    # Start with an empty object of the requested class.
    my $self = bless {}, $class;
    # The only state carried is the Sapling database handle, kept under the
    # "db" key, which is where the server methods look for it.
    $self->{db} = ERDB::GetDatabase('Sapling');
    return $self;
}
70 :    
71 :     =head2 Primary Methods
72 :    
73 : parrello 1.4 =head3 methods
74 :    
75 :     my $document = $achObject->methods();
76 :    
77 :     Return a list of the methods allowed on this object.
78 :    
79 :     =cut
80 :    
# Names of the server functions that may be invoked on this object.
use constant METHODS => [ 'equiv_sequence', 'equiv_precise' ];

sub methods {
    # The invocant is accepted so this can be called as a method, but it
    # is not consulted: the answer is the same for every object.
    my $self = shift;
    # Hand back the shared list reference.
    return METHODS;
}
91 :    
92 : parrello 1.1 =head3 equiv_sequence
93 :    
94 :     my $document = $achObject->equiv_sequence($args);
95 :    
96 :     Return the assertions for all genes in the database that match the
97 :     identified protein sequences. A protein sequence can be identified by a
98 :     prefixed MD5 code or any prefixed gene identifier (e.g. C<uni|AYQ44>,
99 :     C<gi|85841784>, or C<fig|360108.3.peg.1041>).
100 :    
101 :     =over 4
102 :    
103 :     =item args
104 :    
105 :     Reference to a list of protein identifiers, or reference to a hash
106 :     with the key C<-ids> whose value is a reference to a list of identifiers. Each
107 :     identifier should be a prefixed gene identifier or the C<md5|>-prefixed MD5 of a
108 :     protein sequence. If the parameter is a hash reference, then if the key C<-hash>
109 :     is provided, the return value will be in the form of a hash instead of a list.
110 :    
111 :     =item RETURN
112 :    
113 : parrello 1.3 Normally, returns a reference to a list of 5-tuples. Each 5-tuple contains an
114 : parrello 1.1 identifier that is sequence-equivalent to at least one of the input identifiers,
115 : parrello 1.3 the asserted function of that identifier, the source of the assertion, a
116 :     flag that is TRUE if the assertion is by an expert, and the name of the genome
117 :     relevant to the identifier (if any). If the C<-hash> flag is specified in the
118 :     parameter list, then the return value will be a hash of lists, keyed by incoming
119 :     protein identifier, mapping each protein identifier to a list of the relevant
120 :     5-tuples.
121 : parrello 1.1
122 :     =back
123 :    
124 :     =cut
125 :    
sub equiv_sequence {
    # Get the parameters: the server object and either a list reference of
    # IDs or a hash reference with the keys "-ids" and (optionally) "-hash".
    my ($self, $args) = @_;
    # Get the Sapling database.
    my $sap = $self->{db};
    # Wrap a bare ID list in a hash so both calling styles are handled alike.
    if (ref $args ne 'HASH') {
        $args = { -ids => $args };
    }
    # Find out if we're returning a hash.
    my $hashFlag = $args->{-hash} || 0;
    # Declare the return variable: a hash keyed by incoming ID when -hash
    # is set, otherwise one flat list of assertion rows.
    my $retVal = ($hashFlag ? {} : []);
    # Get the list of IDs.
    my $ids = ServerThing::GetIdList(-ids => $args);
    # Loop through the IDs in the list.
    for my $id (@$ids) {
        # This hash will contain the relevant protein sequence IDs as keys,
        # which also removes duplicates.
        my %prots;
        # Assertions found for this ID, accumulated over all its proteins.
        my @results;
        MemTrace("Processing id $id in task $$.") if T(3); ##HACK
        # Determine the ID type.
        if ($id =~ /^md5\|(.+)/) {
            # Here we have a protein sequence MD5 ID. In this case, we just
            # strip the prefix to get a Sapling protein sequence ID.
            $prots{$1} = 1;
        } else {
            # Here we have a gene ID. Start by asking for all of the
            # protein sequences it identifies directly.
            my @prots = $sap->GetFlat("Identifier Names ProteinSequence",
                                      'Identifier(id) = ?', [$id],
                                      'ProteinSequence(id)');
            # Add the ones it identifies through a feature.
            push @prots, $sap->GetFlat("Identifier Identifies Feature Produces ProteinSequence",
                                       'Identifier(id) = ?', [$id],
                                       'ProteinSequence(id)');
            # Put all the proteins found in the hash.
            for my $prot (@prots) {
                $prots{$prot} = 1;
            }
        }
        # Loop through the protein sequences, finding assertions. For each
        # protein, we make two queries. The rows must be appended rather than
        # assigned: assigning here would discard everything found for the
        # proteins already processed and keep only the last protein's rows.
        for my $prot (sort keys %prots) {
            MemTrace("Processing protein $prot in task $$.") if T(3); ##HACK
            # Get the assertions on the protein's identifiers. These rows
            # have four fields; no genome is requested on this path.
            push @results, $sap->GetAll("ProteinSequence IsNamedBy Identifier HasAssertionFrom Source",
                                        "ProteinSequence(id) = ?", [$prot],
                                        [qw(Identifier(id) HasAssertionFrom(function)
                                            Source(id) HasAssertionFrom(expert))]);
            # Add the assertions on the identifiers for the protein's
            # features. These rows carry the genome name as a fifth field.
            push @results, $sap->GetAll("ProteinSequence IsProteinFor Feature IsIdentifiedBy Identifier HasAssertionFrom Source AND Feature IsOwnedBy Genome",
                                        "ProteinSequence(id) = ?", [$prot],
                                        [qw(Identifier(id) HasAssertionFrom(function)
                                            Source(id) HasAssertionFrom(expert)
                                            Genome(scientific-name))]);
        }
        MemTrace("Processing results in task $$.") if T(3); ##HACK
        # If we found results, put them in the return object. IDs with no
        # results are omitted entirely, in both hash and list mode.
        Trace(scalar(@results) . " results found for $id.") if T(3);
        if (@results) {
            if ($hashFlag) {
                $retVal->{$id} = \@results;
            } else {
                push @$retVal, @results;
            }
        }
    }
    # Return the result.
    return $retVal;
}
200 :    
201 :     =head3 equiv_precise
202 :    
203 :     my $document = $achObject->equiv_precise($args);
204 :    
205 :     Return the assertions for all genes in the database that match the
206 :     identified gene. The gene can be specified by any prefixed gene
207 :     identifier (e.g. C<uni|AYQ44>, C<gi|85841784>, or
208 :     C<fig|360108.3.peg.1041>).
209 :    
210 :     =over 4
211 :    
212 :     =item args
213 :    
214 :     Reference to a list of gene identifiers, or reference to a hash
215 :     with the key C<-ids> whose value is a reference to a list of
216 :     identifiers. Each identifier should be a prefixed gene identifier
217 :     or the C<md5|>-prefixed MD5 of a protein sequence. If the parameter
218 :     is a hash reference, then if the key C<-hash> is provided, the return value will
219 :     be in the form of a hash instead of a list.
220 :    
221 :     =item RETURN
222 :    
223 :     Normally, returns a reference to a list of 2-tuples. Each 2-tuple consists
224 :     of an input identifier followed by a reference to a list of 4-tuples.
225 :     Each 4-tuple contains an identifier that is equivalent to the input identifier,
226 :     the asserted function of that identifier, the source of the assertion, and a
227 :     flag that is TRUE if the assertion is by an expert.
228 :    
229 :     =back
230 :    
231 :     =cut
232 :    
sub equiv_precise {
    # Unpack the server object and the caller's argument structure.
    my ($self, $args) = @_;
    my $sap = $self->{db};
    # A bare ID list is wrapped in a hash so that both calling styles are
    # handled alike. Only the "-ids" key is read below; a "-hash" key is
    # not consulted by this implementation.
    $args = { -ids => $args } unless ref $args eq 'HASH';
    my $ids = ServerThing::GetIdList(-ids => $args);
    # Fields fetched for each assertion: the identifier, its asserted
    # function, the source of the assertion, and the expert flag.
    my @fields = ('Identifier(id)',
                  'HasAssertionFrom(function)',
                  'Source(id)',
                  'HasAssertionFrom(expert)');
    # Build one [id, rows] pair per incoming ID, in input order. An ID with
    # no assertions still gets a pair, with an empty row list.
    my @pairs;
    for my $id (@$ids) {
        my @rows = $sap->GetAll("Identifier HasAssertionFrom Source",
                                'Identifier(id) = ? ', [$id], [@fields]);
        push @pairs, [$id, \@rows];
    }
    # Return the list of pairs.
    return \@pairs;
}
258 :    
259 :    
260 :    
261 :    
262 :    
263 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3