[Bio] / FigKernelPackages / ACH.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/ACH.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :     use strict;
3 :    
4 :     #
5 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
6 :     # for Interpretations of Genomes. All Rights Reserved.
7 :     #
8 :     # This file is part of the SEED Toolkit.
9 :     #
10 :     # The SEED Toolkit is free software. You can redistribute
11 :     # it and/or modify it under the terms of the SEED Toolkit
12 :     # Public License.
13 :     #
14 :     # You should have received a copy of the SEED Toolkit Public License
15 :     # along with this program; if not write to the University of Chicago
16 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
17 :     # Genomes at veronika@thefig.info or download a copy from
18 :     # http://www.theseed.org/LICENSE.TXT.
19 :     #
20 :     package ACH;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use SeedUtils;
25 :     use ServerThing;
26 :     use ERDB;
27 :    
28 :     =head1 ACH Server Function Object
29 :    
30 :     This file contains the functions and utilities used by the Annotation
31 :     Clearinghouse Server (B<ach_server.cgi>). The L</Primary Methods> represent
32 :     function calls direct to the server. These all have a signature similar to the
33 :     following.
34 :    
35 :     my $document = $achObject->function_name($args);
36 :    
37 :     where C<$achObject> is an object created by this module,
38 :     C<$args> is a parameter structure, and C<function_name> is the Annotation
39 :     Clearinghouse Server function name. The output is a structure, generally a hash
40 :     reference, but sometimes a string or a list reference.
41 :    
42 :     This server is used to access assertions harvested from the Annotation
43 :     Clearinghouse and stored in the Sapling database. At the current time, it
44 :     is generally one to two weeks behind the latest server data.
45 :    
46 :     =head2 Special Methods
47 :    
48 :     =head3 new
49 :    
50 :     my $achObject = ACH->new();
51 :    
52 :     Create a new Annotation Clearinghouse server function object. The server
53 :     function object contains a pointer to a L<Sapling> object, and is used to invoke
54 :     the server functions.
55 :    
56 :     =cut
57 :    
sub new {
    my ($class) = @_;
    # The only state carried by the server object is the Sapling database
    # handle, stored under the "db" key where the server methods look for it.
    # bless returns the blessed reference, which is handed straight back.
    return bless { db => ERDB::GetDatabase('Sapling') }, $class;
}
70 :    
71 :     =head2 Primary Methods
72 :    
73 :     =head3 equiv_sequence
74 :    
75 :     my $document = $achObject->equiv_sequence($args);
76 :    
77 :     Return the assertions for all genes in the database that match the
78 :     identified protein sequences. A protein sequence can be identified by a
79 :     prefixed MD5 code or any prefixed gene identifier (e.g. C<uni|AYQ44>,
80 :     C<gi|85841784>, or C<fig|360108.3.peg.1041>).
81 :    
82 :     =over 4
83 :    
84 :     =item args
85 :    
86 :     Reference to a list of protein identifiers, or reference to a hash
87 :     with the key C<-ids> whose value is a reference to a list of identifiers. Each
88 :     identifier should be a prefixed gene identifier or the C<md5|>-prefixed MD5 of a
89 :     protein sequence. If the parameter is a hash reference, then if the key C<-hash>
90 :     is provided, the return value will be in the form of a hash instead of a list.
91 :    
92 :     =item RETURN
93 :    
94 :     Normally, returns a reference to a list of 4-tuples. Each 4-tuple contains an
95 :     identifier that is sequence-equivalent to at least one of the input identifiers,
96 :     the asserted function of that identifier, the source of the assertion, and a
97 :     flag that is TRUE if the assertion is by an expert. If the C<-hash> flag
98 :     is specified in the parameter list, then the return value will be a hash
99 :     of lists, keyed by incoming protein identifier, mapping each protein
100 :     identifier to a list of the relevant 4-tuples.
101 :    
102 :     =back
103 :    
104 :     =cut
105 :    
sub equiv_sequence {
    my ($self, $args) = @_;
    my $sapling = $self->{db};
    # A bare list reference is shorthand for { -ids => [...] }.
    $args = { -ids => $args } if ref $args ne 'HASH';
    # The -hash option selects a per-input-ID hash instead of one flat list.
    my $wantHash = $args->{-hash} || 0;
    my $idList = ServerThing::GetIdList(-ids => $args);
    # Both paths start at a protein sequence and end at an assertion source.
    # The first goes through the features the protein belongs to; the second
    # goes through identifiers attached to the protein sequence itself.
    my @assertionPaths = (
        "ProteinSequence IsProteinFor Feature IsIdentifiedBy Identifier HasAssertionFrom Source",
        "ProteinSequence IsNamedBy Identifier HasAssertionFrom Source",
    );
    # Each result row is (identifier, function, source, expert flag).
    my @wantedFields = qw(Identifier(id) HasAssertionFrom(function) Source(id)
                          HasAssertionFrom(expert));
    my $proteinFilter = "ProteinSequence(id) = ?";
    # Only one of these two accumulators is returned, depending on -hash.
    my %rowsFor;
    my @allRows;
    for my $inputId (@$idList) {
        # Set of protein sequence IDs relevant to this input identifier.
        my %proteinIds;
        if ($inputId =~ /^md5\|(.+)/) {
            # An md5| identifier names the protein sequence directly: the
            # part after the prefix is used as the protein sequence ID.
            $proteinIds{$1} = 1;
        } else {
            # Otherwise collect the proteins the identifier names directly,
            # then the ones it reaches through a feature.
            my @direct = $sapling->GetFlat("Identifier Names ProteinSequence",
                                           'Identifier(id) = ?', [$inputId],
                                           'ProteinSequence(id)');
            my @viaFeature = $sapling->GetFlat("Identifier Identifies Feature Produces ProteinSequence",
                                               'Identifier(id) = ?', [$inputId],
                                               'ProteinSequence(id)');
            $proteinIds{$_} = 1 for @direct, @viaFeature;
        }
        # Run both assertion queries for every protein, in sorted protein
        # order, keeping the rows in the order the queries return them.
        my @found = map {
            my $proteinId = $_;
            map { $sapling->GetAll($_, $proteinFilter, $proteinId, \@wantedFields) }
                @assertionPaths;
        } sort keys %proteinIds;
        # Identifiers with no assertions contribute nothing to the output.
        next unless @found;
        if ($wantHash) {
            $rowsFor{$inputId} = \@found;
        } else {
            push @allRows, @found;
        }
    }
    return $wantHash ? \%rowsFor : \@allRows;
}
178 :    
=head3 equiv_precise

    my $document = $achObject->equiv_precise($args);

Return the assertions attached to each specified gene identifier. The gene
can be specified by any prefixed gene identifier (e.g. C<uni|AYQ44>,
C<gi|85841784>, or C<fig|360108.3.peg.1041>). Each identifier is looked up
exactly as given; unlike L</equiv_sequence>, no expansion through protein
sequences is performed and C<md5|>-prefixed protein IDs get no special
handling.

=over 4

=item args

Reference to a list of gene identifiers, or reference to a hash
with the key C<-ids> whose value is a reference to a list of
identifiers. Each identifier should be a prefixed gene identifier.
If the parameter is a hash reference, then if the key C<-hash> is
provided with a true value, the return value will be in the form of a
hash instead of a list.

=item RETURN

Normally, returns a reference to a list of 2-tuples. Each 2-tuple consists
of an input identifier followed by a reference to a list of 4-tuples.
Each 4-tuple contains the identifier, the asserted function of that
identifier, the source of the assertion, and a flag that is TRUE if the
assertion is by an expert. If the C<-hash> flag is specified in the
parameter list, then the return value will be a reference to a hash, keyed
by incoming identifier, mapping each identifier to its list of 4-tuples.
In both forms an identifier with no assertions is still present, with an
empty list.

=back

=cut

sub equiv_precise {
    # Get the parameters.
    my ($self, $args) = @_;
    # Get the Sapling database.
    my $sap = $self->{db};
    # Convert a list to a hash.
    if (ref $args ne 'HASH') {
        $args = { -ids => $args };
    }
    # Find out if we're returning a hash. The documentation has always
    # promised this option; without it the list form is returned unchanged.
    my $hashFlag = $args->{-hash} || 0;
    # Declare the return variable.
    my $retVal = ($hashFlag ? {} : []);
    # Get the list of IDs.
    my $ids = ServerThing::GetIdList(-ids => $args);
    # This is the list of fields we want back for every assertion. It is
    # the same for every identifier, so build it once outside the loop.
    my $fields = [qw(Identifier(id)
                     HasAssertionFrom(function)
                     Source(id)
                     HasAssertionFrom(expert))];
    for my $id (@$ids) {
        # Ask for the assertions made about this exact identifier.
        my @resultRows = $sap->GetAll("Identifier HasAssertionFrom Source",
                                      'Identifier(id) = ? ',
                                      [$id], $fields);
        if ($hashFlag) {
            # Hash form: identifier => list of 4-tuples.
            $retVal->{$id} = \@resultRows;
        } else {
            # List form: [identifier, list of 4-tuples].
            push @$retVal, [$id, \@resultRows];
        }
    }
    # Return the result.
    return $retVal;
}
236 :    
237 :    
238 :    
239 :    
240 :    
241 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3