[Bio] / FigKernelPackages / AliasAnalysis.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/AliasAnalysis.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package AliasAnalysis;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use FIG;
8 :    
9 :     =head1 Alias Analysis Module
10 :    
11 :     =head2 Introduction
12 :    
13 :     This module encapsulates data about aliases. For each alias, it tells us how to generate
14 :     the appropriate link, what the type is for the alias, its export format, and its display
15 :     format. To add new alias types, we simply update this package.
16 :    
17 :     An alias has three forms. The I<internal> form is how the alias is stored in the database.
18 :     The I<export> form is the form into which it should be translated when being exported to
19 :     BRC databases. The I<natural> form is the form it takes in its own environment. For
20 :     example, C<gi|15675083> is the internal form of a GenBank ID. Its export form is
21 :     C<NCBI_gi:15675083>, and its natural form is simply C<15675083>.
22 :    
23 :     =head2 The Alias Table
24 :    
25 :     The alias table is a hash of hashes. Each sub-hash relates to a specific type of alias, and
26 :     the key names the alias type (e.g. C<uniprot>, C<KEGG>). The sub-hashes have three fields.
27 :    
28 :     =over 4
29 :    
30 :     =item pattern
31 :    
32 :     This is a regular expression that will match aliases of the specified type in their internal
33 :     forms.
34 :    
35 :     =item convert
36 :    
37 :     This field is a hash of conversions. The key for each is the conversion type and the
38 :     data is a replacement expression. These replacement expressions rely on the pattern match
39 :     having just taken place and use the C<$1>, C<$2>, ... variables to get text from the
40 :     alias's internal form. An alias's natural form, export form, and URL are all implemented as
41 :     different types of conversions. New conversion types can be created at
42 :     will by updating the table without having to worry about changing any code. Note that for
43 :     the URL conversion, a value of C<undef> means no URL is available.
44 :    
45 :     =item normalize
46 :    
47 :     This is a prefix that can be used to convert an alias from its natural form to its
48 :     internal form.
49 :    
50 :     =back
51 :    
52 :     At some point the Alias Table may be converted from an inline hash to an external XML file.
53 :    
54 :     =cut
55 :    
# Master table of known alias types. Each key is an alias type name; each value
# is a hash with three fields:
#   pattern   - regular expression (as a string) matching the alias's internal form
#   convert   - hash of replacement expressions keyed by conversion type; each
#               expression refers to the pattern's capture groups as $1, $2, ...
#               A url of undef means no hyperlink is available for that type.
#   normalize - prefix that turns a natural-form alias into its internal form
my %AliasTable = (
    RefSeq => {
        pattern   => '([NXYZA]P_[0-9\.]+)',
        convert   => { natural => '$1',
                       export  => 'RefSeq_Prot:$1',
                       url     => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=protein;cmd=search;term=$1',
                     },
        normalize => '',
    },
    GenBank => {
        pattern   => 'gi\|(\d+)',
        convert   => { natural => '$1',
                       export  => 'NCBI_gi:$1',
                       url     => 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve;db=Protein&list_uids=$1;dopt=GenPept',
                     },
        normalize => 'gi|',
    },
    SwissProt => {
        pattern   => 'sp\|([A-Z0-9]{6})',
        convert   => { natural => '$1',
                       export  => 'Swiss-Prot:$1',
                       url     => 'http://us.expasy.org/cgi-bin/get-sprot-entry?$1',
                     },
        normalize => 'sp|',
    },
    UniProt => {
        pattern   => 'uni\|([A-Z0-9]{6})',
        convert   => { natural => '$1',
                       # The export form must carry the accession, like every
                       # other type; a bare prefix would export an empty ID.
                       export  => 'UniProtKB:$1',
                       url     => 'http://www.ebi.uniprot.org/uniprot-srv/uniProtView.do?proteinAc=$1',
                     },
        normalize => 'uni|',
    },
    KEGG => {
        # $1 is the whole "org:gene" identifier, $2 the organism code and
        # $3 the gene name.
        pattern   => 'kegg\|(([a-z]{2,4}):([a-zA-Z_0-9]+))',
        convert   => { natural => '$1',
                       export  => 'KEGG:$2+$3',
                       url     => 'http://www.genome.ad.jp/dbget-bin/www_bget?$2+$3',
                     },
        normalize => 'kegg|',
    },
    LocusTag => {
        pattern   => 'LocusTag:([A-Za-z0-9_]+)',
        convert   => { natural => '$1',
                       export  => 'Locus_Tag:$1',
                       url     => undef,
                     },
        normalize => 'LocusTag:',
    },
    GeneID => {
        pattern   => 'GeneID:(\d+)',
        convert   => { natural => '$1',
                       export  => 'GeneID:$1',
                       url     => undef,
                     },
        normalize => 'GeneID:',
    },
    Trembl => {
        pattern   => 'tr\|([a-zA-Z0-9]+)',
        convert   => { natural => '$1',
                       export  => 'TrEMBL:$1',
                       url     => 'http://ca.expasy.org/uniprot/$1',
                     },
        normalize => 'tr|',
    },
);
122 :    
123 :     =head2 Public Methods
124 :    
125 :     =head3 AliasTypes
126 :    
127 : parrello 1.3 my @aliasTypes = AliasAnalysis::AliasTypes();
128 : parrello 1.1
129 :     Return a list of the alias types. The list can be used to create a menu or dropdown
130 :     for selecting a preferred alias.
131 :    
132 :     =cut
133 :    
sub AliasTypes {
    # The alias type names are exactly the keys of the alias table; hand
    # them back to the caller in ascending string order.
    return sort { $a cmp $b } keys(%AliasTable);
}
137 :    
138 :     =head3 Find
139 :    
140 : parrello 1.3 my $aliasFound = AliasAnalysis::Find($type, \@aliases);
141 : parrello 1.1
142 :     Find the first alias of the specified type in the list.
143 :    
144 :     =over 4
145 :    
146 :     =item type
147 :    
148 :     Type of alias desired. This must be one of the keys in C<%AliasTable>.
149 :    
150 :     =item aliases
151 :    
152 :     Reference of a list containing alias names. The first alias name that matches
153 :     the structure of the specified alias type will be returned. The incoming
154 :     aliases are presumed to be in internal form.
155 :    
156 :     =item RETURN
157 :    
158 :     Returns the natural form of the desired alias, or C<undef> if no alias of
159 :     the specified type could be found.
160 :    
161 :     =back
162 :    
163 :     =cut
164 :    
sub Find {
    # Find the first alias of a given type in a list of internal-form aliases.
    #   $type    - alias type name; must be a key of %AliasTable, otherwise
    #              Confess is called
    #   $aliases - reference to a list of aliases in internal form
    # Returns the natural form of the first matching alias, or undef if no
    # alias in the list matches the type's pattern.
    my ($type, $aliases) = @_;
    # Declare the return variable. It stays undefined if nothing matches.
    my $retVal;
    # Insure we have a valid alias type.
    if (! exists $AliasTable{$type}) {
        Confess("Invalid alias type \"$type\" specified.");
    } else {
        # Get the pattern and the natural-form expression for this alias type.
        my $pattern = $AliasTable{$type}->{pattern};
        my $naturalExpr = $AliasTable{$type}->{convert}->{natural};
        Trace("Alias pattern is /$pattern/.") if T(3);
        # Search for matching aliases. We can't use GREP here because we want
        # to stop as soon as we find a match. That way, the $1,$2.. variables
        # still belong to the matching alias when the conversion is evaluated.
        for my $alias (@$aliases) {
            # An undefined entry can never match; skip it quietly.
            next if ! defined $alias;
            Trace("Matching against \"$alias\".") if T(4);
            if ($alias =~ /$pattern/) {
                # Evaluate the conversion as an interpolated replacement
                # string (the same way FormatHtml evaluates URLs) while the
                # capture variables from this match are still current.
                $retVal = eval("\"$naturalExpr\"");
                Trace("Match found.") if T(4);
                last;
            }
        }
    }
    # Return the value found.
    return $retVal;
}
194 :    
195 :     =head3 Type
196 :    
197 : parrello 1.3 my $naturalName = AliasAnalysis::Type($type => $name);
198 : parrello 1.1
199 :     Return the natural name of an alias if it is of the specified type, and C<undef> otherwise.
200 :     Note that the result of this method will be TRUE if the alias is an internal form of the named
201 :     type and FALSE otherwise.
202 :    
203 :     =over 4
204 :    
205 :     =item type
206 :    
207 :     Relevant alias type.
208 :    
209 :     =item name
210 :    
211 :     Internal-form alias to be matched to the specified type.
212 :    
213 :     =item RETURN
214 :    
215 :     Returns the natural form of the alias if it is of the specified type, and C<undef> otherwise.
216 :    
217 :     =back
218 :    
219 :     =cut
220 :    
sub Type {
    # Return the natural form of an alias if it belongs to the given type.
    #   $type - alias type name (a key of %AliasTable)
    #   $name - alias in internal form
    # Returns the natural form on a match. Returns undef when the alias does
    # not match the type's pattern, or when the type is unknown or either
    # argument is undefined.
    my ($type, $name) = @_;
    # Declare the return variable. If there is no match, it will stay undefined.
    my $retVal;
    # Test with "exists" before touching the entry. Dereferencing
    # $AliasTable{$type} for an unknown type would autovivify an empty entry,
    # which would then show up in AliasTypes() and be treated as a valid type
    # by the other methods.
    if (defined $type && defined $name && exists $AliasTable{$type}) {
        my $descriptor = $AliasTable{$type};
        my $pattern = $descriptor->{pattern};
        if ($name =~ /$pattern/) {
            # We have a match, so we return the natural form of the alias.
            # The conversion is evaluated as an interpolated replacement
            # string so that $1, $2, ... are filled in from the match.
            my $naturalExpr = $descriptor->{convert}->{natural};
            $retVal = eval("\"$naturalExpr\"");
        }
    }
    # Return the result.
    return $retVal;
}
235 :    
236 :     =head3 FormatHtml
237 :    
238 : parrello 1.3 my $htmlText = AliasAnalysis::FormatHtml(@aliases);
239 : parrello 1.1
240 :     Create an html string that contains the specified aliases in a comma-separated list
241 :     with hyperlinks where available. The aliases are expected to be in internal form and
242 :     will stay that way.
243 :    
244 :     =over 4
245 :    
246 :     =item aliases
247 :    
248 :     A list of aliases in internal form that are to be formatted into HTML.
249 :    
250 :     =item RETURN
251 :    
252 :     Returns a string containing the aliases in a comma-separated list, with hyperlinks
253 :     present on those for which hyperlinks are available.
254 :    
255 :     =back
256 :    
257 :     =cut
258 :    
sub FormatHtml {
    # Format a list of internal-form aliases as a comma-separated HTML string.
    #   @aliases - aliases in internal form
    # Returns the aliases joined with ", ". Each alias whose type has a URL
    # expression is wrapped in a hyperlink; the others are left as plain text.
    # NOTE(review): the alias text and the URL are placed in the HTML without
    # escaping -- confirm that aliases can never contain markup characters.
    my (@aliases) = @_;
    # Set up the output list. The hyperlinked aliases will be put in here, and then
    # strung together before returning to the caller.
    my @retVal = ();
    # Try the alias types in a fixed (sorted) order. The patterns are not
    # anchored, so an alias can match more than one type; iterating in raw
    # hash order would make the chosen link vary from run to run.
    my @types = sort keys %AliasTable;
    # Loop through the incoming aliases.
    for my $alias (@aliases) {
        # We'll compute the alias's URL in here. It stays undefined if the
        # alias matches no type or its type has no URL.
        my $url;
        # Check this alias against all the known types.
        for my $type (@types) {
            my $pattern = $AliasTable{$type}->{pattern};
            Trace("Matching \"$alias\" to /$pattern/.") if T(4);
            if ($alias =~ /$pattern/) {
                # Here we found the alias's type. Compute the URL if this
                # type has a URL expression.
                my $urlExpression = $AliasTable{$type}->{convert}->{url};
                if ($urlExpression) {
                    Trace("Evaluating $urlExpression.") if T(4);
                    # Interpolate $1, $2, ... from the match into the URL.
                    $url = eval("\"$urlExpression\"");
                }
                # The first matching type decides; stop looking.
                last;
            }
        }
        # Hyperlink the alias only if we found a URL for it.
        my $formatted = ($url ? "<a href=\"$url\">$alias</a>" : $alias);
        # Push this alias into the return list.
        push @retVal, $formatted;
    }
    # Convert the aliases into a comma-separated string.
    return join(", ", @retVal);
}
299 :    
300 : parrello 1.3 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3