[Bio] / FigKernelPackages / Sim.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/Sim.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11 - (view) (download) (as text)

1 : olson 1.7 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 : parrello 1.8 =head1 Similarity Object
19 :    
20 :     =head2 Introduction
21 :    
22 :     The similarity object provides access by name to the fields of a similarity
23 :     list. Unlike a standard object, the similarity object is stored as a list
24 :     reference, not a hash reference. The similarity fields are pulled from the
25 :     appropriate places in the list.
26 :    
27 :     A blast takes a sequence called the I<query> and matches it against a
28 :     I<database>. When describing the data in a similarity, we will
29 :     refer repeatedly to the query sequence and the database sequence. Often,
30 :     the query and database sequences will be given by peg IDs. In some cases,
31 :     however, they will be contig IDs. In both cases, the match is represented
32 :     by an alignment between portions of the sequences. Gap characters may
33 :     be required to get the alignments to match, and the number of gaps is
34 :     part of the data in the similarity.
35 :    
36 :     =cut
37 : olson 1.2
38 : efrank 1.1 package Sim;
39 : olson 1.2
40 : parrello 1.8 =head3 as_string
41 :    
42 :     C<< my $simString = "$sim"; >>
43 :    
44 :     or
45 :    
46 :     C<< my $simString = $sim->as_string; >>
47 :    
48 :     Return the similarity as a descriptive string, consisting of the query ID,
49 :     the matching database ID, the match score, and the percent identity.
50 :    
51 :     =cut
52 :    
53 :     use Tracer;
54 : olson 1.6 use overload '""' => \&as_string;
55 :    
# Stringify the similarity as "sim:<id1>-><id2>:<psc>:<iden>".
# Also serves as the handler for the overloaded '""' operator, so only the
# first argument (the object itself) is consulted.
sub as_string {
    my $self = shift;
    # Gather the four displayed fields through their accessors.
    my @parts = ($self->id1, $self->id2, $self->psc, $self->iden);
    return sprintf("sim:%s->%s:%s:%s", @parts);
}
60 :    
61 : parrello 1.8 =head3 new_from_line
62 :    
63 :     C<< my $sim = Sim->new_from_line($line); >>
64 : olson 1.6
65 : parrello 1.8 Create a similarity object from a blast output line. The line is presumed to have
66 :     the complete list of similarity values in it, tab-separated.
67 :    
68 :     =over 4
69 :    
70 :     =item line
71 :    
72 :     Input line, containing the similarity values in it delimited by tabs. A line terminator
73 :     may be present at the end.
74 :    
75 :     =item RETURN
76 :    
77 :     Returns a similarity object that allows the values to be accessed by name.
78 :    
79 :     =back
80 :    
81 :     =cut
82 :    
# Build a similarity object from one tab-delimited line.
#
# Parameters:
#   $class - class name to bless the new object into.
#   $line  - tab-separated similarity values; one trailing line terminator
#            (as removed by chomp) is allowed.
#
# Returns a blessed array reference holding the fields in input order.
sub new_from_line {
    my ($class, $line) = @_;
    chomp $line;
    Trace("Data line for SIM contains: $line") if T(4);
    # A negative LIMIT makes split keep trailing empty fields. Without it a
    # line ending in empty columns would lose them, and as_line could no
    # longer reproduce the original line.
    my @fields = split(/\t/, $line, -1);
    return bless \@fields, $class;
}
90 :    
91 : parrello 1.8 =head3 validate
92 :    
93 :     C<< my $okFlag = $sim->validate(); >>
94 :    
95 :     Return TRUE if the similarity values are valid, else FALSE.
96 :    
97 :     =cut
98 : olson 1.6
# Check that the similarity's fields look well-formed.
#
# Both sequence IDs must be defined and non-empty, and each of the remaining
# fields (read through its accessor) must be defined and match the pattern
# listed for it below. A field that is missing, e.g. because the input row
# was truncated, makes the similarity invalid.
#
# Returns 1 if every check passes and 0 otherwise, regardless of the
# calling context.
sub validate {
    my ($self) = @_;

    # The IDs only have to be present; no particular format is enforced.
    for my $id_field (qw(id1 id2)) {
        my $id = $self->$id_field;
        return 0 unless defined $id and $id ne "";
    }

    # Accessor name => pattern its value must match, checked in row order.
    my $unsigned = qr/^\d+$/;
    my @checks = (
        [ iden       => qr/^[.\d]+$/   ],
        [ ali_ln     => $unsigned      ],
        [ mismatches => $unsigned      ],
        [ gaps       => $unsigned      ],
        [ b1         => $unsigned      ],
        [ e1         => $unsigned      ],
        [ b2         => $unsigned      ],
        [ e2         => $unsigned      ],
        [ psc        => qr/^[-.e\d]+$/ ],
        [ bsc        => qr/^[-.\d]+$/  ],
        [ ln1        => $unsigned      ],
        [ ln2        => $unsigned      ],
    );

    for my $check (@checks) {
        my ($field, $pattern) = @$check;
        # Scalar assignment pins the accessor to scalar context.
        my $value = $self->$field;
        return 0 unless defined $value and $value =~ $pattern;
    }

    return 1;
}
116 :    
117 :     =head3 as_line
118 :    
119 :     C<< my $line = $sim->as_line; >>
120 : olson 1.6
121 : parrello 1.8 Return the similarity as an output line. This is exactly the reverse of
122 :     L</new_from_line>.
123 : olson 1.6
124 : parrello 1.8 =cut
125 : olson 1.6
# Serialize the similarity as a single line: all stored fields joined by
# tabs, followed by a newline.
sub as_line {
    my $self = shift;
    my $text = join("\t", @{$self});
    return $text . "\n";
}
130 :    
131 : parrello 1.8 =head3 id1
132 :    
133 :     C<< my $id = $sim->id1; >>
134 :    
135 :     Return the ID of the query sequence that was blasted against the database.
136 : olson 1.6
137 : parrello 1.8 =cut
138 : olson 1.6
# Field 0: ID of the query sequence.
sub id1 {
    my $self = shift;
    return $self->[0];
}
143 :    
144 : parrello 1.8 =head3 id2
145 :    
146 :     C<< my $id = $sim->id2; >>
147 :    
148 :     Return the ID of the sequence in the database that matched the query sequence.
149 :    
150 :     =cut
151 :    
# Field 1: ID of the database sequence that matched the query.
sub id2 {
    my $self = shift;
    return $self->[1];
}
156 :    
# Build a feature object for the database-side sequence (field 1) of this
# similarity.
#
# Returns undef when the ID does not start with "fig|". Otherwise returns
# the result of FeatureO->new, given a freshly created FIGO object and the ID.
# NOTE(review): FeatureO is presumably made available by loading FIGO --
# confirm.
sub feature2 {
    my ($sim) = @_;
    my $id = $sim->[1];
    # Only IDs in the "fig|" namespace can be turned into features.
    if ($id !~ /^fig\|/) { return undef }
    # Load FIGO only once it is actually needed, so that similarities against
    # non-FIG IDs neither pay for nor depend on the module being loadable.
    require FIGO;
    # Explicit class-method call instead of the indirect-object "new FIGO".
    my $figO = FIGO->new;
    return FeatureO->new($figO, $id);
}
165 :    
166 : parrello 1.8 =head3 iden
167 :    
168 :     C<< my $percent = $sim->iden; >>
169 :    
170 :     Return the percentage identity between the query and database sequences.
171 :    
172 :     =cut
173 :    
# Field 2: percentage identity between the query and database sequences.
sub iden {
    my $self = shift;
    return $self->[2];
}
178 :    
179 : parrello 1.8 =head3 ali_ln
180 :    
181 :     C<< my $chars = $sim->ali_ln; >>
182 :    
183 :     Return the length (in characters) of the alignment between the two similar sequences.
184 :    
185 :     =cut
186 :    
# Field 3: length, in characters, of the alignment between the sequences.
sub ali_ln {
    my $self = shift;
    return $self->[3];
}
191 :    
192 : parrello 1.8 =head3 mismatches
193 :    
194 :     C<< my $count = $sim->mismatches; >>
195 :    
196 :     Return the number of alignment positions that do not match.
197 :    
198 :     =cut
199 :    
# Field 4: number of alignment positions that do not match.
sub mismatches {
    my $self = shift;
    return $self->[4];
}
204 :    
205 : parrello 1.8 =head3 gaps
206 :    
207 :     C<< my $count = $sim->gaps; >>
208 :    
209 :     Return the number of gaps required to align the sequences.
210 :    
211 :     =cut
212 :    
# Field 5: number of gaps required to align the sequences.
sub gaps {
    my $self = shift;
    return $self->[5];
}
217 :    
218 : parrello 1.8 =head3 b1
219 :    
220 :     C<< my $beginOffset = $sim->b1; >>
221 :    
222 :     Return the position in the query sequence at which the alignment begins.
223 :    
224 :     =cut
225 :    
# Field 6: position in the query sequence at which the alignment begins.
sub b1 {
    my $self = shift;
    return $self->[6];
}
230 :    
231 : parrello 1.8 =head3 e1
232 :    
233 :     C<< my $endOffset = $sim->e1; >>
234 :    
235 :     Return the position in the query sequence at which the alignment ends.
236 :    
237 :     =cut
238 :    
# Field 7: position in the query sequence at which the alignment ends.
sub e1 {
    my $self = shift;
    return $self->[7];
}
243 :    
244 : parrello 1.8 =head3 b2
245 :    
246 :     C<< my $beginOffset = $sim->b2; >>
247 :    
248 :     Return the position in the database sequence at which the alignment begins.
249 :    
250 :     =cut
251 :    
# Field 8: position in the database sequence at which the alignment begins.
sub b2 {
    my $self = shift;
    return $self->[8];
}
256 :    
257 : parrello 1.8 =head3 e2
258 :    
259 :     C<< my $endOffset = $sim->e2; >>
260 :    
261 :     Return the position in the database sequence at which the alignment ends.
262 :    
263 :     =cut
264 :    
# Field 9: position in the database sequence at which the alignment ends.
sub e2 {
    my $self = shift;
    return $self->[9];
}
269 :    
270 : parrello 1.8 =head3 psc
271 :    
272 :     C<< my $score = $sim->psc; >>
273 :    
274 :     Return the similarity score as a floating-point number. The score is the computed
275 :     probability that the similarity is a result of random chance. A score of 0 indicates a
276 :     perfect match. A higher score indicates a less-perfect match. Values of C<1e-10> or
277 :     less are considered good matches.
278 :    
279 :     =cut
280 :    
# Field 10: the similarity score.
# A stored value that starts with a bare exponent ("e-...") has "1.0"
# prepended so that it reads as a complete number; any other value is
# returned exactly as stored.
sub psc {
    my $self = shift;
    my $raw = $self->[10];
    if ($raw =~ /^e-/) {
        return "1.0" . $raw;
    }
    return $raw;
}
285 :    
286 : parrello 1.8 =head3 bsc
287 :    
288 :     C<< my $score = $sim->bsc; >>
289 :    
290 :     Return the bit score for this similarity. The bit score is an estimate of the
291 :     search space required to find the similarity by chance. A higher bit score
292 :     indicates a better match.
293 :    
294 :     =cut
295 :    
# Field 11: bit score for this similarity.
sub bsc {
    my $self = shift;
    return $self->[11];
}
300 :    
301 : parrello 1.8 =head3 bit_score
302 :    
303 :     C<< my $score = $sim->bit_score; >>
304 :    
305 :     Return the bit score for this similarity. The bit score is an estimate of the
306 :     search space required to find the similarity by chance. A higher bit score
307 :     indicates a better match.
308 :    
309 :     =cut
310 :    
# Longer-named alias for the bit score: delegates to the bsc accessor.
sub bit_score {
    my $self = shift;
    return $self->bsc;
}
315 :    
316 : parrello 1.8 =head3 ln1
317 :    
318 :     C<< my $length = $sim->ln1; >>
319 :    
320 :     Return the number of characters in the query sequence.
321 :    
322 :     =cut
323 :    
# Field 12: number of characters in the query sequence.
sub ln1 {
    my $self = shift;
    return $self->[12];
}
328 :    
329 : parrello 1.8 =head3 ln2
330 :    
331 :     C<< my $length = $sim->ln2; >>
332 :    
333 :     Return the length of the database sequence.
334 :    
335 :     =cut
336 :    
# Field 13: length of the database sequence.
sub ln2 {
    my $self = shift;
    return $self->[13];
}
341 :    
342 : parrello 1.8 =head3 tool
343 :    
344 :     C<< my $name = $sim->tool; >>
345 :    
346 :     Return the name of the tool used to find this similarity.
347 :    
348 :     =cut
349 :    
# Field 14: name of the tool used to find this similarity.
sub tool {
    my $self = shift;
    return $self->[14];
}
354 :    
# Field 15 of the similarity row.
# NOTE(review): presumably a definition/description of the second (database)
# sequence, judging by the name -- confirm against the data producer.
sub def2 {
    my $self = shift;
    return $self->[15];
}
359 :    
# Field 16 of the similarity row.
# NOTE(review): presumably alignment data, judging by the name -- confirm
# against the data producer.
sub ali {
    my $self = shift;
    return $self->[16];
}
364 :    
365 : parrello 1.8 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3