[Bio] / FigKernelPackages / Sim.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/Sim.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.7, Mon Dec 5 19:06:30 2005 UTC revision 1.8, Mon Oct 16 07:16:16 2006 UTC
# Line 15  Line 15 
15  # http://www.theseed.org/LICENSE.TXT.  # http://www.theseed.org/LICENSE.TXT.
16  #  #
17    
18    =head1 Similarity Object
19    
20    =head2 Introduction
21    
22    The similarity object provides access by name to the fields of a similarity
23    list. Unlike a standard object, the similarity object is stored as a list
24    reference, not a hash reference. The similarity fields are pulled from the
25    appropriate places in the list.
26    
27    A blast takes a sequence called the I<query> and matches it against a
28    I<database>. When describing the data in a similarity, we will
29    refer repeatedly to the query sequence and the database sequence. Often,
30    the query and database sequences will be given by peg IDs. In some cases,
31    however, they will be contig IDs. In both cases, the match is represented
32    by an alignment between portions of the sequences. Gap characters may
33    be required to get the alignments to match, and the number of gaps is
34    part of the data in the similarity.
35    
36    =cut
37    
38  package Sim;  package Sim;
 use FIGAttributes;  
 use base 'FIGAttributes';  
39    
40    =head3 as_string
41    
42    C<< my $simString = "$sim"; >>
43    
44    or
45    
46    C<< my $simString = $sim->as_string; >>
47    
48    Return the similarity as a descriptive string, consisting of the query peg,
49    the similar peg, and the match score.
50    
51    =cut
52    
53    use Tracer;
54  use overload '""' => \&as_string;  use overload '""' => \&as_string;
55    
56  sub as_string  sub as_string {
 {  
57      my($obj) = @_;      my($obj) = @_;
   
58      return sprintf("sim:%s->%s:%s", $obj->id1, $obj->id2, $obj->psc);      return sprintf("sim:%s->%s:%s", $obj->id1, $obj->id2, $obj->psc);
59  }  }
60    
61  sub new_from_line  =head3 new_from_line
62  {  
63      my($class, $line) = @_;  C<< my $sim = Sim->new_from_line($line); >>
64    
65    Create a similarity object from a blast output line. The line is presumed to have
66    the complete list of similarity values in it, tab-separated.
67    
68    =over 4
69    
70    =item line
71    
72    Input line containing the tab-delimited similarity values. A line terminator
73    may be present at the end.
74    
75    =item RETURN
76    
77    Returns a similarity object that allows the values to be accessed by name.
78    
79    =back
80    
81    =cut
82    
83    sub new_from_line {
84        my ($class, $line) = @_;
85      chomp $line;      chomp $line;
86        Trace("Data line for SIM contains: $line") if T(4);
87      my $self = [split(/\t/, $line)];      my $self = [split(/\t/, $line)];
88      return bless $self, $class;      return bless $self, $class;
89  }  }
90    
91  sub validate  =head3 validate
92  {  
93      my($self) = @_;  C<< my $okFlag = $sim->validate(); >>
94    
95    Return TRUE if the similarity values are valid, else FALSE.
96    
97    =cut
98    
99    sub validate {
100        my ($self) = @_;
101      return ($self->id1 ne "" and      return ($self->id1 ne "" and
102              $self->id2 ne "" and              $self->id2 ne "" and
103              $self->iden =~ /^[.\d]+$/ and              $self->iden =~ /^[.\d]+$/ and
# Line 58  Line 114 
114              $self->ln2 =~ /^\d+$/);              $self->ln2 =~ /^\d+$/);
115  }  }
116    
117    =head3 as_line
118    
119  sub as_line  C<< my $line = $sim->as_line; >>
 {  
     my($self) = @_;  
120    
121    Return the similarity as an output line. This is exactly the reverse of
122    L</new_from_line>.
123    
124    =cut
125    
126    sub as_line {
127        my ($self) = @_;
128      return join("\t", @$self) . "\n";      return join("\t", @$self) . "\n";
129  }  }
130    
131    =head3 id1
132    
133    C<< my $id = $sim->id1; >>
134    
135  sub id1 :Scalar {  Return the ID of the query sequence that was blasted against the database.
136    
137    =cut
138    
139    sub id1 {
140      my($sim) = @_;      my($sim) = @_;
141      return $sim->[0];      return $sim->[0];
142  }  }
143    
144  sub id2 :Scalar {  =head3 id2
145    
146    C<< my $id = $sim->id2; >>
147    
148    Return the ID of the sequence in the database that matched the query sequence.
149    
150    =cut
151    
152    sub id2 {
153      my($sim) = @_;      my($sim) = @_;
154      return $sim->[1];      return $sim->[1];
155  }  }
156    
157  sub iden :Scalar {  =head3 iden
158    
159    C<< my $percent = $sim->iden; >>
160    
161    Return the percentage identity between the query and database sequences.
162    
163    =cut
164    
165    sub iden {
166      my($sim) = @_;      my($sim) = @_;
167      return $sim->[2];      return $sim->[2];
168  }  }
169    
170  sub ali_ln :Scalar {  =head3 ali_ln
171    
172    C<< my $chars = $sim->ali_ln; >>
173    
174    Return the length (in characters) of the alignment between the two similar sequences.
175    
176    =cut
177    
178    sub ali_ln {
179      my($sim) = @_;      my($sim) = @_;
180      return $sim->[3];      return $sim->[3];
181  }  }
182    
183  sub mismatches :Scalar {  =head3 mismatches
184    
185    C<< my $count = $sim->mismatches; >>
186    
187    Return the number of alignment positions that do not match.
188    
189    =cut
190    
191    sub mismatches {
192      my($sim) = @_;      my($sim) = @_;
193      return $sim->[4];      return $sim->[4];
194  }  }
195    
196  sub gaps :Scalar {  =head3 gaps
197    
198    C<< my $count = $sim->gaps; >>
199    
200    Return the number of gaps required to align the sequences.
201    
202    =cut
203    
204    sub gaps {
205      my($sim) = @_;      my($sim) = @_;
206      return $sim->[5];      return $sim->[5];
207  }  }
208    
209  sub b1 :Scalar {  =head3 b1
210    
211    C<< my $beginOffset = $sim->b1; >>
212    
213    Return the position in the query sequence at which the alignment begins.
214    
215    =cut
216    
217    sub b1 {
218      my($sim) = @_;      my($sim) = @_;
219      return $sim->[6];      return $sim->[6];
220  }  }
221    
222  sub e1 :Scalar {  =head3 e1
223    
224    C<< my $endOffset = $sim->e1; >>
225    
226    Return the position in the query sequence at which the alignment ends.
227    
228    =cut
229    
230    sub e1 {
231      my($sim) = @_;      my($sim) = @_;
232      return $sim->[7];      return $sim->[7];
233  }  }
234    
235  sub b2 :Scalar {  =head3 b2
236    
237    C<< my $beginOffset = $sim->b2; >>
238    
239    Return the position in the database sequence at which the alignment begins.
240    
241    =cut
242    
243    sub b2 {
244      my($sim) = @_;      my($sim) = @_;
245      return $sim->[8];      return $sim->[8];
246  }  }
247    
248  sub e2 :Scalar {  =head3 e2
249    
250    C<< my $endOffset = $sim->e2; >>
251    
252    Return the position in the database sequence at which the alignment ends.
253    
254    =cut
255    
256    sub e2 {
257      my($sim) = @_;      my($sim) = @_;
258      return $sim->[9];      return $sim->[9];
259  }  }
260    
261  sub psc :Scalar {  =head3 psc
262    
263    C<< my $score = $sim->psc; >>
264    
265    Return the similarity score as a floating-point number. The score is the computed
266    probability that the similarity is a result of random chance. A score of 0 indicates a
267    perfect match. A higher score indicates a less-perfect match. Values of C<1e-10> or
268    less are considered good matches.
269    
270    =cut
271    
272    sub psc {
273      my($sim) = @_;      my($sim) = @_;
274      return ($sim->[10] =~ /^e-/) ? "1.0" . $sim->[10] : $sim->[10];      return ($sim->[10] =~ /^e-/) ? "1.0" . $sim->[10] : $sim->[10];
275  }  }
276    
277  sub bsc :Scalar {  =head3 bsc
278    
279    C<< my $score = $sim->bsc; >>
280    
281    Return the bit score for this similarity. The bit score is an estimate of the
282    search space required to find the similarity by chance. A higher bit score
283    indicates a better match.
284    
285    =cut
286    
287    sub bsc {
288      my($sim) = @_;      my($sim) = @_;
289      return $sim->[11];      return $sim->[11];
290  }  }
291    
292  sub bit_score :Scalar {  =head3 bit_score
293    
294    C<< my $score = $sim->bit_score; >>
295    
296    Return the bit score for this similarity. The bit score is an estimate of the
297    search space required to find the similarity by chance. A higher bit score
298    indicates a better match.
299    
300    =cut
301    
302    sub bit_score {
303      my($sim) = @_;      my($sim) = @_;
304      return $sim->[11];      return $sim->bsc;
305  }  }
306    
307  sub ln1 :Scalar {  =head3 ln1
308    
309    C<< my $length = $sim->ln1; >>
310    
311    Return the number of characters in the query sequence.
312    
313    =cut
314    
315    sub ln1 {
316      my($sim) = @_;      my($sim) = @_;
317      return $sim->[12];      return $sim->[12];
318  }  }
319    
320  sub ln2 :Scalar {  =head3 ln2
321    
322    C<< my $length = $sim->ln2; >>
323    
324    Return the number of characters in the database sequence.
325    
326    =cut
327    
328    sub ln2 {
329      my($sim) = @_;      my($sim) = @_;
330      return $sim->[13];      return $sim->[13];
331  }  }
332    
333  sub tool :Scalar {  =head3 tool
334    
335    C<< my $name = $sim->tool; >>
336    
337    Return the name of the tool used to find this similarity.
338    
339    =cut
340    
341    sub tool {
342      my($sim) = @_;      my($sim) = @_;
343      return $sim->[14];      return $sim->[14];
344  }  }
345    
346  sub def2 :Scalar {  sub def2 {
347      my($sim) = @_;      my($sim) = @_;
348      return $sim->[15];      return $sim->[15];
349  }  }
350    
351  sub ali :Scalar {  sub ali {
352      my($sim) = @_;      my($sim) = @_;
353      return $sim->[16];      return $sim->[16];
354  }  }
355    
356  1  1;

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.8

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3