[Bio] / FigKernelPackages / Overlap.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/Overlap.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 : olson 1.3 #
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 : parrello 1.4 #
8 : olson 1.3 # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 : parrello 1.4 # Public License.
11 : olson 1.3 #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :    
19 : parrello 1.1
20 :     package Overlap;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use BasicLocation;
25 :    
26 :     =head1 Overlap Descriptor
27 :    
28 :     =head2 Introduction
29 :    
30 :     An overlap descriptor contains information describing two overlapping feature
31 :     segments in a genome. To completely describe an overlap, you need the IDs of the
32 :     overlapping features, the index in each feature's location list of the overlapping
33 :     segments and the locations of the segments themselves. From this information it is
34 :     possible to derive the type of overlap (normal, embedded, convergent, or divergent)
35 :     and the number of overlapping base pairs.
36 :    
37 :     The string representation of an overlap consists of a single letter describing the
38 :     overlap type (B<N>ormal, B<E>mbedded, B<C>onvergent, or B<D>ivergent), a sequence
39 :     of digits indicating the number of overlapping bases, a single colon (C<:>), and
40 :     then the augmented locations separated by a slash. For example, the following string
41 :     describes an overlap between the first segment of feature B<fig|83333.1.peg.1005> and
42 :     the second segment of feature B<fig|83333.1.peg.1004>.
43 :    
44 :     N12:
45 :     NC_000913_1080579+111(fid = fig|83333.1.peg.1005, index = 0)/
46 :     NC_000913_1080677+732(fig = fig|83333.1.peg.1004, index = 1)
47 :    
48 :     This is a single string, even though it's displayed on three lines in order to
49 :     increase readability. The purpose of encapsulating all this data in a single
50 :     string is to make it easy to pass around in web forms. It is expected that the
51 :     normal method of storing overlap information is as an object of this type.
52 :    
53 :     =cut
54 :    
55 :     #: Constructor Overlap->new();
56 :    
57 :     # Table of overlap types.
58 :     my %TypeTable = ( n => 'normal', e => 'embedded', c => 'convergent', d => 'divergent' );
59 :    
60 :     =head2 Public Methods
61 :    
62 :     =head3 new
63 :    
64 : parrello 1.4 my $olap = Overlap->new($loc0, $loc1);
65 : parrello 1.1
66 :     Construct an overlap object from two augmented locations.
67 :    
68 :     =over 4
69 :    
70 :     =item loc0, loc1
71 :    
72 :     Augmented location objects describing the two segments. In addition to the
73 :     location information, each object should contain the feature ID (C<fid>) and
74 :     the index of the segment in the feature's location list (C<index>).
75 :    
76 :     If the locations do not overlap, an undefined value will be returned.
77 :    
78 :     =back
79 :    
80 : parrello 1.4 my $olap = Overlap->new($olapString);
81 : parrello 1.1
82 :     Construct an overlap object from a display string.
83 :    
84 :     =over 4
85 :    
86 :     =item olapString
87 :    
88 :     The string representation of an overlap, consisting of a single letter describing the
89 :     overlap type (B<N>ormal, B<E>mbedded, B<C>onvergent, or B<D>ivergent), a sequence
90 :     of digits indicating the number of overlapping bases, a single colon (C<:>), and
91 :     then the augmented locations separated by a slash.
92 :    
93 :     =back
94 :    
95 :     =cut
96 :    
97 :     sub new {
98 :     # Get the parameters.
99 :     my ($class, @p) = @_;
100 :     # Declare the variables for the object components.
101 :     my ($type, $len, $loc0, $loc1);
102 :     # Declare the return value.
103 :     my $retVal;
104 :     # Check the constructor type.
105 :     if (@p == 1) {
106 :     # Here we're constructing from a string. First, we parse out the pieces.
107 :     my ($loc0String, $loc1String);
108 :     if ($p[0] =~ m![necd](\d+):\s*([^/]+)\s*/\s*(.+)$!i) {
109 :     ($type, $len, $loc0String, $loc1String) = ($1, $2, $3, $4);
110 :     } else {
111 :     Confess("Invalid overlap string \"$p[0]\".");
112 :     }
113 :     # Convert the type character to a type name.
114 :     $type = TypeTable{lc $type};
115 :     # Convert the location strings to locations.
116 :     $loc0 = BasicLocation->new($loc0String);
117 :     $loc1 = BasicLocation->new($loc1String);
118 :     } else {
119 :     # Here we have a pair of augmented locations.
120 :     ($loc0, $loc1) = @p;
121 :     # Determine the type and length of the overlap.
122 :     ($type, $len) = CheckOverlap($loc0, $loc1);
123 :     }
124 :     # If an overlap was found, create and bless the object.
125 :     if ($type) {
126 :     # Get copies of the locations.
127 :     my @locations = (BasicLocation->new($loc0), BasicLocation->new($loc1));
128 :     # Determine which location is the left one and which is the right one. This
129 :     # is very useful for the clients.
130 :     my ($left, $right);
131 :     if ($loc0->Left < $loc1->Left) {
132 :     ($left, $right) = ($locations[0], $locations[1]);
133 :     } else {
134 :     ($left, $right) = ($locations[1], $locations[0]);
135 :     }
136 :     $retVal = {
137 :     _type => $type,
138 :     _len => $len,
139 :     _locs => \@locations,
140 :     _left => $left,
141 :     _right => $right
142 :     };
143 :     bless $retVal, $class;
144 :     }
145 :     # Return the result.
146 :     return $retVal;
147 :     }
148 :    
149 :     =head3 CheckOverlap
150 :    
151 : parrello 1.4 my ($type, $len) = Overlap::CheckOverlap($loc0, $loc1);
152 : parrello 1.1
153 :     Check for an overlap between two locations.
154 :    
155 :     =over 4
156 :    
157 :     =item loc0, loc1
158 :    
159 :     Location objects representing the locations for which overlap information is desired.
160 : parrello 1.2 These may be B<BasicLocation>s or B<FullLocation>s.
161 : parrello 1.1
162 :     =item RETURN
163 :    
164 :     Returns a two-element list. The first element is a string describing the type of overlap--
165 :     C<embedded>, C<normal>, C<convergent>, or C<divergent>. The second element is the number of
166 :     overlapping base pairs. If the locations do not overlap, the first element will be undefined
167 :     and the second will be 0.
168 :    
169 :     =back
170 :    
171 :     =cut
172 :     #: Return Type @;
173 :     sub CheckOverlap {
174 :     # Get the parameters.
175 :     my ($loc0, $loc1) = @_;
176 :     # Declare the return variables.
177 :     my ($type, $len) = (undef, 0);
178 : parrello 1.2 # If these are full locations, get the bounds.
179 :     if ($loc0->isa('FullLocation')) {
180 :     ($loc0, undef, undef) = $loc0->GetBounds();
181 :     }
182 :     if ($loc1->isa('FullLocation')) {
183 :     ($loc1, undef, undef) = $loc1->GetBounds();
184 :     }
185 : parrello 1.1 # Both locations must belong to the same contig.
186 :     if ($loc0->Contig eq $loc1->Contig) {
187 :     # Sort the locations.
188 :     if (BasicLocation::Cmp($loc0, $loc1) > 0) {
189 :     ($loc0, $loc1) = ($loc1, $loc0);
190 :     }
191 :     # There is overlap if the right endpoint of the location 1 is past the
192 :     # left endpoint of location 2. This test is simple because we've sorted
193 :     # the locations by their left endpoint.
194 :     if ($loc0->Right >= $loc1->Left) {
195 :     # Now we check for the different kinds of overlap.
196 :     if ($loc0->Right >= $loc1->Right) {
197 :     # Here the entire second location is inside the first.
198 :     $type = "embedded";
199 :     $len = $loc1->Length;
200 :     } else {
201 :     # Here we have a normal overlap. The overlap extends from the left point
202 :     # of the second location to the right point of the first location.
203 :     $len = $loc0->Right + 1 - $loc0->Left;
204 :     # The overlap type depends on the directions.
205 :     if ($loc0->Dir eq $loc1->Dir) {
206 :     $type = "normal";
207 :     } elsif ($loc0->Dir eq '+') {
208 :     $type = "convergent";
209 :     } else {
210 :     $type = "divergent";
211 :     }
212 :     }
213 :     }
214 :     }
215 :     # Return the result.
216 :     return ($type, $len);
217 :     }
218 :    
219 :     =head3 Type
220 :    
221 : parrello 1.4 my $type = $olap->Type;
222 : parrello 1.1
223 :     Return the type of this overlap.
224 :    
225 :     =cut
226 :     #: Return Type $;
227 :     sub Type {
228 :     # Get this instance.
229 :     my ($self) = @_;
230 :     # Return the overlap type.
231 :     return $self->{_type};
232 :     }
233 :    
234 :     =head3 Length
235 :    
236 : parrello 1.4 my $len = $olap->Length;
237 : parrello 1.1
238 :     Return the number of overlapping base pairs.
239 :    
240 :     =cut
241 :     #: Return Type $;
242 :     sub Length {
243 :     # Get this instance.
244 :     my ($self) = @_;
245 :     # Return the overlap type.
246 :     return $self->{_len};
247 :     }
248 :    
249 :     =head3 Loc
250 :    
251 : parrello 1.4 my $loc = $olap->Loc($name);
252 : parrello 1.1
253 :     Return the named location. The location returned will be a location object augmented with the
254 :     relevant feature ID (C<fid>) and segment index (C<index>).
255 :    
256 :     =over 4
257 :    
258 :     =item name
259 :    
260 :     Name of the desired location. If C<left> is specified, the leftmost of the two overlapping
261 :     locations will be returned. If C<right> is specified, the rightmost of the two overlapping
262 :     locations will be returned. Otherwise, the name is presumed to be an index into the list
263 :     of locations passed into the constructor. C<0> specifies the first such location and C<1>
264 :     the second.
265 :    
266 :     =item RETURN
267 :    
268 :     Returns the named location object.
269 :    
270 :     =back
271 :    
272 :     =cut
273 :     #: Return Type %;
274 :     sub Loc {
275 :     # Get the parameters.
276 :     my ($self, $name) = @_;
277 :     # Declare the return variable.
278 :     my $retVal;
279 :     # Choose the named location.
280 :     if ($name eq 'left') {
281 :     $retVal = $self->{'_left'};
282 :     } elsif ($name eq 'right') {
283 :     $retVal = $self->{'_right'};
284 :     } else {
285 :     $retVal = $self->{'_locs'}->[$name];
286 :     }
287 :     # Return the result.
288 :     return $retVal;
289 :     }
290 :    
291 :     =head3 String
292 :    
293 : parrello 1.4 my $olapString = $olap->String;
294 : parrello 1.1
295 :     Return a string representation of this overlap.
296 :    
297 :     The string returned can be used in the constructor to re-create a copy of the overlap.
298 :    
299 :     =cut
300 :     #: Return Type $;
301 :     sub String {
302 :     # Get this instance.
303 :     my ($self) = @_;
304 :     # Assemble the string representation.
305 :     my $retVal = (uc substr($self->Type,0,1)) . $self->Length . ": ";
306 :     my @locs = map { $_->AugmentString } @{$self->{'_locs'}};
307 :     $retVal .= join " / ", @locs;
308 :     # Return the result.
309 :     return $retVal;
310 :     }
311 :    
312 :     =head3 Matches
313 :    
314 : parrello 1.4 my $flag = Overlap::Matches($olapA, $olapB);
315 : parrello 1.1
316 :     Return TRUE if the two overlaps contain the same data, else FALSE.
317 :    
318 :     =over 4
319 :    
320 :     TODO: items
321 :    
322 :     =back
323 :    
324 :     =cut
325 :     #: Return Type $;
326 :     sub Matches {
327 :     # Get the parameters.
328 :     my ($self, $olapA, $olapB) = @_;
329 :     # Declare the return variable.
330 :     my $retVal;
331 :     # TODO: code
332 :     # Return the result.
333 :     return $retVal;
334 :     }
335 :    
336 :     1;
337 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3