[Bio] / FigKernelPackages / Overlap.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/Overlap.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package Overlap;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use BasicLocation;
8 :    
9 :     =head1 Overlap Descriptor
10 :    
11 :     =head2 Introduction
12 :    
13 :     An overlap descriptor contains information describing two overlapping feature
14 :     segments in a genome. To completely describe an overlap, you need the IDs of the
15 :     overlapping features, the index in each feature's location list of the overlapping
16 :     segments and the locations of the segments themselves. From this information it is
17 :     possible to derive the type of overlap (normal, embedded, convergent, or divergent)
18 :     and the number of overlapping base pairs.
19 :    
20 :     The string representation of an overlap consists of a single letter describing the
21 :     overlap type (B<N>ormal, B<E>mbedded, B<C>onvergent, or B<D>ivergent), a sequence
22 :     of digits indicating the number of overlapping bases, a single colon (C<:>), and
23 :     then the augmented locations separated by a slash. For example, the following string
24 :     describes an overlap between the first segment of feature B<fig|83333.1.peg.1005> and
25 :     the second segment of feature B<fig|83333.1.peg.1004>.
26 :    
27 :     N12:
28 :     NC_000913_1080579+111(fid = fig|83333.1.peg.1005, index = 0)/
29 :     NC_000913_1080677+732(fig = fig|83333.1.peg.1004, index = 1)
30 :    
31 :     This is a single string, even though it's displayed on three lines in order to
32 :     increase readability. The purpose of encapsulating all this data in a single
33 :     string is to make it easy to pass around in web forms. It is expected that the
34 :     normal method of storing overlap information is as an object of this type.
35 :    
36 :     =cut
37 :    
38 :     #: Constructor Overlap->new();
39 :    
40 :     # Table of overlap types.
41 :     my %TypeTable = ( n => 'normal', e => 'embedded', c => 'convergent', d => 'divergent' );
42 :    
43 :     =head2 Public Methods
44 :    
45 :     =head3 new
46 :    
47 :     C<< my $olap = Overlap->new($loc0, $loc1); >>
48 :    
49 :     Construct an overlap object from two augmented locations.
50 :    
51 :     =over 4
52 :    
53 :     =item loc0, loc1
54 :    
55 :     Augmented location objects describing the two segments. In addition to the
56 :     location information, each object should contain the feature ID (C<fid>) and
57 :     the index of the segment in the feature's location list (C<index>).
58 :    
59 :     If the locations do not overlap, an undefined value will be returned.
60 :    
61 :     =back
62 :    
63 :     C<< my $olap = Overlap->new($olapString); >>
64 :    
65 :     Construct an overlap object from a display string.
66 :    
67 :     =over 4
68 :    
69 :     =item olapString
70 :    
71 :     The string representation of an overlap, consisting of a single letter describing the
72 :     overlap type (B<N>ormal, B<E>mbedded, B<C>onvergent, or B<D>ivergent), a sequence
73 :     of digits indicating the number of overlapping bases, a single colon (C<:>), and
74 :     then the augmented locations separated by a slash.
75 :    
76 :     =back
77 :    
78 :     =cut
79 :    
80 :     sub new {
81 :     # Get the parameters.
82 :     my ($class, @p) = @_;
83 :     # Declare the variables for the object components.
84 :     my ($type, $len, $loc0, $loc1);
85 :     # Declare the return value.
86 :     my $retVal;
87 :     # Check the constructor type.
88 :     if (@p == 1) {
89 :     # Here we're constructing from a string. First, we parse out the pieces.
90 :     my ($loc0String, $loc1String);
91 :     if ($p[0] =~ m![necd](\d+):\s*([^/]+)\s*/\s*(.+)$!i) {
92 :     ($type, $len, $loc0String, $loc1String) = ($1, $2, $3, $4);
93 :     } else {
94 :     Confess("Invalid overlap string \"$p[0]\".");
95 :     }
96 :     # Convert the type character to a type name.
97 :     $type = TypeTable{lc $type};
98 :     # Convert the location strings to locations.
99 :     $loc0 = BasicLocation->new($loc0String);
100 :     $loc1 = BasicLocation->new($loc1String);
101 :     } else {
102 :     # Here we have a pair of augmented locations.
103 :     ($loc0, $loc1) = @p;
104 :     # Determine the type and length of the overlap.
105 :     ($type, $len) = CheckOverlap($loc0, $loc1);
106 :     }
107 :     # If an overlap was found, create and bless the object.
108 :     if ($type) {
109 :     # Get copies of the locations.
110 :     my @locations = (BasicLocation->new($loc0), BasicLocation->new($loc1));
111 :     # Determine which location is the left one and which is the right one. This
112 :     # is very useful for the clients.
113 :     my ($left, $right);
114 :     if ($loc0->Left < $loc1->Left) {
115 :     ($left, $right) = ($locations[0], $locations[1]);
116 :     } else {
117 :     ($left, $right) = ($locations[1], $locations[0]);
118 :     }
119 :     $retVal = {
120 :     _type => $type,
121 :     _len => $len,
122 :     _locs => \@locations,
123 :     _left => $left,
124 :     _right => $right
125 :     };
126 :     bless $retVal, $class;
127 :     }
128 :     # Return the result.
129 :     return $retVal;
130 :     }
131 :    
132 :     =head3 CheckOverlap
133 :    
134 :     C<< my ($type, $len) = Overlap::CheckOverlap($loc0, $loc1); >>
135 :    
136 :     Check for an overlap between two locations.
137 :    
138 :     =over 4
139 :    
140 :     =item loc0, loc1
141 :    
142 :     Location objects representing the locations for which overlap information is desired.
143 :    
144 :     =item RETURN
145 :    
146 :     Returns a two-element list. The first element is a string describing the type of overlap--
147 :     C<embedded>, C<normal>, C<convergent>, or C<divergent>. The second element is the number of
148 :     overlapping base pairs. If the locations do not overlap, the first element will be undefined
149 :     and the second will be 0.
150 :    
151 :     =back
152 :    
153 :     =cut
154 :     #: Return Type @;
155 :     sub CheckOverlap {
156 :     # Get the parameters.
157 :     my ($loc0, $loc1) = @_;
158 :     # Declare the return variables.
159 :     my ($type, $len) = (undef, 0);
160 :     # Both locations must belong to the same contig.
161 :     if ($loc0->Contig eq $loc1->Contig) {
162 :     # Sort the locations.
163 :     if (BasicLocation::Cmp($loc0, $loc1) > 0) {
164 :     ($loc0, $loc1) = ($loc1, $loc0);
165 :     }
166 :     # There is overlap if the right endpoint of the location 1 is past the
167 :     # left endpoint of location 2. This test is simple because we've sorted
168 :     # the locations by their left endpoint.
169 :     if ($loc0->Right >= $loc1->Left) {
170 :     # Now we check for the different kinds of overlap.
171 :     if ($loc0->Right >= $loc1->Right) {
172 :     # Here the entire second location is inside the first.
173 :     $type = "embedded";
174 :     $len = $loc1->Length;
175 :     } else {
176 :     # Here we have a normal overlap. The overlap extends from the left point
177 :     # of the second location to the right point of the first location.
178 :     $len = $loc0->Right + 1 - $loc0->Left;
179 :     # The overlap type depends on the directions.
180 :     if ($loc0->Dir eq $loc1->Dir) {
181 :     $type = "normal";
182 :     } elsif ($loc0->Dir eq '+') {
183 :     $type = "convergent";
184 :     } else {
185 :     $type = "divergent";
186 :     }
187 :     }
188 :     }
189 :     }
190 :     # Return the result.
191 :     return ($type, $len);
192 :     }
193 :    
194 :     =head3 Type
195 :    
196 :     C<< my $type = $olap->Type; >>
197 :    
198 :     Return the type of this overlap.
199 :    
200 :     =cut
201 :     #: Return Type $;
202 :     sub Type {
203 :     # Get this instance.
204 :     my ($self) = @_;
205 :     # Return the overlap type.
206 :     return $self->{_type};
207 :     }
208 :    
209 :     =head3 Length
210 :    
211 :     C<< my $len = $olap->Length; >>
212 :    
213 :     Return the number of overlapping base pairs.
214 :    
215 :     =cut
216 :     #: Return Type $;
217 :     sub Length {
218 :     # Get this instance.
219 :     my ($self) = @_;
220 :     # Return the overlap type.
221 :     return $self->{_len};
222 :     }
223 :    
224 :     =head3 Loc
225 :    
226 :     C<< my $loc = $olap->Loc($name); >>
227 :    
228 :     Return the named location. The location returned will be a location object augmented with the
229 :     relevant feature ID (C<fid>) and segment index (C<index>).
230 :    
231 :     =over 4
232 :    
233 :     =item name
234 :    
235 :     Name of the desired location. If C<left> is specified, the leftmost of the two overlapping
236 :     locations will be returned. If C<right> is specified, the rightmost of the two overlapping
237 :     locations will be returned. Otherwise, the name is presumed to be an index into the list
238 :     of locations passed into the constructor. C<0> specifies the first such location and C<1>
239 :     the second.
240 :    
241 :     =item RETURN
242 :    
243 :     Returns the named location object.
244 :    
245 :     =back
246 :    
247 :     =cut
248 :     #: Return Type %;
249 :     sub Loc {
250 :     # Get the parameters.
251 :     my ($self, $name) = @_;
252 :     # Declare the return variable.
253 :     my $retVal;
254 :     # Choose the named location.
255 :     if ($name eq 'left') {
256 :     $retVal = $self->{'_left'};
257 :     } elsif ($name eq 'right') {
258 :     $retVal = $self->{'_right'};
259 :     } else {
260 :     $retVal = $self->{'_locs'}->[$name];
261 :     }
262 :     # Return the result.
263 :     return $retVal;
264 :     }
265 :    
266 :     =head3 String
267 :    
268 :     C<< my $olapString = $olap->String; >>
269 :    
270 :     Return a string representation of this overlap.
271 :    
272 :     The string returned can be used in the constructor to re-create a copy of the overlap.
273 :    
274 :     =cut
275 :     #: Return Type $;
276 :     sub String {
277 :     # Get this instance.
278 :     my ($self) = @_;
279 :     # Assemble the string representation.
280 :     my $retVal = (uc substr($self->Type,0,1)) . $self->Length . ": ";
281 :     my @locs = map { $_->AugmentString } @{$self->{'_locs'}};
282 :     $retVal .= join " / ", @locs;
283 :     # Return the result.
284 :     return $retVal;
285 :     }
286 :    
287 :     =head3 Matches
288 :    
289 :     C<< my $flag = Overlap::Matches($olapA, $olapB); >>
290 :    
291 :     Return TRUE if the two overlaps contain the same data, else FALSE.
292 :    
293 :     =over 4
294 :    
295 :     TODO: items
296 :    
297 :     =back
298 :    
299 :     =cut
300 :     #: Return Type $;
301 :     sub Matches {
302 :     # Get the parameters.
303 :     my ($self, $olapA, $olapB) = @_;
304 :     # Declare the return variable.
305 :     my $retVal;
306 :     # TODO: code
307 :     # Return the result.
308 :     return $retVal;
309 :     }
310 :    
311 :     1;
312 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3