[Bio] / FigKernelPackages / Markups.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/Markups.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package Markups;
4 :    
5 :     require Exporter;
6 :     @ISA = ('Exporter');
7 :     @EXPORT = qw();
8 :     @EXPORT_OK = qw();
9 :    
10 :     use strict;
11 :     use Tracer;
12 :     use PageBuilder;
13 :    
14 :     =head1 Markup Utilities
15 :    
16 :     =head2 Introduction
17 :    
18 :     The markup utilities provide a mechanism for managing markups to sections of a
19 :     FIG feature. The user specifies a region inside the feature's translation and
20 :     assigns it a label. The labels are used as styles when displaying the translation.
21 :    
22 :     The styles for the labels will be taken from the file C<labels.css> in the
23 :     CSS directory. The full path to the file is
24 :    
25 :     $FIG_Config::fig/CGI/Html/css/labels.css
26 :    
27 :     The styles should be expressed as classes. For example, in the following style file
28 :    
29 :     .lowerGamma { background-color: yellow }
30 :     .upperGamma { background-color: turquoise }
31 :     .supraCore { color: red }
32 :    
33 :     there are three labels defined-- C<lowerGamma>, C<upperGamma>, and C<supraCore>. The
34 :     gamma type determines the background color of the region; a supra-core section changes
35 :     the font color to red. The Markup object must read the style file and determine from it
36 :     which labels are acceptable. Style changes should not alter the base font, only
37 :     decorations, color, style, and weight. The protein translation will be rendered using
38 :     the C<PRE> tag, and changes to the base font will throw off the character spacing.
39 :    
40 :     The Markup object accepts as a parameter a fig-like object that enables it to access the
41 :     data store. This can be a real C<FIG> object or an object that mimics a FIG object but uses
42 :     a different method for accessing the data, such as an C<SFXlate> object.
43 :    
44 :     Markups will be rendered using the HTML C<SPAN> tag. The rules of HTML are very strict, so a
45 :     markup can be wholly inside another markup, but it cannot overlap. So, for example, consider
46 :    
47 :     |--------|
48 :     ABCDEFGHIJKLMNOPQRSTUVWXYZ
49 :     |====|
50 :    
51 :     Here we have G through P with one marking and K through P for another. This is legal because
52 :     the shorter marking is entirely inside the larger one. The following, however, is NOT legal.
53 :    
54 :     |--------|
55 :     ABCDEFGHIJKLMNOPQRSTUVWXYZ
56 :     |========|
57 :    
58 :     Here the second marking extends past the end of the first. To be legal, this would have to
59 :     be reformatted as
60 :    
61 :     |--------|
62 :     ABCDEFGHIJKLMNOPQRSTUVWXYZ
63 :     |====||==|
64 :    
65 :     The second marking is split in two so that it follows the rules.
66 :    
67 :     If this proves to be an onerous restriction, the rendering engine can be made a little
68 :     smarter to account for the possibility of overlap.
69 :    
70 :     =cut
71 :    
72 :     #: Constructor Markups->new();
73 :    
74 :     =head2 Public Methods
75 :    
76 :     =head3 new
77 :    
78 : parrello 1.2 my $$marks = Markups->new($fid, $fig);
79 : parrello 1.1
80 :     Construct a new Markups object for a specified feature.
81 :    
82 :     =over 4
83 :    
84 :     =item fid
85 :    
86 :     ID of the feature being marked up.
87 :    
88 :     =item fig
89 :    
90 :     FIG object used to access the data store.
91 :    
92 :     =back
93 :    
94 :     =cut
95 :    
96 :     sub new {
97 :     # Get the parameters.
98 :     my ($class, $fid, $fig) = @_;
99 :     # Read in the markup data.
100 :     my $markList = $fig->ReadMarkups($fid);
101 :     # Sort it for rendering purposes.
102 :     my @sortedList = sort { Compare($a,$b) } @{$markList};
103 :     # Create the $marks object.
104 :     my $retVal = {
105 :     marks => \@sortedList,
106 :     fig => $fig,
107 :     fid => $fid
108 :     };
109 :     # Bless and return it.
110 :     bless $retVal;
111 :     return $retVal;
112 :     }
113 :    
114 :     =head3 Insert
115 :    
116 : parrello 1.2 my $loc = $marks->Insert($start, $len, $label);
117 : parrello 1.1
118 :     Insert a new entry into the markup list. If an identical entry already exists, this
119 :     method will have no effect.
120 :    
121 :     =over 4
122 :    
123 :     =item start
124 :    
125 :     Offset (1-based) of the first letter in the protein translation to be marked.
126 :    
127 :     =item len
128 :    
129 :     Number of letters in the protein translation to be marked.
130 :    
131 :     =item label
132 :    
133 :     Label for this markup.
134 :    
135 :     =item RETURN
136 :    
137 :     Returns the location in the markup list where the new entry can be found.
138 :    
139 :     =back
140 :    
141 :     =cut
142 :     #: Return Type $;
143 :     sub Insert {
144 :     # Get the parameters.
145 :     my ($self, $start, $len, $label) = @_;
146 :     # Create the new entry.
147 :     my $entry = [$start, $len, $label];
148 :     # Look for it in the markup list.
149 :     my ($retVal, $flag) = _Find($self->{marks}, $entry);
150 :     # If it wasn't found, we have to insert it.
151 :     if (! $flag) {
152 :     splice @{$self->{marks}}, $retVal, 0, $entry;
153 :     }
154 :     # Return the location of the new entry.
155 :     return $retVal;
156 :     }
157 :    
158 :     =head3 Delete
159 :    
160 : parrello 1.2 $marks->Delete($start, $len, $label);
161 : parrello 1.1
162 :     Delete an entry from the markup list. If the entry does not exist, this method will
163 :     have no effect.
164 :    
165 :     =over 4
166 :    
167 :     =item start
168 :    
169 :     Offset (1-based) of the first letter in the protein translation of the markup.
170 :    
171 :     =item len
172 :    
173 :     Number of letters in the protein translation affected by the markup.
174 :    
175 :     =item label
176 :    
177 :     Label of the markup.
178 :    
179 :     =back
180 :    
181 :     =cut
182 :     #: Return Type ;
183 :     sub Delete {
184 :     # Get the parameters.
185 :     my ($self, $start, $len, $label) = @_;
186 :     # Create the new entry.
187 :     my $entry = [$start, $len, $label];
188 :     # Look for it in the markup list.
189 :     my ($loc, $flag) = _Find($self->{marks}, $entry);
190 :     # If it was found, we have to delete it.
191 :     if ($flag) {
192 :     splice @{$self->{marks}}, $loc, 1;
193 :     }
194 :     }
195 :    
196 :     =head3 List
197 :    
198 : parrello 1.2 my @marks = $marks->List();
199 : parrello 1.1
200 :     Return a list of this feature's markups. The value returned is a sorted list
201 :     of 3-tuples. Each 3-tuple consists of the offset to the start of the markup,
202 :     the length of the markup, and the label of the markup. The offset is 1-based
203 :     and the offset and length are both in terms of position in the feature's
204 :     protein translation.
205 :    
206 :     =cut
207 :     #: Return Type @;
208 :     sub List {
209 :     # Get the parameters.
210 :     my ($self) = @_;
211 :     # Return the result.
212 :     return @{$self->{marks}};
213 :     }
214 :    
215 :     =head3 Save
216 :    
217 : parrello 1.2 $marks->Save();
218 : parrello 1.1
219 :     Save this markup list. The markups will be written back to disk or to a database,
220 :     depending on the nature of the incoming access object.
221 :    
222 :     =cut
223 :     #: Return Type ;
224 :     sub Save {
225 :     # Get the parameters.
226 :     my ($self) = @_;
227 :     # Get the FIG-like object.
228 :     my $fig = $self->{fig};
229 :     # Write the markups.
230 :     $fig->WriteMarkups($self->{fid}, $self->{marks});
231 :     }
232 :    
233 :     =head3 Render
234 :    
235 : parrello 1.2 my $proteinHtml = $marks->Render($id, $lineWidth);
236 : parrello 1.1
237 :     Render the feature's protein translation using the specified markups. The translation will
238 :     be converted to HTML, with C<SPAN> tags used to alter the display of the marked-up areas. If
239 :     a line width is specified, then the translation will be broken into fixed-length chunks on
240 :     separate lines. (Some browsers have trouble with long unbroken character strings.)
241 :    
242 :     The basic rendering algorithm works by copying sections of the translation string to the
243 :     return string interrupted by certain events. There are three types of events (1) start of
244 :     a markup, (2) end of a markup, and (3) end of a line. Three separate data structures will
245 :     be used to track the three events. Because we require all markups to be wholly contained
246 :     in other markups, the end-of-markup data structure can be handled using a simple stack. The
247 :     end-of-line structure is simply a number that tells us how much space remains on the current
248 :     line. The start-of-markup structure is the markup list itself, which has been carefully
249 :     maintained in such a way that we can run through it linearly to find the start locations in
250 :     the correct order.
251 :    
252 :     =over 4
253 :    
254 :     =item id (optional)
255 :    
256 :     ID to be assigned to the translation. If this value is specified, the entire translation will
257 :     be wrapped in a C<PRE> element with the specified ID. The ID can be used to find the translation
258 :     in JavaScript code.
259 :    
260 :     =item lineWidth (optional)
261 :    
262 :     Number of characters per line. If this value is specified, the translation will be broken into
263 :     fixed-length chunks.
264 :    
265 :     =item RETURN
266 :    
267 :     Returns an HTML string rendering the marked-up protein translation.
268 :    
269 :     =back
270 :    
271 :     =cut
272 :     #: Return Type $;
273 :     sub Render {
274 :     # Get the parameters.
275 :     my ($self, $id, $lineWidth) = @_;
276 :     my $fig = $self->{fig};
277 :     # Check for an ID. Note that we use a leading space if an ID is present to separate the
278 :     # ID attribute from the PRE tag.
279 :     my $idAttribute = ($id ? " id=\"$id\"" : "");
280 :     # Begin building the string by putting in the PRE tag.
281 :     my $retVal = "<pre$idAttribute>";
282 :     # Get our feature's protein translation.
283 :     my $proteins = $fig->get_translation($self->{fid});
284 :     # Get the translation length. This is used as a sort of plus-infinity in the various
285 :     # loops.
286 :     my $translationLength = length $proteins;
287 :     my $infinity = $translationLength + 1;
288 :     # Determine the chunk size. A new-line will be inserted after every chunk to make the
289 :     # display more manageable.
290 :     my $chunkSize = ($lineWidth ? $lineWidth : $infinity);
291 :     # This next list is the end-of-markup stack. We prime it with a value past the
292 :     # end of the translation string. The first element of a stack entry is the location
293 :     # at which to put the tag. The second element is the tag itself. Most entries will
294 :     # specify a "</span>" tag, but we want "</pre>" for the very last one.
295 :     my @endMarks = ([$infinity, "</pre>"]);
296 :     # Get the markup list and the number of markups.
297 :     my $markList = $self->{marks};
298 :     my $markCount = @{$markList};
299 :     # Get the location of the next chunk break.
300 :     my $chunkBreak = $chunkSize;
301 :     # Now position on the first markup and the first protein.
302 :     my $loc = 0;
303 :     my $markIndex = 0;
304 :     # Loop until we've copied everything, which means loop until we empty the end-mark
305 :     # stack.
306 :     while (@endMarks) {
307 :     # Now we must find the next point where we need to do something. We'll stash the
308 :     # location of the next point and the action we're to take. First, we look
309 :     # for end-of-markup, which is the highest-priority event.
310 :     my ($nextMark, $type) = ($endMarks[$#endMarks]->[0], 'endOfMarkup');
311 :     # Next, look for the end of a chunk. This is lower priority than end-of-markup,
312 :     # but higher priority than start-of-markup.
313 :     if ($chunkBreak < $nextMark) {
314 :     ($nextMark, $type) = ($chunkBreak, 'endOfChunk');
315 :     }
316 :     # Finally, look for the start of a new markup. This is the lowest-priority break.
317 :     # Note we pretend there's an extra mark past the end of the list. This prevents
318 :     # an infinite loop.
319 :     my $nextStartMark = ($markIndex < $markCount ? $markList->[$markIndex]->[0] : $infinity);
320 :     if ($nextStartMark < $nextMark) {
321 :     ($nextMark, $type) = ($nextStartMark, 'startOfMarkup');
322 :     }
323 :     # Insure we don't go past the end of the translation string.
324 :     if ($nextMark > $translationLength) {
325 :     $nextMark = $translationLength;
326 :     }
327 :     # Now grab the string between here and the next mark and put it onto the return
328 :     # string.
329 :     $retVal .= substr $proteins, $loc, $nextMark - $loc;
330 :     # Update our location in the protein translation string.
331 :     $loc = $nextMark;
332 :     # Now we can put in the appropriate character or tag.
333 :     if ($type eq 'endOfMarkup') {
334 :     # Close the SPAN tag to end the markup style.
335 :     $retVal .= $endMarks[$#endMarks]->[1];
336 :     # Pop the mark off the end-of-markup stack.
337 :     pop @endMarks;
338 :     } elsif ($type eq 'endOfChunk') {
339 :     # Put in a new-line.
340 :     $retVal .= "\n";
341 :     # Update the pointer to the start of the next chunk.
342 :     $chunkBreak += $chunkSize;
343 :     } elsif ($type eq 'startOfMarkup') {
344 :     # Put in a SPAN tag activating the markup label.
345 :     my $tag = "<span class=\"$markList->[$markIndex]->[2]\">";
346 :     $retVal .= $tag;
347 :     # Now compute the location at which this markup will end.
348 :     my $endPoint = $loc + $markList->[$markIndex]->[1];
349 :     # Insure it's not past the end of the translation.
350 :     if ($endPoint > $translationLength) {
351 :     $endPoint = $translationLength;
352 :     }
353 :     # Push it onto the end-mark stack.
354 :     push @endMarks, [$endPoint, "</span>"];
355 :     # Move to the next markup in the markup list.
356 :     $markIndex++;
357 :     } else {
358 :     # Here we have an error. The next markup point is not anything we recognize.
359 :     Confess("Unknown marking directive $type when at location $loc in translation for $self->{fid}.");
360 :     }
361 :     }
362 :     # Return the result.
363 :     return $retVal;
364 :     }
365 :    
366 :     =head3 GetLabels
367 :    
368 : parrello 1.2 my @labels = Markups::GetLabels();
369 : parrello 1.1
370 :     Return a list of the valid markup labels. These are computed from reading the appropriate style file.
371 :    
372 :     A markup label is a style class found in the file C<$FIG_Config::fig/CGI/Html/css/labels.css>. This is
373 :     a very dumb parser, and looks for the style by processing lines where the first non-white character is
374 :     a period. Most programs for editting style files enforce that kind of structural restriction, so it is
375 :     not expected to be a problem.
376 :    
377 :     =cut
378 :     #: Return Type @;
379 :     sub GetLabels {
380 :     # Declare the return variable.
381 :     my @retVal = ();
382 :     # Open the style file.
383 :     Open (\*STYLEIN, "<$FIG_Config::fig/CGI/Html/css/labels.css");
384 :     # Loop until we run out of file, saving any labels we find.
385 :     while (my $line = <STYLEIN>) {
386 :     if ($line =~ /^\s*\.(\S+)\s/) {
387 :     push @retVal, $1;
388 :     }
389 :     }
390 :     # Close the style file.
391 :     close STYLEIN;
392 :     # Return the result.
393 :     return @retVal;
394 :     }
395 :    
396 :     =head3 Compare
397 :    
398 : parrello 1.2 my $cmp = Markups::Compare($a, $b);
399 : parrello 1.1
400 :     Compare two markup entries for sorting. Markup entries are sorted by ascending start location
401 :     followed by descending length. This is exactly the ideal order for rendering the markups.
402 :    
403 :     A markup entry is always a reference to a 3-tuple, consisting of the 1-based starting offset,
404 :     the length, and then the label. The starting offset and length are relative to the protein
405 :     translation of the feature.
406 :    
407 :     =over 4
408 :    
409 :     =item a
410 :    
411 :     First markup entry.
412 :    
413 :     =item b
414 :    
415 :     Second markup entry.
416 :    
417 :     =item RETURN
418 :    
419 :     Returns 0 if the markups are identical, a negative number if the first markup entry
420 :     should precede the second, and a positive number if the first markup entry should follow
421 :     the second.
422 :    
423 :     =back
424 :    
425 :     =cut
426 :     #: Return Type $;
427 :     sub Compare {
428 :     # Get the parameters.
429 :     my ($a, $b) = @_;
430 :     # Compare the start positions.
431 :     my $retVal = ($a->[0] <=> $b->[0]);
432 :     # If necessary, compare the lengths. Note the comparison result is inverted because
433 :     # we want longer lengths in front of shorter ones.
434 :     if (! $retVal) {
435 :     $retVal = -($a->[1] <=> $b->[1]);
436 :     }
437 :     # Finally, compare the labels. This is a string comparison.
438 :     if (! $retVal) {
439 :     $retVal = ($a->[2] cmp $b->[2]);
440 :     }
441 :     # Return the result.
442 :     return $retVal;
443 :     }
444 :    
445 :     =head3 Clear
446 :    
447 : parrello 1.2 my = $marks->Clear();
448 : parrello 1.1
449 :     Delete all the markups.
450 :    
451 :     =cut
452 :     #: Return Type ;
453 :     sub Clear {
454 :     # Get the parameters.
455 :     my ($self) = @_;
456 :     # Erase the markup list.
457 :     $self->{marks} = [];
458 :     }
459 :    
460 :     =head2 Internal Utilities
461 :    
462 :     =head3 Find
463 :    
464 : parrello 1.2 my ($loc, $flag) = Markups::_Find($list, $entry);
465 : parrello 1.1
466 :     Find the proper location for a markup entry in a markup list.
467 :    
468 :     =over 4
469 :    
470 :     =item list
471 :    
472 :     Markup list to search. It must be sorted using the L</Compare> function.
473 :    
474 :     =item entry
475 :    
476 :     Reference to a 3-tuple representing the desired markup entry. The first element is the
477 :     offset to the start of the markup, the second is the length, and the third is the
478 :     label.
479 :    
480 :     =item RETURN
481 :    
482 :     Returns a 2-element list. The first element is the location in the markup list at
483 :     which the entry should be placed. The second element is TRUE if the entry was
484 :     found in the markup list and FALSE otherwise.
485 :    
486 :     =back
487 :    
488 :     =cut
489 :     #: Return Type @;
490 :     sub _Find {
491 :     # Get the parameters.
492 :     my ($list, $entry) = @_;
493 :     # Get the length of the markup list.
494 :     my $len = @{$list};
495 :     # Declare the key loop variables.
496 :     my $loc = 0;
497 :     my $cmp = 1;
498 :     # Loop through the list.
499 :     while ($loc < $len && ($cmp = Compare($entry, $list->[$loc])) > 0) { $loc++; }
500 :     # At this point, either $loc is the location where the new entry should be
501 :     # inserted, or the location of an identical entry. The value of $cmp is 0 if
502 :     # an identical entry was found.
503 :     return ($loc, ($cmp == 0));
504 :     }
505 :    
506 :     1;
507 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3