[Bio] / FigKernelPackages / SeedUtils.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/SeedUtils.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.18, Thu Dec 3 15:34:16 2009 UTC revision 1.19, Fri Dec 4 21:16:58 2009 UTC
# Line 27  Line 27 
27      our @EXPORT = qw(hypo boundaries_of parse_fasta_record create_fasta_record      our @EXPORT = qw(hypo boundaries_of parse_fasta_record create_fasta_record
28                       rev_comp genome_of min max sims verify_dir between translate                       rev_comp genome_of min max sims verify_dir between translate
29                       standard_genetic_code parse_location roles_in_function                       standard_genetic_code parse_location roles_in_function
30                       strip_ec);                       strip_ec location_string location_cmp strand_of);
31    
32  =head1 SEED Utility Methods  =head1 SEED Utility Methods
33    
# Line 145  Line 145 
145    
146  =head3 parse_location  =head3 parse_location
147    
148      my ($contig, $begin, $end) = parse_location($locString);      my ($contig, $begin, $end, $strand) = parse_location($locString);
149    
150  Return the contigID, start offset, and end offset for a specified  Return the contigID, start offset, and end offset for a specified
151  location string (see L<SAP/Location Strings>).  location string (see L<SAP/Location Strings>).
# Line 158  Line 158 
158    
159  =item RETURN  =item RETURN
160    
161  Returns a three-element list containing the contig ID from the location  Returns a four-element list containing the contig ID from the location
162  string, the starting offset of the location, and the ending offset. If  string, the starting offset of the location, the ending offset, and the
163  the location string is not valid, the values returned will be C<undef>.  strand. If the location string is not valid, the values returned will be
164    C<undef>.
165    
166  =back  =back
167    
# Line 170  Line 171 
171      # Get the parameters.      # Get the parameters.
172      my ($locString) = @_;      my ($locString) = @_;
173      # Declare the return variables.      # Declare the return variables.
174      my ($contig, $begin, $end);      my ($contig, $begin, $end, $strand);
175      # Parse the location string.      # Parse the location string.
176      if ($locString =~ /^(.+)_(\d+)([+-])(\d+)$/) {      if ($locString =~ /^(.+)_(\d+)([+-])(\d+)$/) {
177          # Pull out the contig ID and the begin location.          # Pull out the contig ID, strand, and begin location.
178          $contig = $1;          $contig = $1;
179          $begin = $2;          $begin = $2;
180            $strand = $3;
181          # Compute the ending location from the direction and length.          # Compute the ending location from the direction and length.
182          if ($3 eq '+') {          if ($3 eq '+') {
183              $end = $begin + $4 - 1;              $end = $begin + $4 - 1;
# Line 184  Line 186 
186          }          }
187      }      }
188      # Return the results.      # Return the results.
189      return ($contig, $begin, $end);      return ($contig, $begin, $end, $strand);
190    }
191    
192    =head3 location_string
193    
194        my $locString = location_string($contig, $beg, $end);
195    
196    Form a location string for the specified contig that starts at the
197    indicated begin location and stops at the indicated end location. A
198    single-base location will automatically be put on the forward strand.
199    
200    =over 4
201    
202    =item contig
203    
204    ID of the contig to contain this location.
205    
206    =item beg
207    
208    Beginning offset of the location.
209    
210    =item end
211    
212    Ending offset of the location.
213    
214    =item RETURN
215    
216    Returns a location string (see L<SAP/Location Strings>) for the specified
217    location.
218    
219    =back
220    
221    =cut
222    
sub location_string {
    # Build a location string of the form <contig>_<begin><strand><length>
    # from a contig ID and a begin/end offset pair.
    my ($contig, $beg, $end) = @_;
    # A location whose begin does not exceed its end is on the forward
    # strand; this includes the single-base case where the two are equal.
    my $forward = ($beg <= $end);
    my $strand = ($forward ? '+' : '-');
    # The length counts both endpoints, whichever direction we are reading.
    my $len = ($forward ? $end + 1 - $beg : $beg + 1 - $end);
    # Assemble the pieces into the final string.
    return join('', $contig, '_', $beg, $strand, $len);
}
238    
239    
240    =head3 location_cmp
241    
242        my $cmp = location_cmp($loc1, $loc2);
243    
244    Compare two location strings (see L<SAP/Location Strings>).
245    
246    The ordering principle for locations is that they are sorted first by contig ID, then by
247    leftmost position, in reverse order by length, and then by direction. The effect is that
248    within a contig, the locations are ordered first and foremost in the way they would
249    appear when displayed in a picture of the contig and second in such a way that embedded
250    locations come after the locations in which they are embedded. In the case of two
251    locations that represent the exact same base pairs, the forward (C<+>) location is
252    arbitrarily placed first.
253    
254    =over 4
255    
256    =item loc1
257    
258    First location string to compare.
259    
260    =item loc2
261    
262    Second location string to compare.
263    
264    =item RETURN
265    
266    Returns a negative number if the B<loc1> location sorts first, a positive number if the
267    B<loc2> location sorts first, and zero if the two locations are the same.
268    
269    
270    =back
271    
272    =cut
273    
sub location_cmp {
    # Compare two location strings of the form <contig>_<begin><strand><length>.
    # The ordering is: contig ID (string comparison), then leftmost position
    # (ascending), then length (descending, so an enclosing location precedes
    # the locations embedded in it), then strand ('+' before '-').
    # Returns a negative number if loc1 sorts first, a positive number if loc2
    # sorts first, and zero if the two locations are the same.
    # Both arguments are expected to match the location-string pattern; no
    # validation is performed here.
    my ($loc1, $loc2) = @_;
    # Parse the locations. For a reverse-strand location the begin offset is
    # the rightmost base, so the leftmost position is begin - length + 1.
    my ($contig1, $beg1, $strand1, $len1) = $loc1 =~ /^(.+)_(\d+)([+-])(\d+)$/;
    my $left1 = ($strand1 eq '+' ? $beg1 : $beg1 - $len1 + 1);
    my ($contig2, $beg2, $strand2, $len2) = $loc2 =~ /^(.+)_(\d+)([+-])(\d+)$/;
    my $left2 = ($strand2 eq '+' ? $beg2 : $beg2 - $len2 + 1);
    # Compare the indicative parts of the location in order. Note that we
    # sort in reverse order by length, so the last comparison puts 2 before 1.
    my $retVal = ($contig1 cmp $contig2) || ($left1 <=> $left2) ||
                 ($len2 <=> $len1);
    # If everything matches to this point, the two locations cover the same
    # base pairs, and only the strand can separate them. The forward location
    # is arbitrarily placed first. The decision must depend on BOTH strands:
    # testing only the first one would report a '-' location as equal to its
    # '+' twin and make the comparison asymmetric.
    if (! $retVal && $strand1 ne $strand2) {
        $retVal = ($strand1 eq '+' ? -1 : 1);
    }
    # Return the result.
    return $retVal;
}
302    
303    =head3 strand_of
304    
305        my $plusOrMinus = strand_of($loc);
306    
307    Return the strand (C<+> or C<->) from the specified location string.
308    
309    =over 4
310    
311    =item loc
312    
313    Location string to parse (see L<SAP/Location Strings>).
314    
315    =item RETURN
316    
317    Returns C<+> if the location is on the forward strand, else C<->.
318    
319    =back
320    
321    =cut
322    
sub strand_of {
    # Return the strand indicator ('+' or '-') of a location string of the
    # form <contig>_<begin><strand><length>, or undef if none can be found.
    my ($loc) = @_;
    # Declare the return variable.
    my $retVal;
    # Parse the strand indicator from the location. The match is anchored at
    # the end of the string so that we read the sign sitting between the begin
    # offset and the length. An unanchored match would stop at the first
    # digit-sign-digit sequence, which can occur inside a contig ID such as
    # "contig5-1" and would yield the wrong strand.
    if ($loc =~ /\d+([+-])\d+$/) {
        $retVal = $1;
    }
    # Return the result.
    return $retVal;
}
335    
336    

Legend:
Removed from v.1.18  
changed lines
  Added in v.1.19

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3