[Bio] / Sprout / FIGRules.pm Repository:
ViewVC logotype

Annotation of /Sprout/FIGRules.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

package FIGRules;

use strict;
use warnings;

# Exporter supplies the import() method that pushes the names listed in
# @EXPORT into the namespace of any package that says "use FIGRules;".
require Exporter;
our @ISA = ('Exporter');

# The comparison routine defined in this module is spelled FIGCompare. The
# export list previously named only "FigCompare", which does not exist, so
# the real routine was never exported. Both spellings are exported now so
# that callers written against either name keep working.
our @EXPORT = qw(NormalizeAlias FIGCompare FigCompare);

# Backward-compatible alias for the historical (misspelled) export name.
# "goto &sub" forwards the current argument list to FIGCompare unchanged.
sub FigCompare { goto &FIGCompare }
7 :    
8 :     =head1 FIG Rules Module
9 :    
10 :     =head2 Introduction
11 :    
12 :     This module contains methods that are shared by both B<FIG.pm> and B<Sprout.pm>.
13 :    
14 :     =cut
15 :    
16 :     #
17 :    
18 :     =head2 Public Methods
19 :    
20 :     =head3 NormalizeAlias
21 :    
22 :     C<< my ($newAlias, $flag) = NormalizeAlias($alias); >>
23 :    
24 :     Convert a feature alias to a normalized form. The incoming alias is examined to determine
25 :     whether it is a FIG feature name, a UNIPROT feature name, or a GenBank feature name. A
26 :     prefix is then applied to convert the alias to the form in which it occurs in the Sprout
27 :     database. The supported feature name styles are as follows.
28 :    
29 :     C<fig|>I<dd..d>C<.>I<dd..d>C<.peg.>I<dd..d> where "I<dd..d>" is a sequence of one or more
30 :     digits, is a FIG feature name.
31 :    
32 :     I<dd..d> where "I<dd..d>" is a sequence of one or more digits, is a GenBank feature name.
33 :    
34 :     I<XXXXXX> where "I<XXXXXX>" is a sequence of exactly 6 uppercase letters and/or digits, is a
35 :     UNIPROT feature name. (A string of exactly 6 digits is treated as a GenBank feature name.)
36 :    
37 :     =over 4
38 :    
39 :     =item alias
40 :    
41 :     Alias to be converted to its normal form.
42 :    
43 :     =item RETURN
44 :    
45 :     Returns a two-element list. The first element (newAlias) is the normalized alias; the second
46 :     (flag) is 1 if the alias is a FIG feature name, 0 if it is not. Thus, if the flag value is
47 :     1, the alias will be expected in the B<Feature(id)> field of the Sprout data, and if it is
48 :     0, the alias will be expected in the B<Feature(alias)> field.
49 :    
50 : parrello 1.2 =back
51 :    
52 : parrello 1.1 =cut
53 :    
# NormalizeAlias($alias)
#
# Classify a feature alias by its shape and return it in the form in which
# it is stored in the Sprout database.
#
# Returns the two-element list (normalizedAlias, flag). The flag is 1 only
# when the alias is a FIG peg ID; it is 0 for every other kind of alias.
sub NormalizeAlias {
    my ($alias) = @_;
    # A FIG feature ID is already in normal form and is the only case
    # that sets the flag.
    return ($alias, 1) if $alias =~ /^fig\|\d+\.\d+\.peg\.\d+$/;
    # An all-digit alias is a GenBank ID. This test must precede the
    # UNIPROT test, because a six-digit number would satisfy both.
    return ("gi|" . $alias, 0) if $alias =~ /^\d+$/;
    # Exactly six uppercase letters and/or digits is a UNIPROT ID.
    return ("uni|" . $alias, 0) if $alias =~ /^[A-Z0-9]{6}$/;
    # Anything else is an unknown alias type and is assumed to need no
    # normalization. (If it does, add another test above.)
    return ($alias, 0);
}
82 :    
83 :     =head3 FIGCompare
84 :    
85 :     C<< my $cmp = FIGCompare($aPeg, $bPeg); >>
86 :    
87 :     Compare two FIG IDs. This method is designed for use in sorting a list of FIG-style
88 :     feature IDs. For example, to sort the list C<@pegs>, you would use the following.
89 :    
90 :     C<< my @sortedPegs = sort { FIGCompare($a,$b) } @pegs; >>
91 :    
92 :     =over 4
93 :    
94 :     =item aPeg
95 :    
96 :     First feature ID to compare.
97 :    
98 :     =item bPeg
99 :    
100 :     Second feature ID to compare.
101 :    
102 :     =item RETURN
103 :    
104 :     Returns a negative number if C<aPeg> should sort before C<bPeg>, a positive number if C<aPeg>
105 :     should sort after C<bPeg>, and zero if both should sort to the same place.
106 :    
107 :     =back
108 :    
109 :     =cut
110 :    
# FIGCompare($aPeg, $bPeg)
#
# Three-way comparison of two feature IDs, suitable for use in a sort block.
#
# If BOTH IDs are FIG IDs of the form fig|<genome>.<type>.<index>, they are
# ordered by genome ID (numeric), then feature type (string), then feature
# index (numeric). If either one is not a FIG ID, the two IDs are compared
# as plain strings.
#
# Returns a negative number, zero, or a positive number when $aPeg sorts
# before, with, or after $bPeg respectively.
sub FIGCompare {
    # Get the parameters.
    my ($aPeg, $bPeg) = @_;
    # The FIG ID pieces are the genome ID, the feature type, and the feature
    # index number. These are all dot-delimited, except that the genome ID
    # already has a dot in it. The delimiter after the genome ID is escaped
    # so that only a literal dot is accepted there.
    my $figPattern = qr/^fig\|(\d+\.\d+)\.([^\.]+)\.(\d+)$/;
    # A list assignment in boolean context is true only if the pattern
    # matched (and so produced captures).
    if (my ($g1, $t1, $n1) = ($aPeg =~ $figPattern)) {
        if (my ($g2, $t2, $n2) = ($bPeg =~ $figPattern)) {
            # The first non-zero comparison decides the order.
            my $retVal = ($g1 <=> $g2) || ($t1 cmp $t2) || ($n1 <=> $n2);
            return $retVal;
        }
    }
    # At least one ID is not a FIG ID. Compare the actual arguments; the
    # package globals $a and $b are NOT used, because they belong to
    # whichever package invoked sort and are not visible as such here.
    my $retVal = ($aPeg cmp $bPeg);
    return $retVal;
}
132 :    
133 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3