[Bio] / FigKernelPackages / BadCall.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/BadCall.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 : olson 1.2 #
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     #
18 :    
19 : parrello 1.1
20 :     package BadCall;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use FIG;
25 :     use BasicLocation;
26 :     use Overlap;
27 :    
28 :     =head1 Bad Call Utilities
29 :    
30 :     =head2 Introduction
31 :    
32 :     This module contains utility methods for finding and analyzing bad gene calls. The
33 :     default constructor uses a FIG object; however, any other object that mimics the
34 :     FIG object signatures can be used. By convention, all calls to FIG object methods
35 :     will use the variable name I<$fig>, so that the FIG methods used can be easily
36 :     identified.
37 :    
38 :     Most B<Location> objects manipulated by this package are I<augmented locations>. An
39 :     augmented location contains two additional fields-- the feature ID (C<< $loc->{fid} >>)
40 :     and the location's index in the feature's location list (C<< $loc->{index} >>). An
41 :     augmented location enables us to relate the location back to the feature of interest.
42 :    
43 :     =cut
44 :    
45 :     #: Constructor BadCall->new();
46 :    
47 :     =head2 Public Methods
48 :    
49 :     =head3 new
50 :    
51 :     C<< my $bc = BadCall->new($figLikeObject); >>
52 :    
53 :     Construct a new, blank BadCall object.
54 :    
55 :     =over 4
56 :    
57 :     =item figLikeObject
58 :    
59 :     An object that mimics the FIG object, which will be used to access genetic information.
60 :     If no parameter is specified, a vanilla FIG object will be used.
61 :    
62 :     =item RETURN
63 :    
64 :     Returns an object that can be used to locate and analyze bad gene calls.
65 :    
66 :     =back
67 :    
68 :     =cut
69 :    
sub new {
    # Unpack the class name and the optional FIG-like data source.
    my ($class, $figLikeObject) = @_;
    # When the caller passes nothing (or undef), fall back to a vanilla FIG
    # object. A defined-but-false argument is kept as given.
    my $source = defined $figLikeObject ? $figLikeObject : FIG->new();
    # The object is a blessed hash whose single member, "fig", holds the
    # data source used by the other methods of this package.
    return bless { fig => $source }, $class;
}
84 :    
85 :     =head3 LocationList
86 :    
87 :     C<< my @locs = $bc->LocationList($genomeID); >>
88 :    
89 :     Return a sorted list of the augmented locations for the features of the specified genome.
90 :    
91 :     =over 4
92 :    
93 :     =item genomeID
94 :    
95 :     ID of the genome whose features should be put into the list.
96 :    
97 :     =item RETURN
98 :    
99 :     Returns a list of augmented locations for all the feature segments on the genome's contigs,
100 :     sorted in the order they appear on the contigs, so that overlapping segments will be next
101 :     to each other.
102 :    
103 :     =back
104 :    
105 :     =cut
106 :     #: Return Type @%;
sub LocationList {
    # Unpack the parameters and fetch the FIG-like data source.
    my ($self, $genomeID) = @_;
    my $fig = $self->{fig};
    # Ask the data source for the genome's features. The result is used as a
    # reference to a list of tuples: element 0 of each tuple is read as the
    # feature ID and element 1 as the feature's comma-separated location list.
    my $featureDataList = $fig->all_features_detailed($genomeID);
    # Accumulate one augmented location per feature segment.
    my @unsorted = ();
    for my $tuple (@{$featureDataList}) {
        my $fid = $tuple->[0];
        # Break the location list into its individual segment strings.
        my @segments = split /\s*,\s*/, $tuple->[1];
        for my $index (0 .. $#segments) {
            # The feature ID and the segment's position in the feature's
            # location list are appended to the location string handed to
            # the BasicLocation constructor.
            my $augmented = BasicLocation->new($segments[$index] . "(fid = $fid, index = $index)");
            push @unsorted, $augmented;
        }
    }
    # Order the locations with BasicLocation's own comparator. The sorted
    # list is stored in an array before returning so that a scalar-context
    # caller receives the element count.
    my @retVal = sort { BasicLocation::Cmp($a, $b) } @unsorted;
    return @retVal;
}
133 :    
134 :     =head3 Overlaps
135 :    
136 :     C<< my %overlaps = $bc->Overlaps($genomeID); >>
137 :    
138 :     Find the overlapping features in a genome.
139 :    
140 :     This method processes the sorted segment list for a genome's features and produces
141 :     a hash describing which features overlap other features. Each feature ID maps to
142 :     a list of overlap objects describing the overlaps. Each overlap will appear
143 :     in two lists of the hash-- one for each participating feature.
144 :    
145 :     =over 4
146 :    
147 :     =item genomeID
148 :    
149 :     ID of the genome whose overlaps are desired.
150 :    
151 :     =item RETURN
152 :    
153 :     Returns a hash of lists, keyed by feature ID. Each list will contain overlap objects
154 :     describing the overlaps involving the specified feature.
155 :    
156 :     =back
157 :    
158 :     =cut
159 :     #: Return Type %@;
sub Overlaps {
    # Unpack the parameters.
    my ($self, $genomeID) = @_;
    # Get the genome's sorted location list. The scan below relies on that
    # ordering: it treats the locations overlapping a given location as a
    # contiguous run immediately following it.
    my @locList = $self->LocationList($genomeID);
    # The return hash maps each feature ID to a list of overlap objects.
    my %retVal = ();
    # Every location except the last is taken in turn as the left-hand
    # member of a pair; the last one has nothing after it to compare with.
    for my $i (0 .. $#locList - 1) {
        my $loc0 = $locList[$i];
        my $fid0 = $loc0->{fid};
        # Walk forward through the following locations.
        for my $j ($i + 1 .. $#locList) {
            my $loc1 = $locList[$j];
            # The Overlap constructor is expected to return an overlap object
            # when the two locations overlap and a false value otherwise.
            my $olap = Overlap->new($loc0, $loc1);
            # The first non-overlapping location ends the run for $loc0.
            last if ! $olap;
            # File the overlap under both participating features.
            Tracer::AddToListMap(\%retVal, $fid0, $olap);
            Tracer::AddToListMap(\%retVal, $loc1->{fid}, $olap);
        }
    }
    # Return the result as a flattened hash.
    return %retVal;
}
198 :    
199 :     =head3 OverlapStrings
200 :    
201 :     C<< my %overlaps = $bc->OverlapStrings($genomeID); >>
202 :    
203 :     Return a hash of all the overlaps in a genome. Unlike L</Overlaps>, which returns a hash
204 :     of lists of overlap objects, this method returns a hash of lists of strings. This makes it
205 :     easier to format the overlaps for display.
206 :    
207 :     =over 4
208 :    
209 :     =item genomeID
210 :    
211 :     ID of the genome whose overlaps are desired.
212 :    
213 :     =item RETURN
214 :    
215 :     Returns a hash of lists, keyed by feature ID. Each list will contain overlap strings
216 :     describing the overlaps involving the specified feature.
217 :    
218 :     =back
219 :    
220 :     =cut
221 :     #: Return Type %@;
sub OverlapStrings {
    # Unpack the parameters.
    my ($self, $genomeID) = @_;
    # Get the hash of overlap objects, keyed by feature ID.
    my %overlaps = $self->Overlaps($genomeID);
    # The return hash has the same keys, but holds display strings.
    my %retVal = ();
    # Visit each feature's overlap list. The hash is not modified while it
    # is being iterated, so using "each" here is safe.
    while (my ($fid, $olapList) = each %overlaps) {
        for my $olap (@{$olapList}) {
            # Convert the overlap object to its string form and file it
            # under the same feature ID, preserving the list order.
            my $olapString = $olap->String;
            Tracer::AddToListMap(\%retVal, $fid, $olapString);
        }
    }
    # Return the result as a flattened hash.
    return %retVal;
}
240 :    
241 :     1;
242 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3