[Bio] / FigKernelPackages / CorrespondenceCache.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/CorrespondenceCache.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :     package CorrespondenceCache;
18 :    
19 :     use strict;
20 :     use Tracer;
21 :     use SeedUtils;
22 :     use ServerThing;
23 :    
24 :     =head1 Genome Correspondence Cache Object
25 :    
26 :     This is a helper object for Sapling Server methods that must manage large
27 :     numbers of gene correspondences. It maintains a hash of genome correspondences
28 :     so that they can be used over and over without recomputation. When the hash
29 :     gets too big, it will be cleared and restarted. Hopefully, that will not be
30 :     an issue.
31 :    
32 :     The object has the following fields.
33 :    
34 :     =over 4
35 :    
36 :     =item map
37 :    
38 :     Reference to a hash keyed on a pair of genome IDs separated by a slash. The
39 :     first genome ID is the source and the second is the target; the value in
40 :     the hash is a sub-hash that contains the gene correspondences from the source
41 :     to the target.
42 :    
43 :     =item count
44 :    
45 :     Number of hashes in the map. When this exceeds the maximum, the hash is
46 :     cleared and we start over.
47 :    
48 :     =back
49 :    
50 :     =cut
51 :    
52 :     # Maximum number of maps to keep in memory.
53 :     use constant MAX_MAPS => 500;
54 :    
55 :     =head2 Special Methods
56 :    
57 :     =head3 new
58 :    
59 :     my $corrCache = CorrespondenceCache->new();
60 :    
61 :     Construct a new, blank correspondence cache.
62 :    
63 :     =cut
64 :    
sub new {
    # The only argument is the class name to bless into.
    my ($class) = @_;
    # A fresh cache holds no correspondence maps, so the map table is empty
    # and the running map count starts at zero.
    my %fields = (
        map   => {},
        count => 0,
    );
    # Turn the field hash into the object and hand it back.
    return bless \%fields, $class;
}
77 :    
78 :    
79 :     =head2 Public Methods
80 :    
81 :     =head3 get_correspondent
82 :    
83 :     my $fid2 = $corrCache->get_correspondent($fid1, $genome2);
84 :    
85 :     Return the FIG ID of the gene in a specified genome that corresponds to the
86 :     specified incoming gene.
87 :    
88 :     =over 4
89 :    
90 :     =item fid1
91 :    
92 :     FIG ID of the gene for which a corresponding gene is desired.
93 :    
94 :     =item genome2
95 :    
96 :     Target genome in which the corresponding gene should be found.
97 :    
98 :     =item RETURN
99 :    
100 :     Returns the FIG ID of the corresponding gene in the target genome, or
101 :     an undefined value if one cannot be found.
102 :    
103 :     =back
104 :    
105 :     =cut
106 :    
sub get_correspondent {
    # $fid1 is the gene to translate; $genome2 is the genome in which we
    # want its counterpart.
    my ($self, $fid1, $genome2) = @_;
    # The source genome is derived from the incoming gene ID.
    my $sourceGenome = genome_of($fid1);
    # Ask the cache for the source-to-target gene table.
    my $geneMap = $self->get_correspondence_map($sourceGenome, $genome2);
    # With a table in hand, the answer is whatever it holds for the gene
    # (possibly nothing). Without one, the answer is undefined.
    return (defined $geneMap ? $geneMap->{$fid1} : undef);
}
125 :    
126 :     =head3 get_correspondence_map
127 :    
128 :     my $corrHash = $corrCache->get_correspondence_map($genome1, $genome2);
129 :    
130 :     Return the hash mapping genes in the specified source genome
131 :     (I<$genome1>) to corresponding genes in the specified target genome
132 :     (I<$genome2>).
133 :    
134 :     This method will actually build the correspondence in both directions at
135 :     the same time and cache the one that is not requested. If the desired
136 :     correspondence is already cached, it will be returned without preamble.
137 :     If the map is already full, it will be cleared before the new correspondences
138 :     are put in.
139 :    
140 :     =over 4
141 :    
142 :     =item genome1
143 :    
144 :     Source genome for the correspondence map.
145 :    
146 :     =item genome2
147 :    
148 :     Target genome for the correspondence map.
149 :    
150 :     =item RETURN
151 :    
152 :     Returns a reference to a hash that maps genes in the source genome to corresponding genes
153 :     in the target genome, or C<undef> if no correspondence could be created. (This is commonly
154 :     because one of the genomes is incomplete.)
155 :    
156 :     =back
157 :    
158 :     =cut
159 :    
# Return the hash that maps genes of $genome1 to their corresponding genes in
# $genome2, building and caching it (together with the converse map) on first
# use.
#
# Parameters:
#   $genome1 - ID of the source genome.
#   $genome2 - ID of the target genome.
#
# Returns a reference to the source-to-target gene hash, or undef when no
# correspondence data could be obtained for the pair.
sub get_correspondence_map {
    # Get the parameters.
    my ($self, $genome1, $genome2) = @_;
    # Check for an entry already in the cache. We test with "exists" rather
    # than "defined" because a pair for which no correspondence could be
    # created is cached with an undefined value, and we do not want to
    # recompute it on every request.
    my $mapKey = "$genome1/$genome2";
    if (exists $self->{map}{$mapKey}) {
        return $self->{map}{$mapKey};
    }
    # Compute the name of the converse map. When a genome is paired with
    # itself this is the same key, so only one cache entry will be added.
    my $converseKey = "$genome2/$genome1";
    my $newEntries = ($converseKey eq $mapKey ? 1 : 2);
    # We need to create the map. Insure there's room.
    if ($self->{count} + $newEntries > MAX_MAPS) {
        Trace("Clearing correspondence cache.") if T(Corr => 2);
        $self->{map} = {};
        $self->{count} = 0;
    }
    Trace("Finding correspondence from $genome1 to $genome2.") if T(Corr => 3);
    # Get the correspondence data from the source to the target. We insist that
    # both directions be represented so we can cache the converse map at this
    # time.
    # NOTE(review): assumes GetCorrespondenceData returns undef when it cannot
    # produce any data -- confirm against ServerThing.
    my $corrList = ServerThing::GetCorrespondenceData($genome1, $genome2, 0, 1);
    # These stay undefined if no correspondence data came back. (Looping over
    # @$corrList directly would silently turn an undefined list into an empty
    # one, and the caller would get an empty hash instead of undef.)
    my ($mapRef, $converseRef);
    if (defined $corrList) {
        # The maps will go in here.
        my (%map, %converse);
        # Loop through the correspondence data, building the maps.
        for my $listRow (@$corrList) {
            # Get the corresponding genes.
            my ($fid1, $fid2) = @$listRow;
            # Get the directional indicator.
            my $dir = $listRow->[8];
            # A row goes into the forward map unless it is marked '<-', and
            # into the converse map unless it is marked '->'.
            if ($dir ne '<-') {
                $map{$fid1} = $fid2;
            }
            if ($dir ne '->') {
                $converse{$fid2} = $fid1;
            }
        }
        ($mapRef, $converseRef) = (\%map, \%converse);
    }
    # Store the results in the cache. The converse entry is skipped when it
    # would overwrite the main entry, so that what we return is always what
    # later lookups will find.
    $self->{map}{$mapKey} = $mapRef;
    if ($converseKey ne $mapKey) {
        $self->{map}{$converseKey} = $converseRef;
    }
    # Update the map count by the number of entries actually added.
    $self->{count} += $newEntries;
    # Return the main map.
    return $mapRef;
}
206 :    
207 :    
208 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3