[Bio] / Sprout / SaplingDataLoader.pm Repository:
ViewVC logotype

Annotation of /Sprout/SaplingDataLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package SaplingDataLoader;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use Stats;
25 :     use SeedUtils;
26 :     use SAPserver;
27 :     use Sapling;
28 :    
29 :     =head1 Sapling Data Loader
30 :    
31 :     This is the base class for packages that load the Sapling database from
32 :     SEED data files.
33 :    
34 :     =head2 Loader Object Methods
35 :    
36 :     =head3 new
37 :    
38 :     my $loaderObject = SaplingGenomeLoader->new($sap, @stats);
39 :    
40 :     Create a loader object that can be used to facilitate loading Sapling data from a
41 :     directory.
42 :    
43 :     =over 4
44 :    
45 :     =item sap
46 :    
47 :     L<Sapling> object used to access the target database.
48 :    
49 :     =item stats
50 :    
51 :     List of names for statistics to be initialized in the statistics object.
52 :    
53 :     =back
54 :    
55 :     The object created contains the following fields.
56 :    
57 :     =over 4
58 :    
59 :     =item supportRecords
60 :    
61 :     A hash of hashes, used to track the support records known to exist in the database.
62 :    
63 :     =item sap
64 :    
65 :     L<Sapling> object used to access the database.
66 :    
67 :     =item stats
68 :    
69 :     L<Stats> object for tracking statistical information about the load.
70 :    
71 :     =back
72 :    
73 :     =cut
74 :    
sub new {
    # Unpack the invocant, the database handle, and the statistic names.
    my ($class, $sap, @stats) = @_;
    # Assemble the three fields of the loader: the database object passed
    # in, a Stats object built from the incoming statistic names, and an
    # initially-empty hash for tracking support records.
    my %loader = (
        sap            => $sap,
        stats          => Stats->new(@stats),
        supportRecords => {},
    );
    # Turn the field hash into an object of the requested class.
    return bless \%loader, $class;
}
88 :    
89 :     =head2 Internal Utility Methods
90 :    
91 :     =head3 DeleteRelatedRecords
92 :    
93 :     DeleteRelatedRecords($sap, $genome, $stats, $relName, $entityName);
94 :    
95 :     Delete all the records in the named entity and relationship relating to the
96 :     specified genome and roll up the statistics in the specified statistics object.
97 :    
98 :     =over 4
99 :    
100 :     =item sap
101 :    
102 :     L<Sapling> object for accessing the database.
103 :    
104 :     =item genome
105 :    
106 :     ID of the relevant genome.
107 :    
108 :     =item stats
109 :    
110 :     L<Stats> object for tracking the delete activity.
111 :    
112 :     =item relName
113 :    
114 :     Name of a relationship from the B<Genome> table.
115 :    
116 :     =item entityName
117 :    
118 :     Name of the entity on the other side of the relationship.
119 :    
120 :     =back
121 :    
122 :     =cut
123 :    
sub DeleteRelatedRecords {
    # Unpack the parameters.
    my ($sap, $genome, $stats, $relName, $entityName) = @_;
    # Build the filter that selects relationship rows leaving this genome.
    my $fromFilter = "$relName(from-link) = ?";
    # Fetch the to-link value of every matching relationship row.
    my @linkedIDs = $sap->GetFlat($relName, $fromFilter, [$genome], "to-link");
    # Process each linked ID: first the relationship row, then the entity
    # instance on the far side.
    foreach my $linkedID (@linkedIDs) {
        # Remove the relationship row and count it under the relationship name.
        $sap->DeleteRow($relName, $genome, $linkedID);
        $stats->Add($relName => 1);
        # Remove the entity instance; the delete hands back its own
        # statistics, which are merged into the caller's statistics object.
        my $deleteStats = $sap->Delete($entityName, $linkedID);
        $stats->Accumulate($deleteStats);
    }
}
142 :    
143 :     =head3 ExtractFields
144 :    
145 :     my %fieldHash = SaplingDataLoader::ExtractFields($tableName, $dataHash);
146 :    
147 :     Extract from the incoming hash the field names and values from the specified table.
148 :    
149 :     =over 4
150 :    
151 :     =item tableName
152 :    
153 :     Name of the table whose field names and values are desired.
154 :    
155 :     =item dataHash
156 :    
157 :     Reference to a hash mapping fully-qualified ERDB field names to values.
158 :    
159 :     =item RETURN
160 :    
161 :     Returns a hash containing only the fields from the specified table and their values.
162 :    
163 :     =back
164 :    
165 :     =cut
166 :    
# Pull out of a hash of fully-qualified field names ("Table(field)") the
# entries belonging to one table, keyed by the bare field name.
#
# Parameters:
#   tableName  name of the table whose fields are wanted; it is matched
#              literally, so any regex metacharacters in it are not special
#   dataHash   reference to a hash keyed by qualified field name
#
# Returns a hash (as a list) mapping each bare field name found for the
# table to the value stored under the qualified name.
sub ExtractFields {
    # Get the parameters.
    my ($tableName, $dataHash) = @_;
    # Declare the return variable.
    my %retVal;
    # Build the matching pattern once, outside the loop. The table name is
    # quoted with \Q...\E so that characters such as "." or "(" in the name
    # are compared literally instead of being interpreted as regex syntax.
    # The capture group is the text between the opening parenthesis and the
    # first closing parenthesis (or end of string).
    my $fieldPattern = qr/^\Q$tableName\E\(([^)]+)/;
    # Extract the desired fields.
    for my $field (keys %$dataHash) {
        # Is this a field for the specified table?
        if ($field =~ $fieldPattern) {
            # Yes, put it in the output hash under its unqualified name.
            $retVal{$1} = $dataHash->{$field};
        }
    }
    # Return the computed hash.
    return %retVal;
}
183 :    
184 :     =head3 InsureEntity
185 :    
186 :     my $createdFlag = $loaderObject->InsureEntity($entityType => $id, %fields);
187 :    
188 :     Insure that the specified record exists in the database. If no record is found of the
189 :     specified type with the specified ID, one will be created with the indicated fields.
190 :    
191 :     =over 4
192 :    
193 :     =item entityType
194 :    
195 :     Type of entity to check.
196 :    
197 :     =item id
198 :    
199 :     ID of the entity instance in question.
200 :    
201 :     =item fields
202 :    
203 :     Hash mapping field names to values for all the fields in the desired entity record except
204 :     for the ID.
205 :    
206 :     =item RETURN
207 :    
208 :     Returns TRUE if a new object was created, FALSE if it already existed.
209 :    
210 :     =back
211 :    
212 :     =cut
213 :    
sub InsureEntity {
    # Unpack the parameters.
    my ($self, $entityType, $id, %fields) = @_;
    # Locate the per-type cache of IDs already known to be in the database,
    # creating an empty one the first time this entity type is seen.
    my $knownTypes = $self->{supportRecords};
    unless (defined $knownTypes->{$entityType}) {
        $knownTypes->{$entityType} = {};
    }
    my $knownIDs = $knownTypes->{$entityType};
    # If this ID is already in the cache, nothing needs to be created and
    # the database is not consulted.
    return 0 if $knownIDs->{$id};
    # Not cached, so ask the database whether the record exists.
    my $created = 0;
    my $database = $self->{sap};
    unless ($database->Exists($entityType => $id)) {
        # It is absent from the database too: insert it with the supplied
        # fields and count the insertion.
        $database->InsertObject($entityType, id => $id, %fields);
        $self->{stats}->Add(insertSupport => 1);
        $created = 1;
    }
    # Either way the record is now known to exist, so remember it.
    $knownIDs->{$id} = 1;
    # Report whether an insert took place.
    return $created;
}
244 :    
245 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3