[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

Annotation of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.13 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package ERDBLoad;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use PageBuilder;
8 :     use ERDB;
9 :     use Stats;
10 :    
11 :     =head1 ERDB Table Load Utility Object
12 :    
13 :     =head2 Introduction
14 :    
15 : parrello 1.3 This object is designed to assist with creating the load file for an ERDB
16 :     data relation. The user constructs the object by specifying an ERDB object
17 :     and a relation name. This creates the load file for the relevant relation. The client
18 :     then passes in data lines which are written to a file, and calls
19 :     L</Finish> to close the file and get the statistics.
20 : parrello 1.1
21 : parrello 1.3 This module makes use of the internal ERDB method C<_IsPrimary>.
22 : parrello 1.1
23 :     =cut
24 :    
25 :     #
26 :    
27 :     =head2 Public Methods
28 :    
29 :     =head3 new
30 :    
31 : parrello 1.12 C<< my $erload = ERDBLoad->new($erdb, $relationName, $directory, $loadOnly, $ignore); >>
32 : parrello 1.1
33 :     Begin loading an ERDB relation.
34 :    
35 :     =over 4
36 :    
37 :     =item erdb
38 :    
39 :     ERDB object representing the target database.
40 :    
41 :     =item relationName
42 :    
43 :     Name of the relation being loaded.
44 :    
45 :     =item directory
46 :    
47 :     Name of the directory to use for the load files, WITHOUT a trailing slash.
48 :    
49 : parrello 1.8 =item loadOnly
50 : parrello 1.2
51 : parrello 1.8 TRUE if the data is to be loaded from an existing file, FALSE if a file is
52 :     to be created.
53 : parrello 1.2
54 : parrello 1.10 =item ignore
55 :    
56 :     TRUE if the data is to be discarded. This is used to save time when only
57 :     a subset of the tables need to be loaded: the data for the ignored tables
58 :     is simply discarded.
59 :    
60 : parrello 1.1 =back
61 :    
62 :     =cut
63 :    
sub new {
    # Constructor. Arguments: the class name, the ERDB object for the target
    # database, the relation name, the load directory (no trailing slash),
    # the load-only flag, and the ignore flag. Returns the blessed loader.
    my ($class, $erdb, $relationName, $directory, $loadOnly, $ignore) = @_;
    # The load directory must already exist; this is checked in every mode,
    # including ignore and load-only.
    if (! -d $directory) {
        Confess("Load directory \"$directory\" not found.");
    }
    # Determine the name for this relation's load file.
    my $fileName = "$directory/$relationName.dtx";
    # Declare the file handle variable. It stays an empty string whenever
    # no output file is opened (ignore and load-only modes).
    my $fileHandle;
    # Determine whether or not this is a primary relation, normalized to 1/0.
    my $primary = ($erdb->_IsPrimary($relationName) ? 1 : 0);
    # Check to see if this is a load-only, ignore, or a generate-and-load.
    if ($ignore) {
        Trace("Relation $relationName will be ignored.") if T(2);
        $fileHandle = "";
    } elsif ($loadOnly) {
        Trace("Relation $relationName will be loaded from $fileName.") if T(2);
        $fileHandle = "";
    } else {
        # When the ERDB object reports that a sort is needed, pipe the output
        # through "sort" so that lines with duplicate keys are removed.
        # "-k1,1" restricts the sort key to the first tab-delimited field and
        # "-u" keeps a single line per key. This is the standard spelling of
        # the obsolete "+0 -1" key form, which current sort implementations
        # reject (they treat "+0" as an input file name).
        my $fileString = ($erdb->SortNeeded($relationName) ?
                          "| sort -k1,1 -u -t \"\t\" >$fileName" :
                          ">$fileName");
        # Open the output file and remember its handle.
        $fileHandle = Open(undef, $fileString);
        Trace("Relation $relationName load file created with primary flag $primary.") if T(2);
    }
    # Create the $erload object.
    my $retVal = {
        dbh       => $erdb,
        fh        => $fileHandle,
        fileName  => $fileName,
        relName   => $relationName,
        fileSize  => 0,
        lineCount => 0,
        stats     => Stats->new(),
        primary   => $primary,
        ignore    => ($ignore ? 1 : 0)
    };
    # Bless and return it.
    bless $retVal, $class;
    return $retVal;
}
110 :    
111 : parrello 1.10 =head3 Ignore
112 :    
113 : parrello 1.11 C<< my $flag = $erload->Ignore; >>
114 : parrello 1.10
115 :     Return TRUE if we are ignoring this table, else FALSE.
116 :    
117 :     =cut
118 :     #: Return Type $;
sub Ignore {
    # The invocant is the only argument.
    my $self = shift;
    # Hand back the ignore flag stored in the object hash.
    return $self->{ignore};
}
125 :    
126 : parrello 1.1 =head3 Put
127 :    
128 :     C<< $erload->Put($field1, $field2, ..., $fieldN); >>
129 :    
130 :     Write a line of data to the load file. This may also cause the load file to be closed
131 :     and data read into the table.
132 :    
133 :     =over 4
134 :    
135 :     =item field1, field2, ..., fieldN
136 :    
137 :     List of field values to be put into the data line. The field values must be in the
138 :     order shown in the documentation for the table. Internal tabs and
139 :     new-lines will automatically be escaped before the data line is formatted.
140 :    
141 :     =back
142 :    
143 :     =cut
144 :     #: Return Type ;
sub Put {
    # Separate the ERDBLoad instance from the raw field values.
    my ($self, @rawFields) = @_;
    # Nothing at all happens for a relation that is being ignored.
    if (! $self->{ignore}) {
        my $erdb = $self->{dbh};
        my $relation = $self->{relName};
        # Let the database object digest the hash-string fields in place.
        $erdb->DigestFields($relation, \@rawFields);
        # Verify the field values, remembering the reported truncation count.
        my $truncates = $erdb->VerifyFields($relation, \@rawFields);
        # Escape each value in turn.
        my @fields;
        for my $rawValue (@rawFields) {
            push @fields, Tracer::Escape($rawValue);
        }
        # A primary relation gets a trailing new-record field.
        push @fields, '0' if $self->{primary};
        # Build the tab-delimited data line and write it to the load file.
        my $line = join("\t", @fields) . "\n";
        print { $self->{fh} } $line;
        # Account for the line in the size and count totals and statistics.
        $self->{fileSize} += length $line;
        $self->{lineCount}++;
        $self->Add("lineOut");
        $self->Add("truncated", $truncates) if $truncates > 0;
    }
}
176 :    
177 :     =head3 Add
178 :    
179 : parrello 1.9 C<< $erload->Add($statName, $value); >>
180 : parrello 1.4
181 :     Increment the specified statistic.
182 :    
183 :     =over 4
184 :    
185 :     =item statName
186 :    
187 :     Name of the statistic to increment.
188 :    
189 : parrello 1.9 =item value (optional)
190 :    
191 :     Value by which to increment it. If omitted, C<1> is assumed.
192 :    
193 : parrello 1.4 =back
194 :    
195 :     =cut
196 :     #: Return Type ;
sub Add {
    # Invocant, statistic name, and optional increment.
    my ($self, $statName, $value) = @_;
    # An omitted increment counts as a single occurrence.
    my $increment = (defined $value ? $value : 1);
    # Delegate to the embedded statistics object.
    return $self->{stats}->Add($statName, $increment);
}
207 :    
208 :     =head3 Finish
209 :    
210 :     C<< my $stats = $erload->Finish(); >>
211 :    
212 :     Finish loading the table. This closes the load file and loads its contents into the database.
213 :     It also creates the indexes if the DBMS uses post-indexing.
214 :    
215 :     =over 4
216 :    
217 :     =item RETURN
218 :    
219 :     Returns a statistics object describing what happened during the load and containing any
220 :     error messages.
221 :    
222 :     =back
223 :    
224 :     =cut
225 :    
sub Finish {
    # Get this object instance.
    my $self = shift;
    # Close the load file, but only when a handle was actually opened;
    # the handle slot is an empty string otherwise.
    my $handle = $self->{fh};
    close $handle if $handle;
    # Hand the statistics object back to the caller.
    return $self->{stats};
}
236 :    
237 :     =head3 RelName
238 :    
239 :     C<< my $name = $erload->RelName; >>
240 :    
241 :     Name of the relation being loaded by this object.
242 :    
243 :     =cut
244 :    
sub RelName {
    # The invocant is the only argument.
    my $self = shift;
    # Report the relation name recorded in the object hash.
    return $self->{relName};
}
251 :    
252 :     1;
253 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3