[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

Annotation of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package ERDBLoad;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use PageBuilder;
8 :     use ERDB;
9 :     use Stats;
10 :    
11 :     =head1 ERDB Table Load Utility Object
12 :    
13 :     =head2 Introduction
14 :    
15 : parrello 1.3 This object is designed to assist with creating the load file for an ERDB
16 :     data relation. The user constructs the object by specifying an ERDB object
17 :     and a relation name. This creates the load file for the relevant relation. The client
18 :     then passes in data lines which are written to a file, and calls
19 :     L</Finish> to close the file and get the statistics.
20 : parrello 1.1
21 : parrello 1.3 This module makes use of the internal ERDB method C<_IsPrimary>.
22 : parrello 1.1
23 :     =cut
24 :    
25 :     #
26 :    
27 :     =head2 Public Methods
28 :    
29 :     =head3 new
30 :    
31 :     C<< my $erload = ERDBLoad->new($erdb, $relationName, $directory, $loadOnly); >>
32 :    
33 :     Begin loading an ERDB relation.
34 :    
35 :     =over 4
36 :    
37 :     =item erdb
38 :    
39 :     ERDB object representing the target database.
40 :    
41 :     =item relationName
42 :    
43 :     Name of the relation being loaded.
44 :    
45 :     =item directory
46 :    
47 :     Name of the directory to use for the load files, WITHOUT a trailing slash.
48 :    
49 : parrello 1.8 =item loadOnly
50 : parrello 1.2
51 : parrello 1.8 TRUE if the data is to be loaded from an existing file, FALSE if a file is
52 :     to be created.
53 : parrello 1.2
54 : parrello 1.1 =back
55 :    
56 :     =cut
57 :    
sub new {
    # Get the parameters. $loadOnly is optional: when it is false or omitted,
    # a new load file is generated; when it is true, no file is opened and the
    # existing load file is left alone.
    my ($class, $erdb, $relationName, $directory, $loadOnly) = @_;
    # Validate the directory name.
    if (! -d $directory) {
        Confess("Load directory \"$directory\" not found.");
    }
    # Determine the name for this relation's load file.
    my $fileName = "$directory/$relationName.dtx";
    # Declare the file handle variable.
    my $fileHandle;
    # Determine whether or not this is a primary relation. The flag is
    # normalized to 1 or 0 so that it traces cleanly.
    my $primary = ($erdb->_IsPrimary($relationName) ? 1 : 0);
    # Check to see if this is a load-only or a generate-and-load.
    if ($loadOnly) {
        Trace("Relation $relationName will be loaded from $fileName.") if T(2);
        # No file is opened in load-only mode; the empty string is the
        # "no handle" marker stored in the object.
        $fileHandle = "";
    } else {
        # If IsEntity reports this relation as an entity, pipe the output
        # through a unique sort on the first tab-delimited field to remove
        # duplicate keys. The key is given as "-k 1,1": the older "+0 -1"
        # spelling of the same key is obsolete and is rejected by sort
        # implementations that conform to POSIX 1003.1-2001.
        my $fileString = ($erdb->IsEntity($relationName) ?
                          "| sort -k 1,1 -u -t \"\t\" >$fileName" :
                          ">$fileName");
        # Open the output file and remember its handle.
        $fileHandle = Open(undef, $fileString);
        Trace("Relation $relationName load file created with primary flag $primary.") if T(2);
    }
    # Create the $erload object.
    my $retVal = {
                  dbh => $erdb,
                  fh => $fileHandle,
                  fileName => $fileName,
                  relName => $relationName,
                  fileSize => 0,
                  lineCount => 0,
                  stats => Stats->new(),
                  primary => $primary
                 };
    # Bless and return it.
    bless $retVal, $class;
    return $retVal;
}
100 :    
101 :     =head3 Put
102 :    
103 :     C<< $erload->Put($field1, $field2, ..., $fieldN); >>
104 :    
105 :     Write a line of data to the load file. This may also cause the load file to be closed
106 :     and data read into the table.
107 :    
108 :     =over 4
109 :    
110 :     =item field1, field2, ..., fieldN
111 :    
112 :     List of field values to be put into the data line. The field values must be in the
113 :     order shown in the documentation for the table. Internal tabs and
114 :     new-lines will automatically be escaped before the data line is formatted.
115 :    
116 :     =back
117 :    
118 :     =cut
119 :     #: Return Type ;
sub Put {
    # Get the ERDBLoad instance; whatever remains in @_ is the field data.
    my $self = shift @_;
    # Run through the list of field values, escaping them so that the
    # tab-delimited, one-record-per-line layout of the load file is preserved.
    my @fields = map { Tracer::Escape($_) } @_;
    # If this is a primary relation, append the new-record field.
    if ($self->{primary}) {
        push @fields, '0';
    }
    # Form a data line from the fields.
    my $line = join("\t", @fields) . "\n";
    # Write the new record to the load file. In load-only mode the object
    # holds no open handle (fh is an empty string), so there is nothing to
    # write to; printing to it would only produce an "unopened filehandle"
    # warning and lose the line silently.
    my $fh = $self->{fh};
    if ($fh) {
        print {$fh} $line
            or Confess("Could not write to load file $self->{fileName}: $!");
    }
    # Update the statistics: running file size, line count, and the
    # "lineOut" counter in the statistics object.
    $self->{fileSize} += length $line;
    $self->{lineCount}++;
    $self->Add("lineOut");
}
141 :    
142 :     =head3 Add
143 :    
144 :     C<< $erload->Add($statName); >>
145 :    
146 :     Increment the specified statistic.
147 :    
148 :     =over 4
149 :    
150 :     =item statName
151 :    
152 :     Name of the statistic to increment.
153 :    
154 :     =back
155 :    
156 :     =cut
157 :     #: Return Type ;
sub Add {
    # The first argument is the loader; the second names the counter.
    my $self = shift;
    my ($statName) = @_;
    # Delegate to the statistics object held by this loader and hand back
    # whatever its Add method returns.
    my $stats = $self->{stats};
    return $stats->Add($statName);
}
164 :    
165 :     =head3 Finish
166 :    
167 :     C<< my $stats = $erload->Finish(); >>
168 :    
169 :     Finish loading the table. This closes the load file and loads its contents into the database.
170 :     It also creates the indexes if the DBMS uses post-indexing.
171 :    
172 :     =over 4
173 :    
174 :     =item RETURN
175 :    
176 :     Returns a statistics object describing what happened during the load and containing any
177 :     error messages.
178 :    
179 :     =back
180 :    
181 :     =cut
182 :    
sub Finish {
    # Get this object instance.
    my ($self) = @_;
    # Close the load file, if one was opened. In load-only mode the fh slot
    # holds an empty string rather than a handle, so there is nothing to close.
    my $fh = $self->{fh};
    if ($fh) {
        # The result of close must be checked: buffered write errors surface
        # here, and when the handle is a pipe to sort this is the only place
        # a failure of the sort command becomes visible ($? holds its status).
        close $fh
            or Confess("Error closing load file $self->{fileName} for relation $self->{relName}: $! (status $?)");
        # Forget the handle so that calling Finish again is harmless.
        $self->{fh} = "";
    }
    # Return the statistics object.
    return $self->{stats};
}
191 :    
192 :     =head3 RelName
193 :    
194 :     C<< my $name = $erload->RelName; >>
195 :    
196 :     Name of the relation being loaded by this object.
197 :    
198 :     =cut
199 :    
sub RelName {
    # Accessor: report the relation name recorded when the loader was built.
    my $self = shift;
    return $self->{relName};
}
206 :    
207 :     1;
208 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3