[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

Annotation of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     package ERDBLoad;
4 :    
5 :     use strict;
6 :     use Tracer;
7 :     use PageBuilder;
8 :     use ERDB;
9 :     use Stats;
10 :    
11 :     =head1 ERDB Table Load Utility Object
12 :    
13 :     =head2 Introduction
14 :    
15 : parrello 1.3 This object is designed to assist with creating the load file for an ERDB
16 :     data relation. The user constructs the object by specifying an ERDB object
17 :     and a relation name. This creates the load file for the relevant relation. The client
18 :     then passes in data lines which are written to a file, and calls
19 :     L</Finish> to close the file and get the statistics.
20 : parrello 1.1
21 : parrello 1.3 This module makes use of the internal ERDB method C<_IsPrimary>.
22 : parrello 1.1
23 :     =cut
24 :    
25 :     #
26 :    
27 :     =head2 Public Methods
28 :    
29 :     =head3 new
30 :    
31 :     C<< my $erload = ERDBLoad->new($erdb, $relationName, $directory, $estimatedRows); >>
32 :    
33 :     Begin loading an ERDB relation.
34 :    
35 :     =over 4
36 :    
37 :     =item erdb
38 :    
39 :     ERDB object representing the target database.
40 :    
41 :     =item relationName
42 :    
43 :     Name of the relation being loaded.
44 :    
45 :     =item directory
46 :    
47 :     Name of the directory to use for the load files, WITHOUT a trailing slash.
48 :    
49 : parrello 1.2 =item estimatedRows (optional)
50 :    
51 :     Estimated maximum number of table rows. If omitted, the table will be created in
52 :     a format that permits an essentially unlimited number of rows.
53 :    
54 : parrello 1.1 =back
55 :    
56 :     =cut
57 :    
sub new {
    # Get the parameters. $estimatedRows is accepted so that existing callers
    # keep working; nothing in this constructor uses it.
    my ($class, $erdb, $relationName, $directory, $estimatedRows) = @_;
    # Validate the directory name.
    if (! -d $directory) {
        Confess("Load directory \"$directory\" not found.");
    }
    # Determine the name for this relation's load file.
    my $fileName = "$directory/$relationName.dtx";
    # If this is an entity relation, pipe the output through sort so that
    # records with a duplicate key (the first tab-delimited field) are
    # removed. The key is written as "-k1,1": the obsolescent "+0 -1" form
    # selects the same field, but POSIX-conforming sort implementations may
    # treat "+0" as a file name and fail.
    my $fileString = ($erdb->IsEntity($relationName) ?
                      "| sort -k1,1 -u -t \"\t\" >$fileName" :
                      ">$fileName");
    # Open the output file (or pipe) and remember its handle.
    my $fileHandle = Open(undef, $fileString);
    # Create the $erload object.
    my $retVal = {
        dbh       => $erdb,
        fh        => $fileHandle,
        fileName  => $fileName,
        relName   => $relationName,
        fileSize  => 0,
        lineCount => 0,
        stats     => Stats->new(),
        # Normalize the primary-relation test to a 1/0 flag; Put uses it to
        # decide whether to append the new-record field.
        primary   => ($erdb->_IsPrimary($relationName) ? 1 : 0),
    };
    Trace("Relation $relationName load file created with primary flag " . $retVal->{primary} . ".")
        if T(2);
    # Bless and return it.
    bless $retVal, $class;
    return $retVal;
}
91 :    
92 :     =head3 Put
93 :    
94 :     C<< $erload->Put($field1, $field2, ..., $fieldN); >>
95 :    
96 :     Write a line of data to the load file and update the file size, line count,
97 :     and C<lineOut> statistic. The load file itself is closed by L</Finish>.
98 :    
99 :     =over 4
100 :    
101 :     =item field1, field2, ..., fieldN
102 :    
103 :     List of field values to be put into the data line. The field values must be in the
104 :     order shown in the documentation for the table. Internal tabs and
105 :     new-lines will automatically be escaped before the data line is formatted.
106 :    
107 :     =back
108 :    
109 :     =cut
110 :     #: Return Type ;
sub Put {
    # The first argument is the ERDBLoad instance; everything left in @_
    # is the list of field values for this record.
    my $self = shift;
    # Pass every field value through Tracer::Escape before it is written.
    my @escaped = map { Tracer::Escape($_) } @_;
    # A primary relation carries one extra trailing field, the new-record
    # field, which is always written as '0'.
    push @escaped, '0' if $self->{primary};
    # Build the tab-delimited, new-line-terminated record.
    my $record = join("\t", @escaped) . "\n";
    # Append the record to the load file.
    print { $self->{fh} } $record;
    # Account for the record in the running size and line totals.
    $self->{fileSize} += length $record;
    $self->{lineCount}++;
    # Count the output line in the statistics object.
    $self->Add("lineOut");
}
132 :    
133 :     =head3 Add
134 :    
135 :     C<< $erload->Add($statName); >>
136 :    
137 :     Increment the specified statistic.
138 :    
139 :     =over 4
140 :    
141 :     =item statName
142 :    
143 :     Name of the statistic to increment.
144 :    
145 :     =back
146 :    
147 :     =cut
148 :     #: Return Type ;
sub Add {
    # Unpack the loader instance and the name of the statistic.
    my ($self, $statName) = @_;
    # Delegate the increment to the statistics object held by this loader.
    my $stats = $self->{stats};
    $stats->Add($statName);
}
155 :    
156 :     =head3 Finish
157 :    
158 :     C<< my $stats = $erload->Finish(); >>
159 :    
160 :     Finish writing the load file. This closes the load file (and any sort pipe feeding it)
161 :     and returns the accumulated statistics. It does not itself load the file into the database.
162 :    
163 :     =over 4
164 :    
165 :     =item RETURN
166 :    
167 :     Returns a statistics object describing what happened during the load and containing any
168 :     error messages.
169 :    
170 :     =back
171 :    
172 :     =cut
173 :    
sub Finish {
    # Get this object instance.
    my ($self) = @_;
    # Close the load file, unless an earlier call already closed it
    # (fileno is undefined for a closed handle).
    my $fh = $self->{fh};
    if (defined $fh && defined fileno($fh)) {
        # The close must be checked: buffered write errors and a non-zero
        # exit from a piped command are only reported here.
        if (! close $fh) {
            # For a pipe whose command merely exited non-zero, $! is 0 and
            # the exit status is in $?.
            my $reason = ($! ? "$!" : "exit status $?");
            Confess("Error closing load file \"$self->{fileName}\": $reason");
        }
    }
    # Return the statistics object.
    return $self->{stats};
}
182 :    
183 :     =head3 RelName
184 :    
185 :     C<< my $name = $erload->RelName; >>
186 :    
187 :     Name of the relation being loaded by this object.
188 :    
189 :     =cut
190 :    
sub RelName {
    # The only argument is the loader instance.
    my $self = shift;
    # Hand back the relation name recorded by the constructor.
    return $self->{relName};
}
197 :    
198 :     1;
199 :    

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3