[Bio] / Sprout / ERDBLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/ERDBLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Sun Aug 14 23:32:08 2005 UTC revision 1.6, Thu Oct 13 14:49:36 2005 UTC
# Line 12  Line 12 
12    
13  =head2 Introduction  =head2 Introduction
14    
15  This object is designed to assist with loading an ERDB data relation. The user  This object is designed to assist with creating the load file for an ERDB
16  constructs the object by specifying an ERDB object and a relation name. This  data relation. The user constructs the object by specifying an ERDB object
17  re-creates the relevant relation. The client then passes in data lines which  and a relation name. This creates the load file for the relevant relation. The client
18  are written to a file. When the file gets big enough, it is loaded into the  then passes in data lines which are written to a file, and calls
19  table. Calling the L</Finish> method loads any leftover data and optionally  L</Finish> to close the file and get the statistics.
 creates the index.  
20    
21  This module makes use of the internal ERDB property C<_dbh> and the internal  This module makes use of the internal ERDB method C<_IsPrimary>.
 method C<_IsPrimary>.  
22    
23  =cut  =cut
24    
# Line 66  Line 64 
64      }      }
65      # Determine the name for this relation's load file.      # Determine the name for this relation's load file.
66      my $fileName = "$directory/$relationName.dtx";      my $fileName = "$directory/$relationName.dtx";
67      # Decide whether or not we should specify estimated rows.      # If this is a primary entity relation, sort the output to remove
68      my $rowEstimate = ($FIG_Config::estimate_rows ? $estimatedRows : undef);      # duplicate keys.
69      # Create the target table. If this is a pre-index DBMS, we      my $fileString = ($erdb->IsEntity($relationName) ?
70      # also create the indexes. If the table already exists,                          "| sort +0 -1 -u -t \"\t\" >$fileName" :
71      # it will be dropped.                          ">$fileName");
     $erdb->CreateTable($relationName, $FIG_Config::preIndex, $rowEstimate);  
72      # Open the output file and remember its handle.      # Open the output file and remember its handle.
73      my $fileHandle = Open(undef, ">$fileName");      my $fileHandle = Open(undef, $fileString);
74      # Create the $erload object.      # Create the $erload object.
75      my $retVal = {      my $retVal = {
76                    dbh => $erdb,                    dbh => $erdb,
# Line 85  Line 82 
82                    stats => Stats->new(),                    stats => Stats->new(),
83                    primary => $erdb->_IsPrimary($relationName)                    primary => $erdb->_IsPrimary($relationName)
84                   };                   };
85        Trace("Relation $relationName load file created with primary flag " . $retVal->{primary} . ".")
86            if T(2);
87      # Bless and return it.      # Bless and return it.
88      bless $retVal, $class;      bless $retVal, $class;
89      return $retVal;      return $retVal;
# Line 120  Line 119 
119      }      }
120      # Form a data line from the fields.      # Form a data line from the fields.
121      my $line = join("\t", @fields) . "\n";      my $line = join("\t", @fields) . "\n";
     # Determine how long this will make the load file.  
     my $lineLength = length $line;  
     if ($lineLength > (200000000 - $self->{fileSize})) {  
         # Here it would be too long, so we force a load.  
         $self->Flush();  
     }  
122      # Write the new record to the load file.      # Write the new record to the load file.
123      my $fh = $self->{fh};      my $fh = $self->{fh};
124      print $fh $line;      print $fh $line;
125        # Determine how long this will make the load file.
126        my $lineLength = length $line;
127      # Update the statistics.      # Update the statistics.
128      $self->{fileSize} += $lineLength;      $self->{fileSize} += $lineLength;
129      $self->{lineCount} ++;      $self->{lineCount} ++;
130        $self->Add("lineOut");
131  }  }
132    
133  =head3 Flush  =head3 Add
134    
135    C<< $erload->Add($statName); >>
136    
137    Increment the specified statistic.
138    
139    =over 4
140    
141    =item statName
142    
143  C<< $erload->Flush(); >>  Name of the statistic to increment.
144    
145  Load all the data currently in the load file into the database. This clears the load  =back
 file and re-opens it.  
146    
147  =cut  =cut
148  #: Return Type ;  #: Return Type ;
149  sub Flush {  sub Add {
150      # Get the parameters.      # Get the parameters.
151      my ($self) = @_;      my ($self, $statName) = @_;
152      # Flush the data in the load file.      # Increment the statistic.
153      $self->_FlushData();      $self->{stats}->Add($statName);
     # Re-open the file so it can accept more data.  
     $self->_ReOpen();  
154  }  }
155    
156  =head3 Finish  =head3 Finish
# Line 173  Line 174 
174  sub Finish {  sub Finish {
175      # Get this object instance.      # Get this object instance.
176      my ($self) = @_;      my ($self) = @_;
177      # Flush out the data in the load file.      # Close the load file.
178      $self->_FlushData();      close $self->{fh};
     # If this is a post-index DBMS, create the indexes.  
     if (! $FIG_Config::preIndex) {  
         $self->{dbh}->CreateIndex($self->RelName);  
     }  
     # Delete the load file.  
     unlink $self->{fileName};  
179      # Return the statistics object.      # Return the statistics object.
180      return $self->{stats};      return $self->{stats};
181  }  }
# Line 200  Line 195 
195      return $self->{relName};      return $self->{relName};
196  }  }
197    
 =head2 Internal Methods  
   
 =head3 ReOpen  
   
 Re-open the load file.  
   
 This is an instance method.  
   
 =cut  
   
 sub _ReOpen {  
     # Get this instance.  
     my ($self) = @_;  
     # Open the file with the current filehandle in truncate mode.  
     Open($self->{fh}, ">" . $self->{fileName});  
     # Denote the file is empty.  
     $self->{fileSize} = 0;  
     $self->{lineCount} = 0;  
 }  
   
 =head3 FlushData  
   
 Close the load file and load all its data into the table.  
   
 This is an instance method.  
   
 =cut  
   
 sub _FlushData {  
     # Get this instance.  
     my ($self) = @_;  
     # Get the relation name.  
     my $relName = $self->RelName;  
     Trace("Flushing data to table $relName.") if T(2);  
     # Close the load file.  
     close $self->{fh};  
     # We must use the load file to load the table. First, we get the DBKernel  
     # handle and the statistics object.  
     my $stats = $self->{stats};  
     my $dbh = $self->{dbh}->{_dbh};  
     # Begin a database transaction. This is not actually for integrity reasons; it  
     # speeds up the slow load process.  
     $dbh->begin_tran();  
     # Load the database table safely.  
     my $rv;  
     eval {  
         Trace("Loading file into relation $relName.") if T(3);  
         $rv = $dbh->load_table(file => $self->{fileName}, tbl => $relName);  
     };  
     # Check to see if we succeeded.  
     if (!defined $rv) {  
         # We've failed. Format a useful message. If we have an error message from  
         # EVAL, we use it.  
         my $msg = "Table load failed for $relName" . ($@ ? ": $@" : ".");  
         $stats->AddMessage($msg);  
         Trace($msg) if T(1);  
     } else {  
         # Here we successfully loaded the table. Trace the number of records loaded.  
         my $lineCount = $self->{lineCount};  
         my $byteCount = $self->{fileSize};  
         Trace("$lineCount records ($byteCount bytes) loaded into $relName.") if T(2);  
         # Accumulate the statistics.  
         $stats->Add("records", $lineCount);  
         $stats->Add("bytes", $byteCount);  
     }  
     # Close the database transaction.  
     $dbh->commit_tran();  
 }  
   
198  1;  1;
199    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.6

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3