[Bio] / Sprout / SimLoad.pl Repository:
ViewVC logotype

Diff of /Sprout/SimLoad.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Wed Jul 27 20:05:58 2005 UTC revision 1.11, Thu Dec 6 14:58:03 2007 UTC
# Line 4  Line 4 
4  use CGI;  use CGI;
5  use Tracer;  use Tracer;
6  use Genome;  use Genome;
7    use ERDBLoad;
8    use DBKernel;
9  use SimBlocks;  use SimBlocks;
10  use File::Path;  use File::Path;
11  use BasicLocation;  use BasicLocation;
12    use Cwd;
13    
14  =head1 Similarity Block Loader  =head1 Similarity Block Loader
15    
16  This script loads the similarity block database from  This script loads the similarity block database from
17  the input files.  the input files. The load process involves two steps:
18    converting the input files into C<dtx> load files
19  The script takes a single parameter-- the directory name.  (B<generate>), and loading the C<dtx> files into the
20  The default directory name is taken from the config file  database (B<load>).
21  parameter C<$fig_config::SimBlastData>. The output files  
22  will be produced in the similarity block data directory  The script takes a single parameter-- a directory name.
23  C<$fig_config::SimBlockData>, which will be created if  The default directory name is C<"$FIG_Config::data/CloseStrainSets">.
24  it does not exist.  The input files should be in subdirectories called
25    C<Block> under the subdirectories of the input directory.
26    The subdirectory names themselves are considered the name
27    of the close-strain set. So, for example
28    
29        Data/CloseStrainSets/Vibrio/Block
30    
31    would be presumed to contain the Vibrio strains.
32    
33    The output files will be produced in the similarity block
34    data directory C<$fig_config::SimBlockData>, which will be
35    created if it does not exist. The input directory and all its
36    subdirectories will be processed for input files.
37    
38  In addition to the directory name, the following  In addition to the directory name, the following
39  option parameters are supported.  option parameters are supported.
# Line 28  Line 43 
43  =item trace  =item trace
44    
45  Trace level for output messages. A higher number means more  Trace level for output messages. A higher number means more
46  messages. The default is C<1>.  messages. The default is C<3>. Trace messages are sent to
47    the file C<trace.log> in the B<$FIG_Config::tmp>
48    directory.
49    
50  =item logFile  =item sql
51    
52  Name of a file to be used for trace messages, or  If specified, SQL activity will be traced at the specified
53  C<*> to send them to the standard output. If a  trace level.
 file is specified, the output directory must  
 already exist. The default is C<*>.  
54    
55  =item load  =item load
56    
# Line 47  Line 62 
62  C<yes> to generate output files from input files, else  C<yes> to generate output files from input files, else
63  C<no>. The default is C<yes>.  C<no>. The default is C<yes>.
64    
65    =item createDB
66    
67    If specified, the database will be dropped and
68    re-created before loading.
69    
70  =back  =back
71    
72  For example, the following command line will process the  For example, the following command line will process the
73  C<Fragment.sort> file in the C</Users/fig/BlastData> directory and  input files in the C</Users/fig/BlastData> directory tree
74  run at a trace level of 3.  and run at a trace level of 3.
75    
76  C<< SimLoad /Users/fig/BlastData -trace=3 >>      SimLoad -trace=3 /Users/fig/BlastData
77    
78  Please note that spaces in the file and directory names must  The following command line converts the input files in
79  be escaped as C<\b> to prevent them from being parsed as  the default directory into load files but does not load
80  argument delimiters.  the database and runs at a trace level of 2.
81    
82        SimLoad -load=no
83    
84  =head2 Input Directory  =head2 Input Directory
85    
86  The following files must exist in the input directory.  The following files must exist in each C<Block> directory
87    under the input directory.
88    
89  =over 4  =over 4
90    
91  =item Genome.tbl  =item genome.tbl
92    
93  This is a tab-delimited file that contains the ID of each  This is a tab-delimited file that contains the ID of each
94  genome followed by a description string.  genome followed by a description string.
95    
96  =item Block.tbl, InterGenic_Block.tbl  =item block.tbl, intergenic_block.tbl
97    
98  These are tab-delimited files that associate a gene name  These are tab-delimited files that associate a gene name
99  with each block. The InterGenic file is optional.  with each block. The InterGenic file is optional.
100    
101  =item Region.tbl, InterGenic_Region.tbl  =item region.tbl, intergenic_region.tbl
102    
103  These are tab-delimited files that describe each region  These are tab-delimited files that describe each region
104  of a block. The InterGenic file is optional.  of a block. The InterGenic file is optional.
# Line 84  Line 107 
107    
108  The format of each file is given below.  The format of each file is given below.
109    
110  =head3 Genome.tbl  =head3 genome.tbl
111    
112  The Genome file is copied almost unmodified to the  The Genome file is copied almost unmodified to the
113  load file for the B<Genome> entity. Each record  load file for the B<Genome> entity. Each record
# Line 102  Line 125 
125  A text description of the genome (usually the species name with  A text description of the genome (usually the species name with
126  a strain ID).  a strain ID).
127    
128    =item groupName
129    
130    The name of the group to which the genome belongs.
131    
132  =back  =back
133    
134  =head3 Block.tbl, InterGenic_Block.tbl  =head3 block.tbl, intergenic_block.tbl
135    
136  These files produce most of the data found in the B<GroupBlock>  These files produce most of the data found in the B<GroupBlock>
137  entity. Each record represents a single block. Blocks either  entity. Each record represents a single block. Blocks either
# Line 127  Line 154 
154    
155  =back  =back
156    
157  =head3 Region.tbl, InterGenic_Region.tbl  =head3 region.tbl, intergenic_region.tbl
158    
159  These files describe the regions in each block. They are  These files describe the regions in each block. They are
160  used to derive the relationships between genomes and  used to derive the relationships between genomes and
# Line 278  Line 305 
305  my $TRAILER = 999999999;  my $TRAILER = 999999999;
306    
307  # Parse the command line.  # Parse the command line.
308  my ($options, @arguments) = Tracer::ParseCommand({ trace => 1,  my ($options, @arguments) = StandardSetup(['SimBlocks'],
309                                                    logFile => "*",                                                  { load => ['yes', "'no' to suppress loading the database"],
310                                                    load => 'yes',                                                    generate => ['yes', "'no' to suppress generating the load files"],
311                                                    generate => 'yes'},                                                    createDB => [0, 'drop and create the database before loading']
312                                                    },
313                                                    "directoryName",
314                                                   @ARGV);                                                   @ARGV);
315  # Extract the directory name from the argument array.  # Extract the directory name from the argument array.
316  my $inDirectory = $FIG_Config::simBlastData;  my $inDirectoryTree = ($arguments[0] ? Cwd::abs_path($arguments[0]) : "$FIG_Config::data/CloseStrainSets");
317  if ($arguments[0]) {  # Check to see if we need to create the database.
318      $inDirectory = Cwd::abs_path($arguments[0]);  if ($options->{createDB}) {
319  }      my $dbname = SimBlocks::DBName();
320  # Set up tracing.      Trace("Creating database $dbname.") if T(2);
321  my $traceLevel = $options->{trace};      DBKernel::CreateDB($dbname);
322  my $traceOption = $options->{logFile};  }
 my $TracingToFile = ($traceOption ne "*");  
 my $traceDestination = ($TracingToFile ? ">$traceOption" : "TEXT");  
 TSetup("$traceLevel Tracer", $traceDestination);  
323  # Get the output directory.  # Get the output directory.
324  my $outDirectory = $FIG_Config::simBlocksData;  my $outDirectory = $FIG_Config::simBlocksData;
325    # Default it if it isn't present.
326    if (! $outDirectory) {
327        $outDirectory = "$FIG_Config::fig/BlockData";
328    }
329  # Insure that it exists.  # Insure that it exists.
330  if (! -d $outDirectory) {  if (! -d $outDirectory) {
331      Output("Creating output directory $outDirectory.");      Trace("Creating output directory $outDirectory.") if T(2);
332      mkpath($outDirectory);      mkpath($outDirectory);
333    } elsif ($options->{generate} eq 'yes') {
334        # Here we have an output directory already and are going to generate new
335        # load files. Clear any leftover data from previous runs.
336        my @files = grep { $_ =~ /.dtx$/ } Tracer::OpenDir($outDirectory);
337        my $numFiles = @files;
338        if ($numFiles > 0) {
339            Trace("Deleting $numFiles old dtx files from $outDirectory.") if T(2);
340            unlink map { "$outDirectory/$_" } @files;
341        }
342  }  }
343  # Create an error counter.  # Create an error counter and a directory counter.
344  my $errorCount = 0;  my $errorCount = 0;
345    my $dirCount = 0;
346    # Get a similarity block object. Note that we've waited until the database
347    # was already created before doing this. The tables, however, may not yet
348    # exist, so we can't do very much with it.
349    my $simBlocks = SimBlocks->new();
350    # Determine if this is a load-only run.
351    my $loadOnly = ($options->{generate} eq 'no');
352    # Create the loader objects for the tables. These are global to the whole
353    # script.
354    my $GenomeLoad = ERDBLoad->new($simBlocks, 'Genome', $outDirectory, $loadOnly);
355    my $ContigLoad = ERDBLoad->new($simBlocks, 'Contig', $outDirectory, $loadOnly);
356    my $GroupBlockLoad = ERDBLoad->new($simBlocks, 'GroupBlock', $outDirectory, $loadOnly);
357    my $RegionLoad = ERDBLoad->new($simBlocks, 'Region', $outDirectory, $loadOnly);
358    my $ConsistsOfLoad = ERDBLoad->new($simBlocks, 'ConsistsOf', $outDirectory, $loadOnly);
359    my $ContainsRegionLoad = ERDBLoad->new($simBlocks, 'ContainsRegion', $outDirectory, $loadOnly);
360    my $HasInstanceOfLoad = ERDBLoad->new($simBlocks, 'HasInstanceOf', $outDirectory, $loadOnly);
361    my $IncludesRegionLoad = ERDBLoad->new($simBlocks, 'IncludesRegion', $outDirectory, $loadOnly);
362    # This hash helps us to clean up when we're done.
363    my $loaderList = { Genome => $GenomeLoad, Contig => $ContigLoad,
364                       GroupBlock => $GroupBlockLoad, Region => $RegionLoad,
365                       ConsistsOf => $ConsistsOfLoad, ContainsRegion => $ContainsRegionLoad,
366                       HasInstanceOf => $HasInstanceOfLoad,
367                       IncludesRegion => $IncludesRegionLoad};
368  # Check to see if we should generate the output files.  # Check to see if we should generate the output files.
369  if ($options->{generate} eq 'no') {  if ($loadOnly) {
370      # Here we are to use existing output files.      # Here we are to use existing output files.
371      Output("Existing database load files will be used.");      Trace("Existing database load files will be used.") if T(2);
372  } else {  } else {
373      # Here we need to produce new output files.      # Here we need to produce new output files.
374      # Verify that the input directory exists.      # Verify that the input directory exists.
375      if (! -d $inDirectory) {      if (! -d $inDirectoryTree) {
376          Confess("Input directory \"$inDirectory\" not found.");          Confess("Input directory \"$inDirectoryTree\" not found.");
377        }
378        # Loop through the subdirectories.
379        for my $inputDirectory (Tracer::OpenDir($inDirectoryTree, 1)) {
380            # Verify that this is a directory.
381            my $inDirectory = "$inDirectoryTree/$inputDirectory/Blocks";
382            if (-d $inDirectory) {
383                # Here we have a directory to process. Check for a genome
384                # file.
385                my $genomeFileName = "$inDirectory/genome.tbl";
386                if (! -e $genomeFileName) {
387                    Trace("$genomeFileName not found. Directory skipped.") if T(1);
388                } else {
389                    # Now we can process the directory and accumulate the error
390                    # count.
391                    $errorCount += ProcessDirectory($inDirectory, $outDirectory, $inputDirectory);
392                    $dirCount++;
393      }      }
394      # Our first task is to copy the genome data to the output directory          }
395      # and build the genome list.      }
396        Trace("Load files generated from $dirCount directories.") if T(2);
397        # Now finish up the loading.
398        my $stats = Stats->new();
399        for my $table (keys %{$loaderList}) {
400            Trace("Finishing $table.") if T(2);
401            my $tableThing = $loaderList->{$table};
402            my $newStats = $tableThing->Finish();
403            $stats->Accumulate($newStats);
404        }
405        Trace("Statistics for generate.\n" . $stats->Show()) if T(2);
406    }
407    # Check for errors.
408    if ($errorCount > 0) {
409        Trace("$errorCount errors found in input files.") if T(0);
410    } else {
411        # No errors, so it's okay to load the database.
412        if ($options->{load} eq 'yes') {
413            # Here we have no outstanding errors and the user wants us to load
414            # the database. First, we create a similarity block object.
415            # Use it to load the database. Note we specify that the tables are to be
416            # dropped and rebuilt.
417            $simBlocks->LoadTables($outDirectory, 1);
418            Trace("Database loaded.") if T(2);
419        }
420    }
421    
422    # Process a single input directory.
423    sub ProcessDirectory {
424        my ($inDirectory, $outDirectory, $groupName) = @_;
425        Trace("Processing directory $inDirectory.") if T(2);
426        # Our first task is to copy the genome data to the output
427        # and add the genomes to the genome list.
428      my %genomes = ();      my %genomes = ();
429      (open GENOMESIN, "<$inDirectory/Genome.tbl") ||      Open(\*GENOMESIN, "<$inDirectory/genome.tbl");
430          Confess("Could not open Genome input file in $inDirectory.");      # Count the genomes read and errors found.
     (open GENOMESOUT, ">$outDirectory/Genome.dtx") ||  
         Confess("Could not open Genome output file in $outDirectory.");  
     # Count the genomes read.  
431      my $genomeCount = 0;      my $genomeCount = 0;
432        my $errorCount = 0;
433      # Loop through the input.      # Loop through the input.
434      while (my $genomeData = <GENOMESIN>) {      while (my $genomeData = <GENOMESIN>) {
435          # Echo the genome record to the output.          # Echo the genome record to the output.
436          print GENOMESOUT $genomeData;          my @fields = Tracer::ParseRecord($genomeData);
437            $GenomeLoad->Put(@fields, $groupName);
438          # Extract the genome ID.          # Extract the genome ID.
439          my ($genomeID) = Tracer::ParseRecord($genomeData);          my $genomeID = $fields[0];
440          # Store it in the genomes hash. We start with a value of 0. If          # Store it in the genomes hash. We start with a value of 0. If
441          # contig information for the genome is found, we change the value          # contig information for the genome is found, we change the value
442          # to 1. When we're all done with the regions, we can check the          # to 1. When we're all done with the regions, we can check the
# Line 336  Line 445 
445          # Count this genome.          # Count this genome.
446          $genomeCount++;          $genomeCount++;
447      }      }
448      Output("$genomeCount genomes found.");      Trace("$genomeCount genomes found.") if T(2);
449      # Close the files.      # Close the genome file.
450      close GENOMESIN;      close GENOMESIN;
     close GENOMESOUT;  
451      # Create the contig hash used to associate contigs to their parent      # Create the contig hash used to associate contigs to their parent
452      # genomes.      # genomes.
453      my %contigs = ();      my %contigs = ();
454      # Now we begin to read the Block and Region files in parallel. Both      # Now we begin to read the Block and Region files in parallel. Both
455      # are sorted by block ID, so all processing for this section of the      # are sorted by block ID, so all processing for this section of the
456      # script is done a block at a time. The first task is to      # script is done a block at a time. First, we determine which file
457      # open the output files.      # sets we'll be processing. There are at most two: regular and
458      (open BLOCKSOUT, ">$outDirectory/GroupBlock.dtx") ||      # intergenic.
         Confess("Could not open block output file in $outDirectory.");  
     (open REGIONSOUT, ">$outDirectory/Region.dtx") ||  
         Confess("Could not open region output file in $outDirectory.");  
     (open INSTANCESOUT, ">$outDirectory/HasInstanceOf.dtx") ||  
         Confess("Could not open instance output file in $outDirectory.");  
     (open CONTAINSOUT, ">$outDirectory/ContainsRegion.dtx") ||  
         Confess("Could not open contig-region output file in $outDirectory.");  
     (open INCLUDESOUT, ">$outDirectory/IncludesRegion.dtx") ||  
         Confess("Could not open block-region output file in $outDirectory.");  
     # Determine which file sets we'll be processing.  
459      my @fileSets = ();      my @fileSets = ();
460      my @prefixes = ("", "InterGenic_");      my @prefixes = ("", "intergenic_");
461      for my $prefix (@prefixes) {      for my $prefix (@prefixes) {
462          if (-e "$inDirectory/${prefix}Block.tbl") {          if (-e "$inDirectory/${prefix}block.tbl") {
463              push @fileSets, $prefix;              push @fileSets, $prefix;
464          }          }
465      }      }
466        # Set up the duplicate-region check.
467        my %allRegions = ();
468      # Set up some counters.      # Set up some counters.
469      my ($blocksCount, $regionsCount) = (0, 0);      my ($blocksCount, $regionsCount) = (0, 0);
470      # Loop through the useful file sets.      # Loop through the useful file sets.
471      for my $fileSet (@fileSets) {      for my $fileSet (@fileSets) {
472          (open BLOCKSIN, "<$inDirectory/${fileSet}Block.tbl") ||          Open(\*BLOCKSIN, "<$inDirectory/${fileSet}block.tbl");
473              Confess("Could not open block input file in $inDirectory.");          Open(\*REGIONSIN, "<$inDirectory/${fileSet}region.tbl");
474          (open REGIONSIN, "<$inDirectory/${fileSet}Region.tbl") ||          Trace("Processing ${fileSet}Blocks.") if T(2);
             Confess("Could not open region input file in $inDirectory.");  
         Output("Processing ${fileSet}Blocks.");  
475          # The outer loop processes blocks. This is accomplished by reading          # The outer loop processes blocks. This is accomplished by reading
476          # through the block file. We prime the loop by reading the first          # through the block file. We prime the loop by reading the first
477          # region record. This is because we finish processing a block when          # region record. This is because we finish processing a block when
# Line 403  Line 501 
501                  # If this region's block ID is invalid, complain                  # If this region's block ID is invalid, complain
502                  # and skip it.                  # and skip it.
503                  if ($regionRecord{blockID} != $blockID) {                  if ($regionRecord{blockID} != $blockID) {
504                      Output("Block $regionRecord{blockID} in region record $regionsCount not found in block input file at record $blocksCount.");                      Trace("Block $regionRecord{blockID} in region record $regionsCount not found in block input file at record $blocksCount.") if T(0);
505                      $errorCount++;                      $errorCount++;
506                  } else {                  } else {
507                      # Here both files are in sync, which is good. The next step is                      # Here both files are in sync, which is good. The next step is
# Line 411  Line 509 
509                      my $genomeID = $regionRecord{genomeID};                      my $genomeID = $regionRecord{genomeID};
510                      my $contigID = "$genomeID:$regionRecord{contigID}";                      my $contigID = "$genomeID:$regionRecord{contigID}";
511                      if (! exists $genomes{$genomeID}) {                      if (! exists $genomes{$genomeID}) {
512                          Output("Genome $genomeID in region record $regionsCount not found in genome input file.");                          Trace("Genome $genomeID in region record $regionsCount not found in genome input file.") if T(0);
513                          $errorCount++;                          $errorCount++;
514                      } else {                      } else {
515                          # Denote this genome has an instance of this block.                          # Denote this genome has an instance of this block.
# Line 435  Line 533 
533                              $blockData{len} = length $snippet;                              $blockData{len} = length $snippet;
534                          } elsif ($blockData{len} != length $snippet) {                          } elsif ($blockData{len} != length $snippet) {
535                              # Here it is not the first, but the lengths do not match.                              # Here it is not the first, but the lengths do not match.
536                              Output("Snippet for region record $regionsCount does not match block length $blockData{len}.");                              Trace("Snippet for region record $regionsCount does not match block length $blockData{len}.") if T(0);
537                              $errorCount++;                              $errorCount++;
538                          } else {                          } else {
539                              # Here everything is legitimate, so we merge the new                              # Here everything is legitimate, so we merge the new
# Line 453  Line 551 
551              # We have now processed all the regions in the block. Insure we found at least              # We have now processed all the regions in the block. Insure we found at least
552              # one.              # one.
553              if (! $regionCounter) {              if (! $regionCounter) {
554                  Output("No regions found for block $blockID at $blocksCount in block input file.");                  Trace("No regions found for block $blockID at $blocksCount in block input file.") if T(0);
555                  $errorCount++;                  $errorCount++;
556              } else {              } else {
557                  Trace("$regionCounter regions found in block $blockID.") if T(2);                  Trace("$regionCounter regions found in block $blockID.") if T(4);
558                  # Write the block record.                  # Write the block record. Note that the block ID is prefixed by the group name to
559                    # make it unique.
560                  my $variance = $blockData{"snip-count"} / $blockData{len};                  my $variance = $blockData{"snip-count"} / $blockData{len};
561                  print BLOCKSOUT join("\t", $blockID, $blockData{description}, $blockData{len},                  $GroupBlockLoad->Put("$groupName:$blockID", $blockData{description}, $blockData{len},
562                                       $blockData{"snip-count"}, $variance, $blockData{pattern}) . "\n";                                       $blockData{"snip-count"}, $variance, $blockData{pattern});
563                  # Find all the variance points in the block pattern. We'll use them to create                  # Find all the variance points in the block pattern. We'll use them to create
564                  # the content strings for each region.                  # the content strings for each region.
565                  my @positions = SimBlocks::ParsePattern($blockData{pattern});                  my @positions = SimBlocks::ParsePattern($blockData{pattern});
566                  # Loop through the regions, writing them out to the region output file.                  # Loop through the regions, writing them out to the region output file.
567                  for my $region (keys %regionMap) {                  for my $region (keys %regionMap) {
568                        if (length($region) > 80) {
569                            Trace("Invalid region key \"$region\".") if T(1);
570                        }
571                      # Get the region's snips.                      # Get the region's snips.
572                      my $source = $regionMap{$region};                      my $source = $regionMap{$region};
573                      my $content = "";                      my $content = "";
# Line 475  Line 577 
577                      # Get the region's location data.                      # Get the region's location data.
578                      my $location = BasicLocation->new($region);                      my $location = BasicLocation->new($region);
579                      # Write this region to the output files.                      # Write this region to the output files.
580                      print REGIONSOUT join("\t", $region, $location->Contig, $location->Dir,                      $RegionLoad->Put($region, $location->Contig, $location->Dir,
581                                            $location->Length, $regionPeg{$region},                                       $location->Right, $location->Length,
582                                            $location->Left, $content) . "\n";                                       $regionPeg{$region}, $location->Left, $content);
583                      print CONTAINSOUT join("\t", $location->Contig, $region,                      $ContainsRegionLoad->Put($location->Contig, $region, $location->Length,
584                                             $location->Length, $location->Left) . "\n";                                               $location->Left);
585                      print INCLUDESOUT join("\t", $blockID, $region);                      $IncludesRegionLoad->Put("$groupName:$blockID", $region);
586                  }                  }
587                  # Finally, we need to connect this block to the genomes in which it occurs.                  # Finally, we need to connect this block to the genomes in which it occurs.
588                  for my $genomeID (keys %genomesFound) {                  for my $genomeID (keys %genomesFound) {
589                      print INSTANCESOUT join("\t", $genomeID, $blockID) . "\n";                      $HasInstanceOfLoad->Put($genomeID, "$groupName:$blockID");
590                  }                  }
591                  # Count this block's regions.                  # Count this block's regions.
592                  $regionsCount += $regionCounter;                  $regionsCount += $regionCounter;
# Line 495  Line 597 
597          close REGIONSIN;          close REGIONSIN;
598      }      }
599      # Close the output files.      # Close the output files.
     close REGIONSOUT;  
     close BLOCKSOUT;  
     close INSTANCESOUT;  
600      # All the block data has been written. Tell the user what we found.      # All the block data has been written. Tell the user what we found.
601      Output("$blocksCount blocks processed, $regionsCount regions processed.");      Trace("$blocksCount blocks processed, $regionsCount regions processed.") if T(2);
602      # The next task is to write the genome/contig data. This is provided by the      # The next task is to write the genome/contig data. This is provided by the
603      # "%contigs" hash. First, we need to open the files.      # "%contigs" hash.
604      my $contigsCount = 0;      my $contigsCount = 0;
     (open CONTIGSOUT, ">$outDirectory/Contig.dtx") ||  
         Confess("Could not open contig output file in $outDirectory.");  
     (open CONSISTSOUT, ">$outDirectory/ConsistsOf.dtx") ||  
         Confess("Could not open consists output file in $outDirectory.");  
605      for my $contigID (keys %contigs) {      for my $contigID (keys %contigs) {
606          print CONTIGSOUT "$contigID\n";          $ContigLoad->Put("$contigID");
607          print CONSISTSOUT join("\t", $contigs{$contigID}, $contigID) . "\n";          $ConsistsOfLoad->Put($contigs{$contigID}, $contigID);
608          $contigsCount++;          $contigsCount++;
609      }      }
610      Output("$contigsCount contigs found.");      Trace("$contigsCount contigs found.") if T(2);
611      # Now warn the user about all the genomes that didn't have blocks.      # Now warn the user about all the genomes that didn't have blocks.
612      for my $genomeID (keys %genomes) {      for my $genomeID (keys %genomes) {
613          if (! $genomes{$genomeID}) {          if (! $genomes{$genomeID}) {
614              Output("Genome $genomeID did not have any regions.");              Trace("Genome $genomeID did not have any regions.") if T(1);
615              $errorCount++;              $errorCount++;
616          }          }
617      }      }
618  }      return $errorCount;
 # Check for errors.  
 if ($errorCount > 0) {  
     Output("$errorCount errors found in input files.");  
 } else {  
     # No errors, so it's okay to load the database.  
     if ($options->{load} eq 'yes') {  
         # Here we have no outstanding errors and the user wants us to load  
         # the database. First, we create a similarity block object.  
         my $simBlocks = SimBlocks->new();  
         # Use it to load the database. Note we specify that the tables are to be  
         # dropped and rebuilt.  
         $simBlocks->LoadTables($outDirectory, 1);  
         Output("Database loaded.");  
     }  
619  }  }
620  # Tell the user we're done.  # Tell the user we're done.
621  Output("Processing complete.");  Trace("Processing complete.") if T(0);
622    
623  # Read a region record from the file and parse it into a hash  # Read a region record from the file and parse it into a hash
624  # for return to the caller. If we reach end-of-file, the  # for return to the caller. If we reach end-of-file, the
# Line 562  Line 643 
643      return %retVal;      return %retVal;
644  }  }
645    
 # Write an output line. The output line will be traced at level 0. If  
 # $TracingToFile is set, it will be echoed to the standard output.  
 sub Output {  
     my ($message) = @_;  
     Trace($message) if T(0);  
     if ($TracingToFile) {  
         print "$message\n";  
     }  
 }  
   
646  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.11

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3