[Bio] / Sprout / SimLoad.pl Repository:
ViewVC logotype

Diff of /Sprout/SimLoad.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Wed Jul 27 20:05:58 2005 UTC revision 1.9, Thu Feb 2 21:30:05 2006 UTC
# Line 4  Line 4 
4  use CGI;  use CGI;
5  use Tracer;  use Tracer;
6  use Genome;  use Genome;
7    use ERDBLoad;
8    use DBKernel;
9  use SimBlocks;  use SimBlocks;
10  use File::Path;  use File::Path;
11  use BasicLocation;  use BasicLocation;
12    use Cwd;
13    
14  =head1 Similarity Block Loader  =head1 Similarity Block Loader
15    
16  This script loads the similarity block database from  This script loads the similarity block database from
17  the input files.  the input files. The load process involves two steps:
18    converting the input files into C<dtx> load files
19  The script takes a single parameter-- the directory name.  (B<generate>), and loading the C<dtx> files into the
20  The default directory name is taken from the config file  database (B<load>).
21  parameter C<$fig_config::SimBlastData>. The output files  
22  will be produced in the similarity block data directory  The script takes a single parameter-- a directory name.
23  C<$fig_config::SimBlockData>, which will be created if  The default directory name is C<"$FIG_Config::data/CloseStrainSets">.
24  it does not exist.  The input files should be in subdirectories called
25    C<Block> under the subdirectories of the input directory.
26    The subdirectory names themselves are considered the name
27    of the close-strain set. So, for example
28    
29        Data/CloseStrainSets/Vibrio/Block
30    
31    would be presumed to contain the Vibrio strains.
32    
33    The output files will be produced in the similarity block
34    data directory C<$fig_config::SimBlockData>, which will be
35    created if it does not exist. The input directory and all its
36    subdirectories will be processed for input files.
37    
38  In addition to the directory name, the following  In addition to the directory name, the following
39  option parameters are supported.  option parameters are supported.
# Line 28  Line 43 
43  =item trace  =item trace
44    
45  Trace level for output messages. A higher number means more  Trace level for output messages. A higher number means more
46  messages. The default is C<1>.  messages. The default is C<3>. Trace messages are sent to
47    the file C<trace.log> in the B<$FIG_Config::tmp>
48    directory.
49    
50  =item logFile  =item sql
51    
52  Name of a file to be used for trace messages, or  If specified, SQL activity will be traced at the specified
53  C<*> to send them to the standard output. If a  trace level.
 file is specified, the output directory must  
 already exist. The default is C<*>.  
54    
55  =item load  =item load
56    
# Line 47  Line 62 
62  C<yes> to generate output files from input files, else  C<yes> to generate output files from input files, else
63  C<no>. The default is C<yes>.  C<no>. The default is C<yes>.
64    
65    =item createDB
66    
67    If specified, the database will be dropped and
68    re-created before loading.
69    
70  =back  =back
71    
72  For example, the following command line will process the  For example, the following command line will process the
73  C<Fragment.sort> file in the C</Users/fig/BlastData> directory and  input files in the C</Users/fig/BlastData> directory tree
74  run at a trace level of 3.  and run at a trace level of 3.
75    
76  C<< SimLoad /Users/fig/BlastData -trace=3 >>  C<< SimLoad -trace=3 /Users/fig/BlastData >>
77    
78  Please note that spaces in the file and directory names must  The following command line converts the input files in
79  be escaped as C<\b> to prevent them from being parsed as  the default directory into load files but does not load
80  argument delimiters.  the database and runs at a trace level of 2.
81    
82    C<< SimLoad -load=no >>
83    
84  =head2 Input Directory  =head2 Input Directory
85    
86  The following files must exist in the input directory.  The following files must exist in each C<Block> directory
87    under the input directory.
88    
89  =over 4  =over 4
90    
91  =item Genome.tbl  =item genome.tbl
92    
93  This is a tab-delimited file that contains the ID of each  This is a tab-delimited file that contains the ID of each
94  genome followed by a description string.  genome followed by a description string.
95    
96  =item Block.tbl, InterGenic_Block.tbl  =item block.tbl, intergenic_block.tbl
97    
98  These are tab-delimited files that associate a gene name  These are tab-delimited files that associate a gene name
99  with each block. The InterGenic file is optional.  with each block. The InterGenic file is optional.
100    
101  =item Region.tbl, InterGenic_Region.tbl  =item region.tbl, intergenic_region.tbl
102    
103  These are tab-delimited files that describe each region  These are tab-delimited files that describe each region
104  of a block. The InterGenic file is optional.  of a block. The InterGenic file is optional.
# Line 84  Line 107 
107    
108  The format of each file is given below.  The format of each file is given below.
109    
110  =head3 Genome.tbl  =head3 genome.tbl
111    
112  The Genome file is copied almost unmodified to the  The Genome file is copied almost unmodified to the
113  load file for the B<Genome> entity. Each record  load file for the B<Genome> entity. Each record
# Line 102  Line 125 
125  A text description of the genome (usually the species name with  A text description of the genome (usually the species name with
126  a strain ID).  a strain ID).
127    
128    =item groupName
129    
130    The name of the group to which the genome belongs.
131    
132  =back  =back
133    
134  =head3 Block.tbl, InterGenic_Block.tbl  =head3 block.tbl, intergenic_block.tbl
135    
136  These files produce most of the data found in the B<GroupBlock>  These files produce most of the data found in the B<GroupBlock>
137  entity. Each record represents a single block. Blocks either  entity. Each record represents a single block. Blocks either
# Line 127  Line 154 
154    
155  =back  =back
156    
157  =head3 Region.tbl, InterGenic_Region.tbl  =head3 region.tbl, intergenic_region.tbl
158    
159  These files describe the regions in each block. They are  These files describe the regions in each block. They are
160  used to derive the relationships between genomes and  used to derive the relationships between genomes and
# Line 278  Line 305 
305  my $TRAILER = 999999999;  my $TRAILER = 999999999;
306    
307  # Parse the command line.  # Parse the command line.
308  my ($options, @arguments) = Tracer::ParseCommand({ trace => 1,  my ($options, @arguments) = StandardSetup(['SimBlocks'],
309                                                    logFile => "*",                                                  { load => ['yes', "'no' to suppress loading the database"],
310                                                    load => 'yes',                                                    generate => ['yes', "'no' to suppress generating the load files"],
311                                                    generate => 'yes'},                                                    createDB => [0, 'drop and create the database before loading']
312                                                    },
313                                                    "directoryName",
314                                                   @ARGV);                                                   @ARGV);
315  # Extract the directory name from the argument array.  # Extract the directory name from the argument array.
316  my $inDirectory = $FIG_Config::simBlastData;  my $inDirectoryTree = ($arguments[0] ? Cwd::abs_path($arguments[0]) : "$FIG_Config::data/CloseStrainSets");
317  if ($arguments[0]) {  # Check to see if we need to create the database.
318      $inDirectory = Cwd::abs_path($arguments[0]);  if ($options->{createDB}) {
319  }      Trace("Creating database.") if T(2);
320  # Set up tracing.      DBKernel::CreateDB($FIG_Config::simBlocksDB);
321  my $traceLevel = $options->{trace};  }
 my $traceOption = $options->{logFile};  
 my $TracingToFile = ($traceOption ne "*");  
 my $traceDestination = ($TracingToFile ? ">$traceOption" : "TEXT");  
 TSetup("$traceLevel Tracer", $traceDestination);  
322  # Get the output directory.  # Get the output directory.
323  my $outDirectory = $FIG_Config::simBlocksData;  my $outDirectory = $FIG_Config::simBlocksData;
324  # Insure that it exists.  # Insure that it exists.
325  if (! -d $outDirectory) {  if (! -d $outDirectory) {
326      Output("Creating output directory $outDirectory.");      Trace("Creating output directory $outDirectory.") if T(2);
327      mkpath($outDirectory);      mkpath($outDirectory);
328    } elsif ($options->{generate} eq 'yes') {
329        # Here we have an output directory already and are going to generate new
330        # load files. Clear any leftover data from previous runs.
331        my @files = grep { $_ =~ /.dtx$/ } Tracer::OpenDir($outDirectory);
332        my $numFiles = @files;
333        if ($numFiles > 0) {
334            Trace("Deleting $numFiles old dtx files from $outDirectory.") if T(2);
335            unlink map { "$outDirectory/$_" } @files;
336        }
337  }  }
338  # Create an error counter.  # Create an error counter and a directory counter.
339  my $errorCount = 0;  my $errorCount = 0;
340    my $dirCount = 0;
341    # Get a similarity block object. Note that we've waited until the database
342    # was already created before doing this. The tables, however, may not yet
343    # exist, so we can't do very much with it.
344    my $simBlocks = SimBlocks->new();
345    # Determine if this is a load-only run.
346    my $loadOnly = ($options->{generate} eq 'no');
347    # Create the loader objects for the tables. These are global to the whole
348    # script.
349    my $GenomeLoad = ERDBLoad->new($simBlocks, 'Genome', $outDirectory, $loadOnly);
350    my $ContigLoad = ERDBLoad->new($simBlocks, 'Contig', $outDirectory, $loadOnly);
351    my $GroupBlockLoad = ERDBLoad->new($simBlocks, 'GroupBlock', $outDirectory, $loadOnly);
352    my $RegionLoad = ERDBLoad->new($simBlocks, 'Region', $outDirectory, $loadOnly);
353    my $ConsistsOfLoad = ERDBLoad->new($simBlocks, 'ConsistsOf', $outDirectory, $loadOnly);
354    my $ContainsRegionLoad = ERDBLoad->new($simBlocks, 'ContainsRegion', $outDirectory, $loadOnly);
355    my $HasInstanceOfLoad = ERDBLoad->new($simBlocks, 'HasInstanceOf', $outDirectory, $loadOnly);
356    my $IncludesRegionLoad = ERDBLoad->new($simBlocks, 'IncludesRegion', $outDirectory, $loadOnly);
357    # This hash helps us to clean up when we're done.
358    my $loaderList = { Genome => $GenomeLoad, Contig => $ContigLoad,
359                       GroupBlock => $GroupBlockLoad, Region => $RegionLoad,
360                       ConsistsOf => $ConsistsOfLoad, ContainsRegion => $ContainsRegionLoad,
361                       HasInstanceOf => $HasInstanceOfLoad,
362                       IncludesRegion => $IncludesRegionLoad};
363  # Check to see if we should generate the output files.  # Check to see if we should generate the output files.
364  if ($options->{generate} eq 'no') {  if ($loadOnly) {
365      # Here we are to use existing output files.      # Here we are to use existing output files.
366      Output("Existing database load files will be used.");      Trace("Existing database load files will be used.") if T(2);
367  } else {  } else {
368      # Here we need to produce new output files.      # Here we need to produce new output files.
369      # Verify that the input directory exists.      # Verify that the input directory exists.
370      if (! -d $inDirectory) {      if (! -d $inDirectoryTree) {
371          Confess("Input directory \"$inDirectory\" not found.");          Confess("Input directory \"$inDirectoryTree\" not found.");
372        }
373        # Loop through the subdirectories.
374        for my $inputDirectory (Tracer::OpenDir($inDirectoryTree, 1)) {
375            # Verify that this is a directory.
376            my $inDirectory = "$inDirectoryTree/$inputDirectory/Blocks";
377            if (-d $inDirectory) {
378                # Here we have a directory to process. Check for a genome
379                # file.
380                my $genomeFileName = "$inDirectory/genome.tbl";
381                if (! -e $genomeFileName) {
382                    Trace("$genomeFileName not found. Directory skipped.") if T(1);
383                } else {
384                    # Now we can process the directory and accumulate the error
385                    # count.
386                    $errorCount += ProcessDirectory($inDirectory, $outDirectory, $inputDirectory);
387                    $dirCount++;
388                }
389            }
390        }
391        Trace("Load files generated from $dirCount directories.") if T(2);
392        # Now finish up the loading.
393        my $stats = Stats->new();
394        for my $table (keys %{$loaderList}) {
395            Trace("Finishing $table.") if T(2);
396            my $tableThing = $loaderList->{$table};
397            my $newStats = $tableThing->Finish();
398            $stats->Accumulate($newStats);
399        }
400        Trace("Statistics for generate.\n" . $stats->Show()) if T(2);
401    }
402    # Check for errors.
403    if ($errorCount > 0) {
404        Trace("$errorCount errors found in input files.") if T(0);
405    } else {
406        # No errors, so it's okay to load the database.
407        if ($options->{load} eq 'yes') {
408            # Here we have no outstanding errors and the user wants us to load
409            # the database. First, we create a similarity block object.
410            # Use it to load the database. Note we specify that the tables are to be
411            # dropped and rebuilt.
412            $simBlocks->LoadTables($outDirectory, 1);
413            Trace("Database loaded.") if T(2);
414      }      }
415      # Our first task is to copy the genome data to the output directory  }
416      # and build the genome list.  
417    # Process a single input directory.
418    sub ProcessDirectory {
419        my ($inDirectory, $outDirectory, $groupName) = @_;
420        Trace("Processing directory $inDirectory.") if T(2);
421        # Our first task is to copy the genome data to the output
422        # and add the genomes to the genome list.
423      my %genomes = ();      my %genomes = ();
424      (open GENOMESIN, "<$inDirectory/Genome.tbl") ||      Open(\*GENOMESIN, "<$inDirectory/genome.tbl");
425          Confess("Could not open Genome input file in $inDirectory.");      # Count the genomes read and errors found.
     (open GENOMESOUT, ">$outDirectory/Genome.dtx") ||  
         Confess("Could not open Genome output file in $outDirectory.");  
     # Count the genomes read.  
426      my $genomeCount = 0;      my $genomeCount = 0;
427        my $errorCount = 0;
428      # Loop through the input.      # Loop through the input.
429      while (my $genomeData = <GENOMESIN>) {      while (my $genomeData = <GENOMESIN>) {
430          # Echo the genome record to the output.          # Echo the genome record to the output.
431          print GENOMESOUT $genomeData;          my @fields = Tracer::ParseRecord($genomeData);
432            $GenomeLoad->Put(@fields, $groupName);
433          # Extract the genome ID.          # Extract the genome ID.
434          my ($genomeID) = Tracer::ParseRecord($genomeData);          my $genomeID = $fields[0];
435          # Store it in the genomes hash. We start with a value of 0. If          # Store it in the genomes hash. We start with a value of 0. If
436          # contig information for the genome is found, we change the value          # contig information for the genome is found, we change the value
437          # to 1. When we're all done with the regions, we can check the          # to 1. When we're all done with the regions, we can check the
# Line 336  Line 440 
440          # Count this genome.          # Count this genome.
441          $genomeCount++;          $genomeCount++;
442      }      }
443      Output("$genomeCount genomes found.");      Trace("$genomeCount genomes found.") if T(2);
444      # Close the files.      # Close the genome file.
445      close GENOMESIN;      close GENOMESIN;
     close GENOMESOUT;  
446      # Create the contig hash used to associate contigs to their parent      # Create the contig hash used to associate contigs to their parent
447      # genomes.      # genomes.
448      my %contigs = ();      my %contigs = ();
449      # Now we begin to read the Block and Region files in parallel. Both      # Now we begin to read the Block and Region files in parallel. Both
450      # are sorted by block ID, so all processing for this section of the      # are sorted by block ID, so all processing for this section of the
451      # script is done a block at a time. The first task is to      # script is done a block at a time. First, we determine which file
452      # open the output files.      # sets we'll be processing. There are at most two: regular and
453      (open BLOCKSOUT, ">$outDirectory/GroupBlock.dtx") ||      # intergenic.
         Confess("Could not open block output file in $outDirectory.");  
     (open REGIONSOUT, ">$outDirectory/Region.dtx") ||  
         Confess("Could not open region output file in $outDirectory.");  
     (open INSTANCESOUT, ">$outDirectory/HasInstanceOf.dtx") ||  
         Confess("Could not open instance output file in $outDirectory.");  
     (open CONTAINSOUT, ">$outDirectory/ContainsRegion.dtx") ||  
         Confess("Could not open contig-region output file in $outDirectory.");  
     (open INCLUDESOUT, ">$outDirectory/IncludesRegion.dtx") ||  
         Confess("Could not open block-region output file in $outDirectory.");  
     # Determine which file sets we'll be processing.  
454      my @fileSets = ();      my @fileSets = ();
455      my @prefixes = ("", "InterGenic_");      my @prefixes = ("", "intergenic_");
456      for my $prefix (@prefixes) {      for my $prefix (@prefixes) {
457          if (-e "$inDirectory/${prefix}Block.tbl") {          if (-e "$inDirectory/${prefix}block.tbl") {
458              push @fileSets, $prefix;              push @fileSets, $prefix;
459          }          }
460      }      }
461        # Set up the duplicate-region check.
462        my %allRegions = ();
463      # Set up some counters.      # Set up some counters.
464      my ($blocksCount, $regionsCount) = (0, 0);      my ($blocksCount, $regionsCount) = (0, 0);
465      # Loop through the useful file sets.      # Loop through the useful file sets.
466      for my $fileSet (@fileSets) {      for my $fileSet (@fileSets) {
467          (open BLOCKSIN, "<$inDirectory/${fileSet}Block.tbl") ||          Open(\*BLOCKSIN, "<$inDirectory/${fileSet}block.tbl");
468              Confess("Could not open block input file in $inDirectory.");          Open(\*REGIONSIN, "<$inDirectory/${fileSet}region.tbl");
469          (open REGIONSIN, "<$inDirectory/${fileSet}Region.tbl") ||          Trace("Processing ${fileSet}Blocks.") if T(2);
             Confess("Could not open region input file in $inDirectory.");  
         Output("Processing ${fileSet}Blocks.");  
470          # The outer loop processes blocks. This is accomplished by reading          # The outer loop processes blocks. This is accomplished by reading
471          # through the block file. We prime the loop by reading the first          # through the block file. We prime the loop by reading the first
472          # region record. This is because we finish processing a block when          # region record. This is because we finish processing a block when
# Line 403  Line 496 
496                  # If this region's block ID is invalid, complain                  # If this region's block ID is invalid, complain
497                  # and skip it.                  # and skip it.
498                  if ($regionRecord{blockID} != $blockID) {                  if ($regionRecord{blockID} != $blockID) {
499                      Output("Block $regionRecord{blockID} in region record $regionsCount not found in block input file at record $blocksCount.");                      Trace("Block $regionRecord{blockID} in region record $regionsCount not found in block input file at record $blocksCount.") if T(0);
500                      $errorCount++;                      $errorCount++;
501                  } else {                  } else {
502                      # Here both files are in sync, which is good. The next step is                      # Here both files are in sync, which is good. The next step is
# Line 411  Line 504 
504                      my $genomeID = $regionRecord{genomeID};                      my $genomeID = $regionRecord{genomeID};
505                      my $contigID = "$genomeID:$regionRecord{contigID}";                      my $contigID = "$genomeID:$regionRecord{contigID}";
506                      if (! exists $genomes{$genomeID}) {                      if (! exists $genomes{$genomeID}) {
507                          Output("Genome $genomeID in region record $regionsCount not found in genome input file.");                          Trace("Genome $genomeID in region record $regionsCount not found in genome input file.") if T(0);
508                          $errorCount++;                          $errorCount++;
509                      } else {                      } else {
510                          # Denote this genome has an instance of this block.                          # Denote this genome has an instance of this block.
# Line 435  Line 528 
528                              $blockData{len} = length $snippet;                              $blockData{len} = length $snippet;
529                          } elsif ($blockData{len} != length $snippet) {                          } elsif ($blockData{len} != length $snippet) {
530                              # Here it is not the first, but the lengths do not match.                              # Here it is not the first, but the lengths do not match.
531                              Output("Snippet for region record $regionsCount does not match block length $blockData{len}.");                              Trace("Snippet for region record $regionsCount does not match block length $blockData{len}.") if T(0);
532                              $errorCount++;                              $errorCount++;
533                          } else {                          } else {
534                              # Here everything is legitimate, so we merge the new                              # Here everything is legitimate, so we merge the new
# Line 453  Line 546 
546              # We have now processed all the regions in the block. Insure we found at least              # We have now processed all the regions in the block. Insure we found at least
547              # one.              # one.
548              if (! $regionCounter) {              if (! $regionCounter) {
549                  Output("No regions found for block $blockID at $blocksCount in block input file.");                  Trace("No regions found for block $blockID at $blocksCount in block input file.") if T(0);
550                  $errorCount++;                  $errorCount++;
551              } else {              } else {
552                  Trace("$regionCounter regions found in block $blockID.") if T(2);                  Trace("$regionCounter regions found in block $blockID.") if T(4);
553                  # Write the block record.                  # Write the block record. Note that the block ID is prefixed by the group name to
554                    # make it unique.
555                  my $variance = $blockData{"snip-count"} / $blockData{len};                  my $variance = $blockData{"snip-count"} / $blockData{len};
556                  print BLOCKSOUT join("\t", $blockID, $blockData{description}, $blockData{len},                  $GroupBlockLoad->Put("$groupName:$blockID", $blockData{description}, $blockData{len},
557                                       $blockData{"snip-count"}, $variance, $blockData{pattern}) . "\n";                                       $blockData{"snip-count"}, $variance, $blockData{pattern});
558                  # Find all the variance points in the block pattern. We'll use them to create                  # Find all the variance points in the block pattern. We'll use them to create
559                  # the content strings for each region.                  # the content strings for each region.
560                  my @positions = SimBlocks::ParsePattern($blockData{pattern});                  my @positions = SimBlocks::ParsePattern($blockData{pattern});
561                  # Loop through the regions, writing them out to the region output file.                  # Loop through the regions, writing them out to the region output file.
562                  for my $region (keys %regionMap) {                  for my $region (keys %regionMap) {
563                        if (length($region) > 80) {
564                            Trace("Invalid region key \"$region\".") if T(1);
565                        }
566                      # Get the region's snips.                      # Get the region's snips.
567                      my $source = $regionMap{$region};                      my $source = $regionMap{$region};
568                      my $content = "";                      my $content = "";
# Line 475  Line 572 
572                      # Get the region's location data.                      # Get the region's location data.
573                      my $location = BasicLocation->new($region);                      my $location = BasicLocation->new($region);
574                      # Write this region to the output files.                      # Write this region to the output files.
575                      print REGIONSOUT join("\t", $region, $location->Contig, $location->Dir,                      $RegionLoad->Put($region, $location->Contig, $location->Dir,
576                                            $location->Length, $regionPeg{$region},                                       $location->Right, $location->Length,
577                                            $location->Left, $content) . "\n";                                       $regionPeg{$region}, $location->Left, $content);
578                      print CONTAINSOUT join("\t", $location->Contig, $region,                      $ContainsRegionLoad->Put($location->Contig, $region, $location->Length,
579                                             $location->Length, $location->Left) . "\n";                                               $location->Left);
580                      print INCLUDESOUT join("\t", $blockID, $region);                      $IncludesRegionLoad->Put("$groupName:$blockID", $region);
581                  }                  }
582                  # Finally, we need to connect this block to the genomes in which it occurs.                  # Finally, we need to connect this block to the genomes in which it occurs.
583                  for my $genomeID (keys %genomesFound) {                  for my $genomeID (keys %genomesFound) {
584                      print INSTANCESOUT join("\t", $genomeID, $blockID) . "\n";                      $HasInstanceOfLoad->Put($genomeID, "$groupName:$blockID");
585                  }                  }
586                  # Count this block's regions.                  # Count this block's regions.
587                  $regionsCount += $regionCounter;                  $regionsCount += $regionCounter;
# Line 495  Line 592 
592          close REGIONSIN;          close REGIONSIN;
593      }      }
594      # Close the output files.      # Close the output files.
     close REGIONSOUT;  
     close BLOCKSOUT;  
     close INSTANCESOUT;  
595      # All the block data has been written. Tell the user what we found.      # All the block data has been written. Tell the user what we found.
596      Output("$blocksCount blocks processed, $regionsCount regions processed.");      Trace("$blocksCount blocks processed, $regionsCount regions processed.") if T(2);
597      # The next task is to write the genome/contig data. This is provided by the      # The next task is to write the genome/contig data. This is provided by the
598      # "%contigs" hash. First, we need to open the files.      # "%contigs" hash.
599      my $contigsCount = 0;      my $contigsCount = 0;
     (open CONTIGSOUT, ">$outDirectory/Contig.dtx") ||  
         Confess("Could not open contig output file in $outDirectory.");  
     (open CONSISTSOUT, ">$outDirectory/ConsistsOf.dtx") ||  
         Confess("Could not open consists output file in $outDirectory.");  
600      for my $contigID (keys %contigs) {      for my $contigID (keys %contigs) {
601          print CONTIGSOUT "$contigID\n";          $ContigLoad->Put("$contigID");
602          print CONSISTSOUT join("\t", $contigs{$contigID}, $contigID) . "\n";          $ConsistsOfLoad->Put($contigs{$contigID}, $contigID);
603          $contigsCount++;          $contigsCount++;
604      }      }
605      Output("$contigsCount contigs found.");      Trace("$contigsCount contigs found.") if T(2);
606      # Now warn the user about all the genomes that didn't have blocks.      # Now warn the user about all the genomes that didn't have blocks.
607      for my $genomeID (keys %genomes) {      for my $genomeID (keys %genomes) {
608          if (! $genomes{$genomeID}) {          if (! $genomes{$genomeID}) {
609              Output("Genome $genomeID did not have any regions.");              Trace("Genome $genomeID did not have any regions.") if T(1);
610              $errorCount++;              $errorCount++;
611          }          }
612      }      }
613  }      return $errorCount;
 # Check for errors.  
 if ($errorCount > 0) {  
     Output("$errorCount errors found in input files.");  
 } else {  
     # No errors, so it's okay to load the database.  
     if ($options->{load} eq 'yes') {  
         # Here we have no outstanding errors and the user wants us to load  
         # the database. First, we create a similarity block object.  
         my $simBlocks = SimBlocks->new();  
         # Use it to load the database. Note we specify that the tables are to be  
         # dropped and rebuilt.  
         $simBlocks->LoadTables($outDirectory, 1);  
         Output("Database loaded.");  
     }  
614  }  }
615  # Tell the user we're done.  # Tell the user we're done.
616  Output("Processing complete.");  Trace("Processing complete.") if T(0);
617    
618  # Read a region record from the file and parse it into a hash  # Read a region record from the file and parse it into a hash
619  # for return to the caller. If we reach end-of-file, the  # for return to the caller. If we reach end-of-file, the
# Line 562  Line 638 
638      return %retVal;      return %retVal;
639  }  }
640    
 # Write an output line. The output line will be traced at level 0. If  
 # $TracingToFile is set, it will be echoed to the standard output.  
 sub Output {  
     my ($message) = @_;  
     Trace($message) if T(0);  
     if ($TracingToFile) {  
         print "$message\n";  
     }  
 }  
   
641  1;  1;

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3