[Bio] / FigKernelScripts / TransactFeatures.pl
ViewVC logotype

Diff of /FigKernelScripts/TransactFeatures.pl

Parent Directory | Revision Log | View Patch

revision 1.6, Thu Aug 11 05:16:46 2005 UTC | revision 1.20, Tue Feb 5 03:54:42 2008 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2    #
3    # Copyright (c) 2003-2006 University of Chicago and Fellowship
4    # for Interpretations of Genomes. All Rights Reserved.
5    #
6    # This file is part of the SEED Toolkit.
7    #
8    # The SEED Toolkit is free software. You can redistribute
9    # it and/or modify it under the terms of the SEED Toolkit
10    # Public License.
11    #
12    # You should have received a copy of the SEED Toolkit Public License
13    # along with this program; if not write to the University of Chicago
14    # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15    # Genomes at veronika@thefig.info or download a copy from
16    # http://www.theseed.org/LICENSE.TXT.
17    #
18    
19    
20  =head1 Add / Delete / Change Features  =head1 Add / Delete / Change Features
21    
# Line 7  Line 24 
24  a command. The second specifies a directory full of transaction files. The third  a command. The second specifies a directory full of transaction files. The third
25  specifies a file that tells us which feature IDs are available for each organism.  specifies a file that tells us which feature IDs are available for each organism.
26    
27  C<TransactFeatures> I<[options]> I<command> I<transactionDirectory> I<idFile>  C<TransactFeatures> [I<options>] I<command> I<transactionDirectory> I<idFile>
28    
29  The supported commands are  The supported commands are
30    
# Line 16  Line 33 
33  =item count  =item count
34    
35  Count the number of IDs needed to process the ADD and CHANGE transactions. This  Count the number of IDs needed to process the ADD and CHANGE transactions. This
36  will produce an listing of the number of feature IDs needed for each  will produce a listing of the number of feature IDs needed for each
37  organism and feature type. This command is mostly a sanity check: it provides  organism and feature type. This command is mostly a sanity check: it provides
38  useful statistics without changing anything.  useful statistics without changing anything.
39    
# Line 28  Line 45 
45    
46  =item process  =item process
47    
48  Process the transactions and update the FIG data store. This will also create  Process the transactions and update the FIG data store. This will also update
49  a copy of each transaction file in which the pseudo-IDs have been replaced by  the NR file and queue features for similarity generation.
 real IDs.  
   
 =item annotate  
   
 Annotate the features created by the transactions so as to indicate how they were  
 derived.  
50    
51  =item fix  =item fudge
52    
53  Fix the locations of new features and verify the translations of new and changed  Convert transactions that have already been applied to new transactions that can
54  features.  be used to test the transaction processor.
55    
56  =back  =back
57    
# Line 155  Line 166 
166  Assume that the transaction files do not contain aliases. This means that in CHANGE  Assume that the transaction files do not contain aliases. This means that in CHANGE
167  records the translation will immediately follow the location.  records the translation will immediately follow the location.
168    
169    =item sql
170    
171    Trace SQL commands.
172    
173    =item tblFiles
174    
175    Output TBL files containing the corrected IDs. (B<process> command only)
176    
177    =item start
178    
179    ID of the first genome to process. This allows restarting a transaction run that failed
180    in the middle. The default is to run all transaction files.
181    
182    =back
183    
184  =cut  =cut
185    
186  use strict;  use strict;
187  use Tracer;  use Tracer;
 use DocUtils;  
 use TestUtils;  
188  use Cwd;  use Cwd;
189  use File::Copy;  use File::Copy;
190  use File::Path;  use File::Path;
# Line 169  Line 193 
193  use TransactionProcessor;  use TransactionProcessor;
194  use ApplyTransactions;  use ApplyTransactions;
195  use CountTransactions;  use CountTransactions;
196  use AnnotateTransactions;  use FudgeTransactions;
 use FixTransactions;  
197    
198  # Get the command-line options.  # Get the command-line options.
199  my ($options, @parameters) = Tracer::ParseCommand({ trace => 3, safe => 0, noAlias => 0 }, @ARGV);  my ($options, @parameters) = StandardSetup(["FIG"],
200  # Set up tracing.                      { safe => [0, "use database transactions"],
201  my $traceLevel = $options->{trace};                        trace => [2, "trace level"],
202  TSetup("$traceLevel Tracer DocUtils FIG", "TEXT");                        noAlias => [0, "do not expect aliases in CHANGE transactions"],
203  # Get the FIG object.                        start => [' ', "start with this genome"],
204  my $fig = FIG->new();                        tblFiles => [0, "output TBL files containing the corrected IDs"] },
205                        "command transactionDirectory IDfile",
206                      @ARGV);
207  # Get the command.  # Get the command.
208  my $mainCommand = lc shift @parameters;  my $mainCommand = lc shift @parameters;
209    # Get the FIG object.
210    my $fig = FIG->new();
211  # Create the transaction object.  # Create the transaction object.
212  my $controlBlock;  my $controlBlock;
213  if ($mainCommand eq 'count' || $mainCommand eq 'register') {  if ($mainCommand eq 'count' || $mainCommand eq 'register') {
214      $controlBlock = CountTransactions->new($options, $mainCommand, @parameters);      $controlBlock = CountTransactions->new($options, $mainCommand, @parameters);
215  } elsif ($mainCommand eq 'process') {  } elsif ($mainCommand eq 'process') {
216      $controlBlock = ApplyTransactions->new($options, $mainCommand, @parameters);      $controlBlock = ApplyTransactions->new($options, $mainCommand, @parameters);
217  } elsif ($mainCommand eq 'annotate') {  } elsif ($mainCommand eq 'fudge') {
218      $controlBlock = AnnotateTransactions->new($options, $mainCommand, @parameters);      $controlBlock = FudgeTransactions->new($options, $mainCommand, @parameters);
 } elsif ($mainCommand eq 'fix') {  
     $controlBlock = FixTransactions->new($options, $mainCommand, @parameters);  
219  } else {  } else {
220      Confess("Invalid command \"$mainCommand\" specified on command line.");      Confess("Invalid command \"$mainCommand\" specified on command line.");
221  }  }
# Line 205  Line 230 
230      my $orgsFound = 0;      my $orgsFound = 0;
231      my %transFiles = ();      my %transFiles = ();
232      my @transDirectory = OpenDir($parameters[0], 1);      my @transDirectory = OpenDir($parameters[0], 1);
233        # Pull out the "start" option value. This will be a space if all genomes should
234        # be processed, in which case it will always compare less than the genome ID.
235        my $startGenome = $options->{start};
236      # The next step is to create a hash of organism IDs to file names. This      # The next step is to create a hash of organism IDs to file names. This
237      # saves us some painful parsing later.      # saves us some painful parsing later.
238      for my $transFileName (@transDirectory) {      for my $transFileName (@transDirectory) {
239            # Parse the file name. This will only match if it's a real transaction file.
240          if ($transFileName =~ /^tbl_diff_(\d+\.\d+)$/) {          if ($transFileName =~ /^tbl_diff_(\d+\.\d+)$/) {
241                # Get the genome ID;
242                my $genomeID = $1;
243                # If we're skipping, only include this genome ID if it's equal to
244                # or greater than the start value.
245                if ($genomeID ge $startGenome) {
246              $transFiles{$1} = "$parameters[0]/$transFileName";              $transFiles{$1} = "$parameters[0]/$transFileName";
247              $orgsFound++;              $orgsFound++;
248          }          }
249      }      }
250        }
251      Trace("$orgsFound genome transaction files found in directory $parameters[0].") if T(2);      Trace("$orgsFound genome transaction files found in directory $parameters[0].") if T(2);
252      if (! $orgsFound) {      if (! $orgsFound) {
253          Confess("No \"tbl_diff\" files found in directory $parameters[1].");          Confess("No \"tbl_diff\" files found in directory $parameters[1].");
# Line 253  Line 288 
288                  }                  }
289                  $controlBlock->IncrementStat($command);                  $controlBlock->IncrementStat($command);
290              }              }
291                # Close the transaction input file.
292                close TRANS;
293              # Terminate processing for this genome.              # Terminate processing for this genome.
294              my $orgStats = $controlBlock->EndGenome();              my $orgStats = $controlBlock->EndGenome();
295              Trace("Statistics for $genomeID\n\n" . $orgStats->Show() . "\n") if T(3);              Trace("Statistics for $genomeID\n\n" . $orgStats->Show() . "\n") if T(3);
             # Close the transaction input file.  
             close TRANS;  
296          }          }
297      }      }
298      # Terminate processing.      # Terminate processing.

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.20

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3