[Bio] / FigKernelScripts / TransactFeatures.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/TransactFeatures.pl

Parent Directory | Revision Log | View Patch

revision 1.6, Thu Aug 11 05:16:46 2005 UTC revision 1.17, Wed Dec 7 19:38:22 2005 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2    #
3    # Copyright (c) 2003-2006 University of Chicago and Fellowship
4    # for Interpretations of Genomes. All Rights Reserved.
5    #
6    # This file is part of the SEED Toolkit.
7    #
8    # The SEED Toolkit is free software. You can redistribute
9    # it and/or modify it under the terms of the SEED Toolkit
10    # Public License.
11    #
12    # You should have received a copy of the SEED Toolkit Public License
13    # along with this program; if not write to the University of Chicago
14    # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15    # Genomes at veronika@thefig.info or download a copy from
16    # http://www.theseed.org/LICENSE.TXT.
17    #
18    
19    
20  =head1 Add / Delete / Change Features  =head1 Add / Delete / Change Features
21    
# Line 7  Line 24 
24  a command. The second specifies a directory full of transaction files. The third  a command. The second specifies a directory full of transaction files. The third
25  specifies a file that tells us which feature IDs are available for each organism.  specifies a file that tells us which feature IDs are available for each organism.
26    
27  C<TransactFeatures> I<[options]> I<command> I<transactionDirectory> I<idFile>  C<TransactFeatures> [I<options>] I<command> I<transactionDirectory> I<idFile>
28    
29  The supported commands are  The supported commands are
30    
# Line 16  Line 33 
33  =item count  =item count
34    
35  Count the number of IDs needed to process the ADD and CHANGE transactions. This  Count the number of IDs needed to process the ADD and CHANGE transactions. This
36  will produce an listing of the number of feature IDs needed for each  will produce a listing of the number of feature IDs needed for each
37  organism and feature type. This command is mostly a sanity check: it provides  organism and feature type. This command is mostly a sanity check: it provides
38  useful statistics without changing anything.  useful statistics without changing anything.
39    
# Line 28  Line 45 
45    
46  =item process  =item process
47    
48  Process the transactions and update the FIG data store. This will also create  Process the transactions and update the FIG data store. This will also update
49  a copy of each transaction file in which the pseudo-IDs have been replaced by  the NR file and queue features for similarity generation.
 real IDs.  
   
 =item annotate  
   
 Annotate the features created by the transactions so as to indicate how they were  
 derived.  
50    
51  =item fix  =item fudge
52    
53  Fix the locations of new features and verify the translations of new and changed  Convert transactions that have already been applied to new transactions that can
54  features.  be used to test the transaction processor.
55    
56  =back  =back
57    
# Line 155  Line 166 
166  Assume that the transaction files do not contain aliases. This means that in CHANGE  Assume that the transaction files do not contain aliases. This means that in CHANGE
167  records the translation will immediately follow the location.  records the translation will immediately follow the location.
168    
169    =item sql
170    
171    Trace SQL commands.
172    
173    =item tblFiles
174    
175    Output TBL files containing the corrected IDs. (B<process> command only)
176    
177    =item start
178    
179    ID of the first genome to process. This allows restarting a transaction run that failed
180    in the middle. The default is to run all transaction files.
181    
182    =back
183    
184  =cut  =cut
185    
186  use strict;  use strict;
187  use Tracer;  use Tracer;
188  use DocUtils;  use DocUtils;
 use TestUtils;  
189  use Cwd;  use Cwd;
190  use File::Copy;  use File::Copy;
191  use File::Path;  use File::Path;
# Line 169  Line 194 
194  use TransactionProcessor;  use TransactionProcessor;
195  use ApplyTransactions;  use ApplyTransactions;
196  use CountTransactions;  use CountTransactions;
197  use AnnotateTransactions;  use FudgeTransactions;
 use FixTransactions;  
198    
199  # Get the command-line options.  # Get the command-line options.
200  my ($options, @parameters) = Tracer::ParseCommand({ trace => 3, safe => 0, noAlias => 0 }, @ARGV);  my ($options, @parameters) = Tracer::ParseCommand({ trace => 3, sql => 0, safe => 0, noAlias => 0,
201                                                        start => ' ', tblFiles => 0},
202                                                      @ARGV);
203    # Get the command.
204    my $mainCommand = lc shift @parameters;
205  # Set up tracing.  # Set up tracing.
206  my $traceLevel = $options->{trace};  my $traceLevel = $options->{trace};
207  TSetup("$traceLevel Tracer DocUtils FIG", "TEXT");  my $tracing = "$traceLevel Tracer DocUtils FIG";
208    if ($options->{sql}) {
209        $tracing .= " SQL";
210    }
211    TSetup($tracing, "TEXT");
212  # Get the FIG object.  # Get the FIG object.
213  my $fig = FIG->new();  my $fig = FIG->new();
 # Get the command.  
 my $mainCommand = lc shift @parameters;  
214  # Create the transaction object.  # Create the transaction object.
215  my $controlBlock;  my $controlBlock;
216  if ($mainCommand eq 'count' || $mainCommand eq 'register') {  if ($mainCommand eq 'count' || $mainCommand eq 'register') {
217      $controlBlock = CountTransactions->new($options, $mainCommand, @parameters);      $controlBlock = CountTransactions->new($options, $mainCommand, @parameters);
218  } elsif ($mainCommand eq 'process') {  } elsif ($mainCommand eq 'process') {
219      $controlBlock = ApplyTransactions->new($options, $mainCommand, @parameters);      $controlBlock = ApplyTransactions->new($options, $mainCommand, @parameters);
220  } elsif ($mainCommand eq 'annotate') {  } elsif ($mainCommand eq 'fudge') {
221      $controlBlock = AnnotateTransactions->new($options, $mainCommand, @parameters);      $controlBlock = FudgeTransactions->new($options, $mainCommand, @parameters);
 } elsif ($mainCommand eq 'fix') {  
     $controlBlock = FixTransactions->new($options, $mainCommand, @parameters);  
222  } else {  } else {
223      Confess("Invalid command \"$mainCommand\" specified on command line.");      Confess("Invalid command \"$mainCommand\" specified on command line.");
224  }  }
# Line 205  Line 233 
233      my $orgsFound = 0;      my $orgsFound = 0;
234      my %transFiles = ();      my %transFiles = ();
235      my @transDirectory = OpenDir($parameters[0], 1);      my @transDirectory = OpenDir($parameters[0], 1);
236        # Pull out the "start" option value. It defaults to a single space, which
237        # sorts before every genome ID in a string comparison, so no genome is skipped.
238        my $startGenome = $options->{start};
239      # The next step is to create a hash of organism IDs to file names. This      # The next step is to create a hash of organism IDs to file names. This
240      # saves us some painful parsing later.      # saves us some painful parsing later.
241      for my $transFileName (@transDirectory) {      for my $transFileName (@transDirectory) {
242            # Parse the file name. This will only match if it's a real transaction file.
243          if ($transFileName =~ /^tbl_diff_(\d+\.\d+)$/) {          if ($transFileName =~ /^tbl_diff_(\d+\.\d+)$/) {
244                # Get the genome ID.
245                my $genomeID = $1;
246                # If we're skipping, only include this genome ID if it's equal to
247                # or greater than the start value.
248                if ($genomeID ge $startGenome) {
249              $transFiles{$1} = "$parameters[0]/$transFileName";              $transFiles{$1} = "$parameters[0]/$transFileName";
250              $orgsFound++;              $orgsFound++;
251          }          }
252      }      }
253        }
254      Trace("$orgsFound genome transaction files found in directory $parameters[0].") if T(2);      Trace("$orgsFound genome transaction files found in directory $parameters[0].") if T(2);
255      if (! $orgsFound) {      if (! $orgsFound) {
256          Confess("No \"tbl_diff\" files found in directory $parameters[1].");          Confess("No \"tbl_diff\" files found in directory $parameters[1].");
# Line 253  Line 291 
291                  }                  }
292                  $controlBlock->IncrementStat($command);                  $controlBlock->IncrementStat($command);
293              }              }
294                # Close the transaction input file.
295                close TRANS;
296              # Terminate processing for this genome.              # Terminate processing for this genome.
297              my $orgStats = $controlBlock->EndGenome();              my $orgStats = $controlBlock->EndGenome();
298              Trace("Statistics for $genomeID\n\n" . $orgStats->Show() . "\n") if T(3);              Trace("Statistics for $genomeID\n\n" . $orgStats->Show() . "\n") if T(3);
             # Close the transaction input file.  
             close TRANS;  
299          }          }
300      }      }
301      # Terminate processing.      # Terminate processing.

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.17

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3