[Bio] / FigKernelScripts / TransactFeatures.pl Repository:
ViewVC logotype

Diff of /FigKernelScripts/TransactFeatures.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.10, Mon Aug 15 20:40:22 2005 UTC revision 1.18, Sun Jan 15 21:29:41 2006 UTC
# Line 1  Line 1 
1  #!/usr/bin/perl -w  #!/usr/bin/perl -w
2    #
3    # Copyright (c) 2003-2006 University of Chicago and Fellowship
4    # for Interpretation of Genomes. All Rights Reserved.
5    #
6    # This file is part of the SEED Toolkit.
7    #
8    # The SEED Toolkit is free software. You can redistribute
9    # it and/or modify it under the terms of the SEED Toolkit
10    # Public License.
11    #
12    # You should have received a copy of the SEED Toolkit Public License
13    # along with this program; if not write to the University of Chicago
14    # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15    # Genomes at veronika@thefig.info or download a copy from
16    # http://www.theseed.org/LICENSE.TXT.
17    #
18    
19    
20  =head1 Add / Delete / Change Features  =head1 Add / Delete / Change Features
21    
# Line 7  Line 24 
24  a command. The second specifies a directory full of transaction files. The third  a command. The second specifies a directory full of transaction files. The third
25  specifies a file that tells us which feature IDs are available for each organism.  specifies a file that tells us which feature IDs are available for each organism.
26    
27  C<TransactFeatures> I<[options]> I<command> I<transactionDirectory> I<idFile>  C<TransactFeatures> [I<options>] I<command> I<transactionDirectory> I<idFile>
28    
29  The supported commands are  The supported commands are
30    
# Line 16  Line 33 
33  =item count  =item count
34    
35  Count the number of IDs needed to process the ADD and CHANGE transactions. This  Count the number of IDs needed to process the ADD and CHANGE transactions. This
36  will produce an listing of the number of feature IDs needed for each  will produce a listing of the number of feature IDs needed for each
37  organism and feature type. This command is mostly a sanity check: it provides  organism and feature type. This command is mostly a sanity check: it provides
38  useful statistics without changing anything.  useful statistics without changing anything.
39    
# Line 28  Line 45 
45    
46  =item process  =item process
47    
48  Process the transactions and update the FIG data store. This will also create  Process the transactions and update the FIG data store. This will also update
49  a copy of each transaction file in which the pseudo-IDs have been replaced by  the NR file and queue features for similarity generation.
 real IDs.  
   
 =item annotate  
   
 Annotate the features created by the transactions so as to indicate how they were  
 derived.  
   
 =item check  
   
 Verify that the locations and translations of the new and changed features are  
 correct.  
   
 =item fix  
   
 Fix the locations and translations of the new and changed features.  
   
 =item aliasMove  
   
 Move the aliases from the old features to the ones that replaced them.  
   
 =item attribute  
   
 Move the attributes from the old features to the ones that replaced them.  
50    
51  =item attributeCheck  =item fudge
52    
53  Same as C<attribute>, but no changes are made to the database.  Convert transactions that have already been applied to new transactions that can
54    be used to test the transaction processor.
55    
56  =back  =back
57    
# Line 175  Line 170 
170    
171  Trace SQL commands.  Trace SQL commands.
172    
173    =item tblFiles
174    
175    Output TBL files containing the corrected IDs. (B<process> command only)
176    
177    =item start
178    
179    ID of the first genome to process. This allows restarting a transaction run that failed
180    in the middle. The default is to run all transaction files.
181    
182  =back  =back
183    
184  =cut  =cut
# Line 182  Line 186 
186  use strict;  use strict;
187  use Tracer;  use Tracer;
188  use DocUtils;  use DocUtils;
 use TestUtils;  
189  use Cwd;  use Cwd;
190  use File::Copy;  use File::Copy;
191  use File::Path;  use File::Path;
# Line 191  Line 194 
194  use TransactionProcessor;  use TransactionProcessor;
195  use ApplyTransactions;  use ApplyTransactions;
196  use CountTransactions;  use CountTransactions;
197  use AnnotateTransactions;  use FudgeTransactions;
 use AttributeTransactions;  
 use FixTransactions;  
 use MoveAliases;  
198    
199  # Get the command-line options.  # Get the command-line options.
200  my ($options, @parameters) = Tracer::ParseCommand({ trace => 3, sql => 0, safe => 0, noAlias => 0 },  my ($options, @parameters) = StandardSetup(["DocUtils"],
201                        { safe => [0, "use database transactions"],
202                          noAlias => [0, "do not expect aliases in CHANGE transactions"],
203                          start => [' ', "start with this genome"],
204                          tblFiles => [0, "output TBL files containing the corrected IDs"] },
205                        "command transactionDirectory IDfile",
206                                                    @ARGV);                                                    @ARGV);
207  # Get the command.  # Get the command.
208  my $mainCommand = lc shift @parameters;  my $mainCommand = lc shift @parameters;
 # Set up tracing.  
 my $traceLevel = $options->{trace};  
 my $tracing = "$traceLevel Tracer DocUtils FIG";  
 if ($options->{sql}) {  
     $tracing .= " SQL";  
 }  
 TSetup($tracing, "TEXT");  
209  # Get the FIG object.  # Get the FIG object.
210  my $fig = FIG->new();  my $fig = FIG->new();
211  # Create the transaction object.  # Create the transaction object.
# Line 216  Line 214 
214      $controlBlock = CountTransactions->new($options, $mainCommand, @parameters);      $controlBlock = CountTransactions->new($options, $mainCommand, @parameters);
215  } elsif ($mainCommand eq 'process') {  } elsif ($mainCommand eq 'process') {
216      $controlBlock = ApplyTransactions->new($options, $mainCommand, @parameters);      $controlBlock = ApplyTransactions->new($options, $mainCommand, @parameters);
217  } elsif ($mainCommand eq 'annotate') {  } elsif ($mainCommand eq 'fudge') {
218      $controlBlock = AnnotateTransactions->new($options, $mainCommand, @parameters);      $controlBlock = FudgeTransactions->new($options, $mainCommand, @parameters);
 } elsif ($mainCommand eq 'fix' || $mainCommand eq 'check') {  
     $controlBlock = FixTransactions->new($options, $mainCommand, @parameters);  
 } elsif ($mainCommand eq 'aliasmove') {  
     $controlBlock = MoveAliases->new($options, $mainCommand, @parameters);  
 } elsif ($mainCommand eq 'attribute') {  
     $controlBlock = AttributeTransactions->new($options, $mainCommand, @parameters);  
 } elsif ($mainCommand eq 'attributeCheck') {  
     $controlBlock = AttributeTransactions->new($options, $mainCommand, @parameters);  
219  } else {  } else {
220      Confess("Invalid command \"$mainCommand\" specified on command line.");      Confess("Invalid command \"$mainCommand\" specified on command line.");
221  }  }
# Line 240  Line 230 
230      my $orgsFound = 0;      my $orgsFound = 0;
231      my %transFiles = ();      my %transFiles = ();
232      my @transDirectory = OpenDir($parameters[0], 1);      my @transDirectory = OpenDir($parameters[0], 1);
233        # Pull out the "start" option value. This will be a space if all genomes should
234        # be processed, in which case it will always compare less than the genome ID.
235        my $startGenome = $options->{start};
236      # The next step is to create a hash of organism IDs to file names. This      # The next step is to create a hash of organism IDs to file names. This
237      # saves us some painful parsing later.      # saves us some painful parsing later.
238      for my $transFileName (@transDirectory) {      for my $transFileName (@transDirectory) {
239            # Parse the file name. This will only match if it's a real transaction file.
240          if ($transFileName =~ /^tbl_diff_(\d+\.\d+)$/) {          if ($transFileName =~ /^tbl_diff_(\d+\.\d+)$/) {
241                # Get the genome ID.
242                my $genomeID = $1;
243                # If we're skipping, only include this genome ID if it's equal to
244                # or greater than the start value.
245                if ($genomeID ge $startGenome) {
246              $transFiles{$1} = "$parameters[0]/$transFileName";              $transFiles{$1} = "$parameters[0]/$transFileName";
247              $orgsFound++;              $orgsFound++;
248          }          }
249      }      }
250        }
251      Trace("$orgsFound genome transaction files found in directory $parameters[0].") if T(2);      Trace("$orgsFound genome transaction files found in directory $parameters[0].") if T(2);
252      if (! $orgsFound) {      if (! $orgsFound) {
253          Confess("No \"tbl_diff\" files found in directory $parameters[1].");          Confess("No \"tbl_diff\" files found in directory $parameters[1].");
# Line 288  Line 288 
288                  }                  }
289                  $controlBlock->IncrementStat($command);                  $controlBlock->IncrementStat($command);
290              }              }
291                # Close the transaction input file.
292                close TRANS;
293              # Terminate processing for this genome.              # Terminate processing for this genome.
294              my $orgStats = $controlBlock->EndGenome();              my $orgStats = $controlBlock->EndGenome();
295              Trace("Statistics for $genomeID\n\n" . $orgStats->Show() . "\n") if T(3);              Trace("Statistics for $genomeID\n\n" . $orgStats->Show() . "\n") if T(3);
             # Close the transaction input file.  
             close TRANS;  
296          }          }
297      }      }
298      # Terminate processing.      # Terminate processing.

Legend:
Removed from v.1.10  
changed lines
  Added in v.1.18

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3