[Bio] / Sprout / LoadSproutTables.pl Repository:
ViewVC logotype

Diff of /Sprout/LoadSproutTables.pl

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.42, Wed Dec 20 20:03:50 2006 UTC revision 1.49, Tue Sep 16 18:53:28 2008 UTC
# Line 104  Line 104 
104    
105  =back  =back
106    
 =over 4  
   
107  Most of the above preparation is performed by the B<NMPDRSetup> utility.  Most of the above preparation is performed by the B<NMPDRSetup> utility.
108  NMPDRSetup prints the instructions for completing the process, including  NMPDRSetup prints the instructions for completing the process, including
109  loading the Sprout database. The specific procedure for loading  loading the Sprout database. The specific procedure for loading
110  the Sprout data, however, is as follows.  the Sprout data, however, is as follows.
111    
 =item 1  
   
 Type  
   
     nohup LoadSproutTables -dbLoad -user=you -background "*" >null &  
   
 where C<you> is your user ID, and press ENTER.  
   
 The above command line runs the load in the background. The standard output,  
 standard error, and trace output will be directed to files in the FIG temporary  
 directory. If your user name is C<Bruce> then the files will be named  
 C<outBruce.log>, C<errBruce.log>, and C<traceBruce.log> respectively.  
   
 If the load fails at some point and you are able to correct the problem, use the  
 C<resume> option to restart it. For example, if the load failed while doing the  
 Feature load group, you would resume it using  
   
     nohup LoadSproutTables -dbLoad -dbCreate -user=you -resume -background Feature >null &  
   
 =item 2  
   
 Type  
   
     index_sprout_lucene  
   
  and press ENTER. This will create the Lucene indexes for the Sprout data.  
   
 =back  
   
112  =head2 LoadSproutTables Command  =head2 LoadSproutTables Command
113    
114  C<LoadSproutTables> creates the load files for Sprout tables and optionally loads them.  C<LoadSproutTables> creates the load files for Sprout tables and optionally loads them.
# Line 152  Line 121 
121    
122  Loads B<Genome>, B<HasContig>, B<Contig>, B<IsMadeUpOf>, and B<Sequence>.  Loads B<Genome>, B<HasContig>, B<Contig>, B<IsMadeUpOf>, and B<Sequence>.
123    
 =item Feature  
   
 Loads B<Feature>, B<FeatureAlias>, B<FeatureTranslation>, B<FeatureUpstream>,  
 B<IsLocatedIn>, B<FeatureLink>.  
   
 =item Coupling  
   
 Loads B<Coupling>, B<IsEvidencedBy>, B<PCH>, B<ParticipatesInCoupling>,  
 B<UsesAsEvidence>.  
   
124  =item Subsystem  =item Subsystem
125    
126  Loads B<Subsystem>, B<Role>, B<SSCell>, B<ContainsFeature>, B<IsGenomeOf>,  Loads B<Subsystem>, B<Role>, B<SSCell>, B<ContainsFeature>, B<IsGenomeOf>,
127  B<IsRoleOf>, B<OccursInSubsystem>, B<ParticipatesIn>, B<HasSSCell>,  B<IsRoleOf>, B<OccursInSubsystem>, B<ParticipatesIn>, B<HasSSCell>,
128  B<Catalyzes>, B<ConsistsOfRoles>, B<RoleSubset>, B<HasRoleSubset>,  B<ConsistsOfRoles>, B<RoleSubset>, B<HasRoleSubset>,
129  B<ConsistsOfGenomes>, B<GenomeSubset>, B<HasGenomeSubset>, B<Diagram>,  B<ConsistsOfGenomes>, B<GenomeSubset>, B<HasGenomeSubset>, B<Diagram>,
130  B<RoleOccursIn>.  B<RoleOccursIn>, B<SubSystemClass>, B<RoleEC>, and
131    B<IsIdentifiedByEC>.
132    
133  =item Annotation  =item Annotation
134    
135  Loads B<SproutUser>, B<UserAccess>, B<Annotation>, B<IsTargetOfAnnotation>,  Loads B<SproutUser>, B<UserAccess>, B<Annotation>, B<IsTargetOfAnnotation>, and
136  B<MadeAnnotation>.  B<MadeAnnotation>.
137    
138  =item Property  =item Property
139    
140  Loads B<Property>, B<HasProperty>.  Loads B<Property> and B<HasProperty>.
   
 =item Group  
   
 Loads B<GenomeGroups>.  
141    
142  =item Source  =item Source
143    
144  Loads B<Source>, B<ComesFrom>, B<SourceURL>.  Loads B<Source>, B<ComesFrom>, and B<SourceURL>.
   
 =item External  
   
 Loads B<ExternalAliasOrg>, B<ExternalAliasFunc>.  
145    
146  =item Reaction  =item Reaction
147    
148  Loads B<ReactionURL>, B<Compound>, B<CompoundName>,  Loads B<ReactionURL>, B<Compound>, B<CompoundName>,
149  B<CompoundCAS>, B<IsAComponentOf>, B<Reaction>.  B<CompoundCAS>, B<IsAComponentOf>, B<Reaction>, B<Scenario>, B<IsInputFor>,
150    B<IsOutputOf>, B<IsOnDiagram>, and B<Catalyzes>.
151    
152  =item Synonym  =item Synonym
153    
# Line 206  Line 159 
159    
160  =item Drug  =item Drug
161    
162  Loads B<DrugProject>, B<ContainsTopic>, B<DrugTopic>, B<ContainsAnalysisOf>,  Loads B<PDB>, B<DocksWith>, B<IsProteinForFeature>, and B<Ligand>.
163  B<PDB>, B<IncludesBound>, B<PDB>, B<IsBoundIn>, B<BindsWith>, B<Ligand>,  
164  B<DescribesProteinForFeature>, and B<FeatureConservation>.  =item Feature
165    
166    Loads B<Feature>, B<FeatureAlias>, B<FeatureTranslation>, B<FeatureUpstream>,
167    B<IsLocatedIn>, B<FeatureLink>, B<IsAliasOf>, B<CDD>, B<HasFeature>,
168    B<HasRoleInSubsystem>, B<FeatureEssential>, B<FeatureVirulent>, B<FeatureIEDB>,
169    B<IsPresentOnProteinOf>, and B<Keyword>.
170    
171  =item *  =item *
172    
# Line 256  Line 214 
214  will not be created. This option is useful if you are setting up a copy of Sprout  will not be created. This option is useful if you are setting up a copy of Sprout
215  and have load files already set up from the original version.  and have load files already set up from the original version.
216    
 =item primaryOnly  
   
 If TRUE, only the group's primary entity will be loaded.  
   
217  =item background  =item background
218    
219  Redirect the standard and error output to files in the FIG temporary directory.  Redirect the standard and error output to files in the FIG temporary directory.
# Line 283  Line 237 
237    
238  use strict;  use strict;
239  use Tracer;  use Tracer;
 use DocUtils;  
240  use Cwd;  use Cwd;
241  use FIG;  use FIG;
242  use SFXlate;  use SFXlate;
# Line 293  Line 246 
246  use Stats;  use Stats;
247  use SFXlate;  use SFXlate;
248    
249    # This is a list of the load groups in their natural order. We'll go through these in sequence, processing
250    # the ones the user asks for.
251    my @LoadGroups = qw(Genome Subsystem Property Annotation Source Reaction Synonym Family Drug Feature);
252    
253  # Get the command-line parameters and options.  # Get the command-line parameters and options.
254  my ($options, @parameters) = StandardSetup(['SproutLoad', 'ERDBLoad', 'Stats',  my ($options, @parameters) = StandardSetup(['SproutLoad', 'ERDBLoad', 'Stats',
255                                              'ERDB', 'Load', 'Sprout', 'Subsystem'],                                              'ERDB', 'Load', 'Sprout', 'Subsystem'],
# Line 301  Line 258 
258                                                dbLoad => [0, "load the database from generated files"],                                                dbLoad => [0, "load the database from generated files"],
259                                                dbCreate => [0, "drop and re-create the database"],                                                dbCreate => [0, "drop and re-create the database"],
260                                                loadOnly => [0, "load the database from previously generated files"],                                                loadOnly => [0, "load the database from previously generated files"],
                                               primaryOnly => [0, "only process the group's main entity"],  
261                                                resume => [0, "resume a complete load starting with the first group specified in the parameter list"],                                                resume => [0, "resume a complete load starting with the first group specified in the parameter list"],
262                                                phone => ["", "phone number (international format) to call when load finishes"],                                                phone => ["", "phone number (international format) to call when load finishes"],
263                                                  trace => [3, "trace level"],
264                                              },                                              },
265                                              "<group1> <group2> ...",                                              "<group1> <group2> ...",
266                                              @ARGV);                                              @ARGV);
# Line 328  Line 285 
285  my $spl = SproutLoad->new($sprout, $fig, $geneFile, $options->{subsysFile}, $options);  my $spl = SproutLoad->new($sprout, $fig, $geneFile, $options->{subsysFile}, $options);
286  # Ensure we have an output directory.  # Ensure we have an output directory.
287  FIG::verify_dir($FIG_Config::sproutData);  FIG::verify_dir($FIG_Config::sproutData);
288    # Check for the "*" option.
289    if ($parameters[0] eq '*') {
290        @parameters = @LoadGroups;
291    }
292  # If we're resuming, we only want to have 1 parameter.  # If we're resuming, we only want to have 1 parameter.
293  my $resume = $options->{resume};  my $resume = $options->{resume};
294  if ($resume && @parameters > 1) {  if ($resume && @parameters > 1) {
# Line 335  Line 296 
296  } elsif (! @parameters) {  } elsif (! @parameters) {
297      Trace("No load groups were specified.") if T(0);      Trace("No load groups were specified.") if T(0);
298  }  }
299    # Process the resume option here. We modify the incoming parameters to
300    # contain the resume group and everything after it.
301    if ($resume) {
302        # Save the starting group.
303        my $resumeGroup = $parameters[0];
304        # Copy the load group list into the parameter array.
305        @parameters = @LoadGroups;
306        # Shift out the groups until we reach our desired starting point.
307        while (scalar(@parameters) && $parameters[0] ne $resumeGroup) {
308            shift @parameters;
309        }
310        if (! @parameters) {
311            Confess("Resume group \"$resumeGroup\" not found.");
312        }
313    }
314  # Set a variable to contain return type information.  # Set a variable to contain return type information.
315  my $rtype;  my $rtype;
316    # Set up a statistics object for statistics about the entire load.
317    my $totalStats = Stats->new();
318  # Ensure we catch errors.  # Ensure we catch errors.
319  eval {  eval {
320      # Process the parameters.      # Process the parameters.
321      for my $group (@parameters) {      for my $group (@parameters) {
322          Trace("Processing load group $group.") if T(2);          Trace("Processing load group $group.") if T(2);
323          my $stats;          # Compute the string we want to execute.
324          if ($group eq 'Genome' || $group eq '*') {          my $code = "\$spl->Load${group}Data()";
325              $spl->LoadGenomeData();          # Load this group.
326              $group = ResumeCheck($resume, $group);          my $stats = eval($code);
327          }          if ($@) {
328          if ($group eq 'Feature' || $group eq '*') {              Confess("Load group error: $@");
329              $spl->LoadFeatureData();          }
330              $group = ResumeCheck($resume, $group);          # Merge the statistics into the master.
331          }          $totalStats->Accumulate($stats);
332          if ($group eq 'Coupling' || $group eq '*') {      }
333              $spl->LoadCouplingData();      # Compute the statistical display.
334              $group = ResumeCheck($resume, $group);      my $statDisplay = $totalStats->Show();
335          }      # Display it.
336          if ($group eq 'Subsystem' || $group eq '*') {      Trace("Statistics for this load:\n$statDisplay") if T(2);
337              $spl->LoadSubsystemData();      # Check for a "table load failed" message. If we find one, we want
338              $group = ResumeCheck($resume, $group);      # to end with an error.
339          }      if ($statDisplay =~ /table load failed/i) {
340          if ($group eq 'Property' || $group eq '*') {          Confess("One or more table loads failed.");
             $spl->LoadPropertyData();  
             $group = ResumeCheck($resume, $group);  
         }  
         if ($group eq 'Annotation' || $group eq '*') {  
             $spl->LoadAnnotationData();  
             $group = ResumeCheck($resume, $group);  
         }  
         if ($group eq 'Group' || $group eq '*') {  
             $spl->LoadGroupData();  
             $group = ResumeCheck($resume, $group);  
         }  
         if ($group eq 'Source' || $group eq '*') {  
             $spl->LoadSourceData();  
             $group = ResumeCheck($resume, $group);  
         }  
         if ($group eq 'External' || $group eq '*') {  
             $spl->LoadExternalData();  
             $group = ResumeCheck($resume, $group);  
         }  
         if ($group eq 'Reaction' || $group eq '*') {  
             $spl->LoadReactionData();  
             $group = ResumeCheck($resume, $group);  
         }  
         if ($group eq 'Synonym' || $group eq '*') {  
             $spl->LoadSynonymData();  
             $group = ResumeCheck($resume, $group);  
         }  
         if ($group eq 'Family' || $group eq '*') {  
             $spl->LoadFamilyData();  
             $group = ResumeCheck($resume, $group);  
         }  
 #        if ($group eq 'Drug' || $group eq '*') {  
 #            $spl->LoadDrugData();  
 #            $group = ResumeCheck($resume, $group);  
 #        }  
341      }      }
342  };  };
343  if ($@) {  if ($@) {
# Line 413  Line 356 
356      }      }
357  }  }
358    
 # If the resume flag is set, return "*", else return "".  
 sub ResumeCheck {  
     my ($resume, $group) = @_;  
     return ($resume ? "*" : $group);  
 }  
   
359  1;  1;

Legend:
Removed from v.1.42  
changed lines
  Added in v.1.49

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3