[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory | Revision Log | View Patch

revision 1.26, Mon Jan 30 21:57:02 2006 UTC | revision 1.39, Thu Jun 8 15:34:53 2006 UTC
# Line 80  Line 80 
80  =item subsysFile  =item subsysFile
81    
82  Either the name of the file containing the list of trusted subsystems or a reference  Either the name of the file containing the list of trusted subsystems or a reference
83  to a list of subsystem names. If nothing is specified, all known subsystems will be  to a list of subsystem names. If nothing is specified, all NMPDR subsystems will be
84  considered trusted. Only subsystem data related to the trusted subsystems is loaded.  considered trusted. (A subsystem is considered NMPDR if it has a file named C<NMPDR>
85    in its data directory.) Only subsystem data related to the trusted subsystems is loaded.
86    
87  =item options  =item options
88    
# Line 94  Line 95 
95  sub new {  sub new {
96      # Get the parameters.      # Get the parameters.
97      my ($class, $sprout, $fig, $genomeFile, $subsysFile, $options) = @_;      my ($class, $sprout, $fig, $genomeFile, $subsysFile, $options) = @_;
98      # Load the list of genomes into a hash.      # Create the genome hash.
99      my %genomes;      my %genomes = ();
100        # We only need it if load-only is NOT specified.
101        if (! $options->{loadOnly}) {
102      if (! defined($genomeFile) || $genomeFile eq '') {      if (! defined($genomeFile) || $genomeFile eq '') {
103          # Here we want all the complete genomes and an access code of 1.          # Here we want all the complete genomes and an access code of 1.
104          my @genomeList = $fig->genomes(1);          my @genomeList = $fig->genomes(1);
# Line 129  Line 132 
132              Confess("Invalid genome parameter ($type) in SproutLoad constructor.");              Confess("Invalid genome parameter ($type) in SproutLoad constructor.");
133          }          }
134      }      }
135        }
136      # Load the list of trusted subsystems.      # Load the list of trusted subsystems.
137      my %subsystems = ();      my %subsystems = ();
138        # We only need it if load-only is NOT specified.
139        if (! $options->{loadOnly}) {
140      if (! defined $subsysFile || $subsysFile eq '') {      if (! defined $subsysFile || $subsysFile eq '') {
141          # Here we want all the subsystems.              # Here we want all the NMPDR subsystems. First we get the whole list.
142          %subsystems = map { $_ => 1 } $fig->all_subsystems();              my @subs = $fig->all_subsystems();
143                # Loop through, checking for the NMPDR file.
144                for my $sub (@subs) {
145                    if (-e "$FIG_Config::data/Subsystems/$sub/NMPDR") {
146                        $subsystems{$sub} = 1;
147                    }
148                }
149      } else {      } else {
150          my $type = ref $subsysFile;          my $type = ref $subsysFile;
151          if ($type eq 'ARRAY') {          if ($type eq 'ARRAY') {
# Line 153  Line 165 
165              Confess("Invalid subsystem parameter in SproutLoad constructor.");              Confess("Invalid subsystem parameter in SproutLoad constructor.");
166          }          }
167      }      }
168        }
169      # Get the data directory from the Sprout object.      # Get the data directory from the Sprout object.
170      my ($directory) = $sprout->LoadInfo();      my ($directory) = $sprout->LoadInfo();
171      # Create the Sprout load object.      # Create the Sprout load object.
# Line 162  Line 175 
175                    subsystems => \%subsystems,                    subsystems => \%subsystems,
176                    sprout => $sprout,                    sprout => $sprout,
177                    loadDirectory => $directory,                    loadDirectory => $directory,
178                    erdb => $sprout->{_erdb},                    erdb => $sprout,
179                    loaders => [],                    loaders => [],
180                    options => $options                    options => $options
181                   };                   };
# Line 250  Line 263 
263              $loadGenome->Add("genomeIn");              $loadGenome->Add("genomeIn");
264              # The access code comes in via the genome hash.              # The access code comes in via the genome hash.
265              my $accessCode = $genomeHash->{$genomeID};              my $accessCode = $genomeHash->{$genomeID};
266              # Get the genus, species, and strain from the scientific name. Note that we append              # Get the genus, species, and strain from the scientific name.
             # the genome ID to the strain. In some cases this is the totality of the strain name.  
267              my ($genus, $species, @extraData) = split / /, $self->{fig}->genus_species($genomeID);              my ($genus, $species, @extraData) = split / /, $self->{fig}->genus_species($genomeID);
268              my $extra = join " ", @extraData, "[$genomeID]";              my $extra = join " ", @extraData;
269              # Get the full taxonomy.              # Get the full taxonomy.
270              my $taxonomy = $fig->taxonomy_of($genomeID);              my $taxonomy = $fig->taxonomy_of($genomeID);
271              # Output the genome record.              # Output the genome record.
# Line 436  Line 448 
448      FeatureTranslation      FeatureTranslation
449      FeatureUpstream      FeatureUpstream
450      IsLocatedIn      IsLocatedIn
451        HasFeature
452    
453  =over 4  =over 4
454    
# Line 461  Line 474 
474      my $loadFeatureLink = $self->_TableLoader('FeatureLink');      my $loadFeatureLink = $self->_TableLoader('FeatureLink');
475      my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation');      my $loadFeatureTranslation = $self->_TableLoader('FeatureTranslation');
476      my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream');      my $loadFeatureUpstream = $self->_TableLoader('FeatureUpstream');
477        my $loadHasFeature = $self->_TableLoader('HasFeature');
478      # Get the maximum sequence size. We need this later for splitting up the      # Get the maximum sequence size. We need this later for splitting up the
479      # locations.      # locations.
480      my $chunkSize = $self->{sprout}->MaxSegment();      my $chunkSize = $self->{sprout}->MaxSegment();
# Line 481  Line 495 
495                  my ($featureID, $locations, undef, $type) = @{$featureData};                  my ($featureID, $locations, undef, $type) = @{$featureData};
496                  # Create the feature record.                  # Create the feature record.
497                  $loadFeature->Put($featureID, 1, $type);                  $loadFeature->Put($featureID, 1, $type);
498                    # Link it to the parent genome.
499                    $loadHasFeature->Put($genomeID, $featureID, $type);
500                  # Create the aliases.                  # Create the aliases.
501                  for my $alias ($fig->feature_aliases($featureID)) {                  for my $alias ($fig->feature_aliases($featureID)) {
502                      $loadFeatureAlias->Put($featureID, $alias);                      $loadFeatureAlias->Put($featureID, $alias);
# Line 697  Line 713 
713          my ($genomeID, $roleID);          my ($genomeID, $roleID);
714          my %roleData = ();          my %roleData = ();
715          for my $subsysID (@subsysIDs) {          for my $subsysID (@subsysIDs) {
             Trace("Creating subsystem $subsysID.") if T(3);  
             $loadSubsystem->Add("subsystemIn");  
716              # Get the subsystem object.              # Get the subsystem object.
717              my $sub = $fig->get_subsystem($subsysID);              my $sub = $fig->get_subsystem($subsysID);
718                # Only proceed if the subsystem has a spreadsheet.
719                if (! $sub->{empty_ss}) {
720                    Trace("Creating subsystem $subsysID.") if T(3);
721                    $loadSubsystem->Add("subsystemIn");
722              # Create the subsystem record.              # Create the subsystem record.
723              my $curator = $sub->get_curator();              my $curator = $sub->get_curator();
724              my $notes = $sub->get_notes();              my $notes = $sub->get_notes();
# Line 748  Line 766 
766                      # part of the spreadsheet cell ID.                      # part of the spreadsheet cell ID.
767                      for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {                      for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {
768                          # Get the features in the spreadsheet cell for this genome and role.                          # Get the features in the spreadsheet cell for this genome and role.
769                          my @pegs = $sub->get_pegs_from_cell($row, $col);                              my @pegs = grep { !$fig->is_deleted_fid($_) } $sub->get_pegs_from_cell($row, $col);
770                          # Only proceed if features exist.                          # Only proceed if features exist.
771                          if (@pegs > 0) {                          if (@pegs > 0) {
772                              # Create the spreadsheet cell.                              # Create the spreadsheet cell.
# Line 801  Line 819 
819                  # Connect the subset to the subsystem.                  # Connect the subset to the subsystem.
820                  $loadHasRoleSubset->Put($subsysID, $actualID);                  $loadHasRoleSubset->Put($subsysID, $actualID);
821                  # Connect the subset to its roles.                  # Connect the subset to its roles.
822                  my @roles = $sub->get_subset($subsetID);                      my @roles = $sub->get_subsetC_roles($subsetID);
823                  for my $roleID (@roles) {                  for my $roleID (@roles) {
824                      $loadConsistsOfRoles->Put($actualID, $roleID);                      $loadConsistsOfRoles->Put($actualID, $roleID);
825                  }                  }
# Line 854  Line 872 
872              }              }
873          }          }
874      }      }
875        }
876      # Finish the load.      # Finish the load.
877      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
878      return $retVal;      return $retVal;
# Line 1015  Line 1034 
1034          # Loop through the genomes.          # Loop through the genomes.
1035          for my $genomeID (sort keys %{$genomeHash}) {          for my $genomeID (sort keys %{$genomeHash}) {
1036              Trace("Processing $genomeID.") if T(3);              Trace("Processing $genomeID.") if T(3);
             # Get the genome's PEGs.  
             my @pegs = $fig->pegs_of($genomeID);  
             for my $peg (@pegs) {  
                 Trace("Processing $peg.") if T(4);  
1037                  # Create a hash of timestamps. We use this to prevent duplicate time stamps                  # Create a hash of timestamps. We use this to prevent duplicate time stamps
1038                  # from showing up for a single PEG's annotations.                  # from showing up for a single PEG's annotations.
1039                  my %seenTimestamps = ();                  my %seenTimestamps = ();
1040                  # Loop through the annotations.              # Get the genome's annotations.
1041                  for my $tuple ($fig->feature_annotations($peg, "raw")) {              my @annotations = $fig->read_all_annotations($genomeID);
1042                      my ($fid, $timestamp, $user, $text) = @{$tuple};              Trace("Processing annotations.") if T(2);
1043                for my $tuple (@annotations) {
1044                    # Get the annotation tuple.
1045                    my ($peg, $timestamp, $user, $text) = @{$tuple};
1046                      # Here we fix up the annotation text. "\r" is removed,                      # Here we fix up the annotation text. "\r" is removed,
1047                      # and "\t" and "\n" are escaped. Note we use the "s"                      # and "\t" and "\n" are escaped. Note we use the "s"
1048                      # modifier so that new-lines inside the text do not                      # modifier so that new-lines inside the text do not
# Line 1039  Line 1057 
1057                          # Here it's a number. We need to insure the one we use to form                          # Here it's a number. We need to insure the one we use to form
1058                          # the key is unique.                          # the key is unique.
1059                          my $keyStamp = $timestamp;                          my $keyStamp = $timestamp;
1060                          while ($seenTimestamps{$keyStamp}) {                      while ($seenTimestamps{"$peg:$keyStamp"}) {
1061                              $keyStamp++;                              $keyStamp++;
1062                          }                          }
                         $seenTimestamps{$keyStamp} = 1;  
1063                          my $annotationID = "$peg:$keyStamp";                          my $annotationID = "$peg:$keyStamp";
1064                        $seenTimestamps{$annotationID} = 1;
1065                          # Insure the user exists.                          # Insure the user exists.
1066                          if (! $users{$user}) {                          if (! $users{$user}) {
1067                              $loadSproutUser->Put($user, "SEED user");                              $loadSproutUser->Put($user, "SEED user");
# Line 1061  Line 1079 
1079                  }                  }
1080              }              }
1081          }          }
     }  
1082      # Finish the load.      # Finish the load.
1083      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
1084      return $retVal;      return $retVal;
# Line 1460  Line 1477 
1477              # Here we really need to finish.              # Here we really need to finish.
1478              Trace("Finishing $relName.") if T(2);              Trace("Finishing $relName.") if T(2);
1479              my $stats = $loader->Finish();              my $stats = $loader->Finish();
1480              if ($self->{options}->{dbLoad} && ! $loader->Ignore) {              if ($self->{options}->{dbLoad}) {
1481                  # Here we want to use the load file just created to load the database.                  # Here we want to use the load file just created to load the database.
1482                  Trace("Loading relation $relName.") if T(2);                  Trace("Loading relation $relName.") if T(2);
1483                  my $newStats = $self->{sprout}->LoadUpdate(1, [$relName]);                  my $newStats = $self->{sprout}->LoadUpdate(1, [$relName]);

Legend:
Removed from v.1.26  
changed lines
  Added in v.1.39

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3