[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory | Revision Log | View Patch

revision 1.90, Thu Dec 6 14:53:50 2007 UTC | revision 1.94, Tue Sep 16 18:42:40 2008 UTC
# Line 13  Line 13 
13      use BasicLocation;      use BasicLocation;
14      use HTML;      use HTML;
15      use AliasAnalysis;      use AliasAnalysis;
16        use BioWords;
17    
18  =head1 Sprout Load Methods  =head1 Sprout Load Methods
19    
# Line 170  Line 171 
171          for my $subsystem (keys %subsystems) {          for my $subsystem (keys %subsystems) {
172              my $name = $subsystem;              my $name = $subsystem;
173              $name =~ s/_/ /g;              $name =~ s/_/ /g;
 #            my $classes = $fig->subsystem_classification($subsystem);  
 #            $name .= " " . join(" ", @{$classes});  
174              $subsystems{$subsystem} = $name;              $subsystems{$subsystem} = $name;
175          }          }
176      }      }
# Line 257  Line 256 
256          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
257      } else {      } else {
258          Trace("Generating genome data.") if T(2);          Trace("Generating genome data.") if T(2);
259            # Get the full info for the FIG genomes.
260            my %genomeInfo = map { $_->[0] => { gname => $_->[1], szdna => $_->[2], maindomain => $_->[3],
261                                                pegs => $_->[4], rnas => $_->[5], complete => $_->[6] } } @{$fig->genome_info()};
262          # Now we loop through the genomes, generating the data for each one.          # Now we loop through the genomes, generating the data for each one.
263          for my $genomeID (sort keys %{$genomeHash}) {          for my $genomeID (sort keys %{$genomeHash}) {
264              Trace("Generating data for genome $genomeID.") if T(3);              Trace("Generating data for genome $genomeID.") if T(3);
# Line 286  Line 288 
288                  $group = $FIG_Config::otherGroup;                  $group = $FIG_Config::otherGroup;
289              }              }
290              close TMP;              close TMP;
291                # Get the contigs.
292                my @contigs = $fig->all_contigs($genomeID);
293                # Get this genome's info array.
294                my $info = $genomeInfo{$genomeID};
295              # Output the genome record.              # Output the genome record.
296              $loadGenome->Put($genomeID, $accessCode, $fig->is_complete($genomeID),              $loadGenome->Put($genomeID, $accessCode, $info->{complete}, scalar(@contigs),
297                               $dnaSize, $genus, $group, $species, $extra, $version, $taxonomy);                               $dnaSize, $genus, $info->{pegs}, $group, $info->{rnas}, $species, $extra, $version, $taxonomy);
298              # Now we loop through each of the genome's contigs.              # Now we loop through each of the genome's contigs.
             my @contigs = $fig->all_contigs($genomeID);  
299              for my $contigID (@contigs) {              for my $contigID (@contigs) {
300                  Trace("Processing contig $contigID for $genomeID.") if T(4);                  Trace("Processing contig $contigID for $genomeID.") if T(4);
301                  $loadContig->Add("contigIn");                  $loadContig->Add("contigIn");
# Line 350  Line 355 
355      FeatureIEDB      FeatureIEDB
356      CDD      CDD
357      IsPresentOnProteinOf      IsPresentOnProteinOf
358        CellLocation
359        IsPossiblePlaceFor
360        ExternalDatabase
361        IsAlsoFoundIn
362        Keyword
363    
364  =over 4  =over 4
365    
# Line 384  Line 394 
394      my $loadFeatureIEDB = $self->_TableLoader('FeatureIEDB');      my $loadFeatureIEDB = $self->_TableLoader('FeatureIEDB');
395      my $loadCDD = $self->_TableLoader('CDD');      my $loadCDD = $self->_TableLoader('CDD');
396      my $loadIsPresentOnProteinOf = $self->_TableLoader('IsPresentOnProteinOf');      my $loadIsPresentOnProteinOf = $self->_TableLoader('IsPresentOnProteinOf');
397        my $loadCellLocation = $self->_TableLoader('CellLocation');
398        my $loadIsPossiblePlaceFor = $self->_TableLoader('IsPossiblePlaceFor');
399        my $loadIsAlsoFoundIn = $self->_TableLoader('IsAlsoFoundIn');
400        my $loadExternalDatabase = $self->_TableLoader('ExternalDatabase');
401        my $loadKeyword = $self->_TableLoader('Keyword');
402      # Get the subsystem hash.      # Get the subsystem hash.
403      my $subHash = $self->{subsystems};      my $subHash = $self->{subsystems};
404      # Get the property keys.      # Get the property keys.
405      my $propKeys = $self->{propKeys};      my $propKeys = $self->{propKeys};
406      # Create hashes to hold CDD and alias values.      # Create hashes to hold CDD, Cell Location (PSORT), External Database, and alias values.
407      my %CDD = ();      my %CDD = ();
408      my %alias = ();      my %alias = ();
409        my %cellLocation = ();
410        my %xdb = ();
411        # Create the bio-words object.
412        my $biowords = BioWords->new(exceptions => "$FIG_Config::sproutData/Exceptions.txt",
413                                     stops => "$FIG_Config::sproutData/StopWords.txt",
414                                     cache => 0);
415        # One of the things we have to do here is build the keyword table, and the keyword
416        # table needs to contain the originating text and feature count for each stem. Unfortunately,
417        # the number of distinct keywords is so large it causes PERL to hang if we try to
418        # keep them in memory. As a result, we need to track them using disk files.
419        # Our approach will be to use two sequential files. One will contain stems and phonexes.
420        # Each time a stem occurs in a feature, a record will be written to that file. The stem
421        # file can then be sorted and collated to determine the number of features for each
422        # stem. A separate file will contain keywords and stems. This last file
423        # will be subjected to a sort unique on stem/keyword. The file is then merged
424        # with the stem file to create the keyword table relation (keyword, stem, phonex, count).
425        my $stemFileName = "$FIG_Config::temp/stems$$.tbl";
426        my $keyFileName = "$FIG_Config::temp/keys$$.tbl";
427        my $stemh = Open(undef, "| sort -T\"$FIG_Config::temp\" -t\"\t\" -k1,1 >$stemFileName");
428        my $keyh = Open(undef, "| sort -T\"$FIG_Config::temp\" -t\"\t\" -u -k1,1 -k2,2 >$keyFileName");
429      # Get the maximum sequence size. We need this later for splitting up the      # Get the maximum sequence size. We need this later for splitting up the
430      # locations.      # locations.
431      my $chunkSize = $self->{sprout}->MaxSegment();      my $chunkSize = $self->{sprout}->MaxSegment();
# Line 453  Line 488 
488                              $alias{$alias} = 1;                              $alias{$alias} = 1;
489                          }                          }
490                      }                      }
491                        # Add the corresponding IDs. We ask for 2-tuples of the form (id, database).
492                        my @corresponders = $fig->get_corresponding_ids($featureID, 1);
493                        for my $tuple (@corresponders) {
494                            my ($id, $xdb) = @{$tuple};
495                            # Ignore SEED: that's us.
496                            if ($xdb ne 'SEED') {
497                                # Connect this ID to the feature.
498                                $loadIsAlsoFoundIn->Put($featureID, $xdb, $id);
499                                # Add it as a keyword.
500                                push @keywords, $id;
501                                # If this is a new database, create a record for it.
502                                if (! exists $xdb{$xdb}) {
503                                    $xdb{$xdb} = 1;
504                                    $loadExternalDatabase->Put($xdb);
505                                }
506                            }
507                        }
508                      Trace("Assignment for $featureID is: $assignment") if T(4);                      Trace("Assignment for $featureID is: $assignment") if T(4);
509                      # Break the assignment into words and shove it onto the                      # Break the assignment into words and shove it onto the
510                      # keyword list.                      # keyword list.
# Line 528  Line 580 
580                          push @keywords, 'iedb';                          push @keywords, 'iedb';
581                          $loadFeature->Add('iedb');                          $loadFeature->Add('iedb');
582                      }                      }
583                      # Now we have some other attributes we need to process. Currently,                      # Now we have some other attributes we need to process. To get
584                      # this is CDD and CELLO, but we expect the number to increase.                      # through them, we convert the attribute list for this feature
585                        # into a two-layer hash: key => subkey => value.
586                      my %attributeHash = ();                      my %attributeHash = ();
587                      for my $attrRow (@{$attributes->{$featureID}}) {                      for my $attrRow (@{$attributes->{$featureID}}) {
588                          my (undef, $key, @values) = @{$attrRow};                          my (undef, $key, @values) = @{$attrRow};
589                          $key =~ /^([^:]+)::(.+)/;                          my ($realKey, $subKey);
590                            if ($key =~ /^([^:]+)::(.+)/) {
591                                ($realKey, $subKey) = ($1, $2);
592                            } else {
593                                ($realKey, $subKey) = ($key, "");
594                            }
595                          if (exists $attributeHash{$1}) {                          if (exists $attributeHash{$1}) {
596                              $attributeHash{$1}->{$2} = \@values;                              $attributeHash{$1}->{$2} = \@values;
597                          } else {                          } else {
598                              $attributeHash{$1} = {$2 => \@values};                              $attributeHash{$1} = {$2 => \@values};
599                          }                          }
600                      }                      }
601                      my $celloValue = "unknown";                      # First we handle CDD. This is a bit complicated, because
                     # Pull in the CELLO attribute. There will never be more than one.  
                     # If we have one, it's a feature attribute AND a keyword.  
                     my @celloData = keys %{$attributeHash{CELLO}};  
                     if (@celloData) {  
                         $celloValue = $celloData[0];  
                         push @keywords, $celloValue;  
                     }  
                     # Now we handle CDD. This is a bit more complicated, because  
602                      # there are multiple CDDs per protein.                      # there are multiple CDDs per protein.
603                      if (exists $attributeHash{CDD}) {                      if (exists $attributeHash{CDD}) {
604                          # Get the hash of CDD IDs to scores for this feature. We                          # Get the hash of CDD IDs to scores for this feature. We
# Line 557  Line 607 
607                          my @cddData = sort keys %{$cddHash};                          my @cddData = sort keys %{$cddHash};
608                          for my $cdd (@cddData) {                          for my $cdd (@cddData) {
609                              # Extract the score for this CDD and decode it.                              # Extract the score for this CDD and decode it.
610                              my ($codeScore) = split(/\s*,\s*/, $cddHash->{$cdd}->[1]);                              my ($codeScore) = split(/\s*[,;]\s*/, $cddHash->{$cdd}->[0]);
611                              my $realScore = FIGRules::DecodeScore($codeScore);                              my $realScore = FIGRules::DecodeScore($codeScore);
612                              # We can't afford to crash because of a bad attribute                              # We can't afford to crash because of a bad attribute
613                              # value, hence the IF below.                              # value, hence the IF below.
614                              if (! defined($realScore)) {                              if (! defined($realScore)) {
615                                  # Bad score, so count it.                                  # Bad score, so count it.
616                                  $loadFeature->Add('badCDDscore');                                  $loadFeature->Add('badCDDscore');
617                                    Trace("CDD score \"$codeScore\" for feature $featureID invalid.") if T(3);
618                              } else {                              } else {
619                                  # Create the connection.                                  # Create the connection.
620                                  $loadIsPresentOnProteinOf->Put($cdd, $featureID, $realScore);                                  $loadIsPresentOnProteinOf->Put($cdd, $featureID, $realScore);
# Line 575  Line 626 
626                              }                              }
627                          }                          }
628                      }                      }
629                      # Now we need to bust up hyphenated words in the keyword                      # Next we do PSORT cell locations. Here the confidence value
630                      # list. We keep them separate and put them at the end so                      # could have the value "unknown", which we translate to -1.
631                      # the original word order is available.                      if (exists $attributeHash{PSORT}) {
632                      my $keywordString = "";                          # This will be a hash of cell locations to confidence
633                      my $bustedString = "";                          # factors.
634                      for my $keyword (@keywords) {                          my $psortHash = $attributeHash{PSORT};
635                          if (length $keyword >= 3) {                          for my $psort (keys %{$psortHash}) {
636                              $keywordString .= " $keyword";                              # Get the confidence, and convert it to a number if necessary.
637                              if ($keyword =~ /-/) {                              my $confidence = $psortHash->{$psort};
638                                  my @words = split /-/, $keyword;                              if ($confidence eq 'unknown') {
639                                  $bustedString .= join(" ", "", @words);                                  $confidence = -1;
640                                }
641                                $loadIsPossiblePlaceFor->Put($psort, $featureID, $confidence);
642                                # If this cell location does not yet exist, create its record.
643                                if (! exists $cellLocation{$psort}) {
644                                    $cellLocation{$psort} = 1;
645                                    $loadCellLocation->Put($psort);
646                                }
647                                # If this is a significant location, add it as a keyword.
648                                if ($confidence > 2.5) {
649                                    push @keywords, $psort;
650                              }                              }
651                          }                          }
652                      }                      }
653                      $keywordString .= $bustedString;                      # Phobius data is next. This consists of the signal peptide location and
654                        # the transmembrane locations.
655                        my $signalList = "";
656                        my $transList = "";
657                        if (exists $attributeHash{Phobius}) {
658                            # This will be a hash of two keys (transmembrane and signal) to
659                            # location strings. If there's no value, we stuff in an empty string.
660                            $signalList = ($attributeHash{Phobius}->{signal} || "");
661                            $transList = ($attributeHash{Phobius}->{transmembrane} || "");
662                        }
663                        # Here are some more numbers: isoelectric point, molecular weight, and
664                        # the similar-to-human flag.
665                        my $isoelectric = 0;
666                        if (exists $attributeHash{isoelectric_point}) {
667                            $isoelectric = $attributeHash{isoelectric_point}->{""};
668                        }
669                        my $similarToHuman = 0;
670                        if (exists $attributeHash{similar_to_human} && $attributeHash{similar_to_human}->{""} eq 'yes') {
671                            $similarToHuman = 1;
672                        }
673                        my $molecularWeight = 0;
674                        if (exists $attributeHash{molecular_weight}) {
675                            $molecularWeight = $attributeHash{molecular_weight}->{""};
676                        }
677                        # Create the keyword string.
678                        my $keywordString = join(" ", @keywords);
679                        Trace("Real keyword string for $featureID: $keywordString.") if T(4);
680                      # Get rid of annoying punctuation.                      # Get rid of annoying punctuation.
681                      $keywordString =~ s/[();]//g;                      $keywordString =~ s/[();@#\/]/ /g;
682                      # Clean the keyword list.                      # Get the list of keywords in the keyword string.
683                      my $cleanWords = $sprout->CleanKeywords($keywordString);                      my @realKeywords = grep { $biowords->IsWord($_) } $biowords->Split($keywordString);
684                        # We need to do two things here: create the keyword string for the feature table
685                        # and write records to the keyword and stem files. The stuff we write to
686                        # the files will be taken from the following two hashes. The stuff used
687                        # to create the keyword string will be taken from the list.
688                        my (%keys, %stems, @realStems);
689                        for my $keyword (@realKeywords) {
690                            # Compute the stem and phonex for this keyword.
691                            my ($stem, $phonex) = $biowords->StemLookup($keyword);
692                            # Only proceed if a stem comes back. If no stem came back, it's a
693                            # stop word and we throw it away.
694                            if ($stem) {
695                                $keys{$keyword} = $stem;
696                                $stems{$stem} = $phonex;
697                                push @realStems, $stem;
698                            }
699                        }
700                        # Now create the keyword string.
701                        my $cleanWords = join(" ", @realStems);
702                      Trace("Keyword string for $featureID: $cleanWords") if T(4);                      Trace("Keyword string for $featureID: $cleanWords") if T(4);
703                        # Write the stem and keyword records.
704                        for my $stem (keys %stems) {
705                            Tracer::PutLine($stemh, [$stem, $stems{$stem}]);
706                        }
707                        for my $key (keys %keys) {
708                            # The stem goes first in this file, because we want to sort
709                            # by stem and then keyword.
710                            Tracer::PutLine($keyh, [$keys{$key}, $key]);
711                        }
712                      # Now we need to process the feature's locations. First, we split them up.                      # Now we need to process the feature's locations. First, we split them up.
713                      my @locationList = split /\s*,\s*/, $locations;                      my @locationList = split /\s*,\s*/, $locations;
714                      # Next, we convert them to Sprout location objects.                      # Next, we convert them to Sprout location objects.
715                      my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;                      my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;
716                      # Assemble them into a sprout location string for later.                      # Assemble them into a sprout location string for later.
717                      my $locationString = join(", ", map { $_->String } @locObjectList);                      my $locationString = join(", ", map { $_->String } @locObjectList);
718                        # We'll store the sequence length in here.
719                        my $sequenceLength = 0;
720                      # This part is the roughest. We need to relate the features to contig                      # This part is the roughest. We need to relate the features to contig
721                      # locations, and the locations must be split so that none of them exceed                      # locations, and the locations must be split so that none of them exceed
722                      # the maximum segment size. This simplifies the genes_in_region processing                      # the maximum segment size. This simplifies the genes_in_region processing
# Line 608  Line 724 
724                      my $i = 1;                      my $i = 1;
725                      # Loop through the locations.                      # Loop through the locations.
726                      for my $locObject (@locObjectList) {                      for my $locObject (@locObjectList) {
727                            # Record the length.
728                            $sequenceLength += $locObject->Length;
729                          # Split this location into a list of chunks.                          # Split this location into a list of chunks.
730                          my @locOList = ();                          my @locOList = ();
731                          while (my $peeling = $locObject->Peel($chunkSize)) {                          while (my $peeling = $locObject->Peel($chunkSize)) {
# Line 623  Line 741 
741                              $i++;                              $i++;
742                          }                          }
743                      }                      }
744                      # Finally, reassemble the location objects into a list of Sprout location strings.                      # Now we get some ancillary flags.
745                        my $locked = $fig->is_locked_fid($featureID);
746                        my $in_genbank = $fig->peg_in_gendb($featureID);
747                      # Create the feature record.                      # Create the feature record.
748                      $loadFeature->Put($featureID, 1, $user, $quality, $celloValue, $type, $assignment, $cleanWords, $locationString);                      $loadFeature->Put($featureID, 1, $user, $quality, $type, $in_genbank, $isoelectric, $locked, $molecularWeight,
749                                          $sequenceLength, $signalList, $similarToHuman, $assignment, $cleanWords, $locationString,
750                                          $transList);
751                  }                  }
752              }              }
753              Trace("Genome $genomeID processed.") if T(3);              Trace("Genome $genomeID processed.") if T(3);
754          }          }
755      }      }
756        Trace("Sorting keywords.") if T(2);
757        # Now we need to load the keyword table from the key and stem files.
758        close $keyh;
759        close $stemh;
760        Trace("Loading keywords.") if T(2);
761        $keyh = Open(undef, "<$keyFileName");
762        $stemh = Open(undef, "<$stemFileName");
763        # We'll count the keywords in here, for tracing purposes.
764        my $count = 0;
765        # These variables track the current stem's data. When an incoming
766        # keyword's stem changes, these will be recomputed.
767        my ($currentStem, $currentPhonex, $currentCount);
768        # Prime the loop by reading the first stem in the stem file.
769        my ($nextStem, $nextPhonex) = Tracer::GetLine($stemh);
770        # Loop through the keyword file.
771        while (! eof $keyh) {
772            # Read this keyword.
773            my ($thisStem, $thisKey) = Tracer::GetLine($keyh);
774            # Check to see if it's the new stem yet.
775            if ($thisStem ne $currentStem) {
776                # Yes. It's a terrible error if it's not also the next stem.
777                if ($thisStem ne $nextStem) {
778                    Confess("Error in stem file. Expected \"$nextStem\", but found \"$thisStem\".");
779                } else {
780                    # Here we're okay.
781                    ($currentStem, $currentPhonex) = ($nextStem, $nextPhonex);
782                    # Count the number of features for this stem.
783                    $currentCount = 0;
784                    while ($nextStem eq $thisStem) {
785                        ($nextStem, $nextPhonex) = Tracer::GetLine($stemh);
786                        $currentCount++;
787                    }
788                }
789            }
790            # Now $currentStem is the same as $thisStem, and the other $current-vars
791            # contain the stem's data (phonex and count).
792            $loadKeyword->Put($thisKey, $currentCount, $currentPhonex, $currentStem);
793            if (++$count % 1000 == 0 && T(3)) {
794                Trace("$count keywords loaded.");
795            }
796        }
797        Trace("$count keywords loaded into keyword table.") if T(2);
798      # Finish the loads.      # Finish the loads.
799      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
800      return $retVal;      return $retVal;
# Line 668  Line 832 
832      ConsistsOfGenomes      ConsistsOfGenomes
833      GenomeSubset      GenomeSubset
834      HasGenomeSubset      HasGenomeSubset
     Catalyzes  
835      Diagram      Diagram
836      RoleOccursIn      RoleOccursIn
837        SubsystemHopeNotes
838    
839  =over 4  =over 4
840    
# Line 717  Line 881 
881      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset');      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset');
882      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset');      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset');
883      my $loadSubsystemClass = $self->_TableLoader('SubsystemClass');      my $loadSubsystemClass = $self->_TableLoader('SubsystemClass');
884        my $loadSubsystemHopeNotes = $self->_TableLoader('SubsystemHopeNotes');
885      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
886          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
887      } else {      } else {
# Line 741  Line 906 
906                  # Create the subsystem record.                  # Create the subsystem record.
907                  my $curator = $sub->get_curator();                  my $curator = $sub->get_curator();
908                  my $notes = $sub->get_notes();                  my $notes = $sub->get_notes();
909                  $loadSubsystem->Put($subsysID, $curator, $notes);                  my $version = $sub->get_version();
910                    my $description = $sub->get_description();
911                    $loadSubsystem->Put($subsysID, $curator, $version, $description, $notes);
912                    # Add the hope notes.
913                    my $hopeNotes = $sub->get_hope_curation_notes();
914                    if ($hopeNotes) {
915                        $loadSubsystemHopeNotes->Put($sub, $hopeNotes);
916                    }
917                  # Now for the classification string. This comes back as a list                  # Now for the classification string. This comes back as a list
918                  # reference and we convert it to a space-delimited string.                  # reference and we convert it to a space-delimited string.
919                  my $classList = $fig->subsystem_classification($subsysID);                  my $classList = $fig->subsystem_classification($subsysID);
# Line 751  Line 923 
923                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {
924                      # Get the role's abbreviation.                      # Get the role's abbreviation.
925                      my $abbr = $sub->get_role_abbr($col);                      my $abbr = $sub->get_role_abbr($col);
926                        # Get its essentiality.
927                        my $aux = $fig->is_aux_role_in_subsystem($subsysID, $roleID);
928                        # Get its reaction note.
929                        my $hope_note = $sub->get_hope_reaction_notes($roleID) || "";
930                      # Connect to this role.                      # Connect to this role.
931                      $loadOccursInSubsystem->Add("roleIn");                      $loadOccursInSubsystem->Add("roleIn");
932                      $loadOccursInSubsystem->Put($roleID, $subsysID, $abbr, $col);                      $loadOccursInSubsystem->Put($roleID, $subsysID, $abbr, $aux, $col, $hope_note);
933                      # If it's a new role, add it to the role table.                      # If it's a new role, add it to the role table.
934                      if (! exists $roleData{$roleID}) {                      if (! exists $roleData{$roleID}) {
935                          # Get the role's abbreviation.                          # Get the role's abbreviation.
# Line 897  Line 1073 
1073                  }                  }
1074              }              }
1075          }          }
         # Before we leave, we must create the Catalyzes table. We start with the reactions,  
         # then use the "ecToRoles" table to convert EC numbers to role IDs.  
         my @reactions = $fig->all_reactions();  
         for my $reactionID (@reactions) {  
             # Get this reaction's list of roles. The results will be EC numbers.  
             my @ecs = $fig->catalyzed_by($reactionID);  
             # Loop through the roles, creating catalyzation records.  
             for my $thisEC (@ecs) {  
                 if (exists $ecToRoles{$thisEC}) {  
                     for my $thisRole (@{$ecToRoles{$thisEC}}) {  
                         $loadCatalyzes->Put($thisRole, $reactionID);  
                     }  
                 }  
             }  
         }  
1076      }      }
1077      # Finish the load.      # Finish the load.
1078      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
# Line 1289  Line 1450 
1450      IsIdentifiedByCAS      IsIdentifiedByCAS
1451      HasCompoundName      HasCompoundName
1452      IsAComponentOf      IsAComponentOf
1453        Scenario
1454        Catalyzes
1455        HasScenario
1456        IsInputFor
1457        IsOutputOf
1458        ExcludesReaction
1459        IncludesReaction
1460        IsOnDiagram
1461        IncludesReaction
1462    
1463  This method proceeds reaction by reaction rather than genome by genome.  This method proceeds reaction by reaction rather than genome by genome.
1464    
# Line 1316  Line 1486 
1486      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf');      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf');
1487      my $loadIsIdentifiedByCAS = $self->_TableLoader('IsIdentifiedByCAS');      my $loadIsIdentifiedByCAS = $self->_TableLoader('IsIdentifiedByCAS');
1488      my $loadHasCompoundName = $self->_TableLoader('HasCompoundName');      my $loadHasCompoundName = $self->_TableLoader('HasCompoundName');
1489        my $loadScenario = $self->_TableLoader('Scenario');
1490        my $loadHasScenario = $self->_TableLoader('HasScenario');
1491        my $loadIsInputFor = $self->_TableLoader('IsInputFor');
1492        my $loadIsOutputOf = $self->_TableLoader('IsOutputOf');
1493        my $loadIsOnDiagram = $self->_TableLoader('IsOnDiagram');
1494        my $loadIncludesReaction = $self->_TableLoader('IncludesReaction');
1495        my $loadExcludesReaction = $self->_TableLoader('ExcludesReaction');
1496        my $loadCatalyzes = $self->_TableLoader('Catalyzes');
1497      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
1498          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1499      } else {      } else {
# Line 1324  Line 1502 
1502          my %compoundNames = ();          my %compoundNames = ();
1503          my %compoundCASes = ();          my %compoundCASes = ();
1504          # First we create the compounds.          # First we create the compounds.
1505          my @compounds = $fig->all_compounds();          my %compounds = map { $_ => 1 } $fig->all_compounds();
1506          for my $cid (@compounds) {          for my $cid (keys %compounds) {
1507              # Check for names.              # Check for names.
1508              my @names = $fig->names_of_compound($cid);              my @names = $fig->names_of_compound($cid);
1509              # Each name will be given a priority number, starting with 1.              # Each name will be given a priority number, starting with 1.
# Line 1355  Line 1533 
1533          # we initialize the discriminator index. This is a single integer used to ensure          # we initialize the discriminator index. This is a single integer used to ensure
1534          # duplicate elements in a reaction are not accidentally collapsed.          # duplicate elements in a reaction are not accidentally collapsed.
1535          my $discrim = 0;          my $discrim = 0;
1536          my @reactions = $fig->all_reactions();          my %reactions = map { $_ => 1 } $fig->all_reactions();
1537          for my $reactionID (@reactions) {          for my $reactionID (keys %reactions) {
1538              # Create the reaction record.              # Create the reaction record.
1539              $loadReaction->Put($reactionID, $fig->reversible($reactionID));              $loadReaction->Put($reactionID, $fig->reversible($reactionID));
1540              # Compute the reaction's URL.              # Compute the reaction's URL.
# Line 1379  Line 1557 
1557                  }                  }
1558              }              }
1559          }          }
1560            # Now we run through the subsystems and roles, generating the scenarios
1561            # and connecting the reactions. We'll need some hashes to prevent
1562            # duplicates and a counter for compound group keys.
1563            my %roles = ();
1564            my %scenarios = ();
1565            my @subsystems = $fig->all_subsystems();
1566            for my $subName (@subsystems) {
1567                my $sub = $fig->get_subsystem($subName);
1568                Trace("Processing $subName reactions.") if T(3);
1569                # Get the subsystem's reactions.
1570                my %reactions = $sub->get_hope_reactions();
1571                # Loop through the roles, connecting them to the reactions.
1572                for my $role (keys %reactions) {
1573                    # Only process this role if it is new.
1574                    if (! $roles{$role}) {
1575                        $roles{$role} = 1;
1576                        my @reactions = @{$reactions{$role}};
1577                        for my $reaction (@reactions) {
1578                            $loadCatalyzes->Put($role, $reaction);
1579                        }
1580                    }
1581                }
1582                Trace("Processing $subName scenarios.") if T(3);
1583                # Get the subsystem's scenarios.
1584                my @scenarioNames = $sub->get_hope_scenario_names();
1585                # Loop through the scenarios, creating scenario data.
1586                for my $scenarioName (@scenarioNames) {
1587                    # Link this scenario to this subsystem.
1588                    $loadHasScenario->Put($subName, $scenarioName);
1589                    # If this scenario is new, we need to create it.
1590                    if (! $scenarios{$scenarioName}) {
1591                        Trace("Creating scenario $scenarioName.") if T(3);
1592                        $scenarios{$scenarioName} = 1;
1593                        # Create the scenario itself.
1594                        $loadScenario->Put($scenarioName);
1595                        # Attach the input compounds.
1596                        for my $input ($sub->get_hope_input_compounds($scenarioName)) {
1597                            $loadIsInputFor->Put($input, $scenarioName);
1598                        }
1599                        # Now we need to set up the output compounds. They come in two
1600                        # groups, which we mark 0 and 1.
1601                        my $outputGroup = 0;
1602                        # Set up the output compounds.
1603                        for my $outputGroup ($sub->get_hope_output_compounds($scenarioName)) {
1604                            # Attach the compounds.
1605                            for my $compound (@$outputGroup) {
1606                                $loadIsOutputOf->Put($scenarioName, $compound, $outputGroup);
1607                            }
1608                        }
1609                        # Create the reaction lists.
1610                        my @addReactions = $sub->get_hope_additional_reactions($scenarioName);
1611                        for my $reaction (@addReactions) {
1612                            $loadIncludesReaction->Put($scenarioName, $reaction);
1613                        }
1614                        my @notReactions = $sub->get_hope_ignore_reactions($scenarioName);
1615                        for my $reaction (@notReactions) {
1616                            $loadExcludesReaction->Put($scenarioName, $reaction);
1617                        }
1618                        # Link the maps.
1619                        my @maps = $sub->get_hope_map_ids($scenarioName);
1620                        for my $map (@maps) {
1621                            $loadIsOnDiagram->Put($scenarioName, "map$map");
1622                        }
1623                    }
1624                }
1625            }
1626      }      }
1627      # Finish the load.      # Finish the load.
1628      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();

Legend:
Removed from v.1.90  
changed lines
  Added in v.1.94

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3