[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.92, Sun Mar 23 16:33:15 2008 UTC revision 1.93, Tue Sep 9 21:02:10 2008 UTC
# Line 13  Line 13 
13      use BasicLocation;      use BasicLocation;
14      use HTML;      use HTML;
15      use AliasAnalysis;      use AliasAnalysis;
16        use BioWords;
17    
18  =head1 Sprout Load Methods  =head1 Sprout Load Methods
19    
# Line 354  Line 355 
355      FeatureIEDB      FeatureIEDB
356      CDD      CDD
357      IsPresentOnProteinOf      IsPresentOnProteinOf
358        CellLocation
359        IsPossiblePlaceFor
360        ExternalDatabase
361        IsAlsoFoundIn
362        Keyword
363    
364  =over 4  =over 4
365    
# Line 388  Line 394 
394      my $loadFeatureIEDB = $self->_TableLoader('FeatureIEDB');      my $loadFeatureIEDB = $self->_TableLoader('FeatureIEDB');
395      my $loadCDD = $self->_TableLoader('CDD');      my $loadCDD = $self->_TableLoader('CDD');
396      my $loadIsPresentOnProteinOf = $self->_TableLoader('IsPresentOnProteinOf');      my $loadIsPresentOnProteinOf = $self->_TableLoader('IsPresentOnProteinOf');
397        my $loadCellLocation = $self->_TableLoader('CellLocation');
398        my $loadIsPossiblePlaceFor = $self->_TableLoader('IsPossiblePlaceFor');
399        my $loadIsAlsoFoundIn = $self->_TableLoader('IsAlsoFoundIn');
400        my $loadExternalDatabase = $self->_TableLoader('ExternalDatabase');
401        my $loadKeyword = $self->_TableLoader('Keyword');
402      # Get the subsystem hash.      # Get the subsystem hash.
403      my $subHash = $self->{subsystems};      my $subHash = $self->{subsystems};
404      # Get the property keys.      # Get the property keys.
405      my $propKeys = $self->{propKeys};      my $propKeys = $self->{propKeys};
406      # Create a hashes to hold CDD and alias values.      # Create hashes to hold CDD, Cell Location (PSORT), External Database, and alias values.
407      my %CDD = ();      my %CDD = ();
408      my %alias = ();      my %alias = ();
409        my %cellLocation = ();
410        my %xdb = ();
411        # Create the bio-words object.
412        my $biowords = BioWords->new(cache => 0);
413        # One of the things we have to do here is build the keyword table, and the keyword
414        # table needs to contain the originating text and feature count for each stem. Unfortunately,
415        # the number of distinct keywords is so large it causes PERL to hang if we try to
416        # keep them in memory. As a result, we need to track them using disk files.
417        # Our approach will be to use two sequential files. One will contain stems and phonexes.
418        # Each time a stem occurs in a feature, a record will be written to that file. The stem
419        # file can then be sorted and collated to determine the number of features for each
420        # stem. A separate file will contain keywords and stems. This last file
421        # will be subjected to a sort unique on stem/keyword. The file is then merged
422        # with the stem file to create the keyword table relation (keyword, stem, phonex, count).
423        my $stemFileName = "$FIG_Config::temp/stems$$.tbl";
424        my $keyFileName = "$FIG_Config::temp/keys$$.tbl";
425        my $stemh = Open(undef, "| sort -T\"$FIG_Config::temp\" -t\"\t\" -k1,1 >$stemFileName");
426        my $keyh = Open(undef, "| sort -T\"$FIG_Config::temp\" -t\"\t\" -u -k1,1 -k2,2 >$keyFileName");
427      # Get the maximum sequence size. We need this later for splitting up the      # Get the maximum sequence size. We need this later for splitting up the
428      # locations.      # locations.
429      my $chunkSize = $self->{sprout}->MaxSegment();      my $chunkSize = $self->{sprout}->MaxSegment();
# Line 457  Line 486 
486                              $alias{$alias} = 1;                              $alias{$alias} = 1;
487                          }                          }
488                      }                      }
489                      # Add the corresponding IDs. Note we have to remove the FIG ID from the                      # Add the corresponding IDs. We ask for 2-tuples of the form (id, database).
490                      # return list. It's already among the keywords.                      my @corresponders = $fig->get_corresponding_ids($featureID, 1);
491                      my @corresponders = grep { $_ !~ /^fig/} $fig->get_corresponding_ids($featureID);                      for my $tuple (@corresponders) {
492                      push @keywords, @corresponders;                          my ($id, $xdb) = @{$tuple};
493                            # Ignore SEED: that's us.
494                            if ($xdb ne 'SEED') {
495                                # Connect this ID to the feature.
496                                $loadIsAlsoFoundIn->Put($featureID, $xdb, $id);
497                                # Add it as a keyword.
498                                push @keywords, $id;
499                                # If this is a new database, create a record for it.
500                                if (! exists $xdb{$xdb}) {
501                                    $xdb{$xdb} = 1;
502                                    $loadExternalDatabase->Put($xdb);
503                                }
504                            }
505                        }
506                      Trace("Assignment for $featureID is: $assignment") if T(4);                      Trace("Assignment for $featureID is: $assignment") if T(4);
507                      # Break the assignment into words and shove it onto the                      # Break the assignment into words and shove it onto the
508                      # keyword list.                      # keyword list.
# Line 536  Line 578 
578                          push @keywords, 'iedb';                          push @keywords, 'iedb';
579                          $loadFeature->Add('iedb');                          $loadFeature->Add('iedb');
580                      }                      }
581                      # Now we have some other attributes we need to process. Currently,                      # Now we have some other attributes we need to process. To get
582                      # this is CDD and CELLO, but we expect the number to increase.                      # through them, we convert the attribute list for this feature
583                        # into a two-layer hash: key => subkey => value.
584                      my %attributeHash = ();                      my %attributeHash = ();
585                      for my $attrRow (@{$attributes->{$featureID}}) {                      for my $attrRow (@{$attributes->{$featureID}}) {
586                          my (undef, $key, @values) = @{$attrRow};                          my (undef, $key, @values) = @{$attrRow};
587                          $key =~ /^([^:]+)::(.+)/;                          my ($realKey, $subKey);
588                            if ($key =~ /^([^:]+)::(.+)/) {
589                                ($realKey, $subKey) = ($1, $2);
590                            } else {
591                                ($realKey, $subKey) = ($key, "");
592                            }
593                          if (exists $attributeHash{$1}) {                          if (exists $attributeHash{$1}) {
594                              $attributeHash{$1}->{$2} = \@values;                              $attributeHash{$1}->{$2} = \@values;
595                          } else {                          } else {
596                              $attributeHash{$1} = {$2 => \@values};                              $attributeHash{$1} = {$2 => \@values};
597                          }                          }
598                      }                      }
599                      my $celloValue = "unknown";                      # First we handle CDD. This is a bit complicated, because
                     # Pull in the CELLO attribute. There will never be more than one.  
                     # If we have one, it's a feature attribute AND a keyword.  
                     my @celloData = keys %{$attributeHash{CELLO}};  
                     if (@celloData) {  
                         $celloValue = $celloData[0];  
                         push @keywords, $celloValue;  
                     }  
                     # Now we handle CDD. This is a bit more complicated, because  
600                      # there are multiple CDDs per protein.                      # there are multiple CDDs per protein.
601                      if (exists $attributeHash{CDD}) {                      if (exists $attributeHash{CDD}) {
602                          # Get the hash of CDD IDs to scores for this feature. We                          # Get the hash of CDD IDs to scores for this feature. We
# Line 565  Line 605 
605                          my @cddData = sort keys %{$cddHash};                          my @cddData = sort keys %{$cddHash};
606                          for my $cdd (@cddData) {                          for my $cdd (@cddData) {
607                              # Extract the score for this CDD and decode it.                              # Extract the score for this CDD and decode it.
608                              my ($codeScore) = split(/\s*,\s*/, $cddHash->{$cdd}->[1]);                              my ($codeScore) = split(/\s*[,;]\s*/, $cddHash->{$cdd}->[0]);
609                              my $realScore = FIGRules::DecodeScore($codeScore);                              my $realScore = FIGRules::DecodeScore($codeScore);
610                              # We can't afford to crash because of a bad attribute                              # We can't afford to crash because of a bad attribute
611                              # value, hence the IF below.                              # value, hence the IF below.
612                              if (! defined($realScore)) {                              if (! defined($realScore)) {
613                                  # Bad score, so count it.                                  # Bad score, so count it.
614                                  $loadFeature->Add('badCDDscore');                                  $loadFeature->Add('badCDDscore');
615                                    Trace("CDD score \"$codeScore\" for feature $featureID invalid.") if T(3);
616                              } else {                              } else {
617                                  # Create the connection.                                  # Create the connection.
618                                  $loadIsPresentOnProteinOf->Put($cdd, $featureID, $realScore);                                  $loadIsPresentOnProteinOf->Put($cdd, $featureID, $realScore);
# Line 583  Line 624 
624                              }                              }
625                          }                          }
626                      }                      }
627                      # Now we need to bust up hyphenated words in the keyword                      # Next we do PSORT cell locations. Here the confidence value
628                      # list. We keep them separate and put them at the end so                      # could have the value "unknown", which we translate to -1.
629                      # the original word order is available.                      if (exists $attributeHash{PSORT}) {
630                      my $keywordString = "";                          # This will be a hash of cell locations to confidence
631                      my $bustedString = "";                          # factors.
632                      for my $keyword (@keywords) {                          my $psortHash = $attributeHash{PSORT};
633                          if (length $keyword >= 3) {                          for my $psort (keys %{$psortHash}) {
634                              $keywordString .= " $keyword";                              # Get the confidence, and convert it to a number if necessary.
635                              if ($keyword =~ /-/) {                              my $confidence = $psortHash->{$psort};
636                                  my @words = split /-/, $keyword;                              if ($confidence eq 'unknown') {
637                                  $bustedString .= join(" ", "", @words);                                  $confidence = -1;
638                                }
639                                $loadIsPossiblePlaceFor->Put($psort, $featureID, $confidence);
640                                # If this cell location does not yet exist, create its record.
641                                if (! exists $cellLocation{$psort}) {
642                                    $cellLocation{$psort} = 1;
643                                    $loadCellLocation->Put($psort);
644                                }
645                                # If this is a significant location, add it as a keyword.
646                                if ($confidence > 2.5) {
647                                    push @keywords, $psort;
648                              }                              }
649                          }                          }
650                      }                      }
651                      $keywordString .= $bustedString;                      # Phobius data is next. This consists of the signal peptide location and
652                        # the transmembrane locations.
653                        my $signalList = "";
654                        my $transList = "";
655                        if (exists $attributeHash{Phobius}) {
656                            # This will be a hash of two keys (transmembrane and signal) to
657                            # location strings. If there's no value, we stuff in an empty string.
658                            $signalList = ($attributeHash{Phobius}->{signal} || "");
659                            $transList = ($attributeHash{Phobius}->{transmembrane} || "");
660                        }
661                        # Here are some more numbers: isoelectric point, molecular weight, and
662                        # the similar-to-human flag.
663                        my $isoelectric = 0;
664                        if (exists $attributeHash{isoelectric_point}) {
665                            $isoelectric = $attributeHash{isoelectric_point}->{""};
666                        }
667                        my $similarToHuman = 0;
668                        if (exists $attributeHash{similar_to_human} && $attributeHash{similar_to_human}->{""} eq 'yes') {
669                            $similarToHuman = 1;
670                        }
671                        my $molecularWeight = 0;
672                        if (exists $attributeHash{molecular_weight}) {
673                            $molecularWeight = $attributeHash{molecular_weight}->{""};
674                        }
675                        # Create the keyword string.
676                        my $keywordString = join(" ", @keywords);
677                        Trace("Real keyword string for $featureID: $keywordString.") if T(4);
678                      # Get rid of annoying punctuation.                      # Get rid of annoying punctuation.
679                      $keywordString =~ s/[();]//g;                      $keywordString =~ s/[();@#\/]/ /g;
680                      # Clean the keyword list.                      # Get the list of keywords in the keyword string.
681                      my $cleanWords = $sprout->CleanKeywords($keywordString);                      my @realKeywords = grep { $biowords->IsWord($_) } $biowords->Split($keywordString);
682                        # We need to do two things here: create the keyword string for the feature table
683                        # and write records to the keyword and stem files. The stuff we write to
684                        # the files will be taken from the following two hashes. The stuff used
685                        # to create the keyword string will be taken from the list.
686                        my (%keys, %stems, @realStems);
687                        for my $keyword (@realKeywords) {
688                            # Compute the stem and phonex for this keyword.
689                            my ($stem, $phonex) = $biowords->StemLookup($keyword);
690                            # Only proceed if a stem comes back. If no stem came back, it's a
691                            # stop word and we throw it away.
692                            if ($stem) {
693                                $keys{$keyword} = $stem;
694                                $stems{$stem} = $phonex;
695                                push @realStems, $stem;
696                            }
697                        }
698                        # Now create the keyword string.
699                        my $cleanWords = join(" ", @realStems);
700                      Trace("Keyword string for $featureID: $cleanWords") if T(4);                      Trace("Keyword string for $featureID: $cleanWords") if T(4);
701                        # Write the stem and keyword records.
702                        for my $stem (keys %stems) {
703                            Tracer::PutLine($stemh, [$stem, $stems{$stem}]);
704                        }
705                        for my $key (keys %keys) {
706                            # The stem goes first in this file, because we want to sort
707                            # by stem and then keyword.
708                            Tracer::PutLine($keyh, [$keys{$key}, $key]);
709                        }
710                      # Now we need to process the feature's locations. First, we split them up.                      # Now we need to process the feature's locations. First, we split them up.
711                      my @locationList = split /\s*,\s*/, $locations;                      my @locationList = split /\s*,\s*/, $locations;
712                      # Next, we convert them to Sprout location objects.                      # Next, we convert them to Sprout location objects.
713                      my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;                      my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;
714                      # Assemble them into a sprout location string for later.                      # Assemble them into a sprout location string for later.
715                      my $locationString = join(", ", map { $_->String } @locObjectList);                      my $locationString = join(", ", map { $_->String } @locObjectList);
716                        # We'll store the sequence length in here.
717                        my $sequenceLength = 0;
718                      # This part is the roughest. We need to relate the features to contig                      # This part is the roughest. We need to relate the features to contig
719                      # locations, and the locations must be split so that none of them exceed                      # locations, and the locations must be split so that none of them exceed
720                      # the maximum segment size. This simplifies the genes_in_region processing                      # the maximum segment size. This simplifies the genes_in_region processing
# Line 616  Line 722 
722                      my $i = 1;                      my $i = 1;
723                      # Loop through the locations.                      # Loop through the locations.
724                      for my $locObject (@locObjectList) {                      for my $locObject (@locObjectList) {
725                            # Record the length.
726                            $sequenceLength += $locObject->Length;
727                          # Split this location into a list of chunks.                          # Split this location into a list of chunks.
728                          my @locOList = ();                          my @locOList = ();
729                          while (my $peeling = $locObject->Peel($chunkSize)) {                          while (my $peeling = $locObject->Peel($chunkSize)) {
# Line 631  Line 739 
739                              $i++;                              $i++;
740                          }                          }
741                      }                      }
742                      # Finally, reassemble the location objects into a list of Sprout location strings.                      # Now we get some ancillary flags.
743                        my $locked = $fig->is_locked_fid($featureID);
744                        my $in_genbank = $fig->peg_in_gendb($featureID);
745                      # Create the feature record.                      # Create the feature record.
746                      $loadFeature->Put($featureID, 1, $user, $quality, $celloValue, $type, $assignment, $cleanWords, $locationString);                      $loadFeature->Put($featureID, 1, $user, $quality, $type, $in_genbank, $isoelectric, $locked, $molecularWeight,
747                                          $sequenceLength, $signalList, $similarToHuman, $assignment, $cleanWords, $locationString,
748                                          $transList);
749                  }                  }
750              }              }
751              Trace("Genome $genomeID processed.") if T(3);              Trace("Genome $genomeID processed.") if T(3);
752          }          }
753      }      }
754        Trace("Sorting keywords.") if T(2);
755        # Now we need to load the keyword table from the key and stem files.
756        close $keyh;
757        close $stemh;
758        Trace("Loading keywords.") if T(2);
759        $keyh = Open(undef, "<$keyFileName");
760        $stemh = Open(undef, "<$stemFileName");
761        # We'll count the keywords in here, for tracing purposes.
762        my $count = 0;
763        # These variables track the current stem's data. When an incoming
764        # keyword's stem changes, these will be recomputed.
765        my ($currentStem, $currentPhonex, $currentCount);
766        # Prime the loop by reading the first stem in the stem file.
767        my ($nextStem, $nextPhonex) = Tracer::GetLine($stemh);
768        # Loop through the keyword file.
769        while (! eof $keyh) {
770            # Read this keyword.
771            my ($thisStem, $thisKey) = Tracer::GetLine($keyh);
772            # Check to see if it's the new stem yet.
773            if ($thisStem ne $currentStem) {
774                # Yes. It's a terrible error if it's not also the next stem.
775                if ($thisStem ne $nextStem) {
776                    Confess("Error in stem file. Expected \"$nextStem\", but found \"$thisStem\".");
777                } else {
778                    # Here we're okay.
779                    ($currentStem, $currentPhonex) = ($nextStem, $nextPhonex);
780                    # Count the number of features for this stem.
781                    $currentCount = 0;
782                    while ($nextStem eq $thisStem) {
783                        ($nextStem, $nextPhonex) = Tracer::GetLine($stemh);
784                        $currentCount++;
785                    }
786                }
787            }
788            # Now $currentStem is the same as $thisStem, and the other $current-vars
789            # contain the stem's data (phonex and count).
790            $loadKeyword->Put($thisKey, $currentCount, $currentPhonex, $currentStem);
791            if (++$count % 1000 == 0 && T(3)) {
792                Trace("$count keywords loaded.");
793            }
794        }
795        Trace("$count keywords loaded into keyword table.") if T(2);
796      # Finish the loads.      # Finish the loads.
797      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
798      return $retVal;      return $retVal;
# Line 676  Line 830 
830      ConsistsOfGenomes      ConsistsOfGenomes
831      GenomeSubset      GenomeSubset
832      HasGenomeSubset      HasGenomeSubset
     Catalyzes  
833      Diagram      Diagram
834      RoleOccursIn      RoleOccursIn
835        SubsystemHopeNotes
836    
837  =over 4  =over 4
838    
# Line 725  Line 879 
879      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset');      my $loadHasRoleSubset = $self->_TableLoader('HasRoleSubset');
880      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset');      my $loadHasGenomeSubset = $self->_TableLoader('HasGenomeSubset');
881      my $loadSubsystemClass = $self->_TableLoader('SubsystemClass');      my $loadSubsystemClass = $self->_TableLoader('SubsystemClass');
882        my $loadSubsystemHopeNotes = $self->_TableLoader('SubsystemHopeNotes');
883      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
884          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
885      } else {      } else {
# Line 749  Line 904 
904                  # Create the subsystem record.                  # Create the subsystem record.
905                  my $curator = $sub->get_curator();                  my $curator = $sub->get_curator();
906                  my $notes = $sub->get_notes();                  my $notes = $sub->get_notes();
907                    my $version = $sub->get_version();
908                  my $description = $sub->get_description();                  my $description = $sub->get_description();
909                  $loadSubsystem->Put($subsysID, $curator, $description, $notes);                  $loadSubsystem->Put($subsysID, $curator, $version, $description, $notes);
910                    # Add the hope notes.
911                    my $hopeNotes = $sub->get_hope_curation_notes();
912                    if ($hopeNotes) {
913                        $loadSubsystemHopeNotes->Put($sub, $hopeNotes);
914                    }
915                  # Now for the classification string. This comes back as a list                  # Now for the classification string. This comes back as a list
916                  # reference and we convert it to a space-delimited string.                  # reference and we convert it to a space-delimited string.
917                  my $classList = $fig->subsystem_classification($subsysID);                  my $classList = $fig->subsystem_classification($subsysID);
# Line 760  Line 921 
921                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {                  for (my $col = 0; defined($roleID = $sub->get_role($col)); $col++) {
922                      # Get the role's abbreviation.                      # Get the role's abbreviation.
923                      my $abbr = $sub->get_role_abbr($col);                      my $abbr = $sub->get_role_abbr($col);
924                        # Get its essentiality.
925                        my $aux = $fig->is_aux_role_in_subsystem($subsysID, $roleID);
926                        # Get its reaction note.
927                        my $hope_note = $sub->get_hope_reaction_notes($roleID) || "";
928                      # Connect to this role.                      # Connect to this role.
929                      $loadOccursInSubsystem->Add("roleIn");                      $loadOccursInSubsystem->Add("roleIn");
930                      $loadOccursInSubsystem->Put($roleID, $subsysID, $abbr, $col);                      $loadOccursInSubsystem->Put($roleID, $subsysID, $abbr, $aux, $col, $hope_note);
931                      # If it's a new role, add it to the role table.                      # If it's a new role, add it to the role table.
932                      if (! exists $roleData{$roleID}) {                      if (! exists $roleData{$roleID}) {
933                          # Get the role's abbreviation.                          # Get the role's abbreviation.
# Line 906  Line 1071 
1071                  }                  }
1072              }              }
1073          }          }
         # Before we leave, we must create the Catalyzes table. We start with the reactions,  
         # then use the "ecToRoles" table to convert EC numbers to role IDs.  
         my @reactions = $fig->all_reactions();  
         for my $reactionID (@reactions) {  
             # Get this reaction's list of roles. The results will be EC numbers.  
             my @ecs = $fig->catalyzed_by($reactionID);  
             # Loop through the roles, creating catalyzation records.  
             for my $thisEC (@ecs) {  
                 if (exists $ecToRoles{$thisEC}) {  
                     for my $thisRole (@{$ecToRoles{$thisEC}}) {  
                         $loadCatalyzes->Put($thisRole, $reactionID);  
                     }  
                 }  
             }  
         }  
1074      }      }
1075      # Finish the load.      # Finish the load.
1076      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();
# Line 1298  Line 1448 
1448      IsIdentifiedByCAS      IsIdentifiedByCAS
1449      HasCompoundName      HasCompoundName
1450      IsAComponentOf      IsAComponentOf
1451        Scenario
1452        Catalyzes
1453        HasScenario
1454        IsInputFor
1455        IsOutputOf
1456        ExcludesReaction
1457        IncludesReaction
1458        IsOnDiagram
1459        IncludesReaction
1460    
1461  This method proceeds reaction by reaction rather than genome by genome.  This method proceeds reaction by reaction rather than genome by genome.
1462    
# Line 1325  Line 1484 
1484      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf');      my $loadIsAComponentOf = $self->_TableLoader('IsAComponentOf');
1485      my $loadIsIdentifiedByCAS = $self->_TableLoader('IsIdentifiedByCAS');      my $loadIsIdentifiedByCAS = $self->_TableLoader('IsIdentifiedByCAS');
1486      my $loadHasCompoundName = $self->_TableLoader('HasCompoundName');      my $loadHasCompoundName = $self->_TableLoader('HasCompoundName');
1487        my $loadScenario = $self->_TableLoader('Scenario');
1488        my $loadHasScenario = $self->_TableLoader('HasScenario');
1489        my $loadIsInputFor = $self->_TableLoader('IsInputFor');
1490        my $loadIsOutputOf = $self->_TableLoader('IsOutputOf');
1491        my $loadIsOnDiagram = $self->_TableLoader('IsOnDiagram');
1492        my $loadIncludesReaction = $self->_TableLoader('IncludesReaction');
1493        my $loadExcludesReaction = $self->_TableLoader('ExcludesReaction');
1494        my $loadCatalyzes = $self->_TableLoader('Catalyzes');
1495      if ($self->{options}->{loadOnly}) {      if ($self->{options}->{loadOnly}) {
1496          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
1497      } else {      } else {
# Line 1333  Line 1500 
1500          my %compoundNames = ();          my %compoundNames = ();
1501          my %compoundCASes = ();          my %compoundCASes = ();
1502          # First we create the compounds.          # First we create the compounds.
1503          my @compounds = $fig->all_compounds();          my %compounds = map { $_ => 1 } $fig->all_compounds();
1504          for my $cid (@compounds) {          for my $cid (keys %compounds) {
1505              # Check for names.              # Check for names.
1506              my @names = $fig->names_of_compound($cid);              my @names = $fig->names_of_compound($cid);
1507              # Each name will be given a priority number, starting with 1.              # Each name will be given a priority number, starting with 1.
# Line 1364  Line 1531 
1531          # we initialize the discriminator index. This is a single integer used to ensure          # we initialize the discriminator index. This is a single integer used to ensure
1532          # duplicate elements in a reaction are not accidentally collapsed.          # duplicate elements in a reaction are not accidentally collapsed.
1533          my $discrim = 0;          my $discrim = 0;
1534          my @reactions = $fig->all_reactions();          my %reactions = map { $_ => 1 } $fig->all_reactions();
1535          for my $reactionID (@reactions) {          for my $reactionID (keys %reactions) {
1536              # Create the reaction record.              # Create the reaction record.
1537              $loadReaction->Put($reactionID, $fig->reversible($reactionID));              $loadReaction->Put($reactionID, $fig->reversible($reactionID));
1538              # Compute the reaction's URL.              # Compute the reaction's URL.
# Line 1388  Line 1555 
1555                  }                  }
1556              }              }
1557          }          }
1558            # Now we run through the subsystems and roles, generating the scenarios
1559            # and connecting the reactions. We'll need some hashes to prevent
1560            # duplicates and a counter for compound group keys.
1561            my %roles = ();
1562            my %scenarios = ();
1563            my @subsystems = $fig->all_subsystems();
1564            for my $subName (@subsystems) {
1565                my $sub = $fig->get_subsystem($subName);
1566                Trace("Processing $subName reactions.") if T(3);
1567                # Get the subsystem's reactions.
1568                my %reactions = $sub->get_hope_reactions();
1569                # Loop through the roles, connecting them to the reactions.
1570                for my $role (keys %reactions) {
1571                    # Only process this role if it is new.
1572                    if (! $roles{$role}) {
1573                        $roles{$role} = 1;
1574                        my @reactions = @{$reactions{$role}};
1575                        for my $reaction (@reactions) {
1576                            $loadCatalyzes->Put($role, $reaction);
1577                        }
1578                    }
1579                }
1580                Trace("Processing $subName scenarios.") if T(3);
1581                # Get the subsystem's scenarios.
1582                my @scenarioNames = $sub->get_hope_scenario_names();
1583                # Loop through the scenarios, creating scenario data.
1584                for my $scenarioName (@scenarioNames) {
1585                    # Link this scenario to this subsystem.
1586                    $loadHasScenario->Put($subName, $scenarioName);
1587                    # If this scenario is new, we need to create it.
1588                    if (! $scenarios{$scenarioName}) {
1589                        Trace("Creating scenario $scenarioName.") if T(3);
1590                        $scenarios{$scenarioName} = 1;
1591                        # Create the scenario itself.
1592                        $loadScenario->Put($scenarioName);
1593                        # Attach the input compounds.
1594                        for my $input ($sub->get_hope_input_compounds($scenarioName)) {
1595                            $loadIsInputFor->Put($input, $scenarioName);
1596                        }
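1597                        # Now we need to set up the output compounds. They come in two
1598                        # groups, which are meant to be marked 0 and 1. NOTE(review): the loop variable below reuses the name $outputGroup, shadowing the counter, so the group's array reference (not 0 or 1) is what gets passed to IsOutputOf.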
1599                        my $outputGroup = 0;
1600                        # Set up the output compounds.
1601                        for my $outputGroup ($sub->get_hope_output_compounds($scenarioName)) {
1602                            # Attach the compounds.
1603                            for my $compound (@$outputGroup) {
1604                                $loadIsOutputOf->Put($scenarioName, $compound, $outputGroup);
1605                            }
1606                        }
1607                        # Create the reaction lists.
1608                        my @addReactions = $sub->get_hope_additional_reactions($scenarioName);
1609                        for my $reaction (@addReactions) {
1610                            $loadIncludesReaction->Put($scenarioName, $reaction);
1611                        }
1612                        my @notReactions = $sub->get_hope_ignore_reactions($scenarioName);
1613                        for my $reaction (@notReactions) {
1614                            $loadExcludesReaction->Put($scenarioName, $reaction);
1615                        }
1616                        # Link the maps.
1617                        my @maps = $sub->get_hope_map_ids($scenarioName);
1618                        for my $map (@maps) {
1619                            $loadIsOnDiagram->Put($scenarioName, "map$map");
1620                        }
1621                    }
1622                }
1623            }
1624      }      }
1625      # Finish the load.      # Finish the load.
1626      my $retVal = $self->_FinishAll();      my $retVal = $self->_FinishAll();

Legend:
Removed from v.1.92  
changed lines
  Added in v.1.93

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3