[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory | Revision Log | View Patch

revision 1.89, Mon Nov 5 23:43:57 2007 UTC | revision 1.92, Sun Mar 23 16:33:15 2008 UTC
# Line 52  Line 52 
52    
53  =head3 new  =head3 new
54    
55  C<< my $spl = SproutLoad->new($sprout, $fig, $genomeFile, $subsysFile, $options); >>      my $spl = SproutLoad->new($sprout, $fig, $genomeFile, $subsysFile, $options);
56    
57  Construct a new Sprout Loader object, specifying the two participating databases and  Construct a new Sprout Loader object, specifying the two participating databases and
58  the name of the files containing the list of genomes and subsystems to use.  the name of the files containing the list of genomes and subsystems to use.
# Line 170  Line 170 
170          for my $subsystem (keys %subsystems) {          for my $subsystem (keys %subsystems) {
171              my $name = $subsystem;              my $name = $subsystem;
172              $name =~ s/_/ /g;              $name =~ s/_/ /g;
 #            my $classes = $fig->subsystem_classification($subsystem);  
 #            $name .= " " . join(" ", @{$classes});  
173              $subsystems{$subsystem} = $name;              $subsystems{$subsystem} = $name;
174          }          }
175      }      }
# Line 198  Line 196 
196    
197  =head3 LoadOnly  =head3 LoadOnly
198    
199  C<< my $flag = $spl->LoadOnly; >>      my $flag = $spl->LoadOnly;
200    
201  Return TRUE if we are in load-only mode, else FALSE.  Return TRUE if we are in load-only mode, else FALSE.
202    
# Line 212  Line 210 
210    
211  =head3 LoadGenomeData  =head3 LoadGenomeData
212    
213  C<< my $stats = $spl->LoadGenomeData(); >>      my $stats = $spl->LoadGenomeData();
214    
215  Load the Genome, Contig, and Sequence data from FIG into Sprout.  Load the Genome, Contig, and Sequence data from FIG into Sprout.
216    
# Line 257  Line 255 
255          Trace("Loading from existing files.") if T(2);          Trace("Loading from existing files.") if T(2);
256      } else {      } else {
257          Trace("Generating genome data.") if T(2);          Trace("Generating genome data.") if T(2);
258            # Get the full info for the FIG genomes.
259            my %genomeInfo = map { $_->[0] => { gname => $_->[1], szdna => $_->[2], maindomain => $_->[3],
260                                                pegs => $_->[4], rnas => $_->[5], complete => $_->[6] } } @{$fig->genome_info()};
261          # Now we loop through the genomes, generating the data for each one.          # Now we loop through the genomes, generating the data for each one.
262          for my $genomeID (sort keys %{$genomeHash}) {          for my $genomeID (sort keys %{$genomeHash}) {
263              Trace("Generating data for genome $genomeID.") if T(3);              Trace("Generating data for genome $genomeID.") if T(3);
# Line 286  Line 287 
287                  $group = $FIG_Config::otherGroup;                  $group = $FIG_Config::otherGroup;
288              }              }
289              close TMP;              close TMP;
290                # Get the contigs.
291                my @contigs = $fig->all_contigs($genomeID);
292                # Get this genome's info array.
293                my $info = $genomeInfo{$genomeID};
294              # Output the genome record.              # Output the genome record.
295              $loadGenome->Put($genomeID, $accessCode, $fig->is_complete($genomeID),              $loadGenome->Put($genomeID, $accessCode, $info->{complete}, scalar(@contigs),
296                               $dnaSize, $genus, $group, $species, $extra, $version, $taxonomy);                               $dnaSize, $genus, $info->{pegs}, $group, $info->{rnas}, $species, $extra, $version, $taxonomy);
297              # Now we loop through each of the genome's contigs.              # Now we loop through each of the genome's contigs.
             my @contigs = $fig->all_contigs($genomeID);  
298              for my $contigID (@contigs) {              for my $contigID (@contigs) {
299                  Trace("Processing contig $contigID for $genomeID.") if T(4);                  Trace("Processing contig $contigID for $genomeID.") if T(4);
300                  $loadContig->Add("contigIn");                  $loadContig->Add("contigIn");
# Line 328  Line 332 
332    
333  =head3 LoadFeatureData  =head3 LoadFeatureData
334    
335  C<< my $stats = $spl->LoadFeatureData(); >>      my $stats = $spl->LoadFeatureData();
336    
337  Load the feature data from FIG into Sprout.  Load the feature data from FIG into Sprout.
338    
# Line 453  Line 457 
457                              $alias{$alias} = 1;                              $alias{$alias} = 1;
458                          }                          }
459                      }                      }
460                        # Add the corresponding IDs. Note we have to remove the FIG ID from the
461                        # return list. It's already among the keywords.
462                        my @corresponders = grep { $_ !~ /^fig/} $fig->get_corresponding_ids($featureID);
463                        push @keywords, @corresponders;
464                      Trace("Assignment for $featureID is: $assignment") if T(4);                      Trace("Assignment for $featureID is: $assignment") if T(4);
465                      # Break the assignment into words and shove it onto the                      # Break the assignment into words and shove it onto the
466                      # keyword list.                      # keyword list.
# Line 599  Line 607 
607                      my @locationList = split /\s*,\s*/, $locations;                      my @locationList = split /\s*,\s*/, $locations;
608                      # Next, we convert them to Sprout location objects.                      # Next, we convert them to Sprout location objects.
609                      my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;                      my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;
610                        # Assemble them into a sprout location string for later.
611                        my $locationString = join(", ", map { $_->String } @locObjectList);
612                      # This part is the roughest. We need to relate the features to contig                      # This part is the roughest. We need to relate the features to contig
613                      # locations, and the locations must be split so that none of them exceed                      # locations, and the locations must be split so that none of them exceed
614                      # the maximum segment size. This simplifies the genes_in_region processing                      # the maximum segment size. This simplifies the genes_in_region processing
# Line 622  Line 632 
632                          }                          }
633                      }                      }
634                      # Finally, reassemble the location objects into a list of Sprout location strings.                      # Finally, reassemble the location objects into a list of Sprout location strings.
                     $locations = join(", ", map { $_->String } @locObjectList);  
635                      # Create the feature record.                      # Create the feature record.
636                      $loadFeature->Put($featureID, 1, $user, $quality, $celloValue, $type, $assignment, $cleanWords, $locations);                      $loadFeature->Put($featureID, 1, $user, $quality, $celloValue, $type, $assignment, $cleanWords, $locationString);
637                  }                  }
638              }              }
639              Trace("Genome $genomeID processed.") if T(3);              Trace("Genome $genomeID processed.") if T(3);
# Line 637  Line 646 
646    
647  =head3 LoadSubsystemData  =head3 LoadSubsystemData
648    
649  C<< my $stats = $spl->LoadSubsystemData(); >>      my $stats = $spl->LoadSubsystemData();
650    
651  Load the subsystem data from FIG into Sprout.  Load the subsystem data from FIG into Sprout.
652    
# Line 740  Line 749 
749                  # Create the subsystem record.                  # Create the subsystem record.
750                  my $curator = $sub->get_curator();                  my $curator = $sub->get_curator();
751                  my $notes = $sub->get_notes();                  my $notes = $sub->get_notes();
752                  $loadSubsystem->Put($subsysID, $curator, $notes);                  my $description = $sub->get_description();
753                    $loadSubsystem->Put($subsysID, $curator, $description, $notes);
754                  # Now for the classification string. This comes back as a list                  # Now for the classification string. This comes back as a list
755                  # reference and we convert it to a space-delimited string.                  # reference and we convert it to a space-delimited string.
756                  my $classList = $fig->subsystem_classification($subsysID);                  my $classList = $fig->subsystem_classification($subsysID);
# Line 919  Line 929 
929    
930  =head3 LoadPropertyData  =head3 LoadPropertyData
931    
932  C<< my $stats = $spl->LoadPropertyData(); >>      my $stats = $spl->LoadPropertyData();
933    
934  Load the attribute data from FIG into Sprout.  Load the attribute data from FIG into Sprout.
935    
# Line 1007  Line 1017 
1017    
1018  =head3 LoadAnnotationData  =head3 LoadAnnotationData
1019    
1020  C<< my $stats = $spl->LoadAnnotationData(); >>      my $stats = $spl->LoadAnnotationData();
1021    
1022  Load the annotation data from FIG into Sprout.  Load the annotation data from FIG into Sprout.
1023    
# Line 1114  Line 1124 
1124    
1125  =head3 LoadSourceData  =head3 LoadSourceData
1126    
1127  C<< my $stats = $spl->LoadSourceData(); >>      my $stats = $spl->LoadSourceData();
1128    
1129  Load the source data from FIG into Sprout.  Load the source data from FIG into Sprout.
1130    
# Line 1192  Line 1202 
1202    
1203  =head3 LoadExternalData  =head3 LoadExternalData
1204    
1205  C<< my $stats = $spl->LoadExternalData(); >>      my $stats = $spl->LoadExternalData();
1206    
1207  Load the external data from FIG into Sprout.  Load the external data from FIG into Sprout.
1208    
# Line 1272  Line 1282 
1282    
1283  =head3 LoadReactionData  =head3 LoadReactionData
1284    
1285  C<< my $stats = $spl->LoadReactionData(); >>      my $stats = $spl->LoadReactionData();
1286    
1287  Load the reaction data from FIG into Sprout.  Load the reaction data from FIG into Sprout.
1288    
# Line 1386  Line 1396 
1396    
1397  =head3 LoadSynonymData  =head3 LoadSynonymData
1398    
1399  C<< my $stats = $spl->LoadSynonymData(); >>      my $stats = $spl->LoadSynonymData();
1400    
1401  Load the synonym groups into Sprout.  Load the synonym groups into Sprout.
1402    
# Line 1431  Line 1441 
1441          if (! defined($result)) {          if (! defined($result)) {
1442              Confess("Database error in Synonym load: " . $sth->errstr());              Confess("Database error in Synonym load: " . $sth->errstr());
1443          } else {          } else {
1444                Trace("Processing synonym results.") if T(2);
1445              # Remember the current synonym.              # Remember the current synonym.
1446              my $current_syn = "";              my $current_syn = "";
1447              # Count the features.              # Count the features.
1448              my $featureCount = 0;              my $featureCount = 0;
1449                my $entryCount = 0;
1450              # Loop through the synonym/peg pairs.              # Loop through the synonym/peg pairs.
1451              while (my @row = $sth->fetchrow()) {              while (my @row = $sth->fetchrow()) {
1452                  # Get the synonym group ID and feature ID.                  # Get the synonym group ID and feature ID.
1453                  my ($syn_id, $peg) = @row;                  my ($syn_id, $peg) = @row;
1454                    # Count this row.
1455                    $entryCount++;
1456                    if ($entryCount % 1000 == 0) {
1457                        Trace("$entryCount rows processed.") if T(3);
1458                    }
1459                  # Insure it's for one of our genomes.                  # Insure it's for one of our genomes.
1460                  my $genomeID = FIG::genome_of($peg);                  my $genomeID = FIG::genome_of($peg);
1461                  if (exists $genomeHash->{$genomeID}) {                  if (exists $genomeHash->{$genomeID}) {
# Line 1457  Line 1474 
1474                      }                      }
1475                  }                  }
1476              }              }
1477                Trace("$entryCount rows produced $featureCount features.") if T(2);
1478          }          }
1479      }      }
1480      # Finish the load.      # Finish the load.
# Line 1466  Line 1484 
1484    
1485  =head3 LoadFamilyData  =head3 LoadFamilyData
1486    
1487  C<< my $stats = $spl->LoadFamilyData(); >>      my $stats = $spl->LoadFamilyData();
1488    
1489  Load the protein families into Sprout.  Load the protein families into Sprout.
1490    
# Line 1534  Line 1552 
1552    
1553  =head3 LoadDrugData  =head3 LoadDrugData
1554    
1555  C<< my $stats = $spl->LoadDrugData(); >>      my $stats = $spl->LoadDrugData();
1556    
1557  Load the drug target data into Sprout.  Load the drug target data into Sprout.
1558    
# Line 1668  Line 1686 
1686                          # Decode the score.                          # Decode the score.
1687                          my $realScore = FIGRules::DecodeScore($score);                          my $realScore = FIGRules::DecodeScore($score);
1688                          # Connect the PDB to the feature.                          # Connect the PDB to the feature.
1689                          $loadIsProteinForFeature->Put($pdbData->[0], $pdbID, $start, $realScore, $end);                          $loadIsProteinForFeature->Put($pdbID, $pdbData->[0], $start, $realScore, $end);
1690                      }                      }
1691                  }                  }
1692              }              }
# Line 1733  Line 1751 
1751    
1752  =head3 SpecialAttribute  =head3 SpecialAttribute
1753    
1754  C<< my $count = SproutLoad::SpecialAttribute($id, \@attributes, $idxMatch, \@idxValues, $pattern, $loader); >>      my $count = SproutLoad::SpecialAttribute($id, \@attributes, $idxMatch, \@idxValues, $pattern, $loader);
1755    
1756  Look for special attributes of a given type. A special attribute is found by comparing one of  Look for special attributes of a given type. A special attribute is found by comparing one of
1757  the columns of the incoming attribute list to a search pattern. If a match is found, then  the columns of the incoming attribute list to a search pattern. If a match is found, then
# Line 1909  Line 1927 
1927    
1928  =head3 GetGenomeAttributes  =head3 GetGenomeAttributes
1929    
1930  C<< my $aHashRef = GetGenomeAttributes($fig, $genomeID, \@fids, \@propKeys); >>      my $aHashRef = GetGenomeAttributes($fig, $genomeID, \@fids, \@propKeys);
1931    
1932  Return a hash of attributes keyed on feature ID. This method gets all the NMPDR-related  Return a hash of attributes keyed on feature ID. This method gets all the NMPDR-related
1933  attributes for all the features of a genome in a single call, then organizes them into  attributes for all the features of a genome in a single call, then organizes them into

Legend:
Removed from v.1.89  
changed lines
  Added in v.1.92

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3