[Bio] / Sprout / SproutLoad.pm Repository:
ViewVC logotype

Diff of /Sprout/SproutLoad.pm

Parent Directory | Revision Log | View Patch

revision 1.88, Mon Nov 5 22:52:06 2007 UTC | revision 1.90, Thu Dec 6 14:53:50 2007 UTC
# Line 52  Line 52 
52    
53  =head3 new  =head3 new
54    
55  C<< my $spl = SproutLoad->new($sprout, $fig, $genomeFile, $subsysFile, $options); >>      my $spl = SproutLoad->new($sprout, $fig, $genomeFile, $subsysFile, $options);
56    
57  Construct a new Sprout Loader object, specifying the two participating databases and  Construct a new Sprout Loader object, specifying the two participating databases and
58  the name of the files containing the list of genomes and subsystems to use.  the name of the files containing the list of genomes and subsystems to use.
# Line 198  Line 198 
198    
199  =head3 LoadOnly  =head3 LoadOnly
200    
201  C<< my $flag = $spl->LoadOnly; >>      my $flag = $spl->LoadOnly;
202    
203  Return TRUE if we are in load-only mode, else FALSE.  Return TRUE if we are in load-only mode, else FALSE.
204    
# Line 212  Line 212 
212    
213  =head3 LoadGenomeData  =head3 LoadGenomeData
214    
215  C<< my $stats = $spl->LoadGenomeData(); >>      my $stats = $spl->LoadGenomeData();
216    
217  Load the Genome, Contig, and Sequence data from FIG into Sprout.  Load the Genome, Contig, and Sequence data from FIG into Sprout.
218    
# Line 328  Line 328 
328    
329  =head3 LoadFeatureData  =head3 LoadFeatureData
330    
331  C<< my $stats = $spl->LoadFeatureData(); >>      my $stats = $spl->LoadFeatureData();
332    
333  Load the feature data from FIG into Sprout.  Load the feature data from FIG into Sprout.
334    
# Line 557  Line 557 
557                          my @cddData = sort keys %{$cddHash};                          my @cddData = sort keys %{$cddHash};
558                          for my $cdd (@cddData) {                          for my $cdd (@cddData) {
559                              # Extract the score for this CDD and decode it.                              # Extract the score for this CDD and decode it.
560                              my ($codeScore) = split(/\s*,\s*/, $cddHash->{$cdd}->[0]);                              my ($codeScore) = split(/\s*,\s*/, $cddHash->{$cdd}->[1]);
561                              my $realScore = FIGRules::DecodeScore($codeScore);                              my $realScore = FIGRules::DecodeScore($codeScore);
562                                # We can't afford to crash because of a bad attribute
563                                # value, hence the IF below.
564                                if (! defined($realScore)) {
565                                    # Bad score, so count it.
566                                    $loadFeature->Add('badCDDscore');
567                                } else {
568                              # Create the connection.                              # Create the connection.
569                              $loadIsPresentOnProteinOf->Put($cdd, $featureID, $realScore);                              $loadIsPresentOnProteinOf->Put($cdd, $featureID, $realScore);
570                              # If this CDD does not yet exist, create its record.                              # If this CDD does not yet exist, create its record.
# Line 568  Line 574 
574                              }                              }
575                          }                          }
576                      }                      }
577                        }
578                      # Now we need to bust up hyphenated words in the keyword                      # Now we need to bust up hyphenated words in the keyword
579                      # list. We keep them separate and put them at the end so                      # list. We keep them separate and put them at the end so
580                      # the original word order is available.                      # the original word order is available.
# Line 592  Line 599 
599                      my @locationList = split /\s*,\s*/, $locations;                      my @locationList = split /\s*,\s*/, $locations;
600                      # Next, we convert them to Sprout location objects.                      # Next, we convert them to Sprout location objects.
601                      my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;                      my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;
602                        # Assemble them into a sprout location string for later.
603                        my $locationString = join(", ", map { $_->String } @locObjectList);
604                      # This part is the roughest. We need to relate the features to contig                      # This part is the roughest. We need to relate the features to contig
605                      # locations, and the locations must be split so that none of them exceed                      # locations, and the locations must be split so that none of them exceed
606                      # the maximum segment size. This simplifies the genes_in_region processing                      # the maximum segment size. This simplifies the genes_in_region processing
# Line 615  Line 624 
624                          }                          }
625                      }                      }
626                      # Finally, reassemble the location objects into a list of Sprout location strings.                      # Finally, reassemble the location objects into a list of Sprout location strings.
                     $locations = join(", ", map { $_->String } @locObjectList);  
627                      # Create the feature record.                      # Create the feature record.
628                      $loadFeature->Put($featureID, 1, $user, $quality, $celloValue, $type, $assignment, $cleanWords, $locations);                      $loadFeature->Put($featureID, 1, $user, $quality, $celloValue, $type, $assignment, $cleanWords, $locationString);
629                  }                  }
630              }              }
631              Trace("Genome $genomeID processed.") if T(3);              Trace("Genome $genomeID processed.") if T(3);
# Line 630  Line 638 
638    
639  =head3 LoadSubsystemData  =head3 LoadSubsystemData
640    
641  C<< my $stats = $spl->LoadSubsystemData(); >>      my $stats = $spl->LoadSubsystemData();
642    
643  Load the subsystem data from FIG into Sprout.  Load the subsystem data from FIG into Sprout.
644    
# Line 912  Line 920 
920    
921  =head3 LoadPropertyData  =head3 LoadPropertyData
922    
923  C<< my $stats = $spl->LoadPropertyData(); >>      my $stats = $spl->LoadPropertyData();
924    
925  Load the attribute data from FIG into Sprout.  Load the attribute data from FIG into Sprout.
926    
# Line 1000  Line 1008 
1008    
1009  =head3 LoadAnnotationData  =head3 LoadAnnotationData
1010    
1011  C<< my $stats = $spl->LoadAnnotationData(); >>      my $stats = $spl->LoadAnnotationData();
1012    
1013  Load the annotation data from FIG into Sprout.  Load the annotation data from FIG into Sprout.
1014    
# Line 1107  Line 1115 
1115    
1116  =head3 LoadSourceData  =head3 LoadSourceData
1117    
1118  C<< my $stats = $spl->LoadSourceData(); >>      my $stats = $spl->LoadSourceData();
1119    
1120  Load the source data from FIG into Sprout.  Load the source data from FIG into Sprout.
1121    
# Line 1185  Line 1193 
1193    
1194  =head3 LoadExternalData  =head3 LoadExternalData
1195    
1196  C<< my $stats = $spl->LoadExternalData(); >>      my $stats = $spl->LoadExternalData();
1197    
1198  Load the external data from FIG into Sprout.  Load the external data from FIG into Sprout.
1199    
# Line 1265  Line 1273 
1273    
1274  =head3 LoadReactionData  =head3 LoadReactionData
1275    
1276  C<< my $stats = $spl->LoadReactionData(); >>      my $stats = $spl->LoadReactionData();
1277    
1278  Load the reaction data from FIG into Sprout.  Load the reaction data from FIG into Sprout.
1279    
# Line 1379  Line 1387 
1387    
1388  =head3 LoadSynonymData  =head3 LoadSynonymData
1389    
1390  C<< my $stats = $spl->LoadSynonymData(); >>      my $stats = $spl->LoadSynonymData();
1391    
1392  Load the synonym groups into Sprout.  Load the synonym groups into Sprout.
1393    
# Line 1424  Line 1432 
1432          if (! defined($result)) {          if (! defined($result)) {
1433              Confess("Database error in Synonym load: " . $sth->errstr());              Confess("Database error in Synonym load: " . $sth->errstr());
1434          } else {          } else {
1435                Trace("Processing synonym results.") if T(2);
1436              # Remember the current synonym.              # Remember the current synonym.
1437              my $current_syn = "";              my $current_syn = "";
1438              # Count the features.              # Count the features.
1439              my $featureCount = 0;              my $featureCount = 0;
1440                my $entryCount = 0;
1441              # Loop through the synonym/peg pairs.              # Loop through the synonym/peg pairs.
1442              while (my @row = $sth->fetchrow()) {              while (my @row = $sth->fetchrow()) {
1443                  # Get the synonym group ID and feature ID.                  # Get the synonym group ID and feature ID.
1444                  my ($syn_id, $peg) = @row;                  my ($syn_id, $peg) = @row;
1445                    # Count this row.
1446                    $entryCount++;
1447                    if ($entryCount % 1000 == 0) {
1448                        Trace("$entryCount rows processed.") if T(3);
1449                    }
1450                  # Insure it's for one of our genomes.                  # Insure it's for one of our genomes.
1451                  my $genomeID = FIG::genome_of($peg);                  my $genomeID = FIG::genome_of($peg);
1452                  if (exists $genomeHash->{$genomeID}) {                  if (exists $genomeHash->{$genomeID}) {
# Line 1450  Line 1465 
1465                      }                      }
1466                  }                  }
1467              }              }
1468                Trace("$entryCount rows produced $featureCount features.") if T(2);
1469          }          }
1470      }      }
1471      # Finish the load.      # Finish the load.
# Line 1459  Line 1475 
1475    
1476  =head3 LoadFamilyData  =head3 LoadFamilyData
1477    
1478  C<< my $stats = $spl->LoadFamilyData(); >>      my $stats = $spl->LoadFamilyData();
1479    
1480  Load the protein families into Sprout.  Load the protein families into Sprout.
1481    
# Line 1527  Line 1543 
1543    
1544  =head3 LoadDrugData  =head3 LoadDrugData
1545    
1546  C<< my $stats = $spl->LoadDrugData(); >>      my $stats = $spl->LoadDrugData();
1547    
1548  Load the drug target data into Sprout.  Load the drug target data into Sprout.
1549    
# Line 1661  Line 1677 
1677                          # Decode the score.                          # Decode the score.
1678                          my $realScore = FIGRules::DecodeScore($score);                          my $realScore = FIGRules::DecodeScore($score);
1679                          # Connect the PDB to the feature.                          # Connect the PDB to the feature.
1680                          $loadIsProteinForFeature->Put($pdbData->[0], $pdbID, $start, $realScore, $end);                          $loadIsProteinForFeature->Put($pdbID, $pdbData->[0], $start, $realScore, $end);
1681                      }                      }
1682                  }                  }
1683              }              }
# Line 1726  Line 1742 
1742    
1743  =head3 SpecialAttribute  =head3 SpecialAttribute
1744    
1745  C<< my $count = SproutLoad::SpecialAttribute($id, \@attributes, $idxMatch, \@idxValues, $pattern, $loader); >>      my $count = SproutLoad::SpecialAttribute($id, \@attributes, $idxMatch, \@idxValues, $pattern, $loader);
1746    
1747  Look for special attributes of a given type. A special attribute is found by comparing one of  Look for special attributes of a given type. A special attribute is found by comparing one of
1748  the columns of the incoming attribute list to a search pattern. If a match is found, then  the columns of the incoming attribute list to a search pattern. If a match is found, then
# Line 1902  Line 1918 
1918    
1919  =head3 GetGenomeAttributes  =head3 GetGenomeAttributes
1920    
1921  C<< my $aHashRef = GetGenomeAttributes($fig, $genomeID, \@fids, \@propKeys); >>      my $aHashRef = GetGenomeAttributes($fig, $genomeID, \@fids, \@propKeys);
1922    
1923  Return a hash of attributes keyed on feature ID. This method gets all the NMPDR-related  Return a hash of attributes keyed on feature ID. This method gets all the NMPDR-related
1924  attributes for all the features of a genome in a single call, then organizes them into  attributes for all the features of a genome in a single call, then organizes them into

Legend:
Removed from v.1.88  
Changed lines
  Added in v.1.90

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3