[Bio] / Sprout / DrugSproutLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/DrugSproutLoader.pm

Parent Directory | Revision Log | View Patch

revision 1.1, Thu Oct 9 18:16:26 2008 UTC | revision 1.3, Thu Feb 5 06:48:52 2009 UTC
# Line 42  Line 42 
42    
43  [[SproutPm]] object for the database being loaded.  [[SproutPm]] object for the database being loaded.
44    
 =item source  
   
 [[FigPm]] object used to access the source data. If this parameter is undefined,  
 it will be created the first time the L</source> method is called.  
   
45  =item options  =item options
46    
47  Reference to a hash of command-line options.  Reference to a hash of command-line options.
# Line 61  Line 56 
56    
57  sub new {  sub new {
58      # Get the parameters.      # Get the parameters.
59      my ($class, $erdb, $source, $options) = @_;      my ($class, $erdb, $options) = @_;
60      # Create the table list.      # Create the table list.
61      my @tables = sort qw(PDB Ligand IsProteinForFeature DocksWith);      my @tables = sort qw(PDB Ligand IsProteinForFeature DocksWith);
62      # Create the BaseSproutLoader object.      # Create the BaseSproutLoader object.
63      my $retVal = BaseSproutLoader::new($class, $erdb, $source, $options, @tables);      my $retVal = BaseSproutLoader::new($class, $erdb, $options, @tables);
64      # Return it.      # Return it.
65      return $retVal;      return $retVal;
66  }  }
# Line 111  Line 106 
106                      # The ZINC ID is found in the first return column, prefixed with the word ZINC.                      # The ZINC ID is found in the first return column, prefixed with the word ZINC.
107                      if ($zinc_data->[0] =~ /^ZINC:(\d+)$/) {                      if ($zinc_data->[0] =~ /^ZINC:(\d+)$/) {
108                          $zinc_id = $1;                          $zinc_id = $1;
109                          # Check for a duplicate.                          $self->Track(zincIDs => $zinc_id, 10000);
110                            # Check for a duplicate. These are very, very common.
111                          if ($zinc_id eq $last_zinc_id) {                          if ($zinc_id eq $last_zinc_id) {
112                              $self->AddWarning('zinc-duplicate' => "Duplicate ligand $zinc_data->[0] found.");                              $self->Add('duplicate-zinc' => 1);
113                          } else {                          } else {
114                              # Here it's safe to output the ligand. The ligand name is the attribute value                              # Here it's safe to output the ligand. The ligand name is the attribute value
115                              # (third column in the row).                              # (third column in the row).
116                              $self->PutE(Ligand => $zinc_data->[2]);                              $self->PutE(Ligand => $zinc_id, name => $zinc_data->[2]);
117                              # Insure we don't try to add this ID again.                              # Insure we don't try to add this ID again.
118                              $last_zinc_id = $zinc_id;                              $last_zinc_id = $zinc_id;
119                          }                          }
# Line 127  Line 123 
123                  }                  }
124              }              }
125          }          }
         # Now comes the "DocksWith" relationship.  
         Trace("Generating docking data.") if T(2);  
         # This hash is used to compute the number of docking results, which is an  
         # attribute of the PDB.  
         my %pdbHash;  
         # Get all the docking data. This may cause problems if there are too many PDBs,  
         # at which point we'll need another algorithm. The indicator that this is  
         # happening will be a timeout error in the next statement.  
         my @dockData = $fig->query_attributes('$key = ? AND $value < ?',  
                                               ['docking_results', $FIG_Config::dockLimit]);  
         Trace(scalar(@dockData) . " rows of docking data found.") if T(3);  
         for my $dockData (@dockData) {  
             # Get the docking data components.  
             my ($pdbID, $docking_key, @valueData) = @{$dockData};  
             # Fix the PDB ID. It's supposed to be lower-case, but this does not always happen.  
             $pdbID = lc $pdbID;  
             # Strip off the object type.  
             $pdbID =~ s/pdb://;  
             # Extract the ZINC ID from the docking key. Note that the "ZINC" string  
             # does not always get put in correctly, so it's optional in the pattern.  
             my (undef, $zinc_id) = $docking_key =~ /^docking_results::(ZINC)?(\d+)$/i;  
             if (! $zinc_id) {  
                 $self->AddWarning('dockdata-errors' => "Invalid docking result key $docking_key for $pdbID.") if T(0);  
126              } else {              } else {
127                  # Get the pieces of the value and parse the energy.          # Here we're working with a genome. We need to find all the PDBs that connect
128                  # Note that we don't care about the rank, since          # to this genome's features.
                 # we can sort on the energy level itself in our database.  
                 my ($energy, $tool, $type) = @valueData;  
                 my ($rank, $total, $vanderwaals, $electrostatic) = split /\s*;\s*/, $energy;  
                 # Ignore predicted results.  
                 if ($type ne "Predicted") {  
                     # Count this docking result.  
                     $pdbHash{$pdbID}++;  
                     # Write the result to the output.  
                     $self->PutR(DocksWith => $pdbID, $zinc_id, electrostatic => $electrostatic,  
                                 reason => $type, tool => $tool, 'total-energy' => $total,  
                                 'vanderwaals-energy' => $vanderwaals);  
                 }  
             }  
         }  
         # Now we need to find all the PDBs that have connections to features.  
         # At the current time, we can't parallelize this part, even though  
         # it's genome-based, because of the docking counts. We'll fix this  
         # later.  
129          Trace("Connecting features.") if T(2);          Trace("Connecting features.") if T(2);
130          # Loop through the genomes. We get them from the section list, then          my $genome = $self->section();
         # eliminate this section, which is the global.  
         my $global = $self->section();  
         my @genomes = grep { $_ ne $global } $self->GetSectionList();  
         for my $genome (@genomes) {  
131              Trace("Generating PDB connections for $genome.") if T(3);              Trace("Generating PDB connections for $genome.") if T(3);
132            # We'll keep track of the PDBs we find in here.
133            my %pdbHash;
134              # Get all of the PDBs that BLAST against this genome's features.              # Get all of the PDBs that BLAST against this genome's features.
135              my @attributeData = $fig->get_attributes("fig|$genome%", 'PDB::%');          my @attributeData = $fig->get_attributes("fig|$genome%", 'PDB');
136              for my $pdbData (@attributeData) {              for my $pdbData (@attributeData) {
137                  # The PDB ID is coded as a subkey.                  # The PDB ID is coded as a subkey.
138                  if ($pdbData->[1] !~ /PDB::(.+)/i) {                  if ($pdbData->[1] !~ /PDB::(.+)/i) {
139                      $self->AddWarning('pdb-key-error' => "Invalid PDB ID \"$pdbData->[1]\" in attribute table.");                      $self->AddWarning('pdb-key-error' => "Invalid PDB ID \"$pdbData->[1]\" in attribute table.");
140                  } else {                  } else {
141                      my $pdbID = $1;                  my $pdbID = lc $1;
142                      # Insure the PDB is in the hash.                      # Insure the PDB is in the hash.
143                      if (! exists $pdbHash{$pdbID}) {                      if (! exists $pdbHash{$pdbID}) {
144                          $pdbHash{$pdbID} = 0;                          $pdbHash{$pdbID} = 0;
# Line 227  Line 180 
180              # Output the PDBs found.              # Output the PDBs found.
181              Trace("Unspooling PDBs") if T(2);              Trace("Unspooling PDBs") if T(2);
182              for my $pdbID (sort keys %pdbHash) {              for my $pdbID (sort keys %pdbHash) {
183                  $self->PutE(PDB => $pdbID, 'docking-count' => $pdbHash{$pdbID});              $self->Track(PDBs => $pdbID, 100);
184                # We need to find every ligand that docks with this PDB. Unfortunately, the
185                # uploaded PDB data has upper-case IDs, while we use lower-case so that we
186                # map to the IDs on the PDB web site. We fix this by asking for both.
187                my @dockData = $fig->query_attributes('($object = ? OR $object = ?) AND $key = ? AND $value < ?',
188                                                      ["PDB:" . uc $pdbID, "PDB:$pdbID",
189                                                       'docking_results', $FIG_Config::dockLimit]);
190                Trace(scalar(@dockData) . " rows of docking data found.") if T(3);
191                # Count the docking data actually used.
192                my $docksUsed = 0;
193                # Loop through the docking data.
194                for my $dockData (@dockData) {
195                    # Get the docking data components. We ignore the object ID, since we already
196                    # know what it is.
197                    my (undef, $docking_key, @valueData) = @{$dockData};
198                    # Extract the ZINC ID from the docking key. Note that the "ZINC" string
199                    # does not always get put in correctly, so it's optional in the pattern.
200                    my (undef, $zinc_id) = $docking_key =~ /^docking_results::(ZINC)?(\d+)$/i;
201                    if (! $zinc_id) {
202                        $self->AddWarning('dockdata-errors' => "Invalid docking result key $docking_key for $pdbID.") if T(0);
203                    } else {
204                        # Get the pieces of the value and parse the energy.
205                        # Note that we don't care about the rank, since
206                        # we can sort on the energy level itself in our database.
207                        my ($energy, $tool, $type) = @valueData;
208                        my ($rank, $total, $vanderwaals, $electrostatic) = split /\s*;\s*/, $energy;
209                        # Ignore predicted results.
210                        if ($type ne "Predicted") {
211                            # Write the result to the output.
212                            $self->PutR(DocksWith => $pdbID, $zinc_id,
213                                        'electrostatic-energy' => $electrostatic,
214                                        reason => $type, tool => $tool,
215                                        'total-energy' => $total,
216                                        'vanderwaals-energy' => $vanderwaals);
217                            # Count it.
218                            $docksUsed++;
219                        }
220                    }
221              }              }
222                # Output the PDB record.
223                $self->PutE(PDB => $pdbID, 'docking-count' => $docksUsed);
224          }          }
225      }      }
226  }  }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3