[Bio] / Sprout / GenomeSaplingLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/GenomeSaplingLoader.pm

Parent Directory | Revision Log | View Patch

revision 1.9, Thu Mar 25 16:20:40 2010 UTC revision 1.12, Thu Oct 14 17:27:37 2010 UTC
# Line 217  Line 217 
217      # Get the Sapling object.      # Get the Sapling object.
218      my $sapling = $self->db();      my $sapling = $self->db();
219      # Get the name of the taxonomy dump directory.      # Get the name of the taxonomy dump directory.
220      my $taxDir = "$FIG_Config::global/Taxonomy";      my $taxDir = "/vol/biodb/ncbi/taxonomy";
221      # The first step is to read in all the names. We will build a hash that maps      # The first step is to read in all the names. We will build a hash that maps
222      # each taxonomy ID to a list of its names. The first scientific name encountered      # each taxonomy ID to a list of its names. The first scientific name encountered
223      # will be saved as the primary name. Only scientific names, synonyms, and
# Line 244  Line 244 
244          }          }
245      }      }
246      # Now we read in the taxonomy nodes. For each node, we generate a TaxonomicGrouping      # Now we read in the taxonomy nodes. For each node, we generate a TaxonomicGrouping
247      # record, and we connect it to its parent using IsGroupFor.      # record, and we connect it to its parent using IsGroupFor. We also keep the node ID
248        # for later so we know what's available.
249      close $ih;      close $ih;
250      $ih = Open(undef, "<$taxDir/nodes.dmp");      $ih = Open(undef, "<$taxDir/nodes.dmp");
251      while (! eof $ih) {      while (! eof $ih) {
# Line 271  Line 272 
272          # Connect the group to its parent.          # Connect the group to its parent.
273          $self->PutR(IsGroupFor => $parent, $taxID);          $self->PutR(IsGroupFor => $parent, $taxID);
274      }      }
275        # Read in the merge file. The merge file tells us which old IDs are mapped to
276        # new IDs. We need this to connect genomes with old IDs to the correct group.
277        my %merges;
278        $ih = Open(undef, "<$taxDir/merged.dmp");
279        while (! eof $ih) {
280            # Get this merge record.
281            my ($oldID, $newID) = GetTaxData($ih);
282            # Store it in the hash.
283            $merges{$oldID} = $newID;
284        }
285      # Now we need to connect each genome to its taxonomic grouping.      # Now we need to connect each genome to its taxonomic grouping.
286      # Get the genome hash. This gives us our list of genome IDs.      # Get the genome hash. This gives us our list of genome IDs.
287      my $genomeHash = $sapling->GenomeHash();      my $genomeHash = $sapling->GenomeHash();
# Line 278  Line 289 
289      for my $genomeID (keys %$genomeHash) {      for my $genomeID (keys %$genomeHash) {
290          # Get this genome's taxonomic group.          # Get this genome's taxonomic group.
291          my ($taxID) = split /\./, $genomeID, 2;          my ($taxID) = split /\./, $genomeID, 2;
292          # Connect the genome and the group.          # Check to see if we have this tax ID. If we don't, we check for a merge.
293            if (! $primaryNames{$taxID}) {
294                if ($merges{$taxID}) {
295                    $taxID = $merges{$taxID};
296                    $self->Add('merged-names' => 1);
297                    Trace("$genomeID has alternate taxonomy ID $taxID.") if T(ERDBLoadGroup => 2);
298                } else {
299                    $taxID = undef;
300                    $self->Add('missing-groups' => 1);
301                    Trace("$genomeID has no taxonomy group.") if T(ERDBLoadGroup => 1);
302                }
303            }
304            # Connect the genome and the group if the group is real.
305            if (defined $taxID) {
306          $self->PutR(IsTaxonomyOf => $taxID, $genomeID);          $self->PutR(IsTaxonomyOf => $taxID, $genomeID);
307      }      }
308  }  }
309    }
310    
311    
312  =head3 PlaceGenome  =head3 PlaceGenome

Legend:
Removed from v.1.9  
changed lines
  Added in v.1.12

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3