[Bio] / Sprout / FeatureSaplingLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/FeatureSaplingLoader.pm

Parent Directory | Revision Log | View Patch

revision 1.9, Mon Feb 1 20:14:28 2010 UTC | revision 1.13, Sat Feb 26 19:05:31 2011 UTC
# Line 66  Line 66 
66      my ($class, $erdb, $options) = @_;      my ($class, $erdb, $options) = @_;
67      # Create the table list.      # Create the table list.
68      my @tables = sort qw(Feature FeatureEssential FeatureEvidence FeatureLink      my @tables = sort qw(Feature FeatureEssential FeatureEvidence FeatureLink
69                           FeatureVirulent IsOwnerOf IsLocatedIn Identifies                           FeatureVirulent IsOwnerOf IsLocatedIn IsIdentifiedBy
70                           Identifier IsNamedBy ProteinSequence Concerns                           Identifier IsNamedBy ProteinSequence Concerns
71                           IsAttachmentSiteFor Publication IsProteinFor                           IsAttachmentSiteFor Publication IsProteinFor
72                           Role IsFunctionalIn);                           Role RoleIndex IsFunctionalIn);
73      # Create the BaseSaplingLoader object.      # Create the BaseSaplingLoader object.
74      my $retVal = BaseSaplingLoader::new($class, $erdb, $options, @tables);      my $retVal = BaseSaplingLoader::new($class, $erdb, $options, @tables);
75      # Return it.      # Return it.
# Line 98  Line 98 
98          # Load this genome's features.          # Load this genome's features.
99          $self->LoadGenomeFeatures($genomeID);          $self->LoadGenomeFeatures($genomeID);
100      } else {      } else {
101          # The global data is the roles from subsystems.          # The global data is the roles from subsystems and the publications.
102          my $fig = $self->source();          my $fig = $self->source();
103          # First, we get the subsystem list.          # We need the master map of roles to IDs.
104            my %roleHash;
105            my $lastRoleIndex = -1;
106            my $roleMapFile = $erdb->LoadDirectory() . "/roleMap.tbl";
107            if (-f $roleMapFile) {
108                for my $mapLine (Tracer::GetFile($roleMapFile)) {
109                    my ($role, $idx) = split /\t/, $mapLine;
110                    $roleHash{$role} = $idx;
111                    if ($idx > $lastRoleIndex) {
112                        $lastRoleIndex = $idx;
113                    }
114                }
115            }
116            # We'll track duplicate roles in here.
117            my %roleList;
118            # Now we get the subsystem list.
119          my $subHash = $erdb->SubsystemHash();          my $subHash = $erdb->SubsystemHash();
120          for my $sub (sort keys %$subHash) {          for my $sub (sort keys %$subHash) {
121              $self->Add(subsystems => 1);              $self->Add(subsystems => 1);
# Line 109  Line 124 
124              my @roles = $fig->subsystem_to_roles($sub);              my @roles = $fig->subsystem_to_roles($sub);
125              for my $role (@roles) {              for my $role (@roles) {
126                  $self->Add(subsystemRoles => 1);                  $self->Add(subsystemRoles => 1);
127                  $self->PutE(Role => $role, hypothetical => hypo($role));                  # Check to see if this role is hypothetical.
128                    my $hypo = hypo($role);
129                    if (! $hypo) {
130                        # Is this role in the role index hash?
131                        my $roleIndex = $roleHash{$role};
132                        if (! defined $roleIndex) {
133                            # No, compute a new index for it.
134                            $roleIndex = ++$lastRoleIndex;
135                            $roleHash{$role} = $roleIndex;
136                        }
137                        if (! $roleList{$role}) {
138                            $roleList{$role} = 1;
139                            $self->PutE(RoleIndex => $role, role_index => $roleIndex);
140                        }
141                    }
142                    $self->PutE(Role => $role, hypothetical => $hypo);
143                }
144            }
145            Trace("Subsystem roles generated.") if T(2);
146            # Write out the role master file.
147            Tracer::PutFile($roleMapFile, [map { "$_\t$roleHash{$_}" } keys %roleHash]);
148            Trace("Role master file written to $roleMapFile.") if T(2);
149            # Now, we get the publications.
150            my $pubs = $fig->all_titles();
151            for my $pub (@$pubs) {
152                # Get the ID and title.
153                my ($pubmedID, $title) = @$pub;
154                # Only proceed if the ID is valid.
155                if ($pubmedID) {
156                    # Create a hyperlink from the title and the pubmed ID.
157                    my $link;
158                    if (! $title) {
159                        $link = HyperLink->new("<unknown>");
160                    } else {
161                        $link = HyperLink->new($title, "http://www.ncbi.nlm.nih.gov/pubmed/$pubmedID");
162                    }
163                    # Create the publication record.
164                    $self->PutE(Publication => $pubmedID, citation => $link);
165              }              }
166          }          }
167          Trace("Subsystem roles generated.") if T(3);          Trace("Publications generated.") if T(2);
168      }      }
169  }  }
170    
# Line 214  Line 266 
266          # Connect the feature to its genome.          # Connect the feature to its genome.
267          $self->PutR(IsOwnerOf => $genomeID, $fid);          $self->PutR(IsOwnerOf => $genomeID, $fid);
268          # Connect the feature to its roles.          # Connect the feature to its roles.
269          my ($roles, $errors) = LoaderUtils::RolesForLoading($assignment);          my ($roles, $errors) = SeedUtils::roles_for_loading($assignment);
270          if (! defined $roles) {          if (! defined $roles) {
271              # Here the functional assignment was suspicious.              # Here the functional assignment was suspicious.
272              $self->Add(suspiciousFunction => 1);              $self->Add(suspiciousFunction => 1);
# Line 229  Line 281 
281              $self->Add(badFeatureRoles => $errors);              $self->Add(badFeatureRoles => $errors);
282          }          }
283          # Now we have a whole bunch of attribute-related stuff to store in          # Now we have a whole bunch of attribute-related stuff to store in
284          # secondary Feature tables. First is the evidence codes.          # secondary Feature tables. First is the evidence codes. This is special
285            # because we have to save the DLIT numbers.
286            my @dlits;
287          my @evidenceTuples = $fig->get_attributes($fid, 'evidence_code');          my @evidenceTuples = $fig->get_attributes($fid, 'evidence_code');
288          for my $evidenceTuple (@evidenceTuples) {          for my $evidenceTuple (@evidenceTuples) {
289              my (undef, undef, $code) = @$evidenceTuple;              my (undef, undef, $code) = @$evidenceTuple;
290              $self->PutE(FeatureEvidence => $fid, 'evidence-code' => $code);              $self->PutE(FeatureEvidence => $fid, 'evidence-code' => $code);
291                # If this is a direct literature reference, save it.
292                if ($code =~ /dlit\((\d+)/) {
293                    push @dlits, $1;
294                    $self->Add(dlits => 1);
295                }
296          }          }
297          # Now we have the external links. These are stored using hyperlink objects.          # Now we have the external links. These are stored using hyperlink objects.
298          my @links = $fig->fid_links($fid);          my @links = $fig->fid_links($fid);
# Line 267  Line 326 
326              # Get the translation.              # Get the translation.
327              my $proteinSequence = $fig->get_translation($fid);              my $proteinSequence = $fig->get_translation($fid);
328              if (! $proteinSequence) {              if (! $proteinSequence) {
329                  Trace("No protein sequence found for $fid.") if T(2);                  Trace("No protein sequence found for $fid.") if T(ERDBLoadGroup => 2);
330                  $self->Add(missingProtein => 1);                  $self->Add(missingProtein => 1);
331                  # Here there was some sort of error and the protein sequence did                  # Here there was some sort of error and the protein sequence did
332                  # not come back. Ask for the DNA and translate it instead.                  # not come back. Ask for the DNA and translate it instead.
# Line 279  Line 338 
338              # Create the protein record.              # Create the protein record.
339              $self->PutE(ProteinSequence => $proteinID, sequence => $proteinSequence);              $self->PutE(ProteinSequence => $proteinID, sequence => $proteinSequence);
340              $self->PutR(IsProteinFor => $proteinID, $fid);              $self->PutR(IsProteinFor => $proteinID, $fid);
341              # Get the publications for this PEG.              # Connect this protein to the feature's publications (if any).
342              my @pubs = $fig->get_attributes($fid, 'PUBMED_CURATED_RELEVANT');              for my $pub (@dlits) {
343              for my $pub (@pubs) {                  $self->PutR(Concerns => $pub, $proteinID);
                 # Parse out the article title from the data.  
                 my (undef, undef, $data, $url) = @$pub;  
                 my @pieces = split /,/, $data, 3;  
                 if (defined $pieces[2]) {  
                     # Create the publication record.  
                     my $hl = HyperLink->new($pieces[2], $url);  
                     my $key = ERDB::DigestKey($url);  
                     $self->PutE(Publication => $key, citation => $hl);  
                     # Connect it to the protein.  
                     $self->PutR(Concerns => $key, $proteinID);  
                 }  
344              }              }
345          }          }
346          # Now we need to compute the identifiers. We start with the aliases.          # Now we need to compute the identifiers. We start with the aliases.
# Line 303  Line 351 
351          for my $aliasTuple (@$aliasList) {          for my $aliasTuple (@$aliasList) {
352              my ($aliasID, $aliasType, $aliasConf) = @$aliasTuple;              my ($aliasID, $aliasType, $aliasConf) = @$aliasTuple;
353              # Get the natural form. If there is none, then the canonical              # Get the natural form. If there is none, then the canonical
354              # form IS the natural form.              # form IS the natural form. Note we have to make a special check
355              my $natural = AliasAnalysis::Type($aliasType => $aliasID) || $aliasID;              # for locus tags, which have an insane number of variants.
356                my $natural;
357                if ($aliasID =~ /LocusTag:(.+)/) {
358                    $natural = $1;
359                } else {
360                    $natural = AliasAnalysis::Type($aliasType => $aliasID) || $aliasID;
361                }
362              # Create the identifier record.              # Create the identifier record.
363              $self->PutE(Identifier => $aliasID, natural_form => $natural,              $self->PutE(Identifier => $aliasID, natural_form => $natural,
364                          source => $aliasType);                          source => $aliasType);
# Line 314  Line 368 
368                  $self->PutR(IsNamedBy => $proteinID, $aliasID);                  $self->PutR(IsNamedBy => $proteinID, $aliasID);
369              } else {              } else {
370                  # No. Connect it to the feature.                  # No. Connect it to the feature.
371                  $self->PutR(Identifies => $aliasID, $fid, conf => $aliasConf);                  $self->PutR(IsIdentifiedBy => $fid, $aliasID, conf => $aliasConf);
372              }              }
373          }          }
374          # Finally, this feature is an alias of itself.          # Finally, this feature is an alias of itself.
375          $self->PutE(Identifier => $fid, natural_form => $fid,          $self->PutE(Identifier => $fid, natural_form => $fid,
376                      source => 'SEED');                      source => 'SEED');
377          $self->PutR(Identifies => $fid, $fid, conf => 'A');          $self->PutR(IsIdentifiedBy => $fid, $fid, conf => 'A');
378      }      }
379  }  }
380    

Legend:
Removed from v.1.9  
Changed lines
  Added in v.1.13

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3