[Bio] / Sprout / FeatureSaplingLoader.pm Repository:
ViewVC logotype

Diff of /Sprout/FeatureSaplingLoader.pm

Parent Directory | Revision Log | View Patch

revision 1.6, Thu Oct 29 18:31:48 2009 UTC revision 1.9, Mon Feb 1 20:14:28 2010 UTC
# Line 27  Line 27 
27      use HyperLink;      use HyperLink;
28      use AliasAnalysis;      use AliasAnalysis;
29      use LoaderUtils;      use LoaderUtils;
30        use Digest::MD5;
31        use SeedUtils;
32      use base 'BaseSaplingLoader';      use base 'BaseSaplingLoader';
33    
34  =head1 Sapling Feature Load Group Class  =head1 Sapling Feature Load Group Class
# Line 66  Line 68 
68      my @tables = sort qw(Feature FeatureEssential FeatureEvidence FeatureLink      my @tables = sort qw(Feature FeatureEssential FeatureEvidence FeatureLink
69                           FeatureVirulent IsOwnerOf IsLocatedIn Identifies                           FeatureVirulent IsOwnerOf IsLocatedIn Identifies
70                           Identifier IsNamedBy ProteinSequence Concerns                           Identifier IsNamedBy ProteinSequence Concerns
71                           IsAttachmentSiteFor Publication IsProteinFor);                           IsAttachmentSiteFor Publication IsProteinFor
72                             Role IsFunctionalIn);
73      # Create the BaseSaplingLoader object.      # Create the BaseSaplingLoader object.
74      my $retVal = BaseSaplingLoader::new($class, $erdb, $options, @tables);      my $retVal = BaseSaplingLoader::new($class, $erdb, $options, @tables);
75      # Return it.      # Return it.
# Line 88  Line 91 
91      my ($self) = @_;      my ($self) = @_;
92      # Get the database object.      # Get the database object.
93      my $erdb = $self->db();      my $erdb = $self->db();
94      # Only proceed if this is a normal section. There's no global feature data.      # Check for local or global.
95      if (! $self->global()) {      if (! $self->global()) {
96          # Get the section ID.          # Here we are generating data for a genome.
97          my $genomeID = $self->section();          my $genomeID = $self->section();
98          # Load this genome's features.          # Load this genome's features.
99          $self->LoadGenomeFeatures($genomeID);          $self->LoadGenomeFeatures($genomeID);
100        } else {
101            # The global data is the roles from subsystems.
102            my $fig = $self->source();
103            # First, we get the subsystem list.
104            my $subHash = $erdb->SubsystemHash();
105            for my $sub (sort keys %$subHash) {
106                $self->Add(subsystems => 1);
107                Trace("Processing roles for $sub.") if T(3);
108                # Get this subsystem's roles and write them out.
109                my @roles = $fig->subsystem_to_roles($sub);
110                for my $role (@roles) {
111                    $self->Add(subsystemRoles => 1);
112                    $self->PutE(Role => $role, hypothetical => hypo($role));
113                }
114            }
115            Trace("Subsystem roles generated.") if T(3);
116      }      }
117  }  }
118    
# Line 126  Line 145 
145      my $aliasDir = $sapling->LoadDirectory() . "/AliasData";      my $aliasDir = $sapling->LoadDirectory() . "/AliasData";
146      my $aliasHash = LoaderUtils::ReadAliasFile($aliasDir, $genomeID);      my $aliasHash = LoaderUtils::ReadAliasFile($aliasDir, $genomeID);
147      if (! defined $aliasHash) {      if (! defined $aliasHash) {
148          Trace("No aliases found for $genomeID.") if T(1);          Trace("No aliases found for $genomeID.") if T(ERDBLoadGroup => 1);
149          $self->Add(missingAliasFile => 1);          $self->Add(missingAliasFile => 1);
150          $aliasHash = {};          $aliasHash = {};
151      }      }
# Line 138  Line 157 
157          my ($fid, $locationString, $aliases, $type, undef, undef, $assignment,          my ($fid, $locationString, $aliases, $type, undef, undef, $assignment,
158              $assignmentMaker, $quality) = @$feature;              $assignmentMaker, $quality) = @$feature;
159          $self->Track(Features => $fid, 1000);          $self->Track(Features => $fid, 1000);
160          # Fix the assignment for non-PEG features.          # Fix missing assignments. For RNAs, the assignment may be in the alias list.
161          if (! defined $assignment) {          if (! defined $assignment) {
162                if ($type eq 'rna') {
163              $assignment = $aliases;              $assignment = $aliases;
164              $assignmentMaker ||= 'master';              $assignmentMaker ||= 'master';
165                } else {
166                    $assignment = '';
167                }
168          }          }
169          # Convert the location string to a list of location objects.          # Convert the location string to a list of location objects.
170          my @locs = map { BasicLocation->new($_) } split /\s*,\s*/, $locationString;          my @locs = map { BasicLocation->new($_) } split /\s*,\s*/, $locationString;
# Line 180  Line 203 
203              if ($assignment =~ /att([LR])\s+for\s+(fig\|.+)/) {              if ($assignment =~ /att([LR])\s+for\s+(fig\|.+)/) {
204                  $self->PutR(IsAttachmentSiteFor => $fid, $2, edge => $1);                  $self->PutR(IsAttachmentSiteFor => $fid, $2, edge => $1);
205              } else {              } else {
206                  Trace("Invalid attachment function for $fid: $assignment") if T(1);                  Trace("Invalid attachment function for $fid: $assignment") if T(ERDBLoadGroup => 1);
207                  $self->Add(badAttachment => 1);                  $self->Add(badAttachment => 1);
208              }              }
209          }          }
# Line 190  Line 213 
213                      locked => $fig->is_locked_fid($fid));                      locked => $fig->is_locked_fid($fid));
214          # Connect the feature to its genome.          # Connect the feature to its genome.
215          $self->PutR(IsOwnerOf => $genomeID, $fid);          $self->PutR(IsOwnerOf => $genomeID, $fid);
216            # Connect the feature to its roles.
217            my ($roles, $errors) = LoaderUtils::RolesForLoading($assignment);
218            if (! defined $roles) {
219                # Here the functional assignment was suspicious.
220                $self->Add(suspiciousFunction => 1);
221                Trace("$fid has a suspicious function: $assignment") if T(ERDBLoadGroup => 1);
222            } else {
223                # Here we have a good assignment.
224                for my $role (@$roles) {
225                    $self->Add(featureRole => 1);
226                    $self->PutR(IsFunctionalIn => $role, $fid);
227                    $self->PutE(Role => $role, hypothetical => hypo($role));
228                }
229                $self->Add(badFeatureRoles => $errors);
230            }
231          # Now we have a whole bunch of attribute-related stuff to store in          # Now we have a whole bunch of attribute-related stuff to store in
232          # secondary Feature tables. First is the evidence codes.          # secondary Feature tables. First is the evidence codes.
233          my @evidenceTuples = $fig->get_attributes($fid, 'evidence_code');          my @evidenceTuples = $fig->get_attributes($fid, 'evidence_code');
# Line 228  Line 266 
266          if ($type eq 'peg') {          if ($type eq 'peg') {
267              # Get the translation.              # Get the translation.
268              my $proteinSequence = $fig->get_translation($fid);              my $proteinSequence = $fig->get_translation($fid);
269                if (! $proteinSequence) {
270                    Trace("No protein sequence found for $fid.") if T(2);
271                    $self->Add(missingProtein => 1);
272                    # Here there was some sort of error and the protein sequence did
273                    # not come back. Ask for the DNA and translate it instead.
274                    my $dna = $fig->get_dna_seq($fid);
275                    $proteinSequence = FIG::translate($dna, undef, 1);
276                }
277              # Compute the ID.              # Compute the ID.
278              $proteinID = ERDB::DigestKey($proteinSequence);              $proteinID = $sapling->ProteinID($proteinSequence);
279              # Create the protein record.              # Create the protein record.
280              $self->PutE(ProteinSequence => $proteinID, sequence => $proteinSequence);              $self->PutE(ProteinSequence => $proteinID, sequence => $proteinSequence);
281              $self->PutR(IsProteinFor => $proteinID, $fid);              $self->PutR(IsProteinFor => $proteinID, $fid);

Legend:
Removed from v.1.6  
changed lines
  Added in v.1.9

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3