[Bio] / Sprout / FeatureSproutLoader.pm Repository:
ViewVC logotype

Annotation of /Sprout/FeatureSproutLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package FeatureSproutLoader;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use ERDB;
25 :     use BioWords;
26 :     use AliasAnalysis;
27 : parrello 1.6 use DBMaster;
28 :     use HyperLink;
29 :     use FFs;
30 : parrello 1.1 use base 'BaseSproutLoader';
31 :    
32 :     =head1 Sprout Feature Load Group Class
33 :    
34 :     =head2 Introduction
35 :    
36 :     The Feature Load Group includes all of the major feature-related tables.
37 :    
38 :     =head3 new
39 :    
40 : parrello 1.3 my $sl = FeatureSproutLoader->new($erdb, $source, $options, @tables);
41 : parrello 1.1
42 : parrello 1.3 Construct a new FeatureSproutLoader object.
43 : parrello 1.1
44 :     =over 4
45 :    
46 :     =item erdb
47 :    
48 :     [[SproutPm]] object for the database being loaded.
49 :    
50 :     =item options
51 :    
52 :     Reference to a hash of command-line options.
53 :    
54 :     =item tables
55 :    
56 :     List of tables in this load group.
57 :    
58 :     =back
59 :    
60 :     =cut
61 :    
62 :     sub new {
63 :     # Get the parameters.
64 : parrello 1.6 my ($class, $erdb, $options) = @_;
65 : parrello 1.1 # Create the table list.
66 :     my @tables = sort qw(Feature IsLocatedIn FeatureAlias IsAliasOf FeatureLink
67 :     FeatureTranslation FeatureUpstream HasFeature HasRoleInSubsystem
68 :     FeatureEssential FeatureVirulent FeatureIEDB CDD IsPresentOnProteinOf
69 : parrello 1.6 CellLocation IsPossiblePlaceFor IsAlsoFoundIn ExternalDatabase Keyword
70 :     ProteinFamily IsFamilyForFeature ProteinFamilyName FeatureEC);
71 : parrello 1.1 # Create the BaseSproutLoader object.
72 : parrello 1.6 my $retVal = BaseSproutLoader::new($class, $erdb, $options, @tables);
73 : parrello 1.2 # Get the list of relevant attributes.
74 : parrello 1.1 # Bless and return it.
75 :     bless $retVal, $class;
76 :     return $retVal;
77 :     }
78 :    
79 :     =head2 Public Methods
80 :    
81 :     =head3 Generate
82 :    
83 :     $sl->Generate();
84 :    
85 :     Generate the data for the feature-related files.
86 :    
87 :     =cut
88 :    
89 :     sub Generate {
90 :     # Get the parameters.
91 :     my ($self) = @_;
92 :     # Get the sprout object.
93 :     my $sprout = $self->db();
94 :     # Get the FIG object.
95 :     my $fig = $self->source();
96 :     # Get the subsystem list.
97 :     my $subHash = $self->GetSubsystems();
98 :     # Get the word stemmer.
99 :     my $stemmer = $sprout->GetStemmer();
100 : parrello 1.6 # Get access to FIGfams.
101 :     my $figfam_data = &FIG::get_figfams_data();
102 :     my $ffs = new FFs($figfam_data);
103 : parrello 1.1 # Only proceed if this is not the global section.
104 :     if (! $self->global()) {
105 : parrello 1.4 # Get the section ID.
106 :     my $genomeID = $self->section();
107 : parrello 1.6 # Connect to the ontology database.
108 :     my $sqlite_db = "/home/mkubal/Temp/Ontology/ontology.sqlite";
109 :     my $ontology_dbmaster = DBMaster->new(-database => $sqlite_db, -backend => 'SQLite');
110 : parrello 1.1 # Get the maximum sequence size. We need this later for splitting up the
111 :     # locations.
112 :     my $chunkSize = $sprout->MaxSegment();
113 :     Trace("Loading features for genome $genomeID.") if T(ERDBLoadGroup => 3);
114 :     # Get the feature list for this genome.
115 :     my $features = $fig->all_features_detailed_fast($genomeID);
116 :     # Sort and count the list.
117 :     my @featureTuples = sort { $a->[0] cmp $b->[0] } @{$features};
118 :     my $count = scalar @featureTuples;
119 :     Trace("$count features found for genome $genomeID.") if T(ERDBLoadGroup => 3);
120 :     # Get the attributes for this genome and put them in a hash by feature ID.
121 : parrello 1.6 my $attributes = $self->GetGenomeAttributes($genomeID, \@featureTuples);
122 : parrello 1.1 Trace("Looping through features for $genomeID.") if T(ERDBLoadGroup => 3);
123 :     # Loop through the features.
124 :     for my $featureTuple (@featureTuples) {
125 :     # Split the tuple.
126 : parrello 1.4 my ($featureID, $locations, $aliases, $type, $minloc, $maxloc, $assignment, $user, $quality) = @{$featureTuple};
127 : parrello 1.1 # Make sure this feature is active.
128 :     if (! $fig->is_deleted_fid($featureID)) {
129 :     # Handle missing assignments.
130 :     if (! defined $assignment) {
131 :     $assignment = '';
132 :     $user = '';
133 :     } else {
134 :     # The default assignment-maker is FIG.
135 :     $user ||= 'fig';
136 :     }
137 :     # Count this feature.
138 :     $self->Track(features => $featureID, 1000);
139 :     # Fix the quality. It is almost always a space, but some odd stuff might sneak through, and the
140 :     # Sprout database requires a single character.
141 :     if (! defined($quality) || $quality eq "") {
142 :     $quality = " ";
143 :     }
144 : parrello 1.6 # Get the coupling count. The coupled features are returned as a list,
145 :     # and we store it as a scalar to get the count.
146 :     my $couplingCount = $fig->coupled_to($featureID);
147 : parrello 1.1 # Begin building the keywords. We start with the genome ID, the
148 :     # feature ID, the taxonomy, and the organism name.
149 :     my @keywords = ($genomeID, $featureID, $fig->genus_species($genomeID),
150 :     $fig->taxonomy_of($genomeID));
151 :     # Create the aliases.
152 : parrello 1.4 for my $alias (split /,/, $aliases) {
153 : parrello 1.1 # Connect this alias to this feature and make an Alias record for it.
154 : parrello 1.6 $self->PutR(IsAliasOf => $alias, $featureID);
155 :     $self->PutE(FeatureAlias => $alias);
156 : parrello 1.1 # Add it to the keyword list.
157 :     push @keywords, $alias;
158 :     # If this is a locus tag, also add its natural form as a keyword.
159 :     my $naturalName = AliasAnalysis::Type(LocusTag => $alias);
160 :     if ($naturalName) {
161 :     push @keywords, $naturalName;
162 :     }
163 :     }
164 :     # Add the corresponding IDs. We ask for 2-tuples of the form (id, database).
165 :     my @corresponders = $fig->get_corresponding_ids($featureID, 1);
166 :     for my $tuple (@corresponders) {
167 :     my ($id, $xdb) = @{$tuple};
168 : parrello 1.6 # Ignore SEED: that's us. Also ignore contig IDs. Those are due to a bug.
169 :     if ($xdb ne 'SEED' && ! ($xdb eq 'RefSeq' && $id =~ /^[A-Z][A-Z]_\d+$/)) {
170 : parrello 1.1 # Connect this ID to the feature and mark its database.
171 : parrello 1.6 $self->PutR(IsAlsoFoundIn => $featureID, $xdb,
172 : parrello 1.1 alias => $id);
173 : parrello 1.6 $self->PutE(ExternalDatabase => $xdb);
174 :     # Add it as a keyword and an alias.
175 : parrello 1.1 push @keywords, $id;
176 : parrello 1.6 $self->PutR(IsAliasOf => $id, $featureID);
177 :     $self->PutE(FeatureAlias => $id);
178 : parrello 1.1 }
179 :     }
180 :     Trace("Assignment for $featureID is: $assignment") if T(ERDBLoadGroup => 4);
181 :     # Break the assignment into words and shove it onto the
182 :     # keyword list.
183 :     push @keywords, split(/\s+/, $assignment);
184 : parrello 1.5 # Add any EC numbers.
185 : parrello 1.6 my @ecs = BioWords::ExtractECs($assignment);
186 :     for my $ec (@ecs) {
187 :     push @keywords, $ec;
188 :     $self->PutE(FeatureEC => $featureID, ec => $ec);
189 :     }
190 : parrello 1.1 # Link this feature to the parent genome.
191 : parrello 1.6 $self->PutR(HasFeature => $genomeID, $featureID,
192 : parrello 1.1 type => $type);
193 :     # Get the links.
194 :     my @links = $fig->fid_links($featureID);
195 :     for my $link (@links) {
196 : parrello 1.6 $self->PutE(FeatureLink => $featureID, link => $link);
197 : parrello 1.1 }
198 :     # If this is a peg, generate the translation and the upstream.
199 :     if ($type eq 'peg') {
200 :     $self->Add(pegIn => 1);
201 :     my $translation = $fig->get_translation($featureID);
202 :     if ($translation) {
203 : parrello 1.6 $self->PutE(FeatureTranslation => $featureID,
204 : parrello 1.1 translation => $translation);
205 :     }
206 :     # We use the default upstream values of u=200 and c=100.
207 :     my $upstream = $fig->upstream_of($featureID, 200, 100);
208 :     if ($upstream) {
209 : parrello 1.6 $self->PutE(FeatureUpstream => $featureID,
210 : parrello 1.1 'upstream-sequence' => $upstream);
211 :     }
212 :     }
213 :     # Now we need to find the subsystems this feature participates in.
214 :     my @ssList = $fig->subsystems_for_peg($featureID);
215 :     # This hash prevents us from adding the same subsystem twice.
216 :     my %seen = ();
217 :     for my $ssEntry (@ssList) {
218 :     # Get the subsystem and role.
219 :     my ($subsystem, $role) = @{$ssEntry};
220 :     # Only proceed if we like this subsystem.
221 :     if (exists $subHash->{$subsystem}) {
222 :     # If this is the first time we've seen this subsystem for
223 :     # this peg, store the has-role link.
224 :     if (! $seen{$subsystem}) {
225 : parrello 1.6 $self->PutR(HasRoleInSubsystem => $featureID, $subsystem,
226 :     genome => $genomeID, type => $type);
227 : parrello 1.5 # Save the subsystem's keywords.
228 : parrello 1.1 push @keywords, split /[\s_]+/, $subsystem;
229 :     }
230 : parrello 1.5 # Now add the role and any embedded EC nubmers to the keyword list.
231 : parrello 1.1 push @keywords, split /\s+/, $role;
232 : parrello 1.5 push @keywords, BioWords::ExtractECs($role);
233 : parrello 1.1 }
234 :     }
235 : parrello 1.5 # For each hyphenated word, we also need the pieces.
236 :     my @hyphenated = grep { $_ =~ /-/ } @keywords;
237 :     for my $hyphenated (@hyphenated) {
238 :     # Bust it into pieces.
239 :     my @pieces = grep { length($_) > 2 } split /-/, $hyphenated;
240 :     push @keywords, @pieces;
241 :     }
242 : parrello 1.1 # There are three special attributes computed from property
243 :     # data that we build next. If the special attribute is non-empty,
244 :     # its name will be added to the keyword list. First, we get all
245 :     # the attributes for this feature. They will come back as
246 : parrello 1.6 # 4-tuples: [peg, name, value, URL].
247 :     my @attributes = @{$attributes->{$featureID}};
248 : parrello 1.1 # Now we process each of the special attributes.
249 :     if ($self->SpecialAttribute($featureID, \@attributes,
250 : parrello 1.6 2, [1,3], '^(essential|potential_essential)$',
251 : parrello 1.1 qw(FeatureEssential essential))) {
252 :     push @keywords, 'essential';
253 :     $self->Add(essential => 1);
254 :     }
255 :     if ($self->SpecialAttribute($featureID, \@attributes,
256 : parrello 1.6 1, [2,3], '^virulen',
257 : parrello 1.1 qw(FeatureVirulent virulent))) {
258 :     push @keywords, 'virulent';
259 :     $self->Add(virulent => 1);
260 :     }
261 :     if ($self->SpecialAttribute($featureID, \@attributes,
262 : parrello 1.6 1, [2,3], '^iedb_',
263 : parrello 1.1 qw(FeatureIEDB iedb))) {
264 :     push @keywords, 'iedb';
265 :     $self->Add(iedb => 1);
266 :     }
267 :     # Now we have some other attributes we need to process. To get
268 :     # through them, we convert the attribute list for this feature
269 :     # into a two-layer hash: key => subkey => value.
270 :     my %attributeHash = ();
271 :     for my $attrRow (@{$attributes->{$featureID}}) {
272 :     my (undef, $key, @values) = @{$attrRow};
273 :     my ($realKey, $subKey);
274 :     if ($key =~ /^([^:]+)::(.+)/) {
275 :     ($realKey, $subKey) = ($1, $2);
276 :     } else {
277 :     ($realKey, $subKey) = ($key, "");
278 :     }
279 :     if (exists $attributeHash{$realKey}) {
280 :     $attributeHash{$realKey}->{$subKey} = \@values;
281 :     } else {
282 :     $attributeHash{$realKey} = {$subKey => \@values};
283 :     }
284 :     }
285 : parrello 1.6 TraceDump(AttributeHash => \%attributeHash) if T(FeatureLoadGroup => 4);
286 : parrello 1.1 # First we handle CDD. This is a bit complicated, because
287 :     # there are multiple CDDs per protein.
288 :     if (exists $attributeHash{CDD}) {
289 :     # Get the hash of CDD IDs to scores for this feature. We
290 :     # already know it exists because of the above IF.
291 :     my $cddHash = $attributeHash{CDD};
292 : parrello 1.6 my @cddData = sort keys %$cddHash;
293 : parrello 1.1 for my $cdd (@cddData) {
294 :     # Extract the score for this CDD and decode it.
295 :     my ($codeScore) = split(/\s*[,;]\s*/, $cddHash->{$cdd}->[0]);
296 :     my $realScore = FIGRules::DecodeScore($codeScore);
297 :     # We can't afford to crash because of a bad attribute
298 :     # value, hence the IF below.
299 :     if (! defined($realScore)) {
300 :     # Bad score, so count it.
301 :     $self->Add(badCDDscore => 1);
302 :     Trace("CDD score \"$codeScore\" for feature $featureID invalid.") if T(ERDBLoadGroup => 3);
303 :     } else {
304 :     # Create the connection and a CDD record.
305 : parrello 1.6 $self->PutR(IsPresentOnProteinOf => $cdd, $featureID,
306 :     score => $realScore);
307 :     $self->PutE(CDD => $cdd);
308 :     }
309 :     }
310 :     }
311 :     # A similar situation exists for protein families.
312 :     if (exists $attributeHash{PFAM}) {
313 :     # Get the hash of PFAMs to scores for this feature.
314 :     my $pfamHash = $attributeHash{PFAM};
315 :     for my $pfam (sort keys %$pfamHash) {
316 :     # Extract the range.
317 :     my $codeScore = $pfamHash->{$pfam}->[0];
318 :     $codeScore =~ /;(.+)/;
319 :     my $range = $1;
320 :     # Strip off the PFAM id from the source.
321 :     my ($pfamID) = split /_/, $pfam, 2;
322 :     # Emit the ProteinFamily record.
323 :     $self->PutE(ProteinFamily => $pfamID);
324 :     # Connect it to the feature.
325 :     $self->PutR(IsFamilyForFeature => $pfamID, $featureID,
326 :     range => $range);
327 :     # Get its name from the ontology database. There can
328 :     # be at most one.
329 :     my $dt_objs =
330 :     $ontology_dbmaster->pfam->get_objects({id => $pfamID});
331 :     if (defined $dt_objs->[0]) {
332 :     $self->PutE(ProteinFamilyName => $pfamID,
333 :     common_name => $dt_objs->[0]->term());
334 : parrello 1.1 }
335 :     }
336 :     }
337 :     # Next we do PSORT cell locations. here the confidence value
338 :     # could have the value "unknown", which we translate to -1.
339 :     if (exists $attributeHash{PSORT}) {
340 :     # This will be a hash of cell locations to confidence
341 :     # factors.
342 :     my $psortHash = $attributeHash{PSORT};
343 :     for my $psort (keys %{$psortHash}) {
344 :     # Get the confidence, and convert it to a number if necessary.
345 : parrello 1.6 my $confidence = $psortHash->{$psort}->[0];
346 : parrello 1.1 if ($confidence eq 'unknown') {
347 :     $confidence = -1;
348 :     }
349 : parrello 1.6 $self->PutR(IsPossiblePlaceFor => $psort, $featureID,
350 :     confidence => $confidence);
351 :     $self->PutE(CellLocation => $psort);
352 : parrello 1.1 # If this is a significant location, add it as a keyword.
353 :     if ($confidence > 2.5) {
354 : parrello 1.6 # Before we add it as a keyword, we convert it from
355 :     # capital-case to hyphenated by inserting hyphens at
356 :     # case transition points.
357 :     $psort =~ s/([a-z])([A-Z])/$1-$2/g;
358 : parrello 1.1 push @keywords, $psort;
359 :     }
360 :     }
361 :     }
362 :     # Phobius data is next. This consists of the signal peptide location and
363 :     # the transmembrane locations.
364 :     my $signalList = "";
365 :     my $transList = "";
366 : parrello 1.6 my $transCount = 0;
367 : parrello 1.1 if (exists $attributeHash{Phobius}) {
368 :     # This will be a hash of two keys (transmembrane and signal) to
369 : parrello 1.6 # location lists. GetCommaList converts them into comma-separated
370 :     # location strings. If there's no value, it returns an empty string.
371 : parrello 1.1 $signalList = $self->GetCommaList($attributeHash{Phobius}->{signal});
372 : parrello 1.6 my $transList = $attributeHash{Phobius}->{transmembrane};
373 :     my @transMap = split /\s*,\s*/, $transList;
374 :     $transCount = (defined $transList ? scalar(@transMap) : 0);
375 : parrello 1.1 }
376 :     # Here are some more numbers: isoelectric point, molecular weight, and
377 :     # the similar-to-human flag.
378 :     my $isoelectric = 0;
379 :     if (exists $attributeHash{isoelectric_point}) {
380 : parrello 1.6 $isoelectric = $attributeHash{isoelectric_point}->{""}->[0];
381 : parrello 1.1 }
382 :     my $similarToHuman = 0;
383 : parrello 1.6 if (exists $attributeHash{similar_to_human} && $attributeHash{similar_to_human}->{""}->[0] eq 'yes') {
384 : parrello 1.1 $similarToHuman = 1;
385 :     }
386 :     my $molecularWeight = 0;
387 :     if (exists $attributeHash{molecular_weight}) {
388 : parrello 1.6 $molecularWeight = $attributeHash{molecular_weight}->{""}->[0];
389 : parrello 1.1 }
390 :     # Join the keyword string.
391 :     my $keywordString = join(" ", @keywords);
392 :     # Get rid of annoying punctuation.
393 : parrello 1.5 $keywordString =~ s/[();@#\/,]/ /g;
394 : parrello 1.6 # Get the list of keywords in the keyword string, minus the delimiters.
395 :     my @realKeywords = grep { $stemmer->IsWord($_) }
396 :     $stemmer->Split($keywordString);
397 : parrello 1.1 # We need to do two things here: create the keyword string for the feature table
398 :     # and write records to the keyword table for the keywords.
399 :     my (%keys, %stems, @realStems);
400 :     for my $keyword (@realKeywords) {
401 :     # Compute the stem and phonex for this keyword.
402 :     my ($stem, $phonex) = $stemmer->StemLookup($keyword);
403 :     # Only proceed if a stem comes back. If no stem came back, it's a
404 :     # stop word and we throw it away.
405 :     if ($stem) {
406 :     $keys{$keyword} = $stem;
407 :     $stems{$stem} = $phonex;
408 :     push @realStems, $stem;
409 :     }
410 :     }
411 :     # Now create the keyword string.
412 :     my $cleanWords = join(" ", @realStems);
413 :     Trace("Keyword string for $featureID: $cleanWords") if T(ERDBLoadGroup => 4);
414 :     # Create keyword table entries for the keywords found.
415 :     for my $key (keys %keys) {
416 :     my $stem = $keys{$key};
417 : parrello 1.6 $self->PutE(Keyword => $key, stem => $stem, phonex => $stems{$stem});
418 : parrello 1.1 }
419 :     # Now we need to process the feature's locations. First, we split them up.
420 :     my @locationList = split /\s*,\s*/, $locations;
421 :     # Next, we convert them to Sprout location objects.
422 :     my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;
423 :     # Assemble them into a sprout location string for later.
424 :     my $locationString = join(", ", map { $_->String } @locObjectList);
425 :     # We'll store the sequence length in here.
426 :     my $sequenceLength = 0;
427 :     # This part is the roughest. We need to relate the features to contig
428 :     # locations, and the locations must be split so that none of them exceed
429 :     # the maximum segment size. This simplifies the genes_in_region processing
430 :     # for Sprout. To start, we create the location position indicator.
431 :     my $i = 1;
432 :     # Loop through the locations.
433 :     for my $locObject (@locObjectList) {
434 :     # Record the length.
435 :     $sequenceLength += $locObject->Length;
436 :     # Split this location into a list of chunks.
437 :     my @locOList = ();
438 :     while (my $peeling = $locObject->Peel($chunkSize)) {
439 :     $self->Add(peeling => 1);
440 :     push @locOList, $peeling;
441 :     }
442 :     push @locOList, $locObject;
443 :     # Loop through the chunks, creating IsLocatedIn records. The variable
444 :     # "$i" will be used to keep the location index.
445 :     for my $locChunk (@locOList) {
446 : parrello 1.6 $self->PutR(IsLocatedIn => $featureID, $locChunk->Contig,
447 :     beg => $locChunk->Left, dir => $locChunk->Dir,
448 :     len => $locChunk->Length, locN => $i);
449 : parrello 1.1 $i++;
450 :     }
451 :     }
452 : parrello 1.6 # Check for figfams. In case we find any, we need the range.
453 :     # It's the whole sequence.
454 :     my $range = "1-$sequenceLength";
455 :     # Ask for the figfams.
456 :     my @fams = $ffs->families_containing_peg($featureID);
457 :     # Connect them to the feature (if any).
458 :     for my $fam (@fams) {
459 :     $self->PutE(ProteinFamily => $fam);
460 :     $self->PutR(IsFamilyForFeature => $fam, $featureID,
461 :     range => $range);
462 :     }
463 : parrello 1.1 # Now we get some ancillary flags.
464 :     my $locked = $fig->is_locked_fid($featureID);
465 :     my $in_genbank = $fig->peg_in_gendb($featureID);
466 :     # Create the feature record.
467 : parrello 1.6 $self->PutE(Feature => $featureID, 'assignment-maker' => $user,
468 : parrello 1.1 'assignment-quality' => $quality, 'feature-type' => $type,
469 :     'in-genbank' => $in_genbank, 'isoelectric-point' => $isoelectric,
470 :     locked => $locked, 'molecular-weight' => $molecularWeight,
471 :     'sequence-length' => $sequenceLength,
472 :     'signal-peptide' => $signalList, 'similar-to-human' => $similarToHuman,
473 :     assignment => $assignment, keywords => $cleanWords,
474 :     'location-string' => $locationString,
475 : parrello 1.6 'transmembrane-map' => $transList,
476 :     'conserved-neighbors' => $couplingCount,
477 :     'transmembrane-domain-count' => $transCount);
478 : parrello 1.1 }
479 :     }
480 :     }
481 :     }
482 :    
483 :    
484 :     =head3 SpecialAttribute
485 :    
486 :     my $count = $sl->SpecialAttribute($id, \@attributes, $idxMatch, \@idxValues, $pattern, $tableName, $field);
487 :    
488 :     Look for special attributes of a given type. A special attribute is found by comparing one of
489 :     the columns of the incoming attribute list to a search pattern. If a match is found, then
490 :     a set of columns is put into an output table connected to the specified ID.
491 :    
492 :     For example, when processing features, the attribute list we look at has three columns: attribute
493 :     name, attribute value, and attribute value HTML. The IEDB attribute exists if the attribute name
494 :     begins with C<iedb_>. The call signature is therefore
495 :    
496 :     my $found = SpecialAttribute($fid, \@attributeList, 0, [0,2], '^iedb_', 'FeatureIEDB', 'iedb');
497 :    
498 :     The pattern is matched against column 0, and if we have a match, then column 2's value is put
499 :     to the output along with the specified feature ID.
500 :    
501 :     =over 4
502 :    
503 :     =item id
504 :    
505 :     ID of the object whose special attributes are being loaded. This forms the first column of the
506 :     output.
507 :    
508 :     =item attributes
509 :    
510 :     Reference to a list of tuples.
511 :    
512 :     =item idxMatch
513 :    
514 :     Index in each tuple of the column to be matched against the pattern. If the match is
515 :     successful, an output record will be generated.
516 :    
517 :     =item idxValues
518 :    
519 : parrello 1.6 Reference to a list containing the indexes of the value and URL to put in the
520 :     second column of the output.
521 : parrello 1.1
522 :     =item pattern
523 :    
524 :     Pattern to be matched against the specified column. The match will be case-insensitive.
525 :    
526 :     =item tableName
527 :    
528 :     Name of the table to contain the attribute values found.
529 :    
530 :     =item fieldName
531 :    
532 :     Name of the field to contain the attribute values in the output table.
533 :    
534 :     =item RETURN
535 :    
536 :     Returns a count of the matches found.
537 :    
538 :     =item
539 :    
540 :     =back
541 :    
542 :     =cut
543 :    
544 :     sub SpecialAttribute {
545 :     # Get the parameters.
546 :     my ($self, $id, $attributes, $idxMatch, $idxValues, $pattern, $tableName, $fieldName) = @_;
547 :     # Declare the return variable.
548 :     my $retVal = 0;
549 :     # Loop through the attribute rows.
550 :     for my $row (@{$attributes}) {
551 :     # Check for a match.
552 :     if ($row->[$idxMatch] =~ m/$pattern/i) {
553 : parrello 1.6 # We have a match, so output a row.
554 :     my $value = HyperLink->new(map { $row->[$_] } @$idxValues);
555 :     $self->PutE($tableName => $id, $fieldName => $value);
556 : parrello 1.1 $retVal++;
557 :     }
558 :     }
559 :     Trace("$retVal special attributes found for $id and table $tableName.") if T(ERDBLoadGroup => 4) && $retVal;
560 :     # Return the number of matches.
561 :     return $retVal;
562 :     }
563 :    
564 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3