[Bio] / Sprout / FeatureSproutLoader.pm Repository:
ViewVC logotype

Annotation of /Sprout/FeatureSproutLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package FeatureSproutLoader;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use ERDB;
25 :     use BioWords;
26 :     use AliasAnalysis;
27 : parrello 1.6 use DBMaster;
28 :     use HyperLink;
29 :     use FFs;
30 : parrello 1.8 use SOAP::Lite;
31 : parrello 1.9 use Time::HiRes;
32 : parrello 1.10 use LoaderUtils;
33 : parrello 1.1 use base 'BaseSproutLoader';
34 :    
35 :     =head1 Sprout Feature Load Group Class
36 :    
37 :     =head2 Introduction
38 :    
39 :     The Feature Load Group includes all of the major feature-related tables.
40 :    
41 :     =head3 new
42 :    
43 : parrello 1.3 my $sl = FeatureSproutLoader->new($erdb, $options);
44 : parrello 1.1
45 : parrello 1.3 Construct a new FeatureSproutLoader object.
46 : parrello 1.1
47 :     =over 4
48 :    
49 :     =item erdb
50 :    
51 :     [[SproutPm]] object for the database being loaded.
52 :    
53 :     =item options
54 :    
55 :     Reference to a hash of command-line options.
56 :    
57 :     =item tables
58 :    
59 :     List of tables in this load group. The list is built by the constructor itself; it is not passed in by the caller.
60 :    
61 :     =back
62 :    
63 :     =cut
64 :    
sub new {
    # Unpack the class name, the database object, and the option hash.
    my ($class, $erdb, $options) = @_;
    # These are the tables belonging to this load group. The base-class
    # constructor receives them in sorted order.
    my @groupTables = qw(
        Feature IsLocatedIn FeatureAlias IsAliasOf FeatureLink
        FeatureTranslation FeatureUpstream HasFeature HasRoleInSubsystem
        FeatureEssential FeatureVirulent FeatureIEDB CDD
        IsPresentOnProteinOf CellLocation IsPossiblePlaceFor
        IsAlsoFoundIn ExternalDatabase Keyword ProteinFamily
        IsFamilyForFeature ProteinFamilyName FeatureEC
    );
    my @tables = sort @groupTables;
    # Let the base class build the loader object for us.
    my $loader = BaseSproutLoader::new($class, $erdb, $options, @tables);
    # Bless it into the requested class and hand it back.
    return bless $loader, $class;
}
82 :    
83 :     =head2 Public Methods
84 :    
85 :     =head3 Generate
86 :    
87 :     $sl->Generate();
88 :    
89 :     Generate the data for the feature-related files.
90 :    
91 :     =cut
92 :    
sub Generate {
    # Generate the load data for every feature-related table in this group.
    #
    # For a non-global section (one genome), this method walks all of the
    # genome's features and, for each active (non-deleted) feature, emits
    # the alias, link, translation, upstream, subsystem, attribute-derived
    # (essential/virulent/IEDB, CDD, PFAM, PSORT, Phobius), keyword, location,
    # and FIGfam records, followed by the Feature record itself. Nothing is
    # emitted for the global section.
    #
    # Get the parameters.
    my ($self) = @_;
    # Get the sprout object.
    my $sprout = $self->db();
    # Get the FIG object.
    my $fig = $self->source();
    # Get the subsystem list.
    my $subHash = $self->GetSubsystems();
    # Get the word stemmer.
    my $stemmer = $sprout->GetStemmer();
    # Get access to FIGfams.
    my $figfam_data = FIG::get_figfams_data();
    my $ffs = FFs->new($figfam_data, $fig);
    # Compute the load directory.
    my $loadDirectory = $sprout->LoadDirectory();
    # Only proceed if this is not the global section.
    if (! $self->global()) {
        # Get the section ID.
        my $genomeID = $self->section();
        MemTrace("Starting section $genomeID.") if T(ERDBLoadGroup => 3);
        # Connect to the ontology database.
        # NOTE(review): this path is hard-coded to one user's home directory;
        # consider moving it to configuration.
        my $sqlite_db = "/home/mkubal/Temp/Ontology/ontology.sqlite";
        my $ontology_dbmaster = DBMaster->new(-database => $sqlite_db, -backend => 'SQLite');
        # This is our master hash of FIG IDs to aliases.
        my $aliasMasterHash = LoaderUtils::ReadAliasFile($loadDirectory, $genomeID) || {};
        # Get the maximum sequence size. We need this later for splitting up the
        # locations.
        my $chunkSize = $sprout->MaxSegment();
        MemTrace("Loading features for genome $genomeID.") if T(ERDBLoadGroup => 3);
        # Get the feature list for this genome.
        my $features = $fig->all_features_detailed_fast($genomeID);
        # Sort and count the list.
        my @featureTuples = sort { $a->[0] cmp $b->[0] } @{$features};
        my $count = scalar @featureTuples;
        MemTrace("$count features found for genome $genomeID.") if T(ERDBLoadGroup => 3);
        # Get the attributes for this genome and put them in a hash by feature ID.
        my $attributes = $self->GetGenomeAttributes($genomeID, \@featureTuples);
        Trace("Looping through features for $genomeID.") if T(ERDBLoadGroup => 3);
        # Loop through the features.
        for my $featureTuple (@featureTuples) {
            # Split the tuple.
            my ($featureID, $locations, $aliases, $type, $minloc, $maxloc, $assignment,
                $user, $quality) = @{$featureTuple};
            # Make sure this feature is active.
            if (! $fig->is_deleted_fid($featureID)) {
                # Handle missing assignments.
                if (! defined $assignment) {
                    $assignment = '';
                    $user = '';
                } else {
                    # The default assignment-maker is FIG.
                    $user ||= 'fig';
                }
                # Count this feature.
                $self->Track(features => $featureID, 1000);
                # Fix the quality. It is almost always a space, but some odd stuff might sneak through, and the
                # Sprout database requires a single character.
                if (! defined($quality) || $quality eq "") {
                    $quality = " ";
                }
                # Get the coupling count. The coupled features are returned as a list,
                # and we store it as a scalar to get the count.
                # NOTE(review): this relies on coupled_to yielding a count in
                # scalar context -- confirm against the FIG implementation.
                my $couplingCount = $fig->coupled_to($featureID);
                # Begin building the keywords. We start with the genome ID, the
                # feature ID, the taxonomy, and the organism name.
                my @keywords = ($genomeID, $featureID, $fig->genus_species($genomeID),
                                $fig->taxonomy_of($genomeID));
                # Next come the aliases. We put all aliases found in this hash.
                # They will be output as alias names and as keywords.
                my %aliasHash;
                # Note the trick here to ensure that we have a list reference even
                # if this feature isn't in the alias table.
                my $aliasList = $aliasMasterHash->{$featureID} || [];
                # Loop through this feature ID's aliases.
                for my $aliasTuple (@$aliasList) {
                    my ($aliasID, $aliasType, $aliasConf) = @$aliasTuple;
                    # Only proceed if this alias is new.
                    if (! exists $aliasHash{$aliasID}) {
                        # Save this alias.
                        $aliasHash{$aliasID} = 1;
                        # Get its natural form.
                        my $natural = AliasAnalysis::Type($aliasType => $aliasID);
                        # Only proceed if a natural form exists.
                        if ($natural) {
                            $self->Add(miscAlias => 1);
                            # Save the natural form.
                            $aliasHash{$natural} = 1;
                            # Is this a corresponding ID?
                            if ($aliasConf eq 'A') {
                                # Yes. Connect its natural form to the feature.
                                $self->PutR(IsAlsoFoundIn => $featureID, $aliasType,
                                            alias => $natural);
                                $self->PutE(ExternalDatabase => $aliasType);
                            }
                        }
                    }
                }
                # Create the aliases and put them in the keyword list.
                for my $alias (sort keys %aliasHash) {
                    # Connect this alias to this feature and make an Alias record for it.
                    $self->PutR(IsAliasOf => $alias, $featureID);
                    $self->PutE(FeatureAlias => $alias);
                    # Add it to the keyword list.
                    push @keywords, $alias;
                }
                Trace("Assignment for $featureID is: $assignment") if T(ERDBLoadGroup => 4);
                # Break the assignment into words and shove it onto the
                # keyword list.
                push @keywords, split(/\s+/, $assignment);
                # Add any EC numbers.
                my @ecs = BioWords::ExtractECs($assignment);
                for my $ec (@ecs) {
                    push @keywords, $ec;
                    $self->PutE(FeatureEC => $featureID, ec => $ec);
                }
                # Link this feature to the parent genome.
                $self->PutR(HasFeature => $genomeID, $featureID,
                            type => $type);
                # Get the links.
                my @links = $fig->fid_links($featureID);
                for my $link (@links) {
                    $self->PutE(FeatureLink => $featureID, link => $link);
                }
                # If this is a peg, generate the translation and the upstream.
                if ($type eq 'peg') {
                    $self->Add(pegIn => 1);
                    my $translation = $fig->get_translation($featureID);
                    if ($translation) {
                        $self->PutE(FeatureTranslation => $featureID,
                                    translation => $translation);
                    }
                    # We use the default upstream values of u=200 and c=100.
                    my $upstream = $fig->upstream_of($featureID, 200, 100);
                    if ($upstream) {
                        $self->PutE(FeatureUpstream => $featureID,
                                    'upstream-sequence' => $upstream);
                    }
                }
                # Now we need to find the subsystems this feature participates in.
                my @ssList = $fig->subsystems_for_peg($featureID);
                # This hash prevents us from adding the same subsystem twice.
                # NOTE(review): nothing in this loop ever sets an entry in
                # %seen, so the check below is always true -- confirm intent.
                my %seen = ();
                for my $ssEntry (@ssList) {
                    # Get the subsystem and role.
                    my ($subsystem, $role) = @{$ssEntry};
                    # Only proceed if we like this subsystem.
                    if (exists $subHash->{$subsystem}) {
                        # If this is the first time we've seen this subsystem for
                        # this peg, store the has-role link.
                        if (! $seen{$subsystem}) {
                            $self->PutR(HasRoleInSubsystem => $featureID, $subsystem,
                                        genome => $genomeID, type => $type);
                            # Save the subsystem's keywords.
                            push @keywords, split /[\s_]+/, $subsystem;
                        }
                        # Now add the role and any embedded EC numbers to the keyword list.
                        push @keywords, split /\s+/, $role;
                        push @keywords, BioWords::ExtractECs($role);
                    }
                }
                # For each hyphenated word, we also need the pieces.
                my @hyphenated = grep { $_ =~ /-/ } @keywords;
                for my $hyphenated (@hyphenated) {
                    # Bust it into pieces.
                    my @pieces = grep { length($_) > 2 } split /-/, $hyphenated;
                    push @keywords, @pieces;
                }
                # There are three special attributes computed from property
                # data that we build next. If the special attribute is non-empty,
                # its name will be added to the keyword list. First, we get all
                # the attributes for this feature. They will come back as
                # 4-tuples: [peg, name, value, URL]. A feature with no entry in
                # the attribute hash is treated as having an empty list, so the
                # dereference cannot die under "use strict".
                my @attributes = @{$attributes->{$featureID} || []};
                # Now we process each of the special attributes.
                if ($self->SpecialAttribute($featureID, \@attributes,
                                            2, [1,3], '^(essential|potential_essential)$',
                                            qw(FeatureEssential essential))) {
                    push @keywords, 'essential';
                    $self->Add(essential => 1);
                }
                if ($self->SpecialAttribute($featureID, \@attributes,
                                            1, [2,3], '^virulen',
                                            qw(FeatureVirulent virulent))) {
                    push @keywords, 'virulent';
                    $self->Add(virulent => 1);
                }
                if ($self->SpecialAttribute($featureID, \@attributes,
                                            1, [2,3], '^iedb_',
                                            qw(FeatureIEDB iedb))) {
                    push @keywords, 'iedb';
                    $self->Add(iedb => 1);
                }
                # Now we have some other attributes we need to process. To get
                # through them, we convert the attribute list for this feature
                # into a two-layer hash: key => subkey => value list.
                my %attributeHash = ();
                for my $attrRow (@attributes) {
                    my (undef, $key, @values) = @{$attrRow};
                    my ($realKey, $subKey);
                    if ($key =~ /^([^:]+)::(.+)/) {
                        ($realKey, $subKey) = ($1, $2);
                    } else {
                        ($realKey, $subKey) = ($key, "");
                    }
                    if (exists $attributeHash{$realKey}) {
                        $attributeHash{$realKey}->{$subKey} = \@values;
                    } else {
                        $attributeHash{$realKey} = {$subKey => \@values};
                    }
                }
                TraceDump(AttributeHash => \%attributeHash) if T(FeatureLoadGroup => 4);
                # First we handle CDD. This is a bit complicated, because
                # there are multiple CDDs per protein.
                if (exists $attributeHash{CDD}) {
                    # Get the hash of CDD IDs to scores for this feature. We
                    # already know it exists because of the above IF.
                    my $cddHash = $attributeHash{CDD};
                    my @cddData = sort keys %$cddHash;
                    for my $cdd (@cddData) {
                        # Extract the score for this CDD and decode it.
                        my ($codeScore) = split(/\s*[,;]\s*/, $cddHash->{$cdd}->[0]);
                        my $realScore = FIGRules::DecodeScore($codeScore);
                        # We can't afford to crash because of a bad attribute
                        # value, hence the IF below.
                        if (! defined($realScore)) {
                            # Bad score, so count it.
                            $self->Add(badCDDscore => 1);
                            Trace("CDD score \"$codeScore\" for feature $featureID invalid.") if T(ERDBLoadGroup => 3);
                        } else {
                            # Create the connection and a CDD record.
                            $self->PutR(IsPresentOnProteinOf => $cdd, $featureID,
                                        score => $realScore);
                            $self->PutE(CDD => $cdd);
                        }
                    }
                }
                # A similar situation exists for protein families.
                if (exists $attributeHash{PFAM}) {
                    # Get the hash of PFAMs to scores for this feature.
                    my $pfamHash = $attributeHash{PFAM};
                    for my $pfam (sort keys %$pfamHash) {
                        # Extract the range. We capture in list context so that
                        # a value without a semicolon yields an empty range
                        # instead of a stale $1 left over from an earlier match.
                        my $codeScore = $pfamHash->{$pfam}->[0];
                        my ($range) = $codeScore =~ /;(.+)/;
                        $range = '' if ! defined $range;
                        # Strip off the PFAM id from the source.
                        my ($pfamID) = split /_/, $pfam, 2;
                        # Emit the ProteinFamily record.
                        $self->PutE(ProteinFamily => $pfamID);
                        # Connect it to the feature.
                        $self->PutR(IsFamilyForFeature => $pfamID, $featureID,
                                    range => $range);
                        # Get its name from the ontology database. There can
                        # be at most one.
                        my $dt_objs =
                            $ontology_dbmaster->pfam->get_objects({id => $pfamID});
                        if (defined $dt_objs->[0]) {
                            $self->PutE(ProteinFamilyName => $pfamID,
                                        common_name => $dt_objs->[0]->term());
                        }
                    }
                }
                # Next we do PSORT cell locations. Here the confidence value
                # could have the value "unknown", which we translate to -1.
                if (exists $attributeHash{PSORT}) {
                    # This will be a hash of cell locations to confidence
                    # factors.
                    my $psortHash = $attributeHash{PSORT};
                    for my $psort (keys %{$psortHash}) {
                        # Get the confidence, and convert it to a number if necessary.
                        my $confidence = $psortHash->{$psort}->[0];
                        if ($confidence eq 'unknown') {
                            $confidence = -1;
                        }
                        $self->PutR(IsPossiblePlaceFor => $psort, $featureID,
                                    confidence => $confidence);
                        $self->PutE(CellLocation => $psort);
                        # If this is a significant location, add it as a keyword.
                        if ($confidence > 2.5) {
                            # Before we add it as a keyword, we convert it from
                            # capital-case to hyphenated by inserting hyphens at
                            # case transition points.
                            $psort =~ s/([a-z])([A-Z])/$1-$2/g;
                            push @keywords, $psort;
                        }
                    }
                }
                # Phobius data is next. This consists of the signal peptide location and
                # the transmembrane locations.
                my $signalList = "";
                my $transList = "";
                my $transCount = 0;
                if (exists $attributeHash{Phobius}) {
                    # This will be a hash of two keys (transmembrane and signal) to
                    # value lists. GetCommaList is used to turn the signal value
                    # into a location string.
                    $signalList = $self->GetCommaList($attributeHash{Phobius}->{signal});
                    # The transmembrane map is the first value of its attribute
                    # row. We assign to the outer $transList (rather than
                    # declaring a new one) so the map reaches the Feature record,
                    # and we split the string itself, not the list reference.
                    my $transValues = $attributeHash{Phobius}->{transmembrane};
                    if (defined $transValues && defined $transValues->[0]) {
                        $transList = $transValues->[0];
                        my @transMap = split /\s*,\s*/, $transList;
                        $transCount = scalar @transMap;
                    }
                }
                # Here are some more numbers: isoelectric point, molecular weight, and
                # the similar-to-human flag.
                my $isoelectric = 0;
                if (exists $attributeHash{isoelectric_point}) {
                    $isoelectric = $attributeHash{isoelectric_point}->{""}->[0];
                }
                my $similarToHuman = 0;
                if (exists $attributeHash{similar_to_human} && $attributeHash{similar_to_human}->{""}->[0] eq 'yes') {
                    $similarToHuman = 1;
                }
                my $molecularWeight = 0;
                if (exists $attributeHash{molecular_weight}) {
                    $molecularWeight = $attributeHash{molecular_weight}->{""}->[0];
                }
                # Join the keyword string.
                my $keywordString = join(" ", @keywords);
                # Get rid of annoying punctuation.
                $keywordString =~ s/[();@#\/,]/ /g;
                # Get the list of keywords in the keyword string, minus the delimiters.
                my @realKeywords = grep { $stemmer->IsWord($_) }
                                   $stemmer->Split($keywordString);
                # We need to do two things here: create the keyword string for the feature table
                # and write records to the keyword table for the keywords.
                my (%keys, %stems, @realStems);
                for my $keyword (@realKeywords) {
                    # Compute the stem and phonex for this keyword.
                    my ($stem, $phonex) = $stemmer->StemLookup($keyword);
                    # Only proceed if a stem comes back. If no stem came back, it's a
                    # stop word and we throw it away.
                    if ($stem) {
                        $keys{$keyword} = $stem;
                        $stems{$stem} = $phonex;
                        push @realStems, $stem;
                    }
                }
                # Now create the keyword string.
                my $cleanWords = join(" ", @realStems);
                Trace("Keyword string for $featureID: $cleanWords") if T(ERDBLoadGroup => 4);
                # Create keyword table entries for the keywords found.
                for my $key (keys %keys) {
                    my $stem = $keys{$key};
                    $self->PutE(Keyword => $key, stem => $stem, phonex => $stems{$stem});
                }
                # Now we need to process the feature's locations. First, we split them up.
                my @locationList = split /\s*,\s*/, $locations;
                # Next, we convert them to Sprout location objects.
                my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;
                # Assemble them into a sprout location string for later.
                my $locationString = join(", ", map { $_->String } @locObjectList);
                # We'll store the sequence length in here.
                my $sequenceLength = 0;
                # This part is the roughest. We need to relate the features to contig
                # locations, and the locations must be split so that none of them exceed
                # the maximum segment size. This simplifies the genes_in_region processing
                # for Sprout. To start, we create the location position indicator.
                my $i = 1;
                # Loop through the locations.
                for my $locObject (@locObjectList) {
                    # Record the length.
                    $sequenceLength += $locObject->Length;
                    # Split this location into a list of chunks.
                    my @locOList = ();
                    while (my $peeling = $locObject->Peel($chunkSize)) {
                        $self->Add(peeling => 1);
                        push @locOList, $peeling;
                    }
                    push @locOList, $locObject;
                    # Loop through the chunks, creating IsLocatedIn records. The variable
                    # "$i" will be used to keep the location index.
                    for my $locChunk (@locOList) {
                        $self->PutR(IsLocatedIn => $featureID, $locChunk->Contig,
                                    beg => $locChunk->Left, dir => $locChunk->Dir,
                                    len => $locChunk->Length, locN => $i);
                        $i++;
                    }
                }
                # Check for figfams. In case we find any, we need the range.
                # It's the whole sequence.
                my $range = "1-$sequenceLength";
                # Ask for the figfams.
                my @fams = $ffs->families_containing_peg($featureID);
                # Connect them to the feature (if any).
                for my $fam (@fams) {
                    $self->PutE(ProteinFamily => $fam);
                    $self->PutR(IsFamilyForFeature => $fam, $featureID,
                                range => $range);
                }
                # Now we get some ancillary flags.
                my $locked = $fig->is_locked_fid($featureID);
                my $in_genbank = $fig->peg_in_gendb($featureID);
                # Create the feature record.
                $self->PutE(Feature => $featureID, 'assignment-maker' => $user,
                            'assignment-quality' => $quality, 'feature-type' => $type,
                            'in-genbank' => $in_genbank, 'isoelectric-point' => $isoelectric,
                            locked => $locked, 'molecular-weight' => $molecularWeight,
                            'sequence-length' => $sequenceLength,
                            'signal-peptide' => $signalList, 'similar-to-human' => $similarToHuman,
                            assignment => $assignment, keywords => $cleanWords,
                            'location-string' => $locationString,
                            'transmembrane-map' => $transList,
                            'conserved-neighbors' => $couplingCount,
                            'transmembrane-domain-count' => $transCount);
            }
        }
    }
}
501 :    
502 :    
503 :     =head3 SpecialAttribute
504 :    
505 :     my $count = $sl->SpecialAttribute($id, \@attributes, $idxMatch, \@idxValues, $pattern, $tableName, $field);
506 :    
507 :     Look for special attributes of a given type. A special attribute is found by comparing one of
508 :     the columns of the incoming attribute list to a search pattern. If a match is found, then
509 :     a set of columns is put into an output table connected to the specified ID.
510 :    
511 :     For example, when processing features, the attribute list we look at has four columns: feature ID,
512 :     attribute name, attribute value, and attribute URL. The IEDB attribute exists if the attribute name
513 :     begins with C<iedb_>. The call signature is therefore
514 :    
515 :     my $found = $sl->SpecialAttribute($fid, \@attributeList, 1, [2,3], '^iedb_', 'FeatureIEDB', 'iedb');
516 :    
517 :     The pattern is matched against column 1, and if we have a match, then a hyperlink built from the
518 :     values in columns 2 and 3 is put to the output along with the specified feature ID.
519 :    
520 :     =over 4
521 :    
522 :     =item id
523 :    
524 :     ID of the object whose special attributes are being loaded. This forms the first column of the
525 :     output.
526 :    
527 :     =item attributes
528 :    
529 :     Reference to a list of tuples.
530 :    
531 :     =item idxMatch
532 :    
533 :     Index in each tuple of the column to be matched against the pattern. If the match is
534 :     successful, an output record will be generated.
535 :    
536 :     =item idxValues
537 :    
538 : parrello 1.6 Reference to a list containing the indexes of the value and URL to put in the
539 :     second column of the output.
540 : parrello 1.1
541 :     =item pattern
542 :    
543 :     Pattern to be matched against the specified column. The match will be case-insensitive.
544 :    
545 :     =item tableName
546 :    
547 :     Name of the table to contain the attribute values found.
548 :    
549 :     =item fieldName
550 :    
551 :     Name of the field to contain the attribute values in the output table.
552 :    
553 :     =item RETURN
554 :    
555 :     Returns a count of the matches found.
556 :    
557 :     =item
558 :    
559 :     =back
560 :    
561 :     =cut
562 :    
sub SpecialAttribute {
    # Unpack the arguments: the owning object ID, the attribute tuples, the
    # index of the column to test, the indexes of the columns that form the
    # output value, the search pattern, and the output table and field names.
    my ($self, $id, $attributes, $idxMatch, $idxValues, $pattern, $tableName, $fieldName) = @_;
    # Running total of matching rows; this is what we return.
    my $matches = 0;
    foreach my $tuple (@{$attributes}) {
        # Skip any row whose test column fails the case-insensitive match.
        next unless $tuple->[$idxMatch] =~ m/$pattern/i;
        # Pull out the requested columns and wrap them in a hyperlink object.
        my @linkParts = map { $tuple->[$_] } @{$idxValues};
        my $link = HyperLink->new(@linkParts);
        # Emit one output record per matching row.
        $self->PutE($tableName => $id, $fieldName => $link);
        $matches++;
    }
    # Trace only when something was actually found.
    if (T(ERDBLoadGroup => 4) && $matches) {
        Trace("$matches special attributes found for $id and table $tableName.");
    }
    # Tell the caller how many rows matched.
    return $matches;
}
582 :    
583 : parrello 1.9
584 : parrello 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3