[Bio] / Sprout / FeatureSproutLoader.pm Repository:
ViewVC logotype

Annotation of /Sprout/FeatureSproutLoader.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (view) (download) (as text)

1 : parrello 1.1 #!/usr/bin/perl -w
2 :    
3 :     #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package FeatureSproutLoader;
21 :    
22 :     use strict;
23 :     use Tracer;
24 :     use ERDB;
25 :     use BioWords;
26 :     use AliasAnalysis;
27 : parrello 1.6 use DBMaster;
28 :     use HyperLink;
29 :     use FFs;
30 : parrello 1.8 use SOAP::Lite;
31 : parrello 1.9 use Time::HiRes;
32 : parrello 1.1 use base 'BaseSproutLoader';
33 :    
34 :     =head1 Sprout Feature Load Group Class
35 :    
36 :     =head2 Introduction
37 :    
38 :     The Feature Load Group includes all of the major feature-related tables.
39 :    
40 :     =head3 new
41 :    
42 : parrello 1.3 my $sl = FeatureSproutLoader->new($erdb, $options);
43 : parrello 1.1
44 : parrello 1.3 Construct a new FeatureSproutLoader object.
45 : parrello 1.1
46 :     =over 4
47 :    
48 :     =item erdb
49 :    
50 :     [[SproutPm]] object for the database being loaded.
51 :    
52 :     =item options
53 :    
54 :     Reference to a hash of command-line options.
55 :    
56 :     =item tables
57 :    
58 :     (Not supplied by the caller.) The list of tables in this load group is built by the
58 :     constructor itself and passed on to the base-class constructor.
59 :    
60 :     =back
61 :    
62 :     =cut
63 :    
sub new {
    # Unpack the class name, the database object, and the option hash.
    my ($class, $erdb, $options) = @_;
    # Names of every table belonging to the feature load group. They are
    # handed to the base class in sorted order.
    my @tableNames = sort qw(
        CDD CellLocation ExternalDatabase Feature FeatureAlias FeatureEC
        FeatureEssential FeatureIEDB FeatureLink FeatureTranslation
        FeatureUpstream FeatureVirulent HasFeature HasRoleInSubsystem
        IsAliasOf IsAlsoFoundIn IsFamilyForFeature IsLocatedIn
        IsPossiblePlaceFor IsPresentOnProteinOf Keyword ProteinFamily
        ProteinFamilyName
    );
    # Let the base class build the underlying loader object.
    my $self = BaseSproutLoader::new($class, $erdb, $options, @tableNames);
    # Bless it into the requested class and hand it back.
    return bless $self, $class;
}
81 :    
82 :     =head2 Public Methods
83 :    
84 :     =head3 Generate
85 :    
86 :     $sl->Generate();
87 :    
88 :     Generate the data for the feature-related files.
89 :    
90 :     =cut
91 :    
sub Generate {
    # Generate the load data for all the feature-related tables of the
    # current section. The section ID is used as a genome ID. Nothing is
    # output when the loader is positioned on the global section.
    #
    # Get the parameters.
    my ($self) = @_;
    # Get the sprout object.
    my $sprout = $self->db();
    # Get the FIG object.
    my $fig = $self->source();
    # Get the subsystem list.
    my $subHash = $self->GetSubsystems();
    # Get the word stemmer.
    my $stemmer = $sprout->GetStemmer();
    # Get access to FIGfams.
    my $figfam_data = FIG::get_figfams_data();
    my $ffs = FFs->new($figfam_data, $fig);
    # Compute the load directory.
    my $loadDirectory = $sprout->LoadDirectory();
    # Only proceed if this is not the global section.
    if (! $self->global()) {
        # Get the section ID.
        my $genomeID = $self->section();
        MemTrace("Starting section $genomeID.") if T(ERDBLoadGroup => 3);
        # Connect to the ontology database.
        # NOTE(review): this path is hard-coded to a developer's home
        # directory; it probably belongs in configuration.
        my $sqlite_db = "/home/mkubal/Temp/Ontology/ontology.sqlite";
        my $ontology_dbmaster = DBMaster->new(-database => $sqlite_db, -backend => 'SQLite');
        # This is our master hash of FIG IDs to aliases.
        my %aliasMasterHash;
        # Open this genome's alias file. The alias files are created by the AliasCrunch
        # script.
        my $aliasFile = "$loadDirectory/alias.$genomeID.tbl";
        if (! -f $aliasFile) {
            Trace("No aliases found for $genomeID.") if T(ERDBLoadGroup => 1);
        } else {
            my $aliasH = Open(undef, "<$aliasFile");
            while (! eof $aliasH) {
                # Each row supplies a feature ID, an alias ID, the alias type,
                # and a confidence code.
                my ($aliasFid, $aliasID, $aliasType, $aliasConf) = Tracer::GetLine($aliasH);
                push @{$aliasMasterHash{$aliasFid}}, [$aliasID, $aliasType, $aliasConf];
            }
            close $aliasH;
            MemTrace("Aliases adjusted.") if T(ERDBLoadGroup => 3);
        }
        # Get the maximum sequence size. We need this later for splitting up the
        # locations.
        my $chunkSize = $sprout->MaxSegment();
        MemTrace("Loading features for genome $genomeID.") if T(ERDBLoadGroup => 3);
        # Get the feature list for this genome.
        my $features = $fig->all_features_detailed_fast($genomeID);
        # Sort and count the list.
        my @featureTuples = sort { $a->[0] cmp $b->[0] } @{$features};
        my $count = scalar @featureTuples;
        MemTrace("$count features found for genome $genomeID.") if T(ERDBLoadGroup => 3);
        # Get the attributes for this genome and put them in a hash by feature ID.
        my $attributes = $self->GetGenomeAttributes($genomeID, \@featureTuples);
        Trace("Looping through features for $genomeID.") if T(ERDBLoadGroup => 3);
        # Loop through the features.
        for my $featureTuple (@featureTuples) {
            # Split the tuple.
            my ($featureID, $locations, $aliases, $type, $minloc, $maxloc, $assignment,
                $user, $quality) = @{$featureTuple};
            # Make sure this feature is active.
            if (! $fig->is_deleted_fid($featureID)) {
                # Handle missing assignments.
                if (! defined $assignment) {
                    $assignment = '';
                    $user = '';
                } else {
                    # The default assignment-maker is FIG.
                    $user ||= 'fig';
                }
                # Count this feature.
                $self->Track(features => $featureID, 1000);
                # Fix the quality. It is almost always a space, but some odd stuff might sneak through, and the
                # Sprout database requires a single character.
                if (! defined($quality) || $quality eq "") {
                    $quality = " ";
                }
                # Get the coupling count. The coupled features are returned as a list,
                # and we store it as a scalar to get the count.
                my $couplingCount = $fig->coupled_to($featureID);
                # Begin building the keywords. We start with the genome ID, the
                # feature ID, the taxonomy, and the organism name.
                my @keywords = ($genomeID, $featureID, $fig->genus_species($genomeID),
                                $fig->taxonomy_of($genomeID));
                # Next come the aliases. We put all aliases found in this hash.
                # They will be output as alias names and as keywords.
                my %aliasHash;
                # Note the trick here to insure that we have a list reference even
                # if this feature isn't in the alias table.
                my $aliasList = $aliasMasterHash{$featureID} || [];
                # Loop through this feature ID's aliases.
                for my $aliasTuple (@$aliasList) {
                    my ($aliasID, $aliasType, $aliasConf) = @$aliasTuple;
                    # Only proceed if this alias is new.
                    if (! exists $aliasHash{$aliasID}) {
                        # Save this alias.
                        $aliasHash{$aliasID} = 1;
                        # Get its natural form.
                        my $natural = AliasAnalysis::Type($aliasType => $aliasID);
                        # Only proceed if a natural form exists.
                        if ($natural) {
                            $self->Add(miscAlias => 1);
                            # Save the natural form.
                            $aliasHash{$natural} = 1;
                            # Is this a corresponding ID?
                            if ($aliasConf eq 'A') {
                                # Yes. Connect its natural form to the feature.
                                $self->PutR(IsAlsoFoundIn => $featureID, $aliasType,
                                            alias => $natural);
                                $self->PutE(ExternalDatabase => $aliasType);
                            }
                        }
                    }
                }
                # Create the aliases and put them in the keyword list.
                for my $alias (sort keys %aliasHash) {
                    # Connect this alias to this feature and make an Alias record for it.
                    $self->PutR(IsAliasOf => $alias, $featureID);
                    $self->PutE(FeatureAlias => $alias);
                    # Add it to the keyword list.
                    push @keywords, $alias;
                }
                Trace("Assignment for $featureID is: $assignment") if T(ERDBLoadGroup => 4);
                # Break the assignment into words and shove it onto the
                # keyword list.
                push @keywords, split(/\s+/, $assignment);
                # Add any EC numbers.
                my @ecs = BioWords::ExtractECs($assignment);
                for my $ec (@ecs) {
                    push @keywords, $ec;
                    $self->PutE(FeatureEC => $featureID, ec => $ec);
                }
                # Link this feature to the parent genome.
                $self->PutR(HasFeature => $genomeID, $featureID,
                            type => $type);
                # Get the links.
                my @links = $fig->fid_links($featureID);
                for my $link (@links) {
                    $self->PutE(FeatureLink => $featureID, link => $link);
                }
                # If this is a peg, generate the translation and the upstream.
                if ($type eq 'peg') {
                    $self->Add(pegIn => 1);
                    my $translation = $fig->get_translation($featureID);
                    if ($translation) {
                        $self->PutE(FeatureTranslation => $featureID,
                                    translation => $translation);
                    }
                    # We use the default upstream values of u=200 and c=100.
                    my $upstream = $fig->upstream_of($featureID, 200, 100);
                    if ($upstream) {
                        $self->PutE(FeatureUpstream => $featureID,
                                    'upstream-sequence' => $upstream);
                    }
                }
                # Now we need to find the subsystems this feature participates in.
                my @ssList = $fig->subsystems_for_peg($featureID);
                # This hash prevents us from adding the same subsystem twice.
                my %seen = ();
                for my $ssEntry (@ssList) {
                    # Get the subsystem and role.
                    my ($subsystem, $role) = @{$ssEntry};
                    # Only proceed if we like this subsystem.
                    if (exists $subHash->{$subsystem}) {
                        # If this is the first time we've seen this subsystem for
                        # this peg, store the has-role link.
                        if (! $seen{$subsystem}) {
                            # Remember the subsystem. Without this the guard
                            # never fires and a peg with several roles in one
                            # subsystem gets duplicate links.
                            $seen{$subsystem} = 1;
                            $self->PutR(HasRoleInSubsystem => $featureID, $subsystem,
                                        genome => $genomeID, type => $type);
                            # Save the subsystem's keywords.
                            push @keywords, split /[\s_]+/, $subsystem;
                        }
                        # Now add the role and any embedded EC numbers to the keyword list.
                        push @keywords, split /\s+/, $role;
                        push @keywords, BioWords::ExtractECs($role);
                    }
                }
                # For each hyphenated word, we also need the pieces.
                my @hyphenated = grep { $_ =~ /-/ } @keywords;
                for my $hyphenated (@hyphenated) {
                    # Bust it into pieces. Pieces of two characters or less are dropped.
                    my @pieces = grep { length($_) > 2 } split /-/, $hyphenated;
                    push @keywords, @pieces;
                }
                # There are three special attributes computed from property
                # data that we build next. If the special attribute is non-empty,
                # its name will be added to the keyword list. First, we get all
                # the attributes for this feature. They will come back as
                # 4-tuples: [peg, name, value, URL]. A feature without an entry
                # in the attribute hash is treated as having no attributes, so
                # that we never dereference an undefined value.
                my $featureAttrs = $attributes->{$featureID} || [];
                # Now we process each of the special attributes.
                if ($self->SpecialAttribute($featureID, $featureAttrs,
                                            2, [1,3], '^(essential|potential_essential)$',
                                            qw(FeatureEssential essential))) {
                    push @keywords, 'essential';
                    $self->Add(essential => 1);
                }
                if ($self->SpecialAttribute($featureID, $featureAttrs,
                                            1, [2,3], '^virulen',
                                            qw(FeatureVirulent virulent))) {
                    push @keywords, 'virulent';
                    $self->Add(virulent => 1);
                }
                if ($self->SpecialAttribute($featureID, $featureAttrs,
                                            1, [2,3], '^iedb_',
                                            qw(FeatureIEDB iedb))) {
                    push @keywords, 'iedb';
                    $self->Add(iedb => 1);
                }
                # Now we have some other attributes we need to process. To get
                # through them, we convert the attribute list for this feature
                # into a two-layer hash: key => subkey => value. A key of the
                # form "real::sub" is split at the first "::"; otherwise the
                # subkey is the empty string.
                my %attributeHash = ();
                for my $attrRow (@{$featureAttrs}) {
                    my (undef, $key, @values) = @{$attrRow};
                    my ($realKey, $subKey);
                    if ($key =~ /^([^:]+)::(.+)/) {
                        ($realKey, $subKey) = ($1, $2);
                    } else {
                        ($realKey, $subKey) = ($key, "");
                    }
                    if (exists $attributeHash{$realKey}) {
                        $attributeHash{$realKey}->{$subKey} = \@values;
                    } else {
                        $attributeHash{$realKey} = {$subKey => \@values};
                    }
                }
                TraceDump(AttributeHash => \%attributeHash) if T(FeatureLoadGroup => 4);
                # First we handle CDD. This is a bit complicated, because
                # there are multiple CDDs per protein.
                if (exists $attributeHash{CDD}) {
                    # Get the hash of CDD IDs to scores for this feature. We
                    # already know it exists because of the above IF.
                    my $cddHash = $attributeHash{CDD};
                    my @cddData = sort keys %$cddHash;
                    for my $cdd (@cddData) {
                        # Extract the score for this CDD and decode it.
                        my ($codeScore) = split(/\s*[,;]\s*/, $cddHash->{$cdd}->[0]);
                        my $realScore = FIGRules::DecodeScore($codeScore);
                        # We can't afford to crash because of a bad attribute
                        # value, hence the IF below.
                        if (! defined($realScore)) {
                            # Bad score, so count it.
                            $self->Add(badCDDscore => 1);
                            Trace("CDD score \"$codeScore\" for feature $featureID invalid.") if T(ERDBLoadGroup => 3);
                        } else {
                            # Create the connection and a CDD record.
                            $self->PutR(IsPresentOnProteinOf => $cdd, $featureID,
                                        score => $realScore);
                            $self->PutE(CDD => $cdd);
                        }
                    }
                }
                # A similar situation exists for protein families.
                if (exists $attributeHash{PFAM}) {
                    # Get the hash of PFAMs to scores for this feature.
                    my $pfamHash = $attributeHash{PFAM};
                    for my $pfam (sort keys %$pfamHash) {
                        # Extract the range, which is everything after the first
                        # semicolon. The capture is taken in list context so
                        # that a failed match cannot pick up a stale $1 left
                        # over from an earlier pattern match.
                        my $codeScore = $pfamHash->{$pfam}->[0];
                        my ($range) = $codeScore =~ /;(.+)/;
                        if (! defined $range) {
                            # No range in the value. Count it and store an empty range.
                            $self->Add(badPfamRange => 1);
                            $range = '';
                        }
                        # Strip off the PFAM id from the source.
                        my ($pfamID) = split /_/, $pfam, 2;
                        # Emit the ProteinFamily record.
                        $self->PutE(ProteinFamily => $pfamID);
                        # Connect it to the feature.
                        $self->PutR(IsFamilyForFeature => $pfamID, $featureID,
                                    range => $range);
                        # Get its name from the ontology database. There can
                        # be at most one.
                        my $dt_objs =
                            $ontology_dbmaster->pfam->get_objects({id => $pfamID});
                        if (defined $dt_objs->[0]) {
                            $self->PutE(ProteinFamilyName => $pfamID,
                                        common_name => $dt_objs->[0]->term());
                        }
                    }
                }
                # Next we do PSORT cell locations. here the confidence value
                # could have the value "unknown", which we translate to -1.
                if (exists $attributeHash{PSORT}) {
                    # This will be a hash of cell locations to confidence
                    # factors.
                    my $psortHash = $attributeHash{PSORT};
                    for my $psort (keys %{$psortHash}) {
                        # Get the confidence, and convert it to a number if necessary.
                        my $confidence = $psortHash->{$psort}->[0];
                        if ($confidence eq 'unknown') {
                            $confidence = -1;
                        }
                        $self->PutR(IsPossiblePlaceFor => $psort, $featureID,
                                    confidence => $confidence);
                        $self->PutE(CellLocation => $psort);
                        # If this is a significant location, add it as a keyword.
                        if ($confidence > 2.5) {
                            # Before we add it as a keyword, we convert it from
                            # capital-case to hyphenated by inserting hyphens at
                            # case transition points.
                            $psort =~ s/([a-z])([A-Z])/$1-$2/g;
                            push @keywords, $psort;
                        }
                    }
                }
                # Phobius data is next. This consists of the signal peptide location and
                # the transmembrane locations.
                my $signalList = "";
                my $transList = "";
                my $transCount = 0;
                if (exists $attributeHash{Phobius}) {
                    # This will be a hash of two keys (transmembrane and signal) to
                    # location lists. GetCommaList converts them into comma-separated
                    # location strings. If there's no value, it returns an empty string.
                    $signalList = $self->GetCommaList($attributeHash{Phobius}->{signal});
                    # Assign to the outer $transList (no "my" here). A fresh
                    # lexical would leave the transmembrane-map field of the
                    # Feature record permanently empty.
                    $transList = $self->GetCommaList($attributeHash{Phobius}->{transmembrane});
                    # The domain count is the number of locations in the map. An
                    # empty string splits into an empty list, giving a count of 0.
                    my @transMap = split /\s*,\s*/, $transList;
                    $transCount = scalar(@transMap);
                }
                # Here are some more numbers: isoelectric point, molecular weight, and
                # the similar-to-human flag.
                my $isoelectric = 0;
                if (exists $attributeHash{isoelectric_point}) {
                    $isoelectric = $attributeHash{isoelectric_point}->{""}->[0];
                }
                my $similarToHuman = 0;
                if (exists $attributeHash{similar_to_human} && $attributeHash{similar_to_human}->{""}->[0] eq 'yes') {
                    $similarToHuman = 1;
                }
                my $molecularWeight = 0;
                if (exists $attributeHash{molecular_weight}) {
                    $molecularWeight = $attributeHash{molecular_weight}->{""}->[0];
                }
                # Join the keyword string.
                my $keywordString = join(" ", @keywords);
                # Get rid of annoying punctuation.
                $keywordString =~ s/[();@#\/,]/ /g;
                # Get the list of keywords in the keyword string, minus the delimiters.
                my @realKeywords = grep { $stemmer->IsWord($_) }
                                   $stemmer->Split($keywordString);
                # We need to do two things here: create the keyword string for the feature table
                # and write records to the keyword table for the keywords.
                my (%keys, %stems, @realStems);
                for my $keyword (@realKeywords) {
                    # Compute the stem and phonex for this keyword.
                    my ($stem, $phonex) = $stemmer->StemLookup($keyword);
                    # Only proceed if a stem comes back. If no stem came back, it's a
                    # stop word and we throw it away.
                    if ($stem) {
                        $keys{$keyword} = $stem;
                        $stems{$stem} = $phonex;
                        push @realStems, $stem;
                    }
                }
                # Now create the keyword string.
                my $cleanWords = join(" ", @realStems);
                Trace("Keyword string for $featureID: $cleanWords") if T(ERDBLoadGroup => 4);
                # Create keyword table entries for the keywords found.
                for my $key (keys %keys) {
                    my $stem = $keys{$key};
                    $self->PutE(Keyword => $key, stem => $stem, phonex => $stems{$stem});
                }
                # Now we need to process the feature's locations. First, we split them up.
                my @locationList = split /\s*,\s*/, $locations;
                # Next, we convert them to Sprout location objects.
                my @locObjectList = map { BasicLocation->new("$genomeID:$_") } @locationList;
                # Assemble them into a sprout location string for later.
                my $locationString = join(", ", map { $_->String } @locObjectList);
                # We'll store the sequence length in here.
                my $sequenceLength = 0;
                # This part is the roughest. We need to relate the features to contig
                # locations, and the locations must be split so that none of them exceed
                # the maximum segment size. This simplifies the genes_in_region processing
                # for Sprout. To start, we create the location position indicator.
                my $i = 1;
                # Loop through the locations.
                for my $locObject (@locObjectList) {
                    # Record the length. This must happen before the peeling loop
                    # below, which is presumed to shorten the location object --
                    # TODO confirm against BasicLocation::Peel.
                    $sequenceLength += $locObject->Length;
                    # Split this location into a list of chunks.
                    my @locOList = ();
                    while (my $peeling = $locObject->Peel($chunkSize)) {
                        $self->Add(peeling => 1);
                        push @locOList, $peeling;
                    }
                    push @locOList, $locObject;
                    # Loop through the chunks, creating IsLocatedIn records. The variable
                    # "$i" will be used to keep the location index.
                    for my $locChunk (@locOList) {
                        $self->PutR(IsLocatedIn => $featureID, $locChunk->Contig,
                                    beg => $locChunk->Left, dir => $locChunk->Dir,
                                    len => $locChunk->Length, locN => $i);
                        $i++;
                    }
                }
                # Check for figfams. In case we find any, we need the range.
                # It's the whole sequence.
                my $range = "1-$sequenceLength";
                # Ask for the figfams.
                my @fams = $ffs->families_containing_peg($featureID);
                # Connect them to the feature (if any).
                for my $fam (@fams) {
                    $self->PutE(ProteinFamily => $fam);
                    $self->PutR(IsFamilyForFeature => $fam, $featureID,
                                range => $range);
                }
                # Now we get some ancillary flags.
                my $locked = $fig->is_locked_fid($featureID);
                my $in_genbank = $fig->peg_in_gendb($featureID);
                # Create the feature record.
                $self->PutE(Feature => $featureID, 'assignment-maker' => $user,
                            'assignment-quality' => $quality, 'feature-type' => $type,
                            'in-genbank' => $in_genbank, 'isoelectric-point' => $isoelectric,
                            locked => $locked, 'molecular-weight' => $molecularWeight,
                            'sequence-length' => $sequenceLength,
                            'signal-peptide' => $signalList, 'similar-to-human' => $similarToHuman,
                            assignment => $assignment, keywords => $cleanWords,
                            'location-string' => $locationString,
                            'transmembrane-map' => $transList,
                            'conserved-neighbors' => $couplingCount,
                            'transmembrane-domain-count' => $transCount);
            }
        }
    }
}
514 :    
515 :    
516 :     =head3 SpecialAttribute
517 :    
518 :     my $count = $sl->SpecialAttribute($id, \@attributes, $idxMatch, \@idxValues, $pattern, $tableName, $field);
519 :    
520 :     Look for special attributes of a given type. A special attribute is found by comparing one of
521 :     the columns of the incoming attribute list to a search pattern. If a match is found, then
522 :     a set of columns is put into an output table connected to the specified ID.
523 :    
524 :     For example, when processing features, the attribute list we look at has four columns: feature
525 :     ID, attribute name, attribute value, and attribute URL. The IEDB attribute exists if the attribute
526 :     name begins with C<iedb_>. The call signature is therefore
527 :    
528 :     my $found = $sl->SpecialAttribute($fid, \@attributeList, 1, [2,3], '^iedb_', 'FeatureIEDB', 'iedb');
529 :    
530 :     The pattern is matched against column 1, and if we have a match, then the value and URL from
531 :     columns 2 and 3 are combined into a hyperlink that is put to the output along with the specified feature ID.
532 :    
533 :     =over 4
534 :    
535 :     =item id
536 :    
537 :     ID of the object whose special attributes are being loaded. This forms the first column of the
538 :     output.
539 :    
540 :     =item attributes
541 :    
542 :     Reference to a list of tuples.
543 :    
544 :     =item idxMatch
545 :    
546 :     Index in each tuple of the column to be matched against the pattern. If the match is
547 :     successful, an output record will be generated.
548 :    
549 :     =item idxValues
550 :    
551 : parrello 1.6 Reference to a list containing the indexes of the value and URL to put in the
552 :     second column of the output.
553 : parrello 1.1
554 :     =item pattern
555 :    
556 :     Pattern to be matched against the specified column. The match will be case-insensitive.
557 :    
558 :     =item tableName
559 :    
560 :     Name of the table to contain the attribute values found.
561 :    
562 :     =item fieldName
563 :    
564 :     Name of the field to contain the attribute values in the output table.
565 :    
566 :     =item RETURN
567 :    
568 :     Returns a count of the matches found.
569 :    
570 :     =item
571 :    
572 :     =back
573 :    
574 :     =cut
575 :    
sub SpecialAttribute {
    # Scan a list of attribute tuples for rows whose match column fits a
    # pattern. For each such row, a HyperLink built from the value columns
    # is written to the named table under the given ID. The number of
    # matching rows is returned.
    my ($self, $id, $attributes, $idxMatch, $idxValues, $pattern, $tableName, $fieldName) = @_;
    # Running total of matching rows.
    my $matches = 0;
    ROW: for my $tuple (@{$attributes}) {
        # Skip rows whose match column does not fit the pattern. The
        # comparison is case-insensitive.
        next ROW unless $tuple->[$idxMatch] =~ m/$pattern/i;
        # Pull out the requested value columns and wrap them in a hyperlink.
        my @linkParts = @{$tuple}[@{$idxValues}];
        my $link = HyperLink->new(@linkParts);
        # Emit the output row and count the match.
        $self->PutE($tableName, $id, $fieldName, $link);
        $matches++;
    }
    # Only report when tracing is active at this level and something was found.
    if (T(ERDBLoadGroup => 4) && $matches) {
        Trace("$matches special attributes found for $id and table $tableName.");
    }
    return $matches;
}
595 :    
596 : parrello 1.9
597 : parrello 1.1 1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3