[Bio] / FigKernelPackages / PinnedRegions.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/PinnedRegions.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.32 - (view) (download) (as text)

1 : dsouza 1.1 # -*- perl -*-
2 :     ########################################################################
3 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
4 :     # for Interpretations of Genomes. All Rights Reserved.
5 :     #
6 :     # This file is part of the SEED Toolkit.
7 :     #
8 :     # The SEED Toolkit is free software. You can redistribute
9 :     # it and/or modify it under the terms of the SEED Toolkit
10 :     # Public License.
11 :     #
12 :     # You should have received a copy of the SEED Toolkit Public License
13 :     # along with this program; if not write to the University of Chicago
14 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
15 :     # Genomes at veronika@thefig.info or download a copy from
16 :     # http://www.theseed.org/LICENSE.TXT.
17 :     ########################################################################
18 :    
19 :     package PinnedRegions;
20 :    
21 :     use strict;
22 :     use warnings;
23 :    
24 : olson 1.31 use Data::Dumper;
25 :    
26 : dsouza 1.1 use FIG;
27 : olson 1.32 use FFs;
28 : dsouza 1.1 use FIG_Config;
29 : parrello 1.9 use Tracer;
30 : paczian 1.27 use BasicLocation;
31 : dsouza 1.1
32 : parrello 1.9 use Time::HiRes qw( usleep gettimeofday tv_interval );
33 : dsouza 1.1
34 :     sub pinned_regions {
35 : paczian 1.16 my($fig, $pin_desc, $fast_color, $sims_from, $map_sz, $add_features, $extended) = @_;
36 : parrello 1.11 Trace("Pinned regions method called.") if T(3);
37 : dsouza 1.1 # Get list of pegs required by the description in $pin_desc
38 : dsouza 1.25 my $pinned_pegs = &expand_peg_list($fig, $pin_desc);
39 : parrello 1.9 Trace("Pinned pegs are " . join(", ", @$pinned_pegs) . ".") if T(3);
40 : parrello 1.11 # Filter out the pegs that don't exist.
41 : dsouza 1.1 # Get regions around pinned pegs -- boundaries, features, etc.
42 :     my $regions = &define_regions($fig, $map_sz, $pinned_pegs);
43 :     # Filter out overlapping regions caused by gene fusions, frame shifts etc. where multiple PEGs
44 :     # are pinned (by similarity or PCH) to the input PEG
45 :     $regions = &filter_regions_1($pin_desc, $regions);
46 :    
47 : dsouza 1.4 # Get information for each feature -- location, strand, function etc.
48 : paczian 1.16 my $feature_data = &feature_data($fig, $regions, $extended);
49 : dsouza 1.1
50 : dsouza 1.3 &add_functional_coupling($fig, $pin_desc, $regions, $feature_data);
51 : paczian 1.16
52 : bartels 1.8 # &add_figfams($fig, $feature_data);
53 : paczian 1.17 &add_subsystem_data($fig, $pin_desc, $feature_data, $extended);
54 : paczian 1.16
55 : parrello 1.9 Trace("Coloring pegs.") if T(3);
56 : dsouza 1.4 # Assign a set number to some PEGs through transitive closure based on similarity, from blast scores
57 : dsouza 1.25 &color_pegs($fig, $pin_desc, $pinned_pegs, $regions, $feature_data, $fast_color, $sims_from);
58 : dsouza 1.1
59 :     # Filter out regions which have only a single PEG (the pinned one) colored.
60 : dsouza 1.14 # $regions = &filter_regions_2($pin_desc, $regions, $feature_data);
61 : dsouza 1.1
62 : bartels 1.8 if ( defined( $add_features ) ) {
63 : parrello 1.11 &add_features_to_fdata( $fig, $feature_data, $add_features, $regions );
64 : bartels 1.8 }
65 :    
66 : dsouza 1.1 # Add feature data to the regions to make the final maps
67 :     my $maps = &make_maps($fig, $regions, $feature_data);
68 : paczian 1.16
69 : dsouza 1.1 return $maps;
70 :     }
71 :    
72 :     sub expand_peg_list {
73 : dsouza 1.25 my($fig, $pin_desc) = @_;
74 : dsouza 1.1 my @pegs = ();
75 :    
76 :     # Filter for legitimate genome IDS -- should handle deleted organisms and (in NMPDR) non-NMPDR orgs
77 :     my %ok_genome_id = map {$_ => 1} $fig->genomes;
78 :    
79 : dsouza 1.5 # If the user has selected the genomes to be displayed, filter out all other genomes
80 :     if ( @{ $pin_desc->{'show_genomes'} } )
81 :     {
82 : parrello 1.11 # create new %ok_genome_id hash from intersection of selected genomes and legitimate genome IDs
83 :     %ok_genome_id = map {$_ => 1} grep {$ok_genome_id{$_}} @{ $pin_desc->{'show_genomes'} };
84 : dsouza 1.5 }
85 :    
86 : dsouza 1.1 if ( @{ $pin_desc->{'pegs'} } > 1 )
87 :     {
88 : parrello 1.11 # Input already contains list of pegs, no need for expansion
89 : paczian 1.30 if ($pin_desc->{'pegs'}->[0] =~ /^\d/) {
90 :     @pegs = @{ $pin_desc->{'pegs'} };
91 :     } else {
92 :     @pegs = grep {$ok_genome_id{$fig->genome_of($_)}} @{ $pin_desc->{'pegs'} };
93 :     }
94 : dsouza 1.1 }
95 :     else
96 :     {
97 : parrello 1.11 # Get PCH pinned pegs
98 :     my $pch_pinned_pegs = &pch_pinned_pegs($fig, $pin_desc, \%ok_genome_id);
99 : dsouza 1.1
100 : parrello 1.11 $pch_pinned_pegs = &select_pegs($fig, $pin_desc, $pin_desc->{'n_pch_pins'}, $pch_pinned_pegs);
101 : dsouza 1.1
102 : dsouza 1.14 # Get similarity pinned pegs
103 :     my $sim_pegs = &sim_pegs($fig, $pin_desc, \%ok_genome_id);
104 : dsouza 1.1
105 : parrello 1.11 # Remove PCH PEGs (if any) from similarity pinned PEG list
106 :     my %seen = map {$_ => 1} @$pch_pinned_pegs;
107 :     $sim_pegs = [grep {! $seen{$_}} @$sim_pegs];
108 :    
109 :     $sim_pegs = &select_pegs($fig, $pin_desc, $pin_desc->{'n_sims'}, $sim_pegs);
110 :    
111 :     @pegs = ();
112 :    
113 :     # Get input peg
114 :     my $peg = $pin_desc->{'pegs'}[0];
115 :     if ( $pin_desc->{'sort_by'} eq 'phylogeny' )
116 :     {
117 :     # First add input peg, then sort by phylogeny
118 :     @pegs = $fig->sort_fids_by_taxonomy($peg, @$sim_pegs, @$pch_pinned_pegs);
119 :     }
120 : dsouza 1.21 elsif ( $pin_desc->{'sort_by'} eq 'phylogenetic_distance' )
121 : parrello 1.11 {
122 :     # Sort by phylogenetic distance or organism name
123 :     my $g1 = $fig->genome_of($peg);
124 :     @pegs = map {$_->[0] }
125 :     sort {$a->[1] <=> $b->[1] or $a->[2] cmp $b->[2]}
126 :     map {[$_, $fig->crude_estimate_of_distance($g1,$fig->genome_of($_)), $fig->org_of($_)]} @$sim_pegs, @$pch_pinned_pegs;
127 :     # Add input peg at front of list
128 :     unshift @pegs, $peg;
129 :     }
130 : dsouza 1.21 elsif ( $pin_desc->{'sort_by'} eq 'similarity' )
131 :     {
132 :     # Sort by similarity to input peg (first peg in list of arguments)
133 : dsouza 1.25 @pegs = &sort_pegs_by_similarity($fig, $peg, @$sim_pegs, @$pch_pinned_pegs);
134 : dsouza 1.21 }
135 : dsouza 1.1 }
136 : dsouza 1.21
137 : dsouza 1.1 return \@pegs
138 :     }
139 :    
140 :     sub pch_pinned_pegs {
141 :     my($fig, $pin_desc, $ok_genome_id) = @_;
142 :    
143 :     my @pegs = ();
144 :    
145 :     if ( $pin_desc->{'n_pch_pins'} > 0 )
146 :     {
147 : parrello 1.11 # Get input peg
148 :     my $peg = $pin_desc->{'pegs'}[0];
149 : dsouza 1.1
150 : parrello 1.11 # Get pegs in pch pin with input peg, and filter out deleted or non-NMPDR organisms
151 :     @pegs = grep {$ok_genome_id->{$fig->genome_of($_)}}
152 :     grep {$_ ne $peg}
153 : dsouza 1.1 map {$_->[0]} $fig->in_pch_pin_with_and_evidence($peg);
154 :     }
155 :    
156 :     return \@pegs;
157 :     }
158 :    
159 :     sub sim_pegs {
160 :     my($fig, $pin_desc, $ok_genome_id) = @_;
161 :    
162 :     # Return a list of PEGS similar to the input PEG, with evalue less than or equal to
163 :     # the user defined similarity cutoff.
164 :     # If the number of sims requested is 0, return the empty list
165 :    
166 :     my $n_sims = $pin_desc->{'n_sims'};
167 :     my $sim_cutoff = $pin_desc->{'sim_cutoff'};
168 :     my @pegs = ();
169 :    
170 :     if ( $n_sims > 0 )
171 :     {
172 : parrello 1.11 my $peg = $pin_desc->{'pegs'}[0];
173 :     my %seen;
174 : paczian 1.15
175 : parrello 1.11 @pegs = grep {$ok_genome_id->{$fig->genome_of($_)}}
176 :     grep {! $seen{$_}++}
177 :     map {$_->[1]} $fig->sims($peg, 10000, $sim_cutoff, "fig");
178 : dsouza 1.1
179 : parrello 1.11 my $n_pegs = @pegs;
180 : dsouza 1.1 }
181 :    
182 :     return \@pegs;
183 :     }
184 :    
185 :     sub select_pegs {
186 :     my($fig, $pin_desc, $n_pegs, $pegs) = @_;
187 :    
188 :     if ( $n_pegs == 0 )
189 :     {
190 : parrello 1.11 return [];
191 : dsouza 1.1 }
192 :    
193 : dsouza 1.3 if ( $pin_desc->{'collapse_close_genomes'} == 1 )
194 : dsouza 1.1 {
195 : parrello 1.11 # The ordering of the PEGs done here is in order to =select= the correct set, the ordering for the
196 :     # display is done later.
197 : dsouza 1.2
198 : parrello 1.11 # To collapse the PEG list, we want to:
199 :     # a. Return at most $n_pegs PEGs,
200 :     # b. include a representative PEG from every genus, subject to a, and
201 :     # c. include a representative PEG from each genus-species, subject to a and b.
202 :     # Individual strains will get represented by a single PEG, chosen arbitrarily.
203 :    
204 :     # The PEG selection is defined by the order of the PEGs. This could be done beter.
205 :    
206 :     my(%seen, @unique_genus, @unique_genus_species);
207 :    
208 :     foreach my $peg ( @$pegs )
209 :     {
210 :     my $org = $fig->org_of($peg);
211 :     # 'org_of' returns undef for deleted PEGs, these need to be omitted from the peg list returned
212 :    
213 :     if ( $org )
214 :     {
215 :     # Use only genus+species to drop strain information
216 :     my($genus, $species) = split(/\s+/, $org);
217 :     my $gs = "$genus $species";
218 :    
219 :     if ( not $seen{$genus}++ )
220 :     {
221 :     # First PEG from this genus, add it to @unique_genus.
222 :     # Mark the genus+species as seen.
223 :     # A subsequent PEG with the same genus and species will be dropped.
224 :     # A subsequent PEG with the same genus but different species will be added to @unique_genus_species.
225 :     $seen{$gs}++;
226 :     push @unique_genus, $peg;
227 :     }
228 :     elsif ( not $seen{$gs}++ )
229 :     {
230 :     # First PEG from this genus+species, add it to @unique_genus_species.
231 :     push @unique_genus_species, $peg;
232 :     }
233 :     }
234 :     }
235 : dsouza 1.1
236 : parrello 1.11 # Keep the unique_genus PEGS at the top, followed by the unique_genus_species
237 :     $pegs = [@unique_genus, @unique_genus_species];
238 : dsouza 1.1 }
239 :    
240 :     # Truncate list if necessary
241 : dsouza 1.3 if ( $n_pegs < @$pegs )
242 : dsouza 1.1 {
243 : parrello 1.11 $#{ $pegs } = $n_pegs - 1;
244 : dsouza 1.1 }
245 :    
246 :     return $pegs;
247 :     }
248 :    
249 : dsouza 1.21 sub sort_pegs_by_similarity {
250 : dsouza 1.25 my($fig, $input_peg, @other_pegs) = @_;
251 :    
252 :     # Set blast environment variables
253 :     $ENV{"BLASTMAT"} ||= "$FIG_Config::blastmat";
254 :     if ($ENV{"PATH"} !~ /fig\/bin/) { $ENV{"PATH"} = "$FIG_Config::bin:" . $ENV{"PATH"}; }
255 : dsouza 1.21
256 :     # Get amino acid sequences
257 : dsouza 1.25 my $sequences = &get_peg_sequences($fig, [$input_peg, @other_pegs]);
258 : dsouza 1.21
259 :     # Write the input peg sequence to a fasta file
260 :     my $input_seq = {$input_peg => $sequences->{$input_peg}};
261 :     my $input_seq_file = "$FIG_Config::temp/input_seq.$$.tmp.fasta";
262 :     &write_seqs_to_file($input_seq, $input_seq_file);
263 :    
264 :     # Write the other peg sequences to a fasta file
265 :     delete $sequences->{$input_peg};
266 :     my $other_seq_file = "$FIG_Config::temp/other_seq.$$.tmp.fasta";
267 :     &write_seqs_to_file($sequences, $other_seq_file);
268 :    
269 : dsouza 1.25 # Run formatdb
270 :     &formatdb_file($fig, $other_seq_file);
271 :    
272 :     # BLAST input peg sequence against other peg sequences
273 :     # set high evalue cutoff so all hits get reported
274 : dsouza 1.24 my $cutoff = 10;
275 : dsouza 1.25 my $hits = &blast_files($fig, $input_seq_file, $other_seq_file, $cutoff);
276 : dsouza 1.21
277 :     my @sorted = ($input_peg);
278 :     foreach my $hit ( @$hits )
279 :     {
280 : dsouza 1.25 # BLAST output is already sorted by similarity
281 : dsouza 1.21 my($peg2) = (split(/\s+/, $hit))[1];
282 : dsouza 1.25 push @sorted, $peg2;
283 : dsouza 1.21 }
284 :    
285 :     return @sorted;
286 :     }
287 :    
288 : dsouza 1.1 sub filter_regions_1 {
289 :     my($pin_desc, $regions) = @_;
290 :     my $new_regions;
291 :    
292 :     # Filter out some overlapping regions caused by gene fusions, frame shifts etc. where multiple PEGs
293 :     # are pinned (by similarity or PCH) to the input PEG
294 :     # Note that all overlaps are NOT eliminated
295 :    
296 :     if ( @{ $pin_desc->{'pegs'} } == 1 )
297 :     {
298 : parrello 1.11 # Input pin description is for a single input peg
299 :     my %seen;
300 :    
301 :     foreach my $region ( @$regions )
302 :     {
303 :     my $pinned_peg = $region->{'pinned_peg'};
304 :    
305 :     # If the pinned peg from a region has already been 'seen', don't
306 :     # add the region into the @$new_regions array
307 :    
308 :     if ( not $seen{$pinned_peg} )
309 :     {
310 :     push @$new_regions, $region;
311 :    
312 :     my $fids = $region->{'features'};
313 :     foreach my $fid ( @$fids )
314 :     {
315 :     $seen{$fid} = 1;
316 :     }
317 :     }
318 :     }
319 : dsouza 1.1 }
320 :     else
321 :     {
322 : parrello 1.11 # input PEGs is a list -- keep all of them
323 :     $new_regions = $regions;
324 : dsouza 1.1 }
325 :    
326 :     return $new_regions;
327 :     }
328 :    
329 :     sub filter_regions_2 {
330 :     my($pin_desc, $regions, $feature_data) = @_;
331 :     my $new_regions;
332 :    
333 :     if ( @{ $pin_desc->{'pegs'} } == 1 )
334 :     {
335 : parrello 1.11 # Input is single peg
336 :     my $input_peg = $pin_desc->{'pegs'}[0];
337 : dsouza 1.1
338 : parrello 1.11 foreach my $region ( @$regions )
339 :     {
340 :     my $fids = $region->{'features'};
341 :     my $n_in_sets = grep {$feature_data->{$_}{'set_number'}} @$fids;
342 :    
343 :     if ( $n_in_sets > 1 or $region->{'pinned_peg'} eq $input_peg )
344 :     {
345 :     # Regions should be displayed only if:
346 :     # a. more than one PEG is in a 'colored set' OR
347 :     # b. the pinned PEG is the input PEG
348 :     push @$new_regions, $region;
349 :     }
350 :     }
351 : dsouza 1.1 }
352 :     else
353 :     {
354 : parrello 1.11 # Input is a list of pegs -- keep all of the regions
355 :     $new_regions = $regions
356 : dsouza 1.1 }
357 :    
358 :     return $new_regions;
359 :     }
360 :    
361 :     sub make_maps {
362 :     my($fig, $regions, $feature_data) = @_;
363 :    
364 :     foreach my $region ( @$regions )
365 :     {
366 : parrello 1.11 my $features = [];
367 :     my $fids = $region->{'features'};
368 : dsouza 1.1
369 : parrello 1.11 foreach my $fid ( @$fids )
370 :     {
371 :     push @$features, $feature_data->{$fid};
372 :     # if ( !( $fid =~ /fig/ ) ) {
373 :     # print STDERR $fid." FIDMAKEMAPS\n";
374 :     # }
375 :     }
376 : dsouza 1.1
377 : parrello 1.11 $region->{'features'} = $features;
378 : dsouza 1.1 }
379 :    
380 :     return $regions;
381 :     }
382 :    
383 :     sub define_regions {
384 :     my($fig, $map_sz, $pinned_pegs) = @_;
385 :    
386 :     # Define the 'region' around the pinned peg, get features, and store some region information
387 :     my $regions = [];
388 :     my $half_map_sz = int($map_sz/2);
389 :    
390 :     foreach my $peg ( @$pinned_pegs )
391 :     {
392 : paczian 1.30 my $genome;
393 : paczian 1.29 my $loc;
394 : paczian 1.30 if ($peg =~ /^(fig\|)*(\d+\.\d+)[\.\:]{1}(.+\_\d+\_\d+)$/) {
395 :     $loc = $3;
396 :     $genome = $2;
397 : paczian 1.29 } else {
398 :     $loc = $fig->feature_location($peg);
399 : paczian 1.30 $genome = $fig->genome_of($peg);
400 : paczian 1.29 }
401 : parrello 1.11 # We only proceed if a location was found. Failure to find a location indicates
402 :     # that the feature is deleted.
403 :     if ($loc) {
404 :     my($contig, $beg, $end) = $fig->boundaries_of($loc);
405 :    
406 :     my $region_mid = int(($beg + $end)/2);
407 :     my $region_beg = $region_mid - $half_map_sz;
408 :     my $region_end = $region_mid + $half_map_sz;
409 :    
410 :     my($fids) = $fig->genes_in_region($genome, $contig, $region_beg, $region_end);
411 :    
412 :     my $region = {};
413 :     $region->{'genome_id'} = $fig->genome_of($peg);
414 :     $region->{'org_name'} = $fig->genus_species($region->{'genome_id'});
415 :     $region->{'contig'} = $contig;
416 :     $region->{'beg'} = $region_beg;
417 :     $region->{'mid'} = $region_mid;
418 :     $region->{'end'} = $region_end;
419 :     $region->{'features'} = $fids;
420 :     $region->{'pinned_peg_strand'} = ($beg <= $end)? '+' : '-';
421 :     $region->{'pinned_peg'} = $peg;
422 :    
423 :     push @$regions, $region;
424 :     }
425 : dsouza 1.1 }
426 :    
427 :     return $regions;
428 :     }
429 :    
430 : bartels 1.8 sub add_features_to_fdata {
431 :     my ( $fig, $feature_data, $add_features, $regions ) = @_;
432 :     foreach my $region ( @$regions ) {
433 : parrello 1.11 my $new_feats = $add_features->{ $region->{ 'genome_id' } };
434 :     foreach my $nf ( @$new_feats ) {
435 :     if ( $nf->{ 'contig' } eq $region->{ 'contig' } &&
436 :     $nf->{ 'start' } < $region->{ 'end' } &&
437 :     $nf->{ 'start' } > $region->{ 'beg' } ) {
438 :     $feature_data->{ $nf->{ 'name' } } = &new_feature_entry( $fig, $nf->{ 'name' }, $region->{ 'mid' }, $region->{ 'contig' }, $nf->{ 'start' }, $nf->{ 'stop' }, $nf->{ 'type' }, $nf->{ 'function' } );
439 :     push @{ $region->{ 'features' } }, $nf->{ 'name' };
440 :     }
441 :     }
442 : bartels 1.8 }
443 :     }
444 :    
445 :     sub new_feature_entry {
446 :     my ( $fig, $fid, $region_mid, $contig, $beg, $end, $type, $func ) = @_;
447 :    
448 :     if ( !defined( $type ) ) {
449 : parrello 1.11 $type = 'unknown';
450 : bartels 1.8 }
451 :    
452 :     if ( !defined( $func ) ) {
453 : parrello 1.11 $func = '';
454 : bartels 1.8 }
455 :    
456 :     my($left, $right) = sort {$a <=> $b} ($beg, $end);
457 :     my $size = $right - $left + 1;
458 :     my $strand = ($beg <= $end)? '+' : '-';
459 :     my $offset = int(($left + $right)/2) - $region_mid;
460 :     my $offset_beg = $left - $region_mid;
461 :     my $offset_end = $right - $region_mid;
462 :    
463 :     return {
464 : parrello 1.11 'fid' => $fid,
465 :     'type' => $type,
466 :     'contig' => $contig,
467 :     'beg' => $beg,
468 :     'end' => $end,
469 :     'size' => $size,
470 :     'strand' => $strand,
471 :     'offset' => $offset,
472 :     'offset_beg' => $offset_beg,
473 :     'offset_end' => $offset_end,
474 :     'function' => $func,
475 : paczian 1.28 'location' => $contig."_".$beg."_".$end
476 : bartels 1.8 };
477 :     }
478 :    
479 : dsouza 1.1 sub feature_data {
480 : paczian 1.16 my($fig, $regions, $extended) = @_;
481 : dsouza 1.1 my %feature_data;
482 :    
483 : olson 1.31 my %all_fids;
484 :    
485 :     foreach my $region ( @$regions )
486 :     {
487 :     my $fids = $region->{'features'};
488 :     map { $all_fids{$_}++ } @$fids;
489 :     }
490 :    
491 :     my @all_fids = keys(%all_fids);
492 :     my @locations = $fig->feature_location_bulk(\@all_fids);
493 :     my %locations = map { $_->[0] => $_->[1] } @locations;
494 :    
495 :     my $aliases = $fig->feature_aliases_bulk(\@all_fids);
496 :     my $functions = $fig->function_of_bulk(\@all_fids);
497 :    
498 :     # print STDERR Dumper(\%locations, $aliases, $functions);
499 : dsouza 1.1 foreach my $region ( @$regions )
500 :     {
501 : parrello 1.11 my $region_mid = $region->{'mid'};
502 :     my $fids = $region->{'features'};
503 : dsouza 1.1
504 : parrello 1.11 foreach my $fid ( @$fids )
505 :     {
506 :     # Get feature data if this feature has not been seen before, this avoids repeating
507 :     # this step when pegs occur in multiple regions
508 :     if ( not exists $feature_data{$fid} )
509 :     {
510 : olson 1.31 $feature_data{$fid} = &feature_entry($fig, $fid, $region_mid, $extended, $functions, \%locations, $aliases );
511 : parrello 1.11 }
512 :     }
513 : dsouza 1.1 }
514 :    
515 : paczian 1.16 if ($extended) {
516 :     # get the evidence codes
517 :     my @all_fids = keys(%feature_data);
518 :     my @codes = grep { $_->[1] =~ /^evidence_code/i } $fig->get_attributes(\@all_fids, 'evidence_code');
519 :     my $pretty_codes = {};
520 :     foreach my $code (@codes) {
521 :     my $pretty_code = $code->[2];
522 :     if ($pretty_code =~ /;/) {
523 :     my ($cd, $ss) = split(";", $code->[2]);
524 :     $pretty_code = $cd;
525 :     }
526 :     $pretty_code =~ s/lit\((\d+)\)/lit\(<a href='http:\/\/www\.ncbi\.nlm\.nih\.gov\/sites\/entrez\?cmd=Retrieve&db=PubMed&list_uids=$1&dopt=AbstractPlus' target=_blank>$1<\/a>\)/;
527 :     push(@{$pretty_codes->{$code->[0]}}, $pretty_code);
528 :     }
529 :     foreach my $entry (keys(%$pretty_codes)) {
530 :     $feature_data{$entry}{'evcodes'} = join(", ", @{$pretty_codes->{$entry}});
531 :     }
532 :     }
533 :    
534 : dsouza 1.1 return \%feature_data;
535 :     }
536 :    
537 :     sub feature_entry {
538 : olson 1.31 my($fig, $fid, $region_mid, $extended, $all_functions, $all_locations, $all_aliases) = @_;
539 : dsouza 1.1
540 :     my $type = $fig->ftype($fid);
541 : olson 1.31 my $loc = $all_locations->{$fid};
542 : dsouza 1.1 my($contig, $beg, $end) = $fig->boundaries_of($loc);
543 :     my($left, $right) = sort {$a <=> $b} ($beg, $end);
544 :     my $strand = ($beg <= $end)? '+' : '-';
545 :     my $offset = int(($left + $right)/2) - $region_mid;
546 :     my $offset_beg = $left - $region_mid;
547 :     my $offset_end = $right - $region_mid;
548 : olson 1.31 my $func = $all_functions->{$fid};
549 : paczian 1.27 my $genome = $fig->genome_of($fid);
550 :     my $location = FullLocation->new($fig, $genome, $loc);
551 :     my $size = 0;
552 :     map { $size += $_->Length } @{$location->Locs};
553 : dsouza 1.1
554 : paczian 1.16 my $retval = { 'fid' => $fid,
555 : paczian 1.26 'location' => $loc,
556 : paczian 1.16 'type' => $type,
557 :     'contig' => $contig,
558 :     'beg' => $beg,
559 :     'end' => $end,
560 :     'size' => $size,
561 :     'strand' => $strand,
562 :     'offset' => $offset,
563 :     'offset_beg' => $offset_beg,
564 :     'offset_end' => $offset_end,
565 : dsouza 1.25 'function' => $func };
566 : paczian 1.16 if ($extended) {
567 : olson 1.31 my $a = $all_aliases->{$fid};
568 :     my $aliases = ref($a) ? join("|", @$a) : '';
569 :    
570 : paczian 1.16 $retval->{aliases} = $aliases;
571 :     }
572 :    
573 :     return $retval;
574 : dsouza 1.1 }
575 :    
576 : dsouza 1.3 sub add_functional_coupling {
577 :     my($fig, $pin_desc, $regions, $feature_data) = @_;
578 :    
579 :     my $fc_cutoff = defined($pin_desc->{'fc_cutoff'})? $pin_desc->{'fc_cutoff'} : 4;
580 :    
581 :     foreach my $region ( @$regions )
582 :     {
583 : parrello 1.11 my $pin = $region->{'pinned_peg'};
584 :     my @pegs = grep {$feature_data->{$_}{'type'} eq 'peg'} @{ $region->{'features'} };
585 : dsouza 1.3
586 : parrello 1.11 foreach my $couple ( $fig->coupled_to_batch(@pegs) )
587 :     {
588 :     my($peg1, $peg2, $sc) = @$couple;
589 :    
590 :     if ( $peg1 eq $pin and $sc >= $fc_cutoff )
591 :     {
592 :     $feature_data->{$peg2}{'fc_score'} = $sc;
593 : paczian 1.15 $feature_data->{$peg2}{'fc_pin'} = $peg1;
594 : parrello 1.11 }
595 :     }
596 : dsouza 1.3 }
597 :     }
598 :    
599 : dsouza 1.5 sub add_figfams {
600 : olson 1.32 return;
601 :     # my($fig, $feature_data) = @_;
602 : dsouza 1.5
603 : olson 1.32 # # Get FigFams directory from config file
604 :     # my $figfam_dir = $fig->get_figfams_data();
605 : dsouza 1.5
606 : olson 1.32 # # Check if FigFams directory is defined and exists on current machine
607 :     # if ( defined($figfam_dir) and (-d $figfam_dir) )
608 :     # {
609 :     # # Get all PEG IDs
610 :     # my @pegs = grep {$feature_data->{$_}{'type'} eq 'peg'} keys %$feature_data;
611 :     # # Get $figfams object
612 :     # my $figfams = new FigFams($fig, $figfam_dir);
613 :     # # Get FigFam family ID for @pegs
614 :     # my $figfam = $figfams->families_containing_peg_bulk(\@pegs);
615 :     # # Get hash of FigFam ID to family function
616 :     # my $family_function = $figfams->family_functions();
617 : parrello 1.11
618 : olson 1.32 # # Some FigFams (the ones that are not subsystem related) have the FigFam ID in the family function.
619 :     # # This results in the popup displaying the ID twice, so one should be removed.
620 : parrello 1.11
621 : olson 1.32 # my %figfam_text;
622 :     # foreach my $fid ( keys %$feature_data )
623 :     # {
624 :     # if ( $figfam->{$fid} )
625 :     # {
626 :     # my $figfam_id = $figfam->{$fid};
627 :     # if ( ! exists $figfam_text{$figfam_id} ) {
628 :     # if ( $family_function->{$figfam_id} =~ /^FIG\d+/ ) {
629 :     # $figfam_text{$figfam_id} = $family_function->{$figfam_id};
630 :     # } else {
631 :     # $figfam_text{$figfam_id} = $figfam_id . ': ' . $family_function->{$figfam_id};
632 :     # }
633 :     # }
634 :    
635 :     # # Add FigFam information to hash -- to go into the popup text
636 :     # $feature_data->{$fid}{'figfam'} = $figfam_text{$figfam_id};
637 :     # }
638 :     # }
639 :     # }
640 : dsouza 1.5 }
641 :    
642 : dsouza 1.3 sub add_subsystem_data {
643 : paczian 1.17 my($fig, $pin_desc, $feature_data, $extended) = @_;
644 : dsouza 1.14
645 :     # Get subsystem_information for =all= pegs
646 :    
647 : paczian 1.16 my @fids = keys(%$feature_data);
648 :     my %ssdata = $fig->subsystems_for_pegs_complete(\@fids);
649 :     my @subsystems = ();
650 :     foreach my $p (keys(%ssdata)) {
651 :     foreach my $entry (@{$ssdata{$p}}) {
652 :     push(@subsystems, [ $entry->[0], $entry->[1], $entry->[2], $p ]);
653 :     }
654 :     }
655 : paczian 1.17 unless ($extended) {
656 : paczian 1.19 @subsystems = grep { $fig->usable_subsystem($_->[0]) } @subsystems;
657 : paczian 1.18 @subsystems = grep { $fig->is_exchangable_subsystem($_->[0]) } @subsystems;
658 : paczian 1.17 }
659 : paczian 1.16
660 : dsouza 1.14 my %peg_to_ss;
661 : paczian 1.16 foreach my $rec ( @subsystems ) {
662 :     my($ss_name, $role, $variant, $fid) = @$rec;
663 :     $ss_name =~ s/_/ /g;
664 :    
665 :     if ( $variant eq '0' ) {
666 :     # no subsystem
667 : paczian 1.19 if ($extended) {
668 :     my $ss_text = "$ss_name (not yet classified for this organism)";
669 : paczian 1.20 $peg_to_ss{$fid}{$ss_name} = $ss_text;
670 : paczian 1.19 }
671 : paczian 1.16 } elsif ( $variant eq '-1' or $variant eq '*-1' ) {
672 :     # subsystem not functional in this organism
673 :     my $ss_text = "$ss_name (classified 'not active' in this organism)";
674 : paczian 1.20 $peg_to_ss{$fid}{$ss_name} = $ss_text;
675 : paczian 1.16 } else {
676 :     $peg_to_ss{$fid}{$ss_name} = 1;
677 : dsouza 1.14 }
678 :     }
679 :    
680 : dsouza 1.3 # Count number of occurences of each subsystem
681 :     my %ss_count;
682 : dsouza 1.14 foreach my $fid ( keys %peg_to_ss )
683 : dsouza 1.3 {
684 : dsouza 1.14 foreach my $ss ( keys %{ $peg_to_ss{$fid} } )
685 :     {
686 :     $ss_count{$ss}++;
687 :     }
688 : dsouza 1.3 }
689 :    
690 :     # Sort subsystems based on count and create hash with unique index for each subsystem where
691 :     # lower indices correspond to higher number of occurences of the subsystem.
692 :     my %ss_index;
693 :     my $index = 0;
694 :     foreach my $ss ( sort {$ss_count{$b} <=> $ss_count{$a} or $a cmp $b} keys %ss_count )
695 :     {
696 : parrello 1.11 $ss_index{$ss} = ++$index;
697 : dsouza 1.3 }
698 :    
699 :     # Add subsystem information for pegs in subsystems
700 :     foreach my $fid ( keys %peg_to_ss )
701 :     {
702 : paczian 1.20 my @subsystems;
703 :     foreach my $key (keys(%{$peg_to_ss{$fid}})) {
704 :     if ($peg_to_ss{$fid}{$key} ne "1") {
705 :     push(@subsystems, [ $peg_to_ss{$fid}{$key}, $ss_index{$key} ]);
706 :     } else {
707 :     push(@subsystems, [ $key, $ss_index{$key} ]);
708 :     }
709 :     }
710 : parrello 1.11
711 :     if ( @subsystems )
712 :     {
713 : paczian 1.20 $feature_data->{$fid}{'subsystems'} = [sort {$a->[1] <=> $b->[1]} @subsystems];
714 : parrello 1.11 }
715 : dsouza 1.3 }
716 :     }
717 :    
718 : dsouza 1.1 sub color_pegs {
719 : dsouza 1.25 my($fig, $pin_desc, $pinned_pegs, $regions, $feature_data, $fast_color, $sims_from) = @_;
720 : dsouza 1.1
721 :     # Run blastp and get connected pegs
722 :     my $blast_hit = {};
723 :     my $color_sim_cutoff = $pin_desc->{'color_sim_cutoff'};
724 :    
725 :     if ( $sims_from eq 'blast' )
726 :     {
727 : dsouza 1.25 $blast_hit = &blast_hits($fig, $regions, $feature_data, $fast_color, $color_sim_cutoff);
728 : dsouza 1.1 }
729 :     else
730 :     {
731 : dsouza 1.25 $blast_hit = &blast_hits_from_sims($fig, $regions, $feature_data, $fast_color, $color_sim_cutoff);
732 : dsouza 1.1 }
733 :    
734 :     # Assign set numbers to pegs based on blast scores
735 :     my $color_sets = &partition_pegs($blast_hit);
736 :    
737 :     # Sort peg sets to that a) larger sets have smaller set numbers, and
738 :     # b) sets closer to the pinned peg have smaller set numbers
739 : dsouza 1.4 $color_sets = &sort_color_sets($pin_desc, $pinned_pegs, $color_sets, $feature_data);
740 : dsouza 1.1
741 :     # Add color set number into $feature_data
742 :     for (my $i = 0; $i < @$color_sets; $i++)
743 :     {
744 : parrello 1.11 # Use an index that starts at 1 (matches with coloring index)
745 :     my $n = $i + 1;
746 :     foreach my $peg ( @{ $color_sets->[$i] } )
747 :     {
748 :     $feature_data->{$peg}{'set_number'} = $n;
749 :    
750 :     # If this is the pinned set, set the 'color' to 'red'
751 :     if ( $n == 1 )
752 :     {
753 :     $feature_data->{$peg}{'color'} = 'red';
754 :     }
755 :     }
756 : dsouza 1.1 }
757 :     }
758 :    
759 :     sub sort_color_sets {
760 :     my($pin_desc, $pinned_pegs, $color_sets, $feature_data) = @_;
761 :    
762 : dsouza 1.3 # Find the color set containing the set of pinned pegs, i.e. the set containing the input peg.
763 :     # If the input peg is found (or if input is a list of pegs, returned value is the array index for the
764 :     # set.
765 :     # If the input peg is not found, the returned value is an reference to an array containing the input peg.
766 :     my $set = &pinned_set($pin_desc, $pinned_pegs, $color_sets);
767 : dsouza 1.1
768 : dsouza 1.3 my $pinned_color_set;
769 :     if ( $set =~ /^\d+$/ )
770 :     {
771 : parrello 1.11 # Splice out the color set containing the input peg
772 :     ($pinned_color_set) = splice(@$color_sets,$set,1);
773 : dsouza 1.3 }
774 :     else
775 :     {
776 : parrello 1.11 $pinned_color_set = $set;
777 : dsouza 1.3 }
778 : dsouza 1.1
779 : dsouza 1.3 # Add offset (summed distance from
780 :     my @color_sets = map {[$_, &offset($_, $feature_data)]} @$color_sets;
781 : dsouza 1.1 # Sort the remaining color sets based on:
782 :     # a. size (number of pegs) and
783 :     # b. offset from mid-point of region
784 :     @$color_sets = map {$_->[0]}
785 :     sort {@{$b->[0]} <=> @{$a->[0]} or $a->[1] <=> $b->[1]}
786 : dsouza 1.3 # sort {$a->[1] <=> $b->[1] or @{$b->[0]} <=> @{$a->[0]}}
787 : dsouza 1.1 map {[$_, &offset($_, $feature_data)]} @$color_sets;
788 :    
789 :     # Add the set of pinned pegs at the front of the returned list so that it gets colored red
790 :     return [$pinned_color_set, @$color_sets];
791 :     }
792 :    
793 : dsouza 1.3 sub pinned_set {
794 : dsouza 1.1 my($pin_desc, $pinned_pegs, $color_sets) = @_;
795 :    
796 : dsouza 1.3 # Returns an integer if the input is a peg-list or if the input peg is in a set.
797 :     # Returns the input peg (as an array reference) if the input peg is not in a set.
798 :    
799 : dsouza 1.1 if ( @{ $pin_desc->{'pegs'} } == 1 )
800 :     {
801 : parrello 1.11 # Get input peg if it exists -- the set containing this peg should be colored red
802 :     my $peg = $pin_desc->{'pegs'}[0];
803 : dsouza 1.1
804 : parrello 1.11 # Iterate through the color sets until you find the set containing the input peg
805 :     for (my $i = 0; $i < @$color_sets; $i++)
806 :     {
807 :     foreach my $peg2 ( @{ $color_sets->[$i] } )
808 :     {
809 :     if ( $peg2 eq $peg )
810 :     {
811 :     # Return the set index
812 :     return $i;
813 :     }
814 :     }
815 :     }
816 : dsouza 1.1
817 : parrello 1.11 # The input peg may not be in a set if there is only one region or the coloring cutoff is very stringent.
818 :     return [$peg];
819 : dsouza 1.1 }
820 :     else
821 :     {
822 : parrello 1.11 # Input is a peg-list, which may be split into multiple sets -- the largest will be called the
823 :     # "pinned" set.
824 : paczian 1.30 my $i = 0;
825 : parrello 1.12 my $max_count = 0;
826 : parrello 1.11 my %pinned = map {$_ => 1} @$pinned_pegs;
827 :    
828 :     for (my $j = 0; $j < @$color_sets; $j++)
829 :     {
830 :     # count how many pinned pegs are found in each set
831 : parrello 1.12 my $count = scalar(grep {$pinned{$_}} @{ $color_sets->[$j] }) || 0;
832 : parrello 1.11
833 :     if ( $max_count < $count )
834 :     {
835 :     # Keep track of the set having the largest number of pinned pegs
836 :     $max_count = $count;
837 :     $i = $j;
838 :     }
839 :     }
840 : dsouza 1.1
841 : parrello 1.11 return $i;
842 : dsouza 1.1 }
843 :     }
844 :    
845 :     sub offset {
846 :     my($set, $feature_data) = @_;
847 :    
848 :     my $offset;
849 :     foreach my $peg ( @$set )
850 :     {
851 : parrello 1.11 $offset += abs($feature_data->{$peg}{'offset'});
852 : dsouza 1.1 }
853 : dsouza 1.3
854 :     return sprintf("%.2f", $offset/@$set);
855 : dsouza 1.1 }
856 :    
857 :     sub partition_pegs {
858 :     my($blast_hit) = @_;
859 :    
860 :     my %seen;
861 :     my $sets = [];
862 :    
863 :     # Iterate through the pegs with blast hits in arbitrary order
864 :     foreach my $peg1 ( keys %$blast_hit )
865 :     {
866 : parrello 1.11 # If it has not been 'seen' (and placed in a set) use it as the seed for a set
867 :     if ( not $seen{$peg1} )
868 :     {
869 :     my $set = [$peg1];
870 :    
871 :     # Mark $peg1 as seen
872 :     $seen{$peg1} = 1;
873 :    
874 :     # Grow set through transitive closure -- note that @$set keeps growing
875 :     for (my $i = 0; $i < @$set; $i++)
876 :     {
877 :     $peg1 = $set->[$i];
878 :    
879 :     # Iterate through blast hits
880 :     foreach my $peg2 ( keys %{ $blast_hit->{$peg1} } )
881 :     {
882 :     # If $peg2 has not been seen, put it in the set, and mark it as seen
883 :     if ( not exists $seen{$peg2} )
884 :     {
885 :     push @$set, $peg2;
886 :     $seen{$peg2} = 1;
887 :     }
888 :     }
889 :     }
890 :    
891 :     # Add the new set to the set of sets
892 :     push @$sets, $set;
893 :     }
894 : dsouza 1.1 }
895 :    
896 :     return $sets;
897 :     }
898 :    
899 :     sub blast_hits {
900 : dsouza 1.25 my($fig, $regions, $feature_data, $fast_color, $color_sim_cutoff) = @_;
901 :    
902 :     # Set blast environment variables
903 :     $ENV{"BLASTMAT"} ||= "$FIG_Config::blastmat";
904 :     if ($ENV{"PATH"} !~ /fig\/bin/) { $ENV{"PATH"} = "$FIG_Config::bin:" . $ENV{"PATH"}; }
905 : dsouza 1.1
906 :     # Get amino acid sequences
907 : paczian 1.29 my @pegs = grep {$feature_data->{$_}{'type'} && $feature_data->{$_}{'type'} eq 'peg'} keys %$feature_data;
908 : dsouza 1.21 my $sequences = &get_peg_sequences($fig, \@pegs);
909 : dsouza 1.1
910 :     # Write the entire set of sequences to a fasta file
911 :     my $all_seqs_file = "$FIG_Config::temp/all_seqs.$$.tmp.fasta";
912 :     &write_seqs_to_file($sequences, $all_seqs_file);
913 :    
914 : dsouza 1.25 # Run formatdb
915 :     &formatdb_file($fig, $all_seqs_file);
916 :    
917 :     # If $fast_color == 0, the complete blast of all vs. all sequences is performed
918 :     # Otherwise, instead of all vs. all, the sequences from a single pinned peg region
919 :     # is blasted against all sequences. If any of the hits are better than a given cutoff
920 :     # ($cutoff_2), the pegs hit are deemed to be 'done', i.e. it is assumed that blasting this
921 :     # peg will not yield additional information for forming the peg sets. These 'done' pegs
922 :     # are then omitted from the blast when the sequences from the region they belong to
923 :     # is blasted.
924 :     my %blast_hit;
925 :     if ( $fast_color )
926 : dsouza 1.1 {
927 : dsouza 1.25 my $cutoff_2 = $color_sim_cutoff * 1e-20;
928 :     my %done_with;
929 :    
930 :     foreach my $region ( @$regions )
931 :     {
932 :     # Iterate through each region
933 :     my $fids = $region->{'features'};
934 :    
935 :     # Build up a hash of peg sequences which are not 'done_with'
936 :     my %region_seqs;
937 :     foreach my $peg ( grep {$feature_data->{$_}{'type'} eq 'peg'} @$fids )
938 :     {
939 :     if ( $sequences->{$peg} and not $done_with{$peg} )
940 :     {
941 :     $region_seqs{$peg} = $sequences->{$peg};
942 :     }
943 :     }
944 :    
945 :     if ( scalar keys %region_seqs )
946 :     {
947 :     # Write the region sequences to a file
948 :     my $region_seqs_file = "$FIG_Config::temp/region_seqs.$$.tmp.fasta";
949 :     &write_seqs_to_file(\%region_seqs, $region_seqs_file);
950 :    
951 :     # BLAST the region sequences against all other sequences
952 :     my $hits = &blast_files($fig, $region_seqs_file, $all_seqs_file, $color_sim_cutoff);
953 :    
954 :     # Build up hash of blast hits
955 :     foreach my $hit ( @$hits )
956 :     {
957 :     my($peg1, $peg2, $sc) = (split(/\s+/, $hit))[0,1,10];
958 :    
959 :     if ( $peg1 ne $peg2 )
960 :     {
961 :     $blast_hit{$peg1}{$peg2} = 1;
962 :     $blast_hit{$peg2}{$peg1} = 1;
963 :    
964 :     # If the blast score is less than the chosen cutoff, mark $peg2 as 'done_with' so
965 :     # that it will not be blasted with it's region sequences
966 :     if ( $sc <= $cutoff_2 )
967 :     {
968 :     $done_with{$peg2} = 1;
969 :     }
970 :     }
971 :     }
972 :     }
973 :     }
974 : dsouza 1.1 }
975 :     else
976 :     {
977 : parrello 1.11 # BLAST sequence file against itself
978 : dsouza 1.25 my $hits = &blast_files($fig, $all_seqs_file, $all_seqs_file, $color_sim_cutoff);
979 : dsouza 1.1
980 : dsouza 1.25 # Build up hash of blast hits
981 :     foreach my $hit ( @$hits )
982 :     {
983 :     my($peg1, $peg2, $sc) = (split(/\s+/, $hit))[0,1,10];
984 : parrello 1.11
985 : dsouza 1.25 if ( $peg1 ne $peg2 )
986 :     {
987 :     $blast_hit{$peg1}{$peg2} = 1;
988 :     $blast_hit{$peg2}{$peg1} = 1;
989 :     }
990 :     }
991 : dsouza 1.1 }
992 :    
993 :     return \%blast_hit;
994 :     }
995 :    
996 :     sub blast_hits_from_sims {
997 : dsouza 1.25 my($fig, $regions, $feature_data, $fast_color, $color_sim_cutoff) = @_;
998 : dsouza 1.1
999 :     my %blast_hit;
1000 :    
1001 :     my $maxN = 2000;
1002 :     my $maxP = $color_sim_cutoff;
1003 :     my $select = 'fig';
1004 :     my $max_expand = '';
1005 :     my $filters = '';
1006 :    
1007 :     # Create a hash of all pegs
1008 :     my %region_peg = map {$_ => 1} grep {$feature_data->{$_}{'type'} eq 'peg'} keys %$feature_data;
1009 :    
1010 : dsouza 1.25 if ( $fast_color == 1 )
1011 : dsouza 1.1 {
1012 : dsouza 1.25 my $cutoff_2 = $color_sim_cutoff * 1e-20;
1013 :     my %done_with;
1014 :    
1015 :     # Iterate through each peg
1016 :     foreach my $peg1 ( keys %region_peg )
1017 :     {
1018 :     # Skip the 'done_with' pegs
1019 :     next if ($done_with{$peg1});
1020 : dsouza 1.1
1021 : dsouza 1.25 my @sims = $fig->sims($peg1, $maxN, $maxP, $select, $max_expand, $filters);
1022 : parrello 1.11
1023 : dsouza 1.25 foreach my $sim ( grep {$region_peg{$_->[1]} and $_->[1] ne $peg1} @sims )
1024 :     {
1025 :     my($peg2, $sc) = @$sim[1,10];
1026 : parrello 1.11
1027 : dsouza 1.25 $blast_hit{$peg1}{$peg2} = 1;
1028 :     $blast_hit{$peg2}{$peg1} = 1;
1029 :    
1030 :     # If the blast score is less than the chosen cutoff, mark $peg2 as 'done_with' so
1031 :     # that it will not be blasted with it's region sequences
1032 :     if ( $sc <= $cutoff_2 )
1033 :     {
1034 :     $done_with{$peg2} = 1;
1035 :     }
1036 :     }
1037 :     }
1038 :     }
1039 :     else
1040 : dsouza 1.1 {
1041 : dsouza 1.25 # Iterate through each peg
1042 :     foreach my $peg1 ( keys %region_peg )
1043 :     {
1044 :     my @sims = $fig->sims($peg1, $maxN, $maxP, $select, $max_expand, $filters);
1045 :    
1046 :     foreach my $peg2 ( map {$_->[1]} grep {$region_peg{$_->[1]} and $_->[1] ne $peg1} @sims )
1047 :     {
1048 :     $blast_hit{$peg1}{$peg2} = 1;
1049 :     $blast_hit{$peg2}{$peg1} = 1;
1050 :     }
1051 :     }
1052 : dsouza 1.1 }
1053 : dsouza 1.25
1054 :     return \%blast_hit;
1055 : dsouza 1.1 }
1056 :    
1057 :     sub blast_files {
1058 :     my($fig, $input, $database, $cutoff) = @_;
1059 :    
1060 : dsouza 1.24 my $cmd = "$FIG_Config::ext_bin/blastall";
1061 :     my @args = ('-p', 'blastp', '-i', $input, '-d', $database, '-m', 8, '-e', $cutoff);
1062 :     my @blast_out = $fig->run_gathering_output($cmd, @args);
1063 : dsouza 1.25
1064 : dsouza 1.24 return \@blast_out;
1065 :     }
1066 : overbeek 1.23
1067 : dsouza 1.1 sub formatdb_file {
1068 :     my($fig, $file) = @_;
1069 :    
1070 :     my $cmd = "$FIG_Config::ext_bin/formatdb -i $file -p T";
1071 :     $fig->run($cmd);
1072 :     }
1073 :    
1074 :     sub write_seqs_to_file {
1075 :     my($seq, $fasta_file) = @_;
1076 :    
1077 :     open(FASTA, ">$fasta_file") or die "could not create FASTA file '$fasta_file': $!";
1078 :     foreach my $peg ( keys %$seq )
1079 :     {
1080 : parrello 1.11 print FASTA ">$peg\n$seq->{$peg}\n";
1081 : dsouza 1.1 }
1082 :     close(FASTA) or die "could not close file FASTA file '$fasta_file': $!";
1083 :     }
1084 :    
1085 :     sub get_peg_sequences {
1086 : dsouza 1.21 my($fig, $pegs) = @_;
1087 : dsouza 1.1 my %sequences;
1088 :    
1089 :     # Iterate over each peg
1090 : dsouza 1.21 foreach my $peg ( @$pegs )
1091 : dsouza 1.1 {
1092 : parrello 1.11 my $seq = $fig->get_translation($peg);
1093 : dsouza 1.1
1094 : parrello 1.11 if ( $seq )
1095 :     {
1096 :     $sequences{$peg} = $seq;
1097 :     }
1098 :     else
1099 :     {
1100 :     print STDERR "could not get sqeuence for $peg\n";
1101 :     }
1102 : dsouza 1.1 }
1103 :    
1104 :     return \%sequences;
1105 :     }
1106 :    
1107 :    
1108 :     1;

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3