[Bio] / FigKernelPackages / PinnedRegions.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/PinnedRegions.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Thu Dec 13 23:16:03 2007 UTC revision 1.3, Fri Dec 21 17:22:47 2007 UTC
# Line 44  Line 44 
44      # Assigning set numbers requires BLASTing and use similarity scores to do transitive closure      # Assigning set numbers requires BLASTing and use similarity scores to do transitive closure
45      my $feature_data = &feature_data($fig, $regions);      my $feature_data = &feature_data($fig, $regions);
46    
47        &add_functional_coupling($fig, $pin_desc, $regions, $feature_data);
48        &add_subsystem_data($fig, $pin_desc, $feature_data);
49    
50      # Assign a set number to some PEGs based on similarity from blast scores      # Assign a set number to some PEGs based on similarity from blast scores
51      &color_pegs($fig, $pin_desc, $pinned_pegs, $regions, $feature_data, $fast_color, $sims_from);      &color_pegs($fig, $pin_desc, $pinned_pegs, $regions, $feature_data, $fast_color, $sims_from);
52    
53      # Filter out regions which have only a single PEG (the pinned one) colored.      # Filter out regions which have only a single PEG (the pinned one) colored.
54      $regions = &filter_regions_2($pin_desc, $regions, $feature_data);  #    $regions = &filter_regions_2($pin_desc, $regions, $feature_data);
55    
56      # Add feature data to the regions to make the final maps      # Add feature data to the regions to make the final maps
57      my $maps = &make_maps($fig, $regions, $feature_data);      my $maps = &make_maps($fig, $regions, $feature_data);
# Line 73  Line 76 
76          # Get PCH pinned pegs          # Get PCH pinned pegs
77          my $pch_pinned_pegs = &pch_pinned_pegs($fig, $pin_desc, \%ok_genome_id);          my $pch_pinned_pegs = &pch_pinned_pegs($fig, $pin_desc, \%ok_genome_id);
78    
79          my $n_pins = $pin_desc->{'n_pch_pins'};          $pch_pinned_pegs = &select_pegs($fig, $pin_desc, $pin_desc->{'n_pch_pins'}, $pch_pinned_pegs);
         $pch_pinned_pegs = &select_pegs($fig, $pin_desc, $n_pins, $pch_pinned_pegs);  
80    
81          # Get similarity pinned pegs          # Get similarity pinned pegs
82          my $sim_pegs        = &sim_pegs($fig, $pin_desc, \%ok_genome_id);          my $sim_pegs        = &sim_pegs($fig, $pin_desc, \%ok_genome_id);
83    
84            # Remove PCH PEGs (if any) from similarity pinned PEG list
85          my %seen  = map {$_ => 1} @$pch_pinned_pegs;          my %seen  = map {$_ => 1} @$pch_pinned_pegs;
86          $sim_pegs = [grep {! $seen{$_}} @$sim_pegs];          $sim_pegs = [grep {! $seen{$_}} @$sim_pegs];
87    
88          $sim_pegs = &select_pegs($fig, $pin_desc, $pin_desc->{'n_sims'}, $sim_pegs);          $sim_pegs = &select_pegs($fig, $pin_desc, $pin_desc->{'n_sims'}, $sim_pegs);
89    
90          @pegs = ();          @pegs = ();
# Line 159  Line 163 
163          return [];          return [];
164      }      }
165    
166      my $peg    = $pin_desc->{'pegs'}[0];      if ( $pin_desc->{'collapse_close_genomes'} == 1 )
     my $genome = $fig->genome_of($peg);  
   
     my %g_count;  
     my %gs_count;  
     my @peg_data;  
     # Add organism name, genus, species, phylogenetic distance, genus count and genus-species count  
     # The count is just an unique integer assigned to each PEG based on the genus or genus-species  
     # to be used later for sorting the pegs  
     foreach my $peg2 ( @$pegs )  
167      {      {
168          my $org2  = $fig->org_of($peg2);          # The ordering of the PEGs done here is in order to =select= the correct set, the ordering for the
169            # display is done later.
170    
171            # To collapse the PEG list, we want to:
172            # a. Return at most $n_pegs PEGs,
173            # b. include a representative PEG from every genus, subject to a, and
174            # c. include a representative PEG from each organism (genus-species), subject to a and b.
175    
176            my(%seen, @unique_genus, @unique_org);
177    
178            foreach my $peg ( @$pegs )
179            {
180                my $org = $fig->org_of($peg);
181          # 'org_of' returns undef for deleted PEGs, these need to be omitted from the peg list returned          # 'org_of' returns undef for deleted PEGs, these need to be omitted from the peg list returned
182          if ( $org2 )  
183                if ( $org )
184          {          {
185              my $dist2 = $fig->crude_estimate_of_distance($genome, $fig->genome_of($peg2));                  my($genus) = split(/\s+/, $org);
186              my($genus2, $species2) = split(/\s+/, $org2);  
187              $g_count{$genus2}++;                  if ( not $seen{$genus}++ ) {
188              $gs_count{$genus2}{$species2}++;                      push @unique_genus, $peg;
189                    } elsif ( not $seen{$org}++ ) {
190              push @peg_data, [$peg2, $org2, $genus2, $species2, $dist2, $gs_count{$genus2}{$species2}, $g_count{$genus2}];                      push @unique_org, $peg;
191          }          }
192      }      }
   
     # The sorting done here is for =selection= of the PEGs, not the display order  
     my $collapse_close_genomes = $pin_desc->{'collapse_close_genomes'};  
   
     if ( $collapse_close_genomes == 1 )  
     {  
         # Sort on genus count [6], genus-species count [5], phylogenetic distance [4], organism name [1] and PEG id [0]  
         @peg_data = sort {$a->[6] <=> $b->[6] or  
                           $a->[5] <=> $b->[5] or  
                           $a->[4] <=> $b->[4] or  
                           $a->[1] cmp $b->[1] or  
                           $a->[0] cmp $b->[0]} @peg_data;  
   
 # OLD code  
 #       # Separate the 'close' pegs from the 'diverse' ones,  
 #       my @pegs_close   = sort { $a->[5] <=> $b->[5] or  
 #                                 $a->[6] <=> $b->[6] or  
 #                                 $a->[4] <=> $b->[4] or  
 #                                 $a->[1] cmp $b->[1] or  
 #                                 $a->[0] cmp $b->[0]} @same_org, @same_gs;  
   
 #       my @pegs_diverse = sort { $a->[5] <=> $b->[5] or  
 #                                 $a->[6] <=> $b->[6] or  
 #                                 $a->[4] <=> $b->[4] or  
 #                                 $a->[1] cmp $b->[1] or  
 #                                 $a->[0] cmp $b->[0]} @same_g, @rest;  
   
 #       # Add some close PEGs to the diverse ones -- for every 6 'diverse' pegs, add one 'close' peg  
 #       my $n_add = int($n_pegs/6);  
 #       my $m     = ($n_add <= @pegs_close)? $n_add : @pegs_close;  
   
 #       if ( $m == 0 )  
 #       {  
 #           @peg_data = @pegs_diverse;  
 #       }  
 #       else  
 #       {  
 #           @peg_data = (@pegs_close[0..($m-1)], @pegs_diverse);  
 #       }  
193      }      }
     else  
     {  
         # taking out the sorting, it turns out this is confusing to the user.  
   
         # Separate pegs into diversity related categories relative to the input PEG:  
         # @same_org    same genus, same species, same strain  
         # @same_gs     same genus, same species, different strain  
         # same_g       same genus, different species  
         # @rest        all others  
   
 #       my $org    = $fig->org_of($peg);  
 #       my($genus, $species) = split(/\s+/, $org);  
   
 #       my(@same_org, @same_gs, @same_g, @rest);  
 #       foreach my $rec ( @peg_data )  
 #       {  
 #           my($org2, $genus2, $species2) = @$rec[1,2,3];  
 #           if ( $org2 eq $org ) {  
 #               push @same_org, $rec;  
 #           } elsif ( $genus2 eq $genus ) {  
 #               if ( $species2 eq $species ) {  
 #                   push @same_gs, $rec;  
 #               } else {  
 #                   push @same_g, $rec;  
 #               }  
 #           } else {  
 #               push @rest, $rec;  
 #           }  
 #       }  
   
 #       # Sort on PEG id [0]  
 #       @same_org = sort {$a->[0] cmp $b->[0]} @same_org;  
   
 #       # Sort on PEG id [0], organism name [1] and phylogenetic distance [4]  
 #       @same_gs  = sort {$a->[4] <=> $b->[4] or $a->[1] cmp $b->[1] or $a->[0] cmp $b->[0]} @same_gs;  
 #       @same_g   = sort {$a->[4] <=> $b->[4] or $a->[1] cmp $b->[1] or $a->[0] cmp $b->[0]} @same_g;  
 #       @rest     = sort {$a->[4] <=> $b->[4] or $a->[1] cmp $b->[1] or $a->[0] cmp $b->[0]} @rest;  
194    
195  #       @peg_data = (@same_org, @same_gs, @same_g, @rest);          $pegs = [@unique_genus, @unique_org];
196      }      }
197    
198      # Truncate list if necessary      # Truncate list if necessary
199      if ( $n_pegs < @peg_data )      if ( $n_pegs < @$pegs )
200      {      {
201          $#peg_data = $n_pegs - 1;          $#{ $pegs } = $n_pegs - 1;
202      }      }
203    
     # Extract peg list  
     @$pegs = map {$_->[0]} @peg_data;  
   
204      return $pegs;      return $pegs;
205  }  }
206    
# Line 434  Line 362 
362      my $loc                 = $fig->feature_location($fid);      my $loc                 = $fig->feature_location($fid);
363      my($contig, $beg, $end) = $fig->boundaries_of($loc);      my($contig, $beg, $end) = $fig->boundaries_of($loc);
364      my($left, $right)       = sort {$a <=> $b} ($beg, $end);      my($left, $right)       = sort {$a <=> $b} ($beg, $end);
365        my $size                = $right - $left + 1;
366      my $strand              = ($beg <= $end)? '+' : '-';      my $strand              = ($beg <= $end)? '+' : '-';
367      my $offset              = int(($left + $right)/2) - $region_mid;      my $offset              = int(($left + $right)/2) - $region_mid;
368      my $offset_beg          = $left  - $region_mid;      my $offset_beg          = $left  - $region_mid;
# Line 446  Line 375 
375               'contig'     => $contig,               'contig'     => $contig,
376               'beg'        => $beg,               'beg'        => $beg,
377               'end'        => $end,               'end'        => $end,
378                 'size'       => $size,
379               'strand'     => $strand,               'strand'     => $strand,
380               'offset'     => $offset,               'offset'     => $offset,
381               'offset_beg' => $offset_beg,               'offset_beg' => $offset_beg,
# Line 454  Line 384 
384           };           };
385  }  }
386    
387    sub add_functional_coupling {
388        my($fig, $pin_desc, $regions, $feature_data) = @_;
389    
390        my $fc_cutoff = defined($pin_desc->{'fc_cutoff'})? $pin_desc->{'fc_cutoff'} : 4;
391    
392        foreach my $region ( @$regions )
393        {
394            my $pin  = $region->{'pinned_peg'};
395            my @pegs = grep {$feature_data->{$_}{'type'} eq 'peg'} @{ $region->{'features'} };
396    
397            foreach my $couple ( $fig->coupled_to_batch(@pegs) )
398            {
399                my($peg1, $peg2, $sc) = @$couple;
400    
401                if ( $peg1 eq $pin and $sc >= $fc_cutoff )
402                {
403                    $feature_data->{$peg2}{'fc_score'} = $sc;
404                }
405            }
406        }
407    }
408    
409    sub add_subsystem_data {
410        my($fig, $pin_desc, $feature_data) = @_;
411    
412        # Get subsystem_information for =all= pegs
413        my %peg_to_ss = $fig->subsystems_for_pegs([grep {$feature_data->{$_}{'type'} eq 'peg'} keys %$feature_data]);
414    
415        # Count number of occurences of each subsystem
416        my %ss_count;
417        foreach my $ss ( map {$_->[0]} map {@$_} values %peg_to_ss )
418        {
419            $ss_count{$ss}++;
420        }
421    
422        # Sort subsystems based on count and create hash with unique index for each subsystem where
423        # lower indices correspond to higher number of occurences of the subsystem.
424        my %ss_index;
425        my $index = 0;
426        foreach my $ss ( sort {$ss_count{$b} <=> $ss_count{$a} or $a cmp $b} keys %ss_count )
427        {
428            $ss_index{$ss} = ++$index;
429        }
430    
431        # Add subsystem information for pegs in subsystems
432        foreach my $fid ( keys %peg_to_ss )
433        {
434            my @subsystems = ();
435            foreach my $rec ( @{ $peg_to_ss{$fid} } )
436            {
437                if ( @$rec )
438                {
439                    push @subsystems, $rec->[0];
440                }
441            }
442    
443            if ( @subsystems )
444            {
445                $feature_data->{$fid}{'subsystems'} = [sort {$a->[1] <=> $b->[1]} map {$_->[0] =~ s/_/ /g; $_} map {[$_, $ss_index{$_}]} @subsystems];
446            }
447        }
448    }
449    
450  sub color_pegs {  sub color_pegs {
451      my($fig, $pin_desc, $pinned_pegs, $regions, $feature_data, $fast_color, $sims_from) = @_;      my($fig, $pin_desc, $pinned_pegs, $regions, $feature_data, $fast_color, $sims_from) = @_;
452    
# Line 498  Line 491 
491  sub sort_color_sets {  sub sort_color_sets {
492      my($pin_desc, $pinned_pegs, $color_sets, $feature_data) = @_;      my($pin_desc, $pinned_pegs, $color_sets, $feature_data) = @_;
493    
494      # Find the color set containing the set of pinned pegs, i.e. the set containing      # Find the color set containing the set of pinned pegs, i.e. the set containing the input peg.
495      # the input peg,      # If the input peg is found (or if input is a list of pegs, returned value is the array index for the
496      my $i = &pinned_set_index($pin_desc, $pinned_pegs, $color_sets);      # set.
497        # If the input peg is not found, the returned value is an reference to an array containing the input peg.
498        my $set = &pinned_set($pin_desc, $pinned_pegs, $color_sets);
499    
500        my $pinned_color_set;
501        if ( $set =~ /^\d+$/ )
502        {
503      # Splice out the color set containing the input peg      # Splice out the color set containing the input peg
504      my($pinned_color_set) = splice(@$color_sets,$i,1);          ($pinned_color_set) = splice(@$color_sets,$set,1);
505        }
506        else
507        {
508            $pinned_color_set = $set;
509        }
510    
511        # Add offset (summed distance from
512        my @color_sets = map {[$_, &offset($_, $feature_data)]}  @$color_sets;
513      # Sort the remaining color sets based on:      # Sort the remaining color sets based on:
514      # a. size (number of pegs) and      # a. size (number of pegs) and
515      # b. offset from mid-point of region      # b. offset from mid-point of region
516      @$color_sets = map {$_->[0]}      @$color_sets = map {$_->[0]}
517                       sort {@{$b->[0]} <=> @{$a->[0]} or $a->[1] <=> $b->[1]}                       sort {@{$b->[0]} <=> @{$a->[0]} or $a->[1] <=> $b->[1]}
518    #                     sort {$a->[1] <=> $b->[1] or @{$b->[0]} <=> @{$a->[0]}}
519                         map {[$_, &offset($_, $feature_data)]}  @$color_sets;                         map {[$_, &offset($_, $feature_data)]}  @$color_sets;
520    
521      # Add the set of pinned pegs at the front of the returned list so that it gets colored red      # Add the set of pinned pegs at the front of the returned list so that it gets colored red
522      return [$pinned_color_set, @$color_sets];      return [$pinned_color_set, @$color_sets];
523  }  }
524    
525  sub pinned_set_index {  sub pinned_set {
526      my($pin_desc, $pinned_pegs, $color_sets) = @_;      my($pin_desc, $pinned_pegs, $color_sets) = @_;
527    
528        # Returns an integer if the input is a peg-list or if the input peg is in a set.
529        # Returns the input peg (as an array reference) if the input peg is not in a set.
530    
531      if ( @{ $pin_desc->{'pegs'} } == 1 )      if ( @{ $pin_desc->{'pegs'} } == 1 )
532      {      {
533          # Get input peg if it exists -- the set containing this peg should be colored red          # Get input peg if it exists -- the set containing this peg should be colored red
# Line 537  Line 546 
546              }              }
547          }          }
548    
549          # should not reach here          # The input peg may not be in a set if there is only one region or the coloring cutoff is very stringent.
550          print STDERR "problem finding input peg";          return [$peg];
         return 0;  
551      }      }
552      else      else
553      {      {
554            # Input is a peg-list, which may be split into multiple sets -- the largest will be called the
555            # "pinned" set.
556          my($i, $max_count);          my($i, $max_count);
557          my %pinned = map {$_ => 1} @$pinned_pegs;          my %pinned = map {$_ => 1} @$pinned_pegs;
558    
# Line 572  Line 582 
582          $offset += abs($feature_data->{$peg}{'offset'});          $offset += abs($feature_data->{$peg}{'offset'});
583      }      }
584    
585      return $offset;      return sprintf("%.2f", $offset/@$set);
586  }  }
587    
588  sub partition_pegs {  sub partition_pegs {

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.3

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3