[Bio] / FigKernelPackages / Clustering.pm Repository:
ViewVC logotype

Diff of /FigKernelPackages/Clustering.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2, Thu Feb 18 16:57:11 2010 UTC revision 1.6, Thu Nov 11 23:29:48 2010 UTC
# Line 1  Line 1 
1  #  #
2    # This is a SAS component.
3    #
4  # Copyright (c) 2003-2006 University of Chicago and Fellowship  # Copyright (c) 2003-2006 University of Chicago and Fellowship
5  # for Interpretations of Genomes. All Rights Reserved.  # for Interpretations of Genomes. All Rights Reserved.
6  #  #
# Line 31  Line 33 
33      {      {
34          if    ($dist_func_ref eq "avg_dist")            { $dist_func_ref = \&avg_dist }          if    ($dist_func_ref eq "avg_dist")            { $dist_func_ref = \&avg_dist }
35          elsif ($dist_func_ref eq "max_dist")            { $dist_func_ref = \&max_dist }          elsif ($dist_func_ref eq "max_dist")            { $dist_func_ref = \&max_dist }
36            elsif ($dist_func_ref eq "min_dist")            { $dist_func_ref = \&single_linkage_dist }
37          elsif ($dist_func_ref eq "single_linkage_dist") { $dist_func_ref = \&single_linkage_dist }          elsif ($dist_func_ref eq "single_linkage_dist") { $dist_func_ref = \&single_linkage_dist }
38            elsif ($dist_func_ref eq "double_linkage_dist") { $dist_func_ref = \&double_linkage_dist }
39            elsif ($dist_func_ref eq "triple_linkage_dist") { $dist_func_ref = \&triple_linkage_dist }
40          else { confess "Could not resolve the distance function" }          else { confess "Could not resolve the distance function" }
41      }      }
42      my @clusters = defined($things) ? @$things :      my @clusters = defined($things) ? map { [$_] } @$things :
43                                        map { [$_] } keys(%$connections);                                        map { [$_] } keys(%$connections);
44      my @trees    = map { [$_->[0],0,[undef]] } @clusters;      my @trees    = map { [$_->[0],0,[undef]] } @clusters;
45    
# Line 48  Line 53 
53          $treeI->[1] = $treeJ->[1] = $d/2;          $treeI->[1] = $treeJ->[1] = $d/2;
54          $trees[$cI] = $parent;          $trees[$cI] = $parent;
55          splice(@trees,$cJ,1);          splice(@trees,$cJ,1);
   
56          push(@{$clusters[$cI]},@{$clusters[$cJ]});          push(@{$clusters[$cI]},@{$clusters[$cJ]});
57          splice(@clusters,$cJ,1);          splice(@clusters,$cJ,1);
58          ($cI,$cJ,$d) = &closest($connections,\@clusters,$max_dist,$dist_func_ref);          ($cI,$cJ,$d) = &closest($connections,\@clusters,$max_dist,$dist_func_ref);
# Line 64  Line 68 
68      {      {
69          for ($j=$i+1; ($j < @$clusters); $j++)          for ($j=$i+1; ($j < @$clusters); $j++)
70          {          {
71              my $dist = &$dist_func_ref($connections,$clusters->[$i],$clusters->[$j]);              my $dist = &$dist_func_ref($connections,$clusters->[$i],$clusters->[$j],$max_dist);
72              if (defined($dist) && ($dist <= $max_dist))              if (defined($dist) && ($dist <= $max_dist))
73              {              {
74                  if ((! defined($best)) || ($best > $dist))                  if ((! defined($best)) || ($best > $dist))
# Line 97  Line 101 
101      return $best;      return $best;
102  }  }
103    
104    sub double_linkage_dist {
105        my($connections,$clust1,$clust2,$max_dist) = @_;
106    
107        return &n_linkage_dist($connections,$clust1,$clust2,$max_dist,2);
108    }
109    
110    sub triple_linkage_dist {
111        my($connections,$clust1,$clust2,$max_dist) = @_;
112    
113        return &n_linkage_dist($connections,$clust1,$clust2,$max_dist,3);
114    }
115    
116    
117    sub n_linkage_dist {
118        my($connections,$clust1,$clust2,$max_dist,$min_link) = @_;
119    
120        my $best;
121        my $count = 0;
122        foreach my $x (@$clust1)
123        {
124            foreach my $y (@$clust2)
125            {
126                my $dist = $connections->{$x}->{$y};
127                if (defined($dist) && ($dist <= $max_dist))
128                {
129                    $count++;
130                    if ((! defined($best)) || ($dist > $best))
131                    {
132                        $best = $dist;
133                    }
134                }
135            }
136        }
137        my $max_clust = (@$clust1 >= @$clust2) ? @$clust1 : @$clust2;
138        my $need = ($max_clust > $min_link) ? $min_link : $max_clust;
139        return ($count >= $need) ? $best : undef;
140    }
141    
142  sub max_dist {  sub max_dist {
143      my($connections,$clust1,$clust2) = @_;      my($connections,$clust1,$clust2) = @_;
144    
# Line 106  Line 148 
148          foreach my $y (@$clust2)          foreach my $y (@$clust2)
149          {          {
150              my $dist = $connections->{$x}->{$y};              my $dist = $connections->{$x}->{$y};
151              if ((! defined($best)) || (defined($dist) && ($dist > $best)))              if (! defined($dist)) { return undef }
152                if ((! defined($best)) || ($dist > $best))
153              {              {
154                  $best = $dist;                  $best = $dist;
155              }              }
# Line 125  Line 168 
168          foreach my $y (@$clust2)          foreach my $y (@$clust2)
169          {          {
170              my $dist = $connections->{$x}->{$y};              my $dist = $connections->{$x}->{$y};
171                if (! defined($dist)) { return undef }
172              $n++;              $n++;
173              $sum += $dist;              $sum += $dist;
174          }          }
175      }      }
176      return $n ? ($sum/$n) : undef;      return ($sum/$n);
177  }  }
178    
179  1  1

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.6

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3