[Bio] / FigKernelPackages / Clustering.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/Clustering.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (view) (download) (as text)

1 : overbeek 1.4 #
2 :     # This is a SAS component.
3 : overbeek 1.1 #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package Clustering;
21 :    
22 :     use Carp;
23 :     use Data::Dumper;
24 : overbeek 1.2 use tree_utilities;
25 : overbeek 1.1
26 :     # $connections->{$object1} ->{$object2} is the distance between $object1 and $object2, if it is defined (undef
27 :     # is equivalent to infinity)
28 :     #
29 :     sub cluster {
30 : overbeek 1.2 my($connections,$max_dist,$dist_func_ref,$things) = @_;
31 :    
32 :     if (! ref($dist_func_ref))
33 :     {
34 :     if ($dist_func_ref eq "avg_dist") { $dist_func_ref = \&avg_dist }
35 :     elsif ($dist_func_ref eq "max_dist") { $dist_func_ref = \&max_dist }
36 : overbeek 1.6 elsif ($dist_func_ref eq "min_dist") { $dist_func_ref = \&single_linkage_dist }
37 : overbeek 1.2 elsif ($dist_func_ref eq "single_linkage_dist") { $dist_func_ref = \&single_linkage_dist }
38 : overbeek 1.6 elsif ($dist_func_ref eq "double_linkage_dist") { $dist_func_ref = \&double_linkage_dist }
39 :     elsif ($dist_func_ref eq "triple_linkage_dist") { $dist_func_ref = \&triple_linkage_dist }
40 : overbeek 1.2 else { confess "Could not resolve the distance function" }
41 :     }
42 : overbeek 1.4 my @clusters = defined($things) ? map { [$_] } @$things :
43 : overbeek 1.2 map { [$_] } keys(%$connections);
44 :     my @trees = map { [$_->[0],0,[undef]] } @clusters;
45 : overbeek 1.1
46 : overbeek 1.2 my ($cI,$cJ,$d) = &closest($connections,\@clusters,$max_dist,$dist_func_ref);
47 : overbeek 1.1 while (defined($cI))
48 :     {
49 : overbeek 1.2 my $treeI = $trees[$cI];
50 :     my $treeJ = $trees[$cJ];
51 :     my $parent = ['',0,[0,$treeI,$treeJ]];
52 :     $treeI->[2]->[0] = $treeJ->[2]->[0] = $parent;
53 :     $treeI->[1] = $treeJ->[1] = $d/2;
54 :     $trees[$cI] = $parent;
55 :     splice(@trees,$cJ,1);
56 : overbeek 1.1 push(@{$clusters[$cI]},@{$clusters[$cJ]});
57 :     splice(@clusters,$cJ,1);
58 : overbeek 1.2 ($cI,$cJ,$d) = &closest($connections,\@clusters,$max_dist,$dist_func_ref);
59 : overbeek 1.1 }
60 : overbeek 1.2 return (\@clusters,\@trees);
61 : overbeek 1.1 }
62 :    
63 :     sub closest {
64 :     my($connections,$clusters,$max_dist,$dist_func_ref) = @_;
65 :    
66 :     my($i,$j,$best,$bestI,$bestJ);
67 :     for ($i=0; ($i < (@$clusters - 1)); $i++)
68 :     {
69 :     for ($j=$i+1; ($j < @$clusters); $j++)
70 :     {
71 : overbeek 1.6 my $dist = &$dist_func_ref($connections,$clusters->[$i],$clusters->[$j],$max_dist);
72 : overbeek 1.1 if (defined($dist) && ($dist <= $max_dist))
73 :     {
74 :     if ((! defined($best)) || ($best > $dist))
75 :     {
76 :     $bestI = $i;
77 :     $bestJ = $j;
78 :     $best = $dist;
79 :     }
80 :     }
81 :     }
82 :     }
83 : overbeek 1.2 return ($bestI,$bestJ,$best);
84 : overbeek 1.1 }
85 :    
86 :     sub single_linkage_dist {
87 :     my($connections,$clust1,$clust2) = @_;
88 :    
89 :     my $best;
90 :     foreach my $x (@$clust1)
91 :     {
92 :     foreach my $y (@$clust2)
93 :     {
94 :     my $dist = $connections->{$x}->{$y};
95 :     if ((! defined($best)) || (defined($dist) && ($dist < $best)))
96 :     {
97 :     $best = $dist;
98 :     }
99 :     }
100 :     }
101 :     return $best;
102 :     }
103 :    
104 : overbeek 1.6 sub double_linkage_dist {
105 :     my($connections,$clust1,$clust2,$max_dist) = @_;
106 :    
107 :     return &n_linkage_dist($connections,$clust1,$clust2,$max_dist,2);
108 :     }
109 :    
110 :     sub triple_linkage_dist {
111 :     my($connections,$clust1,$clust2,$max_dist) = @_;
112 :    
113 :     return &n_linkage_dist($connections,$clust1,$clust2,$max_dist,3);
114 :     }
115 :    
116 :    
117 :     sub n_linkage_dist {
118 :     my($connections,$clust1,$clust2,$max_dist,$min_link) = @_;
119 :    
120 :     my $best;
121 :     my $count = 0;
122 :     foreach my $x (@$clust1)
123 :     {
124 :     foreach my $y (@$clust2)
125 :     {
126 :     my $dist = $connections->{$x}->{$y};
127 :     if (defined($dist) && ($dist <= $max_dist))
128 :     {
129 :     $count++;
130 :     if ((! defined($best)) || ($dist > $best))
131 :     {
132 :     $best = $dist;
133 :     }
134 :     }
135 :     }
136 :     }
137 :     my $max_clust = (@$clust1 >= @$clust2) ? @$clust1 : @$clust2;
138 :     my $need = ($max_clust > $min_link) ? $min_link : $max_clust;
139 :     return ($count >= $need) ? $best : undef;
140 :     }
141 :    
142 : overbeek 1.1 sub max_dist {
143 :     my($connections,$clust1,$clust2) = @_;
144 :    
145 :     my $best;
146 :     foreach my $x (@$clust1)
147 :     {
148 :     foreach my $y (@$clust2)
149 :     {
150 :     my $dist = $connections->{$x}->{$y};
151 : overbeek 1.5 if (! defined($dist)) { return undef }
152 :     if ((! defined($best)) || ($dist > $best))
153 : overbeek 1.1 {
154 :     $best = $dist;
155 :     }
156 :     }
157 :     }
158 :     return $best;
159 :     }
160 :    
161 :     sub avg_dist {
162 :     my($connections,$clust1,$clust2) = @_;
163 :    
164 :     my $sum = 0;
165 :     my $n = 0;
166 :     foreach my $x (@$clust1)
167 :     {
168 :     foreach my $y (@$clust2)
169 :     {
170 :     my $dist = $connections->{$x}->{$y};
171 : overbeek 1.5 if (! defined($dist)) { return undef }
172 : overbeek 1.1 $n++;
173 :     $sum += $dist;
174 :     }
175 :     }
176 : overbeek 1.5 return ($sum/$n);
177 : overbeek 1.1 }
178 :    
179 :     1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3