[Bio] / FigKernelPackages / Clustering.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/Clustering.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (view) (download) (as text)

1 : overbeek 1.4 #
2 :     # This is a SAS component.
3 : overbeek 1.1 #
4 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
5 :     # for Interpretations of Genomes. All Rights Reserved.
6 :     #
7 :     # This file is part of the SEED Toolkit.
8 :     #
9 :     # The SEED Toolkit is free software. You can redistribute
10 :     # it and/or modify it under the terms of the SEED Toolkit
11 :     # Public License.
12 :     #
13 :     # You should have received a copy of the SEED Toolkit Public License
14 :     # along with this program; if not write to the University of Chicago
15 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
16 :     # Genomes at veronika@thefig.info or download a copy from
17 :     # http://www.theseed.org/LICENSE.TXT.
18 :     #
19 :    
20 :     package Clustering;
21 :    
22 :     use Carp;
23 :     use Data::Dumper;
24 : overbeek 1.2 use tree_utilities;
25 : overbeek 1.1
#
# ($clusters, $trees) = cluster($connections, $max_dist, $dist_func_ref, $things)
#
# Agglomerative clustering: start with one cluster per object and repeatedly
# merge the closest pair of clusters until no pair is within $max_dist.
#
#   $connections   - hash ref; $connections->{$object1}->{$object2} is the
#                    distance between $object1 and $object2, if it is defined
#                    (undef is equivalent to infinity).
#   $max_dist      - two clusters are merged only while their distance is
#                    <= $max_dist.
#   $dist_func_ref - code ref called as
#                    $dist_func_ref->($connections, $cluster1, $cluster2),
#                    or one of the names "avg_dist", "max_dist",
#                    "single_linkage_dist", "min_dist" (an alias for
#                    single linkage).  Any other name is fatal (confess).
#   $things        - optional array ref of the objects to cluster; when
#                    omitted, the keys of %$connections are used.
#
# Returns two parallel array refs: the clusters (each an array ref of
# objects) and one tree per cluster.  A tree node has the form
#     [ label, branch_length, [ parent, child1, child2 ] ]
# where leaves carry the object as label and internal nodes carry ''.
# Because nodes point at their parent and parents at their children, the
# trees contain reference cycles.
#
sub cluster {
    my($connections,$max_dist,$dist_func_ref,$things) = @_;

    if (! ref($dist_func_ref))
    {
        my %linkage_for = (
            avg_dist            => \&avg_dist,
            max_dist            => \&max_dist,
            single_linkage_dist => \&single_linkage_dist,
            min_dist            => \&single_linkage_dist,
        );
        my $resolved = defined($dist_func_ref) ? $linkage_for{$dist_func_ref} : undef;
        confess "Could not resolve the distance function" if (! $resolved);
        $dist_func_ref = $resolved;
    }

    # Hash key order differs from run to run, so sort the keys: the order of
    # the returned clusters and the way ties are broken is then reproducible.
    # A caller-supplied $things list is used in the order given.
    my @objects  = defined($things) ? @$things
                                    : sort { $a cmp $b } keys(%$connections);
    my @clusters = map { [$_] } @objects;
    my @trees    = map { [$_,0,[undef]] } @objects;

    my ($cI,$cJ,$d) = closest($connections,\@clusters,$max_dist,$dist_func_ref);
    while (defined($cI))
    {
        # Join the two subtrees under a fresh, unlabeled parent node.
        my $treeI  = $trees[$cI];
        my $treeJ  = $trees[$cJ];
        my $parent = ['',0,[0,$treeI,$treeJ]];
        $treeI->[2]->[0] = $treeJ->[2]->[0] = $parent;

        # NOTE(review): both subtrees get branch length $d/2 regardless of
        # how deep they already are, so leaf-to-root path lengths are not
        # equal in general -- confirm this is what tree consumers expect.
        $treeI->[1] = $treeJ->[1] = $d/2;

        # closest() always returns $cI < $cJ, so removing slot $cJ does not
        # shift slot $cI.
        $trees[$cI] = $parent;
        splice(@trees,$cJ,1);

        push(@{$clusters[$cI]},@{$clusters[$cJ]});
        splice(@clusters,$cJ,1);

        ($cI,$cJ,$d) = closest($connections,\@clusters,$max_dist,$dist_func_ref);
    }
    return (\@clusters,\@trees);
}
61 :    
#
# ($i, $j, $dist) = closest($connections, $clusters, $max_dist, $dist_func_ref)
#
# Scan every pair of clusters ($i < $j) and report the indexes and distance
# of the pair with the smallest distance, considering only pairs whose
# distance is defined and <= $max_dist.  On ties the pair met first in the
# scan is kept.  All three return values are undef when no pair qualifies.
#
sub closest {
    my($connections,$clusters,$max_dist,$dist_func_ref) = @_;

    my($winner_i,$winner_j,$winner_dist);
    my $last = $#{$clusters};

    foreach my $first (0 .. $last - 1)
    {
        foreach my $second ($first + 1 .. $last)
        {
            my $dist = $dist_func_ref->($connections,$clusters->[$first],$clusters->[$second]);

            # Unknown distances and pairs beyond the threshold never qualify.
            next unless defined($dist);
            next unless ($dist <= $max_dist);

            # Strict comparison: an equal distance does not displace the
            # pair already recorded.
            next unless ((! defined($winner_dist)) || ($winner_dist > $dist));

            ($winner_i,$winner_j,$winner_dist) = ($first,$second,$dist);
        }
    }
    return ($winner_i,$winner_j,$winner_dist);
}
84 :    
#
# $dist = single_linkage_dist($connections, $clust1, $clust2)
#
# Single-linkage distance between two clusters: the smallest defined
# $connections->{$x}->{$y} over all $x in @$clust1 and $y in @$clust2.
# Pairs with no recorded distance are ignored (undef stands for infinity,
# which can never be the minimum).  Returns undef when no pair has a
# defined distance.
#
# Only the $x -> $y direction is looked up, so $connections must hold the
# distance under that orientation.
#
sub single_linkage_dist {
    my($connections,$clust1,$clust2) = @_;

    my $best;
    foreach my $x (@$clust1)
    {
        # Fetch the row on its own: a chained $connections->{$x}->{$y}
        # lookup would autovivify an empty $connections->{$x} entry and so
        # modify the caller's hash for objects that have no connections.
        my $row = $connections->{$x};
        next if (! defined($row));

        foreach my $y (@$clust2)
        {
            my $dist = $row->{$y};
            next if (! defined($dist));

            if ((! defined($best)) || ($dist < $best))
            {
                $best = $dist;
            }
        }
    }
    return $best;
}
102 :    
#
# $dist = max_dist($connections, $clust1, $clust2)
#
# Complete-linkage distance between two clusters: the largest
# $connections->{$x}->{$y} over all $x in @$clust1 and $y in @$clust2.
#
# An undefined distance is equivalent to infinity, so as soon as one pair
# has no recorded distance the maximum is infinite and undef is returned.
# undef is also returned when either cluster is empty.
#
# Only the $x -> $y direction is looked up, so $connections must hold the
# distance under that orientation.
#
sub max_dist {
    my($connections,$clust1,$clust2) = @_;

    my $worst;
    foreach my $x (@$clust1)
    {
        # Fetch the row on its own so that a missing $connections->{$x} is
        # not autovivified into the caller's hash.
        my $row = $connections->{$x};

        foreach my $y (@$clust2)
        {
            my $dist = defined($row) ? $row->{$y} : undef;

            # One unknown (infinite) pair makes the whole maximum infinite.
            # An explicit undef keeps the one-value return shape callers had.
            return undef if (! defined($dist));

            if ((! defined($worst)) || ($dist > $worst))
            {
                $worst = $dist;
            }
        }
    }
    return $worst;
}
120 :    
#
# $dist = avg_dist($connections, $clust1, $clust2)
#
# Average-linkage distance between two clusters: the mean of
# $connections->{$x}->{$y} over all $x in @$clust1 and $y in @$clust2.
#
# An undefined distance is equivalent to infinity, so if any pair has no
# recorded distance the mean is infinite and undef is returned (adding the
# undef into the sum would count the pair as distance 0 and make
# unconnected clusters look close).  undef is also returned when either
# cluster is empty.
#
# Only the $x -> $y direction is looked up, so $connections must hold the
# distance under that orientation.
#
sub avg_dist {
    my($connections,$clust1,$clust2) = @_;

    my $sum = 0;
    my $n   = 0;
    foreach my $x (@$clust1)
    {
        # Fetch the row on its own so that a missing $connections->{$x} is
        # not autovivified into the caller's hash.
        my $row = $connections->{$x};

        foreach my $y (@$clust2)
        {
            my $dist = defined($row) ? $row->{$y} : undef;

            # An explicit undef keeps the one-value return shape callers had.
            return undef if (! defined($dist));

            $n++;
            $sum += $dist;
        }
    }
    return $n ? ($sum/$n) : undef;
}
137 :    
138 :     1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3