[Bio] / FigKernelPackages / Clustering.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/Clustering.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (view) (download) (as text)

1 : overbeek 1.1 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 :     package Clustering;
19 :    
20 :     use Carp;
21 :     use Data::Dumper;
22 : overbeek 1.2 use tree_utilities;
23 : overbeek 1.1
24 :     # $connections->{$object1} ->{$object2} is the distance between $object1 and $object2, if it is defined (undef
25 :     # is equivalent to infinity)
26 :     #
27 :     sub cluster {
28 : overbeek 1.2 my($connections,$max_dist,$dist_func_ref,$things) = @_;
29 :    
30 :     if (! ref($dist_func_ref))
31 :     {
32 :     if ($dist_func_ref eq "avg_dist") { $dist_func_ref = \&avg_dist }
33 :     elsif ($dist_func_ref eq "max_dist") { $dist_func_ref = \&max_dist }
34 :     elsif ($dist_func_ref eq "single_linkage_dist") { $dist_func_ref = \&single_linkage_dist }
35 : overbeek 1.3 elsif ($dist_func_ref eq "min_dist") { $dist_func_ref = \&single_linkage_dist }
36 : overbeek 1.2 else { confess "Could not resolve the distance function" }
37 :     }
38 :     my @clusters = defined($things) ? @$things :
39 :     map { [$_] } keys(%$connections);
40 :     my @trees = map { [$_->[0],0,[undef]] } @clusters;
41 : overbeek 1.1
42 : overbeek 1.2 my ($cI,$cJ,$d) = &closest($connections,\@clusters,$max_dist,$dist_func_ref);
43 : overbeek 1.1 while (defined($cI))
44 :     {
45 : overbeek 1.2 my $treeI = $trees[$cI];
46 :     my $treeJ = $trees[$cJ];
47 :     my $parent = ['',0,[0,$treeI,$treeJ]];
48 :     $treeI->[2]->[0] = $treeJ->[2]->[0] = $parent;
49 :     $treeI->[1] = $treeJ->[1] = $d/2;
50 :     $trees[$cI] = $parent;
51 :     splice(@trees,$cJ,1);
52 :    
53 : overbeek 1.1 push(@{$clusters[$cI]},@{$clusters[$cJ]});
54 :     splice(@clusters,$cJ,1);
55 : overbeek 1.2 ($cI,$cJ,$d) = &closest($connections,\@clusters,$max_dist,$dist_func_ref);
56 : overbeek 1.1 }
57 : overbeek 1.2 return (\@clusters,\@trees);
58 : overbeek 1.1 }
59 :    
60 :     sub closest {
61 :     my($connections,$clusters,$max_dist,$dist_func_ref) = @_;
62 :    
63 :     my($i,$j,$best,$bestI,$bestJ);
64 :     for ($i=0; ($i < (@$clusters - 1)); $i++)
65 :     {
66 :     for ($j=$i+1; ($j < @$clusters); $j++)
67 :     {
68 :     my $dist = &$dist_func_ref($connections,$clusters->[$i],$clusters->[$j]);
69 :     if (defined($dist) && ($dist <= $max_dist))
70 :     {
71 :     if ((! defined($best)) || ($best > $dist))
72 :     {
73 :     $bestI = $i;
74 :     $bestJ = $j;
75 :     $best = $dist;
76 :     }
77 :     }
78 :     }
79 :     }
80 : overbeek 1.2 return ($bestI,$bestJ,$best);
81 : overbeek 1.1 }
82 :    
83 :     sub single_linkage_dist {
84 :     my($connections,$clust1,$clust2) = @_;
85 :    
86 :     my $best;
87 :     foreach my $x (@$clust1)
88 :     {
89 :     foreach my $y (@$clust2)
90 :     {
91 :     my $dist = $connections->{$x}->{$y};
92 :     if ((! defined($best)) || (defined($dist) && ($dist < $best)))
93 :     {
94 :     $best = $dist;
95 :     }
96 :     }
97 :     }
98 :     return $best;
99 :     }
100 :    
101 :     sub max_dist {
102 :     my($connections,$clust1,$clust2) = @_;
103 :    
104 :     my $best;
105 :     foreach my $x (@$clust1)
106 :     {
107 :     foreach my $y (@$clust2)
108 :     {
109 :     my $dist = $connections->{$x}->{$y};
110 :     if ((! defined($best)) || (defined($dist) && ($dist > $best)))
111 :     {
112 :     $best = $dist;
113 :     }
114 :     }
115 :     }
116 :     return $best;
117 :     }
118 :    
119 :     sub avg_dist {
120 :     my($connections,$clust1,$clust2) = @_;
121 :    
122 :     my $sum = 0;
123 :     my $n = 0;
124 :     foreach my $x (@$clust1)
125 :     {
126 :     foreach my $y (@$clust2)
127 :     {
128 :     my $dist = $connections->{$x}->{$y};
129 :     $n++;
130 :     $sum += $dist;
131 :     }
132 :     }
133 :     return $n ? ($sum/$n) : undef;
134 :     }
135 :    
136 :     1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3