[Bio] / FigKernelPackages / Clustering.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/Clustering.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : overbeek 1.1 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 :     package Clustering;
19 :    
20 :     use Carp;
21 :     use Data::Dumper;
22 : overbeek 1.2 use tree_utilities;
23 : overbeek 1.1
24 :     # $connections->{$object1} ->{$object2} is the distance between $object1 and $object2, if it is defined (undef
25 :     # is equivalent to infinity)
26 :     #
# Agglomerative (bottom-up) clustering driven by a table of pairwise distances.
#
# Arguments:
#   $connections   - hash ref; $connections->{$x}->{$y} is the distance between
#                    $x and $y (undef is treated as infinity).
#   $max_dist      - two clusters are merged only while their distance is
#                    <= $max_dist.
#   $dist_func_ref - code ref called as $f->($connections,$clusterA,$clusterB),
#                    or one of the names "avg_dist", "max_dist",
#                    "single_linkage_dist".
#   $things        - optional array ref of starting points.  Each entry may be
#                    a plain item (it becomes a one-member cluster) or an array
#                    ref of items (an initial cluster; it is copied, so the
#                    caller's arrays are never modified).  When omitted, every
#                    top-level key of $connections starts as its own cluster.
#
# Returns (\@clusters,\@trees): the final clusters (array refs of items) and,
# in the same order, one tree per cluster.  A tree node is
# [label, branch_length, [parent, child, ...]]; a leaf is labelled with the
# first member of its starting cluster, merged nodes are labelled ''.
# NOTE(review): presumably this is the node layout used by tree_utilities --
# confirm.  Parent and child nodes reference each other, so the trees are
# cyclic structures.
#
# Dies (confess) when a distance-function name cannot be resolved.
sub cluster {
    my($connections,$max_dist,$dist_func_ref,$things) = @_;

    if (! ref($dist_func_ref))
    {
        my %dist_func_for = (
            avg_dist            => \&avg_dist,
            max_dist            => \&max_dist,
            single_linkage_dist => \&single_linkage_dist,
        );
        my $resolved = defined($dist_func_ref) ? $dist_func_for{$dist_func_ref} : undef;
        if (! $resolved) { confess "Could not resolve the distance function" }
        $dist_func_ref = $resolved;
    }

    my @clusters;
    if (defined($things))
    {
        foreach my $thing (@$things)
        {
            # Copy array refs (clusters get merged in place below) and wrap
            # plain items so both calling conventions work.
            push(@clusters, (ref($thing) eq 'ARRAY') ? [@$thing] : [$thing]);
        }
    }
    else
    {
        # Sorted so that tie-breaking between equally close clusters does not
        # depend on Perl's hash ordering.
        @clusters = map { [$_] } sort keys(%$connections);
    }
    my @trees = map { [$_->[0],0,[undef]] } @clusters;

    my($cI,$cJ,$d) = closest($connections,\@clusters,$max_dist,$dist_func_ref);
    while (defined($cI))
    {
        # Join the two subtrees under a new unlabeled parent; each child gets
        # half of the merge distance as its branch length.
        my $treeI  = $trees[$cI];
        my $treeJ  = $trees[$cJ];
        my $parent = ['',0,[0,$treeI,$treeJ]];
        $treeI->[2]->[0] = $treeJ->[2]->[0] = $parent;
        $treeI->[1] = $treeJ->[1] = $d/2;

        # closest() always returns $cI < $cJ, so removing slot $cJ leaves
        # slot $cI in place.
        $trees[$cI] = $parent;
        splice(@trees,$cJ,1);

        push(@{$clusters[$cI]},@{$clusters[$cJ]});
        splice(@clusters,$cJ,1);

        ($cI,$cJ,$d) = closest($connections,\@clusters,$max_dist,$dist_func_ref);
    }
    return (\@clusters,\@trees);
}
58 :    
# Find the pair of clusters with the smallest distance not exceeding $max_dist.
#
# $dist_func_ref is called as $f->($connections,$clusterA,$clusterB) for every
# pair of positions ($i,$j) with $i < $j; an undefined result means the pair
# cannot be merged.  Returns ($i,$j,$distance) for the best pair (the first
# one encountered wins ties), or three undefs when no pair qualifies.
sub closest {
    my($connections,$clusters,$max_dist,$dist_func_ref) = @_;

    my($winner_i,$winner_j,$smallest);
    my $last = $#{$clusters};

    foreach my $first (0 .. $last - 1)
    {
        foreach my $second ($first + 1 .. $last)
        {
            my $candidate = $dist_func_ref->($connections,$clusters->[$first],$clusters->[$second]);

            # Skip unconnected pairs and pairs beyond the merge threshold.
            next unless defined($candidate);
            next unless $candidate <= $max_dist;

            # Only a strictly smaller distance replaces the current winner.
            next if defined($smallest) && ($smallest <= $candidate);

            ($winner_i,$winner_j,$smallest) = ($first,$second,$candidate);
        }
    }
    return ($winner_i,$winner_j,$smallest);
}
81 :    
# Single-linkage distance between two clusters: the smallest defined
# $connections->{$x}->{$y} over all $x in $clust1 and $y in $clust2.
# Pairs with no defined distance are ignored; the result is undef when no
# pair has a defined distance (including when either cluster is empty).
sub single_linkage_dist {
    my($connections,$clust1,$clust2) = @_;

    my $nearest;
    for my $member1 (@$clust1)
    {
        for my $member2 (@$clust2)
        {
            my $d = $connections->{$member1}->{$member2};
            next unless defined($d);

            if (! defined($nearest))  { $nearest = $d }
            elsif ($d < $nearest)     { $nearest = $d }
        }
    }
    return $nearest;
}
99 :    
# Complete-linkage distance between two clusters: the largest
# $connections->{$x}->{$y} over all $x in $clust1 and $y in $clust2.
#
# An undefined distance means "infinitely far apart", so as soon as one pair
# has no defined distance the result is undef (previously such pairs were
# skipped, which under-reported the distance).  Also returns undef when
# either cluster is empty.  $connections is only read; missing rows are not
# created.
sub max_dist {
    my($connections,$clust1,$clust2) = @_;

    my $best;
    foreach my $x (@$clust1)
    {
        # Fetch the row first so an unknown $x is not autovivified.
        my $row = $connections->{$x};
        foreach my $y (@$clust2)
        {
            my $dist = defined($row) ? $row->{$y} : undef;

            # One unconnected pair makes the maximum infinite; an explicit
            # undef keeps the single-value return shape.
            return undef unless defined($dist);

            if ((! defined($best)) || ($dist > $best))
            {
                $best = $dist;
            }
        }
    }
    return $best;
}
117 :    
# Average-linkage distance between two clusters: the mean of
# $connections->{$x}->{$y} over all $x in $clust1 and $y in $clust2.
#
# An undefined distance means "infinitely far apart", so if any pair has no
# defined distance the average is infinite and undef is returned (previously
# such pairs were silently counted as distance 0, dragging the average down).
# Also returns undef when either cluster is empty.  $connections is only
# read; missing rows are not created.
sub avg_dist {
    my($connections,$clust1,$clust2) = @_;

    my $sum = 0;
    my $n   = 0;
    foreach my $x (@$clust1)
    {
        # Fetch the row first so an unknown $x is not autovivified.
        my $row = $connections->{$x};
        foreach my $y (@$clust2)
        {
            my $dist = defined($row) ? $row->{$y} : undef;

            # One unconnected pair makes the mean infinite; an explicit
            # undef keeps the single-value return shape.
            return undef unless defined($dist);

            $n++;
            $sum += $dist;
        }
    }
    return $n ? ($sum/$n) : undef;
}
134 :    
135 :     1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3