[Bio] / FigKernelPackages / Clustering.pm Repository:
ViewVC logotype

Annotation of /FigKernelPackages/Clustering.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : overbeek 1.1 #
2 :     # Copyright (c) 2003-2006 University of Chicago and Fellowship
3 :     # for Interpretations of Genomes. All Rights Reserved.
4 :     #
5 :     # This file is part of the SEED Toolkit.
6 :     #
7 :     # The SEED Toolkit is free software. You can redistribute
8 :     # it and/or modify it under the terms of the SEED Toolkit
9 :     # Public License.
10 :     #
11 :     # You should have received a copy of the SEED Toolkit Public License
12 :     # along with this program; if not write to the University of Chicago
13 :     # at info@ci.uchicago.edu or the Fellowship for Interpretation of
14 :     # Genomes at veronika@thefig.info or download a copy from
15 :     # http://www.theseed.org/LICENSE.TXT.
16 :     #
17 :    
18 :     package Clustering;
19 :    
20 :     use Carp;
21 :     use Data::Dumper;
22 :    
23 :    
24 :     # $connections->{$object1} ->{$object2} is the distance between $object1 and $object2, if it is defined (undef
25 :     # is equivalent to infinity)
26 :     #
# Agglomerative clustering driver.
#
# Parameters:
#   $connections   - hash ref of hash refs; $connections->{$o1}->{$o2} is the
#                    distance between $o1 and $o2 (handed on to closest())
#   $max_dist      - merge threshold, handed on to closest()
#   $dist_func_ref - code ref used by closest() to score a pair of clusters
#
# Each top-level key of %$connections starts out as its own one-element
# cluster.  As long as closest() reports a pair of cluster indices, the
# members of the second cluster are appended to the first and the second
# cluster is dropped from the list.
#
# Returns a reference to the list of clusters; each cluster is an array ref
# holding the objects assigned to it.
sub cluster {
    my ($connections, $max_dist, $dist_func_ref) = @_;

    my @clusters;
    foreach my $object (keys(%$connections)) {
        push(@clusters, [$object]);
    }

    while (1) {
        my ($keep, $absorb) = closest($connections, \@clusters, $max_dist, $dist_func_ref);
        last unless defined($keep);

        # Copy the members across first, then remove the absorbed cluster.
        my $target = $clusters[$keep];
        my $source = $clusters[$absorb];
        push(@$target, @$source);
        splice(@clusters, $absorb, 1);
    }

    return \@clusters;
}
41 :    
# Find the pair of clusters that should be merged next.
#
# Parameters:
#   $connections   - distance table, passed unchanged to the distance function
#   $clusters      - array ref of clusters (each an array ref of objects)
#   $max_dist      - pairs whose distance exceeds this value are ignored
#   $dist_func_ref - code ref called as ($connections, $clusterA, $clusterB);
#                    an undefined return value means "no usable distance"
#
# Returns the two indices ($i, $j), with $i < $j, of the pair with the
# smallest qualifying distance.  On a tie the pair encountered first wins.
# Returns (undef, undef) when no pair qualifies.
sub closest {
    my ($connections, $clusters, $max_dist, $dist_func_ref) = @_;

    my ($best_i, $best_j, $best_dist);
    my $last = $#{$clusters};

    foreach my $i (0 .. $last - 1) {
        foreach my $j ($i + 1 .. $last) {
            my $dist = $dist_func_ref->($connections, $clusters->[$i], $clusters->[$j]);

            next unless defined($dist);
            next unless ($dist <= $max_dist);

            # Only a strictly smaller distance displaces the current best.
            next unless (!defined($best_dist) || ($best_dist > $dist));

            ($best_i, $best_j, $best_dist) = ($i, $j, $dist);
        }
    }

    return ($best_i, $best_j);
}
64 :    
# Single-linkage distance between two clusters: the smallest defined
# distance $connections->{$x}->{$y} over all $x in the first cluster and
# $y in the second.
#
# Parameters:
#   $connections - hash ref of hash refs holding pairwise distances; a
#                  missing or undefined entry means "infinitely far apart"
#   $clust1      - array ref of objects (looked up as first-level keys)
#   $clust2      - array ref of objects (looked up as second-level keys)
#
# Returns the minimum distance, or undef when no pair has a defined
# distance (including when either cluster is empty).
#
# The distance table is never modified: rows are fetched on their own so
# that looking up an object with no row does not autovivify an empty hash
# into the caller's data.
sub single_linkage_dist {
    my ($connections, $clust1, $clust2) = @_;

    my $best;
    foreach my $x (@$clust1) {
        # A chained ->{$x}->{$y} read would create $connections->{$x}.
        my $row = $connections->{$x};
        next unless defined($row);

        foreach my $y (@$clust2) {
            my $dist = $row->{$y};
            next unless defined($dist);

            if (!defined($best) || ($dist < $best)) {
                $best = $dist;
            }
        }
    }
    return $best;
}
82 :    
# Complete-linkage distance between two clusters: the largest distance
# $connections->{$x}->{$y} over all $x in the first cluster and $y in the
# second.
#
# Parameters:
#   $connections - hash ref of hash refs holding pairwise distances; a
#                  missing or undefined entry means "infinitely far apart"
#   $clust1      - array ref of objects (looked up as first-level keys)
#   $clust2      - array ref of objects (looked up as second-level keys)
#
# Returns the maximum distance.  Because an undefined distance stands for
# infinity, the result is undef as soon as any pair lacks a defined
# distance: the maximum of a set containing infinity is infinity.  Also
# returns undef when either cluster is empty.
#
# The distance table is never modified: rows are fetched on their own so
# that looking up an object with no row does not autovivify an empty hash
# into the caller's data.
sub max_dist {
    my ($connections, $clust1, $clust2) = @_;

    my $worst;
    foreach my $x (@$clust1) {
        # A chained ->{$x}->{$y} read would create $connections->{$x}.
        my $row = $connections->{$x};

        foreach my $y (@$clust2) {
            my $dist = defined($row) ? $row->{$y} : undef;

            # One unconnected pair makes the whole maximum infinite.  An
            # explicit undef keeps the return value a single scalar, as the
            # normal exit below does.
            return undef unless defined($dist);

            if (!defined($worst) || ($dist > $worst)) {
                $worst = $dist;
            }
        }
    }
    return $worst;
}
100 :    
# Average-linkage distance between two clusters: the mean of
# $connections->{$x}->{$y} over all $x in the first cluster and $y in the
# second.
#
# Parameters:
#   $connections - hash ref of hash refs holding pairwise distances; a
#                  missing or undefined entry means "infinitely far apart"
#   $clust1      - array ref of objects (looked up as first-level keys)
#   $clust2      - array ref of objects (looked up as second-level keys)
#
# Returns the mean distance.  Because an undefined distance stands for
# infinity, the result is undef as soon as any pair lacks a defined
# distance; such a pair must not be counted as a distance of zero, which
# would make unconnected clusters look closer than connected ones.  Also
# returns undef when either cluster is empty.
#
# The distance table is never modified: rows are fetched on their own so
# that looking up an object with no row does not autovivify an empty hash
# into the caller's data.
sub avg_dist {
    my ($connections, $clust1, $clust2) = @_;

    my $sum = 0;
    my $n   = 0;
    foreach my $x (@$clust1) {
        # A chained ->{$x}->{$y} read would create $connections->{$x}.
        my $row = $connections->{$x};

        foreach my $y (@$clust2) {
            my $dist = defined($row) ? $row->{$y} : undef;

            # One unconnected pair makes the whole average infinite.  An
            # explicit undef keeps the return value a single scalar, as the
            # normal exit below does.
            return undef unless defined($dist);

            $sum += $dist;
            $n++;
        }
    }
    return $n ? ($sum / $n) : undef;
}
117 :    
118 :     1

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3