1 #include <cmath>
2 #include <iostream>
3 #include <algorithm>
4 #include <limits>
5 #include "bsta_k_medoid.h"
6 //:
7 // \file
8 #ifdef _MSC_VER
9 # include "vcl_msvc_warnings.h"
10 #endif
11 #include <cassert>
12
bsta_k_medoid(const unsigned n_elements,bool verbose)13 bsta_k_medoid::bsta_k_medoid(const unsigned n_elements, bool verbose)
14 {
15 n_elements_ = n_elements;
16 distance_array_.resize(n_elements, n_elements);
17 distance_array_.fill(0.0);
18 verbose_ = verbose;
19 }
20
21 //------------------------------------------------
22 // Is an element a medoid?
is_medoid(const unsigned i) const23 bool bsta_k_medoid::is_medoid(const unsigned i) const
24 {
25 std::vector<unsigned>::const_iterator result;
26 result = std::find(medoids_.begin(), medoids_.end(), i);
27 return result != medoids_.end();
28 }
29
30 //------------------------------------------------
31 // is an element in cluster k ?
in_cluster(const unsigned i,const unsigned k) const32 bool bsta_k_medoid::in_cluster(const unsigned i, const unsigned k) const
33 {
34 //Easy checks first
35 if (k>=this->k())
36 return false;
37 if (is_medoid(i))
38 return i==k;
39
40 std::vector<unsigned>::const_iterator result;
41 result = std::find(clusters_[k].begin(), clusters_[k].end(), i);
42 return result != clusters_[k].end();
43 }
44
45 //------------------------------------------------
46 //the distance between an element and its medoid
medoid_distance(const unsigned i) const47 double bsta_k_medoid::medoid_distance(const unsigned i) const
48 {
49 double d = 0;
50 for (unsigned k = 0; k<this->k(); ++k)
51 if (this->in_cluster(i, k))
52 d = distance(i, this->medoid(k));
53 return d;
54 }
55
56 //---------------------------------------------------
57 // The total distance between elements in the cluster and the cluster medoid
total_distance(const unsigned k) const58 double bsta_k_medoid::total_distance(const unsigned k) const
59 {
60 assert(k<this->k());
61 double d = 0;
62 unsigned m = medoid(k);//the medoid corresponding to index k
63 for (unsigned i = 0; i<this->size(k); ++i)
64 d += distance(clusters_[k][i], m);
65 return d;
66 }
67
68 //--------------------------------------------------------
69 // Compute the change in distance of swapping medoid j with
70 // current medoid k with respect to element i
dc(const unsigned i,const unsigned j,const unsigned k)71 double bsta_k_medoid::dc(const unsigned i, const unsigned j, const unsigned k)
72 {
73 //current distance
74 double d_ik = distance(i, k);
75
76 //distance to j
77 double d_ij = distance(i, j);
78
79 // distance change if swap were done
80 return d_ij - d_ik;
81 }
82
83 //--------------------------------------------------------
84 // Compute the change in distance of swapping medoid j with
85 // current medoid k with respect to total distance between medoids
dcm(const unsigned j,const unsigned k)86 double bsta_k_medoid::dcm(const unsigned j, const unsigned k)
87 {
88 //iterate over medoids
89 double dk = 0, dj = 0;
90 unsigned kmax = this->k();
91 if (!kmax)
92 return 0.0;
93 unsigned count = 0;
94 for (unsigned nk=0; nk<kmax; ++nk, ++count)
95 {
96 unsigned m = this->medoid(nk);
97 if (m==k)
98 continue;
99 dk += this->distance(k, m);
100 dj += this->distance(j, m);
101 }
102 if (!count)
103 return 0;
104 //negative change if inter-medoid distance increases
105 double result = (dk-dj)/count;
106 return result;
107 }
108
109
110 //: Clear the cluster vectors
clear_clusters()111 void bsta_k_medoid::clear_clusters()
112 {
113 for (unsigned j=0; j<this->k(); ++j)
114 clusters_[j].clear();
115 }
116
117
118 //:assign non-medoids to clusters
form_clusters()119 void bsta_k_medoid::form_clusters()
120 {
121 this->clear_clusters();
122 for (unsigned i = 0; i<n_elements_;++i)
123 if (is_medoid(i))
124 continue;
125 else
126 {
127 //find closest medoid
128 double dmin = std::numeric_limits<double>::max();
129 unsigned jmin=0;
130 for (unsigned j=0; j<this->k(); ++j)
131 if (distance(i,this->medoid(j))<dmin)
132 {
133 jmin = j;
134 dmin = distance(i,this->medoid(j));
135 }
136 //assign i to the closest (jmin)
137 clusters_[jmin].push_back(i);
138 }
139 //put medoids into their own clusters
140 for (unsigned j=0; j<this->k(); ++j)
141 clusters_[j].push_back(medoids_[j]);
142 }
143
144
145 //:replace medoid k with medoid j
replace_medoid(const unsigned j,const unsigned k)146 bool bsta_k_medoid::replace_medoid(const unsigned j, const unsigned k)
147 {
148 std::vector<unsigned>::iterator result;
149 result = std::find(medoids_.begin(), medoids_.end(), k);
150 if (result == medoids_.end())
151 return false;
152 (*result) = j;
153 return true;
154 }
155
156
157 //: Returns false if swap is not warranted
test_medoid_swap(unsigned & mj,unsigned & mk)158 bool bsta_k_medoid::test_medoid_swap(unsigned& mj, unsigned& mk)
159 {
160 //Impossible values
161 mj = n_elements_, mk = n_elements_;
162
163 // for each j not a medoid
164 double Sdc_min = std::numeric_limits<double>::max();
165 unsigned jmin=0, kmin=0;
166 for ( unsigned j = 0; j<n_elements_; ++j)
167 if (is_medoid(j))
168 continue;
169 else
170 for (unsigned k = 0; k<this->k(); ++k)
171 {
172 if (verbose_)
173 {
174 std::cout << "\n===== Current Medoids(";
175 for (unsigned m = 0; m<this->k(); ++m)
176 std::cout << medoid(m) << ' ';
177 std::cout << ")\n"
178 << "Checking Swap " << j << "->" << medoid(k) << '\n';
179 }
180 double Sdc = 0;
181 unsigned count = 0;
182 //Sum up the effect of swapping j for k on each non-medoid element
183 for (unsigned i = 0; i<n_elements_;++i)
184 if (is_medoid(i)||i==j)
185 continue;
186 else{
187 Sdc += dc(i,j,medoid(k));
188 ++count;}
189
190 if (count>0)
191 Sdc /= count;
192 double med_dist=this->dcm(j, medoid(k));
193 double total = Sdc + med_dist;
194
195 if (verbose_)
196 {
197 std::cout << "Inter-element distance change " << Sdc << '\n'
198 << "Inter-medoid distance change " << med_dist << '\n'
199 << "Total change " << total << '\n';
200 }
201
202 if (total < Sdc_min)
203 {
204 jmin = j;
205 kmin = medoid(k);
206 Sdc_min = total;
207 }
208 }
209
210 if (Sdc_min<0)
211 {
212 mj = jmin;
213 mk = kmin;
214 return true;
215 }
216 return false;
217 }
218
219 //-------------------------------------------------------
220 // Find the best set of k medoids
221 //
do_clustering(const unsigned nk)222 void bsta_k_medoid::do_clustering(const unsigned nk)
223 {
224 assert(nk<=n_elements_);
225
226 // arbitrarily select k medoids
227 medoids_.clear();
228 // choose first k elements as medoids
229 for (unsigned i = 0; i<nk; ++i)
230 medoids_.push_back(i);
231 // reset the clusters
232 clusters_.clear();
233 clusters_.resize(nk);
234 //We are done if there are no non-medoids
235 if (nk==n_elements_)
236 {
237 for (unsigned i = 0; i<nk; ++i)
238 clusters_[i].push_back(i);
239 return;
240 }
241 //otherwise
242 this->form_clusters();
243 unsigned mj, mk;//potential swap for medoids( swap j for k)
244 while (test_medoid_swap(mj, mk))
245 {
246 replace_medoid(mj, mk);
247 form_clusters();
248 if (verbose_)
249 {
250 std::cout << "***Swapping " << mj << "->" << mk << "***\n";
251 for (unsigned k = 0; k<this->k(); ++k)
252 {
253 std::cout << "Medoid[" << k << "] = " << medoid(k) << '\n'
254 << "with cluster\n";
255 for (unsigned j = 0; j<size(k); ++j)
256 std::cout << clusters_[k][j] << ' ' ;
257 std::cout << '\n'
258 << "Total Cluster Distance = "
259 << total_distance(k)<< '\n';
260 }
261 }
262 }
263 return;
264 }
265