1 #include <cmath>
2 #include <iostream>
3 #include <algorithm>
4 #include <limits>
5 #include "bsta_k_medoid.h"
6 //:
7 // \file
8 #ifdef _MSC_VER
9 #  include "vcl_msvc_warnings.h"
10 #endif
11 #include <cassert>
12 
bsta_k_medoid(const unsigned n_elements,bool verbose)13 bsta_k_medoid::bsta_k_medoid(const unsigned n_elements, bool verbose)
14 {
15   n_elements_ = n_elements;
16   distance_array_.resize(n_elements, n_elements);
17   distance_array_.fill(0.0);
18   verbose_ = verbose;
19 }
20 
21 //------------------------------------------------
22 // Is an element a medoid?
is_medoid(const unsigned i) const23 bool bsta_k_medoid::is_medoid(const unsigned i) const
24 {
25   std::vector<unsigned>::const_iterator result;
26   result = std::find(medoids_.begin(), medoids_.end(), i);
27   return result != medoids_.end();
28 }
29 
30 //------------------------------------------------
31 // is an element in cluster k ?
in_cluster(const unsigned i,const unsigned k) const32 bool bsta_k_medoid::in_cluster(const unsigned i, const unsigned k) const
33 {
34   //Easy checks first
35   if (k>=this->k())
36     return false;
37   if (is_medoid(i))
38     return i==k;
39 
40   std::vector<unsigned>::const_iterator result;
41   result = std::find(clusters_[k].begin(), clusters_[k].end(), i);
42   return result != clusters_[k].end();
43 }
44 
45 //------------------------------------------------
46 //the distance between an element and its medoid
medoid_distance(const unsigned i) const47 double bsta_k_medoid::medoid_distance(const unsigned i) const
48 {
49   double d = 0;
50   for (unsigned k = 0; k<this->k(); ++k)
51     if (this->in_cluster(i, k))
52       d = distance(i, this->medoid(k));
53   return d;
54 }
55 
56 //---------------------------------------------------
57 // The total distance between elements in the cluster and the cluster medoid
total_distance(const unsigned k) const58 double bsta_k_medoid::total_distance(const unsigned k) const
59 {
60   assert(k<this->k());
61   double d = 0;
62   unsigned m = medoid(k);//the medoid corresponding to index k
63   for (unsigned i = 0; i<this->size(k); ++i)
64     d += distance(clusters_[k][i], m);
65   return d;
66 }
67 
68 //--------------------------------------------------------
69 // Compute the change in distance of swapping medoid j with
70 // current medoid k with respect to element i
dc(const unsigned i,const unsigned j,const unsigned k)71 double bsta_k_medoid::dc(const unsigned i, const unsigned j, const unsigned k)
72 {
73   //current distance
74   double d_ik = distance(i, k);
75 
76   //distance to j
77   double d_ij = distance(i, j);
78 
79   // distance change if swap were done
80   return d_ij - d_ik;
81 }
82 
83 //--------------------------------------------------------
84 // Compute the change in distance of swapping medoid j with
85 // current medoid k with respect to total distance between medoids
dcm(const unsigned j,const unsigned k)86 double bsta_k_medoid::dcm(const unsigned j, const unsigned k)
87 {
88   //iterate over medoids
89   double dk = 0, dj = 0;
90   unsigned kmax = this->k();
91   if (!kmax)
92     return 0.0;
93   unsigned count = 0;
94   for (unsigned nk=0; nk<kmax; ++nk, ++count)
95   {
96     unsigned m = this->medoid(nk);
97     if (m==k)
98       continue;
99     dk += this->distance(k, m);
100     dj += this->distance(j, m);
101   }
102   if (!count)
103     return 0;
104   //negative change if inter-medoid distance increases
105   double result = (dk-dj)/count;
106   return result;
107 }
108 
109 
110 //: Clear the cluster vectors
clear_clusters()111 void bsta_k_medoid::clear_clusters()
112 {
113   for (unsigned j=0; j<this->k(); ++j)
114     clusters_[j].clear();
115 }
116 
117 
118 //:assign non-medoids to clusters
form_clusters()119 void bsta_k_medoid::form_clusters()
120 {
121   this->clear_clusters();
122   for (unsigned i = 0; i<n_elements_;++i)
123     if (is_medoid(i))
124       continue;
125     else
126     {
127       //find closest medoid
128       double dmin = std::numeric_limits<double>::max();
129       unsigned jmin=0;
130       for (unsigned j=0; j<this->k(); ++j)
131         if (distance(i,this->medoid(j))<dmin)
132         {
133           jmin = j;
134           dmin = distance(i,this->medoid(j));
135         }
136       //assign i to the closest (jmin)
137       clusters_[jmin].push_back(i);
138     }
139   //put medoids into their own clusters
140   for (unsigned j=0; j<this->k(); ++j)
141     clusters_[j].push_back(medoids_[j]);
142 }
143 
144 
145 //:replace medoid k with medoid j
replace_medoid(const unsigned j,const unsigned k)146 bool bsta_k_medoid::replace_medoid(const unsigned j, const unsigned k)
147 {
148   std::vector<unsigned>::iterator result;
149   result = std::find(medoids_.begin(), medoids_.end(), k);
150   if (result == medoids_.end())
151     return false;
152   (*result) = j;
153   return true;
154 }
155 
156 
157 //: Returns false if swap is not warranted
test_medoid_swap(unsigned & mj,unsigned & mk)158 bool bsta_k_medoid::test_medoid_swap(unsigned& mj, unsigned& mk)
159 {
160   //Impossible values
161   mj = n_elements_, mk = n_elements_;
162 
163   // for each j not a medoid
164   double Sdc_min = std::numeric_limits<double>::max();
165   unsigned jmin=0, kmin=0;
166   for ( unsigned j = 0; j<n_elements_; ++j)
167     if (is_medoid(j))
168       continue;
169     else
170       for (unsigned k = 0; k<this->k(); ++k)
171       {
172         if (verbose_)
173         {
174           std::cout << "\n===== Current Medoids(";
175           for (unsigned m = 0; m<this->k(); ++m)
176             std::cout << medoid(m) << ' ';
177           std::cout << ")\n"
178                    << "Checking Swap " << j << "->" << medoid(k) << '\n';
179         }
180         double Sdc = 0;
181         unsigned count = 0;
182         //Sum up the effect of swapping j for k on each non-medoid element
183         for (unsigned i = 0; i<n_elements_;++i)
184           if (is_medoid(i)||i==j)
185             continue;
186           else{
187             Sdc += dc(i,j,medoid(k));
188             ++count;}
189 
190         if (count>0)
191           Sdc /= count;
192         double med_dist=this->dcm(j, medoid(k));
193         double total = Sdc + med_dist;
194 
195         if (verbose_)
196         {
197           std::cout << "Inter-element distance change " << Sdc << '\n'
198                    << "Inter-medoid distance change " << med_dist << '\n'
199                    << "Total change " << total << '\n';
200         }
201 
202         if (total < Sdc_min)
203         {
204           jmin = j;
205           kmin = medoid(k);
206           Sdc_min = total;
207         }
208       }
209 
210   if (Sdc_min<0)
211   {
212     mj = jmin;
213     mk = kmin;
214     return true;
215   }
216   return false;
217 }
218 
219 //-------------------------------------------------------
220 // Find the best set of k medoids
221 //
do_clustering(const unsigned nk)222 void bsta_k_medoid::do_clustering(const unsigned nk)
223 {
224   assert(nk<=n_elements_);
225 
226   // arbitrarily select k medoids
227   medoids_.clear();
228   // choose first k elements as medoids
229   for (unsigned i = 0; i<nk; ++i)
230     medoids_.push_back(i);
231   // reset the clusters
232   clusters_.clear();
233   clusters_.resize(nk);
234   //We are done if there are no non-medoids
235   if (nk==n_elements_)
236   {
237     for (unsigned i = 0; i<nk; ++i)
238       clusters_[i].push_back(i);
239     return;
240   }
241   //otherwise
242   this->form_clusters();
243   unsigned mj, mk;//potential swap for medoids( swap j for k)
244   while (test_medoid_swap(mj, mk))
245   {
246     replace_medoid(mj, mk);
247     form_clusters();
248     if (verbose_)
249     {
250       std::cout << "***Swapping " << mj << "->" << mk << "***\n";
251       for (unsigned k = 0; k<this->k(); ++k)
252       {
253         std::cout << "Medoid[" << k << "] = " << medoid(k) << '\n'
254                  << "with cluster\n";
255         for (unsigned j = 0; j<size(k); ++j)
256           std::cout << clusters_[k][j] << ' ' ;
257         std::cout << '\n'
258                  << "Total Cluster Distance = "
259                  << total_distance(k)<< '\n';
260       }
261     }
262   }
263   return;
264 }
265