1 // This is brl/bbas/bsta/bsta_k_medoid.h 2 #ifndef bsta_k_medoid_h_ 3 #define bsta_k_medoid_h_ 4 //: 5 // \file 6 // \brief Form k clusters using distance to representative objects (medoids) 7 // \author Joseph Mundy 8 // \date June 11, 2005 9 // 10 // A clustering algorithm based on the distance within the cluster to 11 // a representative element and the total distance between representatives. 12 // The input is a n x n matrix of pairwise distances 13 // The output is a set of k representatives (medoids) that minimize the 14 // sum of the average distance from each medoid to other elements 15 // closest to the medoid and the average distance between medoids. 16 // 17 // For k = 1 the medoid would be the element that minimizes the average 18 // distance to all other elements. The elements are indexed from 0 to n-1. 19 // 20 // Fairly computationally expensive: The space requirement is n x n 21 // The time is k x (n - k) x (n - k) x number of swaps to minimize 22 // total distance. There might be on the order of k swaps (or worse). 23 24 #include <vector> 25 #include <iostream> 26 #include <cassert> 27 #ifdef _MSC_VER 28 # include <vcl_msvc_warnings.h> 29 #endif 30 #include <vbl/vbl_array_2d.h> 31 32 class bsta_k_medoid 33 { 34 public: 35 bsta_k_medoid(unsigned n_elements, bool verbose = false); 36 ~bsta_k_medoid()= default; 37 38 //: insert a distance into the array, the entry j, i is automatically added insert_distance(const unsigned i,const unsigned j,double d)39 inline void insert_distance(const unsigned i, const unsigned j, double d) 40 {assert((i<n_elements_)&&(j<n_elements_)); 41 distance_array_[i][j] = d; distance_array_[j][i] = d;} 42 43 //: The distance between two elements distance(const unsigned i,const unsigned j)44 inline double distance(const unsigned i, const unsigned j) const 45 {assert((i<n_elements_)&&(j<n_elements_)); return distance_array_[i][j];} 46 47 //: cluster the elements using k medoids 48 void do_clustering(const unsigned k); 49 50 //:get number of medoids k()51 inline unsigned k() const 52 {return medoids_.size();} 53 54 //: get a medoid medoid(const unsigned i)55 unsigned medoid(const unsigned i) const 56 {assert(i<medoids_.size()); return medoids_[i];} 57 58 //: is an element a medoid? 59 bool is_medoid(const unsigned i) const; 60 61 //: number of elements in cluster k size(const unsigned k)62 inline unsigned size(const unsigned k) const 63 {assert(k<this->k());return clusters_[k].size();} 64 65 //: the elements in cluster k elements(const unsigned k)66 inline std::vector<unsigned> elements(const unsigned k) 67 {assert(k<this->k());return clusters_[k];} 68 69 //: is an element in cluster k ? 70 bool in_cluster(const unsigned i, const unsigned k) const; 71 72 //:the distance between an element and its medoid 73 double medoid_distance(const unsigned i) const; 74 75 //: the total distance between elements and the medoid in cluster k 76 double total_distance(const unsigned k) const; 77 78 //: print distance array (for debugging) 79 inline void print_distance_array(std::ostream & str = std::cout) 80 {str << '\n' << distance_array_ << '\n';} 81 82 protected: 83 84 //: avg distance change for element i resulting from swapping medoids, j->k. 85 double dc(const unsigned i, const unsigned j, const unsigned k); 86 87 //: avg inter-medoid distance change resulting from swapping medoids, j->k. 88 double dcm(const unsigned j, const unsigned k); 89 90 //: replace medoid k with medoid k in the set of medoids 91 bool replace_medoid(const unsigned j, const unsigned k); 92 93 //: determine if a swap of j with k leads to a reduction in distance 94 bool test_medoid_swap(unsigned& mj, unsigned& mk); 95 96 //: clear the cluster sets 97 void clear_clusters(); 98 99 //: assign non-medoids to their nearest medoid, forming clusters 100 void form_clusters(); 101 102 private: 103 //: print useful debug messages 104 bool verbose_; 105 106 //: the size of the distance array 107 unsigned n_elements_; 108 109 //: the k medoids 110 std::vector<unsigned> medoids_; 111 112 //: The set of elements closest to a given medoid 113 std::vector<std::vector<unsigned> > clusters_; 114 115 //: The array of pair-wise distances between elements 116 vbl_array_2d<double> distance_array_; 117 }; 118 119 #endif // bsta_k_medoid_h_ 120