1 /** @file hikmeans.c
2  ** @brief Hierarchical Integer K-Means Clustering - Declaration
3  ** @author Brian Fulkerson
4  ** @author Andrea Vedaldi
5  **/
6 
7 /*
8 Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
9 All rights reserved.
10 
11 This file is part of the VLFeat library and is made available under
12 the terms of the BSD license (see the COPYING file).
13 */
14 
15 /** @file hikmeans.h
16  ** @brief Hierarchical integer K-Means clustering
17  **
18  ** Hierarchical integer K-Means clustering (HIKM) is a simple
19  ** hierarchical version of integer K-Means (@ref ikmeans.h
20  ** "IKM"). The algorithm recursively applies integer K-means to create
21  ** more refined partitions of the data.
22  **
23  ** Create a tree with ::vl_hikm_new() and delete it with
24  ** ::vl_hikm_delete(). Use ::vl_hikm_train() to build the tree
25  ** from training data and ::vl_hikm_push() to project new data down
26  ** a HIKM tree.
27  **
28  ** @section hikm-tree HIKM tree
29  **
30  ** The HIKM tree is represented by a ::VlHIKMTree structure, which
31  ** contains a tree composed of ::VlHIKMNode. Each node is an
32  ** integer K-means filter which partitions the data into @c K
33  ** clusters.
34  **/
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <math.h>
40 
41 #include "hikmeans.h"
42 
43 /** ------------------------------------------------------------------
44  ** @internal
45  ** @brief Copy a subset of the data to a buffer
46  ** @param data Data
47  ** @param ids Data labels
48  ** @param N Number of indices
49  ** @param M Data dimensionality
50  ** @param id Label of data to copy
51  ** @param N2 Number of data copied (out)
52  ** @return a new buffer with a copy of the selected data.
53  **/
54 
55 vl_uint8*
vl_hikm_copy_subset(vl_uint8 const * data,vl_uint32 * ids,vl_size N,vl_size M,vl_uint32 id,vl_size * N2)56 vl_hikm_copy_subset (vl_uint8 const * data,
57                      vl_uint32 *ids,
58                      vl_size N, vl_size M,
59                      vl_uint32 id, vl_size *N2)
60 {
61   vl_uindex i ;
62   vl_size count = 0;
63 
64   /* count how many data points with this label there are */
65   for (i = 0 ; i < N ; i++) {
66     if (ids[i] == id) {
67       count ++ ;
68     }
69   }
70   *N2 = count ;
71 
72   /* copy each datum to the buffer */
73   {
74     vl_uint8 *new_data = vl_malloc (sizeof(*new_data) * M * count);
75     count = 0;
76     for (i = 0 ; i < N ; i ++) {
77       if (ids[i] == id) {
78         memcpy(new_data + count * M,
79                data + i * M,
80                sizeof(*new_data) * M);
81         count ++ ;
82       }
83     }
84     *N2 = count ;
85     return new_data ;
86   }
87 }
88 
89 /** ------------------------------------------------------------------
90  ** @brief Compute HIKM clustering.
91  **
92  ** @param tree   HIKM tree to initialize.
93  ** @param data   Data to cluster.
94  ** @param N      Number of data points.
95  ** @param K      Number of clusters for this node.
96  ** @param height Tree height.
97  **
98  ** @remark height cannot be smaller than 1.
99  **
100  ** @return a new HIKM node representing a sub-clustering.
101  **/
102 
103 static VlHIKMNode *
xmeans(VlHIKMTree * tree,vl_uint8 const * data,vl_size N,vl_size K,vl_size height)104 xmeans (VlHIKMTree *tree,
105         vl_uint8 const *data,
106         vl_size N, vl_size K, vl_size height)
107 {
108   VlHIKMNode *node = vl_malloc (sizeof(*node)) ;
109   vl_uint32 *ids = vl_malloc (sizeof(*ids) * N) ;
110 
111   node->filter = vl_ikm_new (tree -> method) ;
112   node->children = (height == 1) ? 0 : vl_malloc (sizeof(*node->children) * K) ;
113 
114   vl_ikm_set_max_niters (node->filter, tree->max_niters) ;
115   vl_ikm_set_verbosity  (node->filter, tree->verb - 1  ) ;
116   vl_ikm_init_rand_data (node->filter, data, tree->M, N, K) ;
117   vl_ikm_train (node->filter, data, N) ;
118   vl_ikm_push (node->filter, ids, data, N) ;
119 
120   /* recursively process each child */
121   if (height > 1) {
122     vl_uindex k ;
123     for (k = 0 ; k < K ; ++k) {
124       vl_size partition_N ;
125       vl_size partition_K ;
126       vl_uint8 *partition ;
127 
128       partition = vl_hikm_copy_subset
129         (data, ids, N, tree->M, (vl_uint32)k, &partition_N) ;
130 
131       partition_K = VL_MIN (K, partition_N) ;
132 
133       node->children [k] = xmeans
134         (tree, partition, partition_N, partition_K, height - 1) ;
135 
136       vl_free (partition) ;
137 
138       if (tree->verb > (signed)tree->depth - (signed)height) {
139         VL_PRINTF("hikmeans: branch at depth %d: %6.1f %% completed\n",
140                   tree->depth - height,
141                   (double) (k+1) / K * 100) ;
142       }
143     }
144   }
145 
146   vl_free (ids) ;
147   return node ;
148 }
149 
150 /** ------------------------------------------------------------------
151  ** @internal
152  ** @brief Delete node
153  **
154  ** @param node to delete.
155  **
156  ** The function deletes recursively @a node and all its descendent.
157  **/
158 
159 static void
xdelete(VlHIKMNode * node)160 xdelete (VlHIKMNode *node)
161 {
162   if(node) {
163     vl_uindex k ;
164     if (node->children) {
165       for(k = 0 ; k < vl_ikm_get_K (node->filter) ; ++k)
166         xdelete (node->children[k]) ;
167       vl_free (node->children) ;
168     }
169     if (node->filter) {
170       vl_ikm_delete (node->filter) ;
171     }
172     vl_free(node);
173   }
174 }
175 
176 /** ------------------------------------------------------------------
177  ** @brief New HIKM tree
178  ** @param method clustering method.
179  ** @return new HIKM tree.
180  **/
181 
182 VlHIKMTree *
vl_hikm_new(int method)183 vl_hikm_new (int method)
184 {
185   VlHIKMTree *f = vl_calloc (sizeof(VlHIKMTree), 1) ;
186   f->max_niters = 200 ;
187   f->method = method ;
188   return f ;
189 }
190 
191 /** ------------------------------------------------------------------
192  ** @brief Delete HIKM tree
193  ** @param f HIKM tree.
194  **/
195 
196 void
vl_hikm_delete(VlHIKMTree * f)197 vl_hikm_delete (VlHIKMTree *f)
198 {
199   if (f) {
200     xdelete (f->root) ;
201     vl_free (f) ;
202   }
203 }
204 
205 /** ------------------------------------------------------------------
206  ** @brief Initialize HIKM tree
207  ** @param f HIKM tree.
208  ** @param M Data dimensionality.
209  ** @param K Number of clusters per node.
210  ** @param depth Tree depth.
211  ** @return a new HIKM tree representing the clustering.
212  **
213  ** @remark @a depth cannot be smaller than 1.
214  **/
215 
216 void
vl_hikm_init(VlHIKMTree * f,vl_size M,vl_size K,vl_size depth)217 vl_hikm_init (VlHIKMTree *f, vl_size M, vl_size K, vl_size depth)
218 {
219   assert(depth > 0) ;
220   assert(M > 0) ;
221   assert(K > 0) ;
222 
223   xdelete (f -> root) ;
224   f->root = 0;
225   f->M = M ;
226   f->K = K ;
227   f->depth = depth ;
228 }
229 
230 /** ------------------------------------------------------------------
231  ** @brief Train HIKM tree
232  ** @param f       HIKM tree.
233  ** @param data    Data to cluster.
234  ** @param N       Number of data.
235  **/
236 
237 void
vl_hikm_train(VlHIKMTree * f,vl_uint8 const * data,vl_size N)238 vl_hikm_train (VlHIKMTree *f, vl_uint8 const *data, vl_size N)
239 {
240   f->root= xmeans (f, data, N, VL_MIN(f->K, N), f->depth) ;
241 }
242 
243 /** ------------------------------------------------------------------
244  ** @brief Project data down HIKM tree
245  ** @param f HIKM tree.
246  ** @param asgn Path down the tree (out).
247  ** @param data Data to project.
248  ** @param N Number of data.
249  **
250  ** The function writes to @a asgn the path of the data @a data
251  ** down the HIKM tree @a f. The parameter @a asgn must point to
252  ** an array of @c M by @c N elements, where @c M is the depth of
253  ** the HIKM tree and @c N is the number of data point to process.
254  **/
255 
256 void
vl_hikm_push(VlHIKMTree * f,vl_uint32 * asgn,vl_uint8 const * data,vl_size N)257 vl_hikm_push (VlHIKMTree *f, vl_uint32 *asgn, vl_uint8 const *data, vl_size N)
258 {
259   vl_uindex i, d ;
260   vl_size M = vl_hikm_get_ndims (f) ;
261   vl_size depth = vl_hikm_get_depth (f) ;
262 
263   /* for each datum */
264   for(i = 0 ; i < N ; i++) {
265     VlHIKMNode *node = f->root ;
266     d = 0 ;
267     while (node) {
268       vl_uint32 best ;
269       vl_ikm_push (node->filter,
270                    &best,
271                    data + i * M, 1) ;
272       asgn[i * depth + d] = best ;
273       ++ d ;
274       if (!node->children) break ;
275       node = node->children [best] ;
276     }
277   }
278 }
279 
280 /* ---------------------------------------------------------------- */
281 /*                                              Setters and getters */
282 /* ---------------------------------------------------------------- */
283 
284 /** @brief Get data dimensionality
285  ** @param f HIKM tree.
286  ** @return data dimensionality.
287  **/
288 
289 vl_size
vl_hikm_get_ndims(VlHIKMTree const * f)290 vl_hikm_get_ndims (VlHIKMTree const* f)
291 {
292   return f->M ;
293 }
294 
295 /** @brief Get K
296  ** @param f HIKM tree.
297  ** @return K.
298  **/
299 
300 vl_size
vl_hikm_get_K(VlHIKMTree const * f)301 vl_hikm_get_K (VlHIKMTree const *f)
302 {
303   return f->K ;
304 }
305 
306 /** @brief Get depth
307  ** @param f HIKM tree.
308  ** @return depth.
309  **/
310 
311 vl_size
vl_hikm_get_depth(VlHIKMTree const * f)312 vl_hikm_get_depth (VlHIKMTree const *f)
313 {
314   return f->depth ;
315 }
316 
317 
318 /** @brief Get verbosity level
319  ** @param f HIKM tree.
320  ** @return verbosity level.
321  **/
322 
323 int
vl_hikm_get_verbosity(VlHIKMTree const * f)324 vl_hikm_get_verbosity (VlHIKMTree const *f)
325 {
326   return f->verb ;
327 }
328 
329 /** @brief Get maximum number of iterations
330  ** @param f HIKM tree.
331  ** @return maximum number of iterations.
332  **/
333 
334 vl_size
vl_hikm_get_max_niters(VlHIKMTree const * f)335 vl_hikm_get_max_niters (VlHIKMTree const *f)
336 {
337   return f-> max_niters ;
338 }
339 
340 /** @brief Get maximum number of iterations
341  ** @param f HIKM tree.
342  ** @return maximum number of iterations.
343  **/
344 
345 VlHIKMNode const *
vl_hikm_get_root(VlHIKMTree const * f)346 vl_hikm_get_root (VlHIKMTree const *f)
347 {
348   return f->root ;
349 }
350 
351 /** @brief Set verbosity level
352  ** @param f HIKM tree.
353  ** @param verb verbosity level.
354  **/
355 
356 void
vl_hikm_set_verbosity(VlHIKMTree * f,int verb)357 vl_hikm_set_verbosity (VlHIKMTree *f, int verb)
358 {
359   f->verb = verb ;
360 }
361 
362 /** @brief Set maximum number of iterations
363  ** @param f HIKM tree.
364  ** @param max_niters maximum number of iterations.
365  **/
366 
367 void
vl_hikm_set_max_niters(VlHIKMTree * f,int max_niters)368 vl_hikm_set_max_niters (VlHIKMTree *f, int max_niters)
369 {
370   f->max_niters = max_niters ;
371 }
372