1 /** @file hikmeans.c
2 ** @brief Hierarchical Integer K-Means Clustering - Declaration
3 ** @author Brian Fulkerson
4 ** @author Andrea Vedaldi
5 **/
6
7 /*
8 Copyright (C) 2007-12 Andrea Vedaldi and Brian Fulkerson.
9 All rights reserved.
10
11 This file is part of the VLFeat library and is made available under
12 the terms of the BSD license (see the COPYING file).
13 */
14
15 /** @file hikmeans.h
16 ** @brief Hierarchical integer K-Means clustering
17 **
18 ** Hierarchical integer K-Means clustering (HIKM) is a simple
19 ** hierarchical version of integer K-Means (@ref ikmeans.h
20 ** "IKM"). The algorithm recursively applies integer K-means to create
21 ** more refined partitions of the data.
22 **
23 ** Create a tree with ::vl_hikm_new() and delete it with
24 ** ::vl_hikm_delete(). Use ::vl_hikm_train() to build the tree
25 ** from training data and ::vl_hikm_push() to project new data down
26 ** a HIKM tree.
27 **
28 ** @section hikm-tree HIKM tree
29 **
30 ** The HIKM tree is represented by a ::VlHIKMTree structure, which
31 ** contains a tree composed of ::VlHIKMNode. Each node is an
32 ** integer K-means filter which partitions the data into @c K
33 ** clusters.
34 **/
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <math.h>
40
41 #include "hikmeans.h"
42
43 /** ------------------------------------------------------------------
44 ** @internal
45 ** @brief Copy a subset of the data to a buffer
46 ** @param data Data
47 ** @param ids Data labels
48 ** @param N Number of indices
49 ** @param M Data dimensionality
50 ** @param id Label of data to copy
51 ** @param N2 Number of data copied (out)
52 ** @return a new buffer with a copy of the selected data.
53 **/
54
55 vl_uint8*
vl_hikm_copy_subset(vl_uint8 const * data,vl_uint32 * ids,vl_size N,vl_size M,vl_uint32 id,vl_size * N2)56 vl_hikm_copy_subset (vl_uint8 const * data,
57 vl_uint32 *ids,
58 vl_size N, vl_size M,
59 vl_uint32 id, vl_size *N2)
60 {
61 vl_uindex i ;
62 vl_size count = 0;
63
64 /* count how many data points with this label there are */
65 for (i = 0 ; i < N ; i++) {
66 if (ids[i] == id) {
67 count ++ ;
68 }
69 }
70 *N2 = count ;
71
72 /* copy each datum to the buffer */
73 {
74 vl_uint8 *new_data = vl_malloc (sizeof(*new_data) * M * count);
75 count = 0;
76 for (i = 0 ; i < N ; i ++) {
77 if (ids[i] == id) {
78 memcpy(new_data + count * M,
79 data + i * M,
80 sizeof(*new_data) * M);
81 count ++ ;
82 }
83 }
84 *N2 = count ;
85 return new_data ;
86 }
87 }
88
89 /** ------------------------------------------------------------------
90 ** @brief Compute HIKM clustering.
91 **
92 ** @param tree HIKM tree to initialize.
93 ** @param data Data to cluster.
94 ** @param N Number of data points.
95 ** @param K Number of clusters for this node.
96 ** @param height Tree height.
97 **
98 ** @remark height cannot be smaller than 1.
99 **
100 ** @return a new HIKM node representing a sub-clustering.
101 **/
102
103 static VlHIKMNode *
xmeans(VlHIKMTree * tree,vl_uint8 const * data,vl_size N,vl_size K,vl_size height)104 xmeans (VlHIKMTree *tree,
105 vl_uint8 const *data,
106 vl_size N, vl_size K, vl_size height)
107 {
108 VlHIKMNode *node = vl_malloc (sizeof(*node)) ;
109 vl_uint32 *ids = vl_malloc (sizeof(*ids) * N) ;
110
111 node->filter = vl_ikm_new (tree -> method) ;
112 node->children = (height == 1) ? 0 : vl_malloc (sizeof(*node->children) * K) ;
113
114 vl_ikm_set_max_niters (node->filter, tree->max_niters) ;
115 vl_ikm_set_verbosity (node->filter, tree->verb - 1 ) ;
116 vl_ikm_init_rand_data (node->filter, data, tree->M, N, K) ;
117 vl_ikm_train (node->filter, data, N) ;
118 vl_ikm_push (node->filter, ids, data, N) ;
119
120 /* recursively process each child */
121 if (height > 1) {
122 vl_uindex k ;
123 for (k = 0 ; k < K ; ++k) {
124 vl_size partition_N ;
125 vl_size partition_K ;
126 vl_uint8 *partition ;
127
128 partition = vl_hikm_copy_subset
129 (data, ids, N, tree->M, (vl_uint32)k, &partition_N) ;
130
131 partition_K = VL_MIN (K, partition_N) ;
132
133 node->children [k] = xmeans
134 (tree, partition, partition_N, partition_K, height - 1) ;
135
136 vl_free (partition) ;
137
138 if (tree->verb > (signed)tree->depth - (signed)height) {
139 VL_PRINTF("hikmeans: branch at depth %d: %6.1f %% completed\n",
140 tree->depth - height,
141 (double) (k+1) / K * 100) ;
142 }
143 }
144 }
145
146 vl_free (ids) ;
147 return node ;
148 }
149
150 /** ------------------------------------------------------------------
151 ** @internal
152 ** @brief Delete node
153 **
154 ** @param node to delete.
155 **
156 ** The function deletes recursively @a node and all its descendent.
157 **/
158
159 static void
xdelete(VlHIKMNode * node)160 xdelete (VlHIKMNode *node)
161 {
162 if(node) {
163 vl_uindex k ;
164 if (node->children) {
165 for(k = 0 ; k < vl_ikm_get_K (node->filter) ; ++k)
166 xdelete (node->children[k]) ;
167 vl_free (node->children) ;
168 }
169 if (node->filter) {
170 vl_ikm_delete (node->filter) ;
171 }
172 vl_free(node);
173 }
174 }
175
176 /** ------------------------------------------------------------------
177 ** @brief New HIKM tree
178 ** @param method clustering method.
179 ** @return new HIKM tree.
180 **/
181
182 VlHIKMTree *
vl_hikm_new(int method)183 vl_hikm_new (int method)
184 {
185 VlHIKMTree *f = vl_calloc (sizeof(VlHIKMTree), 1) ;
186 f->max_niters = 200 ;
187 f->method = method ;
188 return f ;
189 }
190
191 /** ------------------------------------------------------------------
192 ** @brief Delete HIKM tree
193 ** @param f HIKM tree.
194 **/
195
196 void
vl_hikm_delete(VlHIKMTree * f)197 vl_hikm_delete (VlHIKMTree *f)
198 {
199 if (f) {
200 xdelete (f->root) ;
201 vl_free (f) ;
202 }
203 }
204
205 /** ------------------------------------------------------------------
206 ** @brief Initialize HIKM tree
207 ** @param f HIKM tree.
208 ** @param M Data dimensionality.
209 ** @param K Number of clusters per node.
210 ** @param depth Tree depth.
211 ** @return a new HIKM tree representing the clustering.
212 **
213 ** @remark @a depth cannot be smaller than 1.
214 **/
215
216 void
vl_hikm_init(VlHIKMTree * f,vl_size M,vl_size K,vl_size depth)217 vl_hikm_init (VlHIKMTree *f, vl_size M, vl_size K, vl_size depth)
218 {
219 assert(depth > 0) ;
220 assert(M > 0) ;
221 assert(K > 0) ;
222
223 xdelete (f -> root) ;
224 f->root = 0;
225 f->M = M ;
226 f->K = K ;
227 f->depth = depth ;
228 }
229
230 /** ------------------------------------------------------------------
231 ** @brief Train HIKM tree
232 ** @param f HIKM tree.
233 ** @param data Data to cluster.
234 ** @param N Number of data.
235 **/
236
237 void
vl_hikm_train(VlHIKMTree * f,vl_uint8 const * data,vl_size N)238 vl_hikm_train (VlHIKMTree *f, vl_uint8 const *data, vl_size N)
239 {
240 f->root= xmeans (f, data, N, VL_MIN(f->K, N), f->depth) ;
241 }
242
243 /** ------------------------------------------------------------------
244 ** @brief Project data down HIKM tree
245 ** @param f HIKM tree.
246 ** @param asgn Path down the tree (out).
247 ** @param data Data to project.
248 ** @param N Number of data.
249 **
250 ** The function writes to @a asgn the path of the data @a data
251 ** down the HIKM tree @a f. The parameter @a asgn must point to
252 ** an array of @c M by @c N elements, where @c M is the depth of
253 ** the HIKM tree and @c N is the number of data point to process.
254 **/
255
256 void
vl_hikm_push(VlHIKMTree * f,vl_uint32 * asgn,vl_uint8 const * data,vl_size N)257 vl_hikm_push (VlHIKMTree *f, vl_uint32 *asgn, vl_uint8 const *data, vl_size N)
258 {
259 vl_uindex i, d ;
260 vl_size M = vl_hikm_get_ndims (f) ;
261 vl_size depth = vl_hikm_get_depth (f) ;
262
263 /* for each datum */
264 for(i = 0 ; i < N ; i++) {
265 VlHIKMNode *node = f->root ;
266 d = 0 ;
267 while (node) {
268 vl_uint32 best ;
269 vl_ikm_push (node->filter,
270 &best,
271 data + i * M, 1) ;
272 asgn[i * depth + d] = best ;
273 ++ d ;
274 if (!node->children) break ;
275 node = node->children [best] ;
276 }
277 }
278 }
279
280 /* ---------------------------------------------------------------- */
281 /* Setters and getters */
282 /* ---------------------------------------------------------------- */
283
284 /** @brief Get data dimensionality
285 ** @param f HIKM tree.
286 ** @return data dimensionality.
287 **/
288
289 vl_size
vl_hikm_get_ndims(VlHIKMTree const * f)290 vl_hikm_get_ndims (VlHIKMTree const* f)
291 {
292 return f->M ;
293 }
294
295 /** @brief Get K
296 ** @param f HIKM tree.
297 ** @return K.
298 **/
299
300 vl_size
vl_hikm_get_K(VlHIKMTree const * f)301 vl_hikm_get_K (VlHIKMTree const *f)
302 {
303 return f->K ;
304 }
305
306 /** @brief Get depth
307 ** @param f HIKM tree.
308 ** @return depth.
309 **/
310
311 vl_size
vl_hikm_get_depth(VlHIKMTree const * f)312 vl_hikm_get_depth (VlHIKMTree const *f)
313 {
314 return f->depth ;
315 }
316
317
318 /** @brief Get verbosity level
319 ** @param f HIKM tree.
320 ** @return verbosity level.
321 **/
322
323 int
vl_hikm_get_verbosity(VlHIKMTree const * f)324 vl_hikm_get_verbosity (VlHIKMTree const *f)
325 {
326 return f->verb ;
327 }
328
329 /** @brief Get maximum number of iterations
330 ** @param f HIKM tree.
331 ** @return maximum number of iterations.
332 **/
333
334 vl_size
vl_hikm_get_max_niters(VlHIKMTree const * f)335 vl_hikm_get_max_niters (VlHIKMTree const *f)
336 {
337 return f-> max_niters ;
338 }
339
340 /** @brief Get maximum number of iterations
341 ** @param f HIKM tree.
342 ** @return maximum number of iterations.
343 **/
344
345 VlHIKMNode const *
vl_hikm_get_root(VlHIKMTree const * f)346 vl_hikm_get_root (VlHIKMTree const *f)
347 {
348 return f->root ;
349 }
350
351 /** @brief Set verbosity level
352 ** @param f HIKM tree.
353 ** @param verb verbosity level.
354 **/
355
356 void
vl_hikm_set_verbosity(VlHIKMTree * f,int verb)357 vl_hikm_set_verbosity (VlHIKMTree *f, int verb)
358 {
359 f->verb = verb ;
360 }
361
362 /** @brief Set maximum number of iterations
363 ** @param f HIKM tree.
364 ** @param max_niters maximum number of iterations.
365 **/
366
367 void
vl_hikm_set_max_niters(VlHIKMTree * f,int max_niters)368 vl_hikm_set_max_niters (VlHIKMTree *f, int max_niters)
369 {
370 f->max_niters = max_niters ;
371 }
372