1 // This is mul/mbl/mbl_clusters.hxx
2 #ifndef mbl_clusters_hxx_
3 #define mbl_clusters_hxx_
4 //:
5 // \file
6 // \brief Class to record clusters of data, for faster neighbour finding
7 // \author Tim Cootes
8
9 #include <iostream>
10 #include <algorithm>
11 #include "mbl_clusters.h"
12 #include <cassert>
13 #ifdef _MSC_VER
14 # include <vcl_msvc_warnings.h>
15 #endif
16 #include <vsl/vsl_binary_io.h>
17 #include <vsl/vsl_vector_io.h>
18
19 //: Default constructor
20 template<class T, class D>
mbl_clusters()21 mbl_clusters<T,D>::mbl_clusters()
22 : data_(nullptr)
23 {
24 }
25
26 //: Define maximum radius for each cluster
27 template<class T, class D>
set_max_r(double r)28 void mbl_clusters<T,D>::set_max_r(double r)
29 {
30 max_r_ = r;
31 }
32
33 //: Empty clusters
34 template<class T, class D>
empty()35 void mbl_clusters<T,D>::empty()
36 {
37 p_.resize(0);
38 index_.resize(0);
39 r_.resize(0);
40 }
41
42 //: Define external data array (pointer retained)
43 // Empty existing clusters, then process every element of data
44 // to create clusters, by calling add_object()
45 template<class T, class D>
set_data(const std::vector<T> & data)46 void mbl_clusters<T,D>::set_data(const std::vector<T>& data)
47 {
48 empty();
49 data_ = &data;
50 unsigned n = data.size();
51 for (unsigned i=0;i<n;++i) add_object(i);
52 }
53
54 //: Define external data array (pointer retained)
55 // Use carefully! This sets the internal pointer to
56 // point to data. Really only to be used after loading
57 // internals using b_read(bfs).
58 template<class T, class D>
set_data_ptr(const std::vector<T> & data)59 void mbl_clusters<T,D>::set_data_ptr(const std::vector<T>& data)
60 {
61 data_=&data;
62 }
63
64 //: Return index of nearest object in data() to t
65 // Nearest object in data() to t is given by data()[nearest(t,d)];
66 // The distance to the point is d
67 template<class T, class D>
nearest(const T & t,double & d) const68 unsigned mbl_clusters<T,D>::nearest(const T& t, double& d) const
69 {
70 assert(data_!=nullptr);
71
72 // Initialise with first in data
73 unsigned best_i = 0;
74 d = D::d(data()[0],t);
75
76 const T* data_ptr = &data()[0];
77
78 // Try each cluster in turn
79 for (unsigned j=0;j<p_.size();++j)
80 {
81 double dj = D::d(t,p_[j]);
82 if (dj-r_[j]<d)
83 {
84 // There may be a point in the cluster closer than the current best
85 const std::vector<unsigned>& ind = index_[j];
86 for (unsigned int i : ind)
87 {
88 double di=D::d(data_ptr[i],t);
89 if (di<d) { d=di; best_i=i; }
90 }
91 }
92 }
93 return best_i;
94 }
95
96 //: Return index of nearest object in data() to t
97 // Consider only objects in clusters given in c_list
98 // Nearest object in data() to t is given by data()[nearest(t,d)];
99 // The distance to the point is d
100 template<class T, class D>
nearest(const T & t,double & d,const std::vector<unsigned> & c_list) const101 unsigned mbl_clusters<T,D>::nearest(const T& t, double& d,
102 const std::vector<unsigned>& c_list) const
103 {
104 assert(data_!=nullptr);
105
106 // Initialise with first in set for c_list[0]
107 unsigned best_i = 0;
108 d = D::d(data()[index_[c_list[0]][0]],t);
109
110 const T* data_ptr = &data()[0];
111
112 // Try each cluster in turn
113 for (unsigned int j : c_list)
114 {
115 double dj = D::d(t,p_[j]);
116 if (dj-r_[j]<d)
117 {
118 // There may be a point in the cluster closer than the current best
119 const std::vector<unsigned>& ind = index_[j];
120 for (unsigned int i : ind)
121 {
122 double di=D::d(data_ptr[i],t);
123 if (di<d) { d=di; best_i=i; }
124 }
125 }
126 }
127 return best_i;
128 }
129
130
131 //: Return index of nearest cluster in data() to t
132 // Finds nearest cluster key point to t
133 // The distance to the point is d
134 template<class T, class D>
nearest_cluster(const T & t,double & d) const135 unsigned mbl_clusters<T,D>::nearest_cluster(const T& t, double& d) const
136 {
137 assert(p_.size()>0);
138
139 d = D::d(p_[0],t);
140 unsigned best_j = 0;
141
142 // Try each cluster in turn
143 for (unsigned j=1;j<p_.size();++j)
144 {
145 double dj = D::d(t,p_[j]);
146 if (dj<d) { d=dj; best_j=j; }
147 }
148
149 return best_j;
150 }
151
152 //: Return indices of clusters which may contain nearest point to t
153 // Searches through all the clusters
154 template<class T, class D>
nearest_clusters(const T & t,double & max_d,std::vector<unsigned> & near_c) const155 void mbl_clusters<T,D>::nearest_clusters(const T& t, double& max_d,
156 std::vector<unsigned>& near_c) const
157 {
158 assert(p_.size()>0);
159
160 std::vector<unsigned> c1;
161 std::vector<double> d1;
162
163 double d = D::d(p_[0],t);
164 c1.push_back(0);
165 d1.push_back(d);
166 max_d = d+r_[0];
167
168 // Try each cluster in turn, recording any that might include closest
169 for (unsigned j=1;j<p_.size();++j)
170 {
171 double dj = D::d(t,p_[j]);
172 if (dj-r_[j]<=max_d)
173 {
174 c1.push_back(j);
175 d1.push_back(dj);
176 max_d=std::min(max_d,dj+r_[j]);
177 }
178 }
179
180 // Pass through the data again to prune out far clusters
181 near_c.resize(0);
182 for (unsigned i=0;i<c1.size();++i)
183 {
184 if (d1[i]-r_[c1[i]]<=max_d) near_c.push_back(c1[i]);
185 }
186 }
187
188 //: Return indices of clusters which may contain nearest point to t
189 // Searches through clusters listed in c_list.
190 // On input, max_d gives initial limit on distance.
191 // On exit, max_d gives the revised limit on the distance
192 template<class T, class D>
nearest_clusters(const T & t,double & max_d,const std::vector<unsigned> & c_list,std::vector<unsigned> & near_c) const193 void mbl_clusters<T,D>::nearest_clusters(const T& t, double& max_d,
194 const std::vector<unsigned>& c_list,
195 std::vector<unsigned>& near_c) const
196 {
197 assert(p_.size()>0);
198
199 // Storage for first pass
200 std::vector<unsigned> c1;
201 std::vector<double> d1;
202
203 // Try each cluster in turn, recording any that might include closest
204 for (unsigned int j : c_list)
205 {
206 double dj = D::d(t,p_[j]);
207 if (dj-r_[j]<=max_d)
208 {
209 c1.push_back(j);
210 d1.push_back(dj);
211 max_d=std::min(max_d,dj+r_[j]);
212 }
213 }
214
215 // Pass through the data again to prune out far clusters
216 near_c.resize(0);
217 for (unsigned i=0;i<c1.size();++i)
218 {
219 if (d1[i]-r_[c1[i]]<=max_d) near_c.push_back(c1[i]);
220 }
221 }
222
223 //: Create a new cluster around point index i
224 // Return index of cluster
225 template<class T, class D>
create_cluster(unsigned new_i,double r)226 unsigned mbl_clusters<T,D>::create_cluster(unsigned new_i, double r)
227 {
228 // Create a new cluster using this as a key point
229 p_.push_back(data()[new_i]);
230 r_.push_back(r);
231 std::vector<unsigned> ind(1);
232 ind[0]=new_i;
233 index_.push_back(ind);
234 return p_.size()-1;
235 }
236
237 //: Append new object with index i and assign to a cluster
238 // Assumes that new object data()[i] is available.
239 // Deduce which cluster it belongs to and add it.
240 // Create new cluster if further than max_r() from any.
241 // Return index of cluster it is assigned to
242 template<class T, class D>
add_object(unsigned new_i,double r)243 unsigned mbl_clusters<T,D>::add_object(unsigned new_i, double r)
244 {
245 assert(new_i<data_->size());
246
247 // If initially empty, create one cluster
248 if (p_.empty())
249 return create_cluster(new_i, r);
250
251 double d;
252 unsigned j=nearest_cluster(data()[new_i],d);
253 d+=r; // Allow for new_i being key point of another cluster, radius r
254 if (d<max_r_)
255 {
256 // Add it to cluster j
257 index_[j].push_back(new_i);
258 if (d>r_[j]) r_[j]=d; // Update max radius of cluster
259 }
260 else
261 j=create_cluster(new_i,r);
262
263 return j;
264 }
265
266 //: Assign object data()[i] to cluster ci, knowing distance r.
267 // r is the distance D::d(data()[i],p()[ci])
268 template<class T, class D>
assign_to_cluster(unsigned i,unsigned ci,double r)269 void mbl_clusters<T,D>::assign_to_cluster(unsigned i, unsigned ci,
270 double r)
271 {
272 index_[ci].push_back(i);
273 if (r>r_[ci]) r_[ci]=r;
274 }
275
276
277 //: Append new object with index i (data()[i]), creating a new cluster
278 // Return index of resulting cluster, which is initialised with
279 // given radius.
280 unsigned add_cluster(unsigned i, double r=0.0);
281
282
283 //: Finds list of clusters whose keypoint is within d of t
284 // Returns number of such clusters. If >0, then nearest_c
285 // gives index of cluster with centre nearest to t
286 template<class T, class D>
clusters_within_d(const T & t,double d,std::vector<unsigned> & c_list,unsigned & nearest_c,double & min_d)287 unsigned mbl_clusters<T,D>::clusters_within_d(const T& t, double d,
288 std::vector<unsigned>& c_list,
289 unsigned& nearest_c,
290 double& min_d)
291 {
292 c_list.resize(0);
293 nearest_c=0;
294 min_d = d+1;
295 for (unsigned i=0;i<p_.size();++i)
296 {
297 double di=D::d(t,p_[i]);
298 if (di<=d)
299 {
300 c_list.push_back(i);
301 if (di<min_d) { nearest_c=i; min_d=di; }
302 }
303 }
304 return c_list.size();
305 }
306
307 //: Finds list of clusters whose keypoint is within d of t
308 // Returns number of such clusters. If >0, then nearest_c
309 // gives index of cluster with centre nearest to t
310 template<class T, class D>
clusters_within_d(const T & t,double d,const std::vector<unsigned> & in_list,std::vector<unsigned> & c_list,unsigned & nearest_c,double & min_d)311 unsigned mbl_clusters<T,D>::clusters_within_d(const T& t, double d,
312 const std::vector<unsigned>& in_list,
313 std::vector<unsigned>& c_list,
314 unsigned& nearest_c,
315 double& min_d)
316 {
317 c_list.resize(0);
318 nearest_c=0;
319 min_d = d+1;
320 for (unsigned int i : in_list)
321 {
322 double di=D::d(t,p_[i]);
323 if (di<=d)
324 {
325 c_list.push_back(i);
326 if (di<min_d) { nearest_c=i; min_d=di; }
327 }
328 }
329 return c_list.size();
330 }
331
332
333 //: Finds list of clusters whose keypoint is within max_r() of t
334 // Returns number of such clusters. If >0, then nearest_c
335 // gives index of cluster with centre nearest to t
336 template<class T, class D>
clusters_within_max_r(const T & t,std::vector<unsigned> & c_list,unsigned & nearest_c,double & min_d)337 unsigned mbl_clusters<T,D>::clusters_within_max_r(const T& t,
338 std::vector<unsigned>& c_list,
339 unsigned& nearest_c,
340 double& min_d)
341 {
342 return clusters_within_d(t,max_r(),c_list,nearest_c,min_d);
343 }
344
345 //: Finds list of clusters whose keypoint is within max_r() of t
346 template<class T, class D>
clusters_within_max_r(const T & t,const std::vector<unsigned> & in_list,std::vector<unsigned> & c_list,unsigned & nearest_c,double & min_d)347 unsigned mbl_clusters<T,D>::clusters_within_max_r(const T& t,
348 const std::vector<unsigned>& in_list,
349 std::vector<unsigned>& c_list,
350 unsigned& nearest_c,
351 double& min_d)
352 {
353 return clusters_within_d(t,max_r(),in_list,c_list,nearest_c,min_d);
354 }
355
356 //: Create list of object indices in listed clusters
357 // Concatenates lists of indices for each cluster in c_list
358 template<class T, class D>
in_clusters(const std::vector<unsigned> & c_list,std::vector<unsigned> & o_list) const359 void mbl_clusters<T,D>::in_clusters(const std::vector<unsigned>& c_list,
360 std::vector<unsigned>& o_list) const
361 {
362 o_list.resize(0);
363 for (unsigned int i : c_list)
364 {
365 const std::vector<unsigned>& ind = index()[i];
366 for (unsigned int j : ind) o_list.push_back(j);
367 }
368 }
369
370
371 //: Write out list of elements in each cluster
372 template<class T, class D>
print_cluster_sets(std::ostream & os) const373 void mbl_clusters<T,D>::print_cluster_sets(std::ostream& os) const
374 {
375 for (unsigned i=0;i<index_.size();++i)
376 {
377 os << i << ") ";
378 for (unsigned j=0;j<index_[i].size();++j)
379 os<<index_[i][j] << ' ';
380 os<<'\n';
381 }
382 }
383
384 //: Write out list of elements in each cluster
385 template<class T, class D>
print_summary(std::ostream & os) const386 void mbl_clusters<T,D>::print_summary(std::ostream& os) const
387 {
388 os << " max_r: " << max_r_ << " n_clusters: " << p_.size();
389 }
390
391 template<class T, class D>
version_no() const392 short mbl_clusters<T,D>::version_no() const
393 {
394 return 1;
395 }
396
397 template<class T, class D>
b_write(vsl_b_ostream & bfs) const398 void mbl_clusters<T,D>::b_write(vsl_b_ostream& bfs) const
399 {
400 vsl_b_write(bfs,version_no());
401 vsl_b_write(bfs,p_);
402 vsl_b_write(bfs,r_);
403 vsl_b_write(bfs,max_r_);
404 vsl_b_write(bfs,index_);
405 }
406
407 template<class T, class D>
b_read(vsl_b_istream & bfs)408 void mbl_clusters<T,D>::b_read(vsl_b_istream& bfs)
409 {
410 short version;
411 vsl_b_read(bfs,version);
412 switch (version)
413 {
414 case 1:
415 vsl_b_read(bfs,p_);
416 vsl_b_read(bfs,r_);
417 vsl_b_read(bfs,max_r_);
418 vsl_b_read(bfs,index_);
419 break;
420
421 default:
422 std::cerr << "mbl_clusters<T,D>::b_read() "
423 "Unexpected version number " << version << std::endl;
424 bfs.is().clear(std::ios::badbit); // Set an unrecoverable IO error on stream
425 return;
426 }
427
428 data_=nullptr;
429 }
430
431 //: Binary file stream output operator for class reference
432 template<class T, class D>
vsl_b_write(vsl_b_ostream & bfs,const mbl_clusters<T,D> & c)433 void vsl_b_write(vsl_b_ostream& bfs, const mbl_clusters<T,D>& c)
434 {
435 c.b_write(bfs);
436 }
437
438 //: Binary file stream input operator for class reference
439 template<class T, class D>
vsl_b_read(vsl_b_istream & bfs,mbl_clusters<T,D> & c)440 void vsl_b_read(vsl_b_istream& bfs, mbl_clusters<T,D>& c)
441 {
442 c.b_read(bfs);
443 }
444
445 //: Stream output operator for class reference
446 template<class T, class D>
operator <<(std::ostream & os,const mbl_clusters<T,D> & c)447 std::ostream& operator<<(std::ostream& os,const mbl_clusters<T,D>& c)
448 {
449 c.print_summary(os);
450 return os;
451 }
452
453
454 #define MBL_CLUSTERS_INSTANTIATE(T,D) \
455 template class mbl_clusters< T,D >; \
456 template void vsl_b_write(vsl_b_ostream& bfs, const mbl_clusters<T,D >& c); \
457 template void vsl_b_read(vsl_b_istream& bfs, mbl_clusters<T,D >& c); \
458 template std::ostream& operator<<(std::ostream& os,const mbl_clusters<T,D >& c)
459
460 #endif // mbl_clusters_hxx_
461