1 // This is mul/mbl/mbl_clusters.hxx
2 #ifndef mbl_clusters_hxx_
3 #define mbl_clusters_hxx_
4 //:
5 // \file
6 // \brief  Class to record clusters of data, for faster neighbour finding
7 // \author Tim Cootes
8 
9 #include <iostream>
10 #include <algorithm>
11 #include "mbl_clusters.h"
12 #include <cassert>
13 #ifdef _MSC_VER
14 #  include <vcl_msvc_warnings.h>
15 #endif
16 #include <vsl/vsl_binary_io.h>
17 #include <vsl/vsl_vector_io.h>
18 
19 //: Default constructor
20 template<class T, class D>
mbl_clusters()21 mbl_clusters<T,D>::mbl_clusters()
22   : data_(nullptr)
23 {
24 }
25 
26 //: Define maximum radius for each cluster
27 template<class T, class D>
set_max_r(double r)28 void mbl_clusters<T,D>::set_max_r(double r)
29 {
30   max_r_ = r;
31 }
32 
33 //: Empty clusters
34 template<class T, class D>
empty()35 void mbl_clusters<T,D>::empty()
36 {
37   p_.resize(0);
38   index_.resize(0);
39   r_.resize(0);
40 }
41 
42 //: Define external data array (pointer retained)
43 //  Empty existing clusters, then process every element of data
44 //  to create clusters, by calling add_object()
45 template<class T, class D>
set_data(const std::vector<T> & data)46 void mbl_clusters<T,D>::set_data(const std::vector<T>& data)
47 {
48   empty();
49   data_ = &data;
50   unsigned n = data.size();
51   for (unsigned i=0;i<n;++i)  add_object(i);
52 }
53 
54 //: Define external data array (pointer retained)
55 //  Use carefully! This sets the internal pointer to
56 //  point to data.  Really only to be used after loading
57 //  internals using b_read(bfs).
58 template<class T, class D>
set_data_ptr(const std::vector<T> & data)59 void mbl_clusters<T,D>::set_data_ptr(const std::vector<T>& data)
60 {
61   data_=&data;
62 }
63 
64 //: Return index of nearest object in data() to t
65 //  Nearest object in data() to t is given by data()[nearest(t,d)];
66 //  The distance to the point is d
67 template<class T, class D>
nearest(const T & t,double & d) const68 unsigned mbl_clusters<T,D>::nearest(const T& t, double& d) const
69 {
70   assert(data_!=nullptr);
71 
72   // Initialise with first in data
73   unsigned best_i = 0;
74   d = D::d(data()[0],t);
75 
76   const T* data_ptr = &data()[0];
77 
78   // Try each cluster in turn
79   for (unsigned j=0;j<p_.size();++j)
80   {
81     double dj = D::d(t,p_[j]);
82     if (dj-r_[j]<d)
83     {
84       // There may be a point in the cluster closer than the current best
85       const std::vector<unsigned>& ind = index_[j];
86       for (unsigned int i : ind)
87       {
88         double di=D::d(data_ptr[i],t);
89         if (di<d) { d=di; best_i=i; }
90       }
91     }
92   }
93   return best_i;
94 }
95 
96 //: Return index of nearest object in data() to t
97 //  Consider only objects in clusters given in c_list
98 //  Nearest object in data() to t is given by data()[nearest(t,d)];
99 //  The distance to the point is d
100 template<class T, class D>
nearest(const T & t,double & d,const std::vector<unsigned> & c_list) const101 unsigned mbl_clusters<T,D>::nearest(const T& t, double& d,
102                    const std::vector<unsigned>& c_list) const
103 {
104   assert(data_!=nullptr);
105 
106   // Initialise with first in set for c_list[0]
107   unsigned best_i = 0;
108   d = D::d(data()[index_[c_list[0]][0]],t);
109 
110   const T* data_ptr = &data()[0];
111 
112   // Try each cluster in turn
113   for (unsigned int j : c_list)
114   {
115     double dj = D::d(t,p_[j]);
116     if (dj-r_[j]<d)
117     {
118       // There may be a point in the cluster closer than the current best
119       const std::vector<unsigned>& ind = index_[j];
120       for (unsigned int i : ind)
121       {
122         double di=D::d(data_ptr[i],t);
123         if (di<d) { d=di; best_i=i; }
124       }
125     }
126   }
127   return best_i;
128 }
129 
130 
131 //: Return index of nearest cluster in data() to t
132 //  Finds nearest cluster key point to t
133 //  The distance to the point is d
134 template<class T, class D>
nearest_cluster(const T & t,double & d) const135 unsigned mbl_clusters<T,D>::nearest_cluster(const T& t, double& d) const
136 {
137   assert(p_.size()>0);
138 
139   d = D::d(p_[0],t);
140   unsigned best_j = 0;
141 
142   // Try each cluster in turn
143   for (unsigned j=1;j<p_.size();++j)
144   {
145     double dj = D::d(t,p_[j]);
146     if (dj<d) { d=dj; best_j=j; }
147   }
148 
149   return best_j;
150 }
151 
152 //: Return indices of clusters which may contain nearest point to t
153 //  Searches through all the clusters
154 template<class T, class D>
nearest_clusters(const T & t,double & max_d,std::vector<unsigned> & near_c) const155 void mbl_clusters<T,D>::nearest_clusters(const T& t, double& max_d,
156                            std::vector<unsigned>& near_c) const
157 {
158   assert(p_.size()>0);
159 
160   std::vector<unsigned> c1;
161   std::vector<double> d1;
162 
163   double d = D::d(p_[0],t);
164   c1.push_back(0);
165   d1.push_back(d);
166   max_d = d+r_[0];
167 
168   // Try each cluster in turn, recording any that might include closest
169   for (unsigned j=1;j<p_.size();++j)
170   {
171     double dj = D::d(t,p_[j]);
172     if (dj-r_[j]<=max_d)
173     {
174       c1.push_back(j);
175       d1.push_back(dj);
176       max_d=std::min(max_d,dj+r_[j]);
177     }
178   }
179 
180   // Pass through the data again to prune out far clusters
181   near_c.resize(0);
182   for (unsigned i=0;i<c1.size();++i)
183   {
184     if (d1[i]-r_[c1[i]]<=max_d) near_c.push_back(c1[i]);
185   }
186 }
187 
188 //: Return indices of clusters which may contain nearest point to t
189 //  Searches through clusters listed in c_list.
190 //  On input, max_d gives initial limit on distance.
191 //  On exit, max_d gives the revised limit on the distance
192 template<class T, class D>
nearest_clusters(const T & t,double & max_d,const std::vector<unsigned> & c_list,std::vector<unsigned> & near_c) const193 void mbl_clusters<T,D>::nearest_clusters(const T& t, double& max_d,
194                            const std::vector<unsigned>& c_list,
195                            std::vector<unsigned>& near_c) const
196 {
197   assert(p_.size()>0);
198 
199   // Storage for first pass
200   std::vector<unsigned> c1;
201   std::vector<double> d1;
202 
203   // Try each cluster in turn, recording any that might include closest
204   for (unsigned int j : c_list)
205   {
206     double dj = D::d(t,p_[j]);
207     if (dj-r_[j]<=max_d)
208     {
209       c1.push_back(j);
210       d1.push_back(dj);
211       max_d=std::min(max_d,dj+r_[j]);
212     }
213   }
214 
215   // Pass through the data again to prune out far clusters
216   near_c.resize(0);
217   for (unsigned i=0;i<c1.size();++i)
218   {
219     if (d1[i]-r_[c1[i]]<=max_d) near_c.push_back(c1[i]);
220   }
221 }
222 
223 //: Create a new cluster around point index i
224 //  Return index of cluster
225 template<class T, class D>
create_cluster(unsigned new_i,double r)226 unsigned mbl_clusters<T,D>::create_cluster(unsigned new_i, double r)
227 {
228   // Create a new cluster using this as a key point
229   p_.push_back(data()[new_i]);
230   r_.push_back(r);
231   std::vector<unsigned> ind(1);
232   ind[0]=new_i;
233   index_.push_back(ind);
234   return p_.size()-1;
235 }
236 
237 //: Append new object with index i and assign to a cluster
238 //  Assumes that new object data()[i] is available.
239 //  Deduce which cluster it belongs to and add it.
240 //  Create new cluster if further than max_r() from any.
241 //  Return index of cluster it is assigned to
242 template<class T, class D>
add_object(unsigned new_i,double r)243 unsigned mbl_clusters<T,D>::add_object(unsigned new_i, double r)
244 {
245   assert(new_i<data_->size());
246 
247   // If initially empty, create one cluster
248   if (p_.empty())
249     return create_cluster(new_i, r);
250 
251   double d;
252   unsigned j=nearest_cluster(data()[new_i],d);
253   d+=r;  // Allow for new_i being key point of another cluster, radius r
254   if (d<max_r_)
255   {
256     // Add it to cluster j
257     index_[j].push_back(new_i);
258     if (d>r_[j]) r_[j]=d;  // Update max radius of cluster
259   }
260   else
261     j=create_cluster(new_i,r);
262 
263   return j;
264 }
265 
266 //: Assign object data()[i] to cluster ci, knowing distance r.
267 //  r is the distance D::d(data()[i],p()[ci])
268 template<class T, class D>
assign_to_cluster(unsigned i,unsigned ci,double r)269 void mbl_clusters<T,D>::assign_to_cluster(unsigned i, unsigned ci,
270                                           double r)
271 {
272   index_[ci].push_back(i);
273   if (r>r_[ci]) r_[ci]=r;
274 }
275 
276 
277 //: Append new object with index i (data()[i]), creating a new cluster
278 //  Return index of resulting cluster, which is initialised with
279 //  given radius.
280 unsigned add_cluster(unsigned i, double r=0.0);
281 
282 
283 //: Finds list of clusters whose keypoint is within d of t
284 //  Returns number of such clusters. If >0, then nearest_c
285 //  gives index of cluster with centre nearest to t
286 template<class T, class D>
clusters_within_d(const T & t,double d,std::vector<unsigned> & c_list,unsigned & nearest_c,double & min_d)287 unsigned mbl_clusters<T,D>::clusters_within_d(const T& t, double d,
288                                               std::vector<unsigned>& c_list,
289                                               unsigned& nearest_c,
290                                               double& min_d)
291 {
292   c_list.resize(0);
293   nearest_c=0;
294   min_d = d+1;
295   for (unsigned i=0;i<p_.size();++i)
296   {
297     double di=D::d(t,p_[i]);
298     if (di<=d)
299     {
300       c_list.push_back(i);
301       if (di<min_d) { nearest_c=i; min_d=di; }
302     }
303   }
304   return c_list.size();
305 }
306 
307 //: Finds list of clusters whose keypoint is within d of t
308 //  Returns number of such clusters. If >0, then nearest_c
309 //  gives index of cluster with centre nearest to t
310 template<class T, class D>
clusters_within_d(const T & t,double d,const std::vector<unsigned> & in_list,std::vector<unsigned> & c_list,unsigned & nearest_c,double & min_d)311 unsigned mbl_clusters<T,D>::clusters_within_d(const T& t, double d,
312                                               const std::vector<unsigned>& in_list,
313                                               std::vector<unsigned>& c_list,
314                                               unsigned& nearest_c,
315                                               double& min_d)
316 {
317   c_list.resize(0);
318   nearest_c=0;
319   min_d = d+1;
320   for (unsigned int i : in_list)
321   {
322     double di=D::d(t,p_[i]);
323     if (di<=d)
324     {
325       c_list.push_back(i);
326       if (di<min_d) { nearest_c=i; min_d=di; }
327     }
328   }
329   return c_list.size();
330 }
331 
332 
333 //: Finds list of clusters whose keypoint is within max_r() of t
334 //  Returns number of such clusters. If >0, then nearest_c
335 //  gives index of cluster with centre nearest to t
336 template<class T, class D>
clusters_within_max_r(const T & t,std::vector<unsigned> & c_list,unsigned & nearest_c,double & min_d)337 unsigned mbl_clusters<T,D>::clusters_within_max_r(const T& t,
338                                                   std::vector<unsigned>& c_list,
339                                                   unsigned& nearest_c,
340                                                   double& min_d)
341 {
342   return clusters_within_d(t,max_r(),c_list,nearest_c,min_d);
343 }
344 
345 //: Finds list of clusters whose keypoint is within max_r() of t
346 template<class T, class D>
clusters_within_max_r(const T & t,const std::vector<unsigned> & in_list,std::vector<unsigned> & c_list,unsigned & nearest_c,double & min_d)347 unsigned mbl_clusters<T,D>::clusters_within_max_r(const T& t,
348                                                   const std::vector<unsigned>& in_list,
349                                                   std::vector<unsigned>& c_list,
350                                                   unsigned& nearest_c,
351                                                   double& min_d)
352 {
353   return clusters_within_d(t,max_r(),in_list,c_list,nearest_c,min_d);
354 }
355 
356 //: Create list of object indices in listed clusters
357 //  Concatenates lists of indices for each cluster in c_list
358 template<class T, class D>
in_clusters(const std::vector<unsigned> & c_list,std::vector<unsigned> & o_list) const359 void mbl_clusters<T,D>::in_clusters(const std::vector<unsigned>& c_list,
360                                     std::vector<unsigned>& o_list) const
361 {
362   o_list.resize(0);
363   for (unsigned int i : c_list)
364   {
365     const std::vector<unsigned>& ind = index()[i];
366     for (unsigned int j : ind) o_list.push_back(j);
367   }
368 }
369 
370 
371 //: Write out list of elements in each cluster
372 template<class T, class D>
print_cluster_sets(std::ostream & os) const373 void mbl_clusters<T,D>::print_cluster_sets(std::ostream& os) const
374 {
375   for (unsigned i=0;i<index_.size();++i)
376   {
377     os << i << ") ";
378     for (unsigned j=0;j<index_[i].size();++j)
379       os<<index_[i][j] << ' ';
380     os<<'\n';
381   }
382 }
383 
384 //: Write out list of elements in each cluster
385 template<class T, class D>
print_summary(std::ostream & os) const386 void mbl_clusters<T,D>::print_summary(std::ostream& os) const
387 {
388   os << " max_r: " << max_r_ << " n_clusters: " << p_.size();
389 }
390 
391 template<class T, class D>
version_no() const392 short mbl_clusters<T,D>::version_no() const
393 {
394     return 1;
395 }
396 
397 template<class T, class D>
b_write(vsl_b_ostream & bfs) const398 void mbl_clusters<T,D>::b_write(vsl_b_ostream& bfs) const
399 {
400   vsl_b_write(bfs,version_no());
401   vsl_b_write(bfs,p_);
402   vsl_b_write(bfs,r_);
403   vsl_b_write(bfs,max_r_);
404   vsl_b_write(bfs,index_);
405 }
406 
407 template<class T, class D>
b_read(vsl_b_istream & bfs)408 void mbl_clusters<T,D>::b_read(vsl_b_istream& bfs)
409 {
410   short version;
411   vsl_b_read(bfs,version);
412   switch (version)
413   {
414     case 1:
415       vsl_b_read(bfs,p_);
416       vsl_b_read(bfs,r_);
417       vsl_b_read(bfs,max_r_);
418       vsl_b_read(bfs,index_);
419     break;
420 
421   default:
422     std::cerr << "mbl_clusters<T,D>::b_read() "
423       "Unexpected version number " << version << std::endl;
424     bfs.is().clear(std::ios::badbit); // Set an unrecoverable IO error on stream
425     return;
426   }
427 
428   data_=nullptr;
429 }
430 
431 //: Binary file stream output operator for class reference
432 template<class T, class D>
vsl_b_write(vsl_b_ostream & bfs,const mbl_clusters<T,D> & c)433 void vsl_b_write(vsl_b_ostream& bfs, const mbl_clusters<T,D>& c)
434 {
435   c.b_write(bfs);
436 }
437 
438 //: Binary file stream input operator for class reference
439 template<class T, class D>
vsl_b_read(vsl_b_istream & bfs,mbl_clusters<T,D> & c)440 void vsl_b_read(vsl_b_istream& bfs, mbl_clusters<T,D>& c)
441 {
442   c.b_read(bfs);
443 }
444 
445 //: Stream output operator for class reference
446 template<class T, class D>
operator <<(std::ostream & os,const mbl_clusters<T,D> & c)447 std::ostream& operator<<(std::ostream& os,const mbl_clusters<T,D>& c)
448 {
449   c.print_summary(os);
450   return os;
451 }
452 
453 
454 #define MBL_CLUSTERS_INSTANTIATE(T,D) \
455 template class mbl_clusters< T,D >; \
456 template void vsl_b_write(vsl_b_ostream& bfs, const mbl_clusters<T,D >& c); \
457 template void vsl_b_read(vsl_b_istream& bfs, mbl_clusters<T,D >& c); \
458 template std::ostream& operator<<(std::ostream& os,const mbl_clusters<T,D >& c)
459 
460 #endif // mbl_clusters_hxx_
461