1 /**
2 * @file core/tree/rectangle_tree/discrete_hilbert_value_impl.hpp
3 * @author Mikhail Lozhnikov
4 *
5 * Definition of the DiscreteHilbertValue class, a class that calculates
6 * the ordering of points using the Hilbert curve.
7 *
8 * mlpack is free software; you may redistribute it and/or modify it under the
9 * terms of the 3-clause BSD license. You should have received a copy of the
10 * 3-clause BSD license along with mlpack. If not, see
11 * http://www.opensource.org/licenses/BSD-3-Clause for more information.
12 */
13 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_DISCRETE_HILBERT_VALUE_IMPL_HPP
14 #define MLPACK_CORE_TREE_RECTANGLE_TREE_DISCRETE_HILBERT_VALUE_IMPL_HPP
15
16 #include "discrete_hilbert_value.hpp"
17
18 namespace mlpack {
19 namespace tree /** Trees and tree-building procedures. */ {
20
21 template<typename TreeElemType>
DiscreteHilbertValue()22 DiscreteHilbertValue<TreeElemType>::DiscreteHilbertValue() :
23 localHilbertValues(NULL),
24 ownsLocalHilbertValues(false),
25 numValues(0),
26 valueToInsert(NULL),
27 ownsValueToInsert(false)
28 { }
29
30 template<typename TreeElemType>
~DiscreteHilbertValue()31 DiscreteHilbertValue<TreeElemType>::~DiscreteHilbertValue()
32 {
33 if (ownsLocalHilbertValues)
34 delete localHilbertValues;
35 if (ownsValueToInsert)
36 delete valueToInsert;
37 }
38
39 template<typename TreeElemType>
40 template<typename TreeType>
DiscreteHilbertValue(const TreeType * tree)41 DiscreteHilbertValue<TreeElemType>::DiscreteHilbertValue(const TreeType* tree) :
42 localHilbertValues(NULL),
43 ownsLocalHilbertValues(false),
44 numValues(0),
45 valueToInsert(tree->Parent() ?
46 tree->Parent()->AuxiliaryInfo().HilbertValue().ValueToInsert() :
47 new arma::Col<HilbertElemType>(tree->Dataset().n_rows)),
48 ownsValueToInsert(tree->Parent() ? false : true)
49 {
50 // Calculate the Hilbert value for all points.
51 if (!tree->Parent()) // This is the root node.
52 ownsLocalHilbertValues = true;
53 else if (tree->Parent()->Child(0).IsLeaf())
54 {
55 // This is a leaf node.
56 assert(tree->Parent()->NumChildren() > 0);
57 ownsLocalHilbertValues = true;
58 }
59
60 if (ownsLocalHilbertValues)
61 {
62 localHilbertValues = new arma::Mat<HilbertElemType>(tree->Dataset().n_rows,
63 tree->MaxLeafSize() + 1);
64 }
65 }
66
67 template<typename TreeElemType>
68 template<typename TreeType>
69 DiscreteHilbertValue<TreeElemType>::
DiscreteHilbertValue(const DiscreteHilbertValue & other,TreeType * tree,bool deepCopy)70 DiscreteHilbertValue(const DiscreteHilbertValue& other,
71 TreeType* tree,
72 bool deepCopy) :
73 localHilbertValues(NULL),
74 ownsLocalHilbertValues(other.ownsLocalHilbertValues),
75 numValues(other.NumValues()),
76 valueToInsert(NULL),
77 ownsValueToInsert(other.ownsValueToInsert)
78 {
79 if (deepCopy)
80 {
81 // Only leaf nodes own the localHilbertValues dataset.
82 // Intermediate nodes store the pointer to the corresponding dataset.
83 if (ownsLocalHilbertValues)
84 localHilbertValues = new arma::Mat<HilbertElemType>(
85 *other.LocalHilbertValues());
86 else
87 localHilbertValues = NULL;
88
89 // Only the root owns ownsValueToInsert. Other nodes the pointer.
90 if (ownsValueToInsert)
91 valueToInsert = new arma::Col<HilbertElemType>(
92 *other.ValueToInsert());
93 else
94 {
95 assert(tree->Parent() != NULL);
96 // Copy the pointer from the parent node.
97 valueToInsert = const_cast<arma::Col<HilbertElemType>*>
98 (tree->Parent()->AuxiliaryInfo().HilbertValue().ValueToInsert());
99 }
100
101 if (tree->NumChildren() == 0)
102 {
103 // We have to update pointers to the localHilbertValues dataset in
104 // intermediate nodes.
105 TreeType* node = tree;
106
107 while (node->Parent() != NULL)
108 {
109 if (node->Parent()->NumChildren() > 1)
110 {
111 const std::vector<TreeType*> parentChildren =
112 node->AuxiliaryInfo().Children(node->Parent());
113 // If node is not the last child of its parent, we shouldn't copy
114 // the localHilbertValues pointer.
115 if (parentChildren[node->Parent()->NumChildren() - 2] == NULL)
116 break;
117 }
118 node->Parent()->AuxiliaryInfo().HilbertValue().LocalHilbertValues() =
119 localHilbertValues;
120 node = node->Parent();
121 }
122 }
123 }
124 else
125 {
126 localHilbertValues = const_cast<arma::Mat<HilbertElemType>*>
127 (other.LocalHilbertValues());
128 valueToInsert = const_cast<arma::Col<HilbertElemType>*>
129 (other.ValueToInsert());
130 }
131 }
132
133 template<typename TreeElemType>
134 DiscreteHilbertValue<TreeElemType>::
DiscreteHilbertValue(DiscreteHilbertValue && other)135 DiscreteHilbertValue(DiscreteHilbertValue&& other) :
136 localHilbertValues(other.localHilbertValues),
137 ownsLocalHilbertValues(other.ownsLocalHilbertValues),
138 numValues(other.numValues),
139 valueToInsert(other.valueToInsert),
140 ownsValueToInsert(other.ownsValueToInsert)
141 {
142 other.localHilbertValues = NULL;
143 other.ownsLocalHilbertValues = false;
144 other.numValues = 0;
145 other.valueToInsert = NULL;
146 other.ownsValueToInsert = false;
147 }
148
149 template<typename TreeElemType>
150 template<typename VecType>
151 arma::Col<typename DiscreteHilbertValue<TreeElemType>::HilbertElemType>
152 DiscreteHilbertValue<TreeElemType>::
CalculateValue(const VecType & pt,typename std::enable_if_t<IsVector<VecType>::value> *)153 CalculateValue(const VecType& pt,
154 typename std::enable_if_t<IsVector<VecType>::value>*)
155 {
156 typedef typename VecType::elem_type VecElemType;
157 arma::Col<HilbertElemType> res(pt.n_rows);
158 // Calculate the number of bits for the exponent.
159 const int numExpBits = std::ceil(std::log2(
160 std::numeric_limits<VecElemType>::max_exponent -
161 std::numeric_limits<VecElemType>::min_exponent + 1.0));
162
163 // Calculate the number of bits for the mantissa.
164 const int numMantBits = order - numExpBits - 1;
165
166 for (size_t i = 0; i < pt.n_rows; ++i)
167 {
168 int e;
169 VecElemType normalizedVal = std::frexp(pt(i), &e);
170 bool sgn = std::signbit(normalizedVal);
171
172 if (pt(i) == 0)
173 e = std::numeric_limits<VecElemType>::min_exponent;
174
175 if (sgn)
176 normalizedVal = -normalizedVal;
177
178 if (e < std::numeric_limits<VecElemType>::min_exponent)
179 {
180 HilbertElemType tmp = (HilbertElemType) 1 <<
181 (std::numeric_limits<VecElemType>::min_exponent - e);
182
183 e = std::numeric_limits<VecElemType>::min_exponent;
184 normalizedVal /= tmp;
185 }
186
187 // Extract the mantissa.
188 HilbertElemType tmp = (HilbertElemType) 1 << numMantBits;
189 res(i) = std::floor(normalizedVal * tmp);
190
191 // Add the exponent.
192 assert(res(i) < ((HilbertElemType) 1 << numMantBits));
193 res(i) |= ((HilbertElemType)
194 (e - std::numeric_limits<VecElemType>::min_exponent)) << numMantBits;
195
196 assert(res(i) < ((HilbertElemType) 1 << (order - 1)) - 1);
197
198 // Negative values should be inverted.
199 if (sgn)
200 {
201 res(i) = ((HilbertElemType) 1 << (order - 1)) - 1 - res(i);
202 assert((res(i) >> (order - 1)) == 0);
203 }
204 else
205 {
206 res(i) |= (HilbertElemType) 1 << (order - 1);
207 assert((res(i) >> (order - 1)) == 1);
208 }
209 }
210
211 HilbertElemType M = (HilbertElemType) 1 << (order - 1);
212
213 // Since the Hilbert curve is continuous we should permutate and intend
214 // coordinate axes depending on the position of the point.
215 for (HilbertElemType Q = M; Q > 1; Q >>= 1)
216 {
217 HilbertElemType P = Q - 1;
218
219 for (size_t i = 0; i < pt.n_rows; ++i)
220 {
221 if (res(i) & Q) // Invert.
222 res(0) ^= P;
223 else // Permutate.
224 {
225 HilbertElemType t = (res(0) ^ res(i)) & P;
226 res(0) ^= t;
227 res(i) ^= t;
228 }
229 }
230 }
231
232 // Gray encode.
233 for (size_t i = 1; i < pt.n_rows; ++i)
234 res(i) ^= res(i - 1);
235
236 HilbertElemType t = 0;
237
238 // Some coordinate axes should be inverted.
239 for (HilbertElemType Q = M; Q > 1; Q >>= 1)
240 if (res(pt.n_rows - 1) & Q)
241 t ^= Q - 1;
242
243 for (size_t i = 0; i < pt.n_rows; ++i)
244 res(i) ^= t;
245
246 // We should rearrange bits in order to compare two Hilbert values faster.
247 arma::Col<HilbertElemType> rearrangedResult(pt.n_rows, arma::fill::zeros);
248
249 for (size_t i = 0; i < order; ++i)
250 for (size_t j = 0; j < pt.n_rows; ++j)
251 {
252 size_t bit = (i * pt.n_rows + j) % order;
253 size_t row = (i * pt.n_rows + j) / order;
254
255 rearrangedResult(row) |= (((res(j) >> (order - 1 - i)) & 1) <<
256 (order - 1 - bit));
257 }
258
259 return rearrangedResult;
260 }
261
262 template<typename TreeElemType>
263 int DiscreteHilbertValue<TreeElemType>::
CompareValues(const arma::Col<HilbertElemType> & value1,const arma::Col<HilbertElemType> & value2)264 CompareValues(const arma::Col<HilbertElemType>& value1,
265 const arma::Col<HilbertElemType>& value2)
266 {
267 for (size_t i = 0; i < value1.n_rows; ++i)
268 {
269 if (value1(i) > value2(i))
270 return 1;
271 else if (value1(i) < value2(i))
272 return -1;
273 }
274
275 return 0;
276 }
277
278
279
280 template<typename TreeElemType>
281 template<typename VecType1, typename VecType2>
282 int DiscreteHilbertValue<TreeElemType>::
ComparePoints(const VecType1 & pt1,const VecType2 & pt2,typename std::enable_if_t<IsVector<VecType1>::value> *,typename std::enable_if_t<IsVector<VecType2>::value> *)283 ComparePoints(const VecType1& pt1,
284 const VecType2& pt2,
285 typename std::enable_if_t<IsVector<VecType1>::value>*,
286 typename std::enable_if_t<IsVector<VecType2>::value>*)
287 {
288 arma::Col<HilbertElemType> val1 = CalculateValue(pt1);
289 arma::Col<HilbertElemType> val2 = CalculateValue(pt2);
290
291 return CompareValues(val1, val2);
292 }
293
294 template<typename TreeElemType>
295 int DiscreteHilbertValue<TreeElemType>::
CompareValues(const DiscreteHilbertValue & val1,const DiscreteHilbertValue & val2)296 CompareValues(const DiscreteHilbertValue& val1,
297 const DiscreteHilbertValue& val2)
298 {
299 if (val1.NumValues() > 0 && val2.NumValues() == 0)
300 return 1;
301 else if (val1.NumValues() == 0 && val2.NumValues() > 0)
302 return -1;
303 else if (val1.NumValues() == 0 && val2.NumValues() == 0)
304 return 0;
305
306 return CompareValues(val1.LocalHilbertValues()->col(val1.NumValues() - 1),
307 val2.LocalHilbertValues()->col(val2.NumValues() - 1));
308 }
309
310 template<typename TreeElemType>
311 int DiscreteHilbertValue<TreeElemType>::
CompareWith(const DiscreteHilbertValue & val) const312 CompareWith(const DiscreteHilbertValue& val) const
313 {
314 return CompareValues(*this, val);
315 }
316
317 template<typename TreeElemType>
318 template<typename VecType>
319 int DiscreteHilbertValue<TreeElemType>::
CompareWith(const VecType & pt,typename std::enable_if_t<IsVector<VecType>::value> *) const320 CompareWith(const VecType& pt,
321 typename std::enable_if_t<IsVector<VecType>::value>*) const
322 {
323 arma::Col<HilbertElemType> val = CalculateValue(pt);
324
325 if (numValues == 0)
326 return -1;
327
328 return CompareValues(localHilbertValues->col(numValues - 1), val);
329 }
330
331 template<typename TreeElemType>
332 template<typename VecType>
333 int DiscreteHilbertValue<TreeElemType>::
CompareWithCachedPoint(const VecType &,typename std::enable_if_t<IsVector<VecType>::value> *) const334 CompareWithCachedPoint(const VecType& ,
335 typename std::enable_if_t<IsVector<VecType>::value>*) const
336 {
337 if (numValues == 0)
338 return -1;
339
340 return CompareValues(localHilbertValues->col(numValues - 1), *valueToInsert);
341 }
342
343 template<typename TreeElemType>
344 template<typename TreeType, typename VecType>
345 size_t DiscreteHilbertValue<TreeElemType>::
InsertPoint(TreeType * node,const VecType & pt,typename std::enable_if_t<IsVector<VecType>::value> *)346 InsertPoint(TreeType *node,
347 const VecType& pt,
348 typename std::enable_if_t<IsVector<VecType>::value>*)
349 {
350 size_t i = 0;
351
352 // All points are inserted to the root node.
353 if (!node->Parent())
354 *valueToInsert = CalculateValue(pt);
355 if (node->IsLeaf())
356 {
357 // Find an appropriate place.
358 for (i = 0; i < numValues; ++i)
359 if (CompareValues(localHilbertValues->col(i), *valueToInsert) > 0)
360 break;
361
362 for (size_t j = numValues; j > i; j--)
363 localHilbertValues->col(j) = localHilbertValues->col(j-1);
364
365 localHilbertValues->col(i) = *valueToInsert;
366 numValues++;
367 // Propagate changes of the largest Hilbert value downward.
368 TreeType* root = node->Parent();
369
370 while (root != NULL)
371 {
372 root->AuxiliaryInfo().HilbertValue().UpdateLargestValue(root);
373
374 root = root->Parent();
375 }
376 }
377
378 return i;
379 }
380
381 template<typename TreeElemType>
382 template<typename TreeType>
InsertNode(TreeType * node)383 void DiscreteHilbertValue<TreeElemType>::InsertNode(TreeType* node)
384 {
385 DiscreteHilbertValue &val = node->AuxiliaryInfo().HilbertValue();
386
387 if (CompareWith(node, val) < 0)
388 {
389 localHilbertValues = val.LocalHilbertValues();
390 numValues = val.NumValues();
391 }
392 }
393
394 template<typename TreeElemType>
395 template<typename TreeType>
396 void DiscreteHilbertValue<TreeElemType>::
DeletePoint(TreeType *,const size_t localIndex)397 DeletePoint(TreeType* /* node */, const size_t localIndex)
398 {
399 // Delete the Hilbert value from the local dataset
400 for (size_t i = numValues - 1; i > localIndex; i--)
401 localHilbertValues->col(i - 1) = localHilbertValues->col(i);
402
403 numValues--;
404 }
405
406 template<typename TreeElemType>
407 template<typename TreeType>
408 void DiscreteHilbertValue<TreeElemType>::
RemoveNode(TreeType * node,const size_t nodeIndex)409 RemoveNode(TreeType* node, const size_t nodeIndex)
410 {
411 if (node->NumChildren() <= 1)
412 {
413 localHilbertValues = NULL;
414 numValues = 0;
415 return;
416 }
417 if (nodeIndex + 1 == node->NumChildren())
418 {
419 // Update the largest Hilbert value if the value exists
420 TreeType& child = node->Child(nodeIndex - 1);
421 if (child.AuxiliaryInfo.HilbertValue().NumValues() != 0)
422 {
423 numValues = child.AuxiliaryInfo.HilbertValue().NumValues();
424 localHilbertValues =
425 child.AuxiliaryInfo.HilbertValue().LocalHilbertValues();
426 }
427 else
428 {
429 localHilbertValues = NULL;
430 numValues = 0;
431 }
432 }
433 }
434
435 template<typename TreeElemType>
436 DiscreteHilbertValue<TreeElemType>& DiscreteHilbertValue<TreeElemType>::
operator =(const DiscreteHilbertValue & val)437 operator=(const DiscreteHilbertValue& val)
438 {
439 if (this == &val)
440 return *this;
441
442 if (ownsLocalHilbertValues)
443 delete localHilbertValues;
444
445 localHilbertValues = const_cast<arma::Mat<HilbertElemType>* >
446 (val.LocalHilbertValues());
447 ownsLocalHilbertValues = false;
448 numValues = val.NumValues();
449
450 return *this;
451 }
452
453 template<typename TreeElemType>
NullifyData()454 void DiscreteHilbertValue<TreeElemType>::NullifyData()
455 {
456 ownsLocalHilbertValues = false;
457 }
458
459 template<typename TreeElemType>
460 template<typename TreeType>
UpdateLargestValue(TreeType * node)461 void DiscreteHilbertValue<TreeElemType>::UpdateLargestValue(TreeType* node)
462 {
463 if (!node->IsLeaf())
464 {
465 // Update the largest Hilbert value
466 localHilbertValues = node->Child(node->NumChildren() -
467 1).AuxiliaryInfo().HilbertValue().LocalHilbertValues();
468 numValues = node->Child(node->NumChildren() -
469 1).AuxiliaryInfo().HilbertValue().NumValues();
470 }
471 }
472
473 template<typename TreeElemType>
474 template<typename TreeType>
RedistributeHilbertValues(TreeType * parent,const size_t firstSibling,const size_t lastSibling)475 void DiscreteHilbertValue<TreeElemType>::RedistributeHilbertValues(
476 TreeType* parent,
477 const size_t firstSibling,
478 const size_t lastSibling)
479 {
480 // We need to update the local dataset if points were redistributed.
481 size_t numPoints = 0;
482 for (size_t i = firstSibling; i <= lastSibling; ++i)
483 numPoints += parent->Child(i).NumPoints();
484
485 // Copy the local Hilbert values.
486 arma::Mat<HilbertElemType> tmp(localHilbertValues->n_rows, numPoints);
487
488 size_t iPoint = 0;
489 for (size_t i = firstSibling; i<= lastSibling; ++i)
490 {
491 DiscreteHilbertValue<TreeElemType> &value =
492 parent->Child(i).AuxiliaryInfo().HilbertValue();
493
494 for (size_t j = 0; j < value.NumValues(); ++j)
495 {
496 tmp.col(iPoint) = value.LocalHilbertValues()->col(j);
497 iPoint++;
498 }
499 }
500 assert(iPoint == numPoints);
501
502 iPoint = 0;
503
504 // Redistribute the Hilbert values.
505 for (size_t i = firstSibling; i <= lastSibling; ++i)
506 {
507 DiscreteHilbertValue<TreeElemType> &value =
508 parent->Child(i).AuxiliaryInfo().HilbertValue();
509
510 for (size_t j = 0; j < parent->Child(i).NumPoints(); ++j)
511 {
512 value.LocalHilbertValues()->col(j) = tmp.col(iPoint);
513 iPoint++;
514 }
515 value.NumValues() = parent->Child(i).NumPoints();
516 }
517
518 assert(iPoint == numPoints);
519 }
520
521 template<typename TreeElemType>
522 template<typename Archive>
serialize(Archive & ar,const unsigned int)523 void DiscreteHilbertValue<TreeElemType>::serialize(
524 Archive& ar,
525 const unsigned int /* version */)
526 {
527 ar & BOOST_SERIALIZATION_NVP(localHilbertValues);
528 ar & BOOST_SERIALIZATION_NVP(ownsLocalHilbertValues);
529 ar & BOOST_SERIALIZATION_NVP(numValues);
530 ar & BOOST_SERIALIZATION_NVP(valueToInsert);
531 ar & BOOST_SERIALIZATION_NVP(ownsValueToInsert);
532 }
533
534 } // namespace tree
535 } // namespace mlpack
536
537 #endif // MLPACK_CORE_TREE_RECTANGLE_TREE_DISCRETE_HILBERT_VALUE_IMPL_HPP
538