1 /**
2  * @file core/tree/rectangle_tree/discrete_hilbert_value_impl.hpp
3  * @author Mikhail Lozhnikov
4  *
5  * Definition of the DiscreteHilbertValue class, a class that calculates
6  * the ordering of points using the Hilbert curve.
7  *
8  * mlpack is free software; you may redistribute it and/or modify it under the
9  * terms of the 3-clause BSD license.  You should have received a copy of the
10  * 3-clause BSD license along with mlpack.  If not, see
11  * http://www.opensource.org/licenses/BSD-3-Clause for more information.
12  */
13 #ifndef MLPACK_CORE_TREE_RECTANGLE_TREE_DISCRETE_HILBERT_VALUE_IMPL_HPP
14 #define MLPACK_CORE_TREE_RECTANGLE_TREE_DISCRETE_HILBERT_VALUE_IMPL_HPP
15 
16 #include "discrete_hilbert_value.hpp"
17 
18 namespace mlpack {
19 namespace tree /** Trees and tree-building procedures. */ {
20 
21 template<typename TreeElemType>
DiscreteHilbertValue()22 DiscreteHilbertValue<TreeElemType>::DiscreteHilbertValue() :
23     localHilbertValues(NULL),
24     ownsLocalHilbertValues(false),
25     numValues(0),
26     valueToInsert(NULL),
27     ownsValueToInsert(false)
28 { }
29 
30 template<typename TreeElemType>
~DiscreteHilbertValue()31 DiscreteHilbertValue<TreeElemType>::~DiscreteHilbertValue()
32 {
33   if (ownsLocalHilbertValues)
34     delete localHilbertValues;
35   if (ownsValueToInsert)
36     delete valueToInsert;
37 }
38 
39 template<typename TreeElemType>
40 template<typename TreeType>
DiscreteHilbertValue(const TreeType * tree)41 DiscreteHilbertValue<TreeElemType>::DiscreteHilbertValue(const TreeType* tree) :
42     localHilbertValues(NULL),
43     ownsLocalHilbertValues(false),
44     numValues(0),
45     valueToInsert(tree->Parent() ?
46         tree->Parent()->AuxiliaryInfo().HilbertValue().ValueToInsert() :
47         new arma::Col<HilbertElemType>(tree->Dataset().n_rows)),
48     ownsValueToInsert(tree->Parent() ? false : true)
49 {
50   // Calculate the Hilbert value for all points.
51   if (!tree->Parent()) // This is the root node.
52     ownsLocalHilbertValues = true;
53   else if (tree->Parent()->Child(0).IsLeaf())
54   {
55     // This is a leaf node.
56     assert(tree->Parent()->NumChildren() > 0);
57     ownsLocalHilbertValues = true;
58   }
59 
60   if (ownsLocalHilbertValues)
61   {
62     localHilbertValues = new arma::Mat<HilbertElemType>(tree->Dataset().n_rows,
63         tree->MaxLeafSize() + 1);
64   }
65 }
66 
67 template<typename TreeElemType>
68 template<typename TreeType>
69 DiscreteHilbertValue<TreeElemType>::
DiscreteHilbertValue(const DiscreteHilbertValue & other,TreeType * tree,bool deepCopy)70 DiscreteHilbertValue(const DiscreteHilbertValue& other,
71                      TreeType* tree,
72                      bool deepCopy) :
73     localHilbertValues(NULL),
74     ownsLocalHilbertValues(other.ownsLocalHilbertValues),
75     numValues(other.NumValues()),
76     valueToInsert(NULL),
77     ownsValueToInsert(other.ownsValueToInsert)
78 {
79   if (deepCopy)
80   {
81     // Only leaf nodes own the localHilbertValues dataset.
82     // Intermediate nodes store the pointer to the corresponding dataset.
83     if (ownsLocalHilbertValues)
84       localHilbertValues = new arma::Mat<HilbertElemType>(
85           *other.LocalHilbertValues());
86     else
87       localHilbertValues = NULL;
88 
89     // Only the root owns ownsValueToInsert. Other nodes the pointer.
90     if (ownsValueToInsert)
91       valueToInsert = new arma::Col<HilbertElemType>(
92           *other.ValueToInsert());
93     else
94     {
95       assert(tree->Parent() != NULL);
96       // Copy the pointer from the parent node.
97       valueToInsert = const_cast<arma::Col<HilbertElemType>*>
98         (tree->Parent()->AuxiliaryInfo().HilbertValue().ValueToInsert());
99     }
100 
101     if (tree->NumChildren() == 0)
102     {
103       // We have to update pointers to the localHilbertValues dataset in
104       // intermediate nodes.
105       TreeType* node = tree;
106 
107       while (node->Parent() != NULL)
108       {
109         if (node->Parent()->NumChildren() > 1)
110         {
111           const std::vector<TreeType*> parentChildren =
112               node->AuxiliaryInfo().Children(node->Parent());
113           // If node is not the last child of its parent, we shouldn't copy
114           // the localHilbertValues pointer.
115           if (parentChildren[node->Parent()->NumChildren() - 2] == NULL)
116             break;
117         }
118         node->Parent()->AuxiliaryInfo().HilbertValue().LocalHilbertValues() =
119           localHilbertValues;
120         node = node->Parent();
121       }
122     }
123   }
124   else
125   {
126     localHilbertValues = const_cast<arma::Mat<HilbertElemType>*>
127         (other.LocalHilbertValues());
128     valueToInsert = const_cast<arma::Col<HilbertElemType>*>
129         (other.ValueToInsert());
130   }
131 }
132 
133 template<typename TreeElemType>
134 DiscreteHilbertValue<TreeElemType>::
DiscreteHilbertValue(DiscreteHilbertValue && other)135 DiscreteHilbertValue(DiscreteHilbertValue&& other) :
136     localHilbertValues(other.localHilbertValues),
137     ownsLocalHilbertValues(other.ownsLocalHilbertValues),
138     numValues(other.numValues),
139     valueToInsert(other.valueToInsert),
140     ownsValueToInsert(other.ownsValueToInsert)
141 {
142   other.localHilbertValues = NULL;
143   other.ownsLocalHilbertValues = false;
144   other.numValues = 0;
145   other.valueToInsert = NULL;
146   other.ownsValueToInsert = false;
147 }
148 
149 template<typename TreeElemType>
150 template<typename VecType>
151 arma::Col<typename DiscreteHilbertValue<TreeElemType>::HilbertElemType>
152 DiscreteHilbertValue<TreeElemType>::
CalculateValue(const VecType & pt,typename std::enable_if_t<IsVector<VecType>::value> *)153 CalculateValue(const VecType& pt,
154                typename std::enable_if_t<IsVector<VecType>::value>*)
155 {
156   typedef typename VecType::elem_type VecElemType;
157   arma::Col<HilbertElemType> res(pt.n_rows);
158   // Calculate the number of bits for the exponent.
159   const int numExpBits = std::ceil(std::log2(
160       std::numeric_limits<VecElemType>::max_exponent -
161       std::numeric_limits<VecElemType>::min_exponent + 1.0));
162 
163   // Calculate the number of bits for the mantissa.
164   const int numMantBits = order - numExpBits - 1;
165 
166   for (size_t i = 0; i < pt.n_rows; ++i)
167   {
168     int e;
169     VecElemType normalizedVal = std::frexp(pt(i), &e);
170     bool sgn = std::signbit(normalizedVal);
171 
172     if (pt(i) == 0)
173       e = std::numeric_limits<VecElemType>::min_exponent;
174 
175     if (sgn)
176       normalizedVal = -normalizedVal;
177 
178     if (e < std::numeric_limits<VecElemType>::min_exponent)
179     {
180       HilbertElemType tmp = (HilbertElemType) 1 <<
181           (std::numeric_limits<VecElemType>::min_exponent - e);
182 
183       e = std::numeric_limits<VecElemType>::min_exponent;
184       normalizedVal /= tmp;
185     }
186 
187     // Extract the mantissa.
188     HilbertElemType tmp = (HilbertElemType) 1 << numMantBits;
189     res(i) = std::floor(normalizedVal * tmp);
190 
191     // Add the exponent.
192     assert(res(i) < ((HilbertElemType) 1 << numMantBits));
193     res(i) |= ((HilbertElemType)
194         (e - std::numeric_limits<VecElemType>::min_exponent)) << numMantBits;
195 
196     assert(res(i) < ((HilbertElemType) 1 << (order - 1)) - 1);
197 
198     // Negative values should be inverted.
199     if (sgn)
200     {
201       res(i) = ((HilbertElemType) 1 << (order - 1)) - 1 - res(i);
202       assert((res(i) >> (order - 1)) == 0);
203     }
204     else
205     {
206       res(i) |= (HilbertElemType) 1 << (order - 1);
207       assert((res(i) >> (order - 1)) == 1);
208     }
209   }
210 
211   HilbertElemType M = (HilbertElemType) 1 << (order - 1);
212 
213   // Since the Hilbert curve is continuous we should permutate and intend
214   // coordinate axes depending on the position of the point.
215   for (HilbertElemType Q = M; Q > 1; Q >>= 1)
216   {
217     HilbertElemType P = Q - 1;
218 
219     for (size_t i = 0; i < pt.n_rows; ++i)
220     {
221       if (res(i) & Q) // Invert.
222         res(0) ^= P;
223       else // Permutate.
224       {
225         HilbertElemType t = (res(0) ^ res(i)) & P;
226         res(0) ^= t;
227         res(i) ^= t;
228       }
229     }
230   }
231 
232   // Gray encode.
233   for (size_t i = 1; i < pt.n_rows; ++i)
234     res(i) ^= res(i - 1);
235 
236   HilbertElemType t = 0;
237 
238   // Some coordinate axes should be inverted.
239   for (HilbertElemType Q = M; Q > 1; Q >>= 1)
240     if (res(pt.n_rows - 1) & Q)
241       t ^= Q - 1;
242 
243   for (size_t i = 0; i < pt.n_rows; ++i)
244     res(i) ^= t;
245 
246   // We should rearrange bits in order to compare two Hilbert values faster.
247   arma::Col<HilbertElemType> rearrangedResult(pt.n_rows, arma::fill::zeros);
248 
249   for (size_t i = 0; i < order; ++i)
250     for (size_t j = 0; j < pt.n_rows; ++j)
251     {
252       size_t bit = (i * pt.n_rows + j) % order;
253       size_t row = (i * pt.n_rows + j) / order;
254 
255       rearrangedResult(row) |= (((res(j) >> (order - 1 - i)) & 1) <<
256           (order - 1 - bit));
257     }
258 
259   return rearrangedResult;
260 }
261 
262 template<typename TreeElemType>
263 int DiscreteHilbertValue<TreeElemType>::
CompareValues(const arma::Col<HilbertElemType> & value1,const arma::Col<HilbertElemType> & value2)264 CompareValues(const arma::Col<HilbertElemType>& value1,
265               const arma::Col<HilbertElemType>& value2)
266 {
267   for (size_t i = 0; i < value1.n_rows; ++i)
268   {
269     if (value1(i) > value2(i))
270       return 1;
271     else if (value1(i) < value2(i))
272       return -1;
273   }
274 
275   return 0;
276 }
277 
278 
279 
280 template<typename TreeElemType>
281 template<typename VecType1, typename VecType2>
282 int DiscreteHilbertValue<TreeElemType>::
ComparePoints(const VecType1 & pt1,const VecType2 & pt2,typename std::enable_if_t<IsVector<VecType1>::value> *,typename std::enable_if_t<IsVector<VecType2>::value> *)283 ComparePoints(const VecType1& pt1,
284               const VecType2& pt2,
285               typename std::enable_if_t<IsVector<VecType1>::value>*,
286               typename std::enable_if_t<IsVector<VecType2>::value>*)
287 {
288   arma::Col<HilbertElemType> val1 = CalculateValue(pt1);
289   arma::Col<HilbertElemType> val2 = CalculateValue(pt2);
290 
291   return CompareValues(val1, val2);
292 }
293 
294 template<typename TreeElemType>
295 int DiscreteHilbertValue<TreeElemType>::
CompareValues(const DiscreteHilbertValue & val1,const DiscreteHilbertValue & val2)296 CompareValues(const DiscreteHilbertValue& val1,
297               const DiscreteHilbertValue& val2)
298 {
299   if (val1.NumValues() > 0 && val2.NumValues() == 0)
300     return 1;
301   else if (val1.NumValues() == 0 && val2.NumValues() > 0)
302     return -1;
303   else if (val1.NumValues() == 0 && val2.NumValues() == 0)
304     return 0;
305 
306   return CompareValues(val1.LocalHilbertValues()->col(val1.NumValues() - 1),
307                        val2.LocalHilbertValues()->col(val2.NumValues() - 1));
308 }
309 
310 template<typename TreeElemType>
311 int DiscreteHilbertValue<TreeElemType>::
CompareWith(const DiscreteHilbertValue & val) const312 CompareWith(const DiscreteHilbertValue& val) const
313 {
314   return CompareValues(*this, val);
315 }
316 
317 template<typename TreeElemType>
318 template<typename VecType>
319 int DiscreteHilbertValue<TreeElemType>::
CompareWith(const VecType & pt,typename std::enable_if_t<IsVector<VecType>::value> *) const320 CompareWith(const VecType& pt,
321             typename std::enable_if_t<IsVector<VecType>::value>*) const
322 {
323   arma::Col<HilbertElemType> val = CalculateValue(pt);
324 
325   if (numValues == 0)
326     return -1;
327 
328   return CompareValues(localHilbertValues->col(numValues - 1), val);
329 }
330 
331 template<typename TreeElemType>
332 template<typename VecType>
333 int DiscreteHilbertValue<TreeElemType>::
CompareWithCachedPoint(const VecType &,typename std::enable_if_t<IsVector<VecType>::value> *) const334 CompareWithCachedPoint(const VecType& ,
335             typename std::enable_if_t<IsVector<VecType>::value>*) const
336 {
337   if (numValues == 0)
338     return -1;
339 
340   return CompareValues(localHilbertValues->col(numValues - 1), *valueToInsert);
341 }
342 
343 template<typename TreeElemType>
344 template<typename TreeType, typename VecType>
345 size_t DiscreteHilbertValue<TreeElemType>::
InsertPoint(TreeType * node,const VecType & pt,typename std::enable_if_t<IsVector<VecType>::value> *)346 InsertPoint(TreeType *node,
347             const VecType& pt,
348             typename std::enable_if_t<IsVector<VecType>::value>*)
349 {
350   size_t i = 0;
351 
352   // All points are inserted to the root node.
353   if (!node->Parent())
354     *valueToInsert = CalculateValue(pt);
355   if (node->IsLeaf())
356   {
357     // Find an appropriate place.
358     for (i = 0; i < numValues; ++i)
359       if (CompareValues(localHilbertValues->col(i), *valueToInsert) > 0)
360         break;
361 
362     for (size_t j = numValues; j > i; j--)
363       localHilbertValues->col(j) = localHilbertValues->col(j-1);
364 
365     localHilbertValues->col(i) = *valueToInsert;
366     numValues++;
367     // Propagate changes of the largest Hilbert value downward.
368     TreeType* root = node->Parent();
369 
370     while (root != NULL)
371     {
372       root->AuxiliaryInfo().HilbertValue().UpdateLargestValue(root);
373 
374       root = root->Parent();
375     }
376   }
377 
378   return i;
379 }
380 
381 template<typename TreeElemType>
382 template<typename TreeType>
InsertNode(TreeType * node)383 void DiscreteHilbertValue<TreeElemType>::InsertNode(TreeType* node)
384 {
385   DiscreteHilbertValue &val = node->AuxiliaryInfo().HilbertValue();
386 
387   if (CompareWith(node, val) < 0)
388   {
389     localHilbertValues = val.LocalHilbertValues();
390     numValues = val.NumValues();
391   }
392 }
393 
394 template<typename TreeElemType>
395 template<typename TreeType>
396 void DiscreteHilbertValue<TreeElemType>::
DeletePoint(TreeType *,const size_t localIndex)397 DeletePoint(TreeType* /* node */, const size_t localIndex)
398 {
399   // Delete the Hilbert value from the local dataset
400   for (size_t i = numValues - 1; i > localIndex; i--)
401     localHilbertValues->col(i - 1) = localHilbertValues->col(i);
402 
403   numValues--;
404 }
405 
406 template<typename TreeElemType>
407 template<typename TreeType>
408 void DiscreteHilbertValue<TreeElemType>::
RemoveNode(TreeType * node,const size_t nodeIndex)409 RemoveNode(TreeType* node, const size_t nodeIndex)
410 {
411   if (node->NumChildren() <= 1)
412   {
413     localHilbertValues = NULL;
414     numValues = 0;
415     return;
416   }
417   if (nodeIndex + 1 == node->NumChildren())
418   {
419     // Update the largest Hilbert value if the value exists
420     TreeType& child = node->Child(nodeIndex - 1);
421     if (child.AuxiliaryInfo.HilbertValue().NumValues() != 0)
422     {
423       numValues = child.AuxiliaryInfo.HilbertValue().NumValues();
424       localHilbertValues =
425           child.AuxiliaryInfo.HilbertValue().LocalHilbertValues();
426     }
427     else
428     {
429       localHilbertValues = NULL;
430       numValues = 0;
431     }
432   }
433 }
434 
435 template<typename TreeElemType>
436 DiscreteHilbertValue<TreeElemType>& DiscreteHilbertValue<TreeElemType>::
operator =(const DiscreteHilbertValue & val)437 operator=(const DiscreteHilbertValue& val)
438 {
439   if (this == &val)
440     return *this;
441 
442   if (ownsLocalHilbertValues)
443     delete localHilbertValues;
444 
445   localHilbertValues = const_cast<arma::Mat<HilbertElemType>* >
446       (val.LocalHilbertValues());
447   ownsLocalHilbertValues = false;
448   numValues = val.NumValues();
449 
450   return *this;
451 }
452 
453 template<typename TreeElemType>
NullifyData()454 void DiscreteHilbertValue<TreeElemType>::NullifyData()
455 {
456   ownsLocalHilbertValues = false;
457 }
458 
459 template<typename TreeElemType>
460 template<typename TreeType>
UpdateLargestValue(TreeType * node)461 void DiscreteHilbertValue<TreeElemType>::UpdateLargestValue(TreeType* node)
462 {
463   if (!node->IsLeaf())
464   {
465     // Update the largest Hilbert value
466     localHilbertValues = node->Child(node->NumChildren() -
467         1).AuxiliaryInfo().HilbertValue().LocalHilbertValues();
468     numValues = node->Child(node->NumChildren() -
469         1).AuxiliaryInfo().HilbertValue().NumValues();
470   }
471 }
472 
473 template<typename TreeElemType>
474 template<typename TreeType>
RedistributeHilbertValues(TreeType * parent,const size_t firstSibling,const size_t lastSibling)475 void DiscreteHilbertValue<TreeElemType>::RedistributeHilbertValues(
476     TreeType* parent,
477     const size_t firstSibling,
478     const size_t lastSibling)
479 {
480   // We need to update the local dataset if points were redistributed.
481   size_t numPoints = 0;
482   for (size_t i = firstSibling; i <= lastSibling; ++i)
483     numPoints += parent->Child(i).NumPoints();
484 
485   // Copy the local Hilbert values.
486   arma::Mat<HilbertElemType> tmp(localHilbertValues->n_rows, numPoints);
487 
488   size_t iPoint = 0;
489   for (size_t i = firstSibling; i<= lastSibling; ++i)
490   {
491     DiscreteHilbertValue<TreeElemType> &value =
492         parent->Child(i).AuxiliaryInfo().HilbertValue();
493 
494     for (size_t j = 0; j < value.NumValues(); ++j)
495     {
496       tmp.col(iPoint) = value.LocalHilbertValues()->col(j);
497       iPoint++;
498     }
499   }
500   assert(iPoint == numPoints);
501 
502   iPoint = 0;
503 
504   // Redistribute the Hilbert values.
505   for (size_t i = firstSibling; i <= lastSibling; ++i)
506   {
507     DiscreteHilbertValue<TreeElemType> &value =
508         parent->Child(i).AuxiliaryInfo().HilbertValue();
509 
510     for (size_t j = 0; j < parent->Child(i).NumPoints(); ++j)
511     {
512       value.LocalHilbertValues()->col(j) = tmp.col(iPoint);
513       iPoint++;
514     }
515     value.NumValues() = parent->Child(i).NumPoints();
516   }
517 
518   assert(iPoint == numPoints);
519 }
520 
521 template<typename TreeElemType>
522 template<typename Archive>
serialize(Archive & ar,const unsigned int)523 void DiscreteHilbertValue<TreeElemType>::serialize(
524     Archive& ar,
525     const unsigned int /* version */)
526 {
527   ar & BOOST_SERIALIZATION_NVP(localHilbertValues);
528   ar & BOOST_SERIALIZATION_NVP(ownsLocalHilbertValues);
529   ar & BOOST_SERIALIZATION_NVP(numValues);
530   ar & BOOST_SERIALIZATION_NVP(valueToInsert);
531   ar & BOOST_SERIALIZATION_NVP(ownsValueToInsert);
532 }
533 
534 } // namespace tree
535 } // namespace mlpack
536 
537 #endif  //  MLPACK_CORE_TREE_RECTANGLE_TREE_DISCRETE_HILBERT_VALUE_IMPL_HPP
538