// Copyright (c) 2017-2021, Lawrence Livermore National Security, LLC and
// other Axom Project Developers. See the top-level LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)

#ifndef AXOM_SPIN_BUILD_RADIX_TREE_H_
#define AXOM_SPIN_BUILD_RADIX_TREE_H_

#include "axom/config.hpp"  // for axom compile-time definitions

#include "axom/core/execution/execution_space.hpp"
#include "axom/core/execution/for_all.hpp"

#include "axom/core/utilities/AnnotationMacros.hpp"  // for annotations

#include "axom/primal/geometry/BoundingBox.hpp"
#include "axom/primal/geometry/Point.hpp"

#include "axom/spin/internal/linear_bvh/RadixTree.hpp"

#include "axom/spin/MortonIndex.hpp"

#include "axom/core/utilities/Utilities.hpp"  // for isNearlyEqual()
#include "axom/slic/interface/slic.hpp"       // for slic

#if defined(AXOM_USE_RAJA)
// RAJA includes
#include "RAJA/RAJA.hpp"
#endif

#include <atomic>  // For std::atomic_thread_fence

#if defined(AXOM_USE_CUDA) && defined(AXOM_USE_RAJA)
// NOTE: uses the cub installation that is bundled with RAJA
#include "cub/device/device_radix_sort.cuh"
#endif

namespace axom
{
namespace spin
{
namespace internal
{
namespace linear_bvh
{
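// This header builds a linear BVH (LBVH) radix tree over a set of bounding
// boxes: leaf AABBs are quantized to Morton codes, sorted along the Z-order
// curve, linked into a binary radix tree (Karras, HPG 2012; see the link in
// build_tree), and the internal-node AABBs are then propagated bottom-up.
// See build_radix_tree() at the end of this file for the full pipeline.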
//------------------------------------------------------------------------------
// Returns a Morton code for the given point, whose coordinates are expected
// to lie in [0,1]. 32/Dims bits are used per dimension (30 bits total in 3D).
template <typename FloatType, int Dims>
static inline AXOM_HOST_DEVICE axom::int32 morton32_encode(
  const primal::Vector<FloatType, Dims>& point)
{
  // For a float coordinate, keep the first 32/Dims bits.
  // In 3D that is 10 bits per dimension; note 2^10 = 1024.
  constexpr int NUM_BITS_PER_DIM = 32 / Dims;
  constexpr FloatType FLOAT_TO_INT = 1 << NUM_BITS_PER_DIM;
  constexpr FloatType FLOAT_CEILING = FLOAT_TO_INT - 1;

  int32 int_coords[Dims];
  for(int i = 0; i < Dims; i++)
  {
    // scale to [0, 2^NUM_BITS_PER_DIM) and clamp to the representable range
    int_coords[i] =
      fmin(fmax(point[i] * FLOAT_TO_INT, (FloatType)0), FLOAT_CEILING);
  }

  primal::Point<int32, Dims> integer_pt(int_coords);

  return convertPointToMorton<int32>(integer_pt);
}
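// For example, in 3D the point (0.5, 0.5, 0.5) quantizes to the integer
// coordinates (512, 512, 512), which convertPointToMorton then bit-interleaves
// into a single Morton code.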

//------------------------------------------------------------------------------
// Returns a 63-bit Morton code (21 bits per axis) for
// coordinates in the unit cube.
static inline AXOM_HOST_DEVICE axom::int64 morton64_encode(axom::float32 x,
                                                           axom::float32 y,
                                                           axom::float32 z = 0.0)
{
  // take the first 21 bits of each coordinate. Note, 2^21 = 2097152.0f
  x = fmin(fmax(x * 2097152.0f, 0.0f), 2097151.0f);
  y = fmin(fmax(y * 2097152.0f, 0.0f), 2097151.0f);
  z = fmin(fmax(z * 2097152.0f, 0.0f), 2097151.0f);

  primal::Point<int64, 3> integer_pt =
    primal::Point<int64, 3>::make_point((int64)x, (int64)y, (int64)z);

  return convertPointToMorton<int64>(integer_pt);
}
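// For example, x = 0.5 maps to 0.5 * 2^21 = 1048576 before interleaving.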

template <typename ExecSpace, typename BoxIndexable, typename FloatType, int NDIMS>
void transform_boxes(const BoxIndexable boxes,
                     primal::BoundingBox<FloatType, NDIMS>* aabbs,
                     int32 size,
                     FloatType scale_factor)
{
  AXOM_PERF_MARK_FUNCTION("transform_boxes");

  for_all<ExecSpace>(
    size,
    AXOM_LAMBDA(int32 i) {
      primal::BoundingBox<FloatType, NDIMS> aabb = boxes[i];

      aabb.scale(scale_factor);

      aabbs[i] = aabb;
    });
}
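// Note: transform_boxes writes scaled copies of the input boxes into a
// scratch array, so the caller's data is never mutated or reordered;
// scale_factor is presumably chosen slightly above 1 so that flat or
// degenerate boxes keep a positive extent.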

//------------------------------------------------------------------------------
template <typename ExecSpace, typename FloatType, int NDIMS>
primal::BoundingBox<FloatType, NDIMS> reduce(
  primal::BoundingBox<FloatType, NDIMS>* aabbs,
  int32 size)
{
  AXOM_PERF_MARK_FUNCTION("reduce_aabbs");

#ifdef AXOM_USE_RAJA
  using reduce_policy = typename axom::execution_space<ExecSpace>::reduce_policy;

  primal::Point<FloatType, NDIMS> min_pt, max_pt;

  FloatType infinity = std::numeric_limits<FloatType>::max();
  FloatType neg_infinity = std::numeric_limits<FloatType>::lowest();

  // RAJA reducers are scalar, so reduce each dimension separately
  for(int dim = 0; dim < NDIMS; dim++)
  {
    RAJA::ReduceMin<reduce_policy, FloatType> min_coord(infinity);
    RAJA::ReduceMax<reduce_policy, FloatType> max_coord(neg_infinity);

    for_all<ExecSpace>(
      size,
      AXOM_LAMBDA(int32 i) {
        const primal::BoundingBox<FloatType, NDIMS>& aabb = aabbs[i];
        min_coord.min(aabb.getMin()[dim]);
        max_coord.max(aabb.getMax()[dim]);
      });

    min_pt[dim] = min_coord.get();
    max_pt[dim] = max_coord.get();
  }

  return primal::BoundingBox<FloatType, NDIMS>(min_pt, max_pt);
#else
  static_assert(std::is_same<ExecSpace, SEQ_EXEC>::value,
                "Only SEQ_EXEC supported without RAJA");

  primal::BoundingBox<FloatType, NDIMS> global_bounds;

  for_all<ExecSpace>(size, [&](int32 i) { global_bounds.addBox(aabbs[i]); });

  return global_bounds;
#endif
}

//------------------------------------------------------------------------------
template <typename ExecSpace, typename FloatType, int NDIMS>
void get_mcodes(primal::BoundingBox<FloatType, NDIMS>* aabbs,
                int32 size,
                const primal::BoundingBox<FloatType, NDIMS>& bounds,
                uint32* mcodes)
{
  AXOM_PERF_MARK_FUNCTION("get_mcodes");

  primal::Vector<FloatType, NDIMS> extent, inv_extent, min_coord;

  extent = primal::Vector<FloatType, NDIMS>(bounds.getMax());
  extent -= primal::Vector<FloatType, NDIMS>(bounds.getMin());
  min_coord = primal::Vector<FloatType, NDIMS>(bounds.getMin());

  for(int i = 0; i < NDIMS; ++i)
  {
    // guard against division by zero when the extent is degenerate in a dimension
    inv_extent[i] = utilities::isNearlyEqual<FloatType>(extent[i], .0f)
      ? 0.f
      : 1.f / extent[i];
  }

  for_all<ExecSpace>(
    size,
    AXOM_LAMBDA(int32 i) {
      const primal::BoundingBox<FloatType, NDIMS>& aabb = aabbs[i];

      // get the centroid and normalize it to [0,1] within the global bounds
      primal::Vector<FloatType, NDIMS> centroid(aabb.getCentroid());
      centroid = primal::Vector<FloatType, NDIMS>(
        (centroid - min_coord).array() * inv_extent.array());
      mcodes[i] = morton32_encode(centroid);
    });
}
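// Each centroid is affinely mapped into the unit box, c' = (c - min) * 1/extent,
// and then quantized by morton32_encode; sorting the resulting codes orders
// the leaves along a space-filling Z-order curve, which is what gives the
// radix tree its spatial coherence.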

//------------------------------------------------------------------------------
template <typename ExecSpace, typename IntType>
void array_counting(IntType* iterator,
                    const IntType& size,
                    const IntType& start,
                    const IntType& step)
{
  AXOM_PERF_MARK_FUNCTION("array_counting");

  for_all<ExecSpace>(
    size,
    AXOM_LAMBDA(int32 i) { iterator[i] = start + i * step; });
}

//------------------------------------------------------------------------------
//
// Reorder an array based on a new set of indices.
// array   [a,b,c]
// indices [1,0,2]
// result  [b,a,c]
// The input array is deallocated and replaced by the gathered copy.
//
template <typename ExecSpace, typename T>
void reorder(int32* indices, T*& array, int32 size, int allocatorID)
{
  AXOM_PERF_MARK_FUNCTION("reorder");

  T* temp = axom::allocate<T>(size, allocatorID);

  for_all<ExecSpace>(
    size,
    AXOM_LAMBDA(int32 i) {
      int32 in_idx = indices[i];
      temp[i] = array[in_idx];
    });

  axom::deallocate(array);
  array = temp;
}

//------------------------------------------------------------------------------
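// sort_mcodes sorts the Morton codes and, in tandem, fills 'iter' with each
// code's original position, so companion arrays (e.g. the leaf AABBs) can be
// gathered into the same order afterwards. With a recent enough RAJA we sort
// (code, index) pairs in place; otherwise we fall back to std::stable_sort on
// the indices and gather the codes through reorder().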
#if defined(AXOM_USE_RAJA) && \
  ((RAJA_VERSION_MAJOR > 0) || \
   ((RAJA_VERSION_MAJOR == 0) && (RAJA_VERSION_MINOR >= 12)))

template <typename ExecSpace>
void sort_mcodes(uint32*& mcodes, int32 size, int32* iter)
{
  AXOM_PERF_MARK_FUNCTION("sort_mcodes");

  array_counting<ExecSpace>(iter, size, 0, 1);

  AXOM_PERF_MARK_SECTION(
    "raja_stable_sort",
    using EXEC_POL = typename axom::execution_space<ExecSpace>::loop_policy;
    RAJA::stable_sort_pairs<EXEC_POL>(RAJA::make_span(mcodes, size),
                                      RAJA::make_span(iter, size)););
}

#else

// fall back to std::stable_sort
template <typename ExecSpace>
void sort_mcodes(uint32*& mcodes, int32 size, int32* iter)
{
  AXOM_PERF_MARK_FUNCTION("sort_mcodes");

  array_counting<ExecSpace>(iter, size, 0, 1);

  AXOM_PERF_MARK_SECTION(
    "cpu_sort",

    std::stable_sort(iter,
                     iter + size,
                     [=](int32 i1, int32 i2) { return mcodes[i1] < mcodes[i2]; });

  );

  const int allocID = axom::execution_space<ExecSpace>::allocatorID();
  reorder<ExecSpace>(iter, mcodes, size, allocID);
}

#endif /* RAJA version 0.12.0 and above */

//------------------------------------------------------------------------------
//
// count leading zeros
//
inline AXOM_HOST_DEVICE axom::int32 clz(axom::int32 x)
{
  axom::int32 y;
  axom::int32 n = 32;
  y = x >> 16;
  if(y != 0)
  {
    n = n - 16;
    x = y;
  }
  y = x >> 8;
  if(y != 0)
  {
    n = n - 8;
    x = y;
  }
  y = x >> 4;
  if(y != 0)
  {
    n = n - 4;
    x = y;
  }
  y = x >> 2;
  if(y != 0)
  {
    n = n - 2;
    x = y;
  }
  y = x >> 1;
  if(y != 0) return axom::int32(n - 2);
  return axom::int32(n - x);
}
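// The above is the classic branch-based binary search for the leading-zero
// count (the 'nlz' routine from Hacker's Delight); e.g. clz(0) == 32 and
// clz(1) == 31.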

//------------------------------------------------------------------------------
template <typename IntType, typename MCType>
AXOM_HOST_DEVICE IntType delta(const IntType& a,
                               const IntType& b,
                               const IntType& inner_size,
                               const MCType* mcodes)
{
  bool tie = false;
  bool out_of_range = (b < 0 || b > inner_size);
  //still make the call but with a valid address
  const int32 bb = (out_of_range) ? 0 : b;
  const uint32 acode = mcodes[a];
  const uint32 bcode = mcodes[bb];
  //use xor to find where they differ
  uint32 exor = acode ^ bcode;
  tie = (exor == 0);
  //break the tie, a and b must always differ
  exor = tie ? uint32(a) ^ uint32(bb) : exor;
  int32 count = clz(exor);
  if(tie) count += 32;
  count = (out_of_range) ? -1 : count;
  return count;
}
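// delta(a, b) is the longest-common-prefix operator from the Karras paper:
// the number of leading bits shared by the Morton codes at positions a and b,
// with -1 for an out-of-range b. When two codes are equal, the indices
// themselves are XORed instead and 32 is added, which breaks ties
// deterministically while preserving the ordering the algorithm relies on.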

//------------------------------------------------------------------------------
template <typename ExecSpace, typename FloatType, int NDIMS>
void build_tree(RadixTree<FloatType, NDIMS>& data)
{
  AXOM_PERF_MARK_FUNCTION("build_tree");

  // http://research.nvidia.com/sites/default/files/publications/karras2012hpg_paper.pdf

  // Pointers and vars are redeclared because I have a faint memory
  // of a huge amount of pain and suffering due to cuda
  // lambda captures of pointers inside a struct. Bad memories
  // of random segfaults ........ be warned
  const int32 inner_size = data.m_inner_size;
  int32* lchildren_ptr = data.m_left_children;
  int32* rchildren_ptr = data.m_right_children;
  int32* parent_ptr = data.m_parents;
  const uint32* mcodes_ptr = data.m_mcodes;

  for_all<ExecSpace>(
    inner_size,
    AXOM_LAMBDA(int32 i) {
      //determine range direction: the range grows toward the neighbor
      //with the longer common prefix
      int32 d = 0 > (delta(i, i + 1, inner_size, mcodes_ptr) -
                     delta(i, i - 1, inner_size, mcodes_ptr))
        ? -1
        : 1;

      //find upper bound for the length of the range
      int32 min_delta = delta(i, i - d, inner_size, mcodes_ptr);
      int32 lmax = 2;
      while(delta(i, i + lmax * d, inner_size, mcodes_ptr) > min_delta)
        lmax *= 2;

      //binary search to find the lower bound
      int32 l = 0;
      for(int32 t = lmax / 2; t >= 1; t /= 2)
      {
        if(delta(i, i + (l + t) * d, inner_size, mcodes_ptr) > min_delta)
        {
          l += t;
        }
      }

      int32 j = i + l * d;
      int32 delta_node = delta(i, j, inner_size, mcodes_ptr);
      int32 s = 0;
      FloatType div_factor = 2.f;
      //find the split position using a binary search
      for(int32 t = (int32)ceil(float32(l) / div_factor);;
          div_factor *= 2, t = (int32)ceil(float32(l) / div_factor))
      {
        if(delta(i, i + (s + t) * d, inner_size, mcodes_ptr) > delta_node)
        {
          s += t;
        }
        if(t == 1) break;
      }

      int32 split = i + s * d + utilities::min(d, 0);
      // assign parent/child pointers
      if(utilities::min(i, j) == split)
      {
        //leaf
        parent_ptr[split + inner_size] = i;
        lchildren_ptr[i] = split + inner_size;
      }
      else
      {
        //inner node
        parent_ptr[split] = i;
        lchildren_ptr[i] = split;
      }

      if(utilities::max(i, j) == split + 1)
      {
        //leaf
        parent_ptr[split + inner_size + 1] = i;
        rchildren_ptr[i] = split + inner_size + 1;
      }
      else
      {
        //inner node
        parent_ptr[split + 1] = i;
        rchildren_ptr[i] = split + 1;
      }

      if(i == 0)
      {
        // flag the root
        parent_ptr[0] = -1;
      }
    });
}
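// Node layout after build_tree: internal nodes occupy indices [0, inner_size)
// and leaf i is addressed as inner_size + i, which is why the leaf branches
// above offset the split by inner_size. Node 0 is the root and is flagged
// with parent -1.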

//------------------------------------------------------------------------------
template <typename ExecSpace, typename T>
static void array_memset(T* array, const int32 size, const T val)
{
  AXOM_PERF_MARK_FUNCTION("array_memset");

  for_all<ExecSpace>(
    size,
    AXOM_LAMBDA(int32 i) { array[i] = val; });
}

//------------------------------------------------------------------------------
// Fetches a bounding box value synchronized with another thread's store.
// On the CPU, this is achieved with an acquire fence. This is only really
// needed for non-x86 architectures with a weaker memory model (Power, ARM).
// On the GPU, we poll the bounding box values for a non-sentinel value.
template <typename ExecSpace, typename BBoxType>
AXOM_HOST_DEVICE static inline BBoxType sync_load(const BBoxType& box)
{
#ifdef __CUDA_ARCH__
  using atomic_policy = typename axom::execution_space<ExecSpace>::atomic_policy;

  using FloatType = typename BBoxType::CoordType;
  using PointType = typename BBoxType::PointType;

  constexpr int NDIMS = PointType::DIMENSION;

  PointType min_pt {BBoxType::InvalidMin};
  PointType max_pt {BBoxType::InvalidMax};

#ifdef SPIN_BVH_DEBUG_MEMORY_HAZARD
  int nreads = 0;  // number of extra reads needed for a non-sentinel value
#endif
  for(int dim = 0; dim < NDIMS; dim++)
  {
    // Cast to volatile so reads always hit L2$ or memory.
    volatile const FloatType& min_dim =
      reinterpret_cast<volatile const FloatType&>(box.getMin()[dim]);
    volatile const FloatType& max_dim =
      reinterpret_cast<volatile const FloatType&>(box.getMax()[dim]);

    // NOTE: There is a possibility for a read-after-write hazard, where the
    // uncached store of an AABB on one thread isn't visible when another
    // thread calls this method to read the value. However, this doesn't seem to
    // be an issue on Volta; the atomicAdd used to terminate the first thread
    // seems to correctly synchronize the prior atomic store operations for the
    // bounding box data.
    //
    // Just in case this changes, we poll for a non-sentinel value to be read
    // out. Naturally, this assumes that reads of sizeof(FloatType) don't tear.
#ifdef SPIN_BVH_DEBUG_MEMORY_HAZARD
    while((min_pt[dim] = min_dim) == BBoxType::InvalidMin)
    {
      nreads++;
    }
    while((max_pt[dim] = max_dim) == BBoxType::InvalidMax)
    {
      nreads++;
    }
#else
    while((min_pt[dim] = min_dim) == BBoxType::InvalidMin)
      ;
    while((max_pt[dim] = max_dim) == BBoxType::InvalidMax)
      ;
#endif
  }

#ifdef SPIN_BVH_DEBUG_MEMORY_HAZARD
  if(nreads > 0)
  {
    printf("Warning: needed %d extra reads for address %p\n", nreads, &box);
  }
#endif

  return BBoxType {min_pt, max_pt};

#else  // __CUDA_ARCH__
  std::atomic_thread_fence(std::memory_order_acquire);
  return box;
#endif
}

//------------------------------------------------------------------------------
// Writes a bounding box to memory, synchronized with another thread's read.
// On the CPU, this is achieved with a release fence.
// On the GPU, this function uses atomicExch to write a value directly to the
// L2 cache, thus avoiding potential cache coherency issues between threads.
template <typename ExecSpace, typename BBoxType>
AXOM_HOST_DEVICE static inline void sync_store(BBoxType& box,
                                               const BBoxType& value)
{
#if defined(__CUDA_ARCH__) && defined(AXOM_USE_RAJA)
  using atomic_policy = typename axom::execution_space<ExecSpace>::atomic_policy;

  using FloatType = typename BBoxType::CoordType;
  using PointType = typename BBoxType::PointType;

  constexpr int NDIMS = PointType::DIMENSION;

  // Cast away the underlying const so we can directly modify the box data.
  PointType& min_pt = const_cast<PointType&>(box.getMin());
  PointType& max_pt = const_cast<PointType&>(box.getMax());

  for(int dim = 0; dim < NDIMS; dim++)
  {
    RAJA::atomicExchange<atomic_policy>(&(min_pt[dim]), value.getMin()[dim]);
    RAJA::atomicExchange<atomic_policy>(&(max_pt[dim]), value.getMax()[dim]);
  }
#else  // __CUDA_ARCH__
  box = value;
  std::atomic_thread_fence(std::memory_order_release);
#endif
}

//------------------------------------------------------------------------------
template <typename ExecSpace>
AXOM_HOST_DEVICE static inline int atomic_increment(int* addr)
{
#ifdef AXOM_USE_RAJA
  using atomic_policy = typename axom::execution_space<ExecSpace>::atomic_policy;

  // returns the value held at addr before the increment
  return RAJA::atomicAdd<atomic_policy>(addr, 1);
#else
  static_assert(std::is_same<ExecSpace, SEQ_EXEC>::value,
                "Only SEQ_EXEC supported without RAJA");

  // TODO: use atomic_ref?
  int old = *addr;
  (*addr)++;
  return old;
#endif  // AXOM_USE_RAJA
}

//------------------------------------------------------------------------------
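// Bottom-up AABB propagation: one thread starts at each leaf and walks toward
// the root. At every internal node an atomic counter admits only the second
// arriving thread, so a node's box is computed exactly once and only after
// both children are final; sync_load/sync_store (above) keep those partial
// boxes coherent across threads.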
template <typename ExecSpace, typename FloatType, int NDIMS>
void propagate_aabbs(RadixTree<FloatType, NDIMS>& data, int allocatorID)
{
  AXOM_PERF_MARK_FUNCTION("propagate_aabbs");

  const int inner_size = data.m_inner_size;
  const int leaf_size = data.m_inner_size + 1;
  SLIC_ASSERT(leaf_size == data.m_size);

  // Pointers and vars are redeclared because I have a faint memory
  // of a huge amount of pain and suffering due to cuda
  // lambda captures of pointers inside a struct. Bad memories
  // of random segfaults ........ be warned
  const int32* lchildren_ptr = data.m_left_children;
  const int32* rchildren_ptr = data.m_right_children;
  const int32* parent_ptr = data.m_parents;
  const primal::BoundingBox<FloatType, NDIMS>* leaf_aabb_ptr = data.m_leaf_aabbs;

  primal::BoundingBox<FloatType, NDIMS>* inner_aabb_ptr = data.m_inner_aabbs;

  int32* counters_ptr = axom::allocate<int32>(inner_size, allocatorID);

  using BoxType = primal::BoundingBox<FloatType, NDIMS>;

  array_memset<ExecSpace>(counters_ptr, inner_size, 0);
  array_memset<ExecSpace>(inner_aabb_ptr, inner_size, BoxType {});

  for_all<ExecSpace>(
    leaf_size,
    AXOM_LAMBDA(int32 i) {
      primal::BoundingBox<FloatType, NDIMS> aabb = leaf_aabb_ptr[i];
      int32 last_node = inner_size + i;
      int32 current_node = parent_ptr[inner_size + i];

      while(current_node != -1)
      {
        // TODO: If RAJA atomics get memory ordering policies in the future,
        // we should look at replacing the sync_load/sync_stores by changing
        // the below atomic to an acquire/release atomic.
        int32 old = atomic_increment<ExecSpace>(&(counters_ptr[current_node]));

        if(old == 0)
        {
          // first thread to get here kills itself
          return;
        }

        int32 lchild = lchildren_ptr[current_node];
        int32 rchild = rchildren_ptr[current_node];

        // combine with the box computed by the sibling subtree
        int32 other_child = (lchild == last_node) ? rchild : lchild;

        primal::BoundingBox<FloatType, NDIMS> other_aabb;
        if(other_child >= inner_size)
        {
          other_aabb = leaf_aabb_ptr[other_child - inner_size];
        }
        else
        {
          other_aabb = sync_load<ExecSpace>(inner_aabb_ptr[other_child]);
        }
        aabb.addBox(other_aabb);

        // Store the final AABB for this internal node coherently.
        sync_store<ExecSpace>(inner_aabb_ptr[current_node], aabb);

        last_node = current_node;
        current_node = parent_ptr[current_node];
      }
    });

  axom::deallocate(counters_ptr);
}

//------------------------------------------------------------------------------
template <typename ExecSpace, typename BoxIndexable, typename FloatType, int NDIMS>
void build_radix_tree(const BoxIndexable boxes,
                      int size,
                      primal::BoundingBox<FloatType, NDIMS>& bounds,
                      RadixTree<FloatType, NDIMS>& radix_tree,
                      FloatType scale_factor,
                      int allocatorID)
{
  AXOM_PERF_MARK_FUNCTION("build_radix_tree");

  // sanity checks
  SLIC_ASSERT(size > 0);

  radix_tree.allocate(size, allocatorID);

  // copy so we don't reorder the input
  transform_boxes<ExecSpace>(boxes, radix_tree.m_leaf_aabbs, size, scale_factor);

  // evaluate global bounds
  bounds = reduce<ExecSpace>(radix_tree.m_leaf_aabbs, size);

  // compute morton codes for the aabb centroids and sort them;
  // radix_tree.m_leafs receives the original position of each sorted code,
  // which lets us gather / sort the companion arrays.
  get_mcodes<ExecSpace>(radix_tree.m_leaf_aabbs, size, bounds, radix_tree.m_mcodes);
  sort_mcodes<ExecSpace>(radix_tree.m_mcodes, size, radix_tree.m_leafs);

  reorder<ExecSpace>(radix_tree.m_leafs, radix_tree.m_leaf_aabbs, size, allocatorID);

  build_tree<ExecSpace>(radix_tree);

  propagate_aabbs<ExecSpace>(radix_tree, allocatorID);
}
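// A minimal usage sketch (illustrative only; 'boxes', 'numBoxes', and
// 'allocID' stand in for caller-provided data, and a scale_factor of 1.0
// leaves the leaf boxes unscaled):
//
//   using BoxType = axom::primal::BoundingBox<double, 3>;
//   const BoxType* boxes = ...;  // numBoxes leaf bounding boxes
//   RadixTree<double, 3> tree;
//   BoxType bounds;
//   build_radix_tree<axom::SEQ_EXEC>(boxes, numBoxes, bounds, tree,
//                                    1.0, allocID);
//   // 'tree' now holds the sorted leaves, topology, and per-node AABBs.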

} /* namespace linear_bvh */
} /* namespace internal */
} /* namespace spin */
} /* namespace axom */
#endif /* AXOM_SPIN_BUILD_RADIX_TREE_H_ */