1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #include "bvh_intersector_stream.h"
5 
6 #include "../geometry/intersector_iterators.h"
7 #include "../geometry/triangle_intersector.h"
8 #include "../geometry/trianglev_intersector.h"
9 #include "../geometry/trianglev_mb_intersector.h"
10 #include "../geometry/trianglei_intersector.h"
11 #include "../geometry/quadv_intersector.h"
12 #include "../geometry/quadi_intersector.h"
13 #include "../geometry/linei_intersector.h"
14 #include "../geometry/subdivpatch1_intersector.h"
15 #include "../geometry/object_intersector.h"
16 #include "../geometry/instance_intersector.h"
17 
18 #include "../common/scene.h"
19 #include <bitset>
20 
21 namespace embree
22 {
23   namespace isa
24   {
25     __aligned(64) static const int shiftTable[32] = {
26       (int)1 << 0, (int)1 << 1, (int)1 << 2, (int)1 << 3, (int)1 << 4, (int)1 << 5, (int)1 << 6, (int)1 << 7,
27       (int)1 << 8, (int)1 << 9, (int)1 << 10, (int)1 << 11, (int)1 << 12, (int)1 << 13, (int)1 << 14, (int)1 << 15,
28       (int)1 << 16, (int)1 << 17, (int)1 << 18, (int)1 << 19, (int)1 << 20, (int)1 << 21, (int)1 << 22, (int)1 << 23,
29       (int)1 << 24, (int)1 << 25, (int)1 << 26, (int)1 << 27, (int)1 << 28, (int)1 << 29, (int)1 << 30, (int)1 << 31
30     };
31 
32     template<int N, int types, bool robust, typename PrimitiveIntersector>
intersect(Accel::Intersectors * __restrict__ This,RayHitN ** inputPackets,size_t numOctantRays,IntersectContext * context)33     __forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::intersect(Accel::Intersectors* __restrict__ This,
34                                                                                                     RayHitN** inputPackets,
35                                                                                                     size_t numOctantRays,
36                                                                                                     IntersectContext* context)
37     {
38       /* we may traverse an empty BVH in case all geometry was invalid */
39       BVH* __restrict__ bvh = (BVH*) This->ptr;
40       if (bvh->root == BVH::emptyNode)
41         return;
42 
43       // Only the coherent code path is implemented
44       assert(context->isCoherent());
45       intersectCoherent(This, (RayHitK<VSIZEL>**)inputPackets, numOctantRays, context);
46     }
47 
48     template<int N, int types, bool robust, typename PrimitiveIntersector>
49     template<int K>
intersectCoherent(Accel::Intersectors * __restrict__ This,RayHitK<K> ** inputPackets,size_t numOctantRays,IntersectContext * context)50     __forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::intersectCoherent(Accel::Intersectors* __restrict__ This,
51                                                                                                             RayHitK<K>** inputPackets,
52                                                                                                             size_t numOctantRays,
53                                                                                                             IntersectContext* context)
54     {
55       assert(context->isCoherent());
56 
57       BVH* __restrict__ bvh = (BVH*) This->ptr;
58       __aligned(64) StackItemMaskCoherent stack[stackSizeSingle];  // stack of nodes
59       assert(numOctantRays <= MAX_INTERNAL_STREAM_SIZE);
60 
61       __aligned(64) TravRayKStream<K, robust> packets[MAX_INTERNAL_STREAM_SIZE/K];
62       __aligned(64) Frustum<robust> frustum;
63 
64       bool commonOctant = true;
65       const size_t m_active = initPacketsAndFrustum((RayK<K>**)inputPackets, numOctantRays, packets, frustum, commonOctant);
66       if (unlikely(m_active == 0)) return;
67 
68       /* case of non-common origin */
69       if (unlikely(!commonOctant))
70       {
71         const size_t numPackets = (numOctantRays+K-1)/K;
72         for (size_t i = 0; i < numPackets; i++)
73           This->intersect(inputPackets[i]->tnear() <= inputPackets[i]->tfar, *inputPackets[i], context);
74         return;
75       }
76 
77       stack[0].mask   = m_active;
78       stack[0].parent = 0;
79       stack[0].child  = bvh->root;
80 
81       ///////////////////////////////////////////////////////////////////////////////////
82       ///////////////////////////////////////////////////////////////////////////////////
83       ///////////////////////////////////////////////////////////////////////////////////
84 
85       StackItemMaskCoherent* stackPtr = stack + 1;
86 
87       while (1) pop:
88       {
89         if (unlikely(stackPtr == stack)) break;
90 
91         STAT3(normal.trav_stack_pop,1,1,1);
92         stackPtr--;
93         /*! pop next node */
94         NodeRef cur = NodeRef(stackPtr->child);
95         size_t m_trav_active = stackPtr->mask;
96         assert(m_trav_active);
97         NodeRef parent = stackPtr->parent;
98 
99         while (1)
100         {
101           if (unlikely(cur.isLeaf())) break;
102           const AABBNode* __restrict__ const node = cur.getAABBNode();
103           parent = cur;
104 
105           __aligned(64) size_t maskK[N];
106           for (size_t i = 0; i < N; i++)
107             maskK[i] = m_trav_active;
108           vfloat<N> dist;
109           const size_t m_node_hit = traverseCoherentStream(m_trav_active, packets, node, frustum, maskK, dist);
110           if (unlikely(m_node_hit == 0)) goto pop;
111 
112           BVHNNodeTraverserStreamHitCoherent<N, types>::traverseClosestHit(cur, m_trav_active, vbool<N>((int)m_node_hit), dist, (size_t*)maskK, stackPtr);
113           assert(m_trav_active);
114         }
115 
116         /* non-root and leaf => full culling test for all rays */
117         if (unlikely(parent != 0 && cur.isLeaf()))
118         {
119           const AABBNode* __restrict__ const node = parent.getAABBNode();
120           size_t boxID = 0xff;
121           for (size_t i = 0; i < N; i++)
122             if (node->child(i) == cur) { boxID = i; break; }
123           assert(boxID < N);
124           assert(cur == node->child(boxID));
125           m_trav_active = intersectAABBNodePacket(m_trav_active, packets, node, boxID, frustum.nf);
126         }
127 
128         /*! this is a leaf node */
129         assert(cur != BVH::emptyNode);
130         STAT3(normal.trav_leaves, 1, 1, 1);
131         size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
132 
133         size_t bits = m_trav_active;
134 
135         /*! intersect stream of rays with all primitives */
136         size_t lazy_node = 0;
137 #if defined(__SSE4_2__)
138         STAT_USER(1,(popcnt(bits)+K-1)/K*4);
139 #endif
140         while(bits)
141         {
142           size_t i = bsf(bits) / K;
143           const size_t m_isec = ((((size_t)1 << K)-1) << (i*K));
144           assert(m_isec & bits);
145           bits &= ~m_isec;
146 
147           TravRayKStream<K, robust>& p = packets[i];
148           vbool<K> m_valid = p.tnear <= p.tfar;
149           PrimitiveIntersectorK<K>::intersectK(m_valid, This, *inputPackets[i], context, prim, num, lazy_node);
150           p.tfar = min(p.tfar, inputPackets[i]->tfar);
151         };
152 
153       } // traversal + intersection
154     }
155 
156     template<int N, int types, bool robust, typename PrimitiveIntersector>
occluded(Accel::Intersectors * __restrict__ This,RayN ** inputPackets,size_t numOctantRays,IntersectContext * context)157     __forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occluded(Accel::Intersectors* __restrict__ This,
158                                                                                                    RayN** inputPackets,
159                                                                                                    size_t numOctantRays,
160                                                                                                    IntersectContext* context)
161     {
162       /* we may traverse an empty BVH in case all geometry was invalid */
163       BVH* __restrict__ bvh = (BVH*) This->ptr;
164       if (bvh->root == BVH::emptyNode)
165         return;
166 
167       if (unlikely(context->isCoherent()))
168         occludedCoherent(This, (RayK<VSIZEL>**)inputPackets, numOctantRays, context);
169       else
170         occludedIncoherent(This, (RayK<VSIZEX>**)inputPackets, numOctantRays, context);
171     }
172 
173     template<int N, int types, bool robust, typename PrimitiveIntersector>
174     template<int K>
occludedCoherent(Accel::Intersectors * __restrict__ This,RayK<K> ** inputPackets,size_t numOctantRays,IntersectContext * context)175     __noinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occludedCoherent(Accel::Intersectors* __restrict__ This,
176                                                                                                         RayK<K>** inputPackets,
177                                                                                                         size_t numOctantRays,
178                                                                                                         IntersectContext* context)
179     {
180       assert(context->isCoherent());
181 
182       BVH* __restrict__ bvh = (BVH*)This->ptr;
183       __aligned(64) StackItemMaskCoherent stack[stackSizeSingle];  // stack of nodes
184       assert(numOctantRays <= MAX_INTERNAL_STREAM_SIZE);
185 
186       /* inactive rays should have been filtered out before */
187       __aligned(64) TravRayKStream<K, robust> packets[MAX_INTERNAL_STREAM_SIZE/K];
188       __aligned(64) Frustum<robust> frustum;
189 
190       bool commonOctant = true;
191       size_t m_active = initPacketsAndFrustum(inputPackets, numOctantRays, packets, frustum, commonOctant);
192 
193       /* valid rays */
194       if (unlikely(m_active == 0)) return;
195 
196       /* case of non-common origin */
197       if (unlikely(!commonOctant))
198       {
199         const size_t numPackets = (numOctantRays+K-1)/K;
200         for (size_t i = 0; i < numPackets; i++)
201           This->occluded(inputPackets[i]->tnear() <= inputPackets[i]->tfar, *inputPackets[i], context);
202         return;
203       }
204 
205       stack[0].mask   = m_active;
206       stack[0].parent = 0;
207       stack[0].child  = bvh->root;
208 
209       ///////////////////////////////////////////////////////////////////////////////////
210       ///////////////////////////////////////////////////////////////////////////////////
211       ///////////////////////////////////////////////////////////////////////////////////
212 
213       StackItemMaskCoherent* stackPtr = stack + 1;
214 
215       while (1) pop:
216       {
217         if (unlikely(stackPtr == stack)) break;
218 
219         STAT3(normal.trav_stack_pop,1,1,1);
220         stackPtr--;
221         /*! pop next node */
222         NodeRef cur = NodeRef(stackPtr->child);
223         size_t m_trav_active = stackPtr->mask & m_active;
224         if (unlikely(!m_trav_active)) continue;
225         assert(m_trav_active);
226         NodeRef parent = stackPtr->parent;
227 
228         while (1)
229         {
230           if (unlikely(cur.isLeaf())) break;
231           const AABBNode* __restrict__ const node = cur.getAABBNode();
232           parent = cur;
233 
234           __aligned(64) size_t maskK[N];
235           for (size_t i = 0; i < N; i++)
236             maskK[i] = m_trav_active;
237 
238           vfloat<N> dist;
239           const size_t m_node_hit = traverseCoherentStream(m_trav_active, packets, node, frustum, maskK, dist);
240           if (unlikely(m_node_hit == 0)) goto pop;
241 
242           BVHNNodeTraverserStreamHitCoherent<N, types>::traverseAnyHit(cur, m_trav_active, vbool<N>((int)m_node_hit), (size_t*)maskK, stackPtr);
243           assert(m_trav_active);
244         }
245 
246         /* non-root and leaf => full culling test for all rays */
247         if (unlikely(parent != 0 && cur.isLeaf()))
248         {
249           const AABBNode* __restrict__ const node = parent.getAABBNode();
250           size_t boxID = 0xff;
251           for (size_t i = 0; i < N; i++)
252             if (node->child(i) == cur) { boxID = i; break; }
253           assert(boxID < N);
254           assert(cur == node->child(boxID));
255           m_trav_active = intersectAABBNodePacket(m_trav_active, packets, node, boxID, frustum.nf);
256         }
257 
258         /*! this is a leaf node */
259         assert(cur != BVH::emptyNode);
260         STAT3(normal.trav_leaves, 1, 1, 1);
261         size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
262 
263         size_t bits = m_trav_active & m_active;
264         /*! intersect stream of rays with all primitives */
265         size_t lazy_node = 0;
266 #if defined(__SSE4_2__)
267         STAT_USER(1,(popcnt(bits)+K-1)/K*4);
268 #endif
269         while (bits)
270         {
271           size_t i = bsf(bits) / K;
272           const size_t m_isec = ((((size_t)1 << K)-1) << (i*K));
273           assert(m_isec & bits);
274           bits &= ~m_isec;
275           TravRayKStream<K, robust>& p = packets[i];
276           vbool<K> m_valid = p.tnear <= p.tfar;
277           vbool<K> m_hit = PrimitiveIntersectorK<K>::occludedK(m_valid, This, *inputPackets[i], context, prim, num, lazy_node);
278           inputPackets[i]->tfar = select(m_hit & m_valid, vfloat<K>(neg_inf), inputPackets[i]->tfar);
279           m_active &= ~((size_t)movemask(m_hit) << (i*K));
280         }
281 
282       } // traversal + intersection
283     }
284 
285 
286     template<int N, int types, bool robust, typename PrimitiveIntersector>
287     template<int K>
occludedIncoherent(Accel::Intersectors * __restrict__ This,RayK<K> ** inputPackets,size_t numOctantRays,IntersectContext * context)288     __forceinline void BVHNIntersectorStream<N, types, robust, PrimitiveIntersector>::occludedIncoherent(Accel::Intersectors* __restrict__ This,
289                                                                                                              RayK<K>** inputPackets,
290                                                                                                              size_t numOctantRays,
291                                                                                                              IntersectContext* context)
292     {
293       assert(!context->isCoherent());
294       assert(types & BVH_FLAG_ALIGNED_NODE);
295 
296       __aligned(64) TravRayKStream<K,robust> packet[MAX_INTERNAL_STREAM_SIZE/K];
297 
298       assert(numOctantRays <= 32);
299       const size_t numPackets = (numOctantRays+K-1)/K;
300       size_t m_active = 0;
301       for (size_t i = 0; i < numPackets; i++)
302       {
303         const vfloat<K> tnear = inputPackets[i]->tnear();
304         const vfloat<K> tfar  = inputPackets[i]->tfar;
305         vbool<K> m_valid = (tnear <= tfar) & (tnear >= 0.0f);
306         m_active |= (size_t)movemask(m_valid) << (K*i);
307         const Vec3vf<K>& org = inputPackets[i]->org;
308         const Vec3vf<K>& dir = inputPackets[i]->dir;
309         vfloat<K> packet_min_dist = max(tnear, 0.0f);
310         vfloat<K> packet_max_dist = select(m_valid, tfar, neg_inf);
311         new (&packet[i]) TravRayKStream<K,robust>(org, dir, packet_min_dist, packet_max_dist);
312       }
313 
314       BVH* __restrict__ bvh = (BVH*)This->ptr;
315 
316       StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
317       StackItemMaskT<NodeRef>* stackPtr = stack + 1;  // current stack pointer
318       stack[0].ptr = bvh->root;
319       stack[0].mask = m_active;
320 
321       size_t terminated = ~m_active;
322 
323       /* near/far offsets based on first ray */
324       const NearFarPrecalculations nf(Vec3fa(packet[0].rdir.x[0], packet[0].rdir.y[0], packet[0].rdir.z[0]), N);
325 
326       while (1) pop:
327       {
328         if (unlikely(stackPtr == stack)) break;
329         STAT3(shadow.trav_stack_pop,1,1,1);
330         stackPtr--;
331         NodeRef cur = NodeRef(stackPtr->ptr);
332         size_t cur_mask = stackPtr->mask & (~terminated);
333         if (unlikely(cur_mask == 0)) continue;
334 
335         while (true)
336         {
337           /*! stop if we found a leaf node */
338           if (unlikely(cur.isLeaf())) break;
339           const AABBNode* __restrict__ const node = cur.getAABBNode();
340 
341           const vint<N> vmask = traverseIncoherentStream(cur_mask, packet, node, nf, shiftTable);
342 
343           size_t mask = movemask(vmask != vint<N>(zero));
344           if (unlikely(mask == 0)) goto pop;
345 
346           __aligned(64) unsigned int child_mask[N];
347           vint<N>::storeu(child_mask, vmask); // this explicit store here causes much better code generation
348 
349           /*! one child is hit, continue with that child */
350           size_t r = bscf(mask);
351           assert(r < N);
352           cur = node->child(r);
353           BVHN<N>::prefetch(cur,types);
354           cur_mask = child_mask[r];
355 
356           /* simple in order sequence */
357           assert(cur != BVH::emptyNode);
358           if (likely(mask == 0)) continue;
359           stackPtr->ptr  = cur;
360           stackPtr->mask = cur_mask;
361           stackPtr++;
362 
363           for (; ;)
364           {
365             r = bscf(mask);
366             assert(r < N);
367 
368             cur = node->child(r);
369             BVHN<N>::prefetch(cur,types);
370             cur_mask = child_mask[r];
371             assert(cur != BVH::emptyNode);
372             if (likely(mask == 0)) break;
373             stackPtr->ptr  = cur;
374             stackPtr->mask = cur_mask;
375             stackPtr++;
376           }
377         }
378 
379         /*! this is a leaf node */
380         assert(cur != BVH::emptyNode);
381         STAT3(shadow.trav_leaves,1,1,1);
382         size_t num; PrimitiveK<K>* prim = (PrimitiveK<K>*)cur.leaf(num);
383 
384         size_t bits = cur_mask;
385         size_t lazy_node = 0;
386 
387         for (; bits != 0;)
388         {
389           const size_t rayID = bscf(bits);
390 
391           RayK<K> &ray = *inputPackets[rayID / K];
392           const size_t k = rayID % K;
393           if (PrimitiveIntersectorK<K>::occluded(This, ray, k, context, prim, num, lazy_node))
394           {
395             ray.tfar[k] = neg_inf;
396             terminated |= (size_t)1 << rayID;
397           }
398 
399           /* lazy node */
400           if (unlikely(lazy_node))
401           {
402             stackPtr->ptr = lazy_node;
403             stackPtr->mask = cur_mask;
404             stackPtr++;
405           }
406         }
407 
408         if (unlikely(terminated == (size_t)-1)) break;
409       }
410     }
411 
412     ////////////////////////////////////////////////////////////////////////////////
413     /// ArrayIntersectorKStream Definitions
414     ////////////////////////////////////////////////////////////////////////////////
415 
416     template<bool filter>
417     struct Triangle4IntersectorStreamMoeller {
418       template<int K> using Type = ArrayIntersectorKStream<K,TriangleMIntersectorKMoeller<4 COMMA K COMMA true>>;
419     };
420 
421     template<bool filter>
422     struct Triangle4vIntersectorStreamPluecker {
423       template<int K> using Type = ArrayIntersectorKStream<K,TriangleMvIntersectorKPluecker<4 COMMA K COMMA true>>;
424     };
425 
426     template<bool filter>
427     struct Triangle4iIntersectorStreamMoeller {
428       template<int K> using Type = ArrayIntersectorKStream<K,TriangleMiIntersectorKMoeller<4 COMMA K COMMA true>>;
429     };
430 
431     template<bool filter>
432     struct Triangle4iIntersectorStreamPluecker {
433       template<int K> using Type = ArrayIntersectorKStream<K,TriangleMiIntersectorKPluecker<4 COMMA K COMMA true>>;
434     };
435 
436     template<bool filter>
437     struct Quad4vIntersectorStreamMoeller {
438       template<int K> using Type = ArrayIntersectorKStream<K,QuadMvIntersectorKMoeller<4 COMMA K COMMA true>>;
439     };
440 
441     template<bool filter>
442     struct Quad4iIntersectorStreamMoeller {
443       template<int K> using Type = ArrayIntersectorKStream<K,QuadMiIntersectorKMoeller<4 COMMA K COMMA true>>;
444     };
445 
446     template<bool filter>
447     struct Quad4vIntersectorStreamPluecker {
448       template<int K> using Type = ArrayIntersectorKStream<K,QuadMvIntersectorKPluecker<4 COMMA K COMMA true>>;
449     };
450 
451     template<bool filter>
452     struct Quad4iIntersectorStreamPluecker {
453       template<int K> using Type = ArrayIntersectorKStream<K,QuadMiIntersectorKPluecker<4 COMMA K COMMA true>>;
454     };
455 
456     struct ObjectIntersectorStream {
457       template<int K> using Type = ArrayIntersectorKStream<K,ObjectIntersectorK<K COMMA false>>;
458     };
459 
460     struct InstanceIntersectorStream {
461       template<int K> using Type = ArrayIntersectorKStream<K,InstanceIntersectorK<K>>;
462     };
463 
464     // =====================================================================================================
465     // =====================================================================================================
466     // =====================================================================================================
467 
468     template<int N>
intersect(Accel::Intersectors * __restrict__ This,RayHitN ** inputRays,size_t numTotalRays,IntersectContext * context)469     void BVHNIntersectorStreamPacketFallback<N>::intersect(Accel::Intersectors* __restrict__ This,
470                                                                RayHitN** inputRays,
471                                                                size_t numTotalRays,
472                                                                IntersectContext* context)
473     {
474       if (unlikely(context->isCoherent()))
475         intersectK(This, (RayHitK<VSIZEL>**)inputRays, numTotalRays, context);
476       else
477         intersectK(This, (RayHitK<VSIZEX>**)inputRays, numTotalRays, context);
478     }
479 
480     template<int N>
occluded(Accel::Intersectors * __restrict__ This,RayN ** inputRays,size_t numTotalRays,IntersectContext * context)481     void BVHNIntersectorStreamPacketFallback<N>::occluded(Accel::Intersectors* __restrict__ This,
482                                                               RayN** inputRays,
483                                                               size_t numTotalRays,
484                                                               IntersectContext* context)
485     {
486       if (unlikely(context->isCoherent()))
487         occludedK(This, (RayK<VSIZEL>**)inputRays, numTotalRays, context);
488       else
489         occludedK(This, (RayK<VSIZEX>**)inputRays, numTotalRays, context);
490     }
491 
492     template<int N>
493     template<int K>
intersectK(Accel::Intersectors * __restrict__ This,RayHitK<K> ** inputRays,size_t numTotalRays,IntersectContext * context)494     __noinline void BVHNIntersectorStreamPacketFallback<N>::intersectK(Accel::Intersectors* __restrict__ This,
495                                                                               RayHitK<K>** inputRays,
496                                                                               size_t numTotalRays,
497                                                                               IntersectContext* context)
498     {
499       /* fallback to packets */
500       for (size_t i = 0; i < numTotalRays; i += K)
501       {
502         const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
503         vbool<K> valid = vi < vint<K>(int(numTotalRays));
504         RayHitK<K>& ray = *(inputRays[i / K]);
505         valid &= ray.tnear() <= ray.tfar;
506         This->intersect(valid, ray, context);
507       }
508     }
509 
510     template<int N>
511     template<int K>
occludedK(Accel::Intersectors * __restrict__ This,RayK<K> ** inputRays,size_t numTotalRays,IntersectContext * context)512     __noinline void BVHNIntersectorStreamPacketFallback<N>::occludedK(Accel::Intersectors* __restrict__ This,
513                                                                              RayK<K>** inputRays,
514                                                                              size_t numTotalRays,
515                                                                              IntersectContext* context)
516     {
517       /* fallback to packets */
518       for (size_t i = 0; i < numTotalRays; i += K)
519       {
520         const vint<K> vi = vint<K>(int(i)) + vint<K>(step);
521         vbool<K> valid = vi < vint<K>(int(numTotalRays));
522         RayK<K>& ray = *(inputRays[i / K]);
523         valid &= ray.tnear() <= ray.tfar;
524         This->occluded(valid, ray, context);
525       }
526     }
527   }
528 }
529