1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #pragma once
5 
6 #include "grid_soa.h"
7 #include "../common/ray.h"
8 #include "triangle_intersector_pluecker.h"
9 
10 namespace embree
11 {
12   namespace isa
13   {
14     template<int K>
15       struct MapUV0
16     {
17       const float* const grid_uv;
18       size_t ofs00, ofs01, ofs10, ofs11;
19 
MapUV0MapUV020       __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
21         : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}
22 
operatorMapUV023       __forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const {
24         const vfloat<K> uv00(grid_uv[ofs00]);
25         const vfloat<K> uv01(grid_uv[ofs01]);
26         const vfloat<K> uv10(grid_uv[ofs10]);
27         const vfloat<K> uv11(grid_uv[ofs11]);
28         const Vec2vf<K> uv0 = GridSOA::decodeUV(uv00);
29         const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);
30         const Vec2vf<K> uv2 = GridSOA::decodeUV(uv10);
31         const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
32         u = uv[0]; v = uv[1];
33       }
34     };
35 
36     template<int K>
37       struct MapUV1
38     {
39       const float* const grid_uv;
40       size_t ofs00, ofs01, ofs10, ofs11;
41 
MapUV1MapUV142       __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)
43         : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}
44 
operatorMapUV145       __forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const {
46         const vfloat<K> uv00(grid_uv[ofs00]);
47         const vfloat<K> uv01(grid_uv[ofs01]);
48         const vfloat<K> uv10(grid_uv[ofs10]);
49         const vfloat<K> uv11(grid_uv[ofs11]);
50         const Vec2vf<K> uv0 = GridSOA::decodeUV(uv10);
51         const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);
52         const Vec2vf<K> uv2 = GridSOA::decodeUV(uv11);
53         const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));
54         u = uv[0]; v = uv[1];
55       }
56     };
57 
58     template<int K>
59       class GridSOAIntersectorK
60     {
61     public:
62       typedef void Primitive;
63 
64       class Precalculations
65       {
66 #if defined(__AVX__)
67         static const int M = 8;
68 #else
69         static const int M = 4;
70 #endif
71 
72       public:
Precalculations(const vbool<K> & valid,const RayK<K> & ray)73         __forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray)
74           : grid(nullptr), intersector(valid,ray) {}
75 
76       public:
77         GridSOA* grid;
78         PlueckerIntersectorK<M,K> intersector; // FIXME: use quad intersector
79       };
80 
81       /*! Intersect a ray with the primitive. */
intersect(const vbool<K> & valid_i,Precalculations & pre,RayHitK<K> & ray,IntersectContext * context,const Primitive * prim,size_t & lazy_node)82       static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
83       {
84         const size_t dim_offset    = pre.grid->dim_offset;
85         const size_t line_offset   = pre.grid->width;
86         const float* const grid_x  = pre.grid->decodeLeaf(0,prim);
87         const float* const grid_y  = grid_x + 1 * dim_offset;
88         const float* const grid_z  = grid_x + 2 * dim_offset;
89         const float* const grid_uv = grid_x + 3 * dim_offset;
90 
91         const size_t max_x = pre.grid->width  == 2 ? 1 : 2;
92         const size_t max_y = pre.grid->height == 2 ? 1 : 2;
93         for (size_t y=0; y<max_y; y++)
94         {
95           for (size_t x=0; x<max_x; x++)
96           {
97             const size_t ofs00 = (y+0)*line_offset+(x+0);
98             const size_t ofs01 = (y+0)*line_offset+(x+1);
99             const size_t ofs10 = (y+1)*line_offset+(x+0);
100             const size_t ofs11 = (y+1)*line_offset+(x+1);
101             const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
102             const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
103             const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
104             const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
105 
106             pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
107             pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
108           }
109         }
110       }
111 
112       /*! Test if the ray is occluded by the primitive */
occluded(const vbool<K> & valid_i,Precalculations & pre,RayK<K> & ray,IntersectContext * context,const Primitive * prim,size_t & lazy_node)113       static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
114       {
115         const size_t dim_offset    = pre.grid->dim_offset;
116         const size_t line_offset   = pre.grid->width;
117         const float* const grid_x  = pre.grid->decodeLeaf(0,prim);
118         const float* const grid_y  = grid_x + 1 * dim_offset;
119         const float* const grid_z  = grid_x + 2 * dim_offset;
120         const float* const grid_uv = grid_x + 3 * dim_offset;
121 
122         vbool<K> valid = valid_i;
123         const size_t max_x = pre.grid->width  == 2 ? 1 : 2;
124         const size_t max_y = pre.grid->height == 2 ? 1 : 2;
125         for (size_t y=0; y<max_y; y++)
126         {
127           for (size_t x=0; x<max_x; x++)
128           {
129             const size_t ofs00 = (y+0)*line_offset+(x+0);
130             const size_t ofs01 = (y+0)*line_offset+(x+1);
131             const size_t ofs10 = (y+1)*line_offset+(x+0);
132             const size_t ofs11 = (y+1)*line_offset+(x+1);
133             const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
134             const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
135             const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
136             const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
137 
138             pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
139             if (none(valid)) break;
140             pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
141             if (none(valid)) break;
142           }
143         }
144         return !valid;
145       }
146 
147       template<typename Loader>
intersect(RayHitK<K> & ray,size_t k,IntersectContext * context,const float * const grid_x,const size_t line_offset,const size_t lines,Precalculations & pre)148         static __forceinline void intersect(RayHitK<K>& ray, size_t k,
149                                             IntersectContext* context,
150                                             const float* const grid_x,
151                                             const size_t line_offset,
152                                             const size_t lines,
153                                             Precalculations& pre)
154       {
155         typedef typename Loader::vfloat vfloat;
156         const size_t dim_offset    = pre.grid->dim_offset;
157         const float* const grid_y  = grid_x + 1 * dim_offset;
158         const float* const grid_z  = grid_x + 2 * dim_offset;
159         const float* const grid_uv = grid_x + 3 * dim_offset;
160         Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
161         pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
162       };
163 
164       template<typename Loader>
occluded(RayK<K> & ray,size_t k,IntersectContext * context,const float * const grid_x,const size_t line_offset,const size_t lines,Precalculations & pre)165         static __forceinline bool occluded(RayK<K>& ray, size_t k,
166                                            IntersectContext* context,
167                                            const float* const grid_x,
168                                            const size_t line_offset,
169                                            const size_t lines,
170                                            Precalculations& pre)
171       {
172         typedef typename Loader::vfloat vfloat;
173         const size_t dim_offset    = pre.grid->dim_offset;
174         const float* const grid_y  = grid_x + 1 * dim_offset;
175         const float* const grid_z  = grid_x + 2 * dim_offset;
176         const float* const grid_uv = grid_x + 3 * dim_offset;
177         Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);
178         return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
179       }
180 
181       /*! Intersect a ray with the primitive. */
intersect(Precalculations & pre,RayHitK<K> & ray,size_t k,IntersectContext * context,const Primitive * prim,size_t & lazy_node)182       static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
183       {
184         const size_t line_offset   = pre.grid->width;
185         const size_t lines         = pre.grid->height;
186         const float* const grid_x  = pre.grid->decodeLeaf(0,prim);
187 #if defined(__AVX__)
188         intersect<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
189 #else
190         intersect<GridSOA::Gather2x3>(ray, k, context, grid_x            , line_offset, lines, pre);
191         if (likely(lines > 2))
192           intersect<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre);
193 #endif
194       }
195 
196       /*! Test if the ray is occluded by the primitive */
occluded(Precalculations & pre,RayK<K> & ray,size_t k,IntersectContext * context,const Primitive * prim,size_t & lazy_node)197       static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
198       {
199         const size_t line_offset   = pre.grid->width;
200         const size_t lines         = pre.grid->height;
201         const float* const grid_x  = pre.grid->decodeLeaf(0,prim);
202 
203 #if defined(__AVX__)
204         return occluded<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);
205 #else
206         if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x            , line_offset, lines, pre)) return true;
207         if (likely(lines > 2))
208           if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true;
209 #endif
210         return false;
211       }
212     };
213 
214     template<int K>
215     class GridSOAMBIntersectorK
216     {
217     public:
218       typedef void Primitive;
219       typedef typename GridSOAIntersectorK<K>::Precalculations Precalculations;
220 
221       /*! Intersect a ray with the primitive. */
intersect(const vbool<K> & valid_i,Precalculations & pre,RayHitK<K> & ray,IntersectContext * context,const Primitive * prim,size_t & lazy_node)222       static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
223       {
224         vfloat<K> vftime;
225         vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);
226 
227         vbool<K> valid1 = valid_i;
228         while (any(valid1)) {
229           const size_t j = bsf(movemask(valid1));
230           const int itime = vitime[j];
231           const vbool<K> valid2 = valid1 & (itime == vitime);
232           valid1 = valid1 & !valid2;
233           intersect(valid2,pre,ray,vftime,itime,context,prim,lazy_node);
234         }
235       }
236 
237       /*! Intersect a ray with the primitive. */
intersect(const vbool<K> & valid_i,Precalculations & pre,RayHitK<K> & ray,const vfloat<K> & ftime,int itime,IntersectContext * context,const Primitive * prim,size_t & lazy_node)238       static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
239       {
240         const size_t grid_offset   = pre.grid->gridBytes >> 2;
241         const size_t dim_offset    = pre.grid->dim_offset;
242         const size_t line_offset   = pre.grid->width;
243         const float* const grid_x  = pre.grid->decodeLeaf(itime,prim);
244         const float* const grid_y  = grid_x + 1 * dim_offset;
245         const float* const grid_z  = grid_x + 2 * dim_offset;
246         const float* const grid_uv = grid_x + 3 * dim_offset;
247 
248         const size_t max_x = pre.grid->width  == 2 ? 1 : 2;
249         const size_t max_y = pre.grid->height == 2 ? 1 : 2;
250         for (size_t y=0; y<max_y; y++)
251         {
252           for (size_t x=0; x<max_x; x++)
253           {
254             size_t ofs00 = (y+0)*line_offset+(x+0);
255             size_t ofs01 = (y+0)*line_offset+(x+1);
256             size_t ofs10 = (y+1)*line_offset+(x+0);
257             size_t ofs11 = (y+1)*line_offset+(x+1);
258             const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
259             const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
260             const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
261             const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
262             ofs00 += grid_offset;
263             ofs01 += grid_offset;
264             ofs10 += grid_offset;
265             ofs11 += grid_offset;
266             const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
267             const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
268             const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
269             const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
270             const Vec3vf<K> p00 = lerp(a00,b00,ftime);
271             const Vec3vf<K> p01 = lerp(a01,b01,ftime);
272             const Vec3vf<K> p10 = lerp(a10,b10,ftime);
273             const Vec3vf<K> p11 = lerp(a11,b11,ftime);
274 
275             pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
276             pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));
277           }
278         }
279       }
280 
281       /*! Test if the ray is occluded by the primitive */
occluded(const vbool<K> & valid_i,Precalculations & pre,RayK<K> & ray,IntersectContext * context,const Primitive * prim,size_t & lazy_node)282       static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
283       {
284         vfloat<K> vftime;
285         vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);
286 
287         vbool<K> valid_o = valid_i;
288         vbool<K> valid1 = valid_i;
289         while (any(valid1)) {
290           const int j = int(bsf(movemask(valid1)));
291           const int itime = vitime[j];
292           const vbool<K> valid2 = valid1 & (itime == vitime);
293           valid1 = valid1 & !valid2;
294           valid_o &= !valid2 | occluded(valid2,pre,ray,vftime,itime,context,prim,lazy_node);
295         }
296         return !valid_o;
297       }
298 
299       /*! Test if the ray is occluded by the primitive */
occluded(const vbool<K> & valid_i,Precalculations & pre,RayK<K> & ray,const vfloat<K> & ftime,int itime,IntersectContext * context,const Primitive * prim,size_t & lazy_node)300       static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, const vfloat<K>& ftime, int itime, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
301       {
302         const size_t grid_offset   = pre.grid->gridBytes >> 2;
303         const size_t dim_offset    = pre.grid->dim_offset;
304         const size_t line_offset   = pre.grid->width;
305         const float* const grid_x  = pre.grid->decodeLeaf(itime,prim);
306         const float* const grid_y  = grid_x + 1 * dim_offset;
307         const float* const grid_z  = grid_x + 2 * dim_offset;
308         const float* const grid_uv = grid_x + 3 * dim_offset;
309 
310         vbool<K> valid = valid_i;
311         const size_t max_x = pre.grid->width  == 2 ? 1 : 2;
312         const size_t max_y = pre.grid->height == 2 ? 1 : 2;
313         for (size_t y=0; y<max_y; y++)
314         {
315           for (size_t x=0; x<max_x; x++)
316           {
317             size_t ofs00 = (y+0)*line_offset+(x+0);
318             size_t ofs01 = (y+0)*line_offset+(x+1);
319             size_t ofs10 = (y+1)*line_offset+(x+0);
320             size_t ofs11 = (y+1)*line_offset+(x+1);
321             const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
322             const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
323             const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
324             const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
325             ofs00 += grid_offset;
326             ofs01 += grid_offset;
327             ofs10 += grid_offset;
328             ofs11 += grid_offset;
329             const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);
330             const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);
331             const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);
332             const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);
333             const Vec3vf<K> p00 = lerp(a00,b00,ftime);
334             const Vec3vf<K> p01 = lerp(a01,b01,ftime);
335             const Vec3vf<K> p10 = lerp(a10,b10,ftime);
336             const Vec3vf<K> p11 = lerp(a11,b11,ftime);
337 
338             pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
339             if (none(valid)) break;
340             pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));
341             if (none(valid)) break;
342           }
343         }
344         return valid;
345       }
346 
347       template<typename Loader>
intersect(RayHitK<K> & ray,size_t k,const float ftime,IntersectContext * context,const float * const grid_x,const size_t line_offset,const size_t lines,Precalculations & pre)348         static __forceinline void intersect(RayHitK<K>& ray, size_t k,
349                                             const float ftime,
350                                             IntersectContext* context,
351                                             const float* const grid_x,
352                                             const size_t line_offset,
353                                             const size_t lines,
354                                             Precalculations& pre)
355       {
356         typedef typename Loader::vfloat vfloat;
357         const size_t grid_offset   = pre.grid->gridBytes >> 2;
358         const size_t dim_offset    = pre.grid->dim_offset;
359         const float* const grid_y  = grid_x + 1 * dim_offset;
360         const float* const grid_z  = grid_x + 2 * dim_offset;
361         const float* const grid_uv = grid_x + 3 * dim_offset;
362 
363         Vec3<vfloat> a0, a1, a2;
364         Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
365 
366         Vec3<vfloat> b0, b1, b2;
367         Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
368 
369         Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
370         Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
371         Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
372 
373         pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
374       };
375 
376       template<typename Loader>
occluded(RayK<K> & ray,size_t k,const float ftime,IntersectContext * context,const float * const grid_x,const size_t line_offset,const size_t lines,Precalculations & pre)377         static __forceinline bool occluded(RayK<K>& ray, size_t k,
378                                            const float ftime,
379                                            IntersectContext* context,
380                                            const float* const grid_x,
381                                            const size_t line_offset,
382                                            const size_t lines,
383                                            Precalculations& pre)
384       {
385         typedef typename Loader::vfloat vfloat;
386         const size_t grid_offset   = pre.grid->gridBytes >> 2;
387         const size_t dim_offset    = pre.grid->dim_offset;
388         const float* const grid_y  = grid_x + 1 * dim_offset;
389         const float* const grid_z  = grid_x + 2 * dim_offset;
390         const float* const grid_uv = grid_x + 3 * dim_offset;
391 
392         Vec3<vfloat> a0, a1, a2;
393         Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);
394 
395         Vec3<vfloat> b0, b1, b2;
396         Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);
397 
398         Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));
399         Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));
400         Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));
401 
402         return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));
403       }
404 
405       /*! Intersect a ray with the primitive. */
intersect(Precalculations & pre,RayHitK<K> & ray,size_t k,IntersectContext * context,const Primitive * prim,size_t & lazy_node)406       static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
407       {
408         float ftime;
409         int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);
410 
411         const size_t line_offset   = pre.grid->width;
412         const size_t lines         = pre.grid->height;
413         const float* const grid_x  = pre.grid->decodeLeaf(itime,prim);
414 
415 #if defined(__AVX__)
416         intersect<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);
417 #else
418         intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre);
419         if (likely(lines > 2))
420           intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre);
421 #endif
422       }
423 
424       /*! Test if the ray is occluded by the primitive */
occluded(Precalculations & pre,RayK<K> & ray,size_t k,IntersectContext * context,const Primitive * prim,size_t & lazy_node)425       static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t& lazy_node)
426       {
427         float ftime;
428         int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);
429 
430         const size_t line_offset   = pre.grid->width;
431         const size_t lines         = pre.grid->height;
432         const float* const grid_x  = pre.grid->decodeLeaf(itime,prim);
433 
434 #if defined(__AVX__)
435         return occluded<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);
436 #else
437         if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre)) return true;
438         if (likely(lines > 2))
439           if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true;
440 #endif
441         return false;
442       }
443     };
444   }
445 }
446