1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #pragma once
5 
6 #include "../common/ray.h"
7 #include "cylinder.h"
8 #include "plane.h"
9 #include "line_intersector.h"
10 #include "curve_intersector_precalculations.h"
11 
12 namespace embree
13 {
14   namespace isa
15   {
16     static const size_t numJacobianIterations = 5;
17 #if defined(__AVX__)
18     static const size_t numBezierSubdivisions = 2;
19 #else
20     static const size_t numBezierSubdivisions = 3;
21 #endif
22 
23     struct BezierCurveHit
24     {
BezierCurveHitBezierCurveHit25       __forceinline BezierCurveHit() {}
26 
BezierCurveHitBezierCurveHit27       __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)
28         : t(t), u(u), v(0.0f), Ng(Ng) {}
29 
BezierCurveHitBezierCurveHit30       __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng)
31         : t(t), u(u), v(v), Ng(Ng) {}
32 
finalizeBezierCurveHit33       __forceinline void finalize() {}
34 
35     public:
36       float t;
37       float u;
38       float v;
39       Vec3fa Ng;
40     };
41 
42     template<typename NativeCurve3ff, typename Ray, typename Epilog>
intersect_bezier_iterative_debug(const Ray & ray,const float dt,const NativeCurve3ff & curve,size_t i,const vfloatx & u,const BBox<vfloatx> & tp,const BBox<vfloatx> & h0,const BBox<vfloatx> & h1,const Vec3vfx & Ng,const Vec4vfx & dP0du,const Vec4vfx & dP3du,const Epilog & epilog)43     __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i,
44                                                         const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1,
45                                                         const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,
46                                                         const Epilog& epilog)
47     {
48       if (tp.lower[i]+dt > ray.tfar) return false;
49       Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]);
50       if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]);
51       if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]);
52       BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o);
53       return epilog(hit);
54     }
55 
56     template<typename NativeCurve3ff, typename Ray, typename Epilog>
intersect_bezier_iterative_jacobian(const Ray & ray,const float dt,const NativeCurve3ff & curve,float u,float t,const Epilog & epilog)57      __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog)
58     {
59       const Vec3fa org = zero;
60       const Vec3fa dir = ray.dir;
61       const float length_ray_dir = length(dir);
62 
63       /* error of curve evaluations is propertional to largest coordinate */
64       const BBox3ff box = curve.bounds();
65       const float P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper)));
66 
67       for (size_t i=0; i<numJacobianIterations; i++)
68       {
69         const Vec3fa Q = madd(Vec3fa(t),dir,org);
70         //const Vec3fa dQdu = zero;
71         const Vec3fa dQdt = dir;
72         const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here
73 
74         Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu);
75         //const Vec3fa dPdt = zero;
76 
77         const Vec3fa R = Q-P;
78         const float len_R = length(R); //reduce_max(abs(R));
79         const float R_err = max(Q_err,P_err);
80         const Vec3fa dRdu = /*dQdu*/-dPdu;
81         const Vec3fa dRdt = dQdt;//-dPdt;
82 
83         const Vec3fa T = normalize(dPdu);
84         const Vec3fa dTdu = dnormalize(dPdu,ddPdu);
85         //const Vec3fa dTdt = zero;
86         const float cos_err = P_err/length(dPdu);
87 
88         /* Error estimate for dot(R,T):
89 
90            dot(R,T) = cos(R,T) |R| |T|
91                     = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err)
92                     = cos(R,T)*|R|*|T|
93                       +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err)
94                       +- cos_error*(|R| + |T|)
95                       +- lower order terms
96            with cos(R,T) being in [0,1] and |T| = 1 we get:
97              dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1)
98         */
99 
100         const float f = dot(R,T);
101         const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R);
102         const float dfdu = dot(dRdu,T) + dot(R,dTdu);
103         const float dfdt = dot(dRdt,T);// + dot(R,dTdt);
104 
105         const float K = dot(R,R)-sqr(f);
106         const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu);
107         const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt);
108         const float rsqrt_K = rsqrt(K);
109 
110         const float g = sqrt(K)-P.w;
111         const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w;
112         const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w;
113         const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w;
114 
115         const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt);
116         const Vec2f dut = rcp(J)*Vec2f(f,g);
117         const Vec2f ut = Vec2f(u,t) - dut;
118         u = ut.x; t = ut.y;
119 
120         if (abs(f) < f_err && abs(g) < g_err)
121         {
122           t+=dt;
123           if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs
124           if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs
125           const Vec3fa R = normalize(Q-P);
126           const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu);
127           const Vec3fa V = cross(dPdu,R);
128           BezierCurveHit hit(t,u,cross(V,U));
129           return epilog(hit);
130         }
131       }
132       return false;
133     }
134 
135     template<typename NativeCurve3ff, typename Ray, typename Epilog>
intersect_bezier_recursive_jacobian(const Ray & ray,const float dt,const NativeCurve3ff & curve,float u0,float u1,unsigned int depth,const Epilog & epilog)136     bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve,
137                                              float u0, float u1, unsigned int depth, const Epilog& epilog)
138     {
139 #if defined(__AVX__)
140       enum { VSIZEX_ = 8 };
141       typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues
142       typedef vint8 vintx;
143       typedef vfloat8 vfloatx;
144 #else
145       enum { VSIZEX_ = 4 };
146       typedef vbool4 vboolx;
147       typedef vint4 vintx;
148       typedef vfloat4 vfloatx;
149 #endif
150       typedef Vec3<vfloatx> Vec3vfx;
151       typedef Vec4<vfloatx> Vec4vfx;
152 
153       unsigned int maxDepth = numBezierSubdivisions;
154       bool found = false;
155       const Vec3fa org = zero;
156       const Vec3fa dir = ray.dir;
157 
158       unsigned int sptr = 0;
159       const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below
160       struct StackEntry {
161         vboolx valid;
162         vfloatx tlower;
163         float u0;
164         float u1;
165         unsigned int depth;
166       };
167       StackEntry stack[stack_size];
168       goto entry;
169 
170        /* terminate if stack is empty */
171       while (sptr)
172       {
173         /* pop from stack */
174         {
175           sptr--;
176           vboolx valid = stack[sptr].valid;
177           const vfloatx tlower = stack[sptr].tlower;
178           valid &= tlower+dt <= ray.tfar;
179           if (none(valid)) continue;
180           u0 = stack[sptr].u0;
181           u1 = stack[sptr].u1;
182           depth = stack[sptr].depth;
183           const size_t i = select_min(valid,tlower); clear(valid,i);
184           stack[sptr].valid = valid;
185           if (any(valid)) sptr++; // there are still items on the stack
186 
187           /* process next segment */
188           const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
189           u0 = vu0[i+0];
190           u1 = vu0[i+1];
191         }
192       entry:
193 
194         /* subdivide curve */
195         const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1)));
196         const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));
197         Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale);
198         const Vec4vfx P3 = shift_right_1(P0);
199         const Vec4vfx dP3du = shift_right_1(dP0du);
200         const Vec4vfx P1 = P0 + dP0du;
201         const Vec4vfx P2 = P3 - dP3du;
202 
203         /* calculate bounding cylinders */
204         const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0));
205         const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0));
206         const vfloatx maxr12 = sqrt(max(rr1,rr2));
207         const vfloatx one_plus_ulp  = 1.0f+2.0f*float(ulp);
208         const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp);
209         vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;
210         vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;
211         r_outer = one_plus_ulp*r_outer;
212         r_inner = max(0.0f,one_minus_ulp*r_inner);
213         const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);
214         const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);
215         vboolx valid = true; clear(valid,vfloatx::size-1);
216 
217         /* intersect with outer cylinder */
218         BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;
219         valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1);
220         if (none(valid)) continue;
221 
222         /* intersect with cap-planes */
223         BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt);
224         tp = embree::intersect(tp,tc_outer);
225         BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir);
226         tp = embree::intersect(tp,h0);
227         BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir);
228         tp = embree::intersect(tp,h1);
229         valid &= tp.lower <= tp.upper;
230         if (none(valid)) continue;
231 
232         /* clamp and correct u parameter */
233         u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f));
234         u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f));
235         u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size)));
236         u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size)));
237 
238         /* intersect with inner cylinder */
239         BBox<vfloatx> tc_inner;
240         vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero;
241         const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);
242 
243         /* at the unstable area we subdivide deeper */
244         const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f);
245         const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f);
246 
247         /* subtract the inner interval from the current hit interval */
248         BBox<vfloatx> tp0, tp1;
249         subtract(tp,tc_inner,tp0,tp1);
250         vboolx valid0 = valid & (tp0.lower <= tp0.upper);
251         vboolx valid1 = valid & (tp1.lower <= tp1.upper);
252         if (none(valid0 | valid1)) continue;
253 
254         /* iterate over all first hits front to back */
255         const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth));
256         vboolx recursion_valid0 = valid0 & (depth < termDepth0);
257         valid0 &= depth >= termDepth0;
258 
259         while (any(valid0))
260         {
261           const size_t i = select_min(valid0,tp0.lower); clear(valid0,i);
262           found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog);
263           //found = found | intersect_bezier_iterative_debug   (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);
264           valid0 &= tp0.lower+dt <= ray.tfar;
265         }
266         valid1 &= tp1.lower+dt <= ray.tfar;
267 
268         /* iterate over all second hits front to back */
269         const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth));
270         vboolx recursion_valid1 = valid1 & (depth < termDepth1);
271         valid1 &= depth >= termDepth1;
272         while (any(valid1))
273         {
274           const size_t i = select_min(valid1,tp1.lower); clear(valid1,i);
275           found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog);
276           //found = found | intersect_bezier_iterative_debug   (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);
277           valid1 &= tp1.lower+dt <= ray.tfar;
278         }
279 
280         /* push valid segments to stack */
281         recursion_valid0 &= tp0.lower+dt <= ray.tfar;
282         recursion_valid1 &= tp1.lower+dt <= ray.tfar;
283         const vboolx recursion_valid = recursion_valid0 | recursion_valid1;
284         if (any(recursion_valid))
285         {
286           assert(sptr < stack_size);
287           stack[sptr].valid = recursion_valid;
288           stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower);
289           stack[sptr].u0 = u0;
290           stack[sptr].u1 = u1;
291           stack[sptr].depth = depth+1;
292           sptr++;
293         }
294       }
295       return found;
296     }
297 
298     template<template<typename Ty> class NativeCurve>
299     struct SweepCurve1Intersector1
300     {
301       typedef NativeCurve<Vec3ff> NativeCurve3ff;
302 
303       template<typename Epilog>
intersectSweepCurve1Intersector1304       __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,
305                                 IntersectContext* context,
306                                 const CurveGeometry* geom, const unsigned int primID,
307                                 const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
308                                 const Epilog& epilog)
309       {
310         STAT3(normal.trav_prims,1,1,1);
311 
312         /* move ray closer to make intersection stable */
313         NativeCurve3ff curve0(v0,v1,v2,v3);
314         curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
315         const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
316         const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
317         const NativeCurve3ff curve1 = curve0-ref;
318         return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog);
319       }
320     };
321 
322     template<template<typename Ty> class NativeCurve, int K>
323     struct SweepCurve1IntersectorK
324     {
325       typedef NativeCurve<Vec3ff> NativeCurve3ff;
326 
327       struct Ray1
328       {
Ray1SweepCurve1IntersectorK::Ray1329         __forceinline Ray1(RayK<K>& ray, size_t k)
330           : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}
331 
332         Vec3fa org;
333         Vec3fa dir;
334         float _tnear;
335         float& tfar;
336 
tnearSweepCurve1IntersectorK::Ray1337         __forceinline float& tnear() { return _tnear; }
338         //__forceinline float& tfar()  { return _tfar; }
tnearSweepCurve1IntersectorK::Ray1339         __forceinline const float& tnear() const { return _tnear; }
340         //__forceinline const float& tfar()  const { return _tfar; }
341 
342       };
343 
344       template<typename Epilog>
intersectSweepCurve1IntersectorK345       __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,
346                                    IntersectContext* context,
347                                    const CurveGeometry* geom, const unsigned int primID,
348                                    const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,
349                                    const Epilog& epilog)
350       {
351         STAT3(normal.trav_prims,1,1,1);
352         Ray1 ray(vray,k);
353 
354         /* move ray closer to make intersection stable */
355         NativeCurve3ff curve0(v0,v1,v2,v3);
356         curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);
357         const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));
358         const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);
359         const NativeCurve3ff curve1 = curve0-ref;
360         return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog);
361       }
362     };
363   }
364 }
365