1 // Copyright 2009-2021 Intel Corporation 2 // SPDX-License-Identifier: Apache-2.0 3 4 #pragma once 5 6 #include "../common/ray.h" 7 #include "cylinder.h" 8 #include "plane.h" 9 #include "line_intersector.h" 10 #include "curve_intersector_precalculations.h" 11 12 namespace embree 13 { 14 namespace isa 15 { 16 static const size_t numJacobianIterations = 5; 17 #if defined(__AVX__) 18 static const size_t numBezierSubdivisions = 2; 19 #else 20 static const size_t numBezierSubdivisions = 3; 21 #endif 22 23 struct BezierCurveHit 24 { BezierCurveHitBezierCurveHit25 __forceinline BezierCurveHit() {} 26 BezierCurveHitBezierCurveHit27 __forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng) 28 : t(t), u(u), v(0.0f), Ng(Ng) {} 29 BezierCurveHitBezierCurveHit30 __forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng) 31 : t(t), u(u), v(v), Ng(Ng) {} 32 finalizeBezierCurveHit33 __forceinline void finalize() {} 34 35 public: 36 float t; 37 float u; 38 float v; 39 Vec3fa Ng; 40 }; 41 42 template<typename NativeCurve3ff, typename Ray, typename Epilog> intersect_bezier_iterative_debug(const Ray & ray,const float dt,const NativeCurve3ff & curve,size_t i,const vfloatx & u,const BBox<vfloatx> & tp,const BBox<vfloatx> & h0,const BBox<vfloatx> & h1,const Vec3vfx & Ng,const Vec4vfx & dP0du,const Vec4vfx & dP3du,const Epilog & epilog)43 __forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i, 44 const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1, 45 const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du, 46 const Epilog& epilog) 47 { 48 if (tp.lower[i]+dt > ray.tfar) return false; 49 Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]); 50 if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]); 51 if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]); 52 BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o); 53 return epilog(hit); 54 } 55 56 template<typename NativeCurve3ff, typename Ray, typename Epilog> intersect_bezier_iterative_jacobian(const Ray & ray,const float dt,const NativeCurve3ff & curve,float u,float t,const Epilog & epilog)57 __forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog) 58 { 59 const Vec3fa org = zero; 60 const Vec3fa dir = ray.dir; 61 const float length_ray_dir = length(dir); 62 63 /* error of curve evaluations is propertional to largest coordinate */ 64 const BBox3ff box = curve.bounds(); 65 const float P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper))); 66 67 for (size_t i=0; i<numJacobianIterations; i++) 68 { 69 const Vec3fa Q = madd(Vec3fa(t),dir,org); 70 //const Vec3fa dQdu = zero; 71 const Vec3fa dQdt = dir; 72 const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here 73 74 Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu); 75 //const Vec3fa dPdt = zero; 76 77 const Vec3fa R = Q-P; 78 const float len_R = length(R); //reduce_max(abs(R)); 79 const float R_err = max(Q_err,P_err); 80 const Vec3fa dRdu = /*dQdu*/-dPdu; 81 const Vec3fa dRdt = dQdt;//-dPdt; 82 83 const Vec3fa T = normalize(dPdu); 84 const Vec3fa dTdu = dnormalize(dPdu,ddPdu); 85 //const Vec3fa dTdt = zero; 86 const float cos_err = P_err/length(dPdu); 87 88 /* Error estimate for dot(R,T): 89 90 dot(R,T) = cos(R,T) |R| |T| 91 = (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err) 92 = cos(R,T)*|R|*|T| 93 +- cos(R,T)*(|R|*|T|_err + |T|*|R|_err) 94 +- cos_error*(|R| + |T|) 95 +- lower order terms 96 with cos(R,T) being in [0,1] and |T| = 1 we get: 97 dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1) 98 */ 99 100 const float f = dot(R,T); 101 const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R); 102 const float dfdu = dot(dRdu,T) + dot(R,dTdu); 103 const float dfdt = dot(dRdt,T);// + dot(R,dTdt); 104 105 const float K = dot(R,R)-sqr(f); 106 const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu); 107 const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt); 108 const float rsqrt_K = rsqrt(K); 109 110 const float g = sqrt(K)-P.w; 111 const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w; 112 const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w; 113 const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w; 114 115 const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt); 116 const Vec2f dut = rcp(J)*Vec2f(f,g); 117 const Vec2f ut = Vec2f(u,t) - dut; 118 u = ut.x; t = ut.y; 119 120 if (abs(f) < f_err && abs(g) < g_err) 121 { 122 t+=dt; 123 if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs 124 if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs 125 const Vec3fa R = normalize(Q-P); 126 const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu); 127 const Vec3fa V = cross(dPdu,R); 128 BezierCurveHit hit(t,u,cross(V,U)); 129 return epilog(hit); 130 } 131 } 132 return false; 133 } 134 135 template<typename NativeCurve3ff, typename Ray, typename Epilog> intersect_bezier_recursive_jacobian(const Ray & ray,const float dt,const NativeCurve3ff & curve,float u0,float u1,unsigned int depth,const Epilog & epilog)136 bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, 137 float u0, float u1, unsigned int depth, const Epilog& epilog) 138 { 139 #if defined(__AVX__) 140 enum { VSIZEX_ = 8 }; 141 typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues 142 typedef vint8 vintx; 143 typedef vfloat8 vfloatx; 144 #else 145 enum { VSIZEX_ = 4 }; 146 typedef vbool4 vboolx; 147 typedef vint4 vintx; 148 typedef vfloat4 vfloatx; 149 #endif 150 typedef Vec3<vfloatx> Vec3vfx; 151 typedef Vec4<vfloatx> Vec4vfx; 152 153 unsigned int maxDepth = numBezierSubdivisions; 154 bool found = false; 155 const Vec3fa org = zero; 156 const Vec3fa dir = ray.dir; 157 158 unsigned int sptr = 0; 159 const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below 160 struct StackEntry { 161 vboolx valid; 162 vfloatx tlower; 163 float u0; 164 float u1; 165 unsigned int depth; 166 }; 167 StackEntry stack[stack_size]; 168 goto entry; 169 170 /* terminate if stack is empty */ 171 while (sptr) 172 { 173 /* pop from stack */ 174 { 175 sptr--; 176 vboolx valid = stack[sptr].valid; 177 const vfloatx tlower = stack[sptr].tlower; 178 valid &= tlower+dt <= ray.tfar; 179 if (none(valid)) continue; 180 u0 = stack[sptr].u0; 181 u1 = stack[sptr].u1; 182 depth = stack[sptr].depth; 183 const size_t i = select_min(valid,tlower); clear(valid,i); 184 stack[sptr].valid = valid; 185 if (any(valid)) sptr++; // there are still items on the stack 186 187 /* process next segment */ 188 const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1))); 189 u0 = vu0[i+0]; 190 u1 = vu0[i+1]; 191 } 192 entry: 193 194 /* subdivide curve */ 195 const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1))); 196 const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1))); 197 Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale); 198 const Vec4vfx P3 = shift_right_1(P0); 199 const Vec4vfx dP3du = shift_right_1(dP0du); 200 const Vec4vfx P1 = P0 + dP0du; 201 const Vec4vfx P2 = P3 - dP3du; 202 203 /* calculate bounding cylinders */ 204 const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0)); 205 const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0)); 206 const vfloatx maxr12 = sqrt(max(rr1,rr2)); 207 const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp); 208 const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp); 209 vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12; 210 vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12; 211 r_outer = one_plus_ulp*r_outer; 212 r_inner = max(0.0f,one_minus_ulp*r_inner); 213 const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer); 214 const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner); 215 vboolx valid = true; clear(valid,vfloatx::size-1); 216 217 /* intersect with outer cylinder */ 218 BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1; 219 valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1); 220 if (none(valid)) continue; 221 222 /* intersect with cap-planes */ 223 BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt); 224 tp = embree::intersect(tp,tc_outer); 225 BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir); 226 tp = embree::intersect(tp,h0); 227 BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir); 228 tp = embree::intersect(tp,h1); 229 valid &= tp.lower <= tp.upper; 230 if (none(valid)) continue; 231 232 /* clamp and correct u parameter */ 233 u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f)); 234 u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f)); 235 u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size))); 236 u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size))); 237 238 /* intersect with inner cylinder */ 239 BBox<vfloatx> tc_inner; 240 vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero; 241 const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1); 242 243 /* at the unstable area we subdivide deeper */ 244 const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f); 245 const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f); 246 247 /* subtract the inner interval from the current hit interval */ 248 BBox<vfloatx> tp0, tp1; 249 subtract(tp,tc_inner,tp0,tp1); 250 vboolx valid0 = valid & (tp0.lower <= tp0.upper); 251 vboolx valid1 = valid & (tp1.lower <= tp1.upper); 252 if (none(valid0 | valid1)) continue; 253 254 /* iterate over all first hits front to back */ 255 const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth)); 256 vboolx recursion_valid0 = valid0 & (depth < termDepth0); 257 valid0 &= depth >= termDepth0; 258 259 while (any(valid0)) 260 { 261 const size_t i = select_min(valid0,tp0.lower); clear(valid0,i); 262 found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog); 263 //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog); 264 valid0 &= tp0.lower+dt <= ray.tfar; 265 } 266 valid1 &= tp1.lower+dt <= ray.tfar; 267 268 /* iterate over all second hits front to back */ 269 const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth)); 270 vboolx recursion_valid1 = valid1 & (depth < termDepth1); 271 valid1 &= depth >= termDepth1; 272 while (any(valid1)) 273 { 274 const size_t i = select_min(valid1,tp1.lower); clear(valid1,i); 275 found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog); 276 //found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog); 277 valid1 &= tp1.lower+dt <= ray.tfar; 278 } 279 280 /* push valid segments to stack */ 281 recursion_valid0 &= tp0.lower+dt <= ray.tfar; 282 recursion_valid1 &= tp1.lower+dt <= ray.tfar; 283 const vboolx recursion_valid = recursion_valid0 | recursion_valid1; 284 if (any(recursion_valid)) 285 { 286 assert(sptr < stack_size); 287 stack[sptr].valid = recursion_valid; 288 stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower); 289 stack[sptr].u0 = u0; 290 stack[sptr].u1 = u1; 291 stack[sptr].depth = depth+1; 292 sptr++; 293 } 294 } 295 return found; 296 } 297 298 template<template<typename Ty> class NativeCurve> 299 struct SweepCurve1Intersector1 300 { 301 typedef NativeCurve<Vec3ff> NativeCurve3ff; 302 303 template<typename Epilog> intersectSweepCurve1Intersector1304 __noinline bool intersect(const CurvePrecalculations1& pre, Ray& ray, 305 IntersectContext* context, 306 const CurveGeometry* geom, const unsigned int primID, 307 const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, 308 const Epilog& epilog) 309 { 310 STAT3(normal.trav_prims,1,1,1); 311 312 /* move ray closer to make intersection stable */ 313 NativeCurve3ff curve0(v0,v1,v2,v3); 314 curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); 315 const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); 316 const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); 317 const NativeCurve3ff curve1 = curve0-ref; 318 return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); 319 } 320 }; 321 322 template<template<typename Ty> class NativeCurve, int K> 323 struct SweepCurve1IntersectorK 324 { 325 typedef NativeCurve<Vec3ff> NativeCurve3ff; 326 327 struct Ray1 328 { Ray1SweepCurve1IntersectorK::Ray1329 __forceinline Ray1(RayK<K>& ray, size_t k) 330 : org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {} 331 332 Vec3fa org; 333 Vec3fa dir; 334 float _tnear; 335 float& tfar; 336 tnearSweepCurve1IntersectorK::Ray1337 __forceinline float& tnear() { return _tnear; } 338 //__forceinline float& tfar() { return _tfar; } tnearSweepCurve1IntersectorK::Ray1339 __forceinline const float& tnear() const { return _tnear; } 340 //__forceinline const float& tfar() const { return _tfar; } 341 342 }; 343 344 template<typename Epilog> intersectSweepCurve1IntersectorK345 __forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k, 346 IntersectContext* context, 347 const CurveGeometry* geom, const unsigned int primID, 348 const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3, 349 const Epilog& epilog) 350 { 351 STAT3(normal.trav_prims,1,1,1); 352 Ray1 ray(vray,k); 353 354 /* move ray closer to make intersection stable */ 355 NativeCurve3ff curve0(v0,v1,v2,v3); 356 curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0); 357 const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir)); 358 const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f); 359 const NativeCurve3ff curve1 = curve0-ref; 360 return intersect_bezier_recursive_jacobian(ray,dt,curve1,0.0f,1.0f,1,epilog); 361 } 362 }; 363 } 364 } 365