1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #pragma once
5 
6 #include "../common/default.h"
7 #include "../common/scene_curves.h"
8 
9 /*
10 
11   Implements Catmul Rom curves with control points p0, p1, p2, p3. At
12   t=0 the curve goes through p1, with tangent (p2-p0)/3, and for t=1
13   the curve goes through p2 with tangent (p3-p2)/2.
14 
15  */
16 
17 namespace embree
18 {
19   class CatmullRomBasis
20   {
21   public:
22 
23     template<typename T>
eval(const T & u)24       static __forceinline Vec4<T> eval(const T& u)
25     {
26       const T t  = u;
27       const T s  = T(1.0f) - u;
28       const T n0 = - t * s * s;
29       const T n1 = 2.0f + t * t * (3.0f * t - 5.0f);
30       const T n2 = 2.0f + s * s * (3.0f * s - 5.0f);
31       const T n3 = - s * t * t;
32       return T(0.5f) * Vec4<T>(n0, n1, n2, n3);
33     }
34 
35     template<typename T>
derivative(const T & u)36       static __forceinline Vec4<T>  derivative(const T& u)
37     {
38       const T t  =  u;
39       const T s  =  1.0f - u;
40       const T n0 =  - s * s + 2.0f * s * t;
41       const T n1 =  2.0f * t * (3.0f * t - 5.0f) + 3.0f * t * t;
42       const T n2 =  2.0f * s * (3.0f * t + 2.0f) - 3.0f * s * s;
43       const T n3 = -2.0f * s * t + t * t;
44       return T(0.5f) * Vec4<T>(n0, n1, n2, n3);
45     }
46 
47     template<typename T>
derivative2(const T & u)48       static __forceinline Vec4<T>  derivative2(const T& u)
49     {
50       const T t  =  u;
51       const T n0 = -3.0f * t + 2.0f;
52       const T n1 =  9.0f * t - 5.0f;
53       const T n2 = -9.0f * t + 4.0f;
54       const T n3 =  3.0f * t - 1.0f;
55       return Vec4<T>(n0, n1, n2, n3);
56     }
57   };
58 
59   struct PrecomputedCatmullRomBasis
60   {
61     enum { N = 16 };
62   public:
PrecomputedCatmullRomBasisPrecomputedCatmullRomBasis63     PrecomputedCatmullRomBasis() {}
64     PrecomputedCatmullRomBasis(int shift);
65 
66     /* basis for bspline evaluation */
67   public:
68     float c0[N+1][N+1];
69     float c1[N+1][N+1];
70     float c2[N+1][N+1];
71     float c3[N+1][N+1];
72 
73     /* basis for bspline derivative evaluation */
74   public:
75     float d0[N+1][N+1];
76     float d1[N+1][N+1];
77     float d2[N+1][N+1];
78     float d3[N+1][N+1];
79   };
80   extern PrecomputedCatmullRomBasis catmullrom_basis0;
81   extern PrecomputedCatmullRomBasis catmullrom_basis1;
82 
83   template<typename Vertex>
84     struct CatmullRomCurveT
85     {
86       Vertex v0,v1,v2,v3;
87 
CatmullRomCurveTCatmullRomCurveT88       __forceinline CatmullRomCurveT() {}
89 
CatmullRomCurveTCatmullRomCurveT90       __forceinline CatmullRomCurveT(const Vertex& v0, const Vertex& v1, const Vertex& v2, const Vertex& v3)
91         : v0(v0), v1(v1), v2(v2), v3(v3) {}
92 
beginCatmullRomCurveT93       __forceinline Vertex begin() const {
94         return madd(1.0f/6.0f,v0,madd(2.0f/3.0f,v1,1.0f/6.0f*v2));
95       }
96 
endCatmullRomCurveT97       __forceinline Vertex end() const {
98         return madd(1.0f/6.0f,v1,madd(2.0f/3.0f,v2,1.0f/6.0f*v3));
99       }
100 
centerCatmullRomCurveT101       __forceinline Vertex center() const {
102         return 0.25f*(v0+v1+v2+v3);
103       }
104 
boundsCatmullRomCurveT105       __forceinline BBox<Vertex> bounds() const {
106         return merge(BBox<Vertex>(v0),BBox<Vertex>(v1),BBox<Vertex>(v2),BBox<Vertex>(v3));
107       }
108 
109       __forceinline friend CatmullRomCurveT operator -( const CatmullRomCurveT& a, const Vertex& b ) {
110         return CatmullRomCurveT(a.v0-b,a.v1-b,a.v2-b,a.v3-b);
111       }
112 
xfm_prCatmullRomCurveT113       __forceinline CatmullRomCurveT<Vec3ff> xfm_pr(const LinearSpace3fa& space, const Vec3fa& p) const
114       {
115         const Vec3ff q0(xfmVector(space,v0-p), v0.w);
116         const Vec3ff q1(xfmVector(space,v1-p), v1.w);
117         const Vec3ff q2(xfmVector(space,v2-p), v2.w);
118         const Vec3ff q3(xfmVector(space,v3-p), v3.w);
119         return CatmullRomCurveT<Vec3ff>(q0,q1,q2,q3);
120       }
121 
evalCatmullRomCurveT122       __forceinline Vertex eval(const float t) const
123       {
124         const Vec4<float> b = CatmullRomBasis::eval(t);
125         return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
126       }
127 
eval_duCatmullRomCurveT128       __forceinline Vertex eval_du(const float t) const
129       {
130         const Vec4<float> b = CatmullRomBasis::derivative(t);
131         return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
132       }
133 
eval_duduCatmullRomCurveT134       __forceinline Vertex eval_dudu(const float t) const
135       {
136         const Vec4<float> b = CatmullRomBasis::derivative2(t);
137         return madd(b.x,v0,madd(b.y,v1,madd(b.z,v2,b.w*v3)));
138       }
139 
evalCatmullRomCurveT140       __forceinline void eval(const float t, Vertex& p, Vertex& dp, Vertex& ddp) const
141       {
142         p = eval(t);
143         dp = eval_du(t);
144         ddp = eval_dudu(t);
145       }
146 
147       template<int M>
vevalCatmullRomCurveT148       __forceinline Vec4vf<M> veval(const vfloat<M>& t) const
149       {
150         const Vec4vf<M> b = CatmullRomBasis::eval(t);
151         return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
152       }
153 
154       template<int M>
veval_duCatmullRomCurveT155       __forceinline Vec4vf<M> veval_du(const vfloat<M>& t) const
156       {
157         const Vec4vf<M> b = CatmullRomBasis::derivative(t);
158         return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
159       }
160 
161       template<int M>
veval_duduCatmullRomCurveT162       __forceinline Vec4vf<M> veval_dudu(const vfloat<M>& t) const
163       {
164         const Vec4vf<M> b = CatmullRomBasis::derivative2(t);
165         return madd(b.x, Vec4vf<M>(v0), madd(b.y, Vec4vf<M>(v1), madd(b.z, Vec4vf<M>(v2), b.w * Vec4vf<M>(v3))));
166       }
167 
168       template<int M>
vevalCatmullRomCurveT169       __forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const
170       {
171         p = veval<M>(t);
172         dp = veval_du<M>(t);
173       }
174 
175       template<int M>
eval0CatmullRomCurveT176       __forceinline Vec4vf<M> eval0(const int ofs, const int size) const
177       {
178         assert(size <= PrecomputedCatmullRomBasis::N);
179         assert(ofs <= size);
180         return madd(vfloat<M>::loadu(&catmullrom_basis0.c0[size][ofs]), Vec4vf<M>(v0),
181                     madd(vfloat<M>::loadu(&catmullrom_basis0.c1[size][ofs]), Vec4vf<M>(v1),
182                          madd(vfloat<M>::loadu(&catmullrom_basis0.c2[size][ofs]), Vec4vf<M>(v2),
183                               vfloat<M>::loadu(&catmullrom_basis0.c3[size][ofs]) * Vec4vf<M>(v3))));
184       }
185 
186       template<int M>
eval1CatmullRomCurveT187       __forceinline Vec4vf<M> eval1(const int ofs, const int size) const
188       {
189         assert(size <= PrecomputedCatmullRomBasis::N);
190         assert(ofs <= size);
191         return madd(vfloat<M>::loadu(&catmullrom_basis1.c0[size][ofs]), Vec4vf<M>(v0),
192                     madd(vfloat<M>::loadu(&catmullrom_basis1.c1[size][ofs]), Vec4vf<M>(v1),
193                          madd(vfloat<M>::loadu(&catmullrom_basis1.c2[size][ofs]), Vec4vf<M>(v2),
194                               vfloat<M>::loadu(&catmullrom_basis1.c3[size][ofs]) * Vec4vf<M>(v3))));
195       }
196 
197       template<int M>
derivative0CatmullRomCurveT198       __forceinline Vec4vf<M> derivative0(const int ofs, const int size) const
199       {
200         assert(size <= PrecomputedCatmullRomBasis::N);
201         assert(ofs <= size);
202         return madd(vfloat<M>::loadu(&catmullrom_basis0.d0[size][ofs]), Vec4vf<M>(v0),
203                     madd(vfloat<M>::loadu(&catmullrom_basis0.d1[size][ofs]), Vec4vf<M>(v1),
204                          madd(vfloat<M>::loadu(&catmullrom_basis0.d2[size][ofs]), Vec4vf<M>(v2),
205                               vfloat<M>::loadu(&catmullrom_basis0.d3[size][ofs]) * Vec4vf<M>(v3))));
206       }
207 
208       template<int M>
derivative1CatmullRomCurveT209       __forceinline Vec4vf<M> derivative1(const int ofs, const int size) const
210       {
211         assert(size <= PrecomputedCatmullRomBasis::N);
212         assert(ofs <= size);
213         return madd(vfloat<M>::loadu(&catmullrom_basis1.d0[size][ofs]), Vec4vf<M>(v0),
214                     madd(vfloat<M>::loadu(&catmullrom_basis1.d1[size][ofs]), Vec4vf<M>(v1),
215                          madd(vfloat<M>::loadu(&catmullrom_basis1.d2[size][ofs]), Vec4vf<M>(v2),
216                               vfloat<M>::loadu(&catmullrom_basis1.d3[size][ofs]) * Vec4vf<M>(v3))));
217       }
218 
219       /* calculates bounds of catmull-rom curve geometry */
accurateRoundBoundsCatmullRomCurveT220       __forceinline BBox3fa accurateRoundBounds() const
221       {
222         const int N = 7;
223         const float scale = 1.0f/(3.0f*(N-1));
224         Vec4vfx pl(pos_inf), pu(neg_inf);
225         for (int i=0; i<=N; i+=VSIZEX)
226         {
227           vintx vi = vintx(i)+vintx(step);
228           vboolx valid = vi <= vintx(N);
229           const Vec4vfx p  = eval0<VSIZEX>(i,N);
230           const Vec4vfx dp = derivative0<VSIZEX>(i,N);
231           const Vec4vfx pm = p-Vec4vfx(scale)*select(vi!=vintx(0),dp,Vec4vfx(zero));
232           const Vec4vfx pp = p+Vec4vfx(scale)*select(vi!=vintx(N),dp,Vec4vfx(zero));
233           pl = select(valid,min(pl,p,pm,pp),pl); // FIXME: use masked min
234           pu = select(valid,max(pu,p,pm,pp),pu); // FIXME: use masked min
235         }
236         const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
237         const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
238         const float r_min = reduce_min(pl.w);
239         const float r_max = reduce_max(pu.w);
240         const Vec3fa upper_r = Vec3fa(max(abs(r_min),abs(r_max)));
241         return enlarge(BBox3fa(lower,upper),upper_r);
242       }
243 
244       /* calculates bounds when tessellated into N line segments */
accurateFlatBoundsCatmullRomCurveT245       __forceinline BBox3fa accurateFlatBounds(int N) const
246       {
247         if (likely(N == 4))
248         {
249           const Vec4vf4 pi = eval0<4>(0,4);
250           const Vec3fa lower(reduce_min(pi.x),reduce_min(pi.y),reduce_min(pi.z));
251           const Vec3fa upper(reduce_max(pi.x),reduce_max(pi.y),reduce_max(pi.z));
252           const Vec3fa upper_r = Vec3fa(reduce_max(abs(pi.w)));
253           const Vec3ff pe = end();
254           return enlarge(BBox3fa(min(lower,pe),max(upper,pe)),max(upper_r,Vec3fa(abs(pe.w))));
255         }
256         else
257         {
258           Vec3vfx pl(pos_inf), pu(neg_inf); vfloatx ru(0.0f);
259           for (int i=0; i<=N; i+=VSIZEX)
260           {
261             vboolx valid = vintx(i)+vintx(step) <= vintx(N);
262             const Vec4vfx pi = eval0<VSIZEX>(i,N);
263 
264             pl.x = select(valid,min(pl.x,pi.x),pl.x); // FIXME: use masked min
265             pl.y = select(valid,min(pl.y,pi.y),pl.y);
266             pl.z = select(valid,min(pl.z,pi.z),pl.z);
267 
268             pu.x = select(valid,max(pu.x,pi.x),pu.x); // FIXME: use masked min
269             pu.y = select(valid,max(pu.y,pi.y),pu.y);
270             pu.z = select(valid,max(pu.z,pi.z),pu.z);
271 
272             ru = select(valid,max(ru,abs(pi.w)),ru);
273           }
274           const Vec3fa lower(reduce_min(pl.x),reduce_min(pl.y),reduce_min(pl.z));
275           const Vec3fa upper(reduce_max(pu.x),reduce_max(pu.y),reduce_max(pu.z));
276           const Vec3fa upper_r(reduce_max(ru));
277           return enlarge(BBox3fa(lower,upper),upper_r);
278         }
279       }
280 
281       friend __forceinline embree_ostream operator<<(embree_ostream cout, const CatmullRomCurveT& curve) {
282         return cout << "CatmullRomCurve { v0 = " << curve.v0 << ", v1 = " << curve.v1 << ", v2 = " << curve.v2 << ", v3 = " << curve.v3 << " }";
283       }
284     };
285 
286   template<typename CurveGeometry>
enlargeRadiusToMinWidth(const IntersectContext * context,const CurveGeometry * geom,const Vec3fa & ray_org,const CatmullRomCurveT<Vec3ff> & curve)287   __forceinline CatmullRomCurveT<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CatmullRomCurveT<Vec3ff>& curve)
288   {
289     return CatmullRomCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0),
290                                     enlargeRadiusToMinWidth(context,geom,ray_org,curve.v1),
291                                     enlargeRadiusToMinWidth(context,geom,ray_org,curve.v2),
292                                     enlargeRadiusToMinWidth(context,geom,ray_org,curve.v3));
293   }
294 
295   typedef CatmullRomCurveT<Vec3fa> CatmullRomCurve3fa;
296 }
297 
298