1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #pragma once
5 
6 #include "catmullclark_patch.h"
7 #include "bezier_patch.h"
8 #include "bezier_curve.h"
9 #include "catmullclark_coefficients.h"
10 
11 namespace embree
12 {
13   template<typename Vertex, typename Vertex_t = Vertex>
14   class __aligned(64) GregoryPatchT
15   {
16     typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
17     typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch;
18     typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring;
19     typedef BezierCurveT<Vertex> BezierCurve;
20 
21   public:
22     Vertex v[4][4];
23     Vertex f[2][2];
24 
GregoryPatchT()25     __forceinline GregoryPatchT() {}
26 
GregoryPatchT(const CatmullClarkPatch & patch)27     __forceinline GregoryPatchT(const CatmullClarkPatch& patch) {
28       init(patch);
29     }
30 
GregoryPatchT(const CatmullClarkPatch & patch,const BezierCurve * border0,const BezierCurve * border1,const BezierCurve * border2,const BezierCurve * border3)31     __forceinline GregoryPatchT(const CatmullClarkPatch& patch,
32                                 const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3)
33     {
34       init_crackfix(patch,border0,border1,border2,border3);
35     }
36 
GregoryPatchT(const HalfEdge * edge,const char * vertices,size_t stride)37     __forceinline GregoryPatchT (const HalfEdge* edge, const char* vertices, size_t stride) {
38       init(CatmullClarkPatch(edge,vertices,stride));
39     }
40 
p0()41     __forceinline Vertex& p0() { return v[0][0]; }
p1()42     __forceinline Vertex& p1() { return v[0][3]; }
p2()43     __forceinline Vertex& p2() { return v[3][3]; }
p3()44     __forceinline Vertex& p3() { return v[3][0]; }
45 
e0_p()46     __forceinline Vertex& e0_p() { return v[0][1]; }
e0_m()47     __forceinline Vertex& e0_m() { return v[1][0]; }
e1_p()48     __forceinline Vertex& e1_p() { return v[1][3]; }
e1_m()49     __forceinline Vertex& e1_m() { return v[0][2]; }
e2_p()50     __forceinline Vertex& e2_p() { return v[3][2]; }
e2_m()51     __forceinline Vertex& e2_m() { return v[2][3]; }
e3_p()52     __forceinline Vertex& e3_p() { return v[2][0]; }
e3_m()53     __forceinline Vertex& e3_m() { return v[3][1]; }
54 
f0_p()55     __forceinline Vertex& f0_p() { return v[1][1]; }
f1_p()56     __forceinline Vertex& f1_p() { return v[1][2]; }
f2_p()57     __forceinline Vertex& f2_p() { return v[2][2]; }
f3_p()58     __forceinline Vertex& f3_p() { return v[2][1]; }
f0_m()59     __forceinline Vertex& f0_m() { return f[0][0]; }
f1_m()60     __forceinline Vertex& f1_m() { return f[0][1]; }
f2_m()61     __forceinline Vertex& f2_m() { return f[1][1]; }
f3_m()62     __forceinline Vertex& f3_m() { return f[1][0]; }
63 
p0()64     __forceinline const Vertex& p0() const { return v[0][0]; }
p1()65     __forceinline const Vertex& p1() const { return v[0][3]; }
p2()66     __forceinline const Vertex& p2() const { return v[3][3]; }
p3()67     __forceinline const Vertex& p3() const { return v[3][0]; }
68 
e0_p()69     __forceinline const Vertex& e0_p() const { return v[0][1]; }
e0_m()70     __forceinline const Vertex& e0_m() const { return v[1][0]; }
e1_p()71     __forceinline const Vertex& e1_p() const { return v[1][3]; }
e1_m()72     __forceinline const Vertex& e1_m() const { return v[0][2]; }
e2_p()73     __forceinline const Vertex& e2_p() const { return v[3][2]; }
e2_m()74     __forceinline const Vertex& e2_m() const { return v[2][3]; }
e3_p()75     __forceinline const Vertex& e3_p() const { return v[2][0]; }
e3_m()76     __forceinline const Vertex& e3_m() const { return v[3][1]; }
77 
f0_p()78     __forceinline const Vertex& f0_p() const { return v[1][1]; }
f1_p()79     __forceinline const Vertex& f1_p() const { return v[1][2]; }
f2_p()80     __forceinline const Vertex& f2_p() const { return v[2][2]; }
f3_p()81     __forceinline const Vertex& f3_p() const { return v[2][1]; }
f0_m()82     __forceinline const Vertex& f0_m() const { return f[0][0]; }
f1_m()83     __forceinline const Vertex& f1_m() const { return f[0][1]; }
f2_m()84     __forceinline const Vertex& f2_m() const { return f[1][1]; }
f3_m()85     __forceinline const Vertex& f3_m() const { return f[1][0]; }
86 
initCornerVertex(const CatmullClarkPatch & irreg_patch,const size_t index)87     __forceinline Vertex initCornerVertex(const CatmullClarkPatch& irreg_patch, const size_t index) {
88       return irreg_patch.ring[index].getLimitVertex();
89     }
90 
initPositiveEdgeVertex(const CatmullClarkPatch & irreg_patch,const size_t index,const Vertex & p_vtx)91     __forceinline Vertex initPositiveEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) {
92       return madd(1.0f/3.0f,irreg_patch.ring[index].getLimitTangent(),p_vtx);
93     }
94 
initNegativeEdgeVertex(const CatmullClarkPatch & irreg_patch,const size_t index,const Vertex & p_vtx)95     __forceinline Vertex initNegativeEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) {
96       return madd(1.0f/3.0f,irreg_patch.ring[index].getSecondLimitTangent(),p_vtx);
97     }
98 
initPositiveEdgeVertex2(const CatmullClarkPatch & irreg_patch,const size_t index,const Vertex & p_vtx)99     __forceinline Vertex initPositiveEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
100     {
101       CatmullClark1Ring3fa r0,r1,r2;
102       irreg_patch.ring[index].subdivide(r0);
103       r0.subdivide(r1);
104       r1.subdivide(r2);
105       return madd(8.0f/3.0f,r2.getLimitTangent(),p_vtx);
106     }
107 
initNegativeEdgeVertex2(const CatmullClarkPatch & irreg_patch,const size_t index,const Vertex & p_vtx)108     __forceinline Vertex initNegativeEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx)
109     {
110       CatmullClark1Ring3fa r0,r1,r2;
111       irreg_patch.ring[index].subdivide(r0);
112       r0.subdivide(r1);
113       r1.subdivide(r2);
114       return madd(8.0f/3.0f,r2.getSecondLimitTangent(),p_vtx);
115     }
116 
initFaceVertex(const CatmullClarkPatch & irreg_patch,const size_t index,const Vertex & p_vtx,const Vertex & e0_p_vtx,const Vertex & e1_m_vtx,const unsigned int face_valence_p1,const Vertex & e0_m_vtx,const Vertex & e3_p_vtx,const unsigned int face_valence_p3,Vertex & f_p_vtx,Vertex & f_m_vtx)117     void initFaceVertex(const CatmullClarkPatch& irreg_patch,
118 			const size_t index,
119 			const Vertex& p_vtx,
120                         const Vertex& e0_p_vtx,
121 			const Vertex& e1_m_vtx,
122 			const unsigned int face_valence_p1,
123  			const Vertex& e0_m_vtx,
124 			const Vertex& e3_p_vtx,
125 			const unsigned int face_valence_p3,
126 			Vertex& f_p_vtx,
127 			Vertex& f_m_vtx)
128     {
129       const unsigned int face_valence = irreg_patch.ring[index].face_valence;
130       const unsigned int edge_valence = irreg_patch.ring[index].edge_valence;
131       const unsigned int border_index = irreg_patch.ring[index].border_index;
132 
133       const Vertex& vtx     = irreg_patch.ring[index].vtx;
134       const Vertex e_i      = irreg_patch.ring[index].getEdgeCenter(0);
135       const Vertex c_i_m_1  = irreg_patch.ring[index].getQuadCenter(0);
136       const Vertex e_i_m_1  = irreg_patch.ring[index].getEdgeCenter(1);
137 
138       Vertex c_i, e_i_p_1;
139       const bool hasHardEdge0 =
140         std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
141         std::isinf(irreg_patch.ring[index].crease_weight[0]);
142 
143       if (unlikely((border_index == edge_valence-2) || hasHardEdge0))
144       {
145         /* mirror quad center and edge mid-point */
146         c_i     = madd(2.0f, e_i - c_i_m_1, c_i_m_1);
147         e_i_p_1 = madd(2.0f, vtx - e_i_m_1, e_i_m_1);
148       }
149       else
150       {
151         c_i     = irreg_patch.ring[index].getQuadCenter( face_valence-1 );
152         e_i_p_1 = irreg_patch.ring[index].getEdgeCenter( face_valence-1 );
153       }
154 
155       Vertex c_i_m_2, e_i_m_2;
156       const bool hasHardEdge1 =
157         std::isinf(irreg_patch.ring[index].vertex_crease_weight) &&
158         std::isinf(irreg_patch.ring[index].crease_weight[1]);
159 
160       if (unlikely(border_index == 2 || hasHardEdge1))
161       {
162         /* mirror quad center and edge mid-point */
163         c_i_m_2  = madd(2.0f, e_i_m_1 - c_i_m_1, c_i_m_1);
164         e_i_m_2  = madd(2.0f, vtx - e_i, + e_i);
165       }
166       else
167       {
168         c_i_m_2  = irreg_patch.ring[index].getQuadCenter( 1 );
169         e_i_m_2  = irreg_patch.ring[index].getEdgeCenter( 2 );
170       }
171 
172       const float d = 3.0f;
173       //const float c     = cosf(2.0f*M_PI/(float)face_valence);
174       //const float c_e_p = cosf(2.0f*M_PI/(float)face_valence_p1);
175       //const float c_e_m = cosf(2.0f*M_PI/(float)face_valence_p3);
176 
177       const float c     = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
178       const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
179       const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
180 
181       const Vertex r_e_p = 1.0f/3.0f * (e_i_m_1 - e_i_p_1) + 2.0f/3.0f * (c_i_m_1 - c_i);
182       const Vertex r_e_m = 1.0f/3.0f * (e_i     - e_i_m_2) + 2.0f/3.0f * (c_i_m_1 - c_i_m_2);
183 
184       f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
185       f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
186     }
187 
init(const CatmullClarkPatch & patch)188     __noinline void init(const CatmullClarkPatch& patch)
189     {
190       assert( patch.ring[0].hasValidPositions() );
191       assert( patch.ring[1].hasValidPositions() );
192       assert( patch.ring[2].hasValidPositions() );
193       assert( patch.ring[3].hasValidPositions() );
194 
195       p0() = initCornerVertex(patch,0);
196       p1() = initCornerVertex(patch,1);
197       p2() = initCornerVertex(patch,2);
198       p3() = initCornerVertex(patch,3);
199 
200       e0_p() = initPositiveEdgeVertex(patch,0, p0());
201       e1_p() = initPositiveEdgeVertex(patch,1, p1());
202       e2_p() = initPositiveEdgeVertex(patch,2, p2());
203       e3_p() = initPositiveEdgeVertex(patch,3, p3());
204 
205       e0_m() = initNegativeEdgeVertex(patch,0, p0());
206       e1_m() = initNegativeEdgeVertex(patch,1, p1());
207       e2_m() = initNegativeEdgeVertex(patch,2, p2());
208       e3_m() = initNegativeEdgeVertex(patch,3, p3());
209 
210       const unsigned int face_valence_p0 = patch.ring[0].face_valence;
211       const unsigned int face_valence_p1 = patch.ring[1].face_valence;
212       const unsigned int face_valence_p2 = patch.ring[2].face_valence;
213       const unsigned int face_valence_p3 = patch.ring[3].face_valence;
214 
215       initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
216       initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
217       initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
218       initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() );
219 
220     }
221 
init_crackfix(const CatmullClarkPatch & patch,const BezierCurve * border0,const BezierCurve * border1,const BezierCurve * border2,const BezierCurve * border3)222     __noinline void init_crackfix(const CatmullClarkPatch& patch,
223                                   const BezierCurve* border0,
224                                   const BezierCurve* border1,
225                                   const BezierCurve* border2,
226                                   const BezierCurve* border3)
227     {
228       assert( patch.ring[0].hasValidPositions() );
229       assert( patch.ring[1].hasValidPositions() );
230       assert( patch.ring[2].hasValidPositions() );
231       assert( patch.ring[3].hasValidPositions() );
232 
233       p0() = initCornerVertex(patch,0);
234       p1() = initCornerVertex(patch,1);
235       p2() = initCornerVertex(patch,2);
236       p3() = initCornerVertex(patch,3);
237 
238       e0_p() = initPositiveEdgeVertex(patch,0, p0());
239       e1_p() = initPositiveEdgeVertex(patch,1, p1());
240       e2_p() = initPositiveEdgeVertex(patch,2, p2());
241       e3_p() = initPositiveEdgeVertex(patch,3, p3());
242 
243       e0_m() = initNegativeEdgeVertex(patch,0, p0());
244       e1_m() = initNegativeEdgeVertex(patch,1, p1());
245       e2_m() = initNegativeEdgeVertex(patch,2, p2());
246       e3_m() = initNegativeEdgeVertex(patch,3, p3());
247 
248       if (unlikely(border0 != nullptr))
249       {
250         p0()   = border0->v0;
251         e0_p() = border0->v1;
252         e1_m() = border0->v2;
253         p1()   = border0->v3;
254       }
255 
256       if (unlikely(border1 != nullptr))
257       {
258         p1()   = border1->v0;
259         e1_p() = border1->v1;
260         e2_m() = border1->v2;
261         p2()   = border1->v3;
262       }
263 
264       if (unlikely(border2 != nullptr))
265       {
266         p2()   = border2->v0;
267         e2_p() = border2->v1;
268         e3_m() = border2->v2;
269         p3()   = border2->v3;
270       }
271 
272       if (unlikely(border3 != nullptr))
273       {
274         p3()   = border3->v0;
275         e3_p() = border3->v1;
276         e0_m() = border3->v2;
277         p0()   = border3->v3;
278       }
279 
280       const unsigned int face_valence_p0 = patch.ring[0].face_valence;
281       const unsigned int face_valence_p1 = patch.ring[1].face_valence;
282       const unsigned int face_valence_p2 = patch.ring[2].face_valence;
283       const unsigned int face_valence_p3 = patch.ring[3].face_valence;
284 
285       initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() );
286       initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() );
287       initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() );
288       initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() );
289     }
290 
291 
292     void computeGregoryPatchFacePoints(const unsigned int face_valence,
293 				       const Vertex& r_e_p,
294 				       const Vertex& r_e_m,
295 				       const Vertex& p_vtx,
296 				       const Vertex& e0_p_vtx,
297 				       const Vertex& e1_m_vtx,
298 				       const unsigned int face_valence_p1,
299 				       const Vertex& e0_m_vtx,
300 				       const Vertex& e3_p_vtx,
301 				       const unsigned int face_valence_p3,
302 				       Vertex& f_p_vtx,
303 				       Vertex& f_m_vtx,
304                                        const float d = 3.0f)
305     {
306       //const float c     = cosf(2.0*M_PI/(float)face_valence);
307       //const float c_e_p = cosf(2.0*M_PI/(float)face_valence_p1);
308       //const float c_e_m = cosf(2.0*M_PI/(float)face_valence_p3);
309 
310       const float c     = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence);
311       const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1);
312       const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3);
313 
314 
315       f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
316       f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
317       f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p);
318       f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m);
319     }
320 
init(const GeneralCatmullClarkPatch & patch)321     __noinline void init(const GeneralCatmullClarkPatch& patch)
322     {
323       assert(patch.size() == 4);
324 #if 0
325       CatmullClarkPatch qpatch; patch.init(qpatch);
326       init(qpatch);
327 #else
328       const float face_valence_p0 = patch.ring[0].face_valence;
329       const float face_valence_p1 = patch.ring[1].face_valence;
330       const float face_valence_p2 = patch.ring[2].face_valence;
331       const float face_valence_p3 = patch.ring[3].face_valence;
332 
333       Vertex p0_r_p, p0_r_m;
334       patch.ring[0].computeGregoryPatchEdgePoints( p0(), e0_p(), e0_m(), p0_r_p, p0_r_m );
335 
336       Vertex p1_r_p, p1_r_m;
337       patch.ring[1].computeGregoryPatchEdgePoints( p1(), e1_p(), e1_m(), p1_r_p, p1_r_m );
338 
339       Vertex p2_r_p, p2_r_m;
340       patch.ring[2].computeGregoryPatchEdgePoints( p2(), e2_p(), e2_m(), p2_r_p, p2_r_m );
341 
342       Vertex p3_r_p, p3_r_m;
343       patch.ring[3].computeGregoryPatchEdgePoints( p3(), e3_p(), e3_m(), p3_r_p, p3_r_m );
344 
345       computeGregoryPatchFacePoints(face_valence_p0, p0_r_p, p0_r_m, p0(), e0_p(), e1_m(), face_valence_p1, e0_m(), e3_p(), face_valence_p3, f0_p(), f0_m() );
346       computeGregoryPatchFacePoints(face_valence_p1, p1_r_p, p1_r_m, p1(), e1_p(), e2_m(), face_valence_p2, e1_m(), e0_p(), face_valence_p0, f1_p(), f1_m() );
347       computeGregoryPatchFacePoints(face_valence_p2, p2_r_p, p2_r_m, p2(), e2_p(), e3_m(), face_valence_p3, e2_m(), e1_p(), face_valence_p1, f2_p(), f2_m() );
348       computeGregoryPatchFacePoints(face_valence_p3, p3_r_p, p3_r_m, p3(), e3_p(), e0_m(), face_valence_p0, e3_m(), e2_p(), face_valence_p3, f3_p(), f3_m() );
349 
350 #endif
351     }
352 
353 
convert_to_bezier()354     __forceinline void convert_to_bezier()
355     {
356       f0_p() = (f0_p() + f0_m()) * 0.5f;
357       f1_p() = (f1_p() + f1_m()) * 0.5f;
358       f2_p() = (f2_p() + f2_m()) * 0.5f;
359       f3_p() = (f3_p() + f3_m()) * 0.5f;
360       f0_m() = Vertex( zero );
361       f1_m() = Vertex( zero );
362       f2_m() = Vertex( zero );
363       f3_m() = Vertex( zero );
364     }
365 
computeInnerVertices(const Vertex matrix[4][4],const Vertex f_m[2][2],const float uu,const float vv,Vertex_t & matrix_11,Vertex_t & matrix_12,Vertex_t & matrix_22,Vertex_t & matrix_21)366     static __forceinline void computeInnerVertices(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv,
367 						   Vertex_t& matrix_11, Vertex_t& matrix_12, Vertex_t& matrix_22, Vertex_t& matrix_21)
368     {
369       if (unlikely(uu == 0.0f || uu == 1.0f || vv == 0.0f || vv == 1.0f))
370       {
371 	matrix_11 = matrix[1][1];
372 	matrix_12 = matrix[1][2];
373 	matrix_22 = matrix[2][2];
374 	matrix_21 = matrix[2][1];
375       }
376       else
377       {
378 	const Vertex_t f0_p = matrix[1][1];
379 	const Vertex_t f1_p = matrix[1][2];
380 	const Vertex_t f2_p = matrix[2][2];
381 	const Vertex_t f3_p = matrix[2][1];
382 
383 	const Vertex_t f0_m = f_m[0][0];
384 	const Vertex_t f1_m = f_m[0][1];
385 	const Vertex_t f2_m = f_m[1][1];
386 	const Vertex_t f3_m = f_m[1][0];
387 
388 	matrix_11 = (      uu  * f0_p +       vv  * f0_m)*rcp(uu+vv);
389 	matrix_12 = ((1.0f-uu) * f1_m +       vv  * f1_p)*rcp(1.0f-uu+vv);
390 	matrix_22 = ((1.0f-uu) * f2_p + (1.0f-vv) * f2_m)*rcp(2.0f-uu-vv);
391 	matrix_21 = (      uu  * f3_m + (1.0f-vv) * f3_p)*rcp(1.0f+uu-vv);
392       }
393     }
394 
395     template<typename vfloat>
computeInnerVertices(const Vertex v[4][4],const Vertex f[2][2],size_t i,const vfloat & uu,const vfloat & vv,vfloat & matrix_11,vfloat & matrix_12,vfloat & matrix_22,vfloat & matrix_21)396     static __forceinline void computeInnerVertices(const Vertex v[4][4], const Vertex f[2][2],
397                                                    size_t i, const vfloat& uu, const vfloat& vv, vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
398     {
399       const auto m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
400 
401       const vfloat f0_p = v[1][1][i];
402       const vfloat f1_p = v[1][2][i];
403       const vfloat f2_p = v[2][2][i];
404       const vfloat f3_p = v[2][1][i];
405 
406       const vfloat f0_m = f[0][0][i];
407       const vfloat f1_m = f[0][1][i];
408       const vfloat f2_m = f[1][1][i];
409       const vfloat f3_m = f[1][0][i];
410 
411       const vfloat one_minus_uu = vfloat(1.0f) - uu;
412       const vfloat one_minus_vv = vfloat(1.0f) - vv;
413 
414       const vfloat f0_i = (          uu * f0_p +           vv * f0_m) * rcp(uu+vv);
415       const vfloat f1_i = (one_minus_uu * f1_m +           vv * f1_p) * rcp(one_minus_uu+vv);
416       const vfloat f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
417       const vfloat f3_i = (          uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
418 
419       matrix_11 = select(m_border,f0_p,f0_i);
420       matrix_12 = select(m_border,f1_p,f1_i);
421       matrix_22 = select(m_border,f2_p,f2_i);
422       matrix_21 = select(m_border,f3_p,f3_i);
423     }
424 
eval(const Vertex matrix[4][4],const Vertex f[2][2],const float & uu,const float & vv)425     static __forceinline Vertex eval(const Vertex matrix[4][4], const Vertex f[2][2], const float& uu, const float& vv)
426     {
427       Vertex_t v_11, v_12, v_22, v_21;
428       computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
429 
430       const Vec4<float> Bu = BezierBasis::eval(uu);
431       const Vec4<float> Bv = BezierBasis::eval(vv);
432 
433       return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
434                   madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))),
435                        madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))),
436                             Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
437     }
438 
eval_du(const Vertex matrix[4][4],const Vertex f[2][2],const float uu,const float vv)439     static __forceinline Vertex eval_du(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
440     {
441       Vertex_t v_11, v_12, v_22, v_21;
442       computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
443 
444       const Vec4<float> Bu = BezierBasis::derivative(uu);
445       const Vec4<float> Bv = BezierBasis::eval(vv);
446 
447       return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
448                   madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))),
449                        madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))),
450                             Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
451     }
452 
eval_dv(const Vertex matrix[4][4],const Vertex f[2][2],const float uu,const float vv)453     static __forceinline Vertex eval_dv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
454     {
455       Vertex_t v_11, v_12, v_22, v_21;
456       computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
457 
458       const Vec4<float> Bu = BezierBasis::eval(uu);
459       const Vec4<float> Bv = BezierBasis::derivative(vv);
460 
461       return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
462                   madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))),
463                        madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))),
464                             Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
465     }
466 
eval_dudu(const Vertex matrix[4][4],const Vertex f[2][2],const float uu,const float vv)467     static __forceinline Vertex eval_dudu(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
468     {
469       Vertex_t v_11, v_12, v_22, v_21;
470       computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
471 
472       const Vec4<float> Bu = BezierBasis::derivative2(uu);
473       const Vec4<float> Bv = BezierBasis::eval(vv);
474 
475       return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
476                   madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))),
477                        madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))),
478                             Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
479      }
480 
eval_dvdv(const Vertex matrix[4][4],const Vertex f[2][2],const float uu,const float vv)481     static __forceinline Vertex eval_dvdv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
482     {
483       Vertex_t v_11, v_12, v_22, v_21;
484       computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
485 
486       const Vec4<float> Bu = BezierBasis::eval(uu);
487       const Vec4<float> Bv = BezierBasis::derivative2(vv);
488 
489       return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
490                   madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))),
491                        madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))),
492                             Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
493     }
494 
eval_dudv(const Vertex matrix[4][4],const Vertex f[2][2],const float uu,const float vv)495     static __forceinline Vertex eval_dudv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative
496     {
497       Vertex_t v_11, v_12, v_22, v_21;
498       computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21);
499 
500       const Vec4<float> Bu = BezierBasis::derivative(uu);
501       const Vec4<float> Bv = BezierBasis::derivative(vv);
502 
503       return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))),
504                   madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11        ,madd(Bu.z,v_12        ,Bu.w * matrix[1][3]))),
505                        madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21        ,madd(Bu.z,v_22        ,Bu.w * matrix[2][3]))),
506                             Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3]))))));
507     }
508 
eval(const float uu,const float vv)509     __forceinline Vertex eval(const float uu, const float vv) const {
510       return eval(v,f,uu,vv);
511     }
512 
eval_du(const float uu,const float vv)513     __forceinline Vertex eval_du( const float uu, const float vv) const {
514       return eval_du(v,f,uu,vv);
515     }
516 
eval_dv(const float uu,const float vv)517     __forceinline Vertex eval_dv( const float uu, const float vv) const {
518       return eval_dv(v,f,uu,vv);
519     }
520 
eval_dudu(const float uu,const float vv)521     __forceinline Vertex eval_dudu( const float uu, const float vv) const {
522       return eval_dudu(v,f,uu,vv);
523     }
524 
eval_dvdv(const float uu,const float vv)525     __forceinline Vertex eval_dvdv( const float uu, const float vv) const {
526       return eval_dvdv(v,f,uu,vv);
527     }
528 
eval_dudv(const float uu,const float vv)529     __forceinline Vertex eval_dudv( const float uu, const float vv) const {
530       return eval_dudv(v,f,uu,vv);
531     }
532 
normal(const Vertex matrix[4][4],const Vertex f_m[2][2],const float uu,const float vv)533     static __forceinline Vertex normal(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv)  // FIXME: why not using basis functions
534     {
535       /* interpolate inner vertices */
536       Vertex_t matrix_11, matrix_12, matrix_22, matrix_21;
537       computeInnerVertices(matrix,f_m,uu,vv,matrix_11, matrix_12, matrix_22, matrix_21);
538 
539       /* tangentU */
540       const Vertex_t col0 = deCasteljau(vv, (Vertex_t)matrix[0][0], (Vertex_t)matrix[1][0], (Vertex_t)matrix[2][0], (Vertex_t)matrix[3][0]);
541       const Vertex_t col1 = deCasteljau(vv, (Vertex_t)matrix[0][1], (Vertex_t)matrix_11   , (Vertex_t)matrix_21   , (Vertex_t)matrix[3][1]);
542       const Vertex_t col2 = deCasteljau(vv, (Vertex_t)matrix[0][2], (Vertex_t)matrix_12   , (Vertex_t)matrix_22   , (Vertex_t)matrix[3][2]);
543       const Vertex_t col3 = deCasteljau(vv, (Vertex_t)matrix[0][3], (Vertex_t)matrix[1][3], (Vertex_t)matrix[2][3], (Vertex_t)matrix[3][3]);
544 
545       const Vertex_t tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
546 
547       /* tangentV */
548       const Vertex_t row0 = deCasteljau(uu, (Vertex_t)matrix[0][0], (Vertex_t)matrix[0][1], (Vertex_t)matrix[0][2], (Vertex_t)matrix[0][3]);
549       const Vertex_t row1 = deCasteljau(uu, (Vertex_t)matrix[1][0], (Vertex_t)matrix_11   , (Vertex_t)matrix_12   , (Vertex_t)matrix[1][3]);
550       const Vertex_t row2 = deCasteljau(uu, (Vertex_t)matrix[2][0], (Vertex_t)matrix_21   , (Vertex_t)matrix_22   , (Vertex_t)matrix[2][3]);
551       const Vertex_t row3 = deCasteljau(uu, (Vertex_t)matrix[3][0], (Vertex_t)matrix[3][1], (Vertex_t)matrix[3][2], (Vertex_t)matrix[3][3]);
552 
553       const Vertex_t tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
554 
555       /* normal = tangentU x tangentV */
556       const Vertex_t n = cross(tangentU,tangentV);
557 
558       return n;
559     }
560 
normal(const float uu,const float vv)561     __forceinline Vertex normal( const float uu, const float vv) const {
562       return normal(v,f,uu,vv);
563     }
564 
565     __forceinline void eval(const float u, const float v,
566                             Vertex* P, Vertex* dPdu, Vertex* dPdv,
567                             Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
568                             const float dscale = 1.0f) const
569     {
570       if (P) {
571         *P = eval(u,v);
572       }
573       if (dPdu) {
574         assert(dPdu); *dPdu = eval_du(u,v)*dscale;
575         assert(dPdv); *dPdv = eval_dv(u,v)*dscale;
576       }
577       if (ddPdudu) {
578         assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale);
579         assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale);
580         assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale);
581       }
582     }
583 
584     template<class vfloat>
eval(const Vertex v[4][4],const Vertex f[2][2],const size_t i,const vfloat & uu,const vfloat & vv,const Vec4<vfloat> & u_n,const Vec4<vfloat> & v_n,vfloat & matrix_11,vfloat & matrix_12,vfloat & matrix_22,vfloat & matrix_21)585     static __forceinline vfloat eval(const Vertex v[4][4], const Vertex f[2][2],
586                                      const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n,
587                                      vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21)
588     {
589       const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i]))));
590       const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(matrix_11 ),madd(v_n[2],vfloat(matrix_21 ),v_n[3] * vfloat(v[3][1][i]))));
591       const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(matrix_12 ),madd(v_n[2],vfloat(matrix_22 ),v_n[3] * vfloat(v[3][2][i]))));
592       const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i]))));
593       return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
594     }
595 
596     template<typename vbool, typename vfloat>
eval(const Vertex v[4][4],const Vertex f[2][2],const vbool & valid,const vfloat & uu,const vfloat & vv,float * P,float * dPdu,float * dPdv,float * ddPdudu,float * ddPdvdv,float * ddPdudv,const float dscale,const size_t dstride,const size_t N)597     static __forceinline void eval(const Vertex v[4][4], const Vertex f[2][2],
598                                    const vbool& valid, const vfloat& uu, const vfloat& vv,
599                                    float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
600                                    const float dscale, const size_t dstride, const size_t N)
601     {
602       if (P) {
603         const Vec4<vfloat> u_n = BezierBasis::eval(uu);
604         const Vec4<vfloat> v_n = BezierBasis::eval(vv);
605         for (size_t i=0; i<N; i++) {
606           vfloat matrix_11, matrix_12, matrix_22, matrix_21;
607           computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times
608           vfloat::store(valid,P+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21));
609         }
610       }
611       if (dPdu)
612       {
613         {
614           assert(dPdu);
615           const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
616           const Vec4<vfloat> v_n = BezierBasis::eval(vv);
617           for (size_t i=0; i<N; i++) {
618             vfloat matrix_11, matrix_12, matrix_22, matrix_21;
619             computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21);  // FIXME: calculated multiple times
620             vfloat::store(valid,dPdu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale);
621           }
622         }
623         {
624           assert(dPdv);
625           const Vec4<vfloat> u_n = BezierBasis::eval(uu);
626           const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
627           for (size_t i=0; i<N; i++) {
628             vfloat matrix_11, matrix_12, matrix_22, matrix_21;
629             computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21);  // FIXME: calculated multiple times
630             vfloat::store(valid,dPdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale);
631           }
632         }
633       }
634       if (ddPdudu)
635       {
636         {
637           assert(ddPdudu);
638           const Vec4<vfloat> u_n = BezierBasis::derivative2(uu);
639           const Vec4<vfloat> v_n = BezierBasis::eval(vv);
640           for (size_t i=0; i<N; i++) {
641             vfloat matrix_11, matrix_12, matrix_22, matrix_21;
642             computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21);  // FIXME: calculated multiple times
643             vfloat::store(valid,ddPdudu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
644           }
645         }
646         {
647           assert(ddPdvdv);
648           const Vec4<vfloat> u_n = BezierBasis::eval(uu);
649           const Vec4<vfloat> v_n = BezierBasis::derivative2(vv);
650           for (size_t i=0; i<N; i++) {
651             vfloat matrix_11, matrix_12, matrix_22, matrix_21;
652             computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21);  // FIXME: calculated multiple times
653             vfloat::store(valid,ddPdvdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
654           }
655         }
656         {
657           assert(ddPdudv);
658           const Vec4<vfloat> u_n = BezierBasis::derivative(uu);
659           const Vec4<vfloat> v_n = BezierBasis::derivative(vv);
660           for (size_t i=0; i<N; i++) {
661             vfloat matrix_11, matrix_12, matrix_22, matrix_21;
662             computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21);  // FIXME: calculated multiple times
663             vfloat::store(valid,ddPdudv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale));
664           }
665         }
666       }
667     }
668 
669     template<typename vbool, typename vfloat>
eval(const vbool & valid,const vfloat & uu,const vfloat & vv,float * P,float * dPdu,float * dPdv,float * ddPdudu,float * ddPdvdv,float * ddPdudv,const float dscale,const size_t dstride,const size_t N)670     __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
671                             float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
672                             const float dscale, const size_t dstride, const size_t N) const {
673       eval(v,f,valid,uu,vv,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N);
674     }
675 
676     template<class T>
eval_t(const Vertex matrix[4][4],const Vec3<T> f[2][2],const T & uu,const T & vv)677       static __forceinline Vec3<T> eval_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv)
678     {
679       typedef typename T::Bool M;
680       const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
681 
682       const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
683       const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
684       const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
685       const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
686 
687       const Vec3<T> f0_m = f[0][0];
688       const Vec3<T> f1_m = f[0][1];
689       const Vec3<T> f2_m = f[1][1];
690       const Vec3<T> f3_m = f[1][0];
691 
692       const T one_minus_uu = T(1.0f) - uu;
693       const T one_minus_vv = T(1.0f) - vv;
694 
695       const Vec3<T> f0_i = (          uu * f0_p +           vv * f0_m) * rcp(uu+vv);
696       const Vec3<T> f1_i = (one_minus_uu * f1_m +           vv * f1_p) * rcp(one_minus_uu+vv);
697       const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
698       const Vec3<T> f3_i = (          uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
699 
700       const Vec3<T> F0( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
701       const Vec3<T> F1( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
702       const Vec3<T> F2( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
703       const Vec3<T> F3( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
704 
705       const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu;
706       const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv;
707       const T B1_u = 3.0f * (one_minus_uu * uu * one_minus_uu);
708       const T B1_v = 3.0f * (one_minus_vv * vv * one_minus_vv);
709       const T B2_u = 3.0f * (uu * one_minus_uu * uu);
710       const T B2_v = 3.0f * (vv * one_minus_vv * vv);
711       const T B3_u = uu * uu * uu;
712       const T B3_v = vv * vv * vv;
713 
714       const T x = madd(B0_v,madd(B0_u,matrix[0][0].x,madd(B1_u,matrix[0][1].x,madd(B2_u,matrix[0][2].x,B3_u * matrix[0][3].x))),
715                   madd(B1_v,madd(B0_u,matrix[1][0].x,madd(B1_u,F0.x          ,madd(B2_u,F1.x          ,B3_u * matrix[1][3].x))),
716                   madd(B2_v,madd(B0_u,matrix[2][0].x,madd(B1_u,F3.x          ,madd(B2_u,F2.x          ,B3_u * matrix[2][3].x))),
717                        B3_v*madd(B0_u,matrix[3][0].x,madd(B1_u,matrix[3][1].x,madd(B2_u,matrix[3][2].x,B3_u * matrix[3][3].x))))));
718 
719       const T y = madd(B0_v,madd(B0_u,matrix[0][0].y,madd(B1_u,matrix[0][1].y,madd(B2_u,matrix[0][2].y,B3_u * matrix[0][3].y))),
720                   madd(B1_v,madd(B0_u,matrix[1][0].y,madd(B1_u,F0.y          ,madd(B2_u,F1.y          ,B3_u * matrix[1][3].y))),
721                   madd(B2_v,madd(B0_u,matrix[2][0].y,madd(B1_u,F3.y          ,madd(B2_u,F2.y          ,B3_u * matrix[2][3].y))),
722                        B3_v*madd(B0_u,matrix[3][0].y,madd(B1_u,matrix[3][1].y,madd(B2_u,matrix[3][2].y,B3_u * matrix[3][3].y))))));
723 
724       const T z = madd(B0_v,madd(B0_u,matrix[0][0].z,madd(B1_u,matrix[0][1].z,madd(B2_u,matrix[0][2].z,B3_u * matrix[0][3].z))),
725                   madd(B1_v,madd(B0_u,matrix[1][0].z,madd(B1_u,F0.z          ,madd(B2_u,F1.z          ,B3_u * matrix[1][3].z))),
726                   madd(B2_v,madd(B0_u,matrix[2][0].z,madd(B1_u,F3.z          ,madd(B2_u,F2.z          ,B3_u * matrix[2][3].z))),
727                        B3_v*madd(B0_u,matrix[3][0].z,madd(B1_u,matrix[3][1].z,madd(B2_u,matrix[3][2].z,B3_u * matrix[3][3].z))))));
728 
729       return Vec3<T>(x,y,z);
730     }
731 
732     template<class T>
eval(const T & uu,const T & vv)733     __forceinline Vec3<T> eval(const T& uu, const T& vv) const
734     {
735       Vec3<T> ff[2][2];
736       ff[0][0] = Vec3<T>(f[0][0]);
737       ff[0][1] = Vec3<T>(f[0][1]);
738       ff[1][1] = Vec3<T>(f[1][1]);
739       ff[1][0] = Vec3<T>(f[1][0]);
740       return eval_t(v,ff,uu,vv);
741     }
742 
743     template<class T>
normal_t(const Vertex matrix[4][4],const Vec3<T> f[2][2],const T & uu,const T & vv)744       static __forceinline Vec3<T> normal_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv)
745     {
746       typedef typename T::Bool M;
747 
748       const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z);
749       const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z);
750       const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z);
751       const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z);
752 
753       const Vec3<T> f0_m = f[0][0];
754       const Vec3<T> f1_m = f[0][1];
755       const Vec3<T> f2_m = f[1][1];
756       const Vec3<T> f3_m = f[1][0];
757 
758       const T one_minus_uu = T(1.0f) - uu;
759       const T one_minus_vv = T(1.0f) - vv;
760 
761       const Vec3<T> f0_i = (          uu * f0_p +           vv * f0_m) * rcp(uu+vv);
762       const Vec3<T> f1_i = (one_minus_uu * f1_m +           vv * f1_p) * rcp(one_minus_uu+vv);
763       const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv);
764       const Vec3<T> f3_i = (          uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv);
765 
766 #if 1
767       const M m_corner0 = (uu == 0.0f) & (vv == 0.0f);
768       const M m_corner1 = (uu == 1.0f) & (vv == 0.0f);
769       const M m_corner2 = (uu == 1.0f) & (vv == 1.0f);
770       const M m_corner3 = (uu == 0.0f) & (vv == 1.0f);
771       const Vec3<T> matrix_11( select(m_corner0,f0_p.x,f0_i.x), select(m_corner0,f0_p.y,f0_i.y), select(m_corner0,f0_p.z,f0_i.z) );
772       const Vec3<T> matrix_12( select(m_corner1,f1_p.x,f1_i.x), select(m_corner1,f1_p.y,f1_i.y), select(m_corner1,f1_p.z,f1_i.z) );
773       const Vec3<T> matrix_22( select(m_corner2,f2_p.x,f2_i.x), select(m_corner2,f2_p.y,f2_i.y), select(m_corner2,f2_p.z,f2_i.z) );
774       const Vec3<T> matrix_21( select(m_corner3,f3_p.x,f3_i.x), select(m_corner3,f3_p.y,f3_i.y), select(m_corner3,f3_p.z,f3_i.z) );
775 #else
776       const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f);
777       const Vec3<T> matrix_11( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) );
778       const Vec3<T> matrix_12( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) );
779       const Vec3<T> matrix_22( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) );
780       const Vec3<T> matrix_21( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) );
781 #endif
782 
783       const Vec3<T> matrix_00 = Vec3<T>(matrix[0][0].x,matrix[0][0].y,matrix[0][0].z);
784       const Vec3<T> matrix_10 = Vec3<T>(matrix[1][0].x,matrix[1][0].y,matrix[1][0].z);
785       const Vec3<T> matrix_20 = Vec3<T>(matrix[2][0].x,matrix[2][0].y,matrix[2][0].z);
786       const Vec3<T> matrix_30 = Vec3<T>(matrix[3][0].x,matrix[3][0].y,matrix[3][0].z);
787 
788       const Vec3<T> matrix_01 = Vec3<T>(matrix[0][1].x,matrix[0][1].y,matrix[0][1].z);
789       const Vec3<T> matrix_02 = Vec3<T>(matrix[0][2].x,matrix[0][2].y,matrix[0][2].z);
790       const Vec3<T> matrix_03 = Vec3<T>(matrix[0][3].x,matrix[0][3].y,matrix[0][3].z);
791 
792       const Vec3<T> matrix_31 = Vec3<T>(matrix[3][1].x,matrix[3][1].y,matrix[3][1].z);
793       const Vec3<T> matrix_32 = Vec3<T>(matrix[3][2].x,matrix[3][2].y,matrix[3][2].z);
794       const Vec3<T> matrix_33 = Vec3<T>(matrix[3][3].x,matrix[3][3].y,matrix[3][3].z);
795 
796       const Vec3<T> matrix_13 = Vec3<T>(matrix[1][3].x,matrix[1][3].y,matrix[1][3].z);
797       const Vec3<T> matrix_23 = Vec3<T>(matrix[2][3].x,matrix[2][3].y,matrix[2][3].z);
798 
799       /* tangentU */
800       const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30);
801       const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31);
802       const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32);
803       const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33);
804 
805       const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3);
806 
807       /* tangentV */
808       const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03);
809       const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13);
810       const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23);
811       const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33);
812 
813       const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3);
814 
815       /* normal = tangentU x tangentV */
816       const Vec3<T> n = cross(tangentU,tangentV);
817       return n;
818     }
819 
820      template<class T>
normal(const T & uu,const T & vv)821     __forceinline Vec3<T> normal(const T& uu, const T& vv) const
822     {
823       Vec3<T> ff[2][2];
824       ff[0][0] = Vec3<T>(f[0][0]);
825       ff[0][1] = Vec3<T>(f[0][1]);
826       ff[1][1] = Vec3<T>(f[1][1]);
827       ff[1][0] = Vec3<T>(f[1][0]);
828       return normal_t(v,ff,uu,vv);
829     }
830 
bounds()831     __forceinline BBox<Vertex> bounds() const
832     {
833       const Vertex *const cv = &v[0][0];
834       BBox<Vertex> bounds (cv[0]);
835       for (size_t i=1; i<16; i++)
836         bounds.extend( cv[i] );
837       bounds.extend(f[0][0]);
838       bounds.extend(f[1][0]);
839       bounds.extend(f[1][1]);
840       bounds.extend(f[1][1]);
841       return bounds;
842     }
843 
844     friend embree_ostream operator<<(embree_ostream o, const GregoryPatchT& p)
845     {
846       for (size_t y=0; y<4; y++)
847 	for (size_t x=0; x<4; x++)
848 	  o << "v[" << y << "][" << x << "] " << p.v[y][x] << embree_endl;
849 
850       for (size_t y=0; y<2; y++)
851 	for (size_t x=0; x<2; x++)
852 	  o << "f[" << y << "][" << x << "] " << p.f[y][x] << embree_endl;
853       return o;
854     }
855   };
856 
857   typedef GregoryPatchT<Vec3fa,Vec3fa_t> GregoryPatch3fa;
858 
859   template<typename Vertex, typename Vertex_t>
BezierPatchT(const HalfEdge * edge,const char * vertices,size_t stride)860     __forceinline  BezierPatchT<Vertex,Vertex_t>::BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride)
861   {
862     CatmullClarkPatchT<Vertex,Vertex_t> patch(edge,vertices,stride);
863     GregoryPatchT<Vertex,Vertex_t> gpatch(patch);
864     gpatch.convert_to_bezier();
865     for (size_t y=0; y<4; y++)
866       for (size_t x=0; x<4; x++)
867         matrix[y][x] = (Vertex_t)gpatch.v[y][x];
868   }
869 
870    template<typename Vertex, typename Vertex_t>
BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t> & patch)871     __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch)
872     {
873       GregoryPatchT<Vertex,Vertex_t> gpatch(patch);
874       gpatch.convert_to_bezier();
875       for (size_t y=0; y<4; y++)
876 	for (size_t x=0; x<4; x++)
877 	  matrix[y][x] = (Vertex_t)gpatch.v[y][x];
878     }
879 
880    template<typename Vertex, typename Vertex_t>
BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t> & patch,const BezierCurveT<Vertex> * border0,const BezierCurveT<Vertex> * border1,const BezierCurveT<Vertex> * border2,const BezierCurveT<Vertex> * border3)881      __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch,
882                                                                const BezierCurveT<Vertex>* border0,
883                                                                const BezierCurveT<Vertex>* border1,
884                                                                const BezierCurveT<Vertex>* border2,
885                                                                const BezierCurveT<Vertex>* border3)
886     {
887       GregoryPatchT<Vertex,Vertex_t> gpatch(patch,border0,border1,border2,border3);
888       gpatch.convert_to_bezier();
889       for (size_t y=0; y<4; y++)
890 	for (size_t x=0; x<4; x++)
891 	  matrix[y][x] = (Vertex_t)gpatch.v[y][x];
892     }
893 }
894