1 // Copyright 2009-2021 Intel Corporation 2 // SPDX-License-Identifier: Apache-2.0 3 4 #pragma once 5 6 #include "catmullclark_patch.h" 7 #include "bezier_patch.h" 8 #include "bezier_curve.h" 9 #include "catmullclark_coefficients.h" 10 11 namespace embree 12 { 13 template<typename Vertex, typename Vertex_t = Vertex> 14 class __aligned(64) GregoryPatchT 15 { 16 typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch; 17 typedef GeneralCatmullClarkPatchT<Vertex,Vertex_t> GeneralCatmullClarkPatch; 18 typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClark1Ring; 19 typedef BezierCurveT<Vertex> BezierCurve; 20 21 public: 22 Vertex v[4][4]; 23 Vertex f[2][2]; 24 GregoryPatchT()25 __forceinline GregoryPatchT() {} 26 GregoryPatchT(const CatmullClarkPatch & patch)27 __forceinline GregoryPatchT(const CatmullClarkPatch& patch) { 28 init(patch); 29 } 30 GregoryPatchT(const CatmullClarkPatch & patch,const BezierCurve * border0,const BezierCurve * border1,const BezierCurve * border2,const BezierCurve * border3)31 __forceinline GregoryPatchT(const CatmullClarkPatch& patch, 32 const BezierCurve* border0, const BezierCurve* border1, const BezierCurve* border2, const BezierCurve* border3) 33 { 34 init_crackfix(patch,border0,border1,border2,border3); 35 } 36 GregoryPatchT(const HalfEdge * edge,const char * vertices,size_t stride)37 __forceinline GregoryPatchT (const HalfEdge* edge, const char* vertices, size_t stride) { 38 init(CatmullClarkPatch(edge,vertices,stride)); 39 } 40 p0()41 __forceinline Vertex& p0() { return v[0][0]; } p1()42 __forceinline Vertex& p1() { return v[0][3]; } p2()43 __forceinline Vertex& p2() { return v[3][3]; } p3()44 __forceinline Vertex& p3() { return v[3][0]; } 45 e0_p()46 __forceinline Vertex& e0_p() { return v[0][1]; } e0_m()47 __forceinline Vertex& e0_m() { return v[1][0]; } e1_p()48 __forceinline Vertex& e1_p() { return v[1][3]; } e1_m()49 __forceinline Vertex& e1_m() { return v[0][2]; } e2_p()50 __forceinline Vertex& e2_p() { return v[3][2]; } e2_m()51 __forceinline Vertex& e2_m() { return v[2][3]; } e3_p()52 __forceinline Vertex& e3_p() { return v[2][0]; } e3_m()53 __forceinline Vertex& e3_m() { return v[3][1]; } 54 f0_p()55 __forceinline Vertex& f0_p() { return v[1][1]; } f1_p()56 __forceinline Vertex& f1_p() { return v[1][2]; } f2_p()57 __forceinline Vertex& f2_p() { return v[2][2]; } f3_p()58 __forceinline Vertex& f3_p() { return v[2][1]; } f0_m()59 __forceinline Vertex& f0_m() { return f[0][0]; } f1_m()60 __forceinline Vertex& f1_m() { return f[0][1]; } f2_m()61 __forceinline Vertex& f2_m() { return f[1][1]; } f3_m()62 __forceinline Vertex& f3_m() { return f[1][0]; } 63 p0()64 __forceinline const Vertex& p0() const { return v[0][0]; } p1()65 __forceinline const Vertex& p1() const { return v[0][3]; } p2()66 __forceinline const Vertex& p2() const { return v[3][3]; } p3()67 __forceinline const Vertex& p3() const { return v[3][0]; } 68 e0_p()69 __forceinline const Vertex& e0_p() const { return v[0][1]; } e0_m()70 __forceinline const Vertex& e0_m() const { return v[1][0]; } e1_p()71 __forceinline const Vertex& e1_p() const { return v[1][3]; } e1_m()72 __forceinline const Vertex& e1_m() const { return v[0][2]; } e2_p()73 __forceinline const Vertex& e2_p() const { return v[3][2]; } e2_m()74 __forceinline const Vertex& e2_m() const { return v[2][3]; } e3_p()75 __forceinline const Vertex& e3_p() const { return v[2][0]; } e3_m()76 __forceinline const Vertex& e3_m() const { return v[3][1]; } 77 f0_p()78 __forceinline const Vertex& f0_p() const { return v[1][1]; } f1_p()79 __forceinline const Vertex& f1_p() const { return v[1][2]; } f2_p()80 __forceinline const Vertex& f2_p() const { return v[2][2]; } f3_p()81 __forceinline const Vertex& f3_p() const { return v[2][1]; } f0_m()82 __forceinline const Vertex& f0_m() const { return f[0][0]; } f1_m()83 __forceinline const Vertex& f1_m() const { return f[0][1]; } f2_m()84 __forceinline const Vertex& f2_m() const { return f[1][1]; } f3_m()85 __forceinline const Vertex& f3_m() const { return f[1][0]; } 86 initCornerVertex(const CatmullClarkPatch & irreg_patch,const size_t index)87 __forceinline Vertex initCornerVertex(const CatmullClarkPatch& irreg_patch, const size_t index) { 88 return irreg_patch.ring[index].getLimitVertex(); 89 } 90 initPositiveEdgeVertex(const CatmullClarkPatch & irreg_patch,const size_t index,const Vertex & p_vtx)91 __forceinline Vertex initPositiveEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) { 92 return madd(1.0f/3.0f,irreg_patch.ring[index].getLimitTangent(),p_vtx); 93 } 94 initNegativeEdgeVertex(const CatmullClarkPatch & irreg_patch,const size_t index,const Vertex & p_vtx)95 __forceinline Vertex initNegativeEdgeVertex(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) { 96 return madd(1.0f/3.0f,irreg_patch.ring[index].getSecondLimitTangent(),p_vtx); 97 } 98 initPositiveEdgeVertex2(const CatmullClarkPatch & irreg_patch,const size_t index,const Vertex & p_vtx)99 __forceinline Vertex initPositiveEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) 100 { 101 CatmullClark1Ring3fa r0,r1,r2; 102 irreg_patch.ring[index].subdivide(r0); 103 r0.subdivide(r1); 104 r1.subdivide(r2); 105 return madd(8.0f/3.0f,r2.getLimitTangent(),p_vtx); 106 } 107 initNegativeEdgeVertex2(const CatmullClarkPatch & irreg_patch,const size_t index,const Vertex & p_vtx)108 __forceinline Vertex initNegativeEdgeVertex2(const CatmullClarkPatch& irreg_patch, const size_t index, const Vertex& p_vtx) 109 { 110 CatmullClark1Ring3fa r0,r1,r2; 111 irreg_patch.ring[index].subdivide(r0); 112 r0.subdivide(r1); 113 r1.subdivide(r2); 114 return madd(8.0f/3.0f,r2.getSecondLimitTangent(),p_vtx); 115 } 116 initFaceVertex(const CatmullClarkPatch & irreg_patch,const size_t index,const Vertex & p_vtx,const Vertex & e0_p_vtx,const Vertex & e1_m_vtx,const unsigned int face_valence_p1,const Vertex & e0_m_vtx,const Vertex & e3_p_vtx,const unsigned int face_valence_p3,Vertex & f_p_vtx,Vertex & f_m_vtx)117 void initFaceVertex(const CatmullClarkPatch& irreg_patch, 118 const size_t index, 119 const Vertex& p_vtx, 120 const Vertex& e0_p_vtx, 121 const Vertex& e1_m_vtx, 122 const unsigned int face_valence_p1, 123 const Vertex& e0_m_vtx, 124 const Vertex& e3_p_vtx, 125 const unsigned int face_valence_p3, 126 Vertex& f_p_vtx, 127 Vertex& f_m_vtx) 128 { 129 const unsigned int face_valence = irreg_patch.ring[index].face_valence; 130 const unsigned int edge_valence = irreg_patch.ring[index].edge_valence; 131 const unsigned int border_index = irreg_patch.ring[index].border_index; 132 133 const Vertex& vtx = irreg_patch.ring[index].vtx; 134 const Vertex e_i = irreg_patch.ring[index].getEdgeCenter(0); 135 const Vertex c_i_m_1 = irreg_patch.ring[index].getQuadCenter(0); 136 const Vertex e_i_m_1 = irreg_patch.ring[index].getEdgeCenter(1); 137 138 Vertex c_i, e_i_p_1; 139 const bool hasHardEdge0 = 140 std::isinf(irreg_patch.ring[index].vertex_crease_weight) && 141 std::isinf(irreg_patch.ring[index].crease_weight[0]); 142 143 if (unlikely((border_index == edge_valence-2) || hasHardEdge0)) 144 { 145 /* mirror quad center and edge mid-point */ 146 c_i = madd(2.0f, e_i - c_i_m_1, c_i_m_1); 147 e_i_p_1 = madd(2.0f, vtx - e_i_m_1, e_i_m_1); 148 } 149 else 150 { 151 c_i = irreg_patch.ring[index].getQuadCenter( face_valence-1 ); 152 e_i_p_1 = irreg_patch.ring[index].getEdgeCenter( face_valence-1 ); 153 } 154 155 Vertex c_i_m_2, e_i_m_2; 156 const bool hasHardEdge1 = 157 std::isinf(irreg_patch.ring[index].vertex_crease_weight) && 158 std::isinf(irreg_patch.ring[index].crease_weight[1]); 159 160 if (unlikely(border_index == 2 || hasHardEdge1)) 161 { 162 /* mirror quad center and edge mid-point */ 163 c_i_m_2 = madd(2.0f, e_i_m_1 - c_i_m_1, c_i_m_1); 164 e_i_m_2 = madd(2.0f, vtx - e_i, + e_i); 165 } 166 else 167 { 168 c_i_m_2 = irreg_patch.ring[index].getQuadCenter( 1 ); 169 e_i_m_2 = irreg_patch.ring[index].getEdgeCenter( 2 ); 170 } 171 172 const float d = 3.0f; 173 //const float c = cosf(2.0f*M_PI/(float)face_valence); 174 //const float c_e_p = cosf(2.0f*M_PI/(float)face_valence_p1); 175 //const float c_e_m = cosf(2.0f*M_PI/(float)face_valence_p3); 176 177 const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence); 178 const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1); 179 const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3); 180 181 const Vertex r_e_p = 1.0f/3.0f * (e_i_m_1 - e_i_p_1) + 2.0f/3.0f * (c_i_m_1 - c_i); 182 const Vertex r_e_m = 1.0f/3.0f * (e_i - e_i_m_2) + 2.0f/3.0f * (c_i_m_1 - c_i_m_2); 183 184 f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p); 185 f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m); 186 } 187 init(const CatmullClarkPatch & patch)188 __noinline void init(const CatmullClarkPatch& patch) 189 { 190 assert( patch.ring[0].hasValidPositions() ); 191 assert( patch.ring[1].hasValidPositions() ); 192 assert( patch.ring[2].hasValidPositions() ); 193 assert( patch.ring[3].hasValidPositions() ); 194 195 p0() = initCornerVertex(patch,0); 196 p1() = initCornerVertex(patch,1); 197 p2() = initCornerVertex(patch,2); 198 p3() = initCornerVertex(patch,3); 199 200 e0_p() = initPositiveEdgeVertex(patch,0, p0()); 201 e1_p() = initPositiveEdgeVertex(patch,1, p1()); 202 e2_p() = initPositiveEdgeVertex(patch,2, p2()); 203 e3_p() = initPositiveEdgeVertex(patch,3, p3()); 204 205 e0_m() = initNegativeEdgeVertex(patch,0, p0()); 206 e1_m() = initNegativeEdgeVertex(patch,1, p1()); 207 e2_m() = initNegativeEdgeVertex(patch,2, p2()); 208 e3_m() = initNegativeEdgeVertex(patch,3, p3()); 209 210 const unsigned int face_valence_p0 = patch.ring[0].face_valence; 211 const unsigned int face_valence_p1 = patch.ring[1].face_valence; 212 const unsigned int face_valence_p2 = patch.ring[2].face_valence; 213 const unsigned int face_valence_p3 = patch.ring[3].face_valence; 214 215 initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() ); 216 initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() ); 217 initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() ); 218 initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() ); 219 220 } 221 init_crackfix(const CatmullClarkPatch & patch,const BezierCurve * border0,const BezierCurve * border1,const BezierCurve * border2,const BezierCurve * border3)222 __noinline void init_crackfix(const CatmullClarkPatch& patch, 223 const BezierCurve* border0, 224 const BezierCurve* border1, 225 const BezierCurve* border2, 226 const BezierCurve* border3) 227 { 228 assert( patch.ring[0].hasValidPositions() ); 229 assert( patch.ring[1].hasValidPositions() ); 230 assert( patch.ring[2].hasValidPositions() ); 231 assert( patch.ring[3].hasValidPositions() ); 232 233 p0() = initCornerVertex(patch,0); 234 p1() = initCornerVertex(patch,1); 235 p2() = initCornerVertex(patch,2); 236 p3() = initCornerVertex(patch,3); 237 238 e0_p() = initPositiveEdgeVertex(patch,0, p0()); 239 e1_p() = initPositiveEdgeVertex(patch,1, p1()); 240 e2_p() = initPositiveEdgeVertex(patch,2, p2()); 241 e3_p() = initPositiveEdgeVertex(patch,3, p3()); 242 243 e0_m() = initNegativeEdgeVertex(patch,0, p0()); 244 e1_m() = initNegativeEdgeVertex(patch,1, p1()); 245 e2_m() = initNegativeEdgeVertex(patch,2, p2()); 246 e3_m() = initNegativeEdgeVertex(patch,3, p3()); 247 248 if (unlikely(border0 != nullptr)) 249 { 250 p0() = border0->v0; 251 e0_p() = border0->v1; 252 e1_m() = border0->v2; 253 p1() = border0->v3; 254 } 255 256 if (unlikely(border1 != nullptr)) 257 { 258 p1() = border1->v0; 259 e1_p() = border1->v1; 260 e2_m() = border1->v2; 261 p2() = border1->v3; 262 } 263 264 if (unlikely(border2 != nullptr)) 265 { 266 p2() = border2->v0; 267 e2_p() = border2->v1; 268 e3_m() = border2->v2; 269 p3() = border2->v3; 270 } 271 272 if (unlikely(border3 != nullptr)) 273 { 274 p3() = border3->v0; 275 e3_p() = border3->v1; 276 e0_m() = border3->v2; 277 p0() = border3->v3; 278 } 279 280 const unsigned int face_valence_p0 = patch.ring[0].face_valence; 281 const unsigned int face_valence_p1 = patch.ring[1].face_valence; 282 const unsigned int face_valence_p2 = patch.ring[2].face_valence; 283 const unsigned int face_valence_p3 = patch.ring[3].face_valence; 284 285 initFaceVertex(patch,0,p0(),e0_p(),e1_m(),face_valence_p1,e0_m(),e3_p(),face_valence_p3,f0_p(),f0_m() ); 286 initFaceVertex(patch,1,p1(),e1_p(),e2_m(),face_valence_p2,e1_m(),e0_p(),face_valence_p0,f1_p(),f1_m() ); 287 initFaceVertex(patch,2,p2(),e2_p(),e3_m(),face_valence_p3,e2_m(),e1_p(),face_valence_p1,f2_p(),f2_m() ); 288 initFaceVertex(patch,3,p3(),e3_p(),e0_m(),face_valence_p0,e3_m(),e2_p(),face_valence_p3,f3_p(),f3_m() ); 289 } 290 291 292 void computeGregoryPatchFacePoints(const unsigned int face_valence, 293 const Vertex& r_e_p, 294 const Vertex& r_e_m, 295 const Vertex& p_vtx, 296 const Vertex& e0_p_vtx, 297 const Vertex& e1_m_vtx, 298 const unsigned int face_valence_p1, 299 const Vertex& e0_m_vtx, 300 const Vertex& e3_p_vtx, 301 const unsigned int face_valence_p3, 302 Vertex& f_p_vtx, 303 Vertex& f_m_vtx, 304 const float d = 3.0f) 305 { 306 //const float c = cosf(2.0*M_PI/(float)face_valence); 307 //const float c_e_p = cosf(2.0*M_PI/(float)face_valence_p1); 308 //const float c_e_m = cosf(2.0*M_PI/(float)face_valence_p3); 309 310 const float c = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence); 311 const float c_e_p = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p1); 312 const float c_e_m = CatmullClarkPrecomputedCoefficients::table.cos_2PI_div_n(face_valence_p3); 313 314 315 f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p); 316 f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m); 317 f_p_vtx = 1.0f / d * (c_e_p * p_vtx + (d - 2.0f*c - c_e_p) * e0_p_vtx + 2.0f*c* e1_m_vtx + r_e_p); 318 f_m_vtx = 1.0f / d * (c_e_m * p_vtx + (d - 2.0f*c - c_e_m) * e0_m_vtx + 2.0f*c* e3_p_vtx + r_e_m); 319 } 320 init(const GeneralCatmullClarkPatch & patch)321 __noinline void init(const GeneralCatmullClarkPatch& patch) 322 { 323 assert(patch.size() == 4); 324 #if 0 325 CatmullClarkPatch qpatch; patch.init(qpatch); 326 init(qpatch); 327 #else 328 const float face_valence_p0 = patch.ring[0].face_valence; 329 const float face_valence_p1 = patch.ring[1].face_valence; 330 const float face_valence_p2 = patch.ring[2].face_valence; 331 const float face_valence_p3 = patch.ring[3].face_valence; 332 333 Vertex p0_r_p, p0_r_m; 334 patch.ring[0].computeGregoryPatchEdgePoints( p0(), e0_p(), e0_m(), p0_r_p, p0_r_m ); 335 336 Vertex p1_r_p, p1_r_m; 337 patch.ring[1].computeGregoryPatchEdgePoints( p1(), e1_p(), e1_m(), p1_r_p, p1_r_m ); 338 339 Vertex p2_r_p, p2_r_m; 340 patch.ring[2].computeGregoryPatchEdgePoints( p2(), e2_p(), e2_m(), p2_r_p, p2_r_m ); 341 342 Vertex p3_r_p, p3_r_m; 343 patch.ring[3].computeGregoryPatchEdgePoints( p3(), e3_p(), e3_m(), p3_r_p, p3_r_m ); 344 345 computeGregoryPatchFacePoints(face_valence_p0, p0_r_p, p0_r_m, p0(), e0_p(), e1_m(), face_valence_p1, e0_m(), e3_p(), face_valence_p3, f0_p(), f0_m() ); 346 computeGregoryPatchFacePoints(face_valence_p1, p1_r_p, p1_r_m, p1(), e1_p(), e2_m(), face_valence_p2, e1_m(), e0_p(), face_valence_p0, f1_p(), f1_m() ); 347 computeGregoryPatchFacePoints(face_valence_p2, p2_r_p, p2_r_m, p2(), e2_p(), e3_m(), face_valence_p3, e2_m(), e1_p(), face_valence_p1, f2_p(), f2_m() ); 348 computeGregoryPatchFacePoints(face_valence_p3, p3_r_p, p3_r_m, p3(), e3_p(), e0_m(), face_valence_p0, e3_m(), e2_p(), face_valence_p3, f3_p(), f3_m() ); 349 350 #endif 351 } 352 353 convert_to_bezier()354 __forceinline void convert_to_bezier() 355 { 356 f0_p() = (f0_p() + f0_m()) * 0.5f; 357 f1_p() = (f1_p() + f1_m()) * 0.5f; 358 f2_p() = (f2_p() + f2_m()) * 0.5f; 359 f3_p() = (f3_p() + f3_m()) * 0.5f; 360 f0_m() = Vertex( zero ); 361 f1_m() = Vertex( zero ); 362 f2_m() = Vertex( zero ); 363 f3_m() = Vertex( zero ); 364 } 365 computeInnerVertices(const Vertex matrix[4][4],const Vertex f_m[2][2],const float uu,const float vv,Vertex_t & matrix_11,Vertex_t & matrix_12,Vertex_t & matrix_22,Vertex_t & matrix_21)366 static __forceinline void computeInnerVertices(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv, 367 Vertex_t& matrix_11, Vertex_t& matrix_12, Vertex_t& matrix_22, Vertex_t& matrix_21) 368 { 369 if (unlikely(uu == 0.0f || uu == 1.0f || vv == 0.0f || vv == 1.0f)) 370 { 371 matrix_11 = matrix[1][1]; 372 matrix_12 = matrix[1][2]; 373 matrix_22 = matrix[2][2]; 374 matrix_21 = matrix[2][1]; 375 } 376 else 377 { 378 const Vertex_t f0_p = matrix[1][1]; 379 const Vertex_t f1_p = matrix[1][2]; 380 const Vertex_t f2_p = matrix[2][2]; 381 const Vertex_t f3_p = matrix[2][1]; 382 383 const Vertex_t f0_m = f_m[0][0]; 384 const Vertex_t f1_m = f_m[0][1]; 385 const Vertex_t f2_m = f_m[1][1]; 386 const Vertex_t f3_m = f_m[1][0]; 387 388 matrix_11 = ( uu * f0_p + vv * f0_m)*rcp(uu+vv); 389 matrix_12 = ((1.0f-uu) * f1_m + vv * f1_p)*rcp(1.0f-uu+vv); 390 matrix_22 = ((1.0f-uu) * f2_p + (1.0f-vv) * f2_m)*rcp(2.0f-uu-vv); 391 matrix_21 = ( uu * f3_m + (1.0f-vv) * f3_p)*rcp(1.0f+uu-vv); 392 } 393 } 394 395 template<typename vfloat> computeInnerVertices(const Vertex v[4][4],const Vertex f[2][2],size_t i,const vfloat & uu,const vfloat & vv,vfloat & matrix_11,vfloat & matrix_12,vfloat & matrix_22,vfloat & matrix_21)396 static __forceinline void computeInnerVertices(const Vertex v[4][4], const Vertex f[2][2], 397 size_t i, const vfloat& uu, const vfloat& vv, vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21) 398 { 399 const auto m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f); 400 401 const vfloat f0_p = v[1][1][i]; 402 const vfloat f1_p = v[1][2][i]; 403 const vfloat f2_p = v[2][2][i]; 404 const vfloat f3_p = v[2][1][i]; 405 406 const vfloat f0_m = f[0][0][i]; 407 const vfloat f1_m = f[0][1][i]; 408 const vfloat f2_m = f[1][1][i]; 409 const vfloat f3_m = f[1][0][i]; 410 411 const vfloat one_minus_uu = vfloat(1.0f) - uu; 412 const vfloat one_minus_vv = vfloat(1.0f) - vv; 413 414 const vfloat f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv); 415 const vfloat f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv); 416 const vfloat f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv); 417 const vfloat f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv); 418 419 matrix_11 = select(m_border,f0_p,f0_i); 420 matrix_12 = select(m_border,f1_p,f1_i); 421 matrix_22 = select(m_border,f2_p,f2_i); 422 matrix_21 = select(m_border,f3_p,f3_i); 423 } 424 eval(const Vertex matrix[4][4],const Vertex f[2][2],const float & uu,const float & vv)425 static __forceinline Vertex eval(const Vertex matrix[4][4], const Vertex f[2][2], const float& uu, const float& vv) 426 { 427 Vertex_t v_11, v_12, v_22, v_21; 428 computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); 429 430 const Vec4<float> Bu = BezierBasis::eval(uu); 431 const Vec4<float> Bv = BezierBasis::eval(vv); 432 433 return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 434 madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), 435 madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), 436 Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 437 } 438 eval_du(const Vertex matrix[4][4],const Vertex f[2][2],const float uu,const float vv)439 static __forceinline Vertex eval_du(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative 440 { 441 Vertex_t v_11, v_12, v_22, v_21; 442 computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); 443 444 const Vec4<float> Bu = BezierBasis::derivative(uu); 445 const Vec4<float> Bv = BezierBasis::eval(vv); 446 447 return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 448 madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), 449 madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), 450 Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 451 } 452 eval_dv(const Vertex matrix[4][4],const Vertex f[2][2],const float uu,const float vv)453 static __forceinline Vertex eval_dv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative 454 { 455 Vertex_t v_11, v_12, v_22, v_21; 456 computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); 457 458 const Vec4<float> Bu = BezierBasis::eval(uu); 459 const Vec4<float> Bv = BezierBasis::derivative(vv); 460 461 return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 462 madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), 463 madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), 464 Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 465 } 466 eval_dudu(const Vertex matrix[4][4],const Vertex f[2][2],const float uu,const float vv)467 static __forceinline Vertex eval_dudu(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative 468 { 469 Vertex_t v_11, v_12, v_22, v_21; 470 computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); 471 472 const Vec4<float> Bu = BezierBasis::derivative2(uu); 473 const Vec4<float> Bv = BezierBasis::eval(vv); 474 475 return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 476 madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), 477 madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), 478 Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 479 } 480 eval_dvdv(const Vertex matrix[4][4],const Vertex f[2][2],const float uu,const float vv)481 static __forceinline Vertex eval_dvdv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative 482 { 483 Vertex_t v_11, v_12, v_22, v_21; 484 computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); 485 486 const Vec4<float> Bu = BezierBasis::eval(uu); 487 const Vec4<float> Bv = BezierBasis::derivative2(vv); 488 489 return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 490 madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), 491 madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), 492 Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 493 } 494 eval_dudv(const Vertex matrix[4][4],const Vertex f[2][2],const float uu,const float vv)495 static __forceinline Vertex eval_dudv(const Vertex matrix[4][4], const Vertex f[2][2], const float uu, const float vv) // approximative derivative 496 { 497 Vertex_t v_11, v_12, v_22, v_21; 498 computeInnerVertices(matrix,f,uu,vv,v_11, v_12, v_22, v_21); 499 500 const Vec4<float> Bu = BezierBasis::derivative(uu); 501 const Vec4<float> Bv = BezierBasis::derivative(vv); 502 503 return madd(Bv.x,madd(Bu.x,matrix[0][0],madd(Bu.y,matrix[0][1],madd(Bu.z,matrix[0][2],Bu.w * matrix[0][3]))), 504 madd(Bv.y,madd(Bu.x,matrix[1][0],madd(Bu.y,v_11 ,madd(Bu.z,v_12 ,Bu.w * matrix[1][3]))), 505 madd(Bv.z,madd(Bu.x,matrix[2][0],madd(Bu.y,v_21 ,madd(Bu.z,v_22 ,Bu.w * matrix[2][3]))), 506 Bv.w*madd(Bu.x,matrix[3][0],madd(Bu.y,matrix[3][1],madd(Bu.z,matrix[3][2],Bu.w * matrix[3][3])))))); 507 } 508 eval(const float uu,const float vv)509 __forceinline Vertex eval(const float uu, const float vv) const { 510 return eval(v,f,uu,vv); 511 } 512 eval_du(const float uu,const float vv)513 __forceinline Vertex eval_du( const float uu, const float vv) const { 514 return eval_du(v,f,uu,vv); 515 } 516 eval_dv(const float uu,const float vv)517 __forceinline Vertex eval_dv( const float uu, const float vv) const { 518 return eval_dv(v,f,uu,vv); 519 } 520 eval_dudu(const float uu,const float vv)521 __forceinline Vertex eval_dudu( const float uu, const float vv) const { 522 return eval_dudu(v,f,uu,vv); 523 } 524 eval_dvdv(const float uu,const float vv)525 __forceinline Vertex eval_dvdv( const float uu, const float vv) const { 526 return eval_dvdv(v,f,uu,vv); 527 } 528 eval_dudv(const float uu,const float vv)529 __forceinline Vertex eval_dudv( const float uu, const float vv) const { 530 return eval_dudv(v,f,uu,vv); 531 } 532 normal(const Vertex matrix[4][4],const Vertex f_m[2][2],const float uu,const float vv)533 static __forceinline Vertex normal(const Vertex matrix[4][4], const Vertex f_m[2][2], const float uu, const float vv) // FIXME: why not using basis functions 534 { 535 /* interpolate inner vertices */ 536 Vertex_t matrix_11, matrix_12, matrix_22, matrix_21; 537 computeInnerVertices(matrix,f_m,uu,vv,matrix_11, matrix_12, matrix_22, matrix_21); 538 539 /* tangentU */ 540 const Vertex_t col0 = deCasteljau(vv, (Vertex_t)matrix[0][0], (Vertex_t)matrix[1][0], (Vertex_t)matrix[2][0], (Vertex_t)matrix[3][0]); 541 const Vertex_t col1 = deCasteljau(vv, (Vertex_t)matrix[0][1], (Vertex_t)matrix_11 , (Vertex_t)matrix_21 , (Vertex_t)matrix[3][1]); 542 const Vertex_t col2 = deCasteljau(vv, (Vertex_t)matrix[0][2], (Vertex_t)matrix_12 , (Vertex_t)matrix_22 , (Vertex_t)matrix[3][2]); 543 const Vertex_t col3 = deCasteljau(vv, (Vertex_t)matrix[0][3], (Vertex_t)matrix[1][3], (Vertex_t)matrix[2][3], (Vertex_t)matrix[3][3]); 544 545 const Vertex_t tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3); 546 547 /* tangentV */ 548 const Vertex_t row0 = deCasteljau(uu, (Vertex_t)matrix[0][0], (Vertex_t)matrix[0][1], (Vertex_t)matrix[0][2], (Vertex_t)matrix[0][3]); 549 const Vertex_t row1 = deCasteljau(uu, (Vertex_t)matrix[1][0], (Vertex_t)matrix_11 , (Vertex_t)matrix_12 , (Vertex_t)matrix[1][3]); 550 const Vertex_t row2 = deCasteljau(uu, (Vertex_t)matrix[2][0], (Vertex_t)matrix_21 , (Vertex_t)matrix_22 , (Vertex_t)matrix[2][3]); 551 const Vertex_t row3 = deCasteljau(uu, (Vertex_t)matrix[3][0], (Vertex_t)matrix[3][1], (Vertex_t)matrix[3][2], (Vertex_t)matrix[3][3]); 552 553 const Vertex_t tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3); 554 555 /* normal = tangentU x tangentV */ 556 const Vertex_t n = cross(tangentU,tangentV); 557 558 return n; 559 } 560 normal(const float uu,const float vv)561 __forceinline Vertex normal( const float uu, const float vv) const { 562 return normal(v,f,uu,vv); 563 } 564 565 __forceinline void eval(const float u, const float v, 566 Vertex* P, Vertex* dPdu, Vertex* dPdv, 567 Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv, 568 const float dscale = 1.0f) const 569 { 570 if (P) { 571 *P = eval(u,v); 572 } 573 if (dPdu) { 574 assert(dPdu); *dPdu = eval_du(u,v)*dscale; 575 assert(dPdv); *dPdv = eval_dv(u,v)*dscale; 576 } 577 if (ddPdudu) { 578 assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale); 579 assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale); 580 assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale); 581 } 582 } 583 584 template<class vfloat> eval(const Vertex v[4][4],const Vertex f[2][2],const size_t i,const vfloat & uu,const vfloat & vv,const Vec4<vfloat> & u_n,const Vec4<vfloat> & v_n,vfloat & matrix_11,vfloat & matrix_12,vfloat & matrix_22,vfloat & matrix_21)585 static __forceinline vfloat eval(const Vertex v[4][4], const Vertex f[2][2], 586 const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n, 587 vfloat& matrix_11, vfloat& matrix_12, vfloat& matrix_22, vfloat& matrix_21) 588 { 589 const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i])))); 590 const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(matrix_11 ),madd(v_n[2],vfloat(matrix_21 ),v_n[3] * vfloat(v[3][1][i])))); 591 const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(matrix_12 ),madd(v_n[2],vfloat(matrix_22 ),v_n[3] * vfloat(v[3][2][i])))); 592 const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i])))); 593 return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x))); 594 } 595 596 template<typename vbool, typename vfloat> eval(const Vertex v[4][4],const Vertex f[2][2],const vbool & valid,const vfloat & uu,const vfloat & vv,float * P,float * dPdu,float * dPdv,float * ddPdudu,float * ddPdvdv,float * ddPdudv,const float dscale,const size_t dstride,const size_t N)597 static __forceinline void eval(const Vertex v[4][4], const Vertex f[2][2], 598 const vbool& valid, const vfloat& uu, const vfloat& vv, 599 float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, 600 const float dscale, const size_t dstride, const size_t N) 601 { 602 if (P) { 603 const Vec4<vfloat> u_n = BezierBasis::eval(uu); 604 const Vec4<vfloat> v_n = BezierBasis::eval(vv); 605 for (size_t i=0; i<N; i++) { 606 vfloat matrix_11, matrix_12, matrix_22, matrix_21; 607 computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times 608 vfloat::store(valid,P+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)); 609 } 610 } 611 if (dPdu) 612 { 613 { 614 assert(dPdu); 615 const Vec4<vfloat> u_n = BezierBasis::derivative(uu); 616 const Vec4<vfloat> v_n = BezierBasis::eval(vv); 617 for (size_t i=0; i<N; i++) { 618 vfloat matrix_11, matrix_12, matrix_22, matrix_21; 619 computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times 620 vfloat::store(valid,dPdu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale); 621 } 622 } 623 { 624 assert(dPdv); 625 const Vec4<vfloat> u_n = BezierBasis::eval(uu); 626 const Vec4<vfloat> v_n = BezierBasis::derivative(vv); 627 for (size_t i=0; i<N; i++) { 628 vfloat matrix_11, matrix_12, matrix_22, matrix_21; 629 computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times 630 vfloat::store(valid,dPdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*dscale); 631 } 632 } 633 } 634 if (ddPdudu) 635 { 636 { 637 assert(ddPdudu); 638 const Vec4<vfloat> u_n = BezierBasis::derivative2(uu); 639 const Vec4<vfloat> v_n = BezierBasis::eval(vv); 640 for (size_t i=0; i<N; i++) { 641 vfloat matrix_11, matrix_12, matrix_22, matrix_21; 642 computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times 643 vfloat::store(valid,ddPdudu+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale)); 644 } 645 } 646 { 647 assert(ddPdvdv); 648 const Vec4<vfloat> u_n = BezierBasis::eval(uu); 649 const Vec4<vfloat> v_n = BezierBasis::derivative2(vv); 650 for (size_t i=0; i<N; i++) { 651 vfloat matrix_11, matrix_12, matrix_22, matrix_21; 652 computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times 653 vfloat::store(valid,ddPdvdv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale)); 654 } 655 } 656 { 657 assert(ddPdudv); 658 const Vec4<vfloat> u_n = BezierBasis::derivative(uu); 659 const Vec4<vfloat> v_n = BezierBasis::derivative(vv); 660 for (size_t i=0; i<N; i++) { 661 vfloat matrix_11, matrix_12, matrix_22, matrix_21; 662 computeInnerVertices(v,f,i,uu,vv,matrix_11,matrix_12,matrix_22,matrix_21); // FIXME: calculated multiple times 663 vfloat::store(valid,ddPdudv+i*dstride,eval(v,f,i,uu,vv,u_n,v_n,matrix_11,matrix_12,matrix_22,matrix_21)*sqr(dscale)); 664 } 665 } 666 } 667 } 668 669 template<typename vbool, typename vfloat> eval(const vbool & valid,const vfloat & uu,const vfloat & vv,float * P,float * dPdu,float * dPdv,float * ddPdudu,float * ddPdvdv,float * ddPdudv,const float dscale,const size_t dstride,const size_t N)670 __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv, 671 float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv, 672 const float dscale, const size_t dstride, const size_t N) const { 673 eval(v,f,valid,uu,vv,P,dPdu,dPdv,ddPdudu,ddPdvdv,ddPdudv,dscale,dstride,N); 674 } 675 676 template<class T> eval_t(const Vertex matrix[4][4],const Vec3<T> f[2][2],const T & uu,const T & vv)677 static __forceinline Vec3<T> eval_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv) 678 { 679 typedef typename T::Bool M; 680 const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f); 681 682 const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z); 683 const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z); 684 const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z); 685 const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z); 686 687 const Vec3<T> f0_m = f[0][0]; 688 const Vec3<T> f1_m = f[0][1]; 689 const Vec3<T> f2_m = f[1][1]; 690 const Vec3<T> f3_m = f[1][0]; 691 692 const T one_minus_uu = T(1.0f) - uu; 693 const T one_minus_vv = T(1.0f) - vv; 694 695 const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv); 696 const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv); 697 const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv); 698 const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv); 699 700 const Vec3<T> F0( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) ); 701 const Vec3<T> F1( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) ); 702 const Vec3<T> F2( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) ); 703 const Vec3<T> F3( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) ); 704 705 const T B0_u = one_minus_uu * one_minus_uu * one_minus_uu; 706 const T B0_v = one_minus_vv * one_minus_vv * one_minus_vv; 707 const T B1_u = 3.0f * (one_minus_uu * uu * one_minus_uu); 708 const T B1_v = 3.0f * (one_minus_vv * vv * one_minus_vv); 709 const T B2_u = 3.0f * (uu * one_minus_uu * uu); 710 const T B2_v = 3.0f * (vv * one_minus_vv * vv); 711 const T B3_u = uu * uu * uu; 712 const T B3_v = vv * vv * vv; 713 714 const T x = madd(B0_v,madd(B0_u,matrix[0][0].x,madd(B1_u,matrix[0][1].x,madd(B2_u,matrix[0][2].x,B3_u * matrix[0][3].x))), 715 madd(B1_v,madd(B0_u,matrix[1][0].x,madd(B1_u,F0.x ,madd(B2_u,F1.x ,B3_u * matrix[1][3].x))), 716 madd(B2_v,madd(B0_u,matrix[2][0].x,madd(B1_u,F3.x ,madd(B2_u,F2.x ,B3_u * matrix[2][3].x))), 717 B3_v*madd(B0_u,matrix[3][0].x,madd(B1_u,matrix[3][1].x,madd(B2_u,matrix[3][2].x,B3_u * matrix[3][3].x)))))); 718 719 const T y = madd(B0_v,madd(B0_u,matrix[0][0].y,madd(B1_u,matrix[0][1].y,madd(B2_u,matrix[0][2].y,B3_u * matrix[0][3].y))), 720 madd(B1_v,madd(B0_u,matrix[1][0].y,madd(B1_u,F0.y ,madd(B2_u,F1.y ,B3_u * matrix[1][3].y))), 721 madd(B2_v,madd(B0_u,matrix[2][0].y,madd(B1_u,F3.y ,madd(B2_u,F2.y ,B3_u * matrix[2][3].y))), 722 B3_v*madd(B0_u,matrix[3][0].y,madd(B1_u,matrix[3][1].y,madd(B2_u,matrix[3][2].y,B3_u * matrix[3][3].y)))))); 723 724 const T z = madd(B0_v,madd(B0_u,matrix[0][0].z,madd(B1_u,matrix[0][1].z,madd(B2_u,matrix[0][2].z,B3_u * matrix[0][3].z))), 725 madd(B1_v,madd(B0_u,matrix[1][0].z,madd(B1_u,F0.z ,madd(B2_u,F1.z ,B3_u * matrix[1][3].z))), 726 madd(B2_v,madd(B0_u,matrix[2][0].z,madd(B1_u,F3.z ,madd(B2_u,F2.z ,B3_u * matrix[2][3].z))), 727 B3_v*madd(B0_u,matrix[3][0].z,madd(B1_u,matrix[3][1].z,madd(B2_u,matrix[3][2].z,B3_u * matrix[3][3].z)))))); 728 729 return Vec3<T>(x,y,z); 730 } 731 732 template<class T> eval(const T & uu,const T & vv)733 __forceinline Vec3<T> eval(const T& uu, const T& vv) const 734 { 735 Vec3<T> ff[2][2]; 736 ff[0][0] = Vec3<T>(f[0][0]); 737 ff[0][1] = Vec3<T>(f[0][1]); 738 ff[1][1] = Vec3<T>(f[1][1]); 739 ff[1][0] = Vec3<T>(f[1][0]); 740 return eval_t(v,ff,uu,vv); 741 } 742 743 template<class T> normal_t(const Vertex matrix[4][4],const Vec3<T> f[2][2],const T & uu,const T & vv)744 static __forceinline Vec3<T> normal_t(const Vertex matrix[4][4], const Vec3<T> f[2][2], const T& uu, const T& vv) 745 { 746 typedef typename T::Bool M; 747 748 const Vec3<T> f0_p = Vec3<T>(matrix[1][1].x,matrix[1][1].y,matrix[1][1].z); 749 const Vec3<T> f1_p = Vec3<T>(matrix[1][2].x,matrix[1][2].y,matrix[1][2].z); 750 const Vec3<T> f2_p = Vec3<T>(matrix[2][2].x,matrix[2][2].y,matrix[2][2].z); 751 const Vec3<T> f3_p = Vec3<T>(matrix[2][1].x,matrix[2][1].y,matrix[2][1].z); 752 753 const Vec3<T> f0_m = f[0][0]; 754 const Vec3<T> f1_m = f[0][1]; 755 const Vec3<T> f2_m = f[1][1]; 756 const Vec3<T> f3_m = f[1][0]; 757 758 const T one_minus_uu = T(1.0f) - uu; 759 const T one_minus_vv = T(1.0f) - vv; 760 761 const Vec3<T> f0_i = ( uu * f0_p + vv * f0_m) * rcp(uu+vv); 762 const Vec3<T> f1_i = (one_minus_uu * f1_m + vv * f1_p) * rcp(one_minus_uu+vv); 763 const Vec3<T> f2_i = (one_minus_uu * f2_p + one_minus_vv * f2_m) * rcp(one_minus_uu+one_minus_vv); 764 const Vec3<T> f3_i = ( uu * f3_m + one_minus_vv * f3_p) * rcp(uu+one_minus_vv); 765 766 #if 1 767 const M m_corner0 = (uu == 0.0f) & (vv == 0.0f); 768 const M m_corner1 = (uu == 1.0f) & (vv == 0.0f); 769 const M m_corner2 = (uu == 1.0f) & (vv == 1.0f); 770 const M m_corner3 = (uu == 0.0f) & (vv == 1.0f); 771 const Vec3<T> matrix_11( select(m_corner0,f0_p.x,f0_i.x), select(m_corner0,f0_p.y,f0_i.y), select(m_corner0,f0_p.z,f0_i.z) ); 772 const Vec3<T> matrix_12( select(m_corner1,f1_p.x,f1_i.x), select(m_corner1,f1_p.y,f1_i.y), select(m_corner1,f1_p.z,f1_i.z) ); 773 const Vec3<T> matrix_22( select(m_corner2,f2_p.x,f2_i.x), select(m_corner2,f2_p.y,f2_i.y), select(m_corner2,f2_p.z,f2_i.z) ); 774 const Vec3<T> matrix_21( select(m_corner3,f3_p.x,f3_i.x), select(m_corner3,f3_p.y,f3_i.y), select(m_corner3,f3_p.z,f3_i.z) ); 775 #else 776 const M m_border = (uu == 0.0f) | (uu == 1.0f) | (vv == 0.0f) | (vv == 1.0f); 777 const Vec3<T> matrix_11( select(m_border,f0_p.x,f0_i.x), select(m_border,f0_p.y,f0_i.y), select(m_border,f0_p.z,f0_i.z) ); 778 const Vec3<T> matrix_12( select(m_border,f1_p.x,f1_i.x), select(m_border,f1_p.y,f1_i.y), select(m_border,f1_p.z,f1_i.z) ); 779 const Vec3<T> matrix_22( select(m_border,f2_p.x,f2_i.x), select(m_border,f2_p.y,f2_i.y), select(m_border,f2_p.z,f2_i.z) ); 780 const Vec3<T> matrix_21( select(m_border,f3_p.x,f3_i.x), select(m_border,f3_p.y,f3_i.y), select(m_border,f3_p.z,f3_i.z) ); 781 #endif 782 783 const Vec3<T> matrix_00 = Vec3<T>(matrix[0][0].x,matrix[0][0].y,matrix[0][0].z); 784 const Vec3<T> matrix_10 = Vec3<T>(matrix[1][0].x,matrix[1][0].y,matrix[1][0].z); 785 const Vec3<T> matrix_20 = Vec3<T>(matrix[2][0].x,matrix[2][0].y,matrix[2][0].z); 786 const Vec3<T> matrix_30 = Vec3<T>(matrix[3][0].x,matrix[3][0].y,matrix[3][0].z); 787 788 const Vec3<T> matrix_01 = Vec3<T>(matrix[0][1].x,matrix[0][1].y,matrix[0][1].z); 789 const Vec3<T> matrix_02 = Vec3<T>(matrix[0][2].x,matrix[0][2].y,matrix[0][2].z); 790 const Vec3<T> matrix_03 = Vec3<T>(matrix[0][3].x,matrix[0][3].y,matrix[0][3].z); 791 792 const Vec3<T> matrix_31 = Vec3<T>(matrix[3][1].x,matrix[3][1].y,matrix[3][1].z); 793 const Vec3<T> matrix_32 = Vec3<T>(matrix[3][2].x,matrix[3][2].y,matrix[3][2].z); 794 const Vec3<T> matrix_33 = Vec3<T>(matrix[3][3].x,matrix[3][3].y,matrix[3][3].z); 795 796 const Vec3<T> matrix_13 = Vec3<T>(matrix[1][3].x,matrix[1][3].y,matrix[1][3].z); 797 const Vec3<T> matrix_23 = Vec3<T>(matrix[2][3].x,matrix[2][3].y,matrix[2][3].z); 798 799 /* tangentU */ 800 const Vec3<T> col0 = deCasteljau(vv, matrix_00, matrix_10, matrix_20, matrix_30); 801 const Vec3<T> col1 = deCasteljau(vv, matrix_01, matrix_11, matrix_21, matrix_31); 802 const Vec3<T> col2 = deCasteljau(vv, matrix_02, matrix_12, matrix_22, matrix_32); 803 const Vec3<T> col3 = deCasteljau(vv, matrix_03, matrix_13, matrix_23, matrix_33); 804 805 const Vec3<T> tangentU = deCasteljau_tangent(uu, col0, col1, col2, col3); 806 807 /* tangentV */ 808 const Vec3<T> row0 = deCasteljau(uu, matrix_00, matrix_01, matrix_02, matrix_03); 809 const Vec3<T> row1 = deCasteljau(uu, matrix_10, matrix_11, matrix_12, matrix_13); 810 const Vec3<T> row2 = deCasteljau(uu, matrix_20, matrix_21, matrix_22, matrix_23); 811 const Vec3<T> row3 = deCasteljau(uu, matrix_30, matrix_31, matrix_32, matrix_33); 812 813 const Vec3<T> tangentV = deCasteljau_tangent(vv, row0, row1, row2, row3); 814 815 /* normal = tangentU x tangentV */ 816 const Vec3<T> n = cross(tangentU,tangentV); 817 return n; 818 } 819 820 template<class T> normal(const T & uu,const T & vv)821 __forceinline Vec3<T> normal(const T& uu, const T& vv) const 822 { 823 Vec3<T> ff[2][2]; 824 ff[0][0] = Vec3<T>(f[0][0]); 825 ff[0][1] = Vec3<T>(f[0][1]); 826 ff[1][1] = Vec3<T>(f[1][1]); 827 ff[1][0] = Vec3<T>(f[1][0]); 828 return normal_t(v,ff,uu,vv); 829 } 830 bounds()831 __forceinline BBox<Vertex> bounds() const 832 { 833 const Vertex *const cv = &v[0][0]; 834 BBox<Vertex> bounds (cv[0]); 835 for (size_t i=1; i<16; i++) 836 bounds.extend( cv[i] ); 837 bounds.extend(f[0][0]); 838 bounds.extend(f[1][0]); 839 bounds.extend(f[1][1]); 840 bounds.extend(f[1][1]); 841 return bounds; 842 } 843 844 friend embree_ostream operator<<(embree_ostream o, const GregoryPatchT& p) 845 { 846 for (size_t y=0; y<4; y++) 847 for (size_t x=0; x<4; x++) 848 o << "v[" << y << "][" << x << "] " << p.v[y][x] << embree_endl; 849 850 for (size_t y=0; y<2; y++) 851 for (size_t x=0; x<2; x++) 852 o << "f[" << y << "][" << x << "] " << p.f[y][x] << embree_endl; 853 return o; 854 } 855 }; 856 857 typedef GregoryPatchT<Vec3fa,Vec3fa_t> GregoryPatch3fa; 858 859 template<typename Vertex, typename Vertex_t> BezierPatchT(const HalfEdge * edge,const char * vertices,size_t stride)860 __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT (const HalfEdge* edge, const char* vertices, size_t stride) 861 { 862 CatmullClarkPatchT<Vertex,Vertex_t> patch(edge,vertices,stride); 863 GregoryPatchT<Vertex,Vertex_t> gpatch(patch); 864 gpatch.convert_to_bezier(); 865 for (size_t y=0; y<4; y++) 866 for (size_t x=0; x<4; x++) 867 matrix[y][x] = (Vertex_t)gpatch.v[y][x]; 868 } 869 870 template<typename Vertex, typename Vertex_t> BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t> & patch)871 __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch) 872 { 873 GregoryPatchT<Vertex,Vertex_t> gpatch(patch); 874 gpatch.convert_to_bezier(); 875 for (size_t y=0; y<4; y++) 876 for (size_t x=0; x<4; x++) 877 matrix[y][x] = (Vertex_t)gpatch.v[y][x]; 878 } 879 880 template<typename Vertex, typename Vertex_t> BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t> & patch,const BezierCurveT<Vertex> * border0,const BezierCurveT<Vertex> * border1,const BezierCurveT<Vertex> * border2,const BezierCurveT<Vertex> * border3)881 __forceinline BezierPatchT<Vertex,Vertex_t>::BezierPatchT(const CatmullClarkPatchT<Vertex,Vertex_t>& patch, 882 const BezierCurveT<Vertex>* border0, 883 const BezierCurveT<Vertex>* border1, 884 const BezierCurveT<Vertex>* border2, 885 const BezierCurveT<Vertex>* border3) 886 { 887 GregoryPatchT<Vertex,Vertex_t> gpatch(patch,border0,border1,border2,border3); 888 gpatch.convert_to_bezier(); 889 for (size_t y=0; y<4; y++) 890 for (size_t x=0; x<4; x++) 891 matrix[y][x] = (Vertex_t)gpatch.v[y][x]; 892 } 893 } 894