1 // Copyright 2009-2021 Intel Corporation 2 // SPDX-License-Identifier: Apache-2.0 3 4 #pragma once 5 6 #include "gregory_patch.h" 7 8 namespace embree 9 { 10 class __aligned(64) DenseGregoryPatch3fa 11 { 12 typedef Vec3fa Vec3fa_4x4[4][4]; 13 public: 14 DenseGregoryPatch3fa(const GregoryPatch3fa & patch)15 __forceinline DenseGregoryPatch3fa (const GregoryPatch3fa& patch) 16 { 17 for (size_t y=0; y<4; y++) 18 for (size_t x=0; x<4; x++) 19 matrix[y][x] = Vec3ff(patch.v[y][x], 0.0f); 20 21 matrix[0][0].w = patch.f[0][0].x; 22 matrix[0][1].w = patch.f[0][0].y; 23 matrix[0][2].w = patch.f[0][0].z; 24 matrix[0][3].w = 0.0f; 25 26 matrix[1][0].w = patch.f[0][1].x; 27 matrix[1][1].w = patch.f[0][1].y; 28 matrix[1][2].w = patch.f[0][1].z; 29 matrix[1][3].w = 0.0f; 30 31 matrix[2][0].w = patch.f[1][1].x; 32 matrix[2][1].w = patch.f[1][1].y; 33 matrix[2][2].w = patch.f[1][1].z; 34 matrix[2][3].w = 0.0f; 35 36 matrix[3][0].w = patch.f[1][0].x; 37 matrix[3][1].w = patch.f[1][0].y; 38 matrix[3][2].w = patch.f[1][0].z; 39 matrix[3][3].w = 0.0f; 40 } 41 extract_f_m(Vec3fa f_m[2][2])42 __forceinline void extract_f_m(Vec3fa f_m[2][2]) const 43 { 44 f_m[0][0] = Vec3fa( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w ); 45 f_m[0][1] = Vec3fa( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w ); 46 f_m[1][1] = Vec3fa( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w ); 47 f_m[1][0] = Vec3fa( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w ); 48 } 49 eval(const float uu,const float vv)50 __forceinline Vec3fa eval(const float uu, const float vv) const 51 { 52 __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m); 53 return GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,uu,vv); 54 } 55 normal(const float uu,const float vv)56 __forceinline Vec3fa normal(const float uu, const float vv) const 57 { 58 __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m); 59 return GregoryPatch3fa::normal(*(Vec3fa_4x4*)&matrix,f_m,uu,vv); 60 } 61 62 template<class T> eval(const T & uu,const T & vv)63 __forceinline Vec3<T> eval(const T &uu, const T &vv) const 64 { 65 Vec3<T> f_m[2][2]; 66 f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w ); 67 f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w ); 68 f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w ); 69 f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w ); 70 return GregoryPatch3fa::eval_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv); 71 } 72 73 template<class T> normal(const T & uu,const T & vv)74 __forceinline Vec3<T> normal(const T &uu, const T &vv) const 75 { 76 Vec3<T> f_m[2][2]; 77 f_m[0][0] = Vec3<T>( matrix[0][0].w, matrix[0][1].w, matrix[0][2].w ); 78 f_m[0][1] = Vec3<T>( matrix[1][0].w, matrix[1][1].w, matrix[1][2].w ); 79 f_m[1][1] = Vec3<T>( matrix[2][0].w, matrix[2][1].w, matrix[2][2].w ); 80 f_m[1][0] = Vec3<T>( matrix[3][0].w, matrix[3][1].w, matrix[3][2].w ); 81 return GregoryPatch3fa::normal_t(*(Vec3fa_4x4*)&matrix,f_m,uu,vv); 82 } 83 84 __forceinline void eval(const float u, const float v, 85 Vec3fa* P, Vec3fa* dPdu, Vec3fa* dPdv, Vec3fa* ddPdudu, Vec3fa* ddPdvdv, Vec3fa* ddPdudv, 86 const float dscale = 1.0f) const 87 { 88 __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m); 89 if (P) { 90 *P = GregoryPatch3fa::eval(*(Vec3fa_4x4*)&matrix,f_m,u,v); 91 } 92 if (dPdu) { 93 assert(dPdu); *dPdu = GregoryPatch3fa::eval_du(*(Vec3fa_4x4*)&matrix,f_m,u,v)*dscale; 94 assert(dPdv); *dPdv = GregoryPatch3fa::eval_dv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*dscale; 95 } 96 if (ddPdudu) { 97 assert(ddPdudu); *ddPdudu = GregoryPatch3fa::eval_dudu(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale); 98 assert(ddPdvdv); *ddPdvdv = GregoryPatch3fa::eval_dvdv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale); 99 assert(ddPdudv); *ddPdudv = GregoryPatch3fa::eval_dudv(*(Vec3fa_4x4*)&matrix,f_m,u,v)*sqr(dscale); 100 } 101 } 102 103 template<typename vbool, typename vfloat> eval(const vbool & valid,const vfloat & uu,const vfloat & vv,float * P,float * dPdu,float * dPdv,const float dscale,const size_t dstride,const size_t N)104 __forceinline void eval(const vbool& valid, const vfloat& uu, const vfloat& vv, float* P, float* dPdu, float* dPdv, const float dscale, const size_t dstride, const size_t N) const 105 { 106 __aligned(64) Vec3fa f_m[2][2]; extract_f_m(f_m); 107 GregoryPatch3fa::eval(matrix,f_m,valid,uu,vv,P,dPdu,dPdv,dscale,dstride,N); 108 } 109 110 private: 111 Vec3ff matrix[4][4]; // f_p/m points are stored in 4th component 112 }; 113 } 114