1 /* 2 ----------------------------------------------------------------------------- 3 This source file is part of OGRE 4 (Object-oriented Graphics Rendering Engine) 5 For the latest info, see http://www.ogre3d.org/ 6 7 Copyright (c) 2000-2013 Torus Knot Software Ltd 8 9 Permission is hereby granted, free of charge, to any person obtaining a copy 10 of this software and associated documentation files (the "Software"), to deal 11 in the Software without restriction, including without limitation the rights 12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 copies of the Software, and to permit persons to whom the Software is 14 furnished to do so, subject to the following conditions: 15 16 The above copyright notice and this permission notice shall be included in 17 all copies or substantial portions of the Software. 18 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 THE SOFTWARE. 26 ----------------------------------------------------------------------------- 27 */ 28 #include "OgreStableHeaders.h" 29 30 #include "OgreOptimisedUtil.h" 31 32 #include "OgreVector3.h" 33 #include "OgreMatrix4.h" 34 35 namespace Ogre { 36 37 //------------------------------------------------------------------------- 38 // Local classes 39 //------------------------------------------------------------------------- 40 41 /** General implementation of OptimisedUtil. 42 @note 43 Don't use this class directly, use OptimisedUtil instead. 44 */ 45 class _OgrePrivate OptimisedUtilGeneral : public OptimisedUtil 46 { 47 public: 48 /// @copydoc OptimisedUtil::softwareVertexSkinning 49 virtual void softwareVertexSkinning( 50 const float *srcPosPtr, float *destPosPtr, 51 const float *srcNormPtr, float *destNormPtr, 52 const float *blendWeightPtr, const unsigned char* blendIndexPtr, 53 const Matrix4* const* blendMatrices, 54 size_t srcPosStride, size_t destPosStride, 55 size_t srcNormStride, size_t destNormStride, 56 size_t blendWeightStride, size_t blendIndexStride, 57 size_t numWeightsPerVertex, 58 size_t numVertices); 59 60 /// @copydoc OptimisedUtil::softwareVertexMorph 61 virtual void softwareVertexMorph( 62 Real t, 63 const float *srcPos1, const float *srcPos2, 64 float *dstPos, 65 size_t pos1VSize, size_t pos2VSize, size_t dstVSize, 66 size_t numVertices, 67 bool morphNormals); 68 69 /// @copydoc OptimisedUtil::concatenateAffineMatrices 70 virtual void concatenateAffineMatrices( 71 const Matrix4& baseMatrix, 72 const Matrix4* srcMatrices, 73 Matrix4* dstMatrices, 74 size_t numMatrices); 75 76 /// @copydoc OptimisedUtil::calculateFaceNormals 77 virtual void calculateFaceNormals( 78 const float *positions, 79 const EdgeData::Triangle *triangles, 80 Vector4 *faceNormals, 81 size_t numTriangles); 82 83 /// @copydoc OptimisedUtil::calculateLightFacing 84 virtual void calculateLightFacing( 85 const Vector4& lightPos, 86 const Vector4* faceNormals, 87 char* lightFacings, 88 size_t numFaces); 89 90 /// @copydoc OptimisedUtil::extrudeVertices 91 virtual void extrudeVertices( 92 const Vector4& lightPos, 93 Real extrudeDist, 94 const float* srcPositions, 95 float* destPositions, 96 size_t numVertices); 97 }; 98 //--------------------------------------------------------------------- 99 //--------------------------------------------------------------------- 100 //--------------------------------------------------------------------- softwareVertexSkinning(const float * pSrcPos,float * pDestPos,const float * pSrcNorm,float * pDestNorm,const float * pBlendWeight,const unsigned char * pBlendIndex,const Matrix4 * const * blendMatrices,size_t srcPosStride,size_t destPosStride,size_t srcNormStride,size_t destNormStride,size_t blendWeightStride,size_t blendIndexStride,size_t numWeightsPerVertex,size_t numVertices)101 void OptimisedUtilGeneral::softwareVertexSkinning( 102 const float *pSrcPos, float *pDestPos, 103 const float *pSrcNorm, float *pDestNorm, 104 const float *pBlendWeight, const unsigned char* pBlendIndex, 105 const Matrix4* const* blendMatrices, 106 size_t srcPosStride, size_t destPosStride, 107 size_t srcNormStride, size_t destNormStride, 108 size_t blendWeightStride, size_t blendIndexStride, 109 size_t numWeightsPerVertex, 110 size_t numVertices) 111 { 112 // Source vectors 113 Vector3 sourceVec = Vector3::ZERO, sourceNorm = Vector3::ZERO; 114 // Accumulation vectors 115 Vector3 accumVecPos, accumVecNorm; 116 117 // Loop per vertex 118 for (size_t vertIdx = 0; vertIdx < numVertices; ++vertIdx) 119 { 120 // Load source vertex elements 121 sourceVec.x = pSrcPos[0]; 122 sourceVec.y = pSrcPos[1]; 123 sourceVec.z = pSrcPos[2]; 124 125 if (pSrcNorm) 126 { 127 sourceNorm.x = pSrcNorm[0]; 128 sourceNorm.y = pSrcNorm[1]; 129 sourceNorm.z = pSrcNorm[2]; 130 } 131 132 // Load accumulators 133 accumVecPos = Vector3::ZERO; 134 accumVecNorm = Vector3::ZERO; 135 136 // Loop per blend weight 137 // 138 // Note: Don't change "unsigned short" here!!! If use "size_t" instead, 139 // VC7.1 unroll this loop to four blend weights pre-iteration, and then 140 // loss performance 10% in this function. Ok, this give a hint that we 141 // should unroll this loop manually for better performance, will do that 142 // later. 143 // 144 for (unsigned short blendIdx = 0; blendIdx < numWeightsPerVertex; ++blendIdx) 145 { 146 // Blend by multiplying source by blend matrix and scaling by weight 147 // Add to accumulator 148 // NB weights must be normalised!! 149 Real weight = pBlendWeight[blendIdx]; 150 if (weight) 151 { 152 // Blend position, use 3x4 matrix 153 const Matrix4& mat = *blendMatrices[pBlendIndex[blendIdx]]; 154 accumVecPos.x += 155 (mat[0][0] * sourceVec.x + 156 mat[0][1] * sourceVec.y + 157 mat[0][2] * sourceVec.z + 158 mat[0][3]) 159 * weight; 160 accumVecPos.y += 161 (mat[1][0] * sourceVec.x + 162 mat[1][1] * sourceVec.y + 163 mat[1][2] * sourceVec.z + 164 mat[1][3]) 165 * weight; 166 accumVecPos.z += 167 (mat[2][0] * sourceVec.x + 168 mat[2][1] * sourceVec.y + 169 mat[2][2] * sourceVec.z + 170 mat[2][3]) 171 * weight; 172 if (pSrcNorm) 173 { 174 // Blend normal 175 // We should blend by inverse transpose here, but because we're assuming the 3x3 176 // aspect of the matrix is orthogonal (no non-uniform scaling), the inverse transpose 177 // is equal to the main 3x3 matrix 178 // Note because it's a normal we just extract the rotational part, saves us renormalising here 179 accumVecNorm.x += 180 (mat[0][0] * sourceNorm.x + 181 mat[0][1] * sourceNorm.y + 182 mat[0][2] * sourceNorm.z) 183 * weight; 184 accumVecNorm.y += 185 (mat[1][0] * sourceNorm.x + 186 mat[1][1] * sourceNorm.y + 187 mat[1][2] * sourceNorm.z) 188 * weight; 189 accumVecNorm.z += 190 (mat[2][0] * sourceNorm.x + 191 mat[2][1] * sourceNorm.y + 192 mat[2][2] * sourceNorm.z) 193 * weight; 194 } 195 } 196 } 197 198 // Stored blended vertex in hardware buffer 199 pDestPos[0] = accumVecPos.x; 200 pDestPos[1] = accumVecPos.y; 201 pDestPos[2] = accumVecPos.z; 202 203 // Stored blended vertex in temp buffer 204 if (pSrcNorm) 205 { 206 // Normalise 207 accumVecNorm.normalise(); 208 pDestNorm[0] = accumVecNorm.x; 209 pDestNorm[1] = accumVecNorm.y; 210 pDestNorm[2] = accumVecNorm.z; 211 // Advance pointers 212 advanceRawPointer(pSrcNorm, srcNormStride); 213 advanceRawPointer(pDestNorm, destNormStride); 214 } 215 216 // Advance pointers 217 advanceRawPointer(pSrcPos, srcPosStride); 218 advanceRawPointer(pDestPos, destPosStride); 219 advanceRawPointer(pBlendWeight, blendWeightStride); 220 advanceRawPointer(pBlendIndex, blendIndexStride); 221 } 222 } 223 //--------------------------------------------------------------------- concatenateAffineMatrices(const Matrix4 & baseMatrix,const Matrix4 * pSrcMat,Matrix4 * pDstMat,size_t numMatrices)224 void OptimisedUtilGeneral::concatenateAffineMatrices( 225 const Matrix4& baseMatrix, 226 const Matrix4* pSrcMat, 227 Matrix4* pDstMat, 228 size_t numMatrices) 229 { 230 const Matrix4& m = baseMatrix; 231 232 for (size_t i = 0; i < numMatrices; ++i) 233 { 234 const Matrix4& s = *pSrcMat; 235 Matrix4& d = *pDstMat; 236 237 // TODO: Promote following code to Matrix4 class. 238 239 d[0][0] = m[0][0] * s[0][0] + m[0][1] * s[1][0] + m[0][2] * s[2][0]; 240 d[0][1] = m[0][0] * s[0][1] + m[0][1] * s[1][1] + m[0][2] * s[2][1]; 241 d[0][2] = m[0][0] * s[0][2] + m[0][1] * s[1][2] + m[0][2] * s[2][2]; 242 d[0][3] = m[0][0] * s[0][3] + m[0][1] * s[1][3] + m[0][2] * s[2][3] + m[0][3]; 243 244 d[1][0] = m[1][0] * s[0][0] + m[1][1] * s[1][0] + m[1][2] * s[2][0]; 245 d[1][1] = m[1][0] * s[0][1] + m[1][1] * s[1][1] + m[1][2] * s[2][1]; 246 d[1][2] = m[1][0] * s[0][2] + m[1][1] * s[1][2] + m[1][2] * s[2][2]; 247 d[1][3] = m[1][0] * s[0][3] + m[1][1] * s[1][3] + m[1][2] * s[2][3] + m[1][3]; 248 249 d[2][0] = m[2][0] * s[0][0] + m[2][1] * s[1][0] + m[2][2] * s[2][0]; 250 d[2][1] = m[2][0] * s[0][1] + m[2][1] * s[1][1] + m[2][2] * s[2][1]; 251 d[2][2] = m[2][0] * s[0][2] + m[2][1] * s[1][2] + m[2][2] * s[2][2]; 252 d[2][3] = m[2][0] * s[0][3] + m[2][1] * s[1][3] + m[2][2] * s[2][3] + m[2][3]; 253 254 d[3][0] = 0; 255 d[3][1] = 0; 256 d[3][2] = 0; 257 d[3][3] = 1; 258 259 ++pSrcMat; 260 ++pDstMat; 261 } 262 } 263 //--------------------------------------------------------------------- softwareVertexMorph(Real t,const float * pSrc1,const float * pSrc2,float * pDst,size_t pos1VSize,size_t pos2VSize,size_t dstVSize,size_t numVertices,bool morphNormals)264 void OptimisedUtilGeneral::softwareVertexMorph( 265 Real t, 266 const float *pSrc1, const float *pSrc2, 267 float *pDst, 268 size_t pos1VSize, size_t pos2VSize, size_t dstVSize, 269 size_t numVertices, 270 bool morphNormals) 271 { 272 size_t src1Skip = pos1VSize/sizeof(float) - 3 - (morphNormals ? 3 : 0); 273 size_t src2Skip = pos2VSize/sizeof(float) - 3 - (morphNormals ? 3 : 0); 274 size_t dstSkip = dstVSize/sizeof(float) - 3 - (morphNormals ? 3 : 0); 275 276 Vector3 nlerpNormal; 277 for (size_t i = 0; i < numVertices; ++i) 278 { 279 // x 280 *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ; 281 ++pSrc1; ++pSrc2; 282 // y 283 *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ; 284 ++pSrc1; ++pSrc2; 285 // z 286 *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ; 287 ++pSrc1; ++pSrc2; 288 289 if (morphNormals) 290 { 291 // normals must be in the same buffer as pos 292 // perform an nlerp 293 // we don't have enough information for a spherical interp 294 nlerpNormal.x = *pSrc1 + t * (*pSrc2 - *pSrc1); 295 ++pSrc1; ++pSrc2; 296 nlerpNormal.y = *pSrc1 + t * (*pSrc2 - *pSrc1); 297 ++pSrc1; ++pSrc2; 298 nlerpNormal.z = *pSrc1 + t * (*pSrc2 - *pSrc1); 299 ++pSrc1; ++pSrc2; 300 nlerpNormal.normalise(); 301 *pDst++ = nlerpNormal.x; 302 *pDst++ = nlerpNormal.y; 303 *pDst++ = nlerpNormal.z; 304 } 305 306 pSrc1 += src1Skip; 307 pSrc2 += src2Skip; 308 pDst += dstSkip; 309 310 } 311 } 312 //--------------------------------------------------------------------- calculateFaceNormals(const float * positions,const EdgeData::Triangle * triangles,Vector4 * faceNormals,size_t numTriangles)313 void OptimisedUtilGeneral::calculateFaceNormals( 314 const float *positions, 315 const EdgeData::Triangle *triangles, 316 Vector4 *faceNormals, 317 size_t numTriangles) 318 { 319 for ( ; numTriangles; --numTriangles) 320 { 321 const EdgeData::Triangle& t = *triangles++; 322 size_t offset; 323 324 offset = t.vertIndex[0] * 3; 325 Vector3 v1(positions[offset+0], positions[offset+1], positions[offset+2]); 326 327 offset = t.vertIndex[1] * 3; 328 Vector3 v2(positions[offset+0], positions[offset+1], positions[offset+2]); 329 330 offset = t.vertIndex[2] * 3; 331 Vector3 v3(positions[offset+0], positions[offset+1], positions[offset+2]); 332 333 *faceNormals++ = Math::calculateFaceNormalWithoutNormalize(v1, v2, v3); 334 } 335 } 336 //--------------------------------------------------------------------- calculateLightFacing(const Vector4 & lightPos,const Vector4 * faceNormals,char * lightFacings,size_t numFaces)337 void OptimisedUtilGeneral::calculateLightFacing( 338 const Vector4& lightPos, 339 const Vector4* faceNormals, 340 char* lightFacings, 341 size_t numFaces) 342 { 343 for (size_t i = 0; i < numFaces; ++i) 344 { 345 *lightFacings++ = (lightPos.dotProduct(*faceNormals++) > 0); 346 } 347 } 348 //--------------------------------------------------------------------- extrudeVertices(const Vector4 & lightPos,Real extrudeDist,const float * pSrcPos,float * pDestPos,size_t numVertices)349 void OptimisedUtilGeneral::extrudeVertices( 350 const Vector4& lightPos, 351 Real extrudeDist, 352 const float* pSrcPos, 353 float* pDestPos, 354 size_t numVertices) 355 { 356 if (lightPos.w == 0.0f) 357 { 358 // Directional light, extrusion is along light direction 359 360 Vector3 extrusionDir( 361 -lightPos.x, 362 -lightPos.y, 363 -lightPos.z); 364 extrusionDir.normalise(); 365 extrusionDir *= extrudeDist; 366 367 for (size_t vert = 0; vert < numVertices; ++vert) 368 { 369 *pDestPos++ = *pSrcPos++ + extrusionDir.x; 370 *pDestPos++ = *pSrcPos++ + extrusionDir.y; 371 *pDestPos++ = *pSrcPos++ + extrusionDir.z; 372 } 373 } 374 else 375 { 376 // Point light, calculate extrusionDir for every vertex 377 assert(lightPos.w == 1.0f); 378 379 for (size_t vert = 0; vert < numVertices; ++vert) 380 { 381 Vector3 extrusionDir( 382 pSrcPos[0] - lightPos.x, 383 pSrcPos[1] - lightPos.y, 384 pSrcPos[2] - lightPos.z); 385 extrusionDir.normalise(); 386 extrusionDir *= extrudeDist; 387 388 *pDestPos++ = *pSrcPos++ + extrusionDir.x; 389 *pDestPos++ = *pSrcPos++ + extrusionDir.y; 390 *pDestPos++ = *pSrcPos++ + extrusionDir.z; 391 } 392 } 393 } 394 //--------------------------------------------------------------------- 395 //--------------------------------------------------------------------- 396 //--------------------------------------------------------------------- _getOptimisedUtilGeneral(void)397 extern OptimisedUtil* _getOptimisedUtilGeneral(void) 398 { 399 static OptimisedUtilGeneral msOptimisedUtilGeneral; 400 return &msOptimisedUtilGeneral; 401 } 402 403 } 404