1 /*
2 -----------------------------------------------------------------------------
3 This source file is part of OGRE
4     (Object-oriented Graphics Rendering Engine)
5 For the latest info, see http://www.ogre3d.org/
6 
7 Copyright (c) 2000-2013 Torus Knot Software Ltd
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 -----------------------------------------------------------------------------
27 */
28 #include "OgreStableHeaders.h"
29 
30 #include "OgreOptimisedUtil.h"
31 
32 #include "OgreVector3.h"
33 #include "OgreMatrix4.h"
34 
35 namespace Ogre {
36 
37 //-------------------------------------------------------------------------
38 // Local classes
39 //-------------------------------------------------------------------------
40 
41     /** General implementation of OptimisedUtil.
42     @note
43         Don't use this class directly, use OptimisedUtil instead.
44     */
45     class _OgrePrivate OptimisedUtilGeneral : public OptimisedUtil
46     {
47     public:
48         /// @copydoc OptimisedUtil::softwareVertexSkinning
49         virtual void softwareVertexSkinning(
50             const float *srcPosPtr, float *destPosPtr,
51             const float *srcNormPtr, float *destNormPtr,
52             const float *blendWeightPtr, const unsigned char* blendIndexPtr,
53             const Matrix4* const* blendMatrices,
54             size_t srcPosStride, size_t destPosStride,
55             size_t srcNormStride, size_t destNormStride,
56             size_t blendWeightStride, size_t blendIndexStride,
57             size_t numWeightsPerVertex,
58             size_t numVertices);
59 
60         /// @copydoc OptimisedUtil::softwareVertexMorph
61         virtual void softwareVertexMorph(
62             Real t,
63             const float *srcPos1, const float *srcPos2,
64             float *dstPos,
65 			size_t pos1VSize, size_t pos2VSize, size_t dstVSize,
66             size_t numVertices,
67 			bool morphNormals);
68 
69         /// @copydoc OptimisedUtil::concatenateAffineMatrices
70         virtual void concatenateAffineMatrices(
71             const Matrix4& baseMatrix,
72             const Matrix4* srcMatrices,
73             Matrix4* dstMatrices,
74             size_t numMatrices);
75 
76         /// @copydoc OptimisedUtil::calculateFaceNormals
77         virtual void calculateFaceNormals(
78             const float *positions,
79             const EdgeData::Triangle *triangles,
80             Vector4 *faceNormals,
81             size_t numTriangles);
82 
83         /// @copydoc OptimisedUtil::calculateLightFacing
84         virtual void calculateLightFacing(
85             const Vector4& lightPos,
86             const Vector4* faceNormals,
87             char* lightFacings,
88             size_t numFaces);
89 
90         /// @copydoc OptimisedUtil::extrudeVertices
91         virtual void extrudeVertices(
92             const Vector4& lightPos,
93             Real extrudeDist,
94             const float* srcPositions,
95             float* destPositions,
96             size_t numVertices);
97     };
98     //---------------------------------------------------------------------
99     //---------------------------------------------------------------------
100     //---------------------------------------------------------------------
softwareVertexSkinning(const float * pSrcPos,float * pDestPos,const float * pSrcNorm,float * pDestNorm,const float * pBlendWeight,const unsigned char * pBlendIndex,const Matrix4 * const * blendMatrices,size_t srcPosStride,size_t destPosStride,size_t srcNormStride,size_t destNormStride,size_t blendWeightStride,size_t blendIndexStride,size_t numWeightsPerVertex,size_t numVertices)101     void OptimisedUtilGeneral::softwareVertexSkinning(
102         const float *pSrcPos, float *pDestPos,
103         const float *pSrcNorm, float *pDestNorm,
104         const float *pBlendWeight, const unsigned char* pBlendIndex,
105         const Matrix4* const* blendMatrices,
106         size_t srcPosStride, size_t destPosStride,
107         size_t srcNormStride, size_t destNormStride,
108         size_t blendWeightStride, size_t blendIndexStride,
109         size_t numWeightsPerVertex,
110         size_t numVertices)
111     {
112         // Source vectors
113         Vector3 sourceVec = Vector3::ZERO, sourceNorm = Vector3::ZERO;
114         // Accumulation vectors
115         Vector3 accumVecPos, accumVecNorm;
116 
117         // Loop per vertex
118         for (size_t vertIdx = 0; vertIdx < numVertices; ++vertIdx)
119         {
120             // Load source vertex elements
121             sourceVec.x = pSrcPos[0];
122             sourceVec.y = pSrcPos[1];
123             sourceVec.z = pSrcPos[2];
124 
125             if (pSrcNorm)
126             {
127                 sourceNorm.x = pSrcNorm[0];
128                 sourceNorm.y = pSrcNorm[1];
129                 sourceNorm.z = pSrcNorm[2];
130             }
131 
132             // Load accumulators
133             accumVecPos = Vector3::ZERO;
134             accumVecNorm = Vector3::ZERO;
135 
136             // Loop per blend weight
137             //
138             // Note: Don't change "unsigned short" here!!! If use "size_t" instead,
139             // VC7.1 unroll this loop to four blend weights pre-iteration, and then
140             // loss performance 10% in this function. Ok, this give a hint that we
141             // should unroll this loop manually for better performance, will do that
142             // later.
143             //
144             for (unsigned short blendIdx = 0; blendIdx < numWeightsPerVertex; ++blendIdx)
145             {
146                 // Blend by multiplying source by blend matrix and scaling by weight
147                 // Add to accumulator
148                 // NB weights must be normalised!!
149                 Real weight = pBlendWeight[blendIdx];
150                 if (weight)
151                 {
152                     // Blend position, use 3x4 matrix
153                     const Matrix4& mat = *blendMatrices[pBlendIndex[blendIdx]];
154                     accumVecPos.x +=
155                         (mat[0][0] * sourceVec.x +
156                          mat[0][1] * sourceVec.y +
157                          mat[0][2] * sourceVec.z +
158                          mat[0][3])
159                          * weight;
160                     accumVecPos.y +=
161                         (mat[1][0] * sourceVec.x +
162                          mat[1][1] * sourceVec.y +
163                          mat[1][2] * sourceVec.z +
164                          mat[1][3])
165                          * weight;
166                     accumVecPos.z +=
167                         (mat[2][0] * sourceVec.x +
168                          mat[2][1] * sourceVec.y +
169                          mat[2][2] * sourceVec.z +
170                          mat[2][3])
171                          * weight;
172                     if (pSrcNorm)
173                     {
174                         // Blend normal
175                         // We should blend by inverse transpose here, but because we're assuming the 3x3
176                         // aspect of the matrix is orthogonal (no non-uniform scaling), the inverse transpose
177                         // is equal to the main 3x3 matrix
178                         // Note because it's a normal we just extract the rotational part, saves us renormalising here
179                         accumVecNorm.x +=
180                             (mat[0][0] * sourceNorm.x +
181                              mat[0][1] * sourceNorm.y +
182                              mat[0][2] * sourceNorm.z)
183                              * weight;
184                         accumVecNorm.y +=
185                             (mat[1][0] * sourceNorm.x +
186                              mat[1][1] * sourceNorm.y +
187                              mat[1][2] * sourceNorm.z)
188                             * weight;
189                         accumVecNorm.z +=
190                             (mat[2][0] * sourceNorm.x +
191                              mat[2][1] * sourceNorm.y +
192                              mat[2][2] * sourceNorm.z)
193                             * weight;
194                     }
195                 }
196             }
197 
198             // Stored blended vertex in hardware buffer
199             pDestPos[0] = accumVecPos.x;
200             pDestPos[1] = accumVecPos.y;
201             pDestPos[2] = accumVecPos.z;
202 
203             // Stored blended vertex in temp buffer
204             if (pSrcNorm)
205             {
206                 // Normalise
207                 accumVecNorm.normalise();
208                 pDestNorm[0] = accumVecNorm.x;
209                 pDestNorm[1] = accumVecNorm.y;
210                 pDestNorm[2] = accumVecNorm.z;
211                 // Advance pointers
212                 advanceRawPointer(pSrcNorm, srcNormStride);
213                 advanceRawPointer(pDestNorm, destNormStride);
214             }
215 
216             // Advance pointers
217             advanceRawPointer(pSrcPos, srcPosStride);
218             advanceRawPointer(pDestPos, destPosStride);
219             advanceRawPointer(pBlendWeight, blendWeightStride);
220             advanceRawPointer(pBlendIndex, blendIndexStride);
221         }
222     }
223     //---------------------------------------------------------------------
concatenateAffineMatrices(const Matrix4 & baseMatrix,const Matrix4 * pSrcMat,Matrix4 * pDstMat,size_t numMatrices)224     void OptimisedUtilGeneral::concatenateAffineMatrices(
225         const Matrix4& baseMatrix,
226         const Matrix4* pSrcMat,
227         Matrix4* pDstMat,
228         size_t numMatrices)
229     {
230         const Matrix4& m = baseMatrix;
231 
232         for (size_t i = 0; i < numMatrices; ++i)
233         {
234             const Matrix4& s = *pSrcMat;
235             Matrix4& d = *pDstMat;
236 
237             // TODO: Promote following code to Matrix4 class.
238 
239             d[0][0] = m[0][0] * s[0][0] + m[0][1] * s[1][0] + m[0][2] * s[2][0];
240             d[0][1] = m[0][0] * s[0][1] + m[0][1] * s[1][1] + m[0][2] * s[2][1];
241             d[0][2] = m[0][0] * s[0][2] + m[0][1] * s[1][2] + m[0][2] * s[2][2];
242             d[0][3] = m[0][0] * s[0][3] + m[0][1] * s[1][3] + m[0][2] * s[2][3] + m[0][3];
243 
244             d[1][0] = m[1][0] * s[0][0] + m[1][1] * s[1][0] + m[1][2] * s[2][0];
245             d[1][1] = m[1][0] * s[0][1] + m[1][1] * s[1][1] + m[1][2] * s[2][1];
246             d[1][2] = m[1][0] * s[0][2] + m[1][1] * s[1][2] + m[1][2] * s[2][2];
247             d[1][3] = m[1][0] * s[0][3] + m[1][1] * s[1][3] + m[1][2] * s[2][3] + m[1][3];
248 
249             d[2][0] = m[2][0] * s[0][0] + m[2][1] * s[1][0] + m[2][2] * s[2][0];
250             d[2][1] = m[2][0] * s[0][1] + m[2][1] * s[1][1] + m[2][2] * s[2][1];
251             d[2][2] = m[2][0] * s[0][2] + m[2][1] * s[1][2] + m[2][2] * s[2][2];
252             d[2][3] = m[2][0] * s[0][3] + m[2][1] * s[1][3] + m[2][2] * s[2][3] + m[2][3];
253 
254             d[3][0] = 0;
255             d[3][1] = 0;
256             d[3][2] = 0;
257             d[3][3] = 1;
258 
259             ++pSrcMat;
260             ++pDstMat;
261         }
262     }
263     //---------------------------------------------------------------------
softwareVertexMorph(Real t,const float * pSrc1,const float * pSrc2,float * pDst,size_t pos1VSize,size_t pos2VSize,size_t dstVSize,size_t numVertices,bool morphNormals)264     void OptimisedUtilGeneral::softwareVertexMorph(
265         Real t,
266         const float *pSrc1, const float *pSrc2,
267         float *pDst,
268 		size_t pos1VSize, size_t pos2VSize, size_t dstVSize,
269         size_t numVertices,
270 		bool morphNormals)
271     {
272 		size_t src1Skip = pos1VSize/sizeof(float) - 3 - (morphNormals ? 3 : 0);
273 		size_t src2Skip = pos2VSize/sizeof(float) - 3 - (morphNormals ? 3 : 0);
274 		size_t dstSkip = dstVSize/sizeof(float) - 3 - (morphNormals ? 3 : 0);
275 
276 		Vector3 nlerpNormal;
277         for (size_t i = 0; i < numVertices; ++i)
278         {
279             // x
280             *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ;
281             ++pSrc1; ++pSrc2;
282             // y
283             *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ;
284             ++pSrc1; ++pSrc2;
285             // z
286             *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ;
287             ++pSrc1; ++pSrc2;
288 
289 			if (morphNormals)
290 			{
291 				// normals must be in the same buffer as pos
292 				// perform an nlerp
293 				// we don't have enough information for a spherical interp
294 				nlerpNormal.x = *pSrc1 + t * (*pSrc2 - *pSrc1);
295 				++pSrc1; ++pSrc2;
296 				nlerpNormal.y = *pSrc1 + t * (*pSrc2 - *pSrc1);
297 				++pSrc1; ++pSrc2;
298 				nlerpNormal.z = *pSrc1 + t * (*pSrc2 - *pSrc1);
299 				++pSrc1; ++pSrc2;
300 				nlerpNormal.normalise();
301 				*pDst++ = nlerpNormal.x;
302 				*pDst++ = nlerpNormal.y;
303 				*pDst++ = nlerpNormal.z;
304 			}
305 
306 			pSrc1 += src1Skip;
307 			pSrc2 += src2Skip;
308 			pDst += dstSkip;
309 
310         }
311     }
312     //---------------------------------------------------------------------
calculateFaceNormals(const float * positions,const EdgeData::Triangle * triangles,Vector4 * faceNormals,size_t numTriangles)313     void OptimisedUtilGeneral::calculateFaceNormals(
314         const float *positions,
315         const EdgeData::Triangle *triangles,
316         Vector4 *faceNormals,
317         size_t numTriangles)
318     {
319         for ( ; numTriangles; --numTriangles)
320         {
321             const EdgeData::Triangle& t = *triangles++;
322             size_t offset;
323 
324             offset = t.vertIndex[0] * 3;
325             Vector3 v1(positions[offset+0], positions[offset+1], positions[offset+2]);
326 
327             offset = t.vertIndex[1] * 3;
328             Vector3 v2(positions[offset+0], positions[offset+1], positions[offset+2]);
329 
330             offset = t.vertIndex[2] * 3;
331             Vector3 v3(positions[offset+0], positions[offset+1], positions[offset+2]);
332 
333             *faceNormals++ = Math::calculateFaceNormalWithoutNormalize(v1, v2, v3);
334         }
335     }
336     //---------------------------------------------------------------------
calculateLightFacing(const Vector4 & lightPos,const Vector4 * faceNormals,char * lightFacings,size_t numFaces)337     void OptimisedUtilGeneral::calculateLightFacing(
338         const Vector4& lightPos,
339         const Vector4* faceNormals,
340         char* lightFacings,
341         size_t numFaces)
342     {
343         for (size_t i = 0; i < numFaces; ++i)
344         {
345             *lightFacings++ = (lightPos.dotProduct(*faceNormals++) > 0);
346         }
347     }
348     //---------------------------------------------------------------------
extrudeVertices(const Vector4 & lightPos,Real extrudeDist,const float * pSrcPos,float * pDestPos,size_t numVertices)349     void OptimisedUtilGeneral::extrudeVertices(
350         const Vector4& lightPos,
351         Real extrudeDist,
352         const float* pSrcPos,
353         float* pDestPos,
354         size_t numVertices)
355     {
356         if (lightPos.w == 0.0f)
357         {
358             // Directional light, extrusion is along light direction
359 
360             Vector3 extrusionDir(
361                 -lightPos.x,
362                 -lightPos.y,
363                 -lightPos.z);
364             extrusionDir.normalise();
365             extrusionDir *= extrudeDist;
366 
367             for (size_t vert = 0; vert < numVertices; ++vert)
368             {
369                 *pDestPos++ = *pSrcPos++ + extrusionDir.x;
370                 *pDestPos++ = *pSrcPos++ + extrusionDir.y;
371                 *pDestPos++ = *pSrcPos++ + extrusionDir.z;
372             }
373         }
374         else
375         {
376             // Point light, calculate extrusionDir for every vertex
377             assert(lightPos.w == 1.0f);
378 
379             for (size_t vert = 0; vert < numVertices; ++vert)
380             {
381                 Vector3 extrusionDir(
382                     pSrcPos[0] - lightPos.x,
383                     pSrcPos[1] - lightPos.y,
384                     pSrcPos[2] - lightPos.z);
385                 extrusionDir.normalise();
386                 extrusionDir *= extrudeDist;
387 
388                 *pDestPos++ = *pSrcPos++ + extrusionDir.x;
389                 *pDestPos++ = *pSrcPos++ + extrusionDir.y;
390                 *pDestPos++ = *pSrcPos++ + extrusionDir.z;
391             }
392         }
393     }
394     //---------------------------------------------------------------------
395     //---------------------------------------------------------------------
396     //---------------------------------------------------------------------
_getOptimisedUtilGeneral(void)397     extern OptimisedUtil* _getOptimisedUtilGeneral(void)
398     {
399         static OptimisedUtilGeneral msOptimisedUtilGeneral;
400         return &msOptimisedUtilGeneral;
401     }
402 
403 }
404