1 /*
2 -----------------------------------------------------------------------------
3 This source file is part of OGRE
4 (Object-oriented Graphics Rendering Engine)
5 For the latest info, see http://www.ogre3d.org/
6 
7 Copyright (c) 2000-2014 Torus Knot Software Ltd
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 -----------------------------------------------------------------------------
27 */
28 #include "OgreStableHeaders.h"
29 #include "OgreInstanceBatchVTF.h"
30 #include "OgreHardwarePixelBuffer.h"
31 #include "OgreInstancedEntity.h"
32 #include "OgreMaterial.h"
33 #include "OgreDualQuaternion.h"
34 
35 namespace Ogre
36 {
37     static const uint16 c_maxTexWidth   = 4096;
38     static const uint16 c_maxTexHeight  = 4096;
39 
BaseInstanceBatchVTF(InstanceManager * creator,MeshPtr & meshReference,const MaterialPtr & material,size_t instancesPerBatch,const Mesh::IndexMap * indexToBoneMap,const String & batchName)40     BaseInstanceBatchVTF::BaseInstanceBatchVTF( InstanceManager *creator, MeshPtr &meshReference,
41                                         const MaterialPtr &material, size_t instancesPerBatch,
42                                         const Mesh::IndexMap *indexToBoneMap, const String &batchName) :
43                 InstanceBatch( creator, meshReference, material, instancesPerBatch,
44                                 indexToBoneMap, batchName ),
45                 mMatricesPerInstance(0),
46                 mNumWorldMatrices( instancesPerBatch ),
47                 mWidthFloatsPadding( 0 ),
48                 mMaxFloatsPerLine( std::numeric_limits<size_t>::max() ),
49                 mRowLength(3),
50                 mWeightCount(1),
51                 mTempTransformsArray3x4(0),
52                 mUseBoneMatrixLookup(false),
53                 mMaxLookupTableInstances(16),
54                 mUseBoneDualQuaternions(false),
55                 mForceOneWeight(false),
56                 mUseOneWeight(false)
57     {
58         cloneMaterial( mMaterial );
59     }
60 
~BaseInstanceBatchVTF()61     BaseInstanceBatchVTF::~BaseInstanceBatchVTF()
62     {
63         //Remove cloned caster materials (if any)
64         Material::Techniques::const_iterator it;
65         for(it = mMaterial->getTechniques().begin(); it != mMaterial->getTechniques().end(); ++it)
66         {
67             Technique *technique = *it;
68 
69             if (technique->getShadowCasterMaterial())
70                 MaterialManager::getSingleton().remove( technique->getShadowCasterMaterial() );
71         }
72 
73         //Remove cloned material
74         MaterialManager::getSingleton().remove( mMaterial );
75 
76         //Remove the VTF texture
77         if( mMatrixTexture )
78             TextureManager::getSingleton().remove( mMatrixTexture );
79 
80         OGRE_FREE(mTempTransformsArray3x4, MEMCATEGORY_GENERAL);
81     }
82 
83     //-----------------------------------------------------------------------
buildFrom(const SubMesh * baseSubMesh,const RenderOperation & renderOperation)84     void BaseInstanceBatchVTF::buildFrom( const SubMesh *baseSubMesh, const RenderOperation &renderOperation )
85     {
86         if (useBoneMatrixLookup())
87         {
88             //when using bone matrix lookup resource are not shared
89             //
90             //Future implementation: while the instance vertex buffer can't be shared
91             //The texture can be.
92             //
93             build(baseSubMesh);
94         }
95         else
96         {
97             createVertexTexture( baseSubMesh );
98             InstanceBatch::buildFrom( baseSubMesh, renderOperation );
99         }
100     }
101     //-----------------------------------------------------------------------
cloneMaterial(const MaterialPtr & material)102     void BaseInstanceBatchVTF::cloneMaterial( const MaterialPtr &material )
103     {
104         //Used to track down shadow casters, so the same material caster doesn't get cloned twice
105         typedef std::map<String, MaterialPtr> MatMap;
106         MatMap clonedMaterials;
107 
108         //We need to clone the material so we can have different textures for each batch.
109         mMaterial = material->clone( mName + "/VTFMaterial" );
110 
111         //Now do the same with the techniques which have a material shadow caster
112         Material::Techniques::const_iterator it;
113         for(it = material->getTechniques().begin(); it != material->getTechniques().end(); ++it)
114         {
115             Technique *technique = *it;
116 
117             if( technique->getShadowCasterMaterial() )
118             {
119                 const MaterialPtr &casterMat    = technique->getShadowCasterMaterial();
120                 const String &casterName        = casterMat->getName();
121 
122                 //Was this material already cloned?
123                 MatMap::const_iterator itor = clonedMaterials.find(casterName);
124 
125                 if( itor == clonedMaterials.end() )
126                 {
127                     //No? Clone it and track it
128                     MaterialPtr cloned = casterMat->clone( mName + "/VTFMaterialCaster" +
129                                                     StringConverter::toString(clonedMaterials.size()) );
130                     technique->setShadowCasterMaterial( cloned );
131                     clonedMaterials[casterName] = cloned;
132                 }
133                 else
134                     technique->setShadowCasterMaterial( itor->second ); //Reuse the previously cloned mat
135             }
136         }
137     }
138     //-----------------------------------------------------------------------
retrieveBoneIdx(VertexData * baseVertexData,HWBoneIdxVec & outBoneIdx)139     void BaseInstanceBatchVTF::retrieveBoneIdx( VertexData *baseVertexData, HWBoneIdxVec &outBoneIdx )
140     {
141         const VertexElement *ve = baseVertexData->vertexDeclaration->
142                                                             findElementBySemantic( VES_BLEND_INDICES );
143         const VertexElement *veWeights = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_WEIGHTS );
144 
145         HardwareVertexBufferSharedPtr buff = baseVertexData->vertexBufferBinding->getBuffer(ve->getSource());
146         HardwareBufferLockGuard baseVertexLock(buff, HardwareBuffer::HBL_READ_ONLY);
147         char const *baseBuffer = static_cast<char const*>(baseVertexLock.pData);
148 
149         for( size_t i=0; i<baseVertexData->vertexCount; ++i )
150         {
151             float const *pWeights = reinterpret_cast<float const*>(baseBuffer + veWeights->getOffset());
152 
153             uint8 biggestWeightIdx = 0;
154             for( uint8 j=1; j< uint8(mWeightCount); ++j )
155             {
156                 biggestWeightIdx = pWeights[biggestWeightIdx] < pWeights[j] ? j : biggestWeightIdx;
157             }
158 
159             uint8 const *pIndex = reinterpret_cast<uint8 const*>(baseBuffer + ve->getOffset());
160             outBoneIdx[i] = pIndex[biggestWeightIdx];
161 
162             baseBuffer += baseVertexData->vertexDeclaration->getVertexSize(ve->getSource());
163         }
164     }
165 
166     //-----------------------------------------------------------------------
retrieveBoneIdxWithWeights(VertexData * baseVertexData,HWBoneIdxVec & outBoneIdx,HWBoneWgtVec & outBoneWgt)167     void BaseInstanceBatchVTF::retrieveBoneIdxWithWeights(VertexData *baseVertexData, HWBoneIdxVec &outBoneIdx, HWBoneWgtVec &outBoneWgt)
168     {
169         const VertexElement *ve = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_INDICES );
170         const VertexElement *veWeights = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_WEIGHTS );
171 
172         HardwareVertexBufferSharedPtr buff = baseVertexData->vertexBufferBinding->getBuffer(ve->getSource());
173         HardwareBufferLockGuard baseVertexLock(buff, HardwareBuffer::HBL_READ_ONLY);
174         char const *baseBuffer = static_cast<char const*>(baseVertexLock.pData);
175 
176         for( size_t i=0; i<baseVertexData->vertexCount * mWeightCount; i += mWeightCount)
177         {
178             float const *pWeights = reinterpret_cast<float const*>(baseBuffer + veWeights->getOffset());
179             uint8 const *pIndex = reinterpret_cast<uint8 const*>(baseBuffer + ve->getOffset());
180 
181             float weightMagnitude = 0.0f;
182             for( size_t j=0; j < mWeightCount; ++j )
183             {
184                 outBoneWgt[i+j] = pWeights[j];
185                 weightMagnitude += pWeights[j];
186                 outBoneIdx[i+j] = pIndex[j];
187             }
188 
189             //Normalize the bone weights so they add to one
190             for(size_t j=0; j < mWeightCount; ++j)
191             {
192                 outBoneWgt[i+j] /= weightMagnitude;
193             }
194 
195             baseBuffer += baseVertexData->vertexDeclaration->getVertexSize(ve->getSource());
196         }
197     }
198 
199     //-----------------------------------------------------------------------
setupMaterialToUseVTF(TextureType textureType,MaterialPtr & material) const200     void BaseInstanceBatchVTF::setupMaterialToUseVTF( TextureType textureType, MaterialPtr &material ) const
201     {
202         Material::Techniques::const_iterator t;
203         for(t = material->getTechniques().begin(); t != material->getTechniques().end(); ++t)
204         {
205             Technique *technique = *t;
206             Technique::Passes::const_iterator i;
207             for(i = technique->getPasses().begin(); i != technique->getPasses().end(); ++i)
208             {
209                 Pass *pass = *i;
210                 Pass::TextureUnitStates::const_iterator it;
211                 for(it = pass->getTextureUnitStates().begin(); it != pass->getTextureUnitStates().end(); ++it)
212                 {
213                     TextureUnitState *texUnit = *it;
214 
215                     if( texUnit->getName() == "InstancingVTF" )
216                     {
217                         texUnit->setTextureName( mMatrixTexture->getName(), textureType );
218                         texUnit->setTextureFiltering( TFO_NONE );
219                         texUnit->setBindingType( TextureUnitState::BT_VERTEX );
220                     }
221                 }
222             }
223 
224             if( technique->getShadowCasterMaterial() )
225             {
226                 MaterialPtr matCaster = technique->getShadowCasterMaterial();
227                 setupMaterialToUseVTF( textureType, matCaster );
228             }
229         }
230     }
231     //-----------------------------------------------------------------------
createVertexTexture(const SubMesh * baseSubMesh)232     void BaseInstanceBatchVTF::createVertexTexture( const SubMesh* baseSubMesh )
233     {
234         /*
235         TODO: Find a way to retrieve max texture resolution,
236         http://www.ogre3d.org/forums/viewtopic.php?t=38305
237 
238         Currently assuming it's 4096x4096, which is a safe bet for any hardware with decent VTF*/
239 
240         size_t uniqueAnimations = mInstancesPerBatch;
241         if (useBoneMatrixLookup())
242         {
243             uniqueAnimations = std::min<size_t>(getMaxLookupTableInstances(), uniqueAnimations);
244         }
245         mMatricesPerInstance = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() );
246 
247         if(mUseBoneDualQuaternions && !mTempTransformsArray3x4)
248         {
249             mTempTransformsArray3x4 = OGRE_ALLOC_T(float, mMatricesPerInstance * 3 * 4, MEMCATEGORY_GENERAL);
250         }
251 
252         mNumWorldMatrices = uniqueAnimations * mMatricesPerInstance;
253 
254         //Calculate the width & height required to hold all the matrices. Start by filling the width
255         //first (i.e. 4096x1 4096x2 4096x3, etc)
256 
257         size_t texWidth         = std::min<size_t>( mNumWorldMatrices * mRowLength, c_maxTexWidth );
258         size_t maxUsableWidth   = texWidth;
259         if( matricesTogetherPerRow() )
260         {
261             //The technique requires all matrices from the same instance in the same row
262             //i.e. 4094 -> 4095 -> skip 4096 -> 0 (next row) contains data from a new instance
263             mWidthFloatsPadding = texWidth % (mMatricesPerInstance * mRowLength);
264 
265             if( mWidthFloatsPadding )
266             {
267                 mMaxFloatsPerLine = texWidth - mWidthFloatsPadding;
268 
269                 maxUsableWidth = mMaxFloatsPerLine;
270 
271                 //Values are in pixels, convert them to floats (1 pixel = 4 floats)
272                 mWidthFloatsPadding *= 4;
273                 mMaxFloatsPerLine       *= 4;
274             }
275         }
276 
277         size_t texHeight = mNumWorldMatrices * mRowLength / maxUsableWidth;
278 
279         if( (mNumWorldMatrices * mRowLength) % maxUsableWidth )
280             texHeight += 1;
281 
282         //Don't use 1D textures, as OGL goes crazy because the shader should be calling texture1D()...
283         //TextureType texType = texHeight == 1 ? TEX_TYPE_1D : TEX_TYPE_2D;
284         TextureType texType = TEX_TYPE_2D;
285 
286         mMatrixTexture = TextureManager::getSingleton().createManual(
287                                         mName + "/VTF", mMeshReference->getGroup(), texType,
288                                         (uint)texWidth, (uint)texHeight,
289                                         0, PF_FLOAT32_RGBA, TU_DYNAMIC_WRITE_ONLY_DISCARDABLE );
290 
291         //Set our cloned material to use this custom texture!
292         setupMaterialToUseVTF( texType, mMaterial );
293     }
294 
295     //-----------------------------------------------------------------------
convert3x4MatricesToDualQuaternions(float * matrices,size_t numOfMatrices,float * outDualQuaternions)296     size_t BaseInstanceBatchVTF::convert3x4MatricesToDualQuaternions(float* matrices, size_t numOfMatrices, float* outDualQuaternions)
297     {
298         DualQuaternion dQuat;
299         Affine3 matrix;
300         size_t floatsWritten = 0;
301 
302         for (size_t m = 0; m < numOfMatrices; ++m)
303         {
304             for(int i = 0; i < 3; ++i)
305             {
306                 for(int b = 0; b < 4; ++b)
307                 {
308                     matrix[i][b] = *matrices++;
309                 }
310             }
311 
312             dQuat.fromTransformationMatrix(matrix);
313 
314             //Copy the 2x4 matrix
315             for(int i = 0; i < 8; ++i)
316             {
317                 *outDualQuaternions++ = static_cast<float>( dQuat[i] );
318                 ++floatsWritten;
319             }
320         }
321 
322         return floatsWritten;
323     }
324 
325     //-----------------------------------------------------------------------
updateVertexTexture(void)326     void BaseInstanceBatchVTF::updateVertexTexture(void)
327     {
328         //Now lock the texture and copy the 4x3 matrices!
329         HardwareBufferLockGuard matTexLock(mMatrixTexture->getBuffer(), HardwareBuffer::HBL_DISCARD);
330         const PixelBox &pixelBox = mMatrixTexture->getBuffer()->getCurrentLock();
331 
332         float *pDest = reinterpret_cast<float*>(pixelBox.data);
333 
334         InstancedEntityVec::const_iterator itor = mInstancedEntities.begin();
335         InstancedEntityVec::const_iterator end  = mInstancedEntities.end();
336 
337         float* transforms;
338 
339         //If using dual quaternion skinning, write the transforms to a temporary buffer,
340         //then convert to dual quaternions, then later write to the pixel buffer
341         //Otherwise simply write the transforms to the pixel buffer directly
342         if(mUseBoneDualQuaternions)
343         {
344             transforms = mTempTransformsArray3x4;
345         }
346         else
347         {
348             transforms = pDest;
349         }
350 
351 
352         while( itor != end )
353         {
354             size_t floatsWritten = (*itor)->getTransforms3x4( transforms );
355 
356             if( mManager->getCameraRelativeRendering() )
357                 makeMatrixCameraRelative3x4( transforms, floatsWritten );
358 
359             if(mUseBoneDualQuaternions)
360             {
361                 floatsWritten = convert3x4MatricesToDualQuaternions(transforms, floatsWritten / 12, pDest);
362                 pDest += floatsWritten;
363             }
364             else
365             {
366                 transforms += floatsWritten;
367             }
368 
369             ++itor;
370         }
371     }
372     /** update the lookup numbers for entities with shared transforms */
updateSharedLookupIndexes()373     void BaseInstanceBatchVTF::updateSharedLookupIndexes()
374     {
375         if (mTransformSharingDirty)
376         {
377             if (useBoneMatrixLookup())
378             {
379                 //In each entity update the "transform lookup number" so that:
380                 // 1. All entities sharing the same transformation will share the same unique number
381                 // 2. "transform lookup number" will be numbered from 0 up to getMaxLookupTableInstances
382                 uint16 lookupCounter = 0;
383                 typedef std::map<Affine3*,uint16> MapTransformId;
384                 MapTransformId transformToId;
385                 InstancedEntityVec::const_iterator itEnt = mInstancedEntities.begin(),
386                     itEntEnd = mInstancedEntities.end();
387                 for(;itEnt != itEntEnd ; ++itEnt)
388                 {
389                     if ((*itEnt)->isInScene())
390                     {
391                         Affine3* transformUniqueId = (*itEnt)->mBoneMatrices;
392                         MapTransformId::iterator itLu = transformToId.find(transformUniqueId);
393                         if (itLu == transformToId.end())
394                         {
395                             itLu = transformToId.insert(std::make_pair(transformUniqueId,lookupCounter)).first;
396                             ++lookupCounter;
397                         }
398                         (*itEnt)->setTransformLookupNumber(itLu->second);
399                     }
400                     else
401                     {
402                         (*itEnt)->setTransformLookupNumber(0);
403                     }
404                 }
405 
406                 if (lookupCounter > getMaxLookupTableInstances())
407                 {
408                     OGRE_EXCEPT(Exception::ERR_INVALID_STATE,"Number of unique bone matrix states exceeds current limitation.","BaseInstanceBatchVTF::updateSharedLookupIndexes()");
409                 }
410             }
411 
412             mTransformSharingDirty = false;
413         }
414     }
415 
416     //-----------------------------------------------------------------------
generateInstancedEntity(size_t num)417     InstancedEntity* BaseInstanceBatchVTF::generateInstancedEntity(size_t num)
418     {
419         InstancedEntity* sharedTransformEntity = NULL;
420         if ((useBoneMatrixLookup()) && (num >= getMaxLookupTableInstances()))
421         {
422             sharedTransformEntity = mInstancedEntities[num % getMaxLookupTableInstances()];
423             if (sharedTransformEntity->mSharedTransformEntity)
424             {
425                 sharedTransformEntity = sharedTransformEntity->mSharedTransformEntity;
426             }
427         }
428 
429         return OGRE_NEW InstancedEntity(this, static_cast<uint32>(num), sharedTransformEntity);
430     }
431 
432 
433     //-----------------------------------------------------------------------
getWorldTransforms(Matrix4 * xform) const434     void BaseInstanceBatchVTF::getWorldTransforms( Matrix4* xform ) const
435     {
436         *xform = Matrix4::IDENTITY;
437     }
438     //-----------------------------------------------------------------------
getNumWorldTransforms(void) const439     unsigned short BaseInstanceBatchVTF::getNumWorldTransforms(void) const
440     {
441         return 1;
442     }
443     //-----------------------------------------------------------------------
_updateRenderQueue(RenderQueue * queue)444     void BaseInstanceBatchVTF::_updateRenderQueue(RenderQueue* queue)
445     {
446         InstanceBatch::_updateRenderQueue( queue );
447 
448         if( mBoundsUpdated || mDirtyAnimation || mManager->getCameraRelativeRendering() )
449             updateVertexTexture();
450 
451         mBoundsUpdated = false;
452     }
453     //-----------------------------------------------------------------------
454     // InstanceBatchVTF
455     //-----------------------------------------------------------------------
InstanceBatchVTF(InstanceManager * creator,MeshPtr & meshReference,const MaterialPtr & material,size_t instancesPerBatch,const Mesh::IndexMap * indexToBoneMap,const String & batchName)456     InstanceBatchVTF::InstanceBatchVTF(
457         InstanceManager *creator, MeshPtr &meshReference,
458         const MaterialPtr &material, size_t instancesPerBatch,
459         const Mesh::IndexMap *indexToBoneMap, const String &batchName )
460             : BaseInstanceBatchVTF (creator, meshReference, material,
461                                     instancesPerBatch, indexToBoneMap, batchName)
462     {
463 
464     }
465     //-----------------------------------------------------------------------
~InstanceBatchVTF()466     InstanceBatchVTF::~InstanceBatchVTF()
467     {
468     }
469     //-----------------------------------------------------------------------
setupVertices(const SubMesh * baseSubMesh)470     void InstanceBatchVTF::setupVertices( const SubMesh* baseSubMesh )
471     {
472         mRenderOperation.vertexData = OGRE_NEW VertexData();
473         mRemoveOwnVertexData = true; //Raise flag to remove our own vertex data in the end (not always needed)
474 
475         VertexData *thisVertexData = mRenderOperation.vertexData;
476         VertexData *baseVertexData = baseSubMesh->vertexData;
477 
478         thisVertexData->vertexStart = 0;
479         thisVertexData->vertexCount = baseVertexData->vertexCount * mInstancesPerBatch;
480 
481         HardwareBufferManager::getSingleton().destroyVertexDeclaration( thisVertexData->vertexDeclaration );
482         thisVertexData->vertexDeclaration = baseVertexData->vertexDeclaration->clone();
483 
484         HWBoneIdxVec hwBoneIdx;
485         HWBoneWgtVec hwBoneWgt;
486 
487         //Blend weights may not be present because HW_VTF does not require to be skeletally animated
488         const VertexElement *veWeights = baseVertexData->vertexDeclaration->
489                                                     findElementBySemantic( VES_BLEND_WEIGHTS );
490         if( veWeights )
491         {
492             //One weight is recommended for VTF
493             mWeightCount = (forceOneWeight() || useOneWeight()) ?
494                                 1 : veWeights->getSize() / sizeof(float);
495         }
496         else
497         {
498             mWeightCount = 1;
499         }
500 
501         hwBoneIdx.resize( baseVertexData->vertexCount * mWeightCount, 0 );
502 
503         if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() )
504         {
505             if(mWeightCount > 1)
506             {
507                 hwBoneWgt.resize( baseVertexData->vertexCount * mWeightCount, 0 );
508                 retrieveBoneIdxWithWeights(baseVertexData, hwBoneIdx, hwBoneWgt);
509             }
510             else
511             {
512                 retrieveBoneIdx( baseVertexData, hwBoneIdx );
513                 thisVertexData->vertexDeclaration->removeElement( VES_BLEND_INDICES );
514                 thisVertexData->vertexDeclaration->removeElement( VES_BLEND_WEIGHTS );
515 
516                 thisVertexData->vertexDeclaration->closeGapsInSource();
517             }
518 
519         }
520 
521         for( unsigned short i=0; i<thisVertexData->vertexDeclaration->getMaxSource()+1; ++i )
522         {
523             //Create our own vertex buffer
524             HardwareVertexBufferSharedPtr vertexBuffer =
525                 HardwareBufferManager::getSingleton().createVertexBuffer(
526                 thisVertexData->vertexDeclaration->getVertexSize(i),
527                 thisVertexData->vertexCount,
528                 HardwareBuffer::HBU_STATIC_WRITE_ONLY );
529             thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer );
530 
531             //Grab the base submesh data
532             HardwareVertexBufferSharedPtr baseVertexBuffer =
533                 baseVertexData->vertexBufferBinding->getBuffer(i);
534 
535             HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
536             HardwareBufferLockGuard baseLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY);
537             char* thisBuf = static_cast<char*>(thisLock.pData);
538             char* baseBuf = static_cast<char*>(baseLock.pData);
539 
540             //Copy and repeat
541             for( size_t j=0; j<mInstancesPerBatch; ++j )
542             {
543                 const size_t sizeOfBuffer = baseVertexData->vertexCount *
544                     baseVertexData->vertexDeclaration->getVertexSize(i);
545                 memcpy( thisBuf + j * sizeOfBuffer, baseBuf, sizeOfBuffer );
546             }
547         }
548 
549         createVertexTexture( baseSubMesh );
550         createVertexSemantics( thisVertexData, baseVertexData, hwBoneIdx, hwBoneWgt);
551     }
552     //-----------------------------------------------------------------------
setupIndices(const SubMesh * baseSubMesh)553     void InstanceBatchVTF::setupIndices( const SubMesh* baseSubMesh )
554     {
555         mRenderOperation.indexData = OGRE_NEW IndexData();
556         mRemoveOwnIndexData = true; //Raise flag to remove our own index data in the end (not always needed)
557 
558         IndexData *thisIndexData = mRenderOperation.indexData;
559         IndexData *baseIndexData = baseSubMesh->indexData;
560 
561         thisIndexData->indexStart = 0;
562         thisIndexData->indexCount = baseIndexData->indexCount * mInstancesPerBatch;
563 
564         //TODO: Check numVertices is below max supported by GPU
565         HardwareIndexBuffer::IndexType indexType = HardwareIndexBuffer::IT_16BIT;
566         if( mRenderOperation.vertexData->vertexCount > 65535 )
567             indexType = HardwareIndexBuffer::IT_32BIT;
568         thisIndexData->indexBuffer = HardwareBufferManager::getSingleton().createIndexBuffer(
569             indexType, thisIndexData->indexCount, HardwareBuffer::HBU_STATIC_WRITE_ONLY );
570 
571         HardwareBufferLockGuard thisLock(thisIndexData->indexBuffer, HardwareBuffer::HBL_DISCARD);
572         HardwareBufferLockGuard baseLock(baseIndexData->indexBuffer, HardwareBuffer::HBL_READ_ONLY);
573         uint16 *thisBuf16 = static_cast<uint16*>(thisLock.pData);
574         uint32 *thisBuf32 = static_cast<uint32*>(thisLock.pData);
575         bool baseIndex16bit = baseIndexData->indexBuffer->getType() == HardwareIndexBuffer::IT_16BIT;
576 
577         for( size_t i=0; i<mInstancesPerBatch; ++i )
578         {
579             const size_t vertexOffset = i * mRenderOperation.vertexData->vertexCount / mInstancesPerBatch;
580 
581             const uint16 *initBuf16 = static_cast<const uint16 *>(baseLock.pData);
582             const uint32 *initBuf32 = static_cast<const uint32 *>(baseLock.pData);
583 
584             for( size_t j=0; j<baseIndexData->indexCount; ++j )
585             {
586                 uint32 originalVal = baseIndex16bit ? *initBuf16++ : *initBuf32++;
587 
588                 if( indexType == HardwareIndexBuffer::IT_16BIT )
589                     *thisBuf16++ = static_cast<uint16>(originalVal + vertexOffset);
590                 else
591                     *thisBuf32++ = static_cast<uint32>(originalVal + vertexOffset);
592             }
593         }
594     }
595     //-----------------------------------------------------------------------
createVertexSemantics(VertexData * thisVertexData,VertexData * baseVertexData,const HWBoneIdxVec & hwBoneIdx,const HWBoneWgtVec & hwBoneWgt)596     void InstanceBatchVTF::createVertexSemantics(
597         VertexData *thisVertexData, VertexData *baseVertexData, const HWBoneIdxVec &hwBoneIdx, const HWBoneWgtVec &hwBoneWgt)
598     {
599         const size_t texWidth  = mMatrixTexture->getWidth();
600         const size_t texHeight = mMatrixTexture->getHeight();
601 
602         //Calculate the texel offsets to correct them offline
603         //Akwardly enough, the offset is needed in OpenGL too
604         Vector2 texelOffsets;
605         //RenderSystem *renderSystem = Root::getSingleton().getRenderSystem();
606         texelOffsets.x = /*renderSystem->getHorizontalTexelOffset()*/ -0.5f / (float)texWidth;
607         texelOffsets.y = /*renderSystem->getVerticalTexelOffset()*/ -0.5f / (float)texHeight;
608 
609         //Only one weight per vertex is supported. It would not only be complex, but prohibitively slow.
610         //Put them in a new buffer, since it's 32 bytes aligned :-)
611         const unsigned short newSource = thisVertexData->vertexDeclaration->getMaxSource() + 1;
612         size_t maxFloatsPerVector = 4;
613         size_t offset = 0;
614 
615         for(size_t i = 0; i < mWeightCount; i += maxFloatsPerVector / mRowLength)
616         {
617             offset += thisVertexData->vertexDeclaration->addElement( newSource, offset, VET_FLOAT4, VES_TEXTURE_COORDINATES,
618                 thisVertexData->vertexDeclaration->
619                 getNextFreeTextureCoordinate() ).getSize();
620             offset += thisVertexData->vertexDeclaration->addElement( newSource, offset, VET_FLOAT4, VES_TEXTURE_COORDINATES,
621                 thisVertexData->vertexDeclaration->
622                 getNextFreeTextureCoordinate() ).getSize();
623         }
624 
625         //Add the weights (supports up to four, which is Ogre's limit)
626         if(mWeightCount > 1)
627         {
628             thisVertexData->vertexDeclaration->addElement(newSource, offset, VET_FLOAT4, VES_BLEND_WEIGHTS,
629                                         thisVertexData->vertexDeclaration->getNextFreeTextureCoordinate() ).getSize();
630         }
631 
632         //Create our own vertex buffer
633         HardwareVertexBufferSharedPtr vertexBuffer =
634             HardwareBufferManager::getSingleton().createVertexBuffer(
635             thisVertexData->vertexDeclaration->getVertexSize(newSource),
636             thisVertexData->vertexCount,
637             HardwareBuffer::HBU_STATIC_WRITE_ONLY );
638         thisVertexData->vertexBufferBinding->setBinding( newSource, vertexBuffer );
639 
640         HardwareBufferLockGuard vertexLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
641         float *thisFloat = static_cast<float*>(vertexLock.pData);
642 
643         //Copy and repeat
644         for( size_t i=0; i<mInstancesPerBatch; ++i )
645         {
646             for( size_t j=0; j<baseVertexData->vertexCount * mWeightCount; j += mWeightCount )
647             {
648                 size_t numberOfMatricesInLine = 0;
649 
650                 for(size_t wgtIdx = 0; wgtIdx < mWeightCount; ++wgtIdx)
651                 {
652                     for( size_t k=0; k < mRowLength; ++k)
653                     {
654                         size_t instanceIdx = (hwBoneIdx[j+wgtIdx] + i * mMatricesPerInstance) * mRowLength + k;
655                         //x
656                         *thisFloat++ = ((instanceIdx % texWidth) / (float)texWidth) - (float)texelOffsets.x;
657                         //y
658                         *thisFloat++ = ((instanceIdx / texWidth) / (float)texHeight) - (float)texelOffsets.y;
659                     }
660 
661                     ++numberOfMatricesInLine;
662 
663                     //If another matrix can't be fit, we're on another line, or if this is the last weight
664                     if((numberOfMatricesInLine + 1) * mRowLength > maxFloatsPerVector || (wgtIdx+1) == mWeightCount)
665                     {
666                         //Place zeroes in the remaining coordinates
667                         for ( size_t k=mRowLength * numberOfMatricesInLine; k < maxFloatsPerVector; ++k)
668                         {
669                             *thisFloat++ = 0.0f;
670                             *thisFloat++ = 0.0f;
671                         }
672 
673                         numberOfMatricesInLine = 0;
674                     }
675                 }
676 
677                 //Don't need to write weights if there is only one
678                 if(mWeightCount > 1)
679                 {
680                     //Write the weights
681                     for(size_t wgtIdx = 0; wgtIdx < mWeightCount; ++wgtIdx)
682                     {
683                         *thisFloat++ = hwBoneWgt[j+wgtIdx];
684                     }
685 
686                     //Fill the rest of the line with zeros
687                     for(size_t wgtIdx = mWeightCount; wgtIdx < maxFloatsPerVector; ++wgtIdx)
688                     {
689                         *thisFloat++ = 0.0f;
690                     }
691                 }
692             }
693         }
694     }
695     //-----------------------------------------------------------------------
calculateMaxNumInstances(const SubMesh * baseSubMesh,uint16 flags) const696     size_t InstanceBatchVTF::calculateMaxNumInstances(
697                     const SubMesh *baseSubMesh, uint16 flags ) const
698     {
699         size_t retVal = 0;
700 
701         RenderSystem *renderSystem = Root::getSingleton().getRenderSystem();
702         const RenderSystemCapabilities *capabilities = renderSystem->getCapabilities();
703 
704         //VTF must be supported
705         if( capabilities->hasCapability( RSC_VERTEX_TEXTURE_FETCH ) )
706         {
707             //TODO: Check PF_FLOAT32_RGBA is supported (should be, since it was the 1st one)
708             const size_t numBones = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() );
709             retVal = c_maxTexWidth * c_maxTexHeight / mRowLength / numBones;
710 
711             if( flags & IM_USE16BIT )
712             {
713                 if( baseSubMesh->vertexData->vertexCount * retVal > 0xFFFF )
714                     retVal = 0xFFFF / baseSubMesh->vertexData->vertexCount;
715             }
716 
717             if( flags & IM_VTFBESTFIT )
718             {
719                 const size_t instancesPerBatch = std::min( retVal, mInstancesPerBatch );
720                 //Do the same as in createVertexTexture()
721                 const size_t numWorldMatrices = instancesPerBatch * numBones;
722 
723                 size_t texWidth  = std::min<size_t>( numWorldMatrices * mRowLength, c_maxTexWidth );
724                 size_t texHeight = numWorldMatrices * mRowLength / c_maxTexWidth;
725 
726                 const size_t remainder = (numWorldMatrices * mRowLength) % c_maxTexWidth;
727 
728                 if( remainder && texHeight > 0 )
729                     retVal = static_cast<size_t>(texWidth * texHeight / (float)mRowLength / (float)(numBones));
730             }
731         }
732 
733         return retVal;
734 
735     }
736 }
737