1 /* 2 ----------------------------------------------------------------------------- 3 This source file is part of OGRE 4 (Object-oriented Graphics Rendering Engine) 5 For the latest info, see http://www.ogre3d.org/ 6 7 Copyright (c) 2000-2014 Torus Knot Software Ltd 8 9 Permission is hereby granted, free of charge, to any person obtaining a copy 10 of this software and associated documentation files (the "Software"), to deal 11 in the Software without restriction, including without limitation the rights 12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 copies of the Software, and to permit persons to whom the Software is 14 furnished to do so, subject to the following conditions: 15 16 The above copyright notice and this permission notice shall be included in 17 all copies or substantial portions of the Software. 18 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 THE SOFTWARE. 26 ----------------------------------------------------------------------------- 27 */ 28 #include "OgreStableHeaders.h" 29 #include "OgreInstanceBatchVTF.h" 30 #include "OgreHardwarePixelBuffer.h" 31 #include "OgreInstancedEntity.h" 32 #include "OgreMaterial.h" 33 #include "OgreDualQuaternion.h" 34 35 namespace Ogre 36 { 37 static const uint16 c_maxTexWidth = 4096; 38 static const uint16 c_maxTexHeight = 4096; 39 BaseInstanceBatchVTF(InstanceManager * creator,MeshPtr & meshReference,const MaterialPtr & material,size_t instancesPerBatch,const Mesh::IndexMap * indexToBoneMap,const String & batchName)40 BaseInstanceBatchVTF::BaseInstanceBatchVTF( InstanceManager *creator, MeshPtr &meshReference, 41 const MaterialPtr &material, size_t instancesPerBatch, 42 const Mesh::IndexMap *indexToBoneMap, const String &batchName) : 43 InstanceBatch( creator, meshReference, material, instancesPerBatch, 44 indexToBoneMap, batchName ), 45 mMatricesPerInstance(0), 46 mNumWorldMatrices( instancesPerBatch ), 47 mWidthFloatsPadding( 0 ), 48 mMaxFloatsPerLine( std::numeric_limits<size_t>::max() ), 49 mRowLength(3), 50 mWeightCount(1), 51 mTempTransformsArray3x4(0), 52 mUseBoneMatrixLookup(false), 53 mMaxLookupTableInstances(16), 54 mUseBoneDualQuaternions(false), 55 mForceOneWeight(false), 56 mUseOneWeight(false) 57 { 58 cloneMaterial( mMaterial ); 59 } 60 ~BaseInstanceBatchVTF()61 BaseInstanceBatchVTF::~BaseInstanceBatchVTF() 62 { 63 //Remove cloned caster materials (if any) 64 Material::Techniques::const_iterator it; 65 for(it = mMaterial->getTechniques().begin(); it != mMaterial->getTechniques().end(); ++it) 66 { 67 Technique *technique = *it; 68 69 if (technique->getShadowCasterMaterial()) 70 MaterialManager::getSingleton().remove( technique->getShadowCasterMaterial() ); 71 } 72 73 //Remove cloned material 74 MaterialManager::getSingleton().remove( mMaterial ); 75 76 //Remove the VTF texture 77 if( mMatrixTexture ) 78 TextureManager::getSingleton().remove( mMatrixTexture ); 79 80 OGRE_FREE(mTempTransformsArray3x4, MEMCATEGORY_GENERAL); 81 } 82 83 //----------------------------------------------------------------------- buildFrom(const SubMesh * baseSubMesh,const RenderOperation & renderOperation)84 void BaseInstanceBatchVTF::buildFrom( const SubMesh *baseSubMesh, const RenderOperation &renderOperation ) 85 { 86 if (useBoneMatrixLookup()) 87 { 88 //when using bone matrix lookup resource are not shared 89 // 90 //Future implementation: while the instance vertex buffer can't be shared 91 //The texture can be. 92 // 93 build(baseSubMesh); 94 } 95 else 96 { 97 createVertexTexture( baseSubMesh ); 98 InstanceBatch::buildFrom( baseSubMesh, renderOperation ); 99 } 100 } 101 //----------------------------------------------------------------------- cloneMaterial(const MaterialPtr & material)102 void BaseInstanceBatchVTF::cloneMaterial( const MaterialPtr &material ) 103 { 104 //Used to track down shadow casters, so the same material caster doesn't get cloned twice 105 typedef std::map<String, MaterialPtr> MatMap; 106 MatMap clonedMaterials; 107 108 //We need to clone the material so we can have different textures for each batch. 109 mMaterial = material->clone( mName + "/VTFMaterial" ); 110 111 //Now do the same with the techniques which have a material shadow caster 112 Material::Techniques::const_iterator it; 113 for(it = material->getTechniques().begin(); it != material->getTechniques().end(); ++it) 114 { 115 Technique *technique = *it; 116 117 if( technique->getShadowCasterMaterial() ) 118 { 119 const MaterialPtr &casterMat = technique->getShadowCasterMaterial(); 120 const String &casterName = casterMat->getName(); 121 122 //Was this material already cloned? 123 MatMap::const_iterator itor = clonedMaterials.find(casterName); 124 125 if( itor == clonedMaterials.end() ) 126 { 127 //No? Clone it and track it 128 MaterialPtr cloned = casterMat->clone( mName + "/VTFMaterialCaster" + 129 StringConverter::toString(clonedMaterials.size()) ); 130 technique->setShadowCasterMaterial( cloned ); 131 clonedMaterials[casterName] = cloned; 132 } 133 else 134 technique->setShadowCasterMaterial( itor->second ); //Reuse the previously cloned mat 135 } 136 } 137 } 138 //----------------------------------------------------------------------- retrieveBoneIdx(VertexData * baseVertexData,HWBoneIdxVec & outBoneIdx)139 void BaseInstanceBatchVTF::retrieveBoneIdx( VertexData *baseVertexData, HWBoneIdxVec &outBoneIdx ) 140 { 141 const VertexElement *ve = baseVertexData->vertexDeclaration-> 142 findElementBySemantic( VES_BLEND_INDICES ); 143 const VertexElement *veWeights = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_WEIGHTS ); 144 145 HardwareVertexBufferSharedPtr buff = baseVertexData->vertexBufferBinding->getBuffer(ve->getSource()); 146 HardwareBufferLockGuard baseVertexLock(buff, HardwareBuffer::HBL_READ_ONLY); 147 char const *baseBuffer = static_cast<char const*>(baseVertexLock.pData); 148 149 for( size_t i=0; i<baseVertexData->vertexCount; ++i ) 150 { 151 float const *pWeights = reinterpret_cast<float const*>(baseBuffer + veWeights->getOffset()); 152 153 uint8 biggestWeightIdx = 0; 154 for( uint8 j=1; j< uint8(mWeightCount); ++j ) 155 { 156 biggestWeightIdx = pWeights[biggestWeightIdx] < pWeights[j] ? j : biggestWeightIdx; 157 } 158 159 uint8 const *pIndex = reinterpret_cast<uint8 const*>(baseBuffer + ve->getOffset()); 160 outBoneIdx[i] = pIndex[biggestWeightIdx]; 161 162 baseBuffer += baseVertexData->vertexDeclaration->getVertexSize(ve->getSource()); 163 } 164 } 165 166 //----------------------------------------------------------------------- retrieveBoneIdxWithWeights(VertexData * baseVertexData,HWBoneIdxVec & outBoneIdx,HWBoneWgtVec & outBoneWgt)167 void BaseInstanceBatchVTF::retrieveBoneIdxWithWeights(VertexData *baseVertexData, HWBoneIdxVec &outBoneIdx, HWBoneWgtVec &outBoneWgt) 168 { 169 const VertexElement *ve = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_INDICES ); 170 const VertexElement *veWeights = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_WEIGHTS ); 171 172 HardwareVertexBufferSharedPtr buff = baseVertexData->vertexBufferBinding->getBuffer(ve->getSource()); 173 HardwareBufferLockGuard baseVertexLock(buff, HardwareBuffer::HBL_READ_ONLY); 174 char const *baseBuffer = static_cast<char const*>(baseVertexLock.pData); 175 176 for( size_t i=0; i<baseVertexData->vertexCount * mWeightCount; i += mWeightCount) 177 { 178 float const *pWeights = reinterpret_cast<float const*>(baseBuffer + veWeights->getOffset()); 179 uint8 const *pIndex = reinterpret_cast<uint8 const*>(baseBuffer + ve->getOffset()); 180 181 float weightMagnitude = 0.0f; 182 for( size_t j=0; j < mWeightCount; ++j ) 183 { 184 outBoneWgt[i+j] = pWeights[j]; 185 weightMagnitude += pWeights[j]; 186 outBoneIdx[i+j] = pIndex[j]; 187 } 188 189 //Normalize the bone weights so they add to one 190 for(size_t j=0; j < mWeightCount; ++j) 191 { 192 outBoneWgt[i+j] /= weightMagnitude; 193 } 194 195 baseBuffer += baseVertexData->vertexDeclaration->getVertexSize(ve->getSource()); 196 } 197 } 198 199 //----------------------------------------------------------------------- setupMaterialToUseVTF(TextureType textureType,MaterialPtr & material) const200 void BaseInstanceBatchVTF::setupMaterialToUseVTF( TextureType textureType, MaterialPtr &material ) const 201 { 202 Material::Techniques::const_iterator t; 203 for(t = material->getTechniques().begin(); t != material->getTechniques().end(); ++t) 204 { 205 Technique *technique = *t; 206 Technique::Passes::const_iterator i; 207 for(i = technique->getPasses().begin(); i != technique->getPasses().end(); ++i) 208 { 209 Pass *pass = *i; 210 Pass::TextureUnitStates::const_iterator it; 211 for(it = pass->getTextureUnitStates().begin(); it != pass->getTextureUnitStates().end(); ++it) 212 { 213 TextureUnitState *texUnit = *it; 214 215 if( texUnit->getName() == "InstancingVTF" ) 216 { 217 texUnit->setTextureName( mMatrixTexture->getName(), textureType ); 218 texUnit->setTextureFiltering( TFO_NONE ); 219 texUnit->setBindingType( TextureUnitState::BT_VERTEX ); 220 } 221 } 222 } 223 224 if( technique->getShadowCasterMaterial() ) 225 { 226 MaterialPtr matCaster = technique->getShadowCasterMaterial(); 227 setupMaterialToUseVTF( textureType, matCaster ); 228 } 229 } 230 } 231 //----------------------------------------------------------------------- createVertexTexture(const SubMesh * baseSubMesh)232 void BaseInstanceBatchVTF::createVertexTexture( const SubMesh* baseSubMesh ) 233 { 234 /* 235 TODO: Find a way to retrieve max texture resolution, 236 http://www.ogre3d.org/forums/viewtopic.php?t=38305 237 238 Currently assuming it's 4096x4096, which is a safe bet for any hardware with decent VTF*/ 239 240 size_t uniqueAnimations = mInstancesPerBatch; 241 if (useBoneMatrixLookup()) 242 { 243 uniqueAnimations = std::min<size_t>(getMaxLookupTableInstances(), uniqueAnimations); 244 } 245 mMatricesPerInstance = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() ); 246 247 if(mUseBoneDualQuaternions && !mTempTransformsArray3x4) 248 { 249 mTempTransformsArray3x4 = OGRE_ALLOC_T(float, mMatricesPerInstance * 3 * 4, MEMCATEGORY_GENERAL); 250 } 251 252 mNumWorldMatrices = uniqueAnimations * mMatricesPerInstance; 253 254 //Calculate the width & height required to hold all the matrices. Start by filling the width 255 //first (i.e. 4096x1 4096x2 4096x3, etc) 256 257 size_t texWidth = std::min<size_t>( mNumWorldMatrices * mRowLength, c_maxTexWidth ); 258 size_t maxUsableWidth = texWidth; 259 if( matricesTogetherPerRow() ) 260 { 261 //The technique requires all matrices from the same instance in the same row 262 //i.e. 4094 -> 4095 -> skip 4096 -> 0 (next row) contains data from a new instance 263 mWidthFloatsPadding = texWidth % (mMatricesPerInstance * mRowLength); 264 265 if( mWidthFloatsPadding ) 266 { 267 mMaxFloatsPerLine = texWidth - mWidthFloatsPadding; 268 269 maxUsableWidth = mMaxFloatsPerLine; 270 271 //Values are in pixels, convert them to floats (1 pixel = 4 floats) 272 mWidthFloatsPadding *= 4; 273 mMaxFloatsPerLine *= 4; 274 } 275 } 276 277 size_t texHeight = mNumWorldMatrices * mRowLength / maxUsableWidth; 278 279 if( (mNumWorldMatrices * mRowLength) % maxUsableWidth ) 280 texHeight += 1; 281 282 //Don't use 1D textures, as OGL goes crazy because the shader should be calling texture1D()... 283 //TextureType texType = texHeight == 1 ? TEX_TYPE_1D : TEX_TYPE_2D; 284 TextureType texType = TEX_TYPE_2D; 285 286 mMatrixTexture = TextureManager::getSingleton().createManual( 287 mName + "/VTF", mMeshReference->getGroup(), texType, 288 (uint)texWidth, (uint)texHeight, 289 0, PF_FLOAT32_RGBA, TU_DYNAMIC_WRITE_ONLY_DISCARDABLE ); 290 291 //Set our cloned material to use this custom texture! 292 setupMaterialToUseVTF( texType, mMaterial ); 293 } 294 295 //----------------------------------------------------------------------- convert3x4MatricesToDualQuaternions(float * matrices,size_t numOfMatrices,float * outDualQuaternions)296 size_t BaseInstanceBatchVTF::convert3x4MatricesToDualQuaternions(float* matrices, size_t numOfMatrices, float* outDualQuaternions) 297 { 298 DualQuaternion dQuat; 299 Affine3 matrix; 300 size_t floatsWritten = 0; 301 302 for (size_t m = 0; m < numOfMatrices; ++m) 303 { 304 for(int i = 0; i < 3; ++i) 305 { 306 for(int b = 0; b < 4; ++b) 307 { 308 matrix[i][b] = *matrices++; 309 } 310 } 311 312 dQuat.fromTransformationMatrix(matrix); 313 314 //Copy the 2x4 matrix 315 for(int i = 0; i < 8; ++i) 316 { 317 *outDualQuaternions++ = static_cast<float>( dQuat[i] ); 318 ++floatsWritten; 319 } 320 } 321 322 return floatsWritten; 323 } 324 325 //----------------------------------------------------------------------- updateVertexTexture(void)326 void BaseInstanceBatchVTF::updateVertexTexture(void) 327 { 328 //Now lock the texture and copy the 4x3 matrices! 329 HardwareBufferLockGuard matTexLock(mMatrixTexture->getBuffer(), HardwareBuffer::HBL_DISCARD); 330 const PixelBox &pixelBox = mMatrixTexture->getBuffer()->getCurrentLock(); 331 332 float *pDest = reinterpret_cast<float*>(pixelBox.data); 333 334 InstancedEntityVec::const_iterator itor = mInstancedEntities.begin(); 335 InstancedEntityVec::const_iterator end = mInstancedEntities.end(); 336 337 float* transforms; 338 339 //If using dual quaternion skinning, write the transforms to a temporary buffer, 340 //then convert to dual quaternions, then later write to the pixel buffer 341 //Otherwise simply write the transforms to the pixel buffer directly 342 if(mUseBoneDualQuaternions) 343 { 344 transforms = mTempTransformsArray3x4; 345 } 346 else 347 { 348 transforms = pDest; 349 } 350 351 352 while( itor != end ) 353 { 354 size_t floatsWritten = (*itor)->getTransforms3x4( transforms ); 355 356 if( mManager->getCameraRelativeRendering() ) 357 makeMatrixCameraRelative3x4( transforms, floatsWritten ); 358 359 if(mUseBoneDualQuaternions) 360 { 361 floatsWritten = convert3x4MatricesToDualQuaternions(transforms, floatsWritten / 12, pDest); 362 pDest += floatsWritten; 363 } 364 else 365 { 366 transforms += floatsWritten; 367 } 368 369 ++itor; 370 } 371 } 372 /** update the lookup numbers for entities with shared transforms */ updateSharedLookupIndexes()373 void BaseInstanceBatchVTF::updateSharedLookupIndexes() 374 { 375 if (mTransformSharingDirty) 376 { 377 if (useBoneMatrixLookup()) 378 { 379 //In each entity update the "transform lookup number" so that: 380 // 1. All entities sharing the same transformation will share the same unique number 381 // 2. "transform lookup number" will be numbered from 0 up to getMaxLookupTableInstances 382 uint16 lookupCounter = 0; 383 typedef std::map<Affine3*,uint16> MapTransformId; 384 MapTransformId transformToId; 385 InstancedEntityVec::const_iterator itEnt = mInstancedEntities.begin(), 386 itEntEnd = mInstancedEntities.end(); 387 for(;itEnt != itEntEnd ; ++itEnt) 388 { 389 if ((*itEnt)->isInScene()) 390 { 391 Affine3* transformUniqueId = (*itEnt)->mBoneMatrices; 392 MapTransformId::iterator itLu = transformToId.find(transformUniqueId); 393 if (itLu == transformToId.end()) 394 { 395 itLu = transformToId.insert(std::make_pair(transformUniqueId,lookupCounter)).first; 396 ++lookupCounter; 397 } 398 (*itEnt)->setTransformLookupNumber(itLu->second); 399 } 400 else 401 { 402 (*itEnt)->setTransformLookupNumber(0); 403 } 404 } 405 406 if (lookupCounter > getMaxLookupTableInstances()) 407 { 408 OGRE_EXCEPT(Exception::ERR_INVALID_STATE,"Number of unique bone matrix states exceeds current limitation.","BaseInstanceBatchVTF::updateSharedLookupIndexes()"); 409 } 410 } 411 412 mTransformSharingDirty = false; 413 } 414 } 415 416 //----------------------------------------------------------------------- generateInstancedEntity(size_t num)417 InstancedEntity* BaseInstanceBatchVTF::generateInstancedEntity(size_t num) 418 { 419 InstancedEntity* sharedTransformEntity = NULL; 420 if ((useBoneMatrixLookup()) && (num >= getMaxLookupTableInstances())) 421 { 422 sharedTransformEntity = mInstancedEntities[num % getMaxLookupTableInstances()]; 423 if (sharedTransformEntity->mSharedTransformEntity) 424 { 425 sharedTransformEntity = sharedTransformEntity->mSharedTransformEntity; 426 } 427 } 428 429 return OGRE_NEW InstancedEntity(this, static_cast<uint32>(num), sharedTransformEntity); 430 } 431 432 433 //----------------------------------------------------------------------- getWorldTransforms(Matrix4 * xform) const434 void BaseInstanceBatchVTF::getWorldTransforms( Matrix4* xform ) const 435 { 436 *xform = Matrix4::IDENTITY; 437 } 438 //----------------------------------------------------------------------- getNumWorldTransforms(void) const439 unsigned short BaseInstanceBatchVTF::getNumWorldTransforms(void) const 440 { 441 return 1; 442 } 443 //----------------------------------------------------------------------- _updateRenderQueue(RenderQueue * queue)444 void BaseInstanceBatchVTF::_updateRenderQueue(RenderQueue* queue) 445 { 446 InstanceBatch::_updateRenderQueue( queue ); 447 448 if( mBoundsUpdated || mDirtyAnimation || mManager->getCameraRelativeRendering() ) 449 updateVertexTexture(); 450 451 mBoundsUpdated = false; 452 } 453 //----------------------------------------------------------------------- 454 // InstanceBatchVTF 455 //----------------------------------------------------------------------- InstanceBatchVTF(InstanceManager * creator,MeshPtr & meshReference,const MaterialPtr & material,size_t instancesPerBatch,const Mesh::IndexMap * indexToBoneMap,const String & batchName)456 InstanceBatchVTF::InstanceBatchVTF( 457 InstanceManager *creator, MeshPtr &meshReference, 458 const MaterialPtr &material, size_t instancesPerBatch, 459 const Mesh::IndexMap *indexToBoneMap, const String &batchName ) 460 : BaseInstanceBatchVTF (creator, meshReference, material, 461 instancesPerBatch, indexToBoneMap, batchName) 462 { 463 464 } 465 //----------------------------------------------------------------------- ~InstanceBatchVTF()466 InstanceBatchVTF::~InstanceBatchVTF() 467 { 468 } 469 //----------------------------------------------------------------------- setupVertices(const SubMesh * baseSubMesh)470 void InstanceBatchVTF::setupVertices( const SubMesh* baseSubMesh ) 471 { 472 mRenderOperation.vertexData = OGRE_NEW VertexData(); 473 mRemoveOwnVertexData = true; //Raise flag to remove our own vertex data in the end (not always needed) 474 475 VertexData *thisVertexData = mRenderOperation.vertexData; 476 VertexData *baseVertexData = baseSubMesh->vertexData; 477 478 thisVertexData->vertexStart = 0; 479 thisVertexData->vertexCount = baseVertexData->vertexCount * mInstancesPerBatch; 480 481 HardwareBufferManager::getSingleton().destroyVertexDeclaration( thisVertexData->vertexDeclaration ); 482 thisVertexData->vertexDeclaration = baseVertexData->vertexDeclaration->clone(); 483 484 HWBoneIdxVec hwBoneIdx; 485 HWBoneWgtVec hwBoneWgt; 486 487 //Blend weights may not be present because HW_VTF does not require to be skeletally animated 488 const VertexElement *veWeights = baseVertexData->vertexDeclaration-> 489 findElementBySemantic( VES_BLEND_WEIGHTS ); 490 if( veWeights ) 491 { 492 //One weight is recommended for VTF 493 mWeightCount = (forceOneWeight() || useOneWeight()) ? 494 1 : veWeights->getSize() / sizeof(float); 495 } 496 else 497 { 498 mWeightCount = 1; 499 } 500 501 hwBoneIdx.resize( baseVertexData->vertexCount * mWeightCount, 0 ); 502 503 if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() ) 504 { 505 if(mWeightCount > 1) 506 { 507 hwBoneWgt.resize( baseVertexData->vertexCount * mWeightCount, 0 ); 508 retrieveBoneIdxWithWeights(baseVertexData, hwBoneIdx, hwBoneWgt); 509 } 510 else 511 { 512 retrieveBoneIdx( baseVertexData, hwBoneIdx ); 513 thisVertexData->vertexDeclaration->removeElement( VES_BLEND_INDICES ); 514 thisVertexData->vertexDeclaration->removeElement( VES_BLEND_WEIGHTS ); 515 516 thisVertexData->vertexDeclaration->closeGapsInSource(); 517 } 518 519 } 520 521 for( unsigned short i=0; i<thisVertexData->vertexDeclaration->getMaxSource()+1; ++i ) 522 { 523 //Create our own vertex buffer 524 HardwareVertexBufferSharedPtr vertexBuffer = 525 HardwareBufferManager::getSingleton().createVertexBuffer( 526 thisVertexData->vertexDeclaration->getVertexSize(i), 527 thisVertexData->vertexCount, 528 HardwareBuffer::HBU_STATIC_WRITE_ONLY ); 529 thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer ); 530 531 //Grab the base submesh data 532 HardwareVertexBufferSharedPtr baseVertexBuffer = 533 baseVertexData->vertexBufferBinding->getBuffer(i); 534 535 HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD); 536 HardwareBufferLockGuard baseLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY); 537 char* thisBuf = static_cast<char*>(thisLock.pData); 538 char* baseBuf = static_cast<char*>(baseLock.pData); 539 540 //Copy and repeat 541 for( size_t j=0; j<mInstancesPerBatch; ++j ) 542 { 543 const size_t sizeOfBuffer = baseVertexData->vertexCount * 544 baseVertexData->vertexDeclaration->getVertexSize(i); 545 memcpy( thisBuf + j * sizeOfBuffer, baseBuf, sizeOfBuffer ); 546 } 547 } 548 549 createVertexTexture( baseSubMesh ); 550 createVertexSemantics( thisVertexData, baseVertexData, hwBoneIdx, hwBoneWgt); 551 } 552 //----------------------------------------------------------------------- setupIndices(const SubMesh * baseSubMesh)553 void InstanceBatchVTF::setupIndices( const SubMesh* baseSubMesh ) 554 { 555 mRenderOperation.indexData = OGRE_NEW IndexData(); 556 mRemoveOwnIndexData = true; //Raise flag to remove our own index data in the end (not always needed) 557 558 IndexData *thisIndexData = mRenderOperation.indexData; 559 IndexData *baseIndexData = baseSubMesh->indexData; 560 561 thisIndexData->indexStart = 0; 562 thisIndexData->indexCount = baseIndexData->indexCount * mInstancesPerBatch; 563 564 //TODO: Check numVertices is below max supported by GPU 565 HardwareIndexBuffer::IndexType indexType = HardwareIndexBuffer::IT_16BIT; 566 if( mRenderOperation.vertexData->vertexCount > 65535 ) 567 indexType = HardwareIndexBuffer::IT_32BIT; 568 thisIndexData->indexBuffer = HardwareBufferManager::getSingleton().createIndexBuffer( 569 indexType, thisIndexData->indexCount, HardwareBuffer::HBU_STATIC_WRITE_ONLY ); 570 571 HardwareBufferLockGuard thisLock(thisIndexData->indexBuffer, HardwareBuffer::HBL_DISCARD); 572 HardwareBufferLockGuard baseLock(baseIndexData->indexBuffer, HardwareBuffer::HBL_READ_ONLY); 573 uint16 *thisBuf16 = static_cast<uint16*>(thisLock.pData); 574 uint32 *thisBuf32 = static_cast<uint32*>(thisLock.pData); 575 bool baseIndex16bit = baseIndexData->indexBuffer->getType() == HardwareIndexBuffer::IT_16BIT; 576 577 for( size_t i=0; i<mInstancesPerBatch; ++i ) 578 { 579 const size_t vertexOffset = i * mRenderOperation.vertexData->vertexCount / mInstancesPerBatch; 580 581 const uint16 *initBuf16 = static_cast<const uint16 *>(baseLock.pData); 582 const uint32 *initBuf32 = static_cast<const uint32 *>(baseLock.pData); 583 584 for( size_t j=0; j<baseIndexData->indexCount; ++j ) 585 { 586 uint32 originalVal = baseIndex16bit ? *initBuf16++ : *initBuf32++; 587 588 if( indexType == HardwareIndexBuffer::IT_16BIT ) 589 *thisBuf16++ = static_cast<uint16>(originalVal + vertexOffset); 590 else 591 *thisBuf32++ = static_cast<uint32>(originalVal + vertexOffset); 592 } 593 } 594 } 595 //----------------------------------------------------------------------- createVertexSemantics(VertexData * thisVertexData,VertexData * baseVertexData,const HWBoneIdxVec & hwBoneIdx,const HWBoneWgtVec & hwBoneWgt)596 void InstanceBatchVTF::createVertexSemantics( 597 VertexData *thisVertexData, VertexData *baseVertexData, const HWBoneIdxVec &hwBoneIdx, const HWBoneWgtVec &hwBoneWgt) 598 { 599 const size_t texWidth = mMatrixTexture->getWidth(); 600 const size_t texHeight = mMatrixTexture->getHeight(); 601 602 //Calculate the texel offsets to correct them offline 603 //Akwardly enough, the offset is needed in OpenGL too 604 Vector2 texelOffsets; 605 //RenderSystem *renderSystem = Root::getSingleton().getRenderSystem(); 606 texelOffsets.x = /*renderSystem->getHorizontalTexelOffset()*/ -0.5f / (float)texWidth; 607 texelOffsets.y = /*renderSystem->getVerticalTexelOffset()*/ -0.5f / (float)texHeight; 608 609 //Only one weight per vertex is supported. It would not only be complex, but prohibitively slow. 610 //Put them in a new buffer, since it's 32 bytes aligned :-) 611 const unsigned short newSource = thisVertexData->vertexDeclaration->getMaxSource() + 1; 612 size_t maxFloatsPerVector = 4; 613 size_t offset = 0; 614 615 for(size_t i = 0; i < mWeightCount; i += maxFloatsPerVector / mRowLength) 616 { 617 offset += thisVertexData->vertexDeclaration->addElement( newSource, offset, VET_FLOAT4, VES_TEXTURE_COORDINATES, 618 thisVertexData->vertexDeclaration-> 619 getNextFreeTextureCoordinate() ).getSize(); 620 offset += thisVertexData->vertexDeclaration->addElement( newSource, offset, VET_FLOAT4, VES_TEXTURE_COORDINATES, 621 thisVertexData->vertexDeclaration-> 622 getNextFreeTextureCoordinate() ).getSize(); 623 } 624 625 //Add the weights (supports up to four, which is Ogre's limit) 626 if(mWeightCount > 1) 627 { 628 thisVertexData->vertexDeclaration->addElement(newSource, offset, VET_FLOAT4, VES_BLEND_WEIGHTS, 629 thisVertexData->vertexDeclaration->getNextFreeTextureCoordinate() ).getSize(); 630 } 631 632 //Create our own vertex buffer 633 HardwareVertexBufferSharedPtr vertexBuffer = 634 HardwareBufferManager::getSingleton().createVertexBuffer( 635 thisVertexData->vertexDeclaration->getVertexSize(newSource), 636 thisVertexData->vertexCount, 637 HardwareBuffer::HBU_STATIC_WRITE_ONLY ); 638 thisVertexData->vertexBufferBinding->setBinding( newSource, vertexBuffer ); 639 640 HardwareBufferLockGuard vertexLock(vertexBuffer, HardwareBuffer::HBL_DISCARD); 641 float *thisFloat = static_cast<float*>(vertexLock.pData); 642 643 //Copy and repeat 644 for( size_t i=0; i<mInstancesPerBatch; ++i ) 645 { 646 for( size_t j=0; j<baseVertexData->vertexCount * mWeightCount; j += mWeightCount ) 647 { 648 size_t numberOfMatricesInLine = 0; 649 650 for(size_t wgtIdx = 0; wgtIdx < mWeightCount; ++wgtIdx) 651 { 652 for( size_t k=0; k < mRowLength; ++k) 653 { 654 size_t instanceIdx = (hwBoneIdx[j+wgtIdx] + i * mMatricesPerInstance) * mRowLength + k; 655 //x 656 *thisFloat++ = ((instanceIdx % texWidth) / (float)texWidth) - (float)texelOffsets.x; 657 //y 658 *thisFloat++ = ((instanceIdx / texWidth) / (float)texHeight) - (float)texelOffsets.y; 659 } 660 661 ++numberOfMatricesInLine; 662 663 //If another matrix can't be fit, we're on another line, or if this is the last weight 664 if((numberOfMatricesInLine + 1) * mRowLength > maxFloatsPerVector || (wgtIdx+1) == mWeightCount) 665 { 666 //Place zeroes in the remaining coordinates 667 for ( size_t k=mRowLength * numberOfMatricesInLine; k < maxFloatsPerVector; ++k) 668 { 669 *thisFloat++ = 0.0f; 670 *thisFloat++ = 0.0f; 671 } 672 673 numberOfMatricesInLine = 0; 674 } 675 } 676 677 //Don't need to write weights if there is only one 678 if(mWeightCount > 1) 679 { 680 //Write the weights 681 for(size_t wgtIdx = 0; wgtIdx < mWeightCount; ++wgtIdx) 682 { 683 *thisFloat++ = hwBoneWgt[j+wgtIdx]; 684 } 685 686 //Fill the rest of the line with zeros 687 for(size_t wgtIdx = mWeightCount; wgtIdx < maxFloatsPerVector; ++wgtIdx) 688 { 689 *thisFloat++ = 0.0f; 690 } 691 } 692 } 693 } 694 } 695 //----------------------------------------------------------------------- calculateMaxNumInstances(const SubMesh * baseSubMesh,uint16 flags) const696 size_t InstanceBatchVTF::calculateMaxNumInstances( 697 const SubMesh *baseSubMesh, uint16 flags ) const 698 { 699 size_t retVal = 0; 700 701 RenderSystem *renderSystem = Root::getSingleton().getRenderSystem(); 702 const RenderSystemCapabilities *capabilities = renderSystem->getCapabilities(); 703 704 //VTF must be supported 705 if( capabilities->hasCapability( RSC_VERTEX_TEXTURE_FETCH ) ) 706 { 707 //TODO: Check PF_FLOAT32_RGBA is supported (should be, since it was the 1st one) 708 const size_t numBones = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() ); 709 retVal = c_maxTexWidth * c_maxTexHeight / mRowLength / numBones; 710 711 if( flags & IM_USE16BIT ) 712 { 713 if( baseSubMesh->vertexData->vertexCount * retVal > 0xFFFF ) 714 retVal = 0xFFFF / baseSubMesh->vertexData->vertexCount; 715 } 716 717 if( flags & IM_VTFBESTFIT ) 718 { 719 const size_t instancesPerBatch = std::min( retVal, mInstancesPerBatch ); 720 //Do the same as in createVertexTexture() 721 const size_t numWorldMatrices = instancesPerBatch * numBones; 722 723 size_t texWidth = std::min<size_t>( numWorldMatrices * mRowLength, c_maxTexWidth ); 724 size_t texHeight = numWorldMatrices * mRowLength / c_maxTexWidth; 725 726 const size_t remainder = (numWorldMatrices * mRowLength) % c_maxTexWidth; 727 728 if( remainder && texHeight > 0 ) 729 retVal = static_cast<size_t>(texWidth * texHeight / (float)mRowLength / (float)(numBones)); 730 } 731 } 732 733 return retVal; 734 735 } 736 } 737