1 /*
2 ---------------------------------------------------------------------------
3 Open Asset Import Library (assimp)
4 ---------------------------------------------------------------------------
5
6 Copyright (c) 2006-2021, assimp team
7
8
9
10 All rights reserved.
11
12 Redistribution and use of this software in source and binary forms,
13 with or without modification, are permitted provided that the following
14 conditions are met:
15
16 * Redistributions of source code must retain the above
17 copyright notice, this list of conditions and the
18 following disclaimer.
19
20 * Redistributions in binary form must reproduce the above
21 copyright notice, this list of conditions and the
22 following disclaimer in the documentation and/or other
23 materials provided with the distribution.
24
25 * Neither the name of the assimp team, nor the names of its
26 contributors may be used to endorse or promote products
27 derived from this software without specific prior
28 written permission of the assimp team.
29
30 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 ---------------------------------------------------------------------------
42 */
43
44 /** @file Implementation of the post processing step to improve the cache locality of a mesh.
45 * <br>
46 * The algorithm is roughly basing on this paper:
47 * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
48 * .. although overdraw reduction isn't implemented yet ...
49 */
50
51 // internal headers
52 #include "PostProcessing/ImproveCacheLocality.h"
53 #include "Common/VertexTriangleAdjacency.h"
54
55 #include <assimp/StringUtils.h>
56 #include <assimp/postprocess.h>
57 #include <assimp/scene.h>
58 #include <assimp/DefaultLogger.hpp>
59 #include <stdio.h>
60 #include <stack>
61
62 using namespace Assimp;
63
64 // ------------------------------------------------------------------------------------------------
65 // Constructor to be privately used by Importer
ImproveCacheLocalityProcess()66 ImproveCacheLocalityProcess::ImproveCacheLocalityProcess()
67 : mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
68 // empty
69 }
70
71 // ------------------------------------------------------------------------------------------------
72 // Destructor, private as well
~ImproveCacheLocalityProcess()73 ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess() {
74 // nothing to do here
75 }
76
77 // ------------------------------------------------------------------------------------------------
78 // Returns whether the processing step is present in the given flag field.
IsActive(unsigned int pFlags) const79 bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const {
80 return (pFlags & aiProcess_ImproveCacheLocality) != 0;
81 }
82
83 // ------------------------------------------------------------------------------------------------
84 // Setup configuration
SetupProperties(const Importer * pImp)85 void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp) {
86 // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
87 mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
88 }
89
90 // ------------------------------------------------------------------------------------------------
91 // Executes the post processing step on the given imported data.
Execute(aiScene * pScene)92 void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
93 if (!pScene->mNumMeshes) {
94 ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess skipped; there are no meshes");
95 return;
96 }
97
98 ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess begin");
99
100 float out = 0.f;
101 unsigned int numf = 0, numm = 0;
102 for( unsigned int a = 0; a < pScene->mNumMeshes; ++a ){
103 const float res = ProcessMesh( pScene->mMeshes[a],a);
104 if (res) {
105 numf += pScene->mMeshes[a]->mNumFaces;
106 out += res;
107 ++numm;
108 }
109 }
110 if (!DefaultLogger::isNullLogger()) {
111 if (numf > 0) {
112 ASSIMP_LOG_INFO("Cache relevant are ", numm, " meshes (", numf, " faces). Average output ACMR is ", out / numf);
113 }
114 ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess finished. ");
115 }
116 }
117
118 // ------------------------------------------------------------------------------------------------
119 // Improves the cache coherency of a specific mesh
ProcessMesh(aiMesh * pMesh,unsigned int meshNum)120 ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) {
121 // TODO: rewrite this to use std::vector or boost::shared_array
122 ai_assert(nullptr != pMesh);
123
124 // Check whether the input data is valid
125 // - there must be vertices and faces
126 // - all faces must be triangulated or we can't operate on them
127 if (!pMesh->HasFaces() || !pMesh->HasPositions())
128 return static_cast<ai_real>(0.f);
129
130 if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
131 ASSIMP_LOG_ERROR("This algorithm works on triangle meshes only");
132 return static_cast<ai_real>(0.f);
133 }
134
135 if(pMesh->mNumVertices <= mConfigCacheDepth) {
136 return static_cast<ai_real>(0.f);
137 }
138
139 ai_real fACMR = 3.f;
140 const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
141
142 // Input ACMR is for logging purposes only
143 if (!DefaultLogger::isNullLogger()) {
144
145 unsigned int* piFIFOStack = new unsigned int[mConfigCacheDepth];
146 memset(piFIFOStack,0xff,mConfigCacheDepth*sizeof(unsigned int));
147 unsigned int* piCur = piFIFOStack;
148 const unsigned int* const piCurEnd = piFIFOStack + mConfigCacheDepth;
149
150 // count the number of cache misses
151 unsigned int iCacheMisses = 0;
152 for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
153 for (unsigned int qq = 0; qq < 3;++qq) {
154 bool bInCache = false;
155 for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
156 if (*pp == pcFace->mIndices[qq]) {
157 // the vertex is in cache
158 bInCache = true;
159 break;
160 }
161 }
162 if (!bInCache) {
163 ++iCacheMisses;
164 if (piCurEnd == piCur) {
165 piCur = piFIFOStack;
166 }
167 *piCur++ = pcFace->mIndices[qq];
168 }
169 }
170 }
171 delete[] piFIFOStack;
172 fACMR = (ai_real) iCacheMisses / pMesh->mNumFaces;
173 if (3.0 == fACMR) {
174 char szBuff[128]; // should be sufficiently large in every case
175
176 // the JoinIdenticalVertices process has not been executed on this
177 // mesh, otherwise this value would normally be at least minimally
178 // smaller than 3.0 ...
179 ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum);
180 ASSIMP_LOG_WARN(szBuff);
181 return static_cast<ai_real>(0.f);
182 }
183 }
184
185 // first we need to build a vertex-triangle adjacency list
186 VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
187
188 // build a list to store per-vertex caching time stamps
189 unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
190 memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
191
192 // allocate an empty output index buffer. We store the output indices in one large array.
193 // Since the number of triangles won't change the input faces can be reused. This is how
194 // we save thousands of redundant mini allocations for aiFace::mIndices
195 const unsigned int iIdxCnt = pMesh->mNumFaces*3;
196 unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
197 unsigned int* piCSIter = piIBOutput;
198
199 // allocate the flag array to hold the information
200 // whether a face has already been emitted or not
201 std::vector<bool> abEmitted(pMesh->mNumFaces,false);
202
203 // dead-end vertex index stack
204 std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
205
206 // create a copy of the piNumTriPtr buffer
207 unsigned int* const piNumTriPtr = adj.mLiveTriangles;
208 const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
209
210 // get the largest number of referenced triangles and allocate the "candidate buffer"
211 unsigned int iMaxRefTris = 0; {
212 const unsigned int* piCur = adj.mLiveTriangles;
213 const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
214 for (;piCur != piCurEnd;++piCur) {
215 iMaxRefTris = std::max(iMaxRefTris,*piCur);
216 }
217 }
218 ai_assert(iMaxRefTris > 0);
219 unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
220 unsigned int iCacheMisses = 0;
221
222 // ...................................................................................
223 /** PSEUDOCODE for the algorithm
224
225 A = Build-Adjacency(I) Vertex-triangle adjacency
226 L = Get-Triangle-Counts(A) Per-vertex live triangle counts
227 C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
228 D = Empty-Stack() Dead-end vertex stack
229 E = False(Triangle-Count(I)) Per triangle emitted flag
230 O = Empty-Index-Buffer() Empty output buffer
231 f = 0 Arbitrary starting vertex
232 s = k+1, i = 1 Time stamp and cursor
233 while f >= 0 For all valid fanning vertices
234 N = Empty-Set() 1-ring of next candidates
235 for each Triangle t in Neighbors(A, f)
236 if !Emitted(E,t)
237 for each Vertex v in t
238 Append(O,v) Output vertex
239 Push(D,v) Add to dead-end stack
240 Insert(N,v) Register as candidate
241 L[v] = L[v]-1 Decrease live triangle count
242 if s-C[v] > k If not in cache
243 C[v] = s Set time stamp
244 s = s+1 Increment time stamp
245 E[t] = true Flag triangle as emitted
246 Select next fanning vertex
247 f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
248 return O
249 */
250 // ...................................................................................
251
252 int ivdx = 0;
253 int ics = 1;
254 int iStampCnt = mConfigCacheDepth+1;
255 while (ivdx >= 0) {
256
257 unsigned int icnt = piNumTriPtrNoModify[ivdx];
258 unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
259 unsigned int* piCurCandidate = piCandidates;
260
261 // get all triangles in the neighborhood
262 for (unsigned int tri = 0; tri < icnt;++tri) {
263
264 // if they have not yet been emitted, add them to the output IB
265 const unsigned int fidx = *piList++;
266 if (!abEmitted[fidx]) {
267
268 // so iterate through all vertices of the current triangle
269 const aiFace* pcFace = &pMesh->mFaces[ fidx ];
270 unsigned nind = pcFace->mNumIndices;
271 for (unsigned ind = 0; ind < nind; ind++) {
272 unsigned dp = pcFace->mIndices[ind];
273
274 // the current vertex won't have any free triangles after this step
275 if (ivdx != (int)dp) {
276 // append the vertex to the dead-end stack
277 sDeadEndVStack.push(dp);
278
279 // register as candidate for the next step
280 *piCurCandidate++ = dp;
281
282 // decrease the per-vertex triangle counts
283 piNumTriPtr[dp]--;
284 }
285
286 // append the vertex to the output index buffer
287 *piCSIter++ = dp;
288
289 // if the vertex is not yet in cache, set its cache count
290 if (iStampCnt-piCachingStamps[dp] > mConfigCacheDepth) {
291 piCachingStamps[dp] = iStampCnt++;
292 ++iCacheMisses;
293 }
294 }
295 // flag triangle as emitted
296 abEmitted[fidx] = true;
297 }
298 }
299
300 // the vertex has now no living adjacent triangles anymore
301 piNumTriPtr[ivdx] = 0;
302
303 // get next fanning vertex
304 ivdx = -1;
305 int max_priority = -1;
306 for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) {
307 const unsigned int dp = *piCur;
308
309 // must have live triangles
310 if (piNumTriPtr[dp] > 0) {
311 int priority = 0;
312
313 // will the vertex be in cache, even after fanning occurs?
314 unsigned int tmp;
315 if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= mConfigCacheDepth) {
316 priority = tmp;
317 }
318
319 // keep best candidate
320 if (priority > max_priority) {
321 max_priority = priority;
322 ivdx = dp;
323 }
324 }
325 }
326 // did we reach a dead end?
327 if (-1 == ivdx) {
328 // need to get a non-local vertex for which we have a good chance that it is still
329 // in the cache ...
330 while (!sDeadEndVStack.empty()) {
331 unsigned int iCachedIdx = sDeadEndVStack.top();
332 sDeadEndVStack.pop();
333 if (piNumTriPtr[ iCachedIdx ] > 0) {
334 ivdx = iCachedIdx;
335 break;
336 }
337 }
338
339 if (-1 == ivdx) {
340 // well, there isn't such a vertex. Simply get the next vertex in input order and
341 // hope it is not too bad ...
342 while (ics < (int)pMesh->mNumVertices) {
343 ++ics;
344 if (piNumTriPtr[ics] > 0) {
345 ivdx = ics;
346 break;
347 }
348 }
349 }
350 }
351 }
352 ai_real fACMR2 = 0.0f;
353 if (!DefaultLogger::isNullLogger()) {
354 fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
355
356 // very intense verbose logging ... prepare for much text if there are many meshes
357 if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
358 ASSIMP_LOG_VERBOSE_DEBUG("Mesh %u | ACMR in: ", meshNum, " out: ", fACMR, " | ~", fACMR2, ((fACMR - fACMR2) / fACMR) * 100.f);
359 }
360
361 fACMR2 *= pMesh->mNumFaces;
362 }
363 // sort the output index buffer back to the input array
364 piCSIter = piIBOutput;
365 for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) {
366 unsigned nind = pcFace->mNumIndices;
367 unsigned * ind = pcFace->mIndices;
368 if (nind > 0) ind[0] = *piCSIter++;
369 if (nind > 1) ind[1] = *piCSIter++;
370 if (nind > 2) ind[2] = *piCSIter++;
371 }
372
373 // delete temporary storage
374 delete[] piCachingStamps;
375 delete[] piIBOutput;
376 delete[] piCandidates;
377
378 return fACMR2;
379 }
380