1 /*
2 ---------------------------------------------------------------------------
3 Open Asset Import Library (assimp)
4 ---------------------------------------------------------------------------
5
6 Copyright (c) 2006-2016, assimp team
7
8 All rights reserved.
9
10 Redistribution and use of this software in source and binary forms,
11 with or without modification, are permitted provided that the following
12 conditions are met:
13
14 * Redistributions of source code must retain the above
15 copyright notice, this list of conditions and the
16 following disclaimer.
17
18 * Redistributions in binary form must reproduce the above
19 copyright notice, this list of conditions and the
20 following disclaimer in the documentation and/or other
21 materials provided with the distribution.
22
23 * Neither the name of the assimp team, nor the names of its
24 contributors may be used to endorse or promote products
25 derived from this software without specific prior
26 written permission of the assimp team.
27
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 ---------------------------------------------------------------------------
40 */
41
42 /** @file Implementation of the post processing step to improve the cache locality of a mesh.
43 * <br>
44 * The algorithm is roughly basing on this paper:
45 * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
46 * .. although overdraw rduction isn't implemented yet ...
47 */
48
49
50
51 // internal headers
52 #include "ImproveCacheLocality.h"
53 #include "VertexTriangleAdjacency.h"
54 #include "StringUtils.h"
55 #include <assimp/postprocess.h>
56 #include <assimp/scene.h>
57 #include <assimp/DefaultLogger.hpp>
58 #include <stdio.h>
59 #include <stack>
60
61 using namespace Assimp;
62
63 // ------------------------------------------------------------------------------------------------
64 // Constructor to be privately used by Importer
ImproveCacheLocalityProcess()65 ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() {
66 configCacheDepth = PP_ICL_PTCACHE_SIZE;
67 }
68
69 // ------------------------------------------------------------------------------------------------
70 // Destructor, private as well
~ImproveCacheLocalityProcess()71 ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess()
72 {
73 // nothing to do here
74 }
75
76 // ------------------------------------------------------------------------------------------------
77 // Returns whether the processing step is present in the given flag field.
IsActive(unsigned int pFlags) const78 bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const
79 {
80 return (pFlags & aiProcess_ImproveCacheLocality) != 0;
81 }
82
83 // ------------------------------------------------------------------------------------------------
84 // Setup configuration
SetupProperties(const Importer * pImp)85 void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp)
86 {
87 // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
88 configCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
89 }
90
91 // ------------------------------------------------------------------------------------------------
92 // Executes the post processing step on the given imported data.
Execute(aiScene * pScene)93 void ImproveCacheLocalityProcess::Execute( aiScene* pScene)
94 {
95 if (!pScene->mNumMeshes) {
96 DefaultLogger::get()->debug("ImproveCacheLocalityProcess skipped; there are no meshes");
97 return;
98 }
99
100 DefaultLogger::get()->debug("ImproveCacheLocalityProcess begin");
101
102 float out = 0.f;
103 unsigned int numf = 0, numm = 0;
104 for( unsigned int a = 0; a < pScene->mNumMeshes; a++){
105 const float res = ProcessMesh( pScene->mMeshes[a],a);
106 if (res) {
107 numf += pScene->mMeshes[a]->mNumFaces;
108 out += res;
109 ++numm;
110 }
111 }
112 if (!DefaultLogger::isNullLogger()) {
113 char szBuff[128]; // should be sufficiently large in every case
114 ai_snprintf(szBuff,128,"Cache relevant are %u meshes (%u faces). Average output ACMR is %f",
115 numm,numf,out/numf);
116
117 DefaultLogger::get()->info(szBuff);
118 DefaultLogger::get()->debug("ImproveCacheLocalityProcess finished. ");
119 }
120 }
121
122 // ------------------------------------------------------------------------------------------------
123 // Improves the cache coherency of a specific mesh
ProcessMesh(aiMesh * pMesh,unsigned int meshNum)124 float ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum)
125 {
126 // TODO: rewrite this to use std::vector or boost::shared_array
127 ai_assert(NULL != pMesh);
128
129 // Check whether the input data is valid
130 // - there must be vertices and faces
131 // - all faces must be triangulated or we can't operate on them
132 if (!pMesh->HasFaces() || !pMesh->HasPositions())
133 return 0.f;
134
135 if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
136 DefaultLogger::get()->error("This algorithm works on triangle meshes only");
137 return 0.f;
138 }
139
140 if(pMesh->mNumVertices <= configCacheDepth) {
141 return 0.f;
142 }
143
144 float fACMR = 3.f;
145 const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
146
147 // Input ACMR is for logging purposes only
148 if (!DefaultLogger::isNullLogger()) {
149
150 unsigned int* piFIFOStack = new unsigned int[configCacheDepth];
151 memset(piFIFOStack,0xff,configCacheDepth*sizeof(unsigned int));
152 unsigned int* piCur = piFIFOStack;
153 const unsigned int* const piCurEnd = piFIFOStack + configCacheDepth;
154
155 // count the number of cache misses
156 unsigned int iCacheMisses = 0;
157 for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
158
159 for (unsigned int qq = 0; qq < 3;++qq) {
160 bool bInCache = false;
161
162 for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
163 if (*pp == pcFace->mIndices[qq]) {
164 // the vertex is in cache
165 bInCache = true;
166 break;
167 }
168 }
169 if (!bInCache) {
170 ++iCacheMisses;
171 if (piCurEnd == piCur) {
172 piCur = piFIFOStack;
173 }
174 *piCur++ = pcFace->mIndices[qq];
175 }
176 }
177 }
178 delete[] piFIFOStack;
179 fACMR = (float)iCacheMisses / pMesh->mNumFaces;
180 if (3.0 == fACMR) {
181 char szBuff[128]; // should be sufficiently large in every case
182
183 // the JoinIdenticalVertices process has not been executed on this
184 // mesh, otherwise this value would normally be at least minimally
185 // smaller than 3.0 ...
186 ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum);
187 DefaultLogger::get()->warn(szBuff);
188 return 0.f;
189 }
190 }
191
192 // first we need to build a vertex-triangle adjacency list
193 VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
194
195 // build a list to store per-vertex caching time stamps
196 unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
197 memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
198
199 // allocate an empty output index buffer. We store the output indices in one large array.
200 // Since the number of triangles won't change the input faces can be reused. This is how
201 // we save thousands of redundant mini allocations for aiFace::mIndices
202 const unsigned int iIdxCnt = pMesh->mNumFaces*3;
203 unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
204 unsigned int* piCSIter = piIBOutput;
205
206 // allocate the flag array to hold the information
207 // whether a face has already been emitted or not
208 std::vector<bool> abEmitted(pMesh->mNumFaces,false);
209
210 // dead-end vertex index stack
211 std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
212
213 // create a copy of the piNumTriPtr buffer
214 unsigned int* const piNumTriPtr = adj.mLiveTriangles;
215 const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
216
217 // get the largest number of referenced triangles and allocate the "candidate buffer"
218 unsigned int iMaxRefTris = 0; {
219 const unsigned int* piCur = adj.mLiveTriangles;
220 const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
221 for (;piCur != piCurEnd;++piCur) {
222 iMaxRefTris = std::max(iMaxRefTris,*piCur);
223 }
224 }
225 unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
226 unsigned int iCacheMisses = 0;
227
228 // ...................................................................................
229 /** PSEUDOCODE for the algorithm
230
231 A = Build-Adjacency(I) Vertex-triangle adjacency
232 L = Get-Triangle-Counts(A) Per-vertex live triangle counts
233 C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
234 D = Empty-Stack() Dead-end vertex stack
235 E = False(Triangle-Count(I)) Per triangle emitted flag
236 O = Empty-Index-Buffer() Empty output buffer
237 f = 0 Arbitrary starting vertex
238 s = k+1, i = 1 Time stamp and cursor
239 while f >= 0 For all valid fanning vertices
240 N = Empty-Set() 1-ring of next candidates
241 for each Triangle t in Neighbors(A, f)
242 if !Emitted(E,t)
243 for each Vertex v in t
244 Append(O,v) Output vertex
245 Push(D,v) Add to dead-end stack
246 Insert(N,v) Register as candidate
247 L[v] = L[v]-1 Decrease live triangle count
248 if s-C[v] > k If not in cache
249 C[v] = s Set time stamp
250 s = s+1 Increment time stamp
251 E[t] = true Flag triangle as emitted
252 Select next fanning vertex
253 f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
254 return O
255 */
256 // ...................................................................................
257
258 int ivdx = 0;
259 int ics = 1;
260 int iStampCnt = configCacheDepth+1;
261 while (ivdx >= 0) {
262
263 unsigned int icnt = piNumTriPtrNoModify[ivdx];
264 unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
265 unsigned int* piCurCandidate = piCandidates;
266
267 // get all triangles in the neighborhood
268 for (unsigned int tri = 0; tri < icnt;++tri) {
269
270 // if they have not yet been emitted, add them to the output IB
271 const unsigned int fidx = *piList++;
272 if (!abEmitted[fidx]) {
273
274 // so iterate through all vertices of the current triangle
275 const aiFace* pcFace = &pMesh->mFaces[ fidx ];
276 for (unsigned int* p = pcFace->mIndices, *p2 = pcFace->mIndices+3;p != p2;++p) {
277 const unsigned int dp = *p;
278
279 // the current vertex won't have any free triangles after this step
280 if (ivdx != (int)dp) {
281 // append the vertex to the dead-end stack
282 sDeadEndVStack.push(dp);
283
284 // register as candidate for the next step
285 *piCurCandidate++ = dp;
286
287 // decrease the per-vertex triangle counts
288 piNumTriPtr[dp]--;
289 }
290
291 // append the vertex to the output index buffer
292 *piCSIter++ = dp;
293
294 // if the vertex is not yet in cache, set its cache count
295 if (iStampCnt-piCachingStamps[dp] > configCacheDepth) {
296 piCachingStamps[dp] = iStampCnt++;
297 ++iCacheMisses;
298 }
299 }
300 // flag triangle as emitted
301 abEmitted[fidx] = true;
302 }
303 }
304
305 // the vertex has now no living adjacent triangles anymore
306 piNumTriPtr[ivdx] = 0;
307
308 // get next fanning vertex
309 ivdx = -1;
310 int max_priority = -1;
311 for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) {
312 const unsigned int dp = *piCur;
313
314 // must have live triangles
315 if (piNumTriPtr[dp] > 0) {
316 int priority = 0;
317
318 // will the vertex be in cache, even after fanning occurs?
319 unsigned int tmp;
320 if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= configCacheDepth) {
321 priority = tmp;
322 }
323
324 // keep best candidate
325 if (priority > max_priority) {
326 max_priority = priority;
327 ivdx = dp;
328 }
329 }
330 }
331 // did we reach a dead end?
332 if (-1 == ivdx) {
333 // need to get a non-local vertex for which we have a good chance that it is still
334 // in the cache ...
335 while (!sDeadEndVStack.empty()) {
336 unsigned int iCachedIdx = sDeadEndVStack.top();
337 sDeadEndVStack.pop();
338 if (piNumTriPtr[ iCachedIdx ] > 0) {
339 ivdx = iCachedIdx;
340 break;
341 }
342 }
343
344 if (-1 == ivdx) {
345 // well, there isn't such a vertex. Simply get the next vertex in input order and
346 // hope it is not too bad ...
347 while (ics < (int)pMesh->mNumVertices) {
348 ++ics;
349 if (piNumTriPtr[ics] > 0) {
350 ivdx = ics;
351 break;
352 }
353 }
354 }
355 }
356 }
357 float fACMR2 = 0.0f;
358 if (!DefaultLogger::isNullLogger()) {
359 fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
360
361 // very intense verbose logging ... prepare for much text if there are many meshes
362 if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
363 char szBuff[128]; // should be sufficiently large in every case
364
365 ai_snprintf(szBuff,128,"Mesh %u | ACMR in: %f out: %f | ~%.1f%%",meshNum,fACMR,fACMR2,
366 ((fACMR - fACMR2) / fACMR) * 100.f);
367 DefaultLogger::get()->debug(szBuff);
368 }
369
370 fACMR2 *= pMesh->mNumFaces;
371 }
372 // sort the output index buffer back to the input array
373 piCSIter = piIBOutput;
374 for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) {
375 pcFace->mIndices[0] = *piCSIter++;
376 pcFace->mIndices[1] = *piCSIter++;
377 pcFace->mIndices[2] = *piCSIter++;
378 }
379
380 // delete temporary storage
381 delete[] piCachingStamps;
382 delete[] piIBOutput;
383 delete[] piCandidates;
384
385 return fACMR2;
386 }
387