1 /*
2 ---------------------------------------------------------------------------
3 Open Asset Import Library (assimp)
4 ---------------------------------------------------------------------------
5 
6 Copyright (c) 2006-2021, assimp team
7 
8 
9 
10 All rights reserved.
11 
12 Redistribution and use of this software in source and binary forms,
13 with or without modification, are permitted provided that the following
14 conditions are met:
15 
16 * Redistributions of source code must retain the above
17   copyright notice, this list of conditions and the
18   following disclaimer.
19 
20 * Redistributions in binary form must reproduce the above
21   copyright notice, this list of conditions and the
22   following disclaimer in the documentation and/or other
23   materials provided with the distribution.
24 
25 * Neither the name of the assimp team, nor the names of its
26   contributors may be used to endorse or promote products
27   derived from this software without specific prior
28   written permission of the assimp team.
29 
30 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 ---------------------------------------------------------------------------
42 */
43 
44 /** @file Implementation of the post processing step to improve the cache locality of a mesh.
45  * <br>
46  * The algorithm is roughly basing on this paper:
47  * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
48  *   .. although overdraw reduction isn't implemented yet ...
49  */
50 
51 // internal headers
52 #include "PostProcessing/ImproveCacheLocality.h"
53 #include "Common/VertexTriangleAdjacency.h"
54 
55 #include <assimp/StringUtils.h>
56 #include <assimp/postprocess.h>
57 #include <assimp/scene.h>
58 #include <assimp/DefaultLogger.hpp>
59 #include <stdio.h>
60 #include <stack>
61 
62 using namespace Assimp;
63 
64 // ------------------------------------------------------------------------------------------------
65 // Constructor to be privately used by Importer
ImproveCacheLocalityProcess()66 ImproveCacheLocalityProcess::ImproveCacheLocalityProcess()
67 : mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
68     // empty
69 }
70 
71 // ------------------------------------------------------------------------------------------------
72 // Destructor, private as well
~ImproveCacheLocalityProcess()73 ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess() {
74     // nothing to do here
75 }
76 
77 // ------------------------------------------------------------------------------------------------
78 // Returns whether the processing step is present in the given flag field.
IsActive(unsigned int pFlags) const79 bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const {
80     return (pFlags & aiProcess_ImproveCacheLocality) != 0;
81 }
82 
83 // ------------------------------------------------------------------------------------------------
84 // Setup configuration
SetupProperties(const Importer * pImp)85 void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp) {
86     // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
87     mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
88 }
89 
90 // ------------------------------------------------------------------------------------------------
91 // Executes the post processing step on the given imported data.
Execute(aiScene * pScene)92 void ImproveCacheLocalityProcess::Execute( aiScene* pScene) {
93     if (!pScene->mNumMeshes) {
94         ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess skipped; there are no meshes");
95         return;
96     }
97 
98     ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess begin");
99 
100     float out = 0.f;
101     unsigned int numf = 0, numm = 0;
102     for( unsigned int a = 0; a < pScene->mNumMeshes; ++a ){
103         const float res = ProcessMesh( pScene->mMeshes[a],a);
104         if (res) {
105             numf += pScene->mMeshes[a]->mNumFaces;
106             out  += res;
107             ++numm;
108         }
109     }
110     if (!DefaultLogger::isNullLogger()) {
111         if (numf > 0) {
112             ASSIMP_LOG_INFO("Cache relevant are ", numm, " meshes (", numf, " faces). Average output ACMR is ", out / numf);
113         }
114         ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess finished. ");
115     }
116 }
117 
118 // ------------------------------------------------------------------------------------------------
119 // Improves the cache coherency of a specific mesh
ProcessMesh(aiMesh * pMesh,unsigned int meshNum)120 ai_real ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) {
121     // TODO: rewrite this to use std::vector or boost::shared_array
122     ai_assert(nullptr != pMesh);
123 
124     // Check whether the input data is valid
125     // - there must be vertices and faces
126     // - all faces must be triangulated or we can't operate on them
127     if (!pMesh->HasFaces() || !pMesh->HasPositions())
128         return static_cast<ai_real>(0.f);
129 
130     if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
131         ASSIMP_LOG_ERROR("This algorithm works on triangle meshes only");
132         return static_cast<ai_real>(0.f);
133     }
134 
135     if(pMesh->mNumVertices <= mConfigCacheDepth) {
136         return static_cast<ai_real>(0.f);
137     }
138 
139     ai_real fACMR = 3.f;
140     const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
141 
142     // Input ACMR is for logging purposes only
143     if (!DefaultLogger::isNullLogger())     {
144 
145         unsigned int* piFIFOStack = new unsigned int[mConfigCacheDepth];
146         memset(piFIFOStack,0xff,mConfigCacheDepth*sizeof(unsigned int));
147         unsigned int* piCur = piFIFOStack;
148         const unsigned int* const piCurEnd = piFIFOStack + mConfigCacheDepth;
149 
150         // count the number of cache misses
151         unsigned int iCacheMisses = 0;
152         for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
153             for (unsigned int qq = 0; qq < 3;++qq) {
154                 bool bInCache = false;
155                 for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
156                     if (*pp == pcFace->mIndices[qq])    {
157                         // the vertex is in cache
158                         bInCache = true;
159                         break;
160                     }
161                 }
162                 if (!bInCache)  {
163                     ++iCacheMisses;
164                     if (piCurEnd == piCur) {
165                         piCur = piFIFOStack;
166                     }
167                     *piCur++ = pcFace->mIndices[qq];
168                 }
169             }
170         }
171         delete[] piFIFOStack;
172         fACMR = (ai_real) iCacheMisses / pMesh->mNumFaces;
173         if (3.0 == fACMR)   {
174             char szBuff[128]; // should be sufficiently large in every case
175 
176             // the JoinIdenticalVertices process has not been executed on this
177             // mesh, otherwise this value would normally be at least minimally
178             // smaller than 3.0 ...
179             ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum);
180             ASSIMP_LOG_WARN(szBuff);
181             return static_cast<ai_real>(0.f);
182         }
183     }
184 
185     // first we need to build a vertex-triangle adjacency list
186     VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
187 
188     // build a list to store per-vertex caching time stamps
189     unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
190     memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
191 
192     // allocate an empty output index buffer. We store the output indices in one large array.
193     // Since the number of triangles won't change the input faces can be reused. This is how
194     // we save thousands of redundant mini allocations for aiFace::mIndices
195     const unsigned int iIdxCnt = pMesh->mNumFaces*3;
196     unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
197     unsigned int* piCSIter = piIBOutput;
198 
199     // allocate the flag array to hold the information
200     // whether a face has already been emitted or not
201     std::vector<bool> abEmitted(pMesh->mNumFaces,false);
202 
203     // dead-end vertex index stack
204     std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
205 
206     // create a copy of the piNumTriPtr buffer
207     unsigned int* const piNumTriPtr = adj.mLiveTriangles;
208     const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
209 
210     // get the largest number of referenced triangles and allocate the "candidate buffer"
211     unsigned int iMaxRefTris = 0; {
212         const unsigned int* piCur = adj.mLiveTriangles;
213         const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
214         for (;piCur != piCurEnd;++piCur) {
215             iMaxRefTris = std::max(iMaxRefTris,*piCur);
216         }
217     }
218     ai_assert(iMaxRefTris > 0);
219     unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
220     unsigned int iCacheMisses = 0;
221 
222     // ...................................................................................
223     /** PSEUDOCODE for the algorithm
224 
225         A = Build-Adjacency(I) Vertex-triangle adjacency
226         L = Get-Triangle-Counts(A) Per-vertex live triangle counts
227         C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
228         D = Empty-Stack() Dead-end vertex stack
229         E = False(Triangle-Count(I)) Per triangle emitted flag
230         O = Empty-Index-Buffer() Empty output buffer
231         f = 0 Arbitrary starting vertex
232         s = k+1, i = 1 Time stamp and cursor
233         while f >= 0 For all valid fanning vertices
234             N = Empty-Set() 1-ring of next candidates
235             for each Triangle t in Neighbors(A, f)
236                 if !Emitted(E,t)
237                     for each Vertex v in t
238                         Append(O,v) Output vertex
239                         Push(D,v) Add to dead-end stack
240                         Insert(N,v) Register as candidate
241                         L[v] = L[v]-1 Decrease live triangle count
242                         if s-C[v] > k If not in cache
243                             C[v] = s Set time stamp
244                             s = s+1 Increment time stamp
245                     E[t] = true Flag triangle as emitted
246             Select next fanning vertex
247             f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
248         return O
249         */
250     // ...................................................................................
251 
252     int ivdx = 0;
253     int ics = 1;
254     int iStampCnt = mConfigCacheDepth+1;
255     while (ivdx >= 0)   {
256 
257         unsigned int icnt = piNumTriPtrNoModify[ivdx];
258         unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
259         unsigned int* piCurCandidate = piCandidates;
260 
261         // get all triangles in the neighborhood
262         for (unsigned int tri = 0; tri < icnt;++tri)    {
263 
264             // if they have not yet been emitted, add them to the output IB
265             const unsigned int fidx = *piList++;
266             if (!abEmitted[fidx])   {
267 
268                 // so iterate through all vertices of the current triangle
269                 const aiFace* pcFace = &pMesh->mFaces[ fidx ];
270                 unsigned nind = pcFace->mNumIndices;
271                 for (unsigned ind = 0; ind < nind; ind++) {
272                     unsigned dp = pcFace->mIndices[ind];
273 
274                     // the current vertex won't have any free triangles after this step
275                     if (ivdx != (int)dp) {
276                         // append the vertex to the dead-end stack
277                         sDeadEndVStack.push(dp);
278 
279                         // register as candidate for the next step
280                         *piCurCandidate++ = dp;
281 
282                         // decrease the per-vertex triangle counts
283                         piNumTriPtr[dp]--;
284                     }
285 
286                     // append the vertex to the output index buffer
287                     *piCSIter++ = dp;
288 
289                     // if the vertex is not yet in cache, set its cache count
290                     if (iStampCnt-piCachingStamps[dp] > mConfigCacheDepth) {
291                         piCachingStamps[dp] = iStampCnt++;
292                         ++iCacheMisses;
293                     }
294                 }
295                 // flag triangle as emitted
296                 abEmitted[fidx] = true;
297             }
298         }
299 
300         // the vertex has now no living adjacent triangles anymore
301         piNumTriPtr[ivdx] = 0;
302 
303         // get next fanning vertex
304         ivdx = -1;
305         int max_priority = -1;
306         for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur)    {
307             const unsigned int dp = *piCur;
308 
309             // must have live triangles
310             if (piNumTriPtr[dp] > 0)    {
311                 int priority = 0;
312 
313                 // will the vertex be in cache, even after fanning occurs?
314                 unsigned int tmp;
315                 if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= mConfigCacheDepth) {
316                     priority = tmp;
317                 }
318 
319                 // keep best candidate
320                 if (priority > max_priority) {
321                     max_priority = priority;
322                     ivdx = dp;
323                 }
324             }
325         }
326         // did we reach a dead end?
327         if (-1 == ivdx) {
328             // need to get a non-local vertex for which we have a good chance that it is still
329             // in the cache ...
330             while (!sDeadEndVStack.empty()) {
331                 unsigned int iCachedIdx = sDeadEndVStack.top();
332                 sDeadEndVStack.pop();
333                 if (piNumTriPtr[ iCachedIdx ] > 0)  {
334                     ivdx = iCachedIdx;
335                     break;
336                 }
337             }
338 
339             if (-1 == ivdx) {
340                 // well, there isn't such a vertex. Simply get the next vertex in input order and
341                 // hope it is not too bad ...
342                 while (ics < (int)pMesh->mNumVertices)  {
343                     ++ics;
344                     if (piNumTriPtr[ics] > 0)   {
345                         ivdx = ics;
346                         break;
347                     }
348                 }
349             }
350         }
351     }
352     ai_real fACMR2 = 0.0f;
353     if (!DefaultLogger::isNullLogger()) {
354         fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
355 
356         // very intense verbose logging ... prepare for much text if there are many meshes
357         if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
358             ASSIMP_LOG_VERBOSE_DEBUG("Mesh %u | ACMR in: ", meshNum, " out: ", fACMR, " | ~", fACMR2, ((fACMR - fACMR2) / fACMR) * 100.f);
359         }
360 
361         fACMR2 *= pMesh->mNumFaces;
362     }
363     // sort the output index buffer back to the input array
364     piCSIter = piIBOutput;
365     for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace)  {
366         unsigned nind = pcFace->mNumIndices;
367         unsigned * ind = pcFace->mIndices;
368         if (nind > 0) ind[0] = *piCSIter++;
369         if (nind > 1) ind[1] = *piCSIter++;
370         if (nind > 2) ind[2] = *piCSIter++;
371     }
372 
373     // delete temporary storage
374     delete[] piCachingStamps;
375     delete[] piIBOutput;
376     delete[] piCandidates;
377 
378     return fACMR2;
379 }
380