1 /*
2 ---------------------------------------------------------------------------
3 Open Asset Import Library (assimp)
4 ---------------------------------------------------------------------------
5 
6 Copyright (c) 2006-2016, assimp team
7 
8 All rights reserved.
9 
10 Redistribution and use of this software in source and binary forms,
11 with or without modification, are permitted provided that the following
12 conditions are met:
13 
14 * Redistributions of source code must retain the above
15   copyright notice, this list of conditions and the
16   following disclaimer.
17 
18 * Redistributions in binary form must reproduce the above
19   copyright notice, this list of conditions and the
20   following disclaimer in the documentation and/or other
21   materials provided with the distribution.
22 
23 * Neither the name of the assimp team, nor the names of its
24   contributors may be used to endorse or promote products
25   derived from this software without specific prior
26   written permission of the assimp team.
27 
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 ---------------------------------------------------------------------------
40 */
41 
42 /** @file Implementation of the post processing step to improve the cache locality of a mesh.
43  * <br>
44  * The algorithm is roughly basing on this paper:
45  * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
46  *   .. although overdraw rduction isn't implemented yet ...
47  */
48 
49 
50 
51 // internal headers
52 #include "ImproveCacheLocality.h"
53 #include "VertexTriangleAdjacency.h"
54 #include "StringUtils.h"
55 #include <assimp/postprocess.h>
56 #include <assimp/scene.h>
57 #include <assimp/DefaultLogger.hpp>
58 #include <stdio.h>
59 #include <stack>
60 
61 using namespace Assimp;
62 
63 // ------------------------------------------------------------------------------------------------
64 // Constructor to be privately used by Importer
ImproveCacheLocalityProcess()65 ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() {
66     configCacheDepth = PP_ICL_PTCACHE_SIZE;
67 }
68 
69 // ------------------------------------------------------------------------------------------------
70 // Destructor, private as well
~ImproveCacheLocalityProcess()71 ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess()
72 {
73     // nothing to do here
74 }
75 
76 // ------------------------------------------------------------------------------------------------
77 // Returns whether the processing step is present in the given flag field.
IsActive(unsigned int pFlags) const78 bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const
79 {
80     return (pFlags & aiProcess_ImproveCacheLocality) != 0;
81 }
82 
83 // ------------------------------------------------------------------------------------------------
84 // Setup configuration
SetupProperties(const Importer * pImp)85 void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp)
86 {
87     // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
88     configCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
89 }
90 
91 // ------------------------------------------------------------------------------------------------
92 // Executes the post processing step on the given imported data.
Execute(aiScene * pScene)93 void ImproveCacheLocalityProcess::Execute( aiScene* pScene)
94 {
95     if (!pScene->mNumMeshes) {
96         DefaultLogger::get()->debug("ImproveCacheLocalityProcess skipped; there are no meshes");
97         return;
98     }
99 
100     DefaultLogger::get()->debug("ImproveCacheLocalityProcess begin");
101 
102     float out = 0.f;
103     unsigned int numf = 0, numm = 0;
104     for( unsigned int a = 0; a < pScene->mNumMeshes; a++){
105         const float res = ProcessMesh( pScene->mMeshes[a],a);
106         if (res) {
107             numf += pScene->mMeshes[a]->mNumFaces;
108             out  += res;
109             ++numm;
110         }
111     }
112     if (!DefaultLogger::isNullLogger()) {
113         char szBuff[128]; // should be sufficiently large in every case
114         ai_snprintf(szBuff,128,"Cache relevant are %u meshes (%u faces). Average output ACMR is %f",
115             numm,numf,out/numf);
116 
117         DefaultLogger::get()->info(szBuff);
118         DefaultLogger::get()->debug("ImproveCacheLocalityProcess finished. ");
119     }
120 }
121 
122 // ------------------------------------------------------------------------------------------------
123 // Improves the cache coherency of a specific mesh
ProcessMesh(aiMesh * pMesh,unsigned int meshNum)124 float ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum)
125 {
126     // TODO: rewrite this to use std::vector or boost::shared_array
127     ai_assert(NULL != pMesh);
128 
129     // Check whether the input data is valid
130     // - there must be vertices and faces
131     // - all faces must be triangulated or we can't operate on them
132     if (!pMesh->HasFaces() || !pMesh->HasPositions())
133         return 0.f;
134 
135     if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
136         DefaultLogger::get()->error("This algorithm works on triangle meshes only");
137         return 0.f;
138     }
139 
140     if(pMesh->mNumVertices <= configCacheDepth) {
141         return 0.f;
142     }
143 
144     float fACMR = 3.f;
145     const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
146 
147     // Input ACMR is for logging purposes only
148     if (!DefaultLogger::isNullLogger())     {
149 
150         unsigned int* piFIFOStack = new unsigned int[configCacheDepth];
151         memset(piFIFOStack,0xff,configCacheDepth*sizeof(unsigned int));
152         unsigned int* piCur = piFIFOStack;
153         const unsigned int* const piCurEnd = piFIFOStack + configCacheDepth;
154 
155         // count the number of cache misses
156         unsigned int iCacheMisses = 0;
157         for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
158 
159             for (unsigned int qq = 0; qq < 3;++qq) {
160                 bool bInCache = false;
161 
162                 for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
163                     if (*pp == pcFace->mIndices[qq])    {
164                         // the vertex is in cache
165                         bInCache = true;
166                         break;
167                     }
168                 }
169                 if (!bInCache)  {
170                     ++iCacheMisses;
171                     if (piCurEnd == piCur) {
172                         piCur = piFIFOStack;
173                     }
174                     *piCur++ = pcFace->mIndices[qq];
175                 }
176             }
177         }
178         delete[] piFIFOStack;
179         fACMR = (float)iCacheMisses / pMesh->mNumFaces;
180         if (3.0 == fACMR)   {
181             char szBuff[128]; // should be sufficiently large in every case
182 
183             // the JoinIdenticalVertices process has not been executed on this
184             // mesh, otherwise this value would normally be at least minimally
185             // smaller than 3.0 ...
186             ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum);
187             DefaultLogger::get()->warn(szBuff);
188             return 0.f;
189         }
190     }
191 
192     // first we need to build a vertex-triangle adjacency list
193     VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
194 
195     // build a list to store per-vertex caching time stamps
196     unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
197     memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
198 
199     // allocate an empty output index buffer. We store the output indices in one large array.
200     // Since the number of triangles won't change the input faces can be reused. This is how
201     // we save thousands of redundant mini allocations for aiFace::mIndices
202     const unsigned int iIdxCnt = pMesh->mNumFaces*3;
203     unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
204     unsigned int* piCSIter = piIBOutput;
205 
206     // allocate the flag array to hold the information
207     // whether a face has already been emitted or not
208     std::vector<bool> abEmitted(pMesh->mNumFaces,false);
209 
210     // dead-end vertex index stack
211     std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
212 
213     // create a copy of the piNumTriPtr buffer
214     unsigned int* const piNumTriPtr = adj.mLiveTriangles;
215     const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
216 
217     // get the largest number of referenced triangles and allocate the "candidate buffer"
218     unsigned int iMaxRefTris = 0; {
219         const unsigned int* piCur = adj.mLiveTriangles;
220         const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
221         for (;piCur != piCurEnd;++piCur) {
222             iMaxRefTris = std::max(iMaxRefTris,*piCur);
223         }
224     }
225     unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
226     unsigned int iCacheMisses = 0;
227 
228     // ...................................................................................
229     /** PSEUDOCODE for the algorithm
230 
231         A = Build-Adjacency(I) Vertex-triangle adjacency
232         L = Get-Triangle-Counts(A) Per-vertex live triangle counts
233         C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
234         D = Empty-Stack() Dead-end vertex stack
235         E = False(Triangle-Count(I)) Per triangle emitted flag
236         O = Empty-Index-Buffer() Empty output buffer
237         f = 0 Arbitrary starting vertex
238         s = k+1, i = 1 Time stamp and cursor
239         while f >= 0 For all valid fanning vertices
240             N = Empty-Set() 1-ring of next candidates
241             for each Triangle t in Neighbors(A, f)
242                 if !Emitted(E,t)
243                     for each Vertex v in t
244                         Append(O,v) Output vertex
245                         Push(D,v) Add to dead-end stack
246                         Insert(N,v) Register as candidate
247                         L[v] = L[v]-1 Decrease live triangle count
248                         if s-C[v] > k If not in cache
249                             C[v] = s Set time stamp
250                             s = s+1 Increment time stamp
251                     E[t] = true Flag triangle as emitted
252             Select next fanning vertex
253             f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
254         return O
255         */
256     // ...................................................................................
257 
258     int ivdx = 0;
259     int ics = 1;
260     int iStampCnt = configCacheDepth+1;
261     while (ivdx >= 0)   {
262 
263         unsigned int icnt = piNumTriPtrNoModify[ivdx];
264         unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
265         unsigned int* piCurCandidate = piCandidates;
266 
267         // get all triangles in the neighborhood
268         for (unsigned int tri = 0; tri < icnt;++tri)    {
269 
270             // if they have not yet been emitted, add them to the output IB
271             const unsigned int fidx = *piList++;
272             if (!abEmitted[fidx])   {
273 
274                 // so iterate through all vertices of the current triangle
275                 const aiFace* pcFace = &pMesh->mFaces[ fidx ];
276                 for (unsigned int* p = pcFace->mIndices, *p2 = pcFace->mIndices+3;p != p2;++p)  {
277                     const unsigned int dp = *p;
278 
279                     // the current vertex won't have any free triangles after this step
280                     if (ivdx != (int)dp) {
281                         // append the vertex to the dead-end stack
282                         sDeadEndVStack.push(dp);
283 
284                         // register as candidate for the next step
285                         *piCurCandidate++ = dp;
286 
287                         // decrease the per-vertex triangle counts
288                         piNumTriPtr[dp]--;
289                     }
290 
291                     // append the vertex to the output index buffer
292                     *piCSIter++ = dp;
293 
294                     // if the vertex is not yet in cache, set its cache count
295                     if (iStampCnt-piCachingStamps[dp] > configCacheDepth) {
296                         piCachingStamps[dp] = iStampCnt++;
297                         ++iCacheMisses;
298                     }
299                 }
300                 // flag triangle as emitted
301                 abEmitted[fidx] = true;
302             }
303         }
304 
305         // the vertex has now no living adjacent triangles anymore
306         piNumTriPtr[ivdx] = 0;
307 
308         // get next fanning vertex
309         ivdx = -1;
310         int max_priority = -1;
311         for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur)    {
312             const unsigned int dp = *piCur;
313 
314             // must have live triangles
315             if (piNumTriPtr[dp] > 0)    {
316                 int priority = 0;
317 
318                 // will the vertex be in cache, even after fanning occurs?
319                 unsigned int tmp;
320                 if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= configCacheDepth) {
321                     priority = tmp;
322                 }
323 
324                 // keep best candidate
325                 if (priority > max_priority) {
326                     max_priority = priority;
327                     ivdx = dp;
328                 }
329             }
330         }
331         // did we reach a dead end?
332         if (-1 == ivdx) {
333             // need to get a non-local vertex for which we have a good chance that it is still
334             // in the cache ...
335             while (!sDeadEndVStack.empty()) {
336                 unsigned int iCachedIdx = sDeadEndVStack.top();
337                 sDeadEndVStack.pop();
338                 if (piNumTriPtr[ iCachedIdx ] > 0)  {
339                     ivdx = iCachedIdx;
340                     break;
341                 }
342             }
343 
344             if (-1 == ivdx) {
345                 // well, there isn't such a vertex. Simply get the next vertex in input order and
346                 // hope it is not too bad ...
347                 while (ics < (int)pMesh->mNumVertices)  {
348                     ++ics;
349                     if (piNumTriPtr[ics] > 0)   {
350                         ivdx = ics;
351                         break;
352                     }
353                 }
354             }
355         }
356     }
357     float fACMR2 = 0.0f;
358     if (!DefaultLogger::isNullLogger()) {
359         fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
360 
361         // very intense verbose logging ... prepare for much text if there are many meshes
362         if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
363             char szBuff[128]; // should be sufficiently large in every case
364 
365             ai_snprintf(szBuff,128,"Mesh %u | ACMR in: %f out: %f | ~%.1f%%",meshNum,fACMR,fACMR2,
366                 ((fACMR - fACMR2) / fACMR) * 100.f);
367             DefaultLogger::get()->debug(szBuff);
368         }
369 
370         fACMR2 *= pMesh->mNumFaces;
371     }
372     // sort the output index buffer back to the input array
373     piCSIter = piIBOutput;
374     for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace)  {
375         pcFace->mIndices[0] = *piCSIter++;
376         pcFace->mIndices[1] = *piCSIter++;
377         pcFace->mIndices[2] = *piCSIter++;
378     }
379 
380     // delete temporary storage
381     delete[] piCachingStamps;
382     delete[] piIBOutput;
383     delete[] piCandidates;
384 
385     return fACMR2;
386 }
387