1 /*
2  * vcacheopt.h - Vertex Cache Optimizer
3  * Copyright 2009 Michael Georgoulpoulos <mgeorgoulopoulos at gmail>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a copy
6  * of this software and associated documentation files (the "Software"), to deal
7  * in the Software without restriction, including without limitation the rights
8  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9  * copies of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21  * THE SOFTWARE.
22  */
23 
24 #ifndef _VCACHEOPT_H_
25 #define _VCACHEOPT_H_
26 
27 #include <vector>
28 #include <cmath>
29 #include <cassert>
30 #include <climits>
31 
32 template <typename T, int ERR_VAL>
33 class TVertexCacheData
34 {
35 public:
36 	int position_in_cache;
37 	float current_score;
38 	int total_valence; // toatl number of triangles using this vertex
39 	int remaining_valence; // number of triangles using it but not yet rendered
40 	std::vector<T> tri_indices; // indices to the indices that use this vertex
41 	bool calculated; // was the score calculated during this iteration?
42 
43 
FindTriangle(const T tri)44 	T FindTriangle(const T tri) const
45 	{
46 		for (size_t i=0; i<tri_indices.size(); ++i)	{
47 			if (tri_indices[i] == tri)
48 				return i;
49 		}
50 		return ERR_VAL;
51 	}
52 
MoveTriangleToEnd(const T tri)53 	void MoveTriangleToEnd(const T tri)
54 	{
55 		T t_ind = FindTriangle(tri);
56 		assert(t_ind >= 0);
57 		tri_indices.erase(tri_indices.begin() + t_ind, tri_indices.begin() + t_ind + 1);
58 		tri_indices.push_back(tri);
59 	}
60 
TVertexCacheData()61 	TVertexCacheData()
62 	{
63 		position_in_cache = -1;
64 		current_score = 0.0f;
65 		total_valence = 0;
66 		remaining_valence = 0;
67 	}
68 };
69 typedef TVertexCacheData<int, INT_MAX >					VertexCacheDataInt;
70 typedef TVertexCacheData<unsigned int, INT_MAX >		VertexCacheDataUInt;
71 typedef TVertexCacheData<unsigned short, INT_MAX >		VertexCacheDataUShort;
72 
73 template <typename T, int ERR_VAL>
74 class TTriangleCacheData
75 {
76 public:
77 	bool rendered; // has the triangle been added to the draw list yet?
78 	bool calculated; // was the score calculated during this iteration?
79 	float current_score; // sum of the score of its vertices
80 	T verts[3]; // indices to the triangle's vertices
81 
TTriangleCacheData()82 	TTriangleCacheData() : rendered(false), calculated(false), current_score(0.0f)	{
83 		verts[0] = verts[1] = verts[2] = ERR_VAL;
84 	}
85 };
86 typedef TTriangleCacheData<int, INT_MAX >				TriangleCacheDataInt;
87 typedef TTriangleCacheData<unsigned int, INT_MAX >		TriangleCacheDataUInt;
88 typedef TTriangleCacheData<unsigned short, INT_MAX >	TriangleCacheDataUShort;
89 
90 template <typename T, int N, int ERR_VAL>
91 class TVertexCache
92 {
93 protected:
94 	T cache[N];
95 	int misses; // cache miss count
96 
FindVertex(const T v)97 	T FindVertex(const T v) const
98 	{
99 		for (int i=0; i<(N-1); ++i)	{
100 			if (cache[i] == v) return i;
101 		}
102 
103 		return ERR_VAL;
104 	}
105 
RemoveVertex(const int stack_index)106 	void RemoveVertex(const int stack_index)
107 	{
108 		for (int i=stack_index; i<(N-1); ++i) {
109 			cache[i] = cache[i+1];
110 		}
111 	}
112 
113 public:
114 	// the vertex will be placed on top
115 	// if the vertex didn't exist previewsly in
116 	// the cache, then miss count is incermented
AddVertex(const int v)117 	void AddVertex(const int v)
118 	{
119 		int w = FindVertex(v);
120 		if (w >= 0)	{
121 			// remove the vertex from the cache (to reinsert it later on the top)
122 			RemoveVertex(w);
123 		} else {
124 			// the vertex was not found in the cache - increment misses
125 			++misses;
126 		}
127 
128 		// shift all vertices down (to make room for the new top vertex)
129 		for (int i=(N-1); i>0; --i) {
130 			cache[i] = cache[i-1];
131 		}
132 
133 		// add the new vertex on top
134 		cache[0] = v;
135 	}
136 
Clear()137 	void Clear()
138 	{
139 		for (int i=0; i<N; ++i) cache[i] = ERR_VAL;
140 		misses = 0;
141 	}
142 
TVertexCache()143 	TVertexCache()
144 	{
145 		Clear();
146 	}
147 
GetCacheMissCount()148 	int GetCacheMissCount() const
149 	{
150 		return misses;
151 	}
152 
GetCachedVertex(int which)153 	T GetCachedVertex(int which) const
154 	{
155 		return cache[which];
156 	}
157 
GetCacheMissCount(int * inds,int tri_count)158 	int GetCacheMissCount(int *inds, int tri_count)
159 	{
160 		Clear();
161 
162 		for (int i=0; i<3*tri_count; i++)
163 		{
164 			AddVertex(inds[i]);
165 		}
166 
167 		return misses;
168 	}
169 };
170 typedef TVertexCache<int, 40, INT_MAX >					VertexCacheInt;
171 typedef TVertexCache<unsigned int, 40, INT_MAX >		VertexCacheUInt;
172 typedef TVertexCache<unsigned short, 40, INT_MAX >		VertexCacheUShort;
173 
174 template <typename T, int N, int ERR_VAL>
175 class TVertexCacheOptimizer
176 {
177 public:
178 	// CalculateVertexScore constants
179 	float CacheDecayPower;
180 	float LastTriScore;
181 	float ValenceBoostScale;
182 	float ValenceBoostPower;
183 
184 	enum Result
185 	{
186 		Success = 0,
187 		Fail_BadIndex,
188 		Fail_NoVerts
189 	};
190 
Failed(Result r)191 	static bool Failed(Result r)
192 	{
193 		return r != Success;
194 	}
195 
196 protected:
197 	std::vector<TVertexCacheData<T, ERR_VAL > > verts;
198 	std::vector<TTriangleCacheData<T, ERR_VAL > > tris;
199 	std::vector<T> inds;
200 	int best_tri; // the next triangle to add to the render list
201 	TVertexCache<T, N, ERR_VAL > vertex_cache;
202 	std::vector<T> draw_list;
203 
CalculateVertexScore(const int vertex)204 	float CalculateVertexScore(const int vertex)
205 	{
206 		TVertexCacheData<T, ERR_VAL > *v = &verts[vertex];
207 		if (v->remaining_valence <= 0) {
208 			// No tri needs this vertex!
209 			return -1.0f;
210 		}
211 
212 		float ret = 0.0f;
213 		if (v->position_in_cache < 0) {
214 			// Vertex is not in FIFO cache - no score.
215 		} else {
216 			if (v->position_in_cache < 3) {
217 				// This vertex was used in the last triangle,
218 				// so it has a fixed score, whichever of the three
219 				// it's in. Otherwise, you can get very different
220 				// answers depending on whether you add
221 				// the triangle 1,2,3 or 3,1,2 - which is silly.
222 				ret = LastTriScore;
223 			} else {
224 				// Points for being high in the cache.
225 				const float Scaler = 1.0f / (32  - 3);
226 				ret = 1.0f - (v->position_in_cache - 3) * Scaler;
227 				ret = powf(ret, CacheDecayPower);
228 			}
229 		}
230 
231 		// Bonus points for having a low number of tris still to
232 		// use the vert, so we get rid of lone verts quickly.
233 		float valence_boost = powf((float)v->remaining_valence, -ValenceBoostPower);
234 		ret += ValenceBoostScale * valence_boost;
235 
236 		return ret;
237 	}
238 
239 	// returns the index of the triangle with the highest score
240 	// (or -1, if there aren't any active triangles)
FullScoreRecalculation()241 	int FullScoreRecalculation()
242 	{
243 		// calculate score for all vertices
244 		for (size_t i=0; i<verts.size(); ++i) {
245 			verts[i].current_score = CalculateVertexScore(i);
246 		}
247 
248 		// calculate scores for all active triangles
249 		float max_score;
250 		int max_score_tri = -1;
251 		bool first_time = true;
252 		for (int i=0; i<(int)tris.size(); ++i) {
253 			if (tris[i].rendered) continue;
254 			// sum the score of all the triangle's vertices
255 			float sc = verts[tris[i].verts[0]].current_score +
256 				verts[tris[i].verts[1]].current_score +
257 				verts[tris[i].verts[2]].current_score;
258 
259 			tris[i].current_score = sc;
260 
261 			if (first_time || sc > max_score) {
262 				first_time = false;
263 				max_score = sc;
264 				max_score_tri = i;
265 			}
266 		}
267 
268 		return max_score_tri;
269 	}
270 
InitialPass()271 	Result InitialPass()
272 	{
273 		for (size_t i=0; i<inds.size(); ++i)
274 		{
275 			int index = inds[i];
276 			if (ERR_VAL==index || index >= (int)verts.size()) return Fail_BadIndex;
277 
278 			verts[index].total_valence++;
279 			verts[index].remaining_valence++;
280 
281 			verts[index].tri_indices.push_back(i/3);
282 		}
283 
284 		best_tri = FullScoreRecalculation();
285 
286 		return Success;
287 	}
288 
Init(T * inds,const int tri_count,const int vertex_count)289 	Result Init(T *inds, const int tri_count, const int vertex_count)
290 	{
291 		// clear the draw list
292 		draw_list.clear();
293 
294 		// allocate and initialize vertices and triangles
295 		verts.clear();
296 		for (int i=0; i<vertex_count; ++i) verts.push_back( TVertexCacheData<T, ERR_VAL >() );
297 
298 		tris.clear();
299 		for (int i=0; i<tri_count; ++i) {
300 			TTriangleCacheData<T, ERR_VAL > dat;
301 			for (int j=0; j<3; ++j)	{
302 				dat.verts[j] = inds[i * 3 + j];
303 			}
304 			tris.push_back(dat);
305 		}
306 
307 		// copy the indices
308 		this->inds.clear();
309 		for (int i=0; i<tri_count * 3; ++i) this->inds.push_back(inds[i]);
310 
311 		vertex_cache.Clear();
312 		best_tri = -1;
313 
314 		return InitialPass();
315 	}
316 
AddTriangleToDrawList(const int tri)317 	void AddTriangleToDrawList(const int tri)
318 	{
319 		// reset all cache positions
320 		for (int i=0; i<32; ++i) {
321 			int ind = vertex_cache.GetCachedVertex(i);
322 			if (ERR_VAL==ind) continue;
323 			verts[ind].position_in_cache = -1;
324 		}
325 
326 		TTriangleCacheData<T, ERR_VAL > *t = &tris[tri];
327 		if (t->rendered) return; // triangle is already in the draw list
328 
329 		for (int i=0; i<3; ++i)	{
330 			// add all triangle vertices to the cache
331 			vertex_cache.AddVertex(t->verts[i]);
332 
333 			TVertexCacheData<T, ERR_VAL > *v = &verts[t->verts[i]];
334 
335 			// decrease remaining velence
336 			v->remaining_valence--;
337 
338 			// move the added triangle to the end of the vertex's
339 			// triangle index list, so that the first 'remaining_valence'
340 			// triangles in the list are the active ones
341 			v->MoveTriangleToEnd(tri);
342 		}
343 
344 		draw_list.push_back(tri);
345 
346 		t->rendered = true;
347 
348 		// update all vertex cache positions
349 		for (int i=0; i<32; ++i) {
350 			T ind = vertex_cache.GetCachedVertex(i);
351 			if (ERR_VAL==ind) continue;
352 			verts[ind].position_in_cache = i;
353 		}
354 
355 	}
356 
357 	// Optimization: to avoid duplicate calculations durind the same iteration,
358 	// both vertices and triangles have a 'calculated' flag. This flag
359 	// must be cleared at the beginning of the iteration to all *active* triangles
360 	// that have one or more of their vertices currently cached, and all their
361 	// other vertices.
362 	// If there aren't any active triangles in the cache, the function returns
363 	// false and full recalculation is performed.
CleanCalculationFlags()364 	bool CleanCalculationFlags()
365 	{
366 		bool ret = false;
367 		for (int i=0; i<32; i++) {
368 			T vert = vertex_cache.GetCachedVertex(i);
369 			if (ERR_VAL==vert) continue;
370 
371 			TVertexCacheData<T, ERR_VAL > *v = &verts[vert];
372 
373 			for (int j=0; j<v->remaining_valence; j++) {
374 				TTriangleCacheData<T, ERR_VAL > *t = &tris[v->tri_indices[j]];
375 
376 				// we actually found a triangle to process
377 				ret = true;
378 
379 				// clear triangle flag
380 				t->calculated = false;
381 
382 				// clear vertex flags
383 				for (int tri_vert=0; tri_vert<3; tri_vert++) {
384 					verts[t->verts[tri_vert]].calculated = false;
385 				}
386 			}
387 		}
388 
389 		return ret;
390 	}
391 
TriangleScoreRecalculation(const int tri)392 	void TriangleScoreRecalculation(const int tri)
393 	{
394 		TTriangleCacheData<T, ERR_VAL > *t = &tris[tri];
395 
396 		// calculate vertex scores
397 		float sum = 0.0f;
398 		for (int i=0; i<3; i++)	{
399 			TVertexCacheData<T, ERR_VAL > *v = &verts[t->verts[i]];
400 			float sc = v->current_score;
401 			if (!v->calculated)	{
402 				sc = CalculateVertexScore(t->verts[i]);
403 			}
404 			v->current_score = sc;
405 			v->calculated = true;
406 			sum += sc;
407 		}
408 
409 		t->current_score = sum;
410 		t->calculated = true;
411 	}
412 
PartialScoreRecalculation()413 	int PartialScoreRecalculation()
414 	{
415 		// iterate through all the vertices of the cache
416 		bool first_time = true;
417 		float max_score;
418 		int max_score_tri = -1;
419 		for (int i=0; i<32; i++) {
420 			int vert = vertex_cache.GetCachedVertex(i);
421 			if (ERR_VAL==vert) continue;
422 
423 			TVertexCacheData<T, ERR_VAL > *v = &verts[vert];
424 
425 			// iterate through all *active* triangles of this vertex
426 			for (int j=0; j<v->remaining_valence; j++) {
427 				int tri = v->tri_indices[j];
428 				TTriangleCacheData<T, ERR_VAL > *t = &tris[tri];
429 				if (!t->calculated) {
430 					// calculate triangle score
431 					TriangleScoreRecalculation(tri);
432 				}
433 
434 				float sc = t->current_score;
435 
436 				// we actually found a triangle to process
437 				if (first_time || sc > max_score) {
438 					first_time = false;
439 					max_score = sc;
440 					max_score_tri = tri;
441 				}
442 			}
443 		}
444 
445 		return max_score_tri;
446 	}
447 
448 	// returns true while there are more steps to take
449 	// false when optimization is complete
Iterate()450 	bool Iterate()
451 	{
452 		if (draw_list.size() == tris.size()) return false;
453 
454 		// add the selected triangle to the draw list
455 		AddTriangleToDrawList(best_tri);
456 
457 		// recalculate vertex and triangle scores and
458 		// select the best triangle for the next iteration
459 		if (CleanCalculationFlags()) {
460 			best_tri = PartialScoreRecalculation();
461 		} else {
462 			best_tri = FullScoreRecalculation();
463 		}
464 
465 		return true;
466 	}
467 
468 public:
TVertexCacheOptimizer()469 	TVertexCacheOptimizer()
470 	{
471 		// initialize constants
472 		CacheDecayPower = 1.5f;
473 		LastTriScore = 0.75f;
474 		ValenceBoostScale = 2.0f;
475 		ValenceBoostPower = 0.5f;
476 
477 		best_tri = 0;
478 	}
479 
480 	// stores new indices in place
Optimize(T * inds,int tri_count)481 	Result Optimize(T *inds, int tri_count)
482 	{
483 		// find vertex count
484 		Sint64 max_vert = -1;
485 		for (int i=0; i<tri_count * 3; i++)	{
486 			if (inds[i] > max_vert) max_vert = inds[i];
487 		}
488 
489 		if (max_vert == -1) return Fail_NoVerts;
490 
491 		Result res = Init(inds, tri_count, max_vert + 1);
492 		if (res) return res;
493 
494 		// iterate until Iterate returns false
495 		while (Iterate());
496 
497 		// rewrite optimized index list
498 		for (size_t i=0; i<draw_list.size(); i++)	{
499 			inds[3 * i + 0] = tris[draw_list[i]].verts[0];
500 			inds[3 * i + 1] = tris[draw_list[i]].verts[1];
501 			inds[3 * i + 2] = tris[draw_list[i]].verts[2];
502 		}
503 
504 		return Success;
505 	}
506 };
507 typedef TVertexCacheOptimizer<int, 40, INT_MAX >				VertexCacheOptimizerInt;
508 typedef TVertexCacheOptimizer<unsigned int, 40, INT_MAX >		VertexCacheOptimizerUInt;
509 typedef TVertexCacheOptimizer<unsigned short, 40, INT_MAX >		VertexCacheOptimizerUShort;
510 
511 #endif // ndef _VCACHEOPT_H_
512