1 /* 2 * vcacheopt.h - Vertex Cache Optimizer 3 * Copyright 2009 Michael Georgoulpoulos <mgeorgoulopoulos at gmail> 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a copy 6 * of this software and associated documentation files (the "Software"), to deal 7 * in the Software without restriction, including without limitation the rights 8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 * copies of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 * THE SOFTWARE. 22 */ 23 24 #ifndef _VCACHEOPT_H_ 25 #define _VCACHEOPT_H_ 26 27 #include <vector> 28 #include <cmath> 29 #include <cassert> 30 #include <climits> 31 32 template <typename T, int ERR_VAL> 33 class TVertexCacheData 34 { 35 public: 36 int position_in_cache; 37 float current_score; 38 int total_valence; // toatl number of triangles using this vertex 39 int remaining_valence; // number of triangles using it but not yet rendered 40 std::vector<T> tri_indices; // indices to the indices that use this vertex 41 bool calculated; // was the score calculated during this iteration? 42 43 FindTriangle(const T tri)44 T FindTriangle(const T tri) const 45 { 46 for (size_t i=0; i<tri_indices.size(); ++i) { 47 if (tri_indices[i] == tri) 48 return i; 49 } 50 return ERR_VAL; 51 } 52 MoveTriangleToEnd(const T tri)53 void MoveTriangleToEnd(const T tri) 54 { 55 T t_ind = FindTriangle(tri); 56 assert(t_ind >= 0); 57 tri_indices.erase(tri_indices.begin() + t_ind, tri_indices.begin() + t_ind + 1); 58 tri_indices.push_back(tri); 59 } 60 TVertexCacheData()61 TVertexCacheData() 62 { 63 position_in_cache = -1; 64 current_score = 0.0f; 65 total_valence = 0; 66 remaining_valence = 0; 67 } 68 }; 69 typedef TVertexCacheData<int, INT_MAX > VertexCacheDataInt; 70 typedef TVertexCacheData<unsigned int, INT_MAX > VertexCacheDataUInt; 71 typedef TVertexCacheData<unsigned short, INT_MAX > VertexCacheDataUShort; 72 73 template <typename T, int ERR_VAL> 74 class TTriangleCacheData 75 { 76 public: 77 bool rendered; // has the triangle been added to the draw list yet? 78 bool calculated; // was the score calculated during this iteration? 79 float current_score; // sum of the score of its vertices 80 T verts[3]; // indices to the triangle's vertices 81 TTriangleCacheData()82 TTriangleCacheData() : rendered(false), calculated(false), current_score(0.0f) { 83 verts[0] = verts[1] = verts[2] = ERR_VAL; 84 } 85 }; 86 typedef TTriangleCacheData<int, INT_MAX > TriangleCacheDataInt; 87 typedef TTriangleCacheData<unsigned int, INT_MAX > TriangleCacheDataUInt; 88 typedef TTriangleCacheData<unsigned short, INT_MAX > TriangleCacheDataUShort; 89 90 template <typename T, int N, int ERR_VAL> 91 class TVertexCache 92 { 93 protected: 94 T cache[N]; 95 int misses; // cache miss count 96 FindVertex(const T v)97 T FindVertex(const T v) const 98 { 99 for (int i=0; i<(N-1); ++i) { 100 if (cache[i] == v) return i; 101 } 102 103 return ERR_VAL; 104 } 105 RemoveVertex(const int stack_index)106 void RemoveVertex(const int stack_index) 107 { 108 for (int i=stack_index; i<(N-1); ++i) { 109 cache[i] = cache[i+1]; 110 } 111 } 112 113 public: 114 // the vertex will be placed on top 115 // if the vertex didn't exist previewsly in 116 // the cache, then miss count is incermented AddVertex(const int v)117 void AddVertex(const int v) 118 { 119 int w = FindVertex(v); 120 if (w >= 0) { 121 // remove the vertex from the cache (to reinsert it later on the top) 122 RemoveVertex(w); 123 } else { 124 // the vertex was not found in the cache - increment misses 125 ++misses; 126 } 127 128 // shift all vertices down (to make room for the new top vertex) 129 for (int i=(N-1); i>0; --i) { 130 cache[i] = cache[i-1]; 131 } 132 133 // add the new vertex on top 134 cache[0] = v; 135 } 136 Clear()137 void Clear() 138 { 139 for (int i=0; i<N; ++i) cache[i] = ERR_VAL; 140 misses = 0; 141 } 142 TVertexCache()143 TVertexCache() 144 { 145 Clear(); 146 } 147 GetCacheMissCount()148 int GetCacheMissCount() const 149 { 150 return misses; 151 } 152 GetCachedVertex(int which)153 T GetCachedVertex(int which) const 154 { 155 return cache[which]; 156 } 157 GetCacheMissCount(int * inds,int tri_count)158 int GetCacheMissCount(int *inds, int tri_count) 159 { 160 Clear(); 161 162 for (int i=0; i<3*tri_count; i++) 163 { 164 AddVertex(inds[i]); 165 } 166 167 return misses; 168 } 169 }; 170 typedef TVertexCache<int, 40, INT_MAX > VertexCacheInt; 171 typedef TVertexCache<unsigned int, 40, INT_MAX > VertexCacheUInt; 172 typedef TVertexCache<unsigned short, 40, INT_MAX > VertexCacheUShort; 173 174 template <typename T, int N, int ERR_VAL> 175 class TVertexCacheOptimizer 176 { 177 public: 178 // CalculateVertexScore constants 179 float CacheDecayPower; 180 float LastTriScore; 181 float ValenceBoostScale; 182 float ValenceBoostPower; 183 184 enum Result 185 { 186 Success = 0, 187 Fail_BadIndex, 188 Fail_NoVerts 189 }; 190 Failed(Result r)191 static bool Failed(Result r) 192 { 193 return r != Success; 194 } 195 196 protected: 197 std::vector<TVertexCacheData<T, ERR_VAL > > verts; 198 std::vector<TTriangleCacheData<T, ERR_VAL > > tris; 199 std::vector<T> inds; 200 int best_tri; // the next triangle to add to the render list 201 TVertexCache<T, N, ERR_VAL > vertex_cache; 202 std::vector<T> draw_list; 203 CalculateVertexScore(const int vertex)204 float CalculateVertexScore(const int vertex) 205 { 206 TVertexCacheData<T, ERR_VAL > *v = &verts[vertex]; 207 if (v->remaining_valence <= 0) { 208 // No tri needs this vertex! 209 return -1.0f; 210 } 211 212 float ret = 0.0f; 213 if (v->position_in_cache < 0) { 214 // Vertex is not in FIFO cache - no score. 215 } else { 216 if (v->position_in_cache < 3) { 217 // This vertex was used in the last triangle, 218 // so it has a fixed score, whichever of the three 219 // it's in. Otherwise, you can get very different 220 // answers depending on whether you add 221 // the triangle 1,2,3 or 3,1,2 - which is silly. 222 ret = LastTriScore; 223 } else { 224 // Points for being high in the cache. 225 const float Scaler = 1.0f / (32 - 3); 226 ret = 1.0f - (v->position_in_cache - 3) * Scaler; 227 ret = powf(ret, CacheDecayPower); 228 } 229 } 230 231 // Bonus points for having a low number of tris still to 232 // use the vert, so we get rid of lone verts quickly. 233 float valence_boost = powf((float)v->remaining_valence, -ValenceBoostPower); 234 ret += ValenceBoostScale * valence_boost; 235 236 return ret; 237 } 238 239 // returns the index of the triangle with the highest score 240 // (or -1, if there aren't any active triangles) FullScoreRecalculation()241 int FullScoreRecalculation() 242 { 243 // calculate score for all vertices 244 for (size_t i=0; i<verts.size(); ++i) { 245 verts[i].current_score = CalculateVertexScore(i); 246 } 247 248 // calculate scores for all active triangles 249 float max_score; 250 int max_score_tri = -1; 251 bool first_time = true; 252 for (int i=0; i<(int)tris.size(); ++i) { 253 if (tris[i].rendered) continue; 254 // sum the score of all the triangle's vertices 255 float sc = verts[tris[i].verts[0]].current_score + 256 verts[tris[i].verts[1]].current_score + 257 verts[tris[i].verts[2]].current_score; 258 259 tris[i].current_score = sc; 260 261 if (first_time || sc > max_score) { 262 first_time = false; 263 max_score = sc; 264 max_score_tri = i; 265 } 266 } 267 268 return max_score_tri; 269 } 270 InitialPass()271 Result InitialPass() 272 { 273 for (size_t i=0; i<inds.size(); ++i) 274 { 275 int index = inds[i]; 276 if (ERR_VAL==index || index >= (int)verts.size()) return Fail_BadIndex; 277 278 verts[index].total_valence++; 279 verts[index].remaining_valence++; 280 281 verts[index].tri_indices.push_back(i/3); 282 } 283 284 best_tri = FullScoreRecalculation(); 285 286 return Success; 287 } 288 Init(T * inds,const int tri_count,const int vertex_count)289 Result Init(T *inds, const int tri_count, const int vertex_count) 290 { 291 // clear the draw list 292 draw_list.clear(); 293 294 // allocate and initialize vertices and triangles 295 verts.clear(); 296 for (int i=0; i<vertex_count; ++i) verts.push_back( TVertexCacheData<T, ERR_VAL >() ); 297 298 tris.clear(); 299 for (int i=0; i<tri_count; ++i) { 300 TTriangleCacheData<T, ERR_VAL > dat; 301 for (int j=0; j<3; ++j) { 302 dat.verts[j] = inds[i * 3 + j]; 303 } 304 tris.push_back(dat); 305 } 306 307 // copy the indices 308 this->inds.clear(); 309 for (int i=0; i<tri_count * 3; ++i) this->inds.push_back(inds[i]); 310 311 vertex_cache.Clear(); 312 best_tri = -1; 313 314 return InitialPass(); 315 } 316 AddTriangleToDrawList(const int tri)317 void AddTriangleToDrawList(const int tri) 318 { 319 // reset all cache positions 320 for (int i=0; i<32; ++i) { 321 int ind = vertex_cache.GetCachedVertex(i); 322 if (ERR_VAL==ind) continue; 323 verts[ind].position_in_cache = -1; 324 } 325 326 TTriangleCacheData<T, ERR_VAL > *t = &tris[tri]; 327 if (t->rendered) return; // triangle is already in the draw list 328 329 for (int i=0; i<3; ++i) { 330 // add all triangle vertices to the cache 331 vertex_cache.AddVertex(t->verts[i]); 332 333 TVertexCacheData<T, ERR_VAL > *v = &verts[t->verts[i]]; 334 335 // decrease remaining velence 336 v->remaining_valence--; 337 338 // move the added triangle to the end of the vertex's 339 // triangle index list, so that the first 'remaining_valence' 340 // triangles in the list are the active ones 341 v->MoveTriangleToEnd(tri); 342 } 343 344 draw_list.push_back(tri); 345 346 t->rendered = true; 347 348 // update all vertex cache positions 349 for (int i=0; i<32; ++i) { 350 T ind = vertex_cache.GetCachedVertex(i); 351 if (ERR_VAL==ind) continue; 352 verts[ind].position_in_cache = i; 353 } 354 355 } 356 357 // Optimization: to avoid duplicate calculations durind the same iteration, 358 // both vertices and triangles have a 'calculated' flag. This flag 359 // must be cleared at the beginning of the iteration to all *active* triangles 360 // that have one or more of their vertices currently cached, and all their 361 // other vertices. 362 // If there aren't any active triangles in the cache, the function returns 363 // false and full recalculation is performed. CleanCalculationFlags()364 bool CleanCalculationFlags() 365 { 366 bool ret = false; 367 for (int i=0; i<32; i++) { 368 T vert = vertex_cache.GetCachedVertex(i); 369 if (ERR_VAL==vert) continue; 370 371 TVertexCacheData<T, ERR_VAL > *v = &verts[vert]; 372 373 for (int j=0; j<v->remaining_valence; j++) { 374 TTriangleCacheData<T, ERR_VAL > *t = &tris[v->tri_indices[j]]; 375 376 // we actually found a triangle to process 377 ret = true; 378 379 // clear triangle flag 380 t->calculated = false; 381 382 // clear vertex flags 383 for (int tri_vert=0; tri_vert<3; tri_vert++) { 384 verts[t->verts[tri_vert]].calculated = false; 385 } 386 } 387 } 388 389 return ret; 390 } 391 TriangleScoreRecalculation(const int tri)392 void TriangleScoreRecalculation(const int tri) 393 { 394 TTriangleCacheData<T, ERR_VAL > *t = &tris[tri]; 395 396 // calculate vertex scores 397 float sum = 0.0f; 398 for (int i=0; i<3; i++) { 399 TVertexCacheData<T, ERR_VAL > *v = &verts[t->verts[i]]; 400 float sc = v->current_score; 401 if (!v->calculated) { 402 sc = CalculateVertexScore(t->verts[i]); 403 } 404 v->current_score = sc; 405 v->calculated = true; 406 sum += sc; 407 } 408 409 t->current_score = sum; 410 t->calculated = true; 411 } 412 PartialScoreRecalculation()413 int PartialScoreRecalculation() 414 { 415 // iterate through all the vertices of the cache 416 bool first_time = true; 417 float max_score; 418 int max_score_tri = -1; 419 for (int i=0; i<32; i++) { 420 int vert = vertex_cache.GetCachedVertex(i); 421 if (ERR_VAL==vert) continue; 422 423 TVertexCacheData<T, ERR_VAL > *v = &verts[vert]; 424 425 // iterate through all *active* triangles of this vertex 426 for (int j=0; j<v->remaining_valence; j++) { 427 int tri = v->tri_indices[j]; 428 TTriangleCacheData<T, ERR_VAL > *t = &tris[tri]; 429 if (!t->calculated) { 430 // calculate triangle score 431 TriangleScoreRecalculation(tri); 432 } 433 434 float sc = t->current_score; 435 436 // we actually found a triangle to process 437 if (first_time || sc > max_score) { 438 first_time = false; 439 max_score = sc; 440 max_score_tri = tri; 441 } 442 } 443 } 444 445 return max_score_tri; 446 } 447 448 // returns true while there are more steps to take 449 // false when optimization is complete Iterate()450 bool Iterate() 451 { 452 if (draw_list.size() == tris.size()) return false; 453 454 // add the selected triangle to the draw list 455 AddTriangleToDrawList(best_tri); 456 457 // recalculate vertex and triangle scores and 458 // select the best triangle for the next iteration 459 if (CleanCalculationFlags()) { 460 best_tri = PartialScoreRecalculation(); 461 } else { 462 best_tri = FullScoreRecalculation(); 463 } 464 465 return true; 466 } 467 468 public: TVertexCacheOptimizer()469 TVertexCacheOptimizer() 470 { 471 // initialize constants 472 CacheDecayPower = 1.5f; 473 LastTriScore = 0.75f; 474 ValenceBoostScale = 2.0f; 475 ValenceBoostPower = 0.5f; 476 477 best_tri = 0; 478 } 479 480 // stores new indices in place Optimize(T * inds,int tri_count)481 Result Optimize(T *inds, int tri_count) 482 { 483 // find vertex count 484 Sint64 max_vert = -1; 485 for (int i=0; i<tri_count * 3; i++) { 486 if (inds[i] > max_vert) max_vert = inds[i]; 487 } 488 489 if (max_vert == -1) return Fail_NoVerts; 490 491 Result res = Init(inds, tri_count, max_vert + 1); 492 if (res) return res; 493 494 // iterate until Iterate returns false 495 while (Iterate()); 496 497 // rewrite optimized index list 498 for (size_t i=0; i<draw_list.size(); i++) { 499 inds[3 * i + 0] = tris[draw_list[i]].verts[0]; 500 inds[3 * i + 1] = tris[draw_list[i]].verts[1]; 501 inds[3 * i + 2] = tris[draw_list[i]].verts[2]; 502 } 503 504 return Success; 505 } 506 }; 507 typedef TVertexCacheOptimizer<int, 40, INT_MAX > VertexCacheOptimizerInt; 508 typedef TVertexCacheOptimizer<unsigned int, 40, INT_MAX > VertexCacheOptimizerUInt; 509 typedef TVertexCacheOptimizer<unsigned short, 40, INT_MAX > VertexCacheOptimizerUShort; 510 511 #endif // ndef _VCACHEOPT_H_ 512