1 /* ----------------------------------------------------------------------------- 2 3 Copyright (c) 2006 Simon Brown si@sjbrown.co.uk 4 Copyright (c) 2006 Ignacio Castano icastano@nvidia.com 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice shall be included 15 in all copies or substantial portions of the Software. 16 17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 -------------------------------------------------------------------------- */ 26 27 #include "weightedclusterfit.h" 28 #include "colourset.h" 29 #include "colourblock.h" 30 #include <cfloat> 31 32 33 namespace squish { 34 WeightedClusterFit()35 WeightedClusterFit::WeightedClusterFit() 36 { 37 } 38 SetColourSet(ColourSet const * colours,int flags)39 void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags ) 40 { 41 ColourFit::SetColourSet( colours, flags ); 42 43 // initialise the best error 44 #if SQUISH_USE_SIMD 45 m_besterror = VEC4_CONST( FLT_MAX ); 46 Vec3 metric = m_metric.GetVec3(); 47 #else 48 m_besterror = FLT_MAX; 49 Vec3 metric = m_metric; 50 #endif 51 52 // cache some values 53 int const count = m_colours->GetCount(); 54 Vec3 const* values = m_colours->GetPoints(); 55 56 // get the covariance matrix 57 Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights(), metric ); 58 59 // compute the principle component 60 Vec3 principle = ComputePrincipleComponent( covariance ); 61 62 // build the list of values 63 float dps[16]; 64 for( int i = 0; i < count; ++i ) 65 { 66 dps[i] = Dot( values[i], principle ); 67 m_order[i] = i; 68 } 69 70 // stable sort 71 for( int i = 0; i < count; ++i ) 72 { 73 for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j ) 74 { 75 std::swap( dps[j], dps[j - 1] ); 76 std::swap( m_order[j], m_order[j - 1] ); 77 } 78 } 79 80 // weight all the points 81 #if SQUISH_USE_SIMD 82 Vec4 const* unweighted = m_colours->GetPointsSimd(); 83 Vec4 const* weights = m_colours->GetWeightsSimd(); 84 m_xxsum = VEC4_CONST( 0.0f ); 85 m_xsum = VEC4_CONST( 0.0f ); 86 #else 87 Vec3 const* unweighted = m_colours->GetPoints(); 88 float const* weights = m_colours->GetWeights(); 89 m_xxsum = Vec3( 0.0f ); 90 m_xsum = Vec3( 0.0f ); 91 m_wsum = 0.0f; 92 #endif 93 94 for( int i = 0; i < count; ++i ) 95 { 96 int p = m_order[i]; 97 m_weighted[i] = weights[p] * unweighted[p]; 98 m_xxsum += m_weighted[i] * m_weighted[i]; 99 m_xsum += m_weighted[i]; 100 #if !SQUISH_USE_SIMD 101 m_weights[i] = weights[p]; 102 m_wsum += m_weights[i]; 103 #endif 104 } 105 } 106 107 SetMetric(float r,float g,float b)108 void WeightedClusterFit::SetMetric(float r, float g, float b) 109 { 110 #if SQUISH_USE_SIMD 111 m_metric = Vec4(r, g, b, 0); 112 #else 113 m_metric = Vec3(r, g, b); 114 #endif 115 m_metricSqr = m_metric * m_metric; 116 } 117 GetBestError() const118 float WeightedClusterFit::GetBestError() const 119 { 120 #if SQUISH_USE_SIMD 121 Vec4 x = m_xxsum * m_metricSqr; 122 Vec4 error = m_besterror + x.SplatX() + x.SplatY() + x.SplatZ(); 123 return error.GetVec3().X(); 124 #else 125 return m_besterror + Dot(m_xxsum, m_metricSqr); 126 #endif 127 128 } 129 130 #if SQUISH_USE_SIMD 131 Compress3(void * block)132 void WeightedClusterFit::Compress3( void* block ) 133 { 134 int const count = m_colours->GetCount(); 135 Vec4 const one = VEC4_CONST(1.0f); 136 Vec4 const zero = VEC4_CONST(0.0f); 137 Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f); 138 Vec4 const two = VEC4_CONST(2.0); 139 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); 140 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); 141 142 // declare variables 143 Vec4 beststart = VEC4_CONST( 0.0f ); 144 Vec4 bestend = VEC4_CONST( 0.0f ); 145 Vec4 besterror = VEC4_CONST( FLT_MAX ); 146 147 Vec4 x0 = zero; 148 149 int b0 = 0, b1 = 0; 150 151 // check all possible clusters for this total order 152 for( int c0 = 0; c0 <= count; c0++) 153 { 154 Vec4 x1 = zero; 155 156 for( int c1 = 0; c1 <= count-c0; c1++) 157 { 158 Vec4 const x2 = m_xsum - x1 - x0; 159 160 //Vec3 const alphax_sum = x0 + x1 * 0.5f; 161 //float const alpha2_sum = w0 + w1 * 0.25f; 162 Vec4 const alphax_sum = MultiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum 163 Vec4 const alpha2_sum = alphax_sum.SplatW(); 164 165 //Vec3 const betax_sum = x2 + x1 * 0.5f; 166 //float const beta2_sum = w2 + w1 * 0.25f; 167 Vec4 const betax_sum = MultiplyAdd(x1, half, x2); // betax_sum, beta2_sum 168 Vec4 const beta2_sum = betax_sum.SplatW(); 169 170 //float const alphabeta_sum = w1 * 0.25f; 171 Vec4 const alphabeta_sum = (x1 * half).SplatW(); // alphabeta_sum 172 173 // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); 174 Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); 175 176 Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; 177 Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; 178 179 // clamp to the grid 180 a = Min( one, Max( zero, a ) ); 181 b = Min( one, Max( zero, b ) ); 182 a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; 183 b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; 184 185 // compute the error (we skip the constant xxsum) 186 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); 187 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); 188 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); 189 Vec4 e4 = MultiplyAdd( two, e3, e1 ); 190 191 // apply the metric to the error term 192 Vec4 e5 = e4 * m_metricSqr; 193 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); 194 195 // keep the solution if it wins 196 if( CompareAnyLessThan( error, besterror ) ) 197 { 198 besterror = error; 199 beststart = a; 200 bestend = b; 201 b0 = c0; 202 b1 = c1; 203 } 204 205 x1 += m_weighted[c0+c1]; 206 } 207 208 x0 += m_weighted[c0]; 209 } 210 211 // save the block if necessary 212 if( CompareAnyLessThan( besterror, m_besterror ) ) 213 { 214 // compute indices from cluster sizes. 215 u8 bestindices[16]; 216 { 217 int i = 0; 218 for(; i < b0; i++) { 219 bestindices[i] = 0; 220 } 221 for(; i < b0+b1; i++) { 222 bestindices[i] = 2; 223 } 224 for(; i < count; i++) { 225 bestindices[i] = 1; 226 } 227 } 228 229 // remap the indices 230 u8 ordered[16]; 231 for( int i = 0; i < count; ++i ) 232 ordered[m_order[i]] = bestindices[i]; 233 234 m_colours->RemapIndices( ordered, bestindices ); 235 236 237 // save the block 238 WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); 239 240 // save the error 241 m_besterror = besterror; 242 } 243 } 244 Compress4(void * block)245 void WeightedClusterFit::Compress4( void* block ) 246 { 247 int const count = m_colours->GetCount(); 248 Vec4 const one = VEC4_CONST(1.0f); 249 Vec4 const zero = VEC4_CONST(0.0f); 250 Vec4 const half = VEC4_CONST(0.5f); 251 Vec4 const two = VEC4_CONST(2.0); 252 Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); 253 Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); 254 Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f ); 255 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); 256 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); 257 258 // declare variables 259 Vec4 beststart = VEC4_CONST( 0.0f ); 260 Vec4 bestend = VEC4_CONST( 0.0f ); 261 Vec4 besterror = VEC4_CONST( FLT_MAX ); 262 263 Vec4 x0 = zero; 264 int b0 = 0, b1 = 0, b2 = 0; 265 266 // check all possible clusters for this total order 267 for( int c0 = 0; c0 <= count; c0++) 268 { 269 Vec4 x1 = zero; 270 271 for( int c1 = 0; c1 <= count-c0; c1++) 272 { 273 Vec4 x2 = zero; 274 275 for( int c2 = 0; c2 <= count-c0-c1; c2++) 276 { 277 Vec4 const x3 = m_xsum - x2 - x1 - x0; 278 279 //Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); 280 //float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); 281 Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum 282 Vec4 const alpha2_sum = alphax_sum.SplatW(); 283 284 //Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f); 285 //float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); 286 Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum 287 Vec4 const beta2_sum = betax_sum.SplatW(); 288 289 //float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f); 290 Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum 291 292 // float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); 293 Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); 294 295 Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; 296 Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; 297 298 // clamp to the grid 299 a = Min( one, Max( zero, a ) ); 300 b = Min( one, Max( zero, b ) ); 301 a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp; 302 b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp; 303 304 // compute the error (we skip the constant xxsum) 305 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); 306 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); 307 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); 308 Vec4 e4 = MultiplyAdd( two, e3, e1 ); 309 310 // apply the metric to the error term 311 Vec4 e5 = e4 * m_metricSqr; 312 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); 313 314 // keep the solution if it wins 315 if( CompareAnyLessThan( error, besterror ) ) 316 { 317 besterror = error; 318 beststart = a; 319 bestend = b; 320 b0 = c0; 321 b1 = c1; 322 b2 = c2; 323 } 324 325 x2 += m_weighted[c0+c1+c2]; 326 } 327 328 x1 += m_weighted[c0+c1]; 329 } 330 331 x0 += m_weighted[c0]; 332 } 333 334 // save the block if necessary 335 if( CompareAnyLessThan( besterror, m_besterror ) ) 336 { 337 // compute indices from cluster sizes. 338 u8 bestindices[16]; 339 { 340 int i = 0; 341 for(; i < b0; i++) { 342 bestindices[i] = 0; 343 } 344 for(; i < b0+b1; i++) { 345 bestindices[i] = 2; 346 } 347 for(; i < b0+b1+b2; i++) { 348 bestindices[i] = 3; 349 } 350 for(; i < count; i++) { 351 bestindices[i] = 1; 352 } 353 } 354 355 // remap the indices 356 u8 ordered[16]; 357 for( int i = 0; i < count; ++i ) 358 ordered[m_order[i]] = bestindices[i]; 359 360 m_colours->RemapIndices( ordered, bestindices ); 361 362 // save the block 363 WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); 364 365 // save the error 366 m_besterror = besterror; 367 } 368 } 369 370 #else 371 Compress3(void * block)372 void WeightedClusterFit::Compress3( void* block ) 373 { 374 int const count = m_colours->GetCount(); 375 Vec3 const one( 1.0f ); 376 Vec3 const zero( 0.0f ); 377 Vec3 const half( 0.5f ); 378 Vec3 const grid( 31.0f, 63.0f, 31.0f ); 379 Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); 380 381 // declare variables 382 Vec3 beststart( 0.0f ); 383 Vec3 bestend( 0.0f ); 384 float besterror = FLT_MAX; 385 386 Vec3 x0(0.0f); 387 float w0 = 0.0f; 388 389 int b0 = 0, b1 = 0; 390 391 // check all possible clusters for this total order 392 for( int c0 = 0; c0 <= count; c0++) 393 { 394 Vec3 x1(0.0f); 395 float w1 = 0.0f; 396 397 for( int c1 = 0; c1 <= count-c0; c1++) 398 { 399 float w2 = m_wsum - w0 - w1; 400 401 // These factors could be entirely precomputed. 402 float const alpha2_sum = w0 + w1 * 0.25f; 403 float const beta2_sum = w2 + w1 * 0.25f; 404 float const alphabeta_sum = w1 * 0.25f; 405 float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); 406 407 Vec3 const alphax_sum = x0 + x1 * 0.5f; 408 Vec3 const betax_sum = m_xsum - alphax_sum; 409 410 Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor; 411 Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor; 412 413 // clamp to the grid 414 a = Min( one, Max( zero, a ) ); 415 b = Min( one, Max( zero, b ) ); 416 a = Floor( grid*a + half )*gridrcp; 417 b = Floor( grid*b + half )*gridrcp; 418 419 // compute the error 420 Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); 421 422 // apply the metric to the error term 423 float error = Dot( e1, m_metricSqr ); 424 425 // keep the solution if it wins 426 if( error < besterror ) 427 { 428 besterror = error; 429 beststart = a; 430 bestend = b; 431 b0 = c0; 432 b1 = c1; 433 } 434 435 x1 += m_weighted[c0+c1]; 436 w1 += m_weights[c0+c1]; 437 } 438 439 x0 += m_weighted[c0]; 440 w0 += m_weights[c0]; 441 } 442 443 // save the block if necessary 444 if( besterror < m_besterror ) 445 { 446 // compute indices from cluster sizes. 447 u8 bestindices[16]; 448 { 449 int i = 0; 450 for(; i < b0; i++) { 451 bestindices[i] = 0; 452 } 453 for(; i < b0+b1; i++) { 454 bestindices[i] = 2; 455 } 456 for(; i < count; i++) { 457 bestindices[i] = 1; 458 } 459 } 460 461 // remap the indices 462 u8 ordered[16]; 463 for( int i = 0; i < count; ++i ) 464 ordered[m_order[i]] = bestindices[i]; 465 466 m_colours->RemapIndices( ordered, bestindices ); 467 468 // save the block 469 WriteColourBlock3( beststart, bestend, bestindices, block ); 470 471 // save the error 472 m_besterror = besterror; 473 } 474 } 475 Compress4(void * block)476 void WeightedClusterFit::Compress4( void* block ) 477 { 478 int const count = m_colours->GetCount(); 479 Vec3 const one( 1.0f ); 480 Vec3 const zero( 0.0f ); 481 Vec3 const half( 0.5f ); 482 Vec3 const grid( 31.0f, 63.0f, 31.0f ); 483 Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f ); 484 485 // declare variables 486 Vec3 beststart( 0.0f ); 487 Vec3 bestend( 0.0f ); 488 float besterror = FLT_MAX; 489 490 Vec3 x0(0.0f); 491 float w0 = 0.0f; 492 int b0 = 0, b1 = 0, b2 = 0; 493 494 // check all possible clusters for this total order 495 for( int c0 = 0; c0 <= count; c0++) 496 { 497 Vec3 x1(0.0f); 498 float w1 = 0.0f; 499 500 for( int c1 = 0; c1 <= count-c0; c1++) 501 { 502 Vec3 x2(0.0f); 503 float w2 = 0.0f; 504 505 for( int c2 = 0; c2 <= count-c0-c1; c2++) 506 { 507 float w3 = m_wsum - w0 - w1 - w2; 508 509 float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); 510 float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); 511 float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f); 512 float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); 513 514 Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); 515 Vec3 const betax_sum = m_xsum - alphax_sum; 516 517 Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor; 518 Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor; 519 520 // clamp to the grid 521 a = Min( one, Max( zero, a ) ); 522 b = Min( one, Max( zero, b ) ); 523 a = Floor( grid*a + half )*gridrcp; 524 b = Floor( grid*b + half )*gridrcp; 525 526 // compute the error 527 Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum ); 528 529 // apply the metric to the error term 530 float error = Dot( e1, m_metricSqr ); 531 532 // keep the solution if it wins 533 if( error < besterror ) 534 { 535 besterror = error; 536 beststart = a; 537 bestend = b; 538 b0 = c0; 539 b1 = c1; 540 b2 = c2; 541 } 542 543 x2 += m_weighted[c0+c1+c2]; 544 w2 += m_weights[c0+c1+c2]; 545 } 546 547 x1 += m_weighted[c0+c1]; 548 w1 += m_weights[c0+c1]; 549 } 550 551 x0 += m_weighted[c0]; 552 w0 += m_weights[c0]; 553 } 554 555 // save the block if necessary 556 if( besterror < m_besterror ) 557 { 558 // compute indices from cluster sizes. 559 u8 bestindices[16]; 560 { 561 int i = 0; 562 for(; i < b0; i++) { 563 bestindices[i] = 0; 564 } 565 for(; i < b0+b1; i++) { 566 bestindices[i] = 2; 567 } 568 for(; i < b0+b1+b2; i++) { 569 bestindices[i] = 3; 570 } 571 for(; i < count; i++) { 572 bestindices[i] = 1; 573 } 574 } 575 576 // remap the indices 577 u8 ordered[16]; 578 for( int i = 0; i < count; ++i ) 579 ordered[m_order[i]] = bestindices[i]; 580 581 m_colours->RemapIndices( ordered, bestindices ); 582 583 // save the block 584 WriteColourBlock4( beststart, bestend, bestindices, block ); 585 586 // save the error 587 m_besterror = besterror; 588 } 589 } 590 591 #endif 592 593 } // namespace squish 594