1 /* -----------------------------------------------------------------------------
2 
3 Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
4 Copyright (c) 2006 Ignacio Castano                      icastano@nvidia.com
5 
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to	deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13 
14 The above copyright notice and this permission notice shall be included
15 in all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 -------------------------------------------------------------------------- */
26 
27 #include "weightedclusterfit.h"
28 #include "colourset.h"
29 #include "colourblock.h"
30 #include <cfloat>
31 
32 
33 namespace squish {
34 
WeightedClusterFit()35 	WeightedClusterFit::WeightedClusterFit()
36 	{
37 	}
38 
SetColourSet(ColourSet const * colours,int flags)39 	void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags )
40 	{
41 		ColourFit::SetColourSet( colours, flags );
42 
43 		// initialise the best error
44 #if SQUISH_USE_SIMD
45 		m_besterror = VEC4_CONST( FLT_MAX );
46 		Vec3 metric = m_metric.GetVec3();
47 #else
48 		m_besterror = FLT_MAX;
49 		Vec3 metric = m_metric;
50 #endif
51 
52 		// cache some values
53 		int const count = m_colours->GetCount();
54 		Vec3 const* values = m_colours->GetPoints();
55 
56 		// get the covariance matrix
57 		Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights(), metric );
58 
59 		// compute the principle component
60 		Vec3 principle = ComputePrincipleComponent( covariance );
61 
62 		// build the list of values
63 		float dps[16];
64 		for( int i = 0; i < count; ++i )
65 		{
66 			dps[i] = Dot( values[i], principle );
67 			m_order[i] = i;
68 		}
69 
70 		// stable sort
71 		for( int i = 0; i < count; ++i )
72 		{
73 			for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
74 			{
75 				std::swap( dps[j], dps[j - 1] );
76 				std::swap( m_order[j], m_order[j - 1] );
77 			}
78 		}
79 
80 		// weight all the points
81 #if SQUISH_USE_SIMD
82 		Vec4 const* unweighted = m_colours->GetPointsSimd();
83 		Vec4 const* weights = m_colours->GetWeightsSimd();
84 		m_xxsum = VEC4_CONST( 0.0f );
85 		m_xsum = VEC4_CONST( 0.0f );
86 #else
87 		Vec3 const* unweighted = m_colours->GetPoints();
88 		float const* weights = m_colours->GetWeights();
89 		m_xxsum = Vec3( 0.0f );
90 		m_xsum = Vec3( 0.0f );
91 		m_wsum = 0.0f;
92 #endif
93 
94 		for( int i = 0; i < count; ++i )
95 		{
96 			int p = m_order[i];
97 			m_weighted[i] = weights[p] * unweighted[p];
98 			m_xxsum += m_weighted[i] * m_weighted[i];
99 			m_xsum += m_weighted[i];
100 #if !SQUISH_USE_SIMD
101 			m_weights[i] = weights[p];
102 			m_wsum += m_weights[i];
103 #endif
104 		}
105 	}
106 
107 
SetMetric(float r,float g,float b)108 	void WeightedClusterFit::SetMetric(float r, float g, float b)
109 	{
110 #if SQUISH_USE_SIMD
111 		m_metric = Vec4(r, g, b, 0);
112 #else
113 		m_metric = Vec3(r, g, b);
114 #endif
115 		m_metricSqr = m_metric * m_metric;
116 	}
117 
GetBestError() const118 	float WeightedClusterFit::GetBestError() const
119 	{
120 #if SQUISH_USE_SIMD
121 		Vec4 x = m_xxsum * m_metricSqr;
122 		Vec4 error = m_besterror + x.SplatX() + x.SplatY() + x.SplatZ();
123 		return error.GetVec3().X();
124 #else
125 		return m_besterror + Dot(m_xxsum, m_metricSqr);
126 #endif
127 
128 	}
129 
130 #if SQUISH_USE_SIMD
131 
Compress3(void * block)132 	void WeightedClusterFit::Compress3( void* block )
133 	{
134 		int const count = m_colours->GetCount();
135 		Vec4 const one = VEC4_CONST(1.0f);
136 		Vec4 const zero = VEC4_CONST(0.0f);
137 		Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f);
138 		Vec4 const two = VEC4_CONST(2.0);
139 		Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
140 		Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
141 
142 		// declare variables
143 		Vec4 beststart = VEC4_CONST( 0.0f );
144 		Vec4 bestend = VEC4_CONST( 0.0f );
145 		Vec4 besterror = VEC4_CONST( FLT_MAX );
146 
147 		Vec4 x0 = zero;
148 
149 		int b0 = 0, b1 = 0;
150 
151 		// check all possible clusters for this total order
152 		for( int c0 = 0; c0 <= count; c0++)
153 		{
154 			Vec4 x1 = zero;
155 
156 			for( int c1 = 0; c1 <= count-c0; c1++)
157 			{
158 				Vec4 const x2 = m_xsum - x1 - x0;
159 
160 				//Vec3 const alphax_sum = x0 + x1 * 0.5f;
161 				//float const alpha2_sum = w0 + w1 * 0.25f;
162 				Vec4 const alphax_sum = MultiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
163 				Vec4 const alpha2_sum = alphax_sum.SplatW();
164 
165 				//Vec3 const betax_sum = x2 + x1 * 0.5f;
166 				//float const beta2_sum = w2 + w1 * 0.25f;
167 				Vec4 const betax_sum = MultiplyAdd(x1, half, x2); // betax_sum, beta2_sum
168 				Vec4 const beta2_sum = betax_sum.SplatW();
169 
170 				//float const alphabeta_sum = w1 * 0.25f;
171 				Vec4 const alphabeta_sum = (x1 * half).SplatW(); // alphabeta_sum
172 
173 				// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
174 				Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
175 
176 				Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
177 				Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
178 
179 				// clamp to the grid
180 				a = Min( one, Max( zero, a ) );
181 				b = Min( one, Max( zero, b ) );
182 				a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
183 				b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
184 
185 				// compute the error (we skip the constant xxsum)
186 				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
187 				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
188 				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
189 				Vec4 e4 = MultiplyAdd( two, e3, e1 );
190 
191 				// apply the metric to the error term
192 				Vec4 e5 = e4 * m_metricSqr;
193 				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
194 
195 				// keep the solution if it wins
196 				if( CompareAnyLessThan( error, besterror ) )
197 				{
198 					besterror = error;
199 					beststart = a;
200 					bestend = b;
201 					b0 = c0;
202 					b1 = c1;
203 				}
204 
205 				x1 += m_weighted[c0+c1];
206 			}
207 
208 			x0 += m_weighted[c0];
209 		}
210 
211 		// save the block if necessary
212 		if( CompareAnyLessThan( besterror, m_besterror ) )
213 		{
214 			// compute indices from cluster sizes.
215 			u8 bestindices[16];
216 			{
217 				int i = 0;
218 				for(; i < b0; i++) {
219 					bestindices[i] = 0;
220 				}
221 				for(; i < b0+b1; i++) {
222 					bestindices[i] = 2;
223 				}
224 				for(; i < count; i++) {
225 					bestindices[i] = 1;
226 				}
227 			}
228 
229 			// remap the indices
230 			u8 ordered[16];
231 			for( int i = 0; i < count; ++i )
232 				ordered[m_order[i]] = bestindices[i];
233 
234 			m_colours->RemapIndices( ordered, bestindices );
235 
236 
237 			// save the block
238 			WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
239 
240 			// save the error
241 			m_besterror = besterror;
242 		}
243 	}
244 
Compress4(void * block)245 	void WeightedClusterFit::Compress4( void* block )
246 	{
247 		int const count = m_colours->GetCount();
248 		Vec4 const one = VEC4_CONST(1.0f);
249 		Vec4 const zero = VEC4_CONST(0.0f);
250 		Vec4 const half = VEC4_CONST(0.5f);
251 		Vec4 const two = VEC4_CONST(2.0);
252 		Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
253 		Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
254 		Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
255 		Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
256 		Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
257 
258 		// declare variables
259 		Vec4 beststart = VEC4_CONST( 0.0f );
260 		Vec4 bestend = VEC4_CONST( 0.0f );
261 		Vec4 besterror = VEC4_CONST( FLT_MAX );
262 
263 		Vec4 x0 = zero;
264 		int b0 = 0, b1 = 0, b2 = 0;
265 
266 		// check all possible clusters for this total order
267 		for( int c0 = 0; c0 <= count; c0++)
268 		{
269 			Vec4 x1 = zero;
270 
271 			for( int c1 = 0; c1 <= count-c0; c1++)
272 			{
273 				Vec4 x2 = zero;
274 
275 				for( int c2 = 0; c2 <= count-c0-c1; c2++)
276 				{
277 					Vec4 const x3 = m_xsum - x2 - x1 - x0;
278 
279 					//Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
280 					//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
281 					Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
282 					Vec4 const alpha2_sum = alphax_sum.SplatW();
283 
284 					//Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
285 					//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
286 					Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
287 					Vec4 const beta2_sum = betax_sum.SplatW();
288 
289 					//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
290 					Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum
291 
292 					// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
293 					Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
294 
295 					Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
296 					Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
297 
298 					// clamp to the grid
299 					a = Min( one, Max( zero, a ) );
300 					b = Min( one, Max( zero, b ) );
301 					a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
302 					b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
303 
304 					// compute the error (we skip the constant xxsum)
305 					Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
306 					Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
307 					Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
308 					Vec4 e4 = MultiplyAdd( two, e3, e1 );
309 
310 					// apply the metric to the error term
311 					Vec4 e5 = e4 * m_metricSqr;
312 					Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
313 
314 					// keep the solution if it wins
315 					if( CompareAnyLessThan( error, besterror ) )
316 					{
317 						besterror = error;
318 						beststart = a;
319 						bestend = b;
320 						b0 = c0;
321 						b1 = c1;
322 						b2 = c2;
323 					}
324 
325 					x2 += m_weighted[c0+c1+c2];
326 				}
327 
328 				x1 += m_weighted[c0+c1];
329 			}
330 
331 			x0 += m_weighted[c0];
332 		}
333 
334 		// save the block if necessary
335 		if( CompareAnyLessThan( besterror, m_besterror ) )
336 		{
337 			// compute indices from cluster sizes.
338 			u8 bestindices[16];
339 			{
340 				int i = 0;
341 				for(; i < b0; i++) {
342 					bestindices[i] = 0;
343 				}
344 				for(; i < b0+b1; i++) {
345 					bestindices[i] = 2;
346 				}
347 				for(; i < b0+b1+b2; i++) {
348 					bestindices[i] = 3;
349 				}
350 				for(; i < count; i++) {
351 					bestindices[i] = 1;
352 				}
353 			}
354 
355 			// remap the indices
356 			u8 ordered[16];
357 			for( int i = 0; i < count; ++i )
358 				ordered[m_order[i]] = bestindices[i];
359 
360 			m_colours->RemapIndices( ordered, bestindices );
361 
362 			// save the block
363 			WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
364 
365 			// save the error
366 			m_besterror = besterror;
367 		}
368 	}
369 
370 #else
371 
Compress3(void * block)372 	void WeightedClusterFit::Compress3( void* block )
373 	{
374 		int const count = m_colours->GetCount();
375 		Vec3 const one( 1.0f );
376 		Vec3 const zero( 0.0f );
377 		Vec3 const half( 0.5f );
378 		Vec3 const grid( 31.0f, 63.0f, 31.0f );
379 		Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
380 
381 		// declare variables
382 		Vec3 beststart( 0.0f );
383 		Vec3 bestend( 0.0f );
384 		float besterror = FLT_MAX;
385 
386 		Vec3 x0(0.0f);
387 		float w0 = 0.0f;
388 
389 		int b0 = 0, b1 = 0;
390 
391 		// check all possible clusters for this total order
392 		for( int c0 = 0; c0 <= count; c0++)
393 		{
394 			Vec3 x1(0.0f);
395 			float w1 = 0.0f;
396 
397 			for( int c1 = 0; c1 <= count-c0; c1++)
398 			{
399 				float w2 = m_wsum - w0 - w1;
400 
401 				// These factors could be entirely precomputed.
402 				float const alpha2_sum = w0 + w1 * 0.25f;
403 				float const beta2_sum = w2 + w1 * 0.25f;
404 				float const alphabeta_sum = w1 * 0.25f;
405 				float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
406 
407 				Vec3 const alphax_sum = x0 + x1 * 0.5f;
408 				Vec3 const betax_sum = m_xsum - alphax_sum;
409 
410 				Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
411 				Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
412 
413 				// clamp to the grid
414 				a = Min( one, Max( zero, a ) );
415 				b = Min( one, Max( zero, b ) );
416 				a = Floor( grid*a + half )*gridrcp;
417 				b = Floor( grid*b + half )*gridrcp;
418 
419 				// compute the error
420 				Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
421 
422 				// apply the metric to the error term
423 				float error = Dot( e1, m_metricSqr );
424 
425 				// keep the solution if it wins
426 				if( error < besterror )
427 				{
428 					besterror = error;
429 					beststart = a;
430 					bestend = b;
431 					b0 = c0;
432 					b1 = c1;
433 				}
434 
435 				x1 += m_weighted[c0+c1];
436 				w1 += m_weights[c0+c1];
437 			}
438 
439 			x0 += m_weighted[c0];
440 			w0 += m_weights[c0];
441 		}
442 
443 		// save the block if necessary
444 		if( besterror < m_besterror )
445 		{
446 			// compute indices from cluster sizes.
447 			u8 bestindices[16];
448 			{
449 				int i = 0;
450 				for(; i < b0; i++) {
451 					bestindices[i] = 0;
452 				}
453 				for(; i < b0+b1; i++) {
454 					bestindices[i] = 2;
455 				}
456 				for(; i < count; i++) {
457 					bestindices[i] = 1;
458 				}
459 			}
460 
461 			// remap the indices
462 			u8 ordered[16];
463 			for( int i = 0; i < count; ++i )
464 				ordered[m_order[i]] = bestindices[i];
465 
466 			m_colours->RemapIndices( ordered, bestindices );
467 
468 			// save the block
469 			WriteColourBlock3( beststart, bestend, bestindices, block );
470 
471 			// save the error
472 			m_besterror = besterror;
473 		}
474 	}
475 
Compress4(void * block)476 	void WeightedClusterFit::Compress4( void* block )
477 	{
478 		int const count = m_colours->GetCount();
479 		Vec3 const one( 1.0f );
480 		Vec3 const zero( 0.0f );
481 		Vec3 const half( 0.5f );
482 		Vec3 const grid( 31.0f, 63.0f, 31.0f );
483 		Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
484 
485 		// declare variables
486 		Vec3 beststart( 0.0f );
487 		Vec3 bestend( 0.0f );
488 		float besterror = FLT_MAX;
489 
490 		Vec3 x0(0.0f);
491 		float w0 = 0.0f;
492 		int b0 = 0, b1 = 0, b2 = 0;
493 
494 		// check all possible clusters for this total order
495 		for( int c0 = 0; c0 <= count; c0++)
496 		{
497 			Vec3 x1(0.0f);
498 			float w1 = 0.0f;
499 
500 			for( int c1 = 0; c1 <= count-c0; c1++)
501 			{
502 				Vec3 x2(0.0f);
503 				float w2 = 0.0f;
504 
505 				for( int c2 = 0; c2 <= count-c0-c1; c2++)
506 				{
507 					float w3 = m_wsum - w0 - w1 - w2;
508 
509 					float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
510 					float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
511 					float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
512 					float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
513 
514 					Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
515 					Vec3 const betax_sum = m_xsum - alphax_sum;
516 
517 					Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
518 					Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
519 
520 					// clamp to the grid
521 					a = Min( one, Max( zero, a ) );
522 					b = Min( one, Max( zero, b ) );
523 					a = Floor( grid*a + half )*gridrcp;
524 					b = Floor( grid*b + half )*gridrcp;
525 
526 					// compute the error
527 					Vec3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
528 
529 					// apply the metric to the error term
530 					float error = Dot( e1, m_metricSqr );
531 
532 					// keep the solution if it wins
533 					if( error < besterror )
534 					{
535 						besterror = error;
536 						beststart = a;
537 						bestend = b;
538 						b0 = c0;
539 						b1 = c1;
540 						b2 = c2;
541 					}
542 
543 					x2 += m_weighted[c0+c1+c2];
544 					w2 += m_weights[c0+c1+c2];
545 				}
546 
547 				x1 += m_weighted[c0+c1];
548 				w1 += m_weights[c0+c1];
549 			}
550 
551 			x0 += m_weighted[c0];
552 			w0 += m_weights[c0];
553 		}
554 
555 		// save the block if necessary
556 		if( besterror < m_besterror )
557 		{
558 			// compute indices from cluster sizes.
559 			u8 bestindices[16];
560 			{
561 				int i = 0;
562 				for(; i < b0; i++) {
563 					bestindices[i] = 0;
564 				}
565 				for(; i < b0+b1; i++) {
566 					bestindices[i] = 2;
567 				}
568 				for(; i < b0+b1+b2; i++) {
569 					bestindices[i] = 3;
570 				}
571 				for(; i < count; i++) {
572 					bestindices[i] = 1;
573 				}
574 			}
575 
576 			// remap the indices
577 			u8 ordered[16];
578 			for( int i = 0; i < count; ++i )
579 				ordered[m_order[i]] = bestindices[i];
580 
581 			m_colours->RemapIndices( ordered, bestindices );
582 
583 			// save the block
584 			WriteColourBlock4( beststart, bestend, bestindices, block );
585 
586 			// save the error
587 			m_besterror = besterror;
588 		}
589 	}
590 
591 #endif
592 
593 } // namespace squish
594