1 /* -----------------------------------------------------------------------------
2 
3 	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
4 	Copyright (c) 2007 Ignacio Castano                   icastano@nvidia.com
5 
6 	Permission is hereby granted, free of charge, to any person obtaining
7 	a copy of this software and associated documentation files (the
8 	"Software"), to	deal in the Software without restriction, including
9 	without limitation the rights to use, copy, modify, merge, publish,
10 	distribute, sublicense, and/or sell copies of the Software, and to
11 	permit persons to whom the Software is furnished to do so, subject to
12 	the following conditions:
13 
14 	The above copyright notice and this permission notice shall be included
15 	in all copies or substantial portions of the Software.
16 
17 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25    -------------------------------------------------------------------------- */
26 
27 #include "clusterfit.h"
28 #include "colourset.h"
29 #include "colourblock.h"
30 #include <cfloat>
31 
32 namespace squish {
33 
ClusterFit(ColourSet const * colours,int flags)34 ClusterFit::ClusterFit( ColourSet const* colours, int flags )
35   : ColourFit( colours, flags )
36 {
37 	// set the iteration count
38 	m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
39 
40 	// initialise the best error
41 	m_besterror = VEC4_CONST( FLT_MAX );
42 
43 	// initialise the metric
44 	bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
45 	if( perceptual )
46 		m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f );
47 	else
48 		m_metric = VEC4_CONST( 1.0f );
49 
50 	// cache some values
51 	int const count = m_colours->GetCount();
52 	Vec3 const* values = m_colours->GetPoints();
53 
54 	// get the covariance matrix
55 	Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
56 
57 	// compute the principle component
58 	m_principle = ComputePrincipleComponent( covariance );
59 }
60 
ConstructOrdering(Vec3 const & axis,int iteration)61 bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
62 {
63 	// cache some values
64 	int const count = m_colours->GetCount();
65 	Vec3 const* values = m_colours->GetPoints();
66 
67 	// build the list of dot products
68 	float dps[16];
69 	u8* order = ( u8* )m_order + 16*iteration;
70 	for( int i = 0; i < count; ++i )
71 	{
72 		dps[i] = Dot( values[i], axis );
73 		order[i] = ( u8 )i;
74 	}
75 
76 	// stable sort using them
77 	for( int i = 0; i < count; ++i )
78 	{
79 		for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
80 		{
81 			std::swap( dps[j], dps[j - 1] );
82 			std::swap( order[j], order[j - 1] );
83 		}
84 	}
85 
86 	// check this ordering is unique
87 	for( int it = 0; it < iteration; ++it )
88 	{
89 		u8 const* prev = ( u8* )m_order + 16*it;
90 		bool same = true;
91 		for( int i = 0; i < count; ++i )
92 		{
93 			if( order[i] != prev[i] )
94 			{
95 				same = false;
96 				break;
97 			}
98 		}
99 		if( same )
100 			return false;
101 	}
102 
103 	// copy the ordering and weight all the points
104 	Vec3 const* unweighted = m_colours->GetPoints();
105 	float const* weights = m_colours->GetWeights();
106 	m_xsum_wsum = VEC4_CONST( 0.0f );
107 	for( int i = 0; i < count; ++i )
108 	{
109 		int j = order[i];
110 		Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
111 		Vec4 w( weights[j] );
112 		Vec4 x = p*w;
113 		m_points_weights[i] = x;
114 		m_xsum_wsum += x;
115 	}
116 	return true;
117 }
118 
Compress3(void * block)119 void ClusterFit::Compress3( void* block )
120 {
121 	// declare variables
122 	int const count = m_colours->GetCount();
123 	Vec4 const two = VEC4_CONST( 2.0 );
124 	Vec4 const one = VEC4_CONST( 1.0f );
125 	Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
126 	Vec4 const zero = VEC4_CONST( 0.0f );
127 	Vec4 const half = VEC4_CONST( 0.5f );
128 	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
129 	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
130 
131 	// prepare an ordering using the principle axis
132 	ConstructOrdering( m_principle, 0 );
133 
134 	// check all possible clusters and iterate on the total order
135 	Vec4 beststart = VEC4_CONST( 0.0f );
136 	Vec4 bestend = VEC4_CONST( 0.0f );
137 	Vec4 besterror = m_besterror;
138 	u8 bestindices[16];
139 	int bestiteration = 0;
140 	int besti = 0, bestj = 0;
141 
142 	// loop over iterations (we avoid the case that all points in first or last cluster)
143 	for( int iterationIndex = 0;; )
144 	{
145 		// first cluster [0,i) is at the start
146 		Vec4 part0 = VEC4_CONST( 0.0f );
147 		for( int i = 0; i < count; ++i )
148 		{
149 			// second cluster [i,j) is half along
150 			Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
151 			int jmin = ( i == 0 ) ? 1 : i;
152 			for( int j = jmin;; )
153 			{
154 				// last cluster [j,count) is at the end
155 				Vec4 part2 = m_xsum_wsum - part1 - part0;
156 
157 				// compute least squares terms directly
158 				Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
159 				Vec4 alpha2_sum = alphax_sum.SplatW();
160 
161 				Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
162 				Vec4 beta2_sum = betax_sum.SplatW();
163 
164 				Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
165 
166 				// compute the least-squares optimal points
167 				Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
168 				Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
169 				Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
170 
171 				// clamp to the grid
172 				a = Min( one, Max( zero, a ) );
173 				b = Min( one, Max( zero, b ) );
174 				a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
175 				b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
176 
177 				// compute the error (we skip the constant xxsum)
178 				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
179 				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
180 				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
181 				Vec4 e4 = MultiplyAdd( two, e3, e1 );
182 
183 				// apply the metric to the error term
184 				Vec4 e5 = e4*m_metric;
185 				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
186 
187 				// keep the solution if it wins
188 				if( CompareAnyLessThan( error, besterror ) )
189 				{
190 					beststart = a;
191 					bestend = b;
192 					besti = i;
193 					bestj = j;
194 					besterror = error;
195 					bestiteration = iterationIndex;
196 				}
197 
198 				// advance
199 				if( j == count )
200 					break;
201 				part1 += m_points_weights[j];
202 				++j;
203 			}
204 
205 			// advance
206 			part0 += m_points_weights[i];
207 		}
208 
209 		// stop if we didn't improve in this iteration
210 		if( bestiteration != iterationIndex )
211 			break;
212 
213 		// advance if possible
214 		++iterationIndex;
215 		if( iterationIndex == m_iterationCount )
216 			break;
217 
218 		// stop if a new iteration is an ordering that has already been tried
219 		Vec3 axis = ( bestend - beststart ).GetVec3();
220 		if( !ConstructOrdering( axis, iterationIndex ) )
221 			break;
222 	}
223 
224 	// save the block if necessary
225 	if( CompareAnyLessThan( besterror, m_besterror ) )
226 	{
227 		// remap the indices
228 		u8 const* order = ( u8* )m_order + 16*bestiteration;
229 
230 		u8 unordered[16];
231 		for( int m = 0; m < besti; ++m )
232 			unordered[order[m]] = 0;
233 		for( int m = besti; m < bestj; ++m )
234 			unordered[order[m]] = 2;
235 		for( int m = bestj; m < count; ++m )
236 			unordered[order[m]] = 1;
237 
238 		m_colours->RemapIndices( unordered, bestindices );
239 
240 		// save the block
241 		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
242 
243 		// save the error
244 		m_besterror = besterror;
245 	}
246 }
247 
Compress4(void * block)248 void ClusterFit::Compress4( void* block )
249 {
250 	// declare variables
251 	int const count = m_colours->GetCount();
252 	Vec4 const two = VEC4_CONST( 2.0f );
253 	Vec4 const one = VEC4_CONST( 1.0f );
254 	Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
255 	Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
256 	Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
257 	Vec4 const zero = VEC4_CONST( 0.0f );
258 	Vec4 const half = VEC4_CONST( 0.5f );
259 	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
260 	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
261 
262 	// prepare an ordering using the principle axis
263 	ConstructOrdering( m_principle, 0 );
264 
265 	// check all possible clusters and iterate on the total order
266 	Vec4 beststart = VEC4_CONST( 0.0f );
267 	Vec4 bestend = VEC4_CONST( 0.0f );
268 	Vec4 besterror = m_besterror;
269 	u8 bestindices[16];
270 	int bestiteration = 0;
271 	int besti = 0, bestj = 0, bestk = 0;
272 
273 	// loop over iterations (we avoid the case that all points in first or last cluster)
274 	for( int iterationIndex = 0;; )
275 	{
276 		// first cluster [0,i) is at the start
277 		Vec4 part0 = VEC4_CONST( 0.0f );
278 		for( int i = 0; i < count; ++i )
279 		{
280 			// second cluster [i,j) is one third along
281 			Vec4 part1 = VEC4_CONST( 0.0f );
282 			for( int j = i;; )
283 			{
284 				// third cluster [j,k) is two thirds along
285 				Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
286 				int kmin = ( j == 0 ) ? 1 : j;
287 				for( int k = kmin;; )
288 				{
289 					// last cluster [k,count) is at the end
290 					Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
291 
292 					// compute least squares terms directly
293 					Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
294 					Vec4 const alpha2_sum = alphax_sum.SplatW();
295 
296 					Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
297 					Vec4 const beta2_sum = betax_sum.SplatW();
298 
299 					Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
300 
301 					// compute the least-squares optimal points
302 					Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
303 					Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
304 					Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
305 
306 					// clamp to the grid
307 					a = Min( one, Max( zero, a ) );
308 					b = Min( one, Max( zero, b ) );
309 					a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
310 					b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
311 
312 					// compute the error (we skip the constant xxsum)
313 					Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
314 					Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
315 					Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
316 					Vec4 e4 = MultiplyAdd( two, e3, e1 );
317 
318 					// apply the metric to the error term
319 					Vec4 e5 = e4*m_metric;
320 					Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
321 
322 					// keep the solution if it wins
323 					if( CompareAnyLessThan( error, besterror ) )
324 					{
325 						beststart = a;
326 						bestend = b;
327 						besterror = error;
328 						besti = i;
329 						bestj = j;
330 						bestk = k;
331 						bestiteration = iterationIndex;
332 					}
333 
334 					// advance
335 					if( k == count )
336 						break;
337 					part2 += m_points_weights[k];
338 					++k;
339 				}
340 
341 				// advance
342 				if( j == count )
343 					break;
344 				part1 += m_points_weights[j];
345 				++j;
346 			}
347 
348 			// advance
349 			part0 += m_points_weights[i];
350 		}
351 
352 		// stop if we didn't improve in this iteration
353 		if( bestiteration != iterationIndex )
354 			break;
355 
356 		// advance if possible
357 		++iterationIndex;
358 		if( iterationIndex == m_iterationCount )
359 			break;
360 
361 		// stop if a new iteration is an ordering that has already been tried
362 		Vec3 axis = ( bestend - beststart ).GetVec3();
363 		if( !ConstructOrdering( axis, iterationIndex ) )
364 			break;
365 	}
366 
367 	// save the block if necessary
368 	if( CompareAnyLessThan( besterror, m_besterror ) )
369 	{
370 		// remap the indices
371 		u8 const* order = ( u8* )m_order + 16*bestiteration;
372 
373 		u8 unordered[16];
374 		for( int m = 0; m < besti; ++m )
375 			unordered[order[m]] = 0;
376 		for( int m = besti; m < bestj; ++m )
377 			unordered[order[m]] = 2;
378 		for( int m = bestj; m < bestk; ++m )
379 			unordered[order[m]] = 3;
380 		for( int m = bestk; m < count; ++m )
381 			unordered[order[m]] = 1;
382 
383 		m_colours->RemapIndices( unordered, bestindices );
384 
385 		// save the block
386 		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
387 
388 		// save the error
389 		m_besterror = besterror;
390 	}
391 }
392 
393 } // namespace squish
394