1 /* -----------------------------------------------------------------------------
2 
3 	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
4 	Copyright (c) 2007 Ignacio Castano                   icastano@nvidia.com
5 
6 	Permission is hereby granted, free of charge, to any person obtaining
7 	a copy of this software and associated documentation files (the
8 	"Software"), to	deal in the Software without restriction, including
9 	without limitation the rights to use, copy, modify, merge, publish,
10 	distribute, sublicense, and/or sell copies of the Software, and to
11 	permit persons to whom the Software is furnished to do so, subject to
12 	the following conditions:
13 
14 	The above copyright notice and this permission notice shall be included
15 	in all copies or substantial portions of the Software.
16 
17 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25    -------------------------------------------------------------------------- */
26 
27 #include "clusterfit.h"
28 #include "colourset.h"
29 #include "colourblock.h"
30 #include <cfloat>
31 
32 namespace squish {
33 
ClusterFit(ColourSet const * colours,int flags,float * metric)34 ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
35   : ColourFit( colours, flags )
36 {
37 	// set the iteration count
38 	m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
39 
40 	// initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
41 	if( metric )
42 		m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
43 	else
44 		m_metric = VEC4_CONST( 1.0f );
45 
46 	// initialise the best error
47 	m_besterror = VEC4_CONST( FLT_MAX );
48 
49 	// cache some values
50 	int const count = m_colours->GetCount();
51 	Vec3 const* values = m_colours->GetPoints();
52 
53 	// get the covariance matrix
54 	Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
55 
56 	// compute the principle component
57 	m_principle = ComputePrincipleComponent( covariance );
58 }
59 
ConstructOrdering(Vec3 const & axis,int iteration)60 bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
61 {
62 	// cache some values
63 	int const count = m_colours->GetCount();
64 	Vec3 const* values = m_colours->GetPoints();
65 
66 	// build the list of dot products
67 	float dps[16];
68 	u8* order = ( u8* )m_order + 16*iteration;
69 	for( int i = 0; i < count; ++i )
70 	{
71 		dps[i] = Dot( values[i], axis );
72 		order[i] = ( u8 )i;
73 	}
74 
75 	// stable sort using them
76 	for( int i = 0; i < count; ++i )
77 	{
78 		for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
79 		{
80 			std::swap( dps[j], dps[j - 1] );
81 			std::swap( order[j], order[j - 1] );
82 		}
83 	}
84 
85 	// check this ordering is unique
86 	for( int it = 0; it < iteration; ++it )
87 	{
88 		u8 const* prev = ( u8* )m_order + 16*it;
89 		bool same = true;
90 		for( int i = 0; i < count; ++i )
91 		{
92 			if( order[i] != prev[i] )
93 			{
94 				same = false;
95 				break;
96 			}
97 		}
98 		if( same )
99 			return false;
100 	}
101 
102 	// copy the ordering and weight all the points
103 	Vec3 const* unweighted = m_colours->GetPoints();
104 	float const* weights = m_colours->GetWeights();
105 	m_xsum_wsum = VEC4_CONST( 0.0f );
106 	for( int i = 0; i < count; ++i )
107 	{
108 		int j = order[i];
109 		Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
110 		Vec4 w( weights[j] );
111 		Vec4 x = p*w;
112 		m_points_weights[i] = x;
113 		m_xsum_wsum += x;
114 	}
115 	return true;
116 }
117 
Compress3(void * block)118 void ClusterFit::Compress3( void* block )
119 {
120 	// declare variables
121 	int const count = m_colours->GetCount();
122 	Vec4 const two = VEC4_CONST( 2.0 );
123 	Vec4 const one = VEC4_CONST( 1.0f );
124 	Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
125 	Vec4 const zero = VEC4_CONST( 0.0f );
126 	Vec4 const half = VEC4_CONST( 0.5f );
127 	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
128 	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
129 
130 	// prepare an ordering using the principle axis
131 	ConstructOrdering( m_principle, 0 );
132 
133 	// check all possible clusters and iterate on the total order
134 	Vec4 beststart = VEC4_CONST( 0.0f );
135 	Vec4 bestend = VEC4_CONST( 0.0f );
136 	Vec4 besterror = m_besterror;
137 	u8 bestindices[16];
138 	int bestiteration = 0;
139 	int besti = 0, bestj = 0;
140 
141 	// loop over iterations (we avoid the case that all points in first or last cluster)
142 	for( int iterationIndex = 0;; )
143 	{
144 		// first cluster [0,i) is at the start
145 		Vec4 part0 = VEC4_CONST( 0.0f );
146 		for( int i = 0; i < count; ++i )
147 		{
148 			// second cluster [i,j) is half along
149 			Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
150 			int jmin = ( i == 0 ) ? 1 : i;
151 			for( int j = jmin;; )
152 			{
153 				// last cluster [j,count) is at the end
154 				Vec4 part2 = m_xsum_wsum - part1 - part0;
155 
156 				// compute least squares terms directly
157 				Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
158 				Vec4 alpha2_sum = alphax_sum.SplatW();
159 
160 				Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
161 				Vec4 beta2_sum = betax_sum.SplatW();
162 
163 				Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
164 
165 				// compute the least-squares optimal points
166 				Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
167 				Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
168 				Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
169 
170 				// clamp to the grid
171 				a = Min( one, Max( zero, a ) );
172 				b = Min( one, Max( zero, b ) );
173 				a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
174 				b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
175 
176 				// compute the error (we skip the constant xxsum)
177 				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
178 				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
179 				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
180 				Vec4 e4 = MultiplyAdd( two, e3, e1 );
181 
182 				// apply the metric to the error term
183 				Vec4 e5 = e4*m_metric;
184 				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
185 
186 				// keep the solution if it wins
187 				if( CompareAnyLessThan( error, besterror ) )
188 				{
189 					beststart = a;
190 					bestend = b;
191 					besti = i;
192 					bestj = j;
193 					besterror = error;
194 					bestiteration = iterationIndex;
195 				}
196 
197 				// advance
198 				if( j == count )
199 					break;
200 				part1 += m_points_weights[j];
201 				++j;
202 			}
203 
204 			// advance
205 			part0 += m_points_weights[i];
206 		}
207 
208 		// stop if we didn't improve in this iteration
209 		if( bestiteration != iterationIndex )
210 			break;
211 
212 		// advance if possible
213 		++iterationIndex;
214 		if( iterationIndex == m_iterationCount )
215 			break;
216 
217 		// stop if a new iteration is an ordering that has already been tried
218 		Vec3 axis = ( bestend - beststart ).GetVec3();
219 		if( !ConstructOrdering( axis, iterationIndex ) )
220 			break;
221 	}
222 
223 	// save the block if necessary
224 	if( CompareAnyLessThan( besterror, m_besterror ) )
225 	{
226 		// remap the indices
227 		u8 const* order = ( u8* )m_order + 16*bestiteration;
228 
229 		u8 unordered[16];
230 		for( int m = 0; m < besti; ++m )
231 			unordered[order[m]] = 0;
232 		for( int m = besti; m < bestj; ++m )
233 			unordered[order[m]] = 2;
234 		for( int m = bestj; m < count; ++m )
235 			unordered[order[m]] = 1;
236 
237 		m_colours->RemapIndices( unordered, bestindices );
238 
239 		// save the block
240 		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
241 
242 		// save the error
243 		m_besterror = besterror;
244 	}
245 }
246 
Compress4(void * block)247 void ClusterFit::Compress4( void* block )
248 {
249 	// declare variables
250 	int const count = m_colours->GetCount();
251 	Vec4 const two = VEC4_CONST( 2.0f );
252 	Vec4 const one = VEC4_CONST( 1.0f );
253 	Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
254 	Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
255 	Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
256 	Vec4 const zero = VEC4_CONST( 0.0f );
257 	Vec4 const half = VEC4_CONST( 0.5f );
258 	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
259 	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
260 
261 	// prepare an ordering using the principle axis
262 	ConstructOrdering( m_principle, 0 );
263 
264 	// check all possible clusters and iterate on the total order
265 	Vec4 beststart = VEC4_CONST( 0.0f );
266 	Vec4 bestend = VEC4_CONST( 0.0f );
267 	Vec4 besterror = m_besterror;
268 	u8 bestindices[16];
269 	int bestiteration = 0;
270 	int besti = 0, bestj = 0, bestk = 0;
271 
272 	// loop over iterations (we avoid the case that all points in first or last cluster)
273 	for( int iterationIndex = 0;; )
274 	{
275 		// first cluster [0,i) is at the start
276 		Vec4 part0 = VEC4_CONST( 0.0f );
277 		for( int i = 0; i < count; ++i )
278 		{
279 			// second cluster [i,j) is one third along
280 			Vec4 part1 = VEC4_CONST( 0.0f );
281 			for( int j = i;; )
282 			{
283 				// third cluster [j,k) is two thirds along
284 				Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
285 				int kmin = ( j == 0 ) ? 1 : j;
286 				for( int k = kmin;; )
287 				{
288 					// last cluster [k,count) is at the end
289 					Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
290 
291 					// compute least squares terms directly
292 					Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
293 					Vec4 const alpha2_sum = alphax_sum.SplatW();
294 
295 					Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
296 					Vec4 const beta2_sum = betax_sum.SplatW();
297 
298 					Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
299 
300 					// compute the least-squares optimal points
301 					Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
302 					Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
303 					Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
304 
305 					// clamp to the grid
306 					a = Min( one, Max( zero, a ) );
307 					b = Min( one, Max( zero, b ) );
308 					a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
309 					b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
310 
311 					// compute the error (we skip the constant xxsum)
312 					Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
313 					Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
314 					Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
315 					Vec4 e4 = MultiplyAdd( two, e3, e1 );
316 
317 					// apply the metric to the error term
318 					Vec4 e5 = e4*m_metric;
319 					Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
320 
321 					// keep the solution if it wins
322 					if( CompareAnyLessThan( error, besterror ) )
323 					{
324 						beststart = a;
325 						bestend = b;
326 						besterror = error;
327 						besti = i;
328 						bestj = j;
329 						bestk = k;
330 						bestiteration = iterationIndex;
331 					}
332 
333 					// advance
334 					if( k == count )
335 						break;
336 					part2 += m_points_weights[k];
337 					++k;
338 				}
339 
340 				// advance
341 				if( j == count )
342 					break;
343 				part1 += m_points_weights[j];
344 				++j;
345 			}
346 
347 			// advance
348 			part0 += m_points_weights[i];
349 		}
350 
351 		// stop if we didn't improve in this iteration
352 		if( bestiteration != iterationIndex )
353 			break;
354 
355 		// advance if possible
356 		++iterationIndex;
357 		if( iterationIndex == m_iterationCount )
358 			break;
359 
360 		// stop if a new iteration is an ordering that has already been tried
361 		Vec3 axis = ( bestend - beststart ).GetVec3();
362 		if( !ConstructOrdering( axis, iterationIndex ) )
363 			break;
364 	}
365 
366 	// save the block if necessary
367 	if( CompareAnyLessThan( besterror, m_besterror ) )
368 	{
369 		// remap the indices
370 		u8 const* order = ( u8* )m_order + 16*bestiteration;
371 
372 		u8 unordered[16];
373 		for( int m = 0; m < besti; ++m )
374 			unordered[order[m]] = 0;
375 		for( int m = besti; m < bestj; ++m )
376 			unordered[order[m]] = 2;
377 		for( int m = bestj; m < bestk; ++m )
378 			unordered[order[m]] = 3;
379 		for( int m = bestk; m < count; ++m )
380 			unordered[order[m]] = 1;
381 
382 		m_colours->RemapIndices( unordered, bestindices );
383 
384 		// save the block
385 		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
386 
387 		// save the error
388 		m_besterror = besterror;
389 	}
390 }
391 
392 } // namespace squish
393