1 /* -----------------------------------------------------------------------------
2
3 Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
4 Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice shall be included
15 in all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
25 -------------------------------------------------------------------------- */
26
27 #include "clusterfit.h"
28 #include "colourset.h"
29 #include "colourblock.h"
30 #include <cfloat>
31
32 namespace squish {
33
ClusterFit(ColourSet const * colours,int flags)34 ClusterFit::ClusterFit( ColourSet const* colours, int flags )
35 : ColourFit( colours, flags )
36 {
37 // set the iteration count
38 m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
39
40 // initialise the best error
41 m_besterror = VEC4_CONST( FLT_MAX );
42
43 // initialise the metric
44 bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
45 if( perceptual )
46 m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f );
47 else
48 m_metric = VEC4_CONST( 1.0f );
49
50 // cache some values
51 int const count = m_colours->GetCount();
52 Vec3 const* values = m_colours->GetPoints();
53
54 // get the covariance matrix
55 Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
56
57 // compute the principle component
58 m_principle = ComputePrincipleComponent( covariance );
59 }
60
ConstructOrdering(Vec3 const & axis,int iteration)61 bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
62 {
63 // cache some values
64 int const count = m_colours->GetCount();
65 Vec3 const* values = m_colours->GetPoints();
66
67 // build the list of dot products
68 float dps[16];
69 u8* order = ( u8* )m_order + 16*iteration;
70 for( int i = 0; i < count; ++i )
71 {
72 dps[i] = Dot( values[i], axis );
73 order[i] = ( u8 )i;
74 }
75
76 // stable sort using them
77 for( int i = 0; i < count; ++i )
78 {
79 for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
80 {
81 std::swap( dps[j], dps[j - 1] );
82 std::swap( order[j], order[j - 1] );
83 }
84 }
85
86 // check this ordering is unique
87 for( int it = 0; it < iteration; ++it )
88 {
89 u8 const* prev = ( u8* )m_order + 16*it;
90 bool same = true;
91 for( int i = 0; i < count; ++i )
92 {
93 if( order[i] != prev[i] )
94 {
95 same = false;
96 break;
97 }
98 }
99 if( same )
100 return false;
101 }
102
103 // copy the ordering and weight all the points
104 Vec3 const* unweighted = m_colours->GetPoints();
105 float const* weights = m_colours->GetWeights();
106 m_xsum_wsum = VEC4_CONST( 0.0f );
107 for( int i = 0; i < count; ++i )
108 {
109 int j = order[i];
110 Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
111 Vec4 w( weights[j] );
112 Vec4 x = p*w;
113 m_points_weights[i] = x;
114 m_xsum_wsum += x;
115 }
116 return true;
117 }
118
Compress3(void * block)119 void ClusterFit::Compress3( void* block )
120 {
121 // declare variables
122 int const count = m_colours->GetCount();
123 Vec4 const two = VEC4_CONST( 2.0 );
124 Vec4 const one = VEC4_CONST( 1.0f );
125 Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
126 Vec4 const zero = VEC4_CONST( 0.0f );
127 Vec4 const half = VEC4_CONST( 0.5f );
128 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
129 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
130
131 // prepare an ordering using the principle axis
132 ConstructOrdering( m_principle, 0 );
133
134 // check all possible clusters and iterate on the total order
135 Vec4 beststart = VEC4_CONST( 0.0f );
136 Vec4 bestend = VEC4_CONST( 0.0f );
137 Vec4 besterror = m_besterror;
138 u8 bestindices[16];
139 int bestiteration = 0;
140 int besti = 0, bestj = 0;
141
142 // loop over iterations (we avoid the case that all points in first or last cluster)
143 for( int iterationIndex = 0;; )
144 {
145 // first cluster [0,i) is at the start
146 Vec4 part0 = VEC4_CONST( 0.0f );
147 for( int i = 0; i < count; ++i )
148 {
149 // second cluster [i,j) is half along
150 Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
151 int jmin = ( i == 0 ) ? 1 : i;
152 for( int j = jmin;; )
153 {
154 // last cluster [j,count) is at the end
155 Vec4 part2 = m_xsum_wsum - part1 - part0;
156
157 // compute least squares terms directly
158 Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
159 Vec4 alpha2_sum = alphax_sum.SplatW();
160
161 Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
162 Vec4 beta2_sum = betax_sum.SplatW();
163
164 Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
165
166 // compute the least-squares optimal points
167 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
168 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
169 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
170
171 // clamp to the grid
172 a = Min( one, Max( zero, a ) );
173 b = Min( one, Max( zero, b ) );
174 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
175 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
176
177 // compute the error (we skip the constant xxsum)
178 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
179 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
180 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
181 Vec4 e4 = MultiplyAdd( two, e3, e1 );
182
183 // apply the metric to the error term
184 Vec4 e5 = e4*m_metric;
185 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
186
187 // keep the solution if it wins
188 if( CompareAnyLessThan( error, besterror ) )
189 {
190 beststart = a;
191 bestend = b;
192 besti = i;
193 bestj = j;
194 besterror = error;
195 bestiteration = iterationIndex;
196 }
197
198 // advance
199 if( j == count )
200 break;
201 part1 += m_points_weights[j];
202 ++j;
203 }
204
205 // advance
206 part0 += m_points_weights[i];
207 }
208
209 // stop if we didn't improve in this iteration
210 if( bestiteration != iterationIndex )
211 break;
212
213 // advance if possible
214 ++iterationIndex;
215 if( iterationIndex == m_iterationCount )
216 break;
217
218 // stop if a new iteration is an ordering that has already been tried
219 Vec3 axis = ( bestend - beststart ).GetVec3();
220 if( !ConstructOrdering( axis, iterationIndex ) )
221 break;
222 }
223
224 // save the block if necessary
225 if( CompareAnyLessThan( besterror, m_besterror ) )
226 {
227 // remap the indices
228 u8 const* order = ( u8* )m_order + 16*bestiteration;
229
230 u8 unordered[16];
231 for( int m = 0; m < besti; ++m )
232 unordered[order[m]] = 0;
233 for( int m = besti; m < bestj; ++m )
234 unordered[order[m]] = 2;
235 for( int m = bestj; m < count; ++m )
236 unordered[order[m]] = 1;
237
238 m_colours->RemapIndices( unordered, bestindices );
239
240 // save the block
241 WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
242
243 // save the error
244 m_besterror = besterror;
245 }
246 }
247
Compress4(void * block)248 void ClusterFit::Compress4( void* block )
249 {
250 // declare variables
251 int const count = m_colours->GetCount();
252 Vec4 const two = VEC4_CONST( 2.0f );
253 Vec4 const one = VEC4_CONST( 1.0f );
254 Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
255 Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
256 Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
257 Vec4 const zero = VEC4_CONST( 0.0f );
258 Vec4 const half = VEC4_CONST( 0.5f );
259 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
260 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
261
262 // prepare an ordering using the principle axis
263 ConstructOrdering( m_principle, 0 );
264
265 // check all possible clusters and iterate on the total order
266 Vec4 beststart = VEC4_CONST( 0.0f );
267 Vec4 bestend = VEC4_CONST( 0.0f );
268 Vec4 besterror = m_besterror;
269 u8 bestindices[16];
270 int bestiteration = 0;
271 int besti = 0, bestj = 0, bestk = 0;
272
273 // loop over iterations (we avoid the case that all points in first or last cluster)
274 for( int iterationIndex = 0;; )
275 {
276 // first cluster [0,i) is at the start
277 Vec4 part0 = VEC4_CONST( 0.0f );
278 for( int i = 0; i < count; ++i )
279 {
280 // second cluster [i,j) is one third along
281 Vec4 part1 = VEC4_CONST( 0.0f );
282 for( int j = i;; )
283 {
284 // third cluster [j,k) is two thirds along
285 Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
286 int kmin = ( j == 0 ) ? 1 : j;
287 for( int k = kmin;; )
288 {
289 // last cluster [k,count) is at the end
290 Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
291
292 // compute least squares terms directly
293 Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
294 Vec4 const alpha2_sum = alphax_sum.SplatW();
295
296 Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
297 Vec4 const beta2_sum = betax_sum.SplatW();
298
299 Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
300
301 // compute the least-squares optimal points
302 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
303 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
304 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
305
306 // clamp to the grid
307 a = Min( one, Max( zero, a ) );
308 b = Min( one, Max( zero, b ) );
309 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
310 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
311
312 // compute the error (we skip the constant xxsum)
313 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
314 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
315 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
316 Vec4 e4 = MultiplyAdd( two, e3, e1 );
317
318 // apply the metric to the error term
319 Vec4 e5 = e4*m_metric;
320 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
321
322 // keep the solution if it wins
323 if( CompareAnyLessThan( error, besterror ) )
324 {
325 beststart = a;
326 bestend = b;
327 besterror = error;
328 besti = i;
329 bestj = j;
330 bestk = k;
331 bestiteration = iterationIndex;
332 }
333
334 // advance
335 if( k == count )
336 break;
337 part2 += m_points_weights[k];
338 ++k;
339 }
340
341 // advance
342 if( j == count )
343 break;
344 part1 += m_points_weights[j];
345 ++j;
346 }
347
348 // advance
349 part0 += m_points_weights[i];
350 }
351
352 // stop if we didn't improve in this iteration
353 if( bestiteration != iterationIndex )
354 break;
355
356 // advance if possible
357 ++iterationIndex;
358 if( iterationIndex == m_iterationCount )
359 break;
360
361 // stop if a new iteration is an ordering that has already been tried
362 Vec3 axis = ( bestend - beststart ).GetVec3();
363 if( !ConstructOrdering( axis, iterationIndex ) )
364 break;
365 }
366
367 // save the block if necessary
368 if( CompareAnyLessThan( besterror, m_besterror ) )
369 {
370 // remap the indices
371 u8 const* order = ( u8* )m_order + 16*bestiteration;
372
373 u8 unordered[16];
374 for( int m = 0; m < besti; ++m )
375 unordered[order[m]] = 0;
376 for( int m = besti; m < bestj; ++m )
377 unordered[order[m]] = 2;
378 for( int m = bestj; m < bestk; ++m )
379 unordered[order[m]] = 3;
380 for( int m = bestk; m < count; ++m )
381 unordered[order[m]] = 1;
382
383 m_colours->RemapIndices( unordered, bestindices );
384
385 // save the block
386 WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
387
388 // save the error
389 m_besterror = besterror;
390 }
391 }
392
393 } // namespace squish
394