1 /* -----------------------------------------------------------------------------
2
3 Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be included
14 in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 -------------------------------------------------------------------------- */
25
26 #include <string.h>
27 #include "squish.h"
28 #include "colourset.h"
29 #include "maths.h"
30 #include "rangefit.h"
31 #include "clusterfit.h"
32 #include "colourblock.h"
33 #include "alpha.h"
34 #include "singlecolourfit.h"
35
36 namespace squish {
37
FixFlags(int flags)38 static int FixFlags( int flags )
39 {
40 // grab the flag bits
41 int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
42 int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
43 int extra = flags & kWeightColourByAlpha;
44
45 // set defaults
46 if ( method != kDxt3
47 && method != kDxt5
48 && method != kBc4
49 && method != kBc5 )
50 {
51 method = kDxt1;
52 }
53 if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
54 fit = kColourClusterFit;
55
56 // done
57 return method | fit | extra;
58 }
59
CompressMasked(u8 const * rgba,int mask,void * block,int flags,float * metric)60 void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
61 {
62 // fix any bad flags
63 flags = FixFlags( flags );
64
65 if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
66 {
67 u8 alpha[16*4];
68 for( int i = 0; i < 16; ++i )
69 {
70 alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
71 }
72
73 u8* rBlock = reinterpret_cast< u8* >( block );
74 CompressAlphaDxt5( alpha, mask, rBlock );
75
76 if ( ( flags & ( kBc5 ) ) != 0 )
77 {
78 for( int i = 0; i < 16; ++i )
79 {
80 alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
81 }
82
83 u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
84 CompressAlphaDxt5( alpha, mask, gBlock );
85 }
86
87 return;
88 }
89
90 // get the block locations
91 void* colourBlock = block;
92 void* alphaBlock = block;
93 if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
94 colourBlock = reinterpret_cast< u8* >( block ) + 8;
95
96 // create the minimal point set
97 ColourSet colours( rgba, mask, flags );
98
99 // check the compression type and compress colour
100 if( colours.GetCount() == 1 )
101 {
102 // always do a single colour fit
103 SingleColourFit fit( &colours, flags );
104 fit.Compress( colourBlock );
105 }
106 else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
107 {
108 // do a range fit
109 RangeFit fit( &colours, flags, metric );
110 fit.Compress( colourBlock );
111 }
112 else
113 {
114 // default to a cluster fit (could be iterative or not)
115 ClusterFit fit( &colours, flags, metric );
116 fit.Compress( colourBlock );
117 }
118
119 // compress alpha separately if necessary
120 if( ( flags & kDxt3 ) != 0 )
121 CompressAlphaDxt3( rgba, mask, alphaBlock );
122 else if( ( flags & kDxt5 ) != 0 )
123 CompressAlphaDxt5( rgba, mask, alphaBlock );
124 }
125
Decompress(u8 * rgba,void const * block,int flags)126 void Decompress( u8* rgba, void const* block, int flags )
127 {
128 // fix any bad flags
129 flags = FixFlags( flags );
130
131 // get the block locations
132 void const* colourBlock = block;
133 void const* alphaBlock = block;
134 if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
135 colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
136
137 // decompress colour
138 DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
139
140 // decompress alpha separately if necessary
141 if( ( flags & kDxt3 ) != 0 )
142 DecompressAlphaDxt3( rgba, alphaBlock );
143 else if( ( flags & kDxt5 ) != 0 )
144 DecompressAlphaDxt5( rgba, alphaBlock );
145 }
146
GetStorageRequirements(int width,int height,int flags)147 int GetStorageRequirements( int width, int height, int flags )
148 {
149 // fix any bad flags
150 flags = FixFlags( flags );
151
152 // compute the storage requirements
153 int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
154 int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
155 return blockcount*blocksize;
156 }
157
CopyRGBA(u8 const * source,u8 * dest,int flags)158 void CopyRGBA( u8 const* source, u8* dest, int flags )
159 {
160 if (flags & kSourceBGRA)
161 {
162 // convert from bgra to rgba
163 dest[0] = source[2];
164 dest[1] = source[1];
165 dest[2] = source[0];
166 dest[3] = source[3];
167 }
168 else
169 {
170 for( int i = 0; i < 4; ++i )
171 *dest++ = *source++;
172 }
173 }
174
CompressImage(u8 const * rgba,int width,int height,int pitch,void * blocks,int flags,float * metric)175 void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
176 {
177 // fix any bad flags
178 flags = FixFlags( flags );
179
180 // loop over blocks
181 #ifdef SQUISH_USE_OPENMP
182 # pragma omp parallel for
183 #endif
184 for( int y = 0; y < height; y += 4 )
185 {
186 // initialise the block output
187 u8* targetBlock = reinterpret_cast< u8* >( blocks );
188 int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
189 targetBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock;
190
191 for( int x = 0; x < width; x += 4 )
192 {
193 // build the 4x4 block of pixels
194 u8 sourceRgba[16*4];
195 u8* targetPixel = sourceRgba;
196 int mask = 0;
197 for( int py = 0; py < 4; ++py )
198 {
199 for( int px = 0; px < 4; ++px )
200 {
201 // get the source pixel in the image
202 int sx = x + px;
203 int sy = y + py;
204
205 // enable if we're in the image
206 if( sx < width && sy < height )
207 {
208 // copy the rgba value
209 u8 const* sourcePixel = rgba + pitch*sy + 4*sx;
210 CopyRGBA(sourcePixel, targetPixel, flags);
211 // enable this pixel
212 mask |= ( 1 << ( 4*py + px ) );
213 }
214
215 // advance to the next pixel
216 targetPixel += 4;
217 }
218 }
219
220 // compress it into the output
221 CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
222
223 // advance
224 targetBlock += bytesPerBlock;
225 }
226 }
227 }
228
CompressImage(u8 const * rgba,int width,int height,void * blocks,int flags,float * metric)229 void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
230 {
231 CompressImage(rgba, width, height, width*4, blocks, flags, metric);
232 }
233
DecompressImage(u8 * rgba,int width,int height,int pitch,void const * blocks,int flags)234 void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
235 {
236 // fix any bad flags
237 flags = FixFlags( flags );
238
239 // loop over blocks
240 #ifdef SQUISH_USE_OPENMP
241 # pragma omp parallel for
242 #endif
243 for( int y = 0; y < height; y += 4 )
244 {
245 // initialise the block input
246 u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
247 int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
248 sourceBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock;
249
250 for( int x = 0; x < width; x += 4 )
251 {
252 // decompress the block
253 u8 targetRgba[4*16];
254 Decompress( targetRgba, sourceBlock, flags );
255
256 // write the decompressed pixels to the correct image locations
257 u8 const* sourcePixel = targetRgba;
258 for( int py = 0; py < 4; ++py )
259 {
260 for( int px = 0; px < 4; ++px )
261 {
262 // get the target location
263 int sx = x + px;
264 int sy = y + py;
265
266 // write if we're in the image
267 if( sx < width && sy < height )
268 {
269 // copy the rgba value
270 u8* targetPixel = rgba + pitch*sy + 4*sx;
271 CopyRGBA(sourcePixel, targetPixel, flags);
272 }
273
274 // advance to the next pixel
275 sourcePixel += 4;
276 }
277 }
278
279 // advance
280 sourceBlock += bytesPerBlock;
281 }
282 }
283 }
284
DecompressImage(u8 * rgba,int width,int height,void const * blocks,int flags)285 void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
286 {
287 DecompressImage( rgba, width, height, width*4, blocks, flags );
288 }
289
ErrorSq(double x,double y)290 static double ErrorSq(double x, double y)
291 {
292 return (x - y) * (x - y);
293 }
294
ComputeBlockWMSE(u8 const * original,u8 const * compressed,unsigned int w,unsigned int h,double & cmse,double & amse)295 static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
296 {
297 // Computes the MSE for the block and weights it by the variance of the original block.
298 // If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel)
299 // then the block is close to being a single colour. Quantisation errors in single colour blocks
300 // are easier to see than similar errors in blocks that contain more colours, particularly when there
301 // are many such blocks in a large area (eg a blue sky background) as they cause banding. Given that
302 // banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor
303 // of 5. This implies that images with large, single colour areas will have a higher potential WMSE
304 // than images with lots of detail.
305
306 cmse = amse = 0;
307 unsigned int sum_p[4]; // per channel sum of pixels
308 unsigned int sum_p2[4]; // per channel sum of pixels squared
309 memset(sum_p, 0, sizeof(sum_p));
310 memset(sum_p2, 0, sizeof(sum_p2));
311 for( unsigned int py = 0; py < 4; ++py )
312 {
313 for( unsigned int px = 0; px < 4; ++px )
314 {
315 if( px < w && py < h )
316 {
317 double pixelCMSE = 0;
318 for( int i = 0; i < 3; ++i )
319 {
320 pixelCMSE += ErrorSq(original[i], compressed[i]);
321 sum_p[i] += original[i];
322 sum_p2[i] += (unsigned int)original[i]*original[i];
323 }
324 if( original[3] == 0 && compressed[3] == 0 )
325 pixelCMSE = 0; // transparent in both, so colour is inconsequential
326 amse += ErrorSq(original[3], compressed[3]);
327 cmse += pixelCMSE;
328 sum_p[3] += original[3];
329 sum_p2[3] += (unsigned int)original[3]*original[3];
330 }
331 original += 4;
332 compressed += 4;
333 }
334 }
335 unsigned int variance = 0;
336 for( int i = 0; i < 4; ++i )
337 variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i];
338 if( variance < 4 * w * w * h * h )
339 {
340 amse *= 5;
341 cmse *= 5;
342 }
343 }
344
ComputeMSE(u8 const * rgba,int width,int height,int pitch,u8 const * dxt,int flags,double & colourMSE,double & alphaMSE)345 void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
346 {
347 // fix any bad flags
348 flags = FixFlags( flags );
349 colourMSE = alphaMSE = 0;
350
351 // initialise the block input
352 squish::u8 const* sourceBlock = dxt;
353 int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 8 : 16;
354
355 // loop over blocks
356 for( int y = 0; y < height; y += 4 )
357 {
358 for( int x = 0; x < width; x += 4 )
359 {
360 // decompress the block
361 u8 targetRgba[4*16];
362 Decompress( targetRgba, sourceBlock, flags );
363 u8 const* sourcePixel = targetRgba;
364
365 // copy across to a similar pixel block
366 u8 originalRgba[4*16];
367 u8* originalPixel = originalRgba;
368
369 for( int py = 0; py < 4; ++py )
370 {
371 for( int px = 0; px < 4; ++px )
372 {
373 int sx = x + px;
374 int sy = y + py;
375 if( sx < width && sy < height )
376 {
377 u8 const* targetPixel = rgba + pitch*sy + 4*sx;
378 CopyRGBA(targetPixel, originalPixel, flags);
379 }
380 sourcePixel += 4;
381 originalPixel += 4;
382 }
383 }
384
385 // compute the weighted MSE of the block
386 double blockCMSE, blockAMSE;
387 ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
388 colourMSE += blockCMSE;
389 alphaMSE += blockAMSE;
390 // advance
391 sourceBlock += bytesPerBlock;
392 }
393 }
394 colourMSE /= (width * height * 3);
395 alphaMSE /= (width * height);
396 }
397
ComputeMSE(u8 const * rgba,int width,int height,u8 const * dxt,int flags,double & colourMSE,double & alphaMSE)398 void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
399 {
400 ComputeMSE(rgba, width, height, width*4, dxt, flags, colourMSE, alphaMSE);
401 }
402
403 } // namespace squish
404