1 /* -----------------------------------------------------------------------------
2 
3     Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
4 
5     Permission is hereby granted, free of charge, to any person obtaining
6     a copy of this software and associated documentation files (the
7     "Software"), to deal in the Software without restriction, including
8     without limitation the rights to use, copy, modify, merge, publish,
9     distribute, sublicense, and/or sell copies of the Software, and to
10     permit persons to whom the Software is furnished to do so, subject to
11     the following conditions:
12 
13     The above copyright notice and this permission notice shall be included
14     in all copies or substantial portions of the Software.
15 
16     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19     IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20     CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21     TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22     SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 
24    -------------------------------------------------------------------------- */
25 
26 #include <string.h>
27 #include "squish.h"
28 #include "colourset.h"
29 #include "maths.h"
30 #include "rangefit.h"
31 #include "clusterfit.h"
32 #include "colourblock.h"
33 #include "alpha.h"
34 #include "singlecolourfit.h"
35 
36 namespace squish {
37 
FixFlags(int flags)38 static int FixFlags( int flags )
39 {
40     // grab the flag bits
41     int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
42     int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
43     int extra = flags & kWeightColourByAlpha;
44 
45     // set defaults
46     if ( method != kDxt3
47     &&   method != kDxt5
48     &&   method != kBc4
49     &&   method != kBc5 )
50     {
51         method = kDxt1;
52     }
53     if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
54         fit = kColourClusterFit;
55 
56     // done
57     return method | fit | extra;
58 }
59 
CompressMasked(u8 const * rgba,int mask,void * block,int flags,float * metric)60 void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
61 {
62     // fix any bad flags
63     flags = FixFlags( flags );
64 
65     if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
66     {
67         u8 alpha[16*4];
68         for( int i = 0; i < 16; ++i )
69         {
70             alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
71         }
72 
73         u8* rBlock = reinterpret_cast< u8* >( block );
74         CompressAlphaDxt5( alpha, mask, rBlock );
75 
76         if ( ( flags & ( kBc5 ) ) != 0 )
77         {
78             for( int i = 0; i < 16; ++i )
79             {
80                 alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
81             }
82 
83             u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
84             CompressAlphaDxt5( alpha, mask, gBlock );
85         }
86 
87         return;
88     }
89 
90     // get the block locations
91     void* colourBlock = block;
92     void* alphaBlock = block;
93     if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
94         colourBlock = reinterpret_cast< u8* >( block ) + 8;
95 
96     // create the minimal point set
97     ColourSet colours( rgba, mask, flags );
98 
99     // check the compression type and compress colour
100     if( colours.GetCount() == 1 )
101     {
102         // always do a single colour fit
103         SingleColourFit fit( &colours, flags );
104         fit.Compress( colourBlock );
105     }
106     else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
107     {
108         // do a range fit
109         RangeFit fit( &colours, flags, metric );
110         fit.Compress( colourBlock );
111     }
112     else
113     {
114         // default to a cluster fit (could be iterative or not)
115         ClusterFit fit( &colours, flags, metric );
116         fit.Compress( colourBlock );
117     }
118 
119     // compress alpha separately if necessary
120     if( ( flags & kDxt3 ) != 0 )
121         CompressAlphaDxt3( rgba, mask, alphaBlock );
122     else if( ( flags & kDxt5 ) != 0 )
123         CompressAlphaDxt5( rgba, mask, alphaBlock );
124 }
125 
Decompress(u8 * rgba,void const * block,int flags)126 void Decompress( u8* rgba, void const* block, int flags )
127 {
128     // fix any bad flags
129     flags = FixFlags( flags );
130 
131     // get the block locations
132     void const* colourBlock = block;
133     void const* alphaBlock = block;
134     if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
135         colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
136 
137     // decompress colour
138     DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
139 
140     // decompress alpha separately if necessary
141     if( ( flags & kDxt3 ) != 0 )
142         DecompressAlphaDxt3( rgba, alphaBlock );
143     else if( ( flags & kDxt5 ) != 0 )
144         DecompressAlphaDxt5( rgba, alphaBlock );
145 }
146 
GetStorageRequirements(int width,int height,int flags)147 int GetStorageRequirements( int width, int height, int flags )
148 {
149     // fix any bad flags
150     flags = FixFlags( flags );
151 
152     // compute the storage requirements
153     int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
154     int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
155     return blockcount*blocksize;
156 }
157 
CopyRGBA(u8 const * source,u8 * dest,int flags)158 void CopyRGBA( u8 const* source, u8* dest, int flags )
159 {
160     if (flags & kSourceBGRA)
161     {
162         // convert from bgra to rgba
163         dest[0] = source[2];
164         dest[1] = source[1];
165         dest[2] = source[0];
166         dest[3] = source[3];
167     }
168     else
169     {
170         for( int i = 0; i < 4; ++i )
171             *dest++ = *source++;
172     }
173 }
174 
CompressImage(u8 const * rgba,int width,int height,int pitch,void * blocks,int flags,float * metric)175 void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
176 {
177     // fix any bad flags
178     flags = FixFlags( flags );
179 
180     // loop over blocks
181 #ifdef SQUISH_USE_OPENMP
182 #   pragma omp parallel for
183 #endif
184     for( int y = 0; y < height; y += 4 )
185     {
186         // initialise the block output
187         u8* targetBlock = reinterpret_cast< u8* >( blocks );
188         int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
189         targetBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock;
190 
191         for( int x = 0; x < width; x += 4 )
192         {
193             // build the 4x4 block of pixels
194             u8 sourceRgba[16*4];
195             u8* targetPixel = sourceRgba;
196             int mask = 0;
197             for( int py = 0; py < 4; ++py )
198             {
199                 for( int px = 0; px < 4; ++px )
200                 {
201                     // get the source pixel in the image
202                     int sx = x + px;
203                     int sy = y + py;
204 
205                     // enable if we're in the image
206                     if( sx < width && sy < height )
207                     {
208                         // copy the rgba value
209                         u8 const* sourcePixel = rgba + pitch*sy + 4*sx;
210                         CopyRGBA(sourcePixel, targetPixel, flags);
211                         // enable this pixel
212                         mask |= ( 1 << ( 4*py + px ) );
213                     }
214 
215                     // advance to the next pixel
216                     targetPixel += 4;
217                 }
218             }
219 
220             // compress it into the output
221             CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
222 
223             // advance
224             targetBlock += bytesPerBlock;
225         }
226     }
227 }
228 
CompressImage(u8 const * rgba,int width,int height,void * blocks,int flags,float * metric)229 void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
230 {
231     CompressImage(rgba, width, height, width*4, blocks, flags, metric);
232 }
233 
DecompressImage(u8 * rgba,int width,int height,int pitch,void const * blocks,int flags)234 void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
235 {
236     // fix any bad flags
237     flags = FixFlags( flags );
238 
239     // loop over blocks
240 #ifdef SQUISH_USE_OPENMP
241 #   pragma omp parallel for
242 #endif
243     for( int y = 0; y < height; y += 4 )
244     {
245         // initialise the block input
246         u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
247         int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
248         sourceBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock;
249 
250         for( int x = 0; x < width; x += 4 )
251         {
252             // decompress the block
253             u8 targetRgba[4*16];
254             Decompress( targetRgba, sourceBlock, flags );
255 
256             // write the decompressed pixels to the correct image locations
257             u8 const* sourcePixel = targetRgba;
258             for( int py = 0; py < 4; ++py )
259             {
260                 for( int px = 0; px < 4; ++px )
261                 {
262                     // get the target location
263                     int sx = x + px;
264                     int sy = y + py;
265 
266                     // write if we're in the image
267                     if( sx < width && sy < height )
268                     {
269                         // copy the rgba value
270                         u8* targetPixel = rgba + pitch*sy + 4*sx;
271                         CopyRGBA(sourcePixel, targetPixel, flags);
272                     }
273 
274                     // advance to the next pixel
275                     sourcePixel += 4;
276                 }
277             }
278 
279             // advance
280             sourceBlock += bytesPerBlock;
281         }
282     }
283 }
284 
DecompressImage(u8 * rgba,int width,int height,void const * blocks,int flags)285 void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
286 {
287     DecompressImage( rgba, width, height, width*4, blocks, flags );
288 }
289 
ErrorSq(double x,double y)290 static double ErrorSq(double x, double y)
291 {
292     return (x - y) * (x - y);
293 }
294 
ComputeBlockWMSE(u8 const * original,u8 const * compressed,unsigned int w,unsigned int h,double & cmse,double & amse)295 static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
296 {
297     // Computes the MSE for the block and weights it by the variance of the original block.
298     // If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel)
299     // then the block is close to being a single colour. Quantisation errors in single colour blocks
300     // are easier to see than similar errors in blocks that contain more colours, particularly when there
301     // are many such blocks in a large area (eg a blue sky background) as they cause banding.  Given that
302     // banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor
303     // of 5. This implies that images with large, single colour areas will have a higher potential WMSE
304     // than images with lots of detail.
305 
306     cmse = amse = 0;
307     unsigned int sum_p[4];  // per channel sum of pixels
308     unsigned int sum_p2[4]; // per channel sum of pixels squared
309     memset(sum_p, 0, sizeof(sum_p));
310     memset(sum_p2, 0, sizeof(sum_p2));
311     for( unsigned int py = 0; py < 4; ++py )
312     {
313         for( unsigned int px = 0; px < 4; ++px )
314         {
315             if( px < w && py < h )
316             {
317                 double pixelCMSE = 0;
318                 for( int i = 0; i < 3; ++i )
319                 {
320                     pixelCMSE += ErrorSq(original[i], compressed[i]);
321                     sum_p[i] += original[i];
322                     sum_p2[i] += (unsigned int)original[i]*original[i];
323                 }
324                 if( original[3] == 0 && compressed[3] == 0 )
325                     pixelCMSE = 0; // transparent in both, so colour is inconsequential
326                 amse += ErrorSq(original[3], compressed[3]);
327                 cmse += pixelCMSE;
328                 sum_p[3] += original[3];
329                 sum_p2[3] += (unsigned int)original[3]*original[3];
330             }
331             original += 4;
332             compressed += 4;
333         }
334     }
335     unsigned int variance = 0;
336     for( int i = 0; i < 4; ++i )
337         variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i];
338     if( variance < 4 * w * w * h * h )
339     {
340         amse *= 5;
341         cmse *= 5;
342     }
343 }
344 
ComputeMSE(u8 const * rgba,int width,int height,int pitch,u8 const * dxt,int flags,double & colourMSE,double & alphaMSE)345 void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
346 {
347     // fix any bad flags
348     flags = FixFlags( flags );
349     colourMSE = alphaMSE = 0;
350 
351     // initialise the block input
352     squish::u8 const* sourceBlock = dxt;
353     int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 8 : 16;
354 
355     // loop over blocks
356     for( int y = 0; y < height; y += 4 )
357     {
358         for( int x = 0; x < width; x += 4 )
359         {
360             // decompress the block
361             u8 targetRgba[4*16];
362             Decompress( targetRgba, sourceBlock, flags );
363             u8 const* sourcePixel = targetRgba;
364 
365             // copy across to a similar pixel block
366             u8 originalRgba[4*16];
367             u8* originalPixel = originalRgba;
368 
369             for( int py = 0; py < 4; ++py )
370             {
371                 for( int px = 0; px < 4; ++px )
372                 {
373                     int sx = x + px;
374                     int sy = y + py;
375                     if( sx < width && sy < height )
376                     {
377                         u8 const* targetPixel = rgba + pitch*sy + 4*sx;
378                         CopyRGBA(targetPixel, originalPixel, flags);
379                     }
380                     sourcePixel += 4;
381                     originalPixel += 4;
382                 }
383             }
384 
385             // compute the weighted MSE of the block
386             double blockCMSE, blockAMSE;
387             ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
388             colourMSE += blockCMSE;
389             alphaMSE += blockAMSE;
390             // advance
391             sourceBlock += bytesPerBlock;
392         }
393     }
394     colourMSE /= (width * height * 3);
395     alphaMSE /= (width * height);
396 }
397 
ComputeMSE(u8 const * rgba,int width,int height,u8 const * dxt,int flags,double & colourMSE,double & alphaMSE)398 void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
399 {
400     ComputeMSE(rgba, width, height, width*4, dxt, flags, colourMSE, alphaMSE);
401 }
402 
403 } // namespace squish
404