1 /* -----------------------------------------------------------------------------
2 
3     Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
4 
5     Permission is hereby granted, free of charge, to any person obtaining
6     a copy of this software and associated documentation files (the
7     "Software"), to deal in the Software without restriction, including
8     without limitation the rights to use, copy, modify, merge, publish,
9     distribute, sublicense, and/or sell copies of the Software, and to
10     permit persons to whom the Software is furnished to do so, subject to
11     the following conditions:
12 
13     The above copyright notice and this permission notice shall be included
14     in all copies or substantial portions of the Software.
15 
16     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17     OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19     IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20     CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21     TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22     SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 
24    -------------------------------------------------------------------------- */
25 
26 #include <string.h>
27 #include "squish.h"
28 #include "colourset.h"
29 #include "maths.h"
30 #include "rangefit.h"
31 #include "clusterfit.h"
32 #include "colourblock.h"
33 #include "alpha.h"
34 #include "singlecolourfit.h"
35 
36 namespace squish {
37 
FixFlags(int flags)38 static int FixFlags( int flags )
39 {
40     // grab the flag bits
41     int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
42     int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
43     int extra = flags & kWeightColourByAlpha;
44 
45     // set defaults
46     if ( method != kDxt3
47     &&   method != kDxt5
48     &&   method != kBc4
49     &&   method != kBc5 )
50     {
51         method = kDxt1;
52     }
53     if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
54         fit = kColourClusterFit;
55 
56     // done
57     return method | fit | extra;
58 }
59 
CompressMasked(u8 const * rgba,int mask,void * block,int flags,float * metric)60 void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
61 {
62     // fix any bad flags
63     flags = FixFlags( flags );
64 
65     if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
66     {
67         u8 alpha[16*4];
68         for( int i = 0; i < 16; ++i )
69         {
70             alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
71         }
72 
73         u8* rBlock = reinterpret_cast< u8* >( block );
74         CompressAlphaDxt5( alpha, mask, rBlock );
75 
76         if ( ( flags & ( kBc5 ) ) != 0 )
77         {
78             for( int i = 0; i < 16; ++i )
79             {
80                 alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
81             }
82 
83             u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
84             CompressAlphaDxt5( alpha, mask, gBlock );
85         }
86 
87         return;
88     }
89 
90     // get the block locations
91     void* colourBlock = block;
92     void* alphaBlock = block;
93     if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
94         colourBlock = reinterpret_cast< u8* >( block ) + 8;
95 
96     // create the minimal point set
97     ColourSet colours( rgba, mask, flags );
98 
99     // check the compression type and compress colour
100     if( colours.GetCount() == 1 )
101     {
102         // always do a single colour fit
103         SingleColourFit fit( &colours, flags );
104         fit.Compress( colourBlock );
105     }
106     else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
107     {
108         // do a range fit
109         RangeFit fit( &colours, flags, metric );
110         fit.Compress( colourBlock );
111     }
112     else
113     {
114         // default to a cluster fit (could be iterative or not)
115         ClusterFit fit( &colours, flags, metric );
116         fit.Compress( colourBlock );
117     }
118 
119     // compress alpha separately if necessary
120     if( ( flags & kDxt3 ) != 0 )
121         CompressAlphaDxt3( rgba, mask, alphaBlock );
122     else if( ( flags & kDxt5 ) != 0 )
123         CompressAlphaDxt5( rgba, mask, alphaBlock );
124 }
125 
Decompress(u8 * rgba,void const * block,int flags)126 void Decompress( u8* rgba, void const* block, int flags )
127 {
128     // fix any bad flags
129     flags = FixFlags( flags );
130 
131     // get the block locations
132     void const* colourBlock = block;
133     void const* alphaBlock = block;
134     if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
135         colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
136 
137     // decompress colour
138     // -- GODOT start --
139     //DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
140     if(( flags & ( kBc5 ) ) != 0)
141         DecompressColourBc5( rgba, colourBlock);
142     else
143         DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
144     // -- GODOT end --
145 
146     // decompress alpha separately if necessary
147     if( ( flags & kDxt3 ) != 0 )
148         DecompressAlphaDxt3( rgba, alphaBlock );
149     else if( ( flags & kDxt5 ) != 0 )
150         DecompressAlphaDxt5( rgba, alphaBlock );
151 }
152 
GetStorageRequirements(int width,int height,int flags)153 int GetStorageRequirements( int width, int height, int flags )
154 {
155     // fix any bad flags
156     flags = FixFlags( flags );
157 
158     // compute the storage requirements
159     int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
160     int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
161     return blockcount*blocksize;
162 }
163 
CopyRGBA(u8 const * source,u8 * dest,int flags)164 void CopyRGBA( u8 const* source, u8* dest, int flags )
165 {
166     if (flags & kSourceBGRA)
167     {
168         // convert from bgra to rgba
169         dest[0] = source[2];
170         dest[1] = source[1];
171         dest[2] = source[0];
172         dest[3] = source[3];
173     }
174     else
175     {
176         for( int i = 0; i < 4; ++i )
177             *dest++ = *source++;
178     }
179 }
180 
CompressImage(u8 const * rgba,int width,int height,int pitch,void * blocks,int flags,float * metric)181 void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
182 {
183     // fix any bad flags
184     flags = FixFlags( flags );
185 
186     // loop over blocks
187 #ifdef SQUISH_USE_OPENMP
188 #   pragma omp parallel for
189 #endif
190     for( int y = 0; y < height; y += 4 )
191     {
192         // initialise the block output
193         u8* targetBlock = reinterpret_cast< u8* >( blocks );
194         int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
195         targetBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock;
196 
197         for( int x = 0; x < width; x += 4 )
198         {
199             // build the 4x4 block of pixels
200             u8 sourceRgba[16*4];
201             u8* targetPixel = sourceRgba;
202             int mask = 0;
203             for( int py = 0; py < 4; ++py )
204             {
205                 for( int px = 0; px < 4; ++px )
206                 {
207                     // get the source pixel in the image
208                     int sx = x + px;
209                     int sy = y + py;
210 
211                     // enable if we're in the image
212                     if( sx < width && sy < height )
213                     {
214                         // copy the rgba value
215                         u8 const* sourcePixel = rgba + pitch*sy + 4*sx;
216                         CopyRGBA(sourcePixel, targetPixel, flags);
217                         // enable this pixel
218                         mask |= ( 1 << ( 4*py + px ) );
219                     }
220 
221                     // advance to the next pixel
222                     targetPixel += 4;
223                 }
224             }
225 
226             // compress it into the output
227             CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
228 
229             // advance
230             targetBlock += bytesPerBlock;
231         }
232     }
233 }
234 
CompressImage(u8 const * rgba,int width,int height,void * blocks,int flags,float * metric)235 void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
236 {
237     CompressImage(rgba, width, height, width*4, blocks, flags, metric);
238 }
239 
DecompressImage(u8 * rgba,int width,int height,int pitch,void const * blocks,int flags)240 void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
241 {
242     // fix any bad flags
243     flags = FixFlags( flags );
244 
245     // loop over blocks
246 #ifdef SQUISH_USE_OPENMP
247 #   pragma omp parallel for
248 #endif
249     for( int y = 0; y < height; y += 4 )
250     {
251         // initialise the block input
252         u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
253         int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
254         sourceBlock += ( (y / 4) * ( (width + 3) / 4) ) * bytesPerBlock;
255 
256         for( int x = 0; x < width; x += 4 )
257         {
258             // decompress the block
259             u8 targetRgba[4*16];
260             Decompress( targetRgba, sourceBlock, flags );
261 
262             // write the decompressed pixels to the correct image locations
263             u8 const* sourcePixel = targetRgba;
264             for( int py = 0; py < 4; ++py )
265             {
266                 for( int px = 0; px < 4; ++px )
267                 {
268                     // get the target location
269                     int sx = x + px;
270                     int sy = y + py;
271 
272                     // write if we're in the image
273                     if( sx < width && sy < height )
274                     {
275                         // copy the rgba value
276                         u8* targetPixel = rgba + pitch*sy + 4*sx;
277                         CopyRGBA(sourcePixel, targetPixel, flags);
278                     }
279 
280                     // advance to the next pixel
281                     sourcePixel += 4;
282                 }
283             }
284 
285             // advance
286             sourceBlock += bytesPerBlock;
287         }
288     }
289 }
290 
DecompressImage(u8 * rgba,int width,int height,void const * blocks,int flags)291 void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
292 {
293     DecompressImage( rgba, width, height, width*4, blocks, flags );
294 }
295 
// Squared difference between two values.
static double ErrorSq(double x, double y)
{
    const double delta = x - y;
    return delta * delta;
}
300 
ComputeBlockWMSE(u8 const * original,u8 const * compressed,unsigned int w,unsigned int h,double & cmse,double & amse)301 static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
302 {
303     // Computes the MSE for the block and weights it by the variance of the original block.
304     // If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel)
305     // then the block is close to being a single colour. Quantisation errors in single colour blocks
306     // are easier to see than similar errors in blocks that contain more colours, particularly when there
307     // are many such blocks in a large area (eg a blue sky background) as they cause banding.  Given that
308     // banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor
309     // of 5. This implies that images with large, single colour areas will have a higher potential WMSE
310     // than images with lots of detail.
311 
312     cmse = amse = 0;
313     unsigned int sum_p[4];  // per channel sum of pixels
314     unsigned int sum_p2[4]; // per channel sum of pixels squared
315     memset(sum_p, 0, sizeof(sum_p));
316     memset(sum_p2, 0, sizeof(sum_p2));
317     for( unsigned int py = 0; py < 4; ++py )
318     {
319         for( unsigned int px = 0; px < 4; ++px )
320         {
321             if( px < w && py < h )
322             {
323                 double pixelCMSE = 0;
324                 for( int i = 0; i < 3; ++i )
325                 {
326                     pixelCMSE += ErrorSq(original[i], compressed[i]);
327                     sum_p[i] += original[i];
328                     sum_p2[i] += (unsigned int)original[i]*original[i];
329                 }
330                 if( original[3] == 0 && compressed[3] == 0 )
331                     pixelCMSE = 0; // transparent in both, so colour is inconsequential
332                 amse += ErrorSq(original[3], compressed[3]);
333                 cmse += pixelCMSE;
334                 sum_p[3] += original[3];
335                 sum_p2[3] += (unsigned int)original[3]*original[3];
336             }
337             original += 4;
338             compressed += 4;
339         }
340     }
341     unsigned int variance = 0;
342     for( int i = 0; i < 4; ++i )
343         variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i];
344     if( variance < 4 * w * w * h * h )
345     {
346         amse *= 5;
347         cmse *= 5;
348     }
349 }
350 
ComputeMSE(u8 const * rgba,int width,int height,int pitch,u8 const * dxt,int flags,double & colourMSE,double & alphaMSE)351 void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
352 {
353     // fix any bad flags
354     flags = FixFlags( flags );
355     colourMSE = alphaMSE = 0;
356 
357     // initialise the block input
358     squish::u8 const* sourceBlock = dxt;
359     int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 8 : 16;
360 
361     // loop over blocks
362     for( int y = 0; y < height; y += 4 )
363     {
364         for( int x = 0; x < width; x += 4 )
365         {
366             // decompress the block
367             u8 targetRgba[4*16];
368             Decompress( targetRgba, sourceBlock, flags );
369             u8 const* sourcePixel = targetRgba;
370 
371             // copy across to a similar pixel block
372             u8 originalRgba[4*16];
373             u8* originalPixel = originalRgba;
374 
375             for( int py = 0; py < 4; ++py )
376             {
377                 for( int px = 0; px < 4; ++px )
378                 {
379                     int sx = x + px;
380                     int sy = y + py;
381                     if( sx < width && sy < height )
382                     {
383                         u8 const* targetPixel = rgba + pitch*sy + 4*sx;
384                         CopyRGBA(targetPixel, originalPixel, flags);
385                     }
386                     sourcePixel += 4;
387                     originalPixel += 4;
388                 }
389             }
390 
391             // compute the weighted MSE of the block
392             double blockCMSE, blockAMSE;
393             ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
394             colourMSE += blockCMSE;
395             alphaMSE += blockAMSE;
396             // advance
397             sourceBlock += bytesPerBlock;
398         }
399     }
400     colourMSE /= (width * height * 3);
401     alphaMSE /= (width * height);
402 }
403 
ComputeMSE(u8 const * rgba,int width,int height,u8 const * dxt,int flags,double & colourMSE,double & alphaMSE)404 void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
405 {
406     ComputeMSE(rgba, width, height, width*4, dxt, flags, colourMSE, alphaMSE);
407 }
408 
409 } // namespace squish
410