1 // stb_dxt.h - v1.07 - DXT1/DXT5 compressor - public domain
2 // original by fabian "ryg" giesen - ported to C by stb
3 // use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
4 //
5 // USAGE:
6 //   call stb_compress_dxt_block() for every block (you must pad)
7 //     source should be a 4x4 block of RGBA data in row-major order;
8 //     A is ignored if you specify alpha=0; you can turn on dithering
9 //     and "high quality" using mode.
10 //
11 // version history:
12 //   v1.07  - bc4; allow not using libc; add STB_DXT_STATIC
13 //   v1.06  - (stb) fix to known-broken 1.05
14 //   v1.05  - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski)
15 //   v1.04  - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
16 //            single color match fix (allow for inexact color interpolation);
17 //            optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
18 //   v1.03  - (stb) endianness support
19 //   v1.02  - (stb) fix alpha encoding bug
20 //   v1.01  - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
21 //   v1.00  - (stb) first release
22 //
23 // contributors:
24 //   Kevin Schmidt (#defines for "freestanding" compilation)
25 //   github:ppiastucki (BC4 support)
26 //
27 // LICENSE
28 //
29 //   See end of file for license information.
30 
31 #ifndef STB_INCLUDE_STB_DXT_H
32 #define STB_INCLUDE_STB_DXT_H
33 
34 // compression mode (bitflags)
35 #define STB_DXT_NORMAL    0
36 #define STB_DXT_DITHER    1   // use dithering. dubious win. never use for normal maps and the like!
37 #define STB_DXT_HIGHQUAL  2   // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42 
43 #ifdef STB_DXT_STATIC
44 #define STBDDEF static
45 #else
46 #define STBDDEF extern
47 #endif
48 
// Compress one 4x4 block of RGBA pixels (64 bytes, row-major).
//   dest  - receives 8 bytes when alpha == 0; otherwise 16 bytes, with the
//           8-byte alpha block first and the 8-byte color block after it.
//   alpha - nonzero to also encode the A channel.
//   mode  - bitwise OR of the STB_DXT_* flags above.
// The internal lookup tables are built on the first call, guarded only by a
// plain static flag.
49 STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode);
// Compress one 4x4 block of single-channel pixels (16 bytes) into 8 bytes.
50 STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel);
// Compress one 4x4 block of two-channel interleaved pixels (32 bytes) into
// 16 bytes: the first channel's 8-byte block followed by the second's.
51 STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel);
52 
53 #ifdef __cplusplus
54 }
55 #endif
56 
57 #define STB_COMPRESS_DXT_BLOCK
58 
59 #ifdef STB_DXT_IMPLEMENTATION
60 
61 // configuration options for DXT encoder. set them in the project/makefile or just define
62 // them at the top.
63 
64 // STB_DXT_USE_ROUNDING_BIAS
65 //     use a rounding bias during color interpolation. this is closer to what "ideal"
66 //     interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
67 //     implicitly had this turned on.
68 //
69 //     in case you're targeting a specific type of hardware (e.g. console programmers):
70 //     NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
71 //     to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
72 //     you also see "(a*5 + b*3) / 8" on some old GPU designs.
73 // #define STB_DXT_USE_ROUNDING_BIAS
74 
#include <stdlib.h>

// math.h is only needed when one of the default math hooks below is in use.
#if !defined(STBD_ABS) || !defined(STBD_FABS)
#include <math.h>
#endif

// Overridable libc hooks: define any of these before including the
// implementation to supply your own replacement.

// integer absolute value
#ifndef STBD_ABS
#define STBD_ABS(i)           abs(i)
#endif

// floating-point absolute value
#ifndef STBD_FABS
#define STBD_FABS(x)          fabs(x)
#endif

// fill n bytes at dst with val; invoked as STBD_MEMSET(dst, val, n), so the
// macro has to accept all three arguments
#ifndef STBD_MEMSET
#include <string.h>
#define STBD_MEMSET(dst,val,n) memset((dst),(val),(n))
#endif
93 
// Lookup tables; all of them are filled in at run time by stb__InitDXT().

// 5-bit and 6-bit channel values widened to 8 bits
94 static unsigned char stb__Expand5[32];
95 static unsigned char stb__Expand6[64];
// per 8-bit target value: the [0]=max / [1]=min endpoint pair chosen by
// stb__PrepareOptTable() for 5-bit and 6-bit channels
96 static unsigned char stb__OMatch5[256][2];
97 static unsigned char stb__OMatch6[256][2];
// 8-bit value -> nearest expanded 5-bit (R/B) or 6-bit (G) value; entry i
// holds the result for clamp(i-8, 0, 255), hence the 16 extra slots
98 static unsigned char stb__QuantRBTab[256+16];
99 static unsigned char stb__QuantGTab[256+16];
100 
// Shift-only form of a rounded a*b/255 for 8-bit operands:
// t = a*b + 128, result = (t + t/256) / 256.
static int stb__Mul8Bit(int a, int b)
{
   const int biased = a*b + 128;
   const int folded = biased + (biased >> 8);
   return folded >> 8;
}
106 
stb__From16Bit(unsigned char * out,unsigned short v)107 static void stb__From16Bit(unsigned char *out, unsigned short v)
108 {
109    int rv = (v & 0xf800) >> 11;
110    int gv = (v & 0x07e0) >>  5;
111    int bv = (v & 0x001f) >>  0;
112 
113    out[0] = stb__Expand5[rv];
114    out[1] = stb__Expand6[gv];
115    out[2] = stb__Expand5[bv];
116    out[3] = 0;
117 }
118 
// Pack 8-bit r,g,b into one 5:6:5 value; each channel is scaled to its
// field width with stb__Mul8Bit.
static unsigned short stb__As16Bit(int r, int g, int b)
{
   const int r5 = stb__Mul8Bit(r,31);
   const int g6 = stb__Mul8Bit(g,63);
   const int b5 = stb__Mul8Bit(b,31);

   return (unsigned short) ((r5 << 11) + (g6 << 5) + b5);
}
123 
// Interpolate one third of the way from a toward b (weights 2/3 and 1/3).
// The rounding flavor is selected at compile time.
static int stb__Lerp13(int a, int b)
{
#ifdef STB_DXT_USE_ROUNDING_BIAS
   // biased variant: a plus the scaled difference
   const int delta = stb__Mul8Bit(b-a, 0x55);
   return a + delta;
#else
   // unbiased variant: plain truncating division.
   // ("/ 3" can be replaced by "* 0xaaab) >> 17" if the division is too slow.)
   const int weighted = 2*a + b;
   return weighted / 3;
#endif
}
136 
// Apply stb__Lerp13 to the first three bytes (R,G,B) of p1 and p2 and store
// the results in out[0..2]; a fourth byte is neither read nor written.
static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
{
   int ch;
   for (ch = 0; ch < 3; ++ch)
      out[ch] = (unsigned char) stb__Lerp13(p1[ch], p2[ch]);
}
144 
145 /****************************************************************************/
146 
// Build the table used to reproduce constant colors as accurately as possible.
// For every 8-bit target value, all (min,max) endpoint pairs out of `size`
// quantized levels are tried and the pair whose 1/3 interpolation lands
// closest is stored as Table[2*target+0] = max index, [2*target+1] = min index.
//   expand - maps a quantized level to its 8-bit value
static void stb__PrepareOptTable(unsigned char *Table,const unsigned char *expand,int size)
{
   int target;

   for (target = 0; target < 256; ++target) {
      unsigned char *entry = Table + target*2;
      int lowestErr = 256;
      int lo, hi;

      for (lo = 0; lo < size; ++lo) {
         const int loVal = expand[lo];

         for (hi = 0; hi < size; ++hi) {
            const int hiVal = expand[hi];
            int err = STBD_ABS(stb__Lerp13(hiVal, loVal) - target);

            // DX10 spec says that interpolation must be within 3% of the
            // "correct" result, so charge that as an extra error term. (One
            // would normally expect a random +-1.5% error, but the spec never
            // says the error has to be unbiased - better safe than sorry.)
            err += STBD_ABS(hiVal - loVal) * 3 / 100;

            if (err >= lowestErr)
               continue;

            entry[0] = (unsigned char) hi;
            entry[1] = (unsigned char) lo;
            lowestErr = err;
         }
      }
   }
}
175 
// Expand two 5:6:5 endpoints into a four-entry palette, 4 bytes per entry:
//   color+0  = c0, color+4 = c1,
//   color+8  = RGB one third of the way from c0 to c1,
//   color+12 = RGB one third of the way from c1 to c0.
// The fourth byte of the two interpolated entries is left untouched.
static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1)
{
   unsigned char *end0 = color;
   unsigned char *end1 = color + 4;

   stb__From16Bit(end0, c0);
   stb__From16Bit(end1, c1);
   stb__Lerp13RGB(color + 8,  end0, end1);
   stb__Lerp13RGB(color + 12, end1, end0);
}
183 
184 // Block dithering function. Simply dithers a block to 565 RGB.
185 // (Floyd-Steinberg)
stb__DitherBlock(unsigned char * dest,unsigned char * block)186 static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
187 {
188   int err[8],*ep1 = err,*ep2 = err+4, *et;
189   int ch,y;
190 
191   // process channels seperately
192   for (ch=0; ch<3; ++ch) {
193       unsigned char *bp = block+ch, *dp = dest+ch;
194       unsigned char *quant = (ch == 1) ? stb__QuantGTab+8 : stb__QuantRBTab+8;
195       STBD_MEMSET(err, 0, sizeof(err));
196       for(y=0; y<4; ++y) {
197          dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
198          ep1[0] = bp[ 0] - dp[ 0];
199          dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
200          ep1[1] = bp[ 4] - dp[ 4];
201          dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
202          ep1[2] = bp[ 8] - dp[ 8];
203          dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
204          ep1[3] = bp[12] - dp[12];
205          bp += 16;
206          dp += 16;
207          et = ep1, ep1 = ep2, ep2 = et; // swap
208       }
209    }
210 }
211 
// Classify a projected value against the three crossover thresholds and
// return the matching palette slot (0..3).
static int stb__SelectStop(int dot, int c0Point, int halfPoint, int c3Point)
{
   if (dot < halfPoint)
      return (dot < c0Point) ? 1 : 3;
   return (dot < c3Point) ? 2 : 0;
}

// The color matching function.
//   block  - 16 RGBA pixels (only R,G,B are read)
//   color  - 4 palette entries of 4 bytes each (only R,G,B are read);
//            entries 0 and 1 are the endpoints
//   dither - nonzero to diffuse the matching error (Floyd-Steinberg)
// Returns the 16 two-bit palette indices packed into 32 bits, pixel 0 in the
// lowest two bits.
static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color,int dither)
{
   unsigned int mask = 0;
   const int dirr = color[0*4+0] - color[1*4+0];
   const int dirg = color[0*4+1] - color[1*4+1];
   const int dirb = color[0*4+2] - color[1*4+2];
   int dots[16];
   int stops[4];
   int i;
   int c0Point, halfPoint, c3Point;

   for (i=0;i<16;i++)
      dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb;

   for (i=0;i<4;i++)
      stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb;

   // think of the colors as arranged on a line; project point onto that line, then choose
   // next color out of available ones. we compute the crossover points for "best color in top
   // half"/"best in bottom half" and then the same inside that subinterval.
   //
   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
   // but it's very close and a lot faster.
   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html

   c0Point   = (stops[1] + stops[3]) >> 1;
   halfPoint = (stops[3] + stops[2]) >> 1;
   c3Point   = (stops[2] + stops[0]) >> 1;

   if (!dither) {
      // the version without dithering is straightforward
      for (i=15;i>=0;i--)
         mask = (mask << 2) | (unsigned int) stb__SelectStop(dots[i], c0Point, halfPoint, c3Point);
   } else {
      // with floyd-steinberg dithering
      int err[8] = { 0 };
      int *ep1 = err, *ep2 = err+4; // ep1: row being produced, ep2: row above
      const int *dp = dots;
      int x, y;

      // Work in 1/16 units. The projections can be negative and left-shifting
      // a negative int is undefined, so scale by multiplying instead.
      c0Point   *= 16;
      halfPoint *= 16;
      c3Point   *= 16;

      for (y=0;y<4;y++) {
         // unsigned so that placing row 3 in bits 24..31 never shifts into a sign bit
         unsigned int lmask = 0;

         for (x=0;x<4;x++) {
            int dot, step;
            // 7/16 from the left, 1/16 upper-left, 5/16 above, 3/16 upper-right;
            // neighbours outside the block contribute nothing
            int diffused = 5*ep2[x];
            if (x > 0) diffused += 7*ep1[x-1] + ep2[x-1];
            if (x < 3) diffused += 3*ep2[x+1];

            dot  = dp[x]*16 + diffused;
            step = stb__SelectStop(dot, c0Point, halfPoint, c3Point);
            ep1[x] = dp[x] - stops[step];
            lmask |= (unsigned int) step << (2*x);
         }

         dp += 4;
         mask |= lmask << (y*8);
         { int *et = ep1; ep1 = ep2; ep2 = et; } // swap
      }
   }

   return mask;
}
308 
// The color optimization function. (Clever code, part 1)
// Chooses two endpoint colors for a 4x4 RGBA block: the principal axis of
// the RGB distribution is estimated by power iteration on the covariance
// matrix, and the pixels with the largest and smallest projection onto that
// axis are returned as 5:6:5 values in *pmax16 and *pmin16.
static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
{
   static const int nIterPower = 4;
   int lowDot = 0x7fffffff, highDot = -0x7fffffff;
   unsigned char *lowPix = block, *highPix = block;
   int mean[3], lo[3], hi[3];
   int cov[6];
   float covf[6], axisR, axisG, axisB;
   double magn;
   int dirR, dirG, dirB;
   int ch, i, iter;

   // per-channel mean (rounded) and value range
   for (ch = 0; ch < 3; ch++) {
      const unsigned char *bp = ((const unsigned char *) block) + ch;
      int sum  = bp[0];
      int minv = bp[0];
      int maxv = bp[0];

      for (i = 1; i < 16; i++) {
         const int v = bp[i*4];
         sum += v;
         if (v < minv) minv = v;
         else if (v > maxv) maxv = v;
      }

      mean[ch] = (sum + 8) >> 4;
      lo[ch]   = minv;
      hi[ch]   = maxv;
   }

   // covariance matrix, upper triangle: rr, rg, rb, gg, gb, bb
   for (i = 0; i < 6; i++)
      cov[i] = 0;

   for (i = 0; i < 16; i++) {
      const unsigned char *px = block + i*4;
      const int dr = px[0] - mean[0];
      const int dg = px[1] - mean[1];
      const int db = px[2] - mean[2];

      cov[0] += dr*dr;
      cov[1] += dr*dg;
      cov[2] += dr*db;
      cov[3] += dg*dg;
      cov[4] += dg*db;
      cov[5] += db*db;
   }

   // convert covariance matrix to float, find principal axis via power iter,
   // seeded with the per-channel extent
   for (i = 0; i < 6; i++)
      covf[i] = cov[i] / 255.0f;

   axisR = (float) (hi[0] - lo[0]);
   axisG = (float) (hi[1] - lo[1]);
   axisB = (float) (hi[2] - lo[2]);

   for (iter = 0; iter < nIterPower; iter++) {
      const float nr = axisR*covf[0] + axisG*covf[1] + axisB*covf[2];
      const float ng = axisR*covf[1] + axisG*covf[3] + axisB*covf[4];
      const float nb = axisR*covf[2] + axisG*covf[4] + axisB*covf[5];

      axisR = nr;
      axisG = ng;
      axisB = nb;
   }

   // largest component magnitude of the axis
   magn = STBD_FABS(axisR);
   if (STBD_FABS(axisG) > magn) magn = STBD_FABS(axisG);
   if (STBD_FABS(axisB) > magn) magn = STBD_FABS(axisB);

   if (magn < 4.0f) { // too small, default to luminance
      dirR = 299; // JPEG YCbCr luma coefs, scaled by 1000.
      dirG = 587;
      dirB = 114;
   } else {
      // rescale so the largest component has magnitude 512
      magn = 512.0 / magn;
      dirR = (int) (axisR * magn);
      dirG = (int) (axisG * magn);
      dirB = (int) (axisB * magn);
   }

   // Pick colors at extreme points
   for (i = 0; i < 16; i++) {
      unsigned char *px = block + i*4;
      const int dot = px[0]*dirR + px[1]*dirG + px[2]*dirB;

      if (dot < lowDot) {
         lowDot = dot;
         lowPix = px;
      }

      if (dot > highDot) {
         highDot = dot;
         highPix = px;
      }
   }

   *pmax16 = stb__As16Bit(highPix[0],highPix[1],highPix[2]);
   *pmin16 = stb__As16Bit(lowPix[0],lowPix[1],lowPix[2]);
}
413 
// Truncate y toward zero and clamp the result to [p0, p1].
// The range test is done on the float before converting: converting a float
// that does not fit in an int is undefined behaviour, and on common targets
// it used to turn a huge positive y into p0. NaN yields p0.
static int stb__sclamp(float y, int p0, int p1)
{
   if (!(y >= (float) p0)) return p0; // below range, or NaN
   if (y >= (float) p1) return p1;
   return (int) y;
}
421 
422 // The refinement function. (Clever code, part 2)
423 // Tries to optimize colors to suit block contents better.
424 // (By solving a least squares system via normal equations+Cramer's rule)
stb__RefineBlock(unsigned char * block,unsigned short * pmax16,unsigned short * pmin16,unsigned int mask)425 static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
426 {
427    static const int w1Tab[4] = { 3,0,2,1 };
428    static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
429    // ^some magic to save a lot of multiplies in the accumulating loop...
430    // (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
431 
432    float frb,fg;
433    unsigned short oldMin, oldMax, min16, max16;
434    int i, akku = 0, xx,xy,yy;
435    int At1_r,At1_g,At1_b;
436    int At2_r,At2_g,At2_b;
437    unsigned int cm = mask;
438 
439    oldMin = *pmin16;
440    oldMax = *pmax16;
441 
442    if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
443    {
444       // yes, linear system would be singular; solve using optimal
445       // single-color match on average color
446       int r = 8, g = 8, b = 8;
447       for (i=0;i<16;++i) {
448          r += block[i*4+0];
449          g += block[i*4+1];
450          b += block[i*4+2];
451       }
452 
453       r >>= 4; g >>= 4; b >>= 4;
454 
455       max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
456       min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
457    } else {
458       At1_r = At1_g = At1_b = 0;
459       At2_r = At2_g = At2_b = 0;
460       for (i=0;i<16;++i,cm>>=2) {
461          int step = cm&3;
462          int w1 = w1Tab[step];
463          int r = block[i*4+0];
464          int g = block[i*4+1];
465          int b = block[i*4+2];
466 
467          akku    += prods[step];
468          At1_r   += w1*r;
469          At1_g   += w1*g;
470          At1_b   += w1*b;
471          At2_r   += r;
472          At2_g   += g;
473          At2_b   += b;
474       }
475 
476       At2_r = 3*At2_r - At1_r;
477       At2_g = 3*At2_g - At1_g;
478       At2_b = 3*At2_b - At1_b;
479 
480       // extract solutions and decide solvability
481       xx = akku >> 16;
482       yy = (akku >> 8) & 0xff;
483       xy = (akku >> 0) & 0xff;
484 
485       frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy);
486       fg = frb * 63.0f / 31.0f;
487 
488       // solve.
489       max16 =   stb__sclamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11;
490       max16 |=  stb__sclamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5;
491       max16 |=  stb__sclamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0;
492 
493       min16 =   stb__sclamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11;
494       min16 |=  stb__sclamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5;
495       min16 |=  stb__sclamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0;
496    }
497 
498    *pmin16 = min16;
499    *pmax16 = max16;
500    return oldMin != min16 || oldMax != max16;
501 }
502 
503 // Color block compression
stb__CompressColorBlock(unsigned char * dest,unsigned char * block,int mode)504 static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
505 {
506    unsigned int mask;
507    int i;
508    int dither;
509    int refinecount;
510    unsigned short max16, min16;
511    unsigned char dblock[16*4],color[4*4];
512 
513    dither = mode & STB_DXT_DITHER;
514    refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
515 
516    // check if block is constant
517    for (i=1;i<16;i++)
518       if (((unsigned int *) block)[i] != ((unsigned int *) block)[0])
519          break;
520 
521    if(i == 16) { // constant color
522       int r = block[0], g = block[1], b = block[2];
523       mask  = 0xaaaaaaaa;
524       max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
525       min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
526    } else {
527       // first step: compute dithered version for PCA if desired
528       if(dither)
529          stb__DitherBlock(dblock,block);
530 
531       // second step: pca+map along principal axis
532       stb__OptimizeColorsBlock(dither ? dblock : block,&max16,&min16);
533       if (max16 != min16) {
534          stb__EvalColors(color,max16,min16);
535          mask = stb__MatchColorsBlock(block,color,dither);
536       } else
537          mask = 0;
538 
539       // third step: refine (multiple times if requested)
540       for (i=0;i<refinecount;i++) {
541          unsigned int lastmask = mask;
542 
543          if (stb__RefineBlock(dither ? dblock : block,&max16,&min16,mask)) {
544             if (max16 != min16) {
545                stb__EvalColors(color,max16,min16);
546                mask = stb__MatchColorsBlock(block,color,dither);
547             } else {
548                mask = 0;
549                break;
550             }
551          }
552 
553          if(mask == lastmask)
554             break;
555       }
556   }
557 
558   // write the color block
559   if(max16 < min16)
560   {
561      unsigned short t = min16;
562      min16 = max16;
563      max16 = t;
564      mask ^= 0x55555555;
565   }
566 
567   dest[0] = (unsigned char) (max16);
568   dest[1] = (unsigned char) (max16 >> 8);
569   dest[2] = (unsigned char) (min16);
570   dest[3] = (unsigned char) (min16 >> 8);
571   dest[4] = (unsigned char) (mask);
572   dest[5] = (unsigned char) (mask >> 8);
573   dest[6] = (unsigned char) (mask >> 16);
574   dest[7] = (unsigned char) (mask >> 24);
575 }
576 
// Alpha block compression (this is easy for a change).
// Encodes 16 single-channel samples, read from src at a spacing of `stride`
// bytes, into 8 bytes at dest: max value, min value, then 16 three-bit
// indices packed starting from the lowest bit.
static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src, int stride)
{
   int lo = src[0], hi = src[0];
   int range, range2, range4, bias;
   int pending = 0; // index bits not yet written out
   int nbits = 0;   // number of valid bits in pending
   int i;

   // find min/max value
   for (i = 1; i < 16; i++) {
      const int v = src[i*stride];
      if (v < lo) lo = v;
      else if (v > hi) hi = v;
   }

   // encode them
   dest[0] = (unsigned char) hi;
   dest[1] = (unsigned char) lo;
   dest += 2;

   // determine bias and emit indices
   // given the choice of hi/lo, these indices are optimal:
   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
   range  = hi - lo;
   range4 = range*4;
   range2 = range*2;
   bias   = (range < 8) ? (range - 1) : (range/2 + 2);
   bias  -= lo * 7;

   for (i = 0; i < 16; i++) {
      int a = src[i*stride]*7 + bias;
      int ind = 0;

      // select index. this is a "linear scale" lerp factor between
      // 0 (val=min) and 7 (val=max), found one bit at a time.
      if (a >= range4) { ind  = 4; a -= range4; }
      if (a >= range2) { ind += 2; a -= range2; }
      if (a >= range)    ind += 1;

      // turn linear scale into DXT index (0/1 are extremal pts)
      ind = (8 - ind) & 7;
      if (ind < 2)
         ind ^= 1;

      // write index
      pending |= ind << nbits;
      nbits += 3;
      if (nbits >= 8) {
         *dest++ = (unsigned char) pending;
         pending >>= 8;
         nbits -= 8;
      }
   }
}
629 
stb__InitDXT()630 static void stb__InitDXT()
631 {
632    int i;
633    for(i=0;i<32;i++)
634       stb__Expand5[i] = (i<<3)|(i>>2);
635 
636    for(i=0;i<64;i++)
637       stb__Expand6[i] = (i<<2)|(i>>4);
638 
639    for(i=0;i<256+16;i++)
640    {
641       int v = i-8 < 0 ? 0 : i-8 > 255 ? 255 : i-8;
642       stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v,31)];
643       stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v,63)];
644    }
645 
646    stb__PrepareOptTable(&stb__OMatch5[0][0],stb__Expand5,32);
647    stb__PrepareOptTable(&stb__OMatch6[0][0],stb__Expand6,64);
648 }
649 
// Compress one 4x4 RGBA block (64 bytes at src).
// With alpha != 0 an 8-byte alpha block, built from every fourth byte
// starting at src[3], is written first and the 8-byte color block follows
// it; with alpha == 0 only the color block is written.
// The lookup tables are built on the first call.
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
{
   static int needInit = 1;
   unsigned char *pixels = (unsigned char *) src;

   if (needInit) {
      stb__InitDXT();
      needInit = 0;
   }

   if (alpha) {
      stb__CompressAlphaBlock(dest, pixels + 3, 4);
      dest += 8;
   }

   stb__CompressColorBlock(dest, pixels, mode);
}
665 
// Compress 16 tightly packed single-channel samples at src into the 8-byte
// block at dest.
void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *samples = (unsigned char *) src;
   stb__CompressAlphaBlock(dest, samples, 1);
}
670 
// Compress 16 two-channel interleaved samples (32 bytes at src) into 16 bytes
// at dest: one 8-byte block per channel, first channel first.
void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src)
{
   int ch;
   for (ch = 0; ch < 2; ++ch)
      stb__CompressAlphaBlock(dest + 8*ch, (unsigned char *) src + ch, 2);
}
676 #endif // STB_DXT_IMPLEMENTATION
677 #endif // STB_INCLUDE_STB_DXT_H
678 
679 /*
680 ------------------------------------------------------------------------------
681 This software is available under 2 licenses -- choose whichever you prefer.
682 ------------------------------------------------------------------------------
683 ALTERNATIVE A - MIT License
684 Copyright (c) 2017 Sean Barrett
685 Permission is hereby granted, free of charge, to any person obtaining a copy of
686 this software and associated documentation files (the "Software"), to deal in
687 the Software without restriction, including without limitation the rights to
688 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
689 of the Software, and to permit persons to whom the Software is furnished to do
690 so, subject to the following conditions:
691 The above copyright notice and this permission notice shall be included in all
692 copies or substantial portions of the Software.
693 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
694 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
695 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
696 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
697 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
698 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
699 SOFTWARE.
700 ------------------------------------------------------------------------------
701 ALTERNATIVE B - Public Domain (www.unlicense.org)
702 This is free and unencumbered software released into the public domain.
703 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
704 software, either in source code form or as a compiled binary, for any purpose,
705 commercial or non-commercial, and by any means.
706 In jurisdictions that recognize copyright laws, the author or authors of this
707 software dedicate any and all copyright interest in the software to the public
708 domain. We make this dedication for the benefit of the public at large and to
709 the detriment of our heirs and successors. We intend this dedication to be an
710 overt act of relinquishment in perpetuity of all present and future rights to
711 this software under copyright law.
712 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
713 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
714 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
715 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
716 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
717 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
718 ------------------------------------------------------------------------------
719 */
720