1 // stb_dxt.h - v1.08b - DXT1/DXT5 compressor - public domain
2 // original by fabian "ryg" giesen - ported to C by stb
3 // use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
4 //
5 // USAGE:
6 //   call stb_compress_dxt_block() for every block (you must pad)
7 //     source should be a 4x4 block of RGBA data in row-major order;
8 //     A is ignored if you specify alpha=0; you can turn on dithering
9 //     and "high quality" using mode.
10 //
11 // version history:
//   v1.08  - (stb) fix bug in dxt-with-alpha block
13 //   v1.07  - (stb) bc4; allow not using libc; add STB_DXT_STATIC
14 //   v1.06  - (stb) fix to known-broken 1.05
15 //   v1.05  - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski)
16 //   v1.04  - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
17 //            single color match fix (allow for inexact color interpolation);
18 //            optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
19 //   v1.03  - (stb) endianness support
20 //   v1.02  - (stb) fix alpha encoding bug
21 //   v1.01  - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
22 //   v1.00  - (stb) first release
23 //
24 // contributors:
25 //   Kevin Schmidt (#defines for "freestanding" compilation)
26 //   github:ppiastucki (BC4 support)
27 //
28 // LICENSE
29 //
30 //   See end of file for license information.
31 
#ifndef STB_INCLUDE_STB_DXT_H
#define STB_INCLUDE_STB_DXT_H

#ifdef __cplusplus
extern "C" {
#endif

// Storage class of the public entry points: define STB_DXT_STATIC before
// including this file to give them internal linkage.
#ifndef STB_DXT_STATIC
#define STBDDEF extern
#else
#define STBDDEF static
#endif

// compression mode (bitflags)
#define STB_DXT_NORMAL    0
#define STB_DXT_DITHER    1   // use dithering. dubious win. never use for normal maps and the like!
#define STB_DXT_HIGHQUAL  2   // high quality mode, does two refinement steps instead of 1. ~30-40% slower.

// Compress one 4x4 block of RGBA pixels (64 bytes, row-major, 4 bytes per pixel).
// If alpha is nonzero, an 8-byte alpha block built from the A channel is written
// first and the 8-byte color block follows it (16 bytes in total); otherwise only
// the 8-byte color block is written. mode is a combination of the STB_DXT_* flags.
STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode);

// Compress one 4x4 block of single-channel pixels (16 bytes) into 8 bytes at dest.
STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel);

// Compress one 4x4 block of two-channel pixels (32 bytes, interleaved) into 16 bytes
// at dest: the first channel's 8-byte block, then the second channel's.
STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel);

#define STB_COMPRESS_DXT_BLOCK

#ifdef __cplusplus
}
#endif
#endif // STB_INCLUDE_STB_DXT_H
60 
61 #ifdef STB_DXT_IMPLEMENTATION
62 
63 // configuration options for DXT encoder. set them in the project/makefile or just define
64 // them at the top.
65 
66 // STB_DXT_USE_ROUNDING_BIAS
67 //     use a rounding bias during color interpolation. this is closer to what "ideal"
//     interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.04)
69 //     implicitly had this turned on.
70 //
71 //     in case you're targeting a specific type of hardware (e.g. console programmers):
72 //     NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
73 //     to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
74 //     you also see "(a*5 + b*3) / 8" on some old GPU designs.
75 // #define STB_DXT_USE_ROUNDING_BIAS
76 
// libc hooks. Each STBD_* macro may be defined by the user before including
// this file to replace the corresponding C library function.
#include <stdlib.h>

// <math.h> is only pulled in when at least one of the arithmetic hooks falls
// back to its default. (The second test previously named STBI_FABS, a macro
// that does not exist in this file, so the header was included even when
// both hooks were overridden.)
#if !defined(STBD_ABS) || !defined(STBD_FABS)
#include <math.h>
#endif

// integer absolute value
#ifndef STBD_ABS
#define STBD_ABS(i)           abs(i)
#endif

// floating-point absolute value
#ifndef STBD_FABS
#define STBD_FABS(x)          fabs(x)
#endif

// memory fill; <string.h> is only needed for the default
#ifndef STBD_MEMSET
#include <string.h>
#define STBD_MEMSET           memset
#endif
95 
// Lookup tables, all filled in by stb__InitDXT().

// 5-bit and 6-bit channel values expanded to 8 bits.
static unsigned char stb__Expand5[32];
static unsigned char stb__Expand6[64];

// For every 8-bit value: the pair of 5-bit (resp. 6-bit) endpoints,
// stored as [0] = max endpoint and [1] = min endpoint, chosen by
// stb__PrepareOptTable().
static unsigned char stb__OMatch5[256][2];
static unsigned char stb__OMatch6[256][2];

// Quantization tables used by the ditherer. Entry i corresponds to the
// input value i-8 clamped to 0..255, so 8 slots of padding sit on each side.
static unsigned char stb__QuantRBTab[256 + 16];
static unsigned char stb__QuantGTab[256 + 16];
102 
// Returns a*b/255 rounded, computed with shifts instead of a division.
static int stb__Mul8Bit(int a, int b)
{
   const int biased = a*b + 128;
   const int carry  = biased >> 8;
   return (biased + carry) >> 8;
}
108 
stb__From16Bit(unsigned char * out,unsigned short v)109 static void stb__From16Bit(unsigned char *out, unsigned short v)
110 {
111    int rv = (v & 0xf800) >> 11;
112    int gv = (v & 0x07e0) >>  5;
113    int bv = (v & 0x001f) >>  0;
114 
115    out[0] = stb__Expand5[rv];
116    out[1] = stb__Expand6[gv];
117    out[2] = stb__Expand5[bv];
118    out[3] = 0;
119 }
120 
// Packs three 8-bit channels into a 5:6:5 value (red in the top bits).
static unsigned short stb__As16Bit(int r, int g, int b)
{
   const int r5 = stb__Mul8Bit(r, 31);
   const int g6 = stb__Mul8Bit(g, 63);
   const int b5 = stb__Mul8Bit(b, 31);

   return (unsigned short) ((r5 << 11) + (g6 << 5) + b5);
}
125 
// Interpolates one third of the way from a towards b. The rounding
// behaviour is selected at compile time by STB_DXT_USE_ROUNDING_BIAS.
static int stb__Lerp13(int a, int b)
{
#ifndef STB_DXT_USE_ROUNDING_BIAS
   // no rounding bias
   // replace "/ 3" by "* 0xaaab) >> 17" if your compiler sucks or you really need every ounce of speed.
   return (a + a + b) / 3;
#else
   // rounding bias
   return a + stb__Mul8Bit(b-a, 0x55);
#endif
}
138 
// Applies stb__Lerp13 to the first three bytes (R, G, B) of p1 and p2 and
// stores the result in out[0..2]; out[3] is left untouched.
static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
{
   int channel;
   for (channel = 0; channel < 3; ++channel)
      out[channel] = stb__Lerp13(p1[channel], p2[channel]);
}
146 
147 /****************************************************************************/
148 
// Builds the single-color match table: for each 8-bit target value, search
// every (min,max) endpoint pair and keep the one whose 1/3 interpolant is
// closest. Table receives 256 pairs laid out as {max index, min index};
// expand maps an endpoint index to its 8-bit value and has `size` entries.
static void stb__PrepareOptTable(unsigned char *Table,const unsigned char *expand,int size)
{
   int target;

   for (target = 0; target < 256; ++target) {
      unsigned char *entry = Table + 2*target;
      int lowestErr = 256;
      int lo, hi;

      for (lo = 0; lo < size; ++lo) {
         for (hi = 0; hi < size; ++hi) {
            const int loVal = expand[lo];
            const int hiVal = expand[hi];
            int err = STBD_ABS(stb__Lerp13(hiVal, loVal) - target);

            // DX10 spec says that interpolation must be within 3% of "correct" result,
            // add this as error term. (normally we'd expect a random distribution of
            // +-1.5% error, but nowhere in the spec does it say that the error has to be
            // unbiased - better safe than sorry).
            err += STBD_ABS(hiVal - loVal) * 3 / 100;

            // strict comparison: the first pair reaching a given error wins
            if (err >= lowestErr)
               continue;

            entry[0] = hi;
            entry[1] = lo;
            lowestErr = err;
         }
      }
   }
}
177 
// Expands the two 5:6:5 endpoints into the four-entry palette `color`
// (4 bytes per entry): entries 0 and 1 are c0 and c1, entry 2 lies one third
// of the way from c0 to c1, entry 3 one third of the way from c1 to c0.
// The fourth byte of entries 2 and 3 is not written.
static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1)
{
   unsigned char *first  = color;
   unsigned char *second = color + 4;

   stb__From16Bit(first,  c0);
   stb__From16Bit(second, c1);
   stb__Lerp13RGB(color +  8, first,  second);
   stb__Lerp13RGB(color + 12, second, first);
}
185 
186 // Block dithering function. Simply dithers a block to 565 RGB.
187 // (Floyd-Steinberg)
stb__DitherBlock(unsigned char * dest,unsigned char * block)188 static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
189 {
190   int err[8],*ep1 = err,*ep2 = err+4, *et;
191   int ch,y;
192 
193   // process channels separately
194   for (ch=0; ch<3; ++ch) {
195       unsigned char *bp = block+ch, *dp = dest+ch;
196       unsigned char *quant = (ch == 1) ? stb__QuantGTab+8 : stb__QuantRBTab+8;
197       STBD_MEMSET(err, 0, sizeof(err));
198       for(y=0; y<4; ++y) {
199          dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
200          ep1[0] = bp[ 0] - dp[ 0];
201          dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
202          ep1[1] = bp[ 4] - dp[ 4];
203          dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
204          ep1[2] = bp[ 8] - dp[ 8];
205          dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
206          ep1[3] = bp[12] - dp[12];
207          bp += 16;
208          dp += 16;
209          et = ep1, ep1 = ep2, ep2 = et; // swap
210       }
211    }
212 }
213 
// Chooses the palette index for a projected value, given the three crossover
// thresholds between neighbouring palette entries. The returned number is the
// index into the caller's `color` array (0 = color[0], 1 = color[1],
// 2 = color[2], 3 = color[3]).
static int stb__PickColorIndex(int dot, int c0Point, int halfPoint, int c3Point)
{
   if (dot < halfPoint)
      return (dot < c0Point) ? 1 : 3;
   return (dot < c3Point) ? 2 : 0;
}

// The color matching function.
//
// block  - 16 pixels, 4 bytes each (only bytes 0..2 are read)
// color  - 4 palette entries, 4 bytes each (only bytes 0..2 are read)
// dither - nonzero to diffuse the matching error (Floyd-Steinberg)
//
// Returns the 32-bit index mask: two bits per pixel, pixel 0 in the lowest bits.
static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color,int dither)
{
   unsigned int mask = 0;
   int dirr = color[0*4+0] - color[1*4+0];
   int dirg = color[0*4+1] - color[1*4+1];
   int dirb = color[0*4+2] - color[1*4+2];
   int dots[16];
   int stops[4];
   int i;
   int c0Point, halfPoint, c3Point;

   for(i=0;i<16;i++)
      dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb;

   for(i=0;i<4;i++)
      stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb;

   // think of the colors as arranged on a line; project point onto that line, then choose
   // next color out of available ones. we compute the crossover points for "best color in top
   // half"/"best in bottom half" and then the same inside that subinterval.
   //
   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
   // but it's very close and a lot faster.
   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html

   c0Point   = (stops[1] + stops[3]) >> 1;
   halfPoint = (stops[3] + stops[2]) >> 1;
   c3Point   = (stops[2] + stops[0]) >> 1;

   if(!dither) {
      // the version without dithering is straightforward
      for (i=15;i>=0;i--) {
         mask <<= 2;
         mask |= (unsigned int) stb__PickColorIndex(dots[i], c0Point, halfPoint, c3Point);
      }
   } else {
      // with floyd-steinberg dithering
      int err[8] = {0},*ep1 = err,*ep2 = err+4;
      int *dp = dots, y;

      // Work in 1/16 units so the error weights stay integral. Multiplication
      // is used instead of "<< 4" because these values may be negative and
      // left-shifting a negative int is undefined.
      c0Point   *= 16;
      halfPoint *= 16;
      c3Point   *= 16;

      for(y=0;y<4;y++)
      {
         // unsigned so that placing the last row in bits 24..31 is well defined
         unsigned int lmask;
         int dot,step;

         dot = dp[0]*16 + (3*ep2[1] + 5*ep2[0]);
         step = stb__PickColorIndex(dot, c0Point, halfPoint, c3Point);
         ep1[0] = dp[0] - stops[step];
         lmask = (unsigned int) step;

         dot = dp[1]*16 + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]);
         step = stb__PickColorIndex(dot, c0Point, halfPoint, c3Point);
         ep1[1] = dp[1] - stops[step];
         lmask |= (unsigned int) step << 2;

         dot = dp[2]*16 + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]);
         step = stb__PickColorIndex(dot, c0Point, halfPoint, c3Point);
         ep1[2] = dp[2] - stops[step];
         lmask |= (unsigned int) step << 4;

         dot = dp[3]*16 + (7*ep1[2] + 5*ep2[3] + ep2[2]);
         step = stb__PickColorIndex(dot, c0Point, halfPoint, c3Point);
         ep1[3] = dp[3] - stops[step];
         lmask |= (unsigned int) step << 6;

         dp += 4;
         mask |= lmask << (y*8);
         { int *et = ep1; ep1 = ep2; ep2 = et; } // swap
      }
   }

   return mask;
}
310 
// The color optimization function. (Clever code, part 1)
//
// Estimates the principal axis of the block's RGB distribution (covariance
// matrix + a few power iterations) and picks, as initial endpoints, the two
// pixels whose projections onto that axis are extreme.
//
// block  - 16 pixels, 4 bytes each (only bytes 0..2 are read)
// pmax16 - receives the 5:6:5 value of the pixel with the largest projection
// pmin16 - receives the 5:6:5 value of the pixel with the smallest projection
static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
{
   int mind = 0x7fffffff,maxd = -0x7fffffff;
   // Seeded with the first pixel so the pointers are never indeterminate;
   // the selection loop below overwrites them.
   unsigned char *minp = block, *maxp = block;
   double magn;
   int v_r,v_g,v_b;
   static const int nIterPower = 4;
   float covf[6],vfr,vfg,vfb;

   // determine color distribution
   int cov[6];
   int mu[3],min[3],max[3];
   int ch,i,iter;

   for(ch=0;ch<3;ch++)
   {
      const unsigned char *bp = ((const unsigned char *) block) + ch;
      int muv,minv,maxv;

      muv = minv = maxv = bp[0];
      for(i=4;i<64;i+=4)
      {
         muv += bp[i];
         if (bp[i] < minv) minv = bp[i];
         else if (bp[i] > maxv) maxv = bp[i];
      }

      mu[ch] = (muv + 8) >> 4; // rounded mean over the 16 pixels
      min[ch] = minv;
      max[ch] = maxv;
   }

   // determine covariance matrix (upper triangle: rr, rg, rb, gg, gb, bb)
   for (i=0;i<6;i++)
      cov[i] = 0;

   for (i=0;i<16;i++)
   {
      int r = block[i*4+0] - mu[0];
      int g = block[i*4+1] - mu[1];
      int b = block[i*4+2] - mu[2];

      cov[0] += r*r;
      cov[1] += r*g;
      cov[2] += r*b;
      cov[3] += g*g;
      cov[4] += g*b;
      cov[5] += b*b;
   }

   // convert covariance matrix to float, find principal axis via power iter
   for(i=0;i<6;i++)
      covf[i] = cov[i] / 255.0f;

   // start vector: per-channel extent of the block
   vfr = (float) (max[0] - min[0]);
   vfg = (float) (max[1] - min[1]);
   vfb = (float) (max[2] - min[2]);

   for(iter=0;iter<nIterPower;iter++)
   {
      float r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2];
      float g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
      float b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];

      vfr = r;
      vfg = g;
      vfb = b;
   }

   // largest component magnitude of the axis
   magn = STBD_FABS(vfr);
   if (STBD_FABS(vfg) > magn) magn = STBD_FABS(vfg);
   if (STBD_FABS(vfb) > magn) magn = STBD_FABS(vfb);

   if(magn < 4.0f) { // too small, default to luminance
      v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
      v_g = 587;
      v_b = 114;
   } else {
      // rescale so the largest component becomes 512
      magn = 512.0 / magn;
      v_r = (int) (vfr * magn);
      v_g = (int) (vfg * magn);
      v_b = (int) (vfb * magn);
   }

   // Pick colors at extreme points
   for(i=0;i<16;i++)
   {
      int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b;

      if (dot < mind) {
         mind = dot;
         minp = block+i*4;
      }

      if (dot > maxd) {
         maxd = dot;
         maxp = block+i*4;
      }
   }

   *pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]);
   *pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]);
}
415 
// Converts y to int (truncating toward zero) and clamps the result to [p0, p1].
static int stb__sclamp(float y, int p0, int p1)
{
   const int truncated = (int) y;
   return truncated < p0 ? p0
        : truncated > p1 ? p1
        : truncated;
}
423 
424 // The refinement function. (Clever code, part 2)
425 // Tries to optimize colors to suit block contents better.
426 // (By solving a least squares system via normal equations+Cramer's rule)
stb__RefineBlock(unsigned char * block,unsigned short * pmax16,unsigned short * pmin16,unsigned int mask)427 static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
428 {
429    static const int w1Tab[4] = { 3,0,2,1 };
430    static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
431    // ^some magic to save a lot of multiplies in the accumulating loop...
432    // (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
433 
434    float frb,fg;
435    unsigned short oldMin, oldMax, min16, max16;
436    int i, akku = 0, xx,xy,yy;
437    int At1_r,At1_g,At1_b;
438    int At2_r,At2_g,At2_b;
439    unsigned int cm = mask;
440 
441    oldMin = *pmin16;
442    oldMax = *pmax16;
443 
444    if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
445    {
446       // yes, linear system would be singular; solve using optimal
447       // single-color match on average color
448       int r = 8, g = 8, b = 8;
449       for (i=0;i<16;++i) {
450          r += block[i*4+0];
451          g += block[i*4+1];
452          b += block[i*4+2];
453       }
454 
455       r >>= 4; g >>= 4; b >>= 4;
456 
457       max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
458       min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
459    } else {
460       At1_r = At1_g = At1_b = 0;
461       At2_r = At2_g = At2_b = 0;
462       for (i=0;i<16;++i,cm>>=2) {
463          int step = cm&3;
464          int w1 = w1Tab[step];
465          int r = block[i*4+0];
466          int g = block[i*4+1];
467          int b = block[i*4+2];
468 
469          akku    += prods[step];
470          At1_r   += w1*r;
471          At1_g   += w1*g;
472          At1_b   += w1*b;
473          At2_r   += r;
474          At2_g   += g;
475          At2_b   += b;
476       }
477 
478       At2_r = 3*At2_r - At1_r;
479       At2_g = 3*At2_g - At1_g;
480       At2_b = 3*At2_b - At1_b;
481 
482       // extract solutions and decide solvability
483       xx = akku >> 16;
484       yy = (akku >> 8) & 0xff;
485       xy = (akku >> 0) & 0xff;
486 
487       frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy);
488       fg = frb * 63.0f / 31.0f;
489 
490       // solve.
491       max16 =   stb__sclamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11;
492       max16 |=  stb__sclamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5;
493       max16 |=  stb__sclamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0;
494 
495       min16 =   stb__sclamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11;
496       min16 |=  stb__sclamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5;
497       min16 |=  stb__sclamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0;
498    }
499 
500    *pmin16 = min16;
501    *pmax16 = max16;
502    return oldMin != min16 || oldMax != max16;
503 }
504 
505 // Color block compression
stb__CompressColorBlock(unsigned char * dest,unsigned char * block,int mode)506 static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
507 {
508    unsigned int mask;
509    int i;
510    int dither;
511    int refinecount;
512    unsigned short max16, min16;
513    unsigned char dblock[16*4],color[4*4];
514 
515    dither = mode & STB_DXT_DITHER;
516    refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
517 
518    // check if block is constant
519    for (i=1;i<16;i++)
520       if (((unsigned int *) block)[i] != ((unsigned int *) block)[0])
521          break;
522 
523    if(i == 16) { // constant color
524       int r = block[0], g = block[1], b = block[2];
525       mask  = 0xaaaaaaaa;
526       max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
527       min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
528    } else {
529       // first step: compute dithered version for PCA if desired
530       if(dither)
531          stb__DitherBlock(dblock,block);
532 
533       // second step: pca+map along principal axis
534       stb__OptimizeColorsBlock(dither ? dblock : block,&max16,&min16);
535       if (max16 != min16) {
536          stb__EvalColors(color,max16,min16);
537          mask = stb__MatchColorsBlock(block,color,dither);
538       } else
539          mask = 0;
540 
541       // third step: refine (multiple times if requested)
542       for (i=0;i<refinecount;i++) {
543          unsigned int lastmask = mask;
544 
545          if (stb__RefineBlock(dither ? dblock : block,&max16,&min16,mask)) {
546             if (max16 != min16) {
547                stb__EvalColors(color,max16,min16);
548                mask = stb__MatchColorsBlock(block,color,dither);
549             } else {
550                mask = 0;
551                break;
552             }
553          }
554 
555          if(mask == lastmask)
556             break;
557       }
558   }
559 
560   // write the color block
561   if(max16 < min16)
562   {
563      unsigned short t = min16;
564      min16 = max16;
565      max16 = t;
566      mask ^= 0x55555555;
567   }
568 
569   dest[0] = (unsigned char) (max16);
570   dest[1] = (unsigned char) (max16 >> 8);
571   dest[2] = (unsigned char) (min16);
572   dest[3] = (unsigned char) (min16 >> 8);
573   dest[4] = (unsigned char) (mask);
574   dest[5] = (unsigned char) (mask >> 8);
575   dest[6] = (unsigned char) (mask >> 16);
576   dest[7] = (unsigned char) (mask >> 24);
577 }
578 
// Alpha block compression (this is easy for a change).
//
// dest   - receives 8 bytes: max value, min value, then 16 three-bit indices
//          packed starting from the least significant bit
// src    - first of the 16 samples to encode
// stride - distance in bytes between consecutive samples
static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src, int stride)
{
   int lo, hi;
   int range, range2, range4, offset;
   int pending = 0;      // index bits collected but not yet written
   int pendingBits = 0;  // how many bits of `pending` are valid
   int px;

   // find the value range of the block
   lo = hi = src[0];
   for (px = 1; px < 16; ++px) {
      const int v = src[px*stride];
      if (v < lo) lo = v;
      else if (v > hi) hi = v;
   }

   // the endpoints are stored as-is, max first
   dest[0] = hi;
   dest[1] = lo;
   dest += 2;

   // determine bias and emit color indices
   // given the choice of mx/mn, these indices are optimal:
   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
   range  = hi - lo;
   range4 = range*4;
   range2 = range*2;
   offset = (range < 8) ? (range - 1) : (range/2 + 2);
   offset -= lo * 7;

   for (px = 0; px < 16; ++px) {
      int a = src[px*stride]*7 + offset;
      int ind = 0;

      // select index. this is a "linear scale" lerp factor between 0 (val=min) and 7 (val=max),
      // built one binary digit at a time.
      if (a >= range4) { ind  = 4; a -= range4; }
      if (a >= range2) { ind += 2; a -= range2; }
      if (a >= range)    ind += 1;

      // turn linear scale into DXT index (0/1 are extremal pts)
      ind = -ind & 7;
      if (ind < 2)
         ind ^= 1;

      // append the index; flush a byte whenever eight or more bits are pending
      pending |= ind << pendingBits;
      pendingBits += 3;
      if (pendingBits >= 8) {
         *dest++ = pending;
         pending >>= 8;
         pendingBits -= 8;
      }
   }
}
631 
stb__InitDXT()632 static void stb__InitDXT()
633 {
634    int i;
635    for(i=0;i<32;i++)
636       stb__Expand5[i] = (i<<3)|(i>>2);
637 
638    for(i=0;i<64;i++)
639       stb__Expand6[i] = (i<<2)|(i>>4);
640 
641    for(i=0;i<256+16;i++)
642    {
643       int v = i-8 < 0 ? 0 : i-8 > 255 ? 255 : i-8;
644       stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v,31)];
645       stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v,63)];
646    }
647 
648    stb__PrepareOptTable(&stb__OMatch5[0][0],stb__Expand5,32);
649    stb__PrepareOptTable(&stb__OMatch6[0][0],stb__Expand6,64);
650 }
651 
// Compresses one 4x4 RGBA block (64 bytes at src).
//
// With alpha == 0 an 8-byte color block is written to dest. With alpha != 0
// an 8-byte alpha block built from byte 3 of every pixel is written first and
// the color block follows, 16 bytes in total.
//
// The lookup tables are built on the first call, guarded by a plain static
// flag; no synchronization is performed here.
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
{
   unsigned char data[16][4];
   static int init=1;
   if (init) {
      stb__InitDXT();
      init=0;
   }

   if (alpha) {
      int i;
      stb__CompressAlphaBlock(dest,(unsigned char*) src+3, 4);
      dest += 8;
      // make a new copy of the data in which alpha is opaque,
      // because code uses a fast test for color constancy.
      // (copied by hand rather than with memcpy: <string.h> is only
      // included when STBD_MEMSET is not overridden)
      for (i=0; i < 16; ++i) {
         data[i][0] = src[i*4+0];
         data[i][1] = src[i*4+1];
         data[i][2] = src[i*4+2];
         data[i][3] = 255;
      }
      src = &data[0][0];
   }

   stb__CompressColorBlock(dest,(unsigned char*) src,mode);
}
675 
// Compresses 16 consecutive single-byte samples into one 8-byte block at dest.
void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *samples = (unsigned char *) src;

   stb__CompressAlphaBlock(dest, samples, 1);
}
680 
// Compresses 16 interleaved two-byte samples into two 8-byte blocks:
// the first byte of every pair goes to dest[0..7], the second to dest[8..15].
void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *firstChannel  = (unsigned char *) src;
   unsigned char *secondChannel = firstChannel + 1;

   stb__CompressAlphaBlock(dest,     firstChannel,  2);
   stb__CompressAlphaBlock(dest + 8, secondChannel, 2);
}
686 #endif // STB_DXT_IMPLEMENTATION
687 
688 /*
689 ------------------------------------------------------------------------------
690 This software is available under 2 licenses -- choose whichever you prefer.
691 ------------------------------------------------------------------------------
692 ALTERNATIVE A - MIT License
693 Copyright (c) 2017 Sean Barrett
694 Permission is hereby granted, free of charge, to any person obtaining a copy of
695 this software and associated documentation files (the "Software"), to deal in
696 the Software without restriction, including without limitation the rights to
697 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
698 of the Software, and to permit persons to whom the Software is furnished to do
699 so, subject to the following conditions:
700 The above copyright notice and this permission notice shall be included in all
701 copies or substantial portions of the Software.
702 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
703 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
704 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
705 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
706 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
707 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
708 SOFTWARE.
709 ------------------------------------------------------------------------------
710 ALTERNATIVE B - Public Domain (www.unlicense.org)
711 This is free and unencumbered software released into the public domain.
712 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
713 software, either in source code form or as a compiled binary, for any purpose,
714 commercial or non-commercial, and by any means.
715 In jurisdictions that recognize copyright laws, the author or authors of this
716 software dedicate any and all copyright interest in the software to the public
717 domain. We make this dedication for the benefit of the public at large and to
718 the detriment of our heirs and successors. We intend this dedication to be an
719 overt act of relinquishment in perpetuity of all present and future rights to
720 this software under copyright law.
721 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
722 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
723 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
724 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
725 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
726 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
727 ------------------------------------------------------------------------------
728 */
729