1 // stb_dxt.h - v1.09 - DXT1/DXT5 compressor - public domain
2 // original by fabian "ryg" giesen - ported to C by stb
3 // use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
4 //
5 // USAGE:
6 //   call stb_compress_dxt_block() for every block (you must pad)
7 //     source should be a 4x4 block of RGBA data in row-major order;
8 //     Alpha channel is not stored if you specify alpha=0 (but you
9 //     must supply some constant alpha in the alpha channel).
10 //     You can turn on dithering and "high quality" using mode.
11 //
12 // version history:
13 //   v1.09  - (stb) update documentation re: surprising alpha channel requirement
14 //   v1.08  - (stb) fix bug in dxt-with-alpha block
15 //   v1.07  - (stb) bc4; allow not using libc; add STB_DXT_STATIC
16 //   v1.06  - (stb) fix to known-broken 1.05
17 //   v1.05  - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski)
18 //   v1.04  - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
19 //            single color match fix (allow for inexact color interpolation);
20 //            optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
21 //   v1.03  - (stb) endianness support
22 //   v1.02  - (stb) fix alpha encoding bug
23 //   v1.01  - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
24 //   v1.00  - (stb) first release
25 //
26 // contributors:
27 //   Kevin Schmidt (#defines for "freestanding" compilation)
28 //   github:ppiastucki (BC4 support)
29 //
30 // LICENSE
31 //
32 //   See end of file for license information.
33 
34 #ifndef STB_INCLUDE_STB_DXT_H
35 #define STB_INCLUDE_STB_DXT_H
36 
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
41 #ifdef STB_DXT_STATIC
42 #define STBDDEF static
43 #else
44 #define STBDDEF extern
45 #endif
46 
47 // compression mode (bitflags)
48 #define STB_DXT_NORMAL    0
49 #define STB_DXT_DITHER    1   // use dithering. dubious win. never use for normal maps and the like!
50 #define STB_DXT_HIGHQUAL  2   // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
51 
// Compresses one 4x4 block. src holds 16 RGBA pixels (4 bytes each, 64 bytes,
// row-major). With alpha == 0 an 8-byte color block is written to dest; with
// alpha != 0 an 8-byte alpha block is written first, followed by the 8-byte
// color block (16 bytes total). mode is a combination of the STB_DXT_* flags above.
STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode);
// Compresses 16 single-byte samples into one 8-byte block at dest.
STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel);
// Compresses 16 two-byte pixels into 16 bytes at dest: an 8-byte block for the
// first byte of every pixel, then an 8-byte block for the second byte.
STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel);
55 
56 #define STB_COMPRESS_DXT_BLOCK
57 
58 #ifdef __cplusplus
59 }
60 #endif
61 #endif // STB_INCLUDE_STB_DXT_H
62 
63 #ifdef STB_DXT_IMPLEMENTATION
64 
65 // configuration options for DXT encoder. set them in the project/makefile or just define
66 // them at the top.
67 
68 // STB_DXT_USE_ROUNDING_BIAS
69 //     use a rounding bias during color interpolation. this is closer to what "ideal"
70 //     interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
71 //     implicitly had this turned on.
72 //
73 //     in case you're targeting a specific type of hardware (e.g. console programmers):
74 //     NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
75 //     to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
76 //     you also see "(a*5 + b*3) / 8" on some old GPU designs.
77 // #define STB_DXT_USE_ROUNDING_BIAS
78 
#include <stdlib.h>

// <math.h> is only pulled in for the default STBD_ABS / STBD_FABS fallbacks
// below; it is skipped when the user has supplied both macros.
// (The guard previously tested the misspelled name STBI_FABS, which made the
// include unconditional in practice.)
#if !defined(STBD_ABS) || !defined(STBD_FABS)
#include <math.h>
#endif

// integer absolute value; override to avoid the libc abs()
#ifndef STBD_ABS
#define STBD_ABS(i)           abs(i)
#endif

// floating-point absolute value; override to avoid the libm fabs()
#ifndef STBD_FABS
#define STBD_FABS(x)          fabs(x)
#endif

// memset replacement hook; <string.h> is only included for the default
#ifndef STBD_MEMSET
#include <string.h>
#define STBD_MEMSET           memset
#endif
97 
// Lookup tables; all of them are filled in by stb__InitDXT().
static unsigned char stb__Expand5[32];        // 5-bit channel value -> 8-bit value
static unsigned char stb__Expand6[64];        // 6-bit channel value -> 8-bit value
static unsigned char stb__OMatch5[256][2];    // per 8-bit value: [0]=max, [1]=min 5-bit endpoint (see stb__PrepareOptTable)
static unsigned char stb__OMatch6[256][2];    // same as above for 6-bit endpoints
static unsigned char stb__QuantRBTab[256+16]; // index = value+8 (value clamped to 0..255) -> value quantized to 5 bits, expanded back to 8
static unsigned char stb__QuantGTab[256+16];  // same as above with 6-bit quantization
104 
// Scaled multiply: with t = a*b + 128, returns (t + (t >> 8)) >> 8, the
// shift-only approximation of a*b/255 rounded to nearest.
static int stb__Mul8Bit(int a, int b)
{
   const int biased = a*b + 128;
   const int carry  = biased >> 8;
   return (biased + carry) >> 8;
}
110 
stb__From16Bit(unsigned char * out,unsigned short v)111 static void stb__From16Bit(unsigned char *out, unsigned short v)
112 {
113    int rv = (v & 0xf800) >> 11;
114    int gv = (v & 0x07e0) >>  5;
115    int bv = (v & 0x001f) >>  0;
116 
117    out[0] = stb__Expand5[rv];
118    out[1] = stb__Expand6[gv];
119    out[2] = stb__Expand5[bv];
120    out[3] = 0;
121 }
122 
// Packs an 8-bit-per-channel color into 5:6:5: each channel is scaled down
// with stb__Mul8Bit and placed at bits 11..15 (r), 5..10 (g) and 0..4 (b).
static unsigned short stb__As16Bit(int r, int g, int b)
{
   const int r5 = stb__Mul8Bit(r, 31);
   const int g6 = stb__Mul8Bit(g, 63);
   const int b5 = stb__Mul8Bit(b, 31);

   return (unsigned short)((r5 << 11) + (g6 << 5) + b5);
}
127 
// Interpolates one third of the way from a toward b (weights 2/3 and 1/3),
// using whichever rounding convention was selected at compile time.
static int stb__Lerp13(int a, int b)
{
#ifdef STB_DXT_USE_ROUNDING_BIAS
   // biased variant: add the rounded third of the difference (0x55/255 == 1/3)
   const int delta = b - a;
   return a + stb__Mul8Bit(delta, 0x55);
#else
   // unbiased variant: plain truncating division.
   // ("/ 3" may be replaced by "* 0xaaab) >> 17" if the division is too slow.)
   const int weighted = 2*a + b;
   return weighted / 3;
#endif
}
140 
// Applies stb__Lerp13 to the first three bytes (R,G,B) of p1 and p2 and stores
// the results in out[0..2]. out[3] is not touched.
static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
{
   int ch;
   for (ch = 0; ch < 3; ++ch)
      out[ch] = (unsigned char)stb__Lerp13(p1[ch], p2[ch]);
}
148 
149 /****************************************************************************/
150 
// Builds the table used to reproduce constant colors as accurately as possible.
// For every 8-bit target value 0..255 an exhaustive search over all (min,max)
// endpoint pairs of the given bit depth picks the pair whose 1/3 interpolant
// is closest to the target. Table receives 256 entries of two bytes each:
// [0] = max endpoint index, [1] = min endpoint index.
//   expand - table mapping an endpoint index to its 8-bit value
//   size   - number of endpoint indices (32 or 64 at the call sites in this file)
static void stb__PrepareOptTable(unsigned char *Table,const unsigned char *expand,int size)
{
   int target;

   for (target = 0; target < 256; ++target) {
      unsigned char *entry = Table + target*2;
      int lowest = 256;
      int lo, hi;

      for (lo = 0; lo < size; ++lo) {
         const int loVal = expand[lo];

         for (hi = 0; hi < size; ++hi) {
            const int hiVal = expand[hi];
            int cost = STBD_ABS(stb__Lerp13(hiVal, loVal) - target);

            // DX10 spec says that interpolation must be within 3% of the "correct"
            // result, so charge that as an additional error term. (a random spread
            // of +-1.5% would be expected, but the spec never says the error has to
            // be unbiased - better safe than sorry).
            cost += STBD_ABS(hiVal - loVal) * 3 / 100;

            // strict comparison: the first pair reaching a given cost is kept
            if (cost >= lowest)
               continue;

            entry[0] = (unsigned char)hi;
            entry[1] = (unsigned char)lo;
            lowest = cost;
         }
      }
   }
}
179 
// Expands the two 5:6:5 endpoints into the 4-entry palette stored in color
// (4 bytes per entry): entry 0 = c0, entry 1 = c1, entry 2 = one third of the
// way from c0 to c1, entry 3 = one third of the way from c1 to c0.
static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1)
{
   unsigned char *first  = color;
   unsigned char *second = color + 4;

   stb__From16Bit(first,  c0);
   stb__From16Bit(second, c1);
   stb__Lerp13RGB(color +  8, first,  second);
   stb__Lerp13RGB(color + 12, second, first);
}
187 
188 // Block dithering function. Simply dithers a block to 565 RGB.
189 // (Floyd-Steinberg)
stb__DitherBlock(unsigned char * dest,unsigned char * block)190 static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
191 {
192   int err[8],*ep1 = err,*ep2 = err+4, *et;
193   int ch,y;
194 
195   // process channels separately
196   for (ch=0; ch<3; ++ch) {
197       unsigned char *bp = block+ch, *dp = dest+ch;
198       unsigned char *quant = (ch == 1) ? stb__QuantGTab+8 : stb__QuantRBTab+8;
199       STBD_MEMSET(err, 0, sizeof(err));
200       for(y=0; y<4; ++y) {
201          dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
202          ep1[0] = bp[ 0] - dp[ 0];
203          dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
204          ep1[1] = bp[ 4] - dp[ 4];
205          dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
206          ep1[2] = bp[ 8] - dp[ 8];
207          dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
208          ep1[3] = bp[12] - dp[12];
209          bp += 16;
210          dp += 16;
211          et = ep1, ep1 = ep2, ep2 = et; // swap
212       }
213    }
214 }
215 
// Picks the palette index for a projected value, given the three crossover
// thresholds along the c0-c1 axis.
static int stb__SelectStop(int dot, int c0Point, int halfPoint, int c3Point)
{
   if (dot < halfPoint)
      return (dot < c0Point) ? 1 : 3;
   return (dot < c3Point) ? 2 : 0;
}

// The color matching function.
// Assigns one of the four palette entries to each of the 16 pixels.
//   block  - 16 pixels, 4 bytes each (only bytes 0..2 are read)
//   color  - 4 palette entries, 4 bytes each, laid out as by stb__EvalColors
//   dither - nonzero to diffuse the matching error (Floyd-Steinberg weights)
// Returns the 32-bit index mask, 2 bits per pixel, pixel 0 in the low bits.
static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color,int dither)
{
   unsigned int mask = 0;
   int dirr = color[0*4+0] - color[1*4+0];
   int dirg = color[0*4+1] - color[1*4+1];
   int dirb = color[0*4+2] - color[1*4+2];
   int dots[16];
   int stops[4];
   int i;
   int c0Point, halfPoint, c3Point;

   for(i=0;i<16;i++)
      dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb;

   for(i=0;i<4;i++)
      stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb;

   // think of the colors as arranged on a line; project point onto that line, then choose
   // next color out of available ones. we compute the crossover points for "best color in top
   // half"/"best in bottom half" and then the same inside that subinterval.
   //
   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
   // but it's very close and a lot faster.
   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html

   c0Point   = (stops[1] + stops[3]) >> 1;
   halfPoint = (stops[3] + stops[2]) >> 1;
   c3Point   = (stops[2] + stops[0]) >> 1;

   if(!dither) {
      // the version without dithering is straightforward
      for (i=15;i>=0;i--)
         mask = (mask << 2) | (unsigned int)stb__SelectStop(dots[i], c0Point, halfPoint, c3Point);
   } else {
      // with floyd-steinberg dithering
      int err[8],*ep1 = err,*ep2 = err+4;
      int *dp = dots, y;

      // everything is scaled by 16 to match the weights of the diffused error.
      // multiplication is used instead of "<< 4" because the projections can be
      // negative, and left-shifting a negative int is undefined behaviour.
      c0Point   *= 16;
      halfPoint *= 16;
      c3Point   *= 16;
      for(i=0;i<8;i++)
         err[i] = 0;

      for(y=0;y<4;y++)
      {
         int dot,step;
         unsigned int lmask; // unsigned: for y == 3 it is shifted into bit 31

         dot = dp[0]*16 + (3*ep2[1] + 5*ep2[0]);
         step = stb__SelectStop(dot, c0Point, halfPoint, c3Point);
         ep1[0] = dp[0] - stops[step];
         lmask = (unsigned int)step;

         dot = dp[1]*16 + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]);
         step = stb__SelectStop(dot, c0Point, halfPoint, c3Point);
         ep1[1] = dp[1] - stops[step];
         lmask |= (unsigned int)step<<2;

         dot = dp[2]*16 + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]);
         step = stb__SelectStop(dot, c0Point, halfPoint, c3Point);
         ep1[2] = dp[2] - stops[step];
         lmask |= (unsigned int)step<<4;

         dot = dp[3]*16 + (7*ep1[2] + 5*ep2[3] + ep2[2]);
         step = stb__SelectStop(dot, c0Point, halfPoint, c3Point);
         ep1[3] = dp[3] - stops[step];
         lmask |= (unsigned int)step<<6;

         dp += 4;
         mask |= lmask << (y*8);
         { int *et = ep1; ep1 = ep2; ep2 = et; } // swap
      }
   }

   return mask;
}
312 
// The color optimization function. (Clever code, part 1)
// Chooses the two endpoint colors for a block: builds the RGB covariance matrix
// of the 16 pixels, estimates its principal axis with a few power iterations,
// projects every pixel onto that axis and returns the pixels with the largest
// and smallest projection, converted to 5:6:5, in *pmax16 and *pmin16.
//   block - 16 pixels, 4 bytes each (only bytes 0..2 are read)
static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
{
   int mind,maxd;
   unsigned char *minp, *maxp;
   double magn;
   int v_r,v_g,v_b;
   static const int nIterPower = 4;
   float covf[6],vfr,vfg,vfb;

   // determine color distribution
   int cov[6];
   int mu[3],min[3],max[3];
   int ch,i,iter;

   for(ch=0;ch<3;ch++)
   {
      const unsigned char *bp = ((const unsigned char *) block) + ch;
      int muv,minv,maxv;

      muv = minv = maxv = bp[0];
      for(i=4;i<64;i+=4)
      {
         muv += bp[i];
         if (bp[i] < minv) minv = bp[i];
         else if (bp[i] > maxv) maxv = bp[i];
      }

      mu[ch] = (muv + 8) >> 4; // rounded mean of the 16 samples
      min[ch] = minv;
      max[ch] = maxv;
   }

   // determine covariance matrix (upper triangle: rr, rg, rb, gg, gb, bb)
   for (i=0;i<6;i++)
      cov[i] = 0;

   for (i=0;i<16;i++)
   {
      int r = block[i*4+0] - mu[0];
      int g = block[i*4+1] - mu[1];
      int b = block[i*4+2] - mu[2];

      cov[0] += r*r;
      cov[1] += r*g;
      cov[2] += r*b;
      cov[3] += g*g;
      cov[4] += g*b;
      cov[5] += b*b;
   }

   // convert covariance matrix to float, find principal axis via power iter
   for(i=0;i<6;i++)
      covf[i] = cov[i] / 255.0f;

   // start vector: per-channel extent of the block
   vfr = (float) (max[0] - min[0]);
   vfg = (float) (max[1] - min[1]);
   vfb = (float) (max[2] - min[2]);

   for(iter=0;iter<nIterPower;iter++)
   {
      float r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2];
      float g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
      float b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];

      vfr = r;
      vfg = g;
      vfb = b;
   }

   magn = STBD_FABS(vfr);
   if (STBD_FABS(vfg) > magn) magn = STBD_FABS(vfg);
   if (STBD_FABS(vfb) > magn) magn = STBD_FABS(vfb);

   if(magn < 4.0f) { // too small, default to luminance
      v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
      v_g = 587;
      v_b = 114;
   } else {
      // normalize so the largest component has magnitude 512
      magn = 512.0 / magn;
      v_r = (int) (vfr * magn);
      v_g = (int) (vfg * magn);
      v_b = (int) (vfb * magn);
   }

   // Pick colors at extreme points. The search is seeded with pixel 0 so that
   // minp/maxp are always initialized; later pixels replace them only on a
   // strictly smaller / larger projection.
   mind = maxd = block[0]*v_r + block[1]*v_g + block[2]*v_b;
   minp = maxp = block;
   for(i=1;i<16;i++)
   {
      int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b;

      if (dot < mind) {
         mind = dot;
         minp = block+i*4;
      }

      if (dot > maxd) {
         maxd = dot;
         maxp = block+i*4;
      }
   }

   *pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]);
   *pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]);
}
417 
// Truncates y to an int and clamps the result to the range [p0, p1].
static int stb__sclamp(float y, int p0, int p1)
{
   const int truncated = (int) y;

   return (truncated < p0) ? p0
        : (truncated > p1) ? p1
        : truncated;
}
425 
426 // The refinement function. (Clever code, part 2)
427 // Tries to optimize colors to suit block contents better.
428 // (By solving a least squares system via normal equations+Cramer's rule)
stb__RefineBlock(unsigned char * block,unsigned short * pmax16,unsigned short * pmin16,unsigned int mask)429 static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
430 {
431    static const int w1Tab[4] = { 3,0,2,1 };
432    static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
433    // ^some magic to save a lot of multiplies in the accumulating loop...
434    // (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
435 
436    float frb,fg;
437    unsigned short oldMin, oldMax, min16, max16;
438    int i, akku = 0, xx,xy,yy;
439    int At1_r,At1_g,At1_b;
440    int At2_r,At2_g,At2_b;
441    unsigned int cm = mask;
442 
443    oldMin = *pmin16;
444    oldMax = *pmax16;
445 
446    if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
447    {
448       // yes, linear system would be singular; solve using optimal
449       // single-color match on average color
450       int r = 8, g = 8, b = 8;
451       for (i=0;i<16;++i) {
452          r += block[i*4+0];
453          g += block[i*4+1];
454          b += block[i*4+2];
455       }
456 
457       r >>= 4; g >>= 4; b >>= 4;
458 
459       max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
460       min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
461    } else {
462       At1_r = At1_g = At1_b = 0;
463       At2_r = At2_g = At2_b = 0;
464       for (i=0;i<16;++i,cm>>=2) {
465          int step = cm&3;
466          int w1 = w1Tab[step];
467          int r = block[i*4+0];
468          int g = block[i*4+1];
469          int b = block[i*4+2];
470 
471          akku    += prods[step];
472          At1_r   += w1*r;
473          At1_g   += w1*g;
474          At1_b   += w1*b;
475          At2_r   += r;
476          At2_g   += g;
477          At2_b   += b;
478       }
479 
480       At2_r = 3*At2_r - At1_r;
481       At2_g = 3*At2_g - At1_g;
482       At2_b = 3*At2_b - At1_b;
483 
484       // extract solutions and decide solvability
485       xx = akku >> 16;
486       yy = (akku >> 8) & 0xff;
487       xy = (akku >> 0) & 0xff;
488 
489       frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy);
490       fg = frb * 63.0f / 31.0f;
491 
492       // solve.
493       max16 =  (unsigned short)(stb__sclamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11);
494       max16 |= (unsigned short)(stb__sclamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5);
495       max16 |= (unsigned short)(stb__sclamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0);
496 
497       min16 =  (unsigned short)(stb__sclamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11);
498       min16 |= (unsigned short)(stb__sclamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5);
499       min16 |= (unsigned short)(stb__sclamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0);
500    }
501 
502    *pmin16 = min16;
503    *pmax16 = max16;
504    return oldMin != min16 || oldMax != max16;
505 }
506 
507 // Color block compression
stb__CompressColorBlock(unsigned char * dest,unsigned char * block,int mode)508 static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
509 {
510    unsigned int mask;
511    int i;
512    int dither;
513    int refinecount;
514    unsigned short max16, min16;
515    unsigned char dblock[16*4],color[4*4];
516 
517    dither = mode & STB_DXT_DITHER;
518    refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
519 
520    // check if block is constant
521    for (i=1;i<16;i++)
522       if (((unsigned int *) block)[i] != ((unsigned int *) block)[0])
523          break;
524 
525    if(i == 16) { // constant color
526       int r = block[0], g = block[1], b = block[2];
527       mask  = 0xaaaaaaaa;
528       max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
529       min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
530    } else {
531       // first step: compute dithered version for PCA if desired
532       if(dither)
533          stb__DitherBlock(dblock,block);
534 
535       // second step: pca+map along principal axis
536       stb__OptimizeColorsBlock(dither ? dblock : block,&max16,&min16);
537       if (max16 != min16) {
538          stb__EvalColors(color,max16,min16);
539          mask = stb__MatchColorsBlock(block,color,dither);
540       } else
541          mask = 0;
542 
543       // third step: refine (multiple times if requested)
544       for (i=0;i<refinecount;i++) {
545          unsigned int lastmask = mask;
546 
547          if (stb__RefineBlock(dither ? dblock : block,&max16,&min16,mask)) {
548             if (max16 != min16) {
549                stb__EvalColors(color,max16,min16);
550                mask = stb__MatchColorsBlock(block,color,dither);
551             } else {
552                mask = 0;
553                break;
554             }
555          }
556 
557          if(mask == lastmask)
558             break;
559       }
560   }
561 
562   // write the color block
563   if(max16 < min16)
564   {
565      unsigned short t = min16;
566      min16 = max16;
567      max16 = t;
568      mask ^= 0x55555555;
569   }
570 
571   dest[0] = (unsigned char) (max16);
572   dest[1] = (unsigned char) (max16 >> 8);
573   dest[2] = (unsigned char) (min16);
574   dest[3] = (unsigned char) (min16 >> 8);
575   dest[4] = (unsigned char) (mask);
576   dest[5] = (unsigned char) (mask >> 8);
577   dest[6] = (unsigned char) (mask >> 16);
578   dest[7] = (unsigned char) (mask >> 24);
579 }
580 
// Alpha block compression (this is easy for a change).
// Encodes 16 single-byte samples into an 8-byte block: the maximum, the
// minimum, then 16 three-bit indices packed least-significant-bit first.
//   dest   - receives 8 bytes
//   src    - first sample
//   stride - distance in bytes between consecutive samples
static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src, int stride)
{
   int lo, hi, px;
   int range, range2, range4, bias;
   int pending = 0; // index bits not yet written out
   int nbits = 0;   // number of valid bits in pending

   // find the value range
   lo = hi = src[0];
   for (px = 1; px < 16; ++px) {
      const int v = src[px*stride];
      if (v < lo) lo = v;
      else if (v > hi) hi = v;
   }

   // endpoints: maximum first, then minimum
   dest[0] = (unsigned char)hi;
   dest[1] = (unsigned char)lo;
   dest += 2;

   // determine bias and emit indices
   // given the choice of hi/lo, these indices are optimal:
   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
   range  = hi - lo;
   range4 = range*4;
   range2 = range*2;
   bias = (range < 8) ? (range - 1) : (range/2 + 2);
   bias -= lo * 7;

   for (px = 0; px < 16; ++px) {
      int a = src[px*stride]*7 + bias;
      int level = 0;

      // "linear scale" lerp factor between 0 (sample == min) and 7 (sample == max),
      // found one bit at a time
      if (a >= range4) { level += 4; a -= range4; }
      if (a >= range2) { level += 2; a -= range2; }
      if (a >= range)  { level += 1; }

      // turn linear scale into DXT index (0/1 are extremal pts)
      level = -level & 7;
      level ^= (2 > level);

      // append the 3-bit index and flush every completed byte
      pending |= level << nbits;
      nbits += 3;
      if (nbits >= 8) {
         *dest++ = (unsigned char)pending;
         pending >>= 8;
         nbits -= 8;
      }
   }
}
633 
stb__InitDXT()634 static void stb__InitDXT()
635 {
636    int i;
637    for(i=0;i<32;i++)
638       stb__Expand5[i] = (unsigned char)((i<<3)|(i>>2));
639 
640    for(i=0;i<64;i++)
641       stb__Expand6[i] = (unsigned char)((i<<2)|(i>>4));
642 
643    for(i=0;i<256+16;i++)
644    {
645       int v = i-8 < 0 ? 0 : i-8 > 255 ? 255 : i-8;
646       stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v,31)];
647       stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v,63)];
648    }
649 
650    stb__PrepareOptTable(&stb__OMatch5[0][0],stb__Expand5,32);
651    stb__PrepareOptTable(&stb__OMatch6[0][0],stb__Expand6,64);
652 }
653 
// Compresses one 4x4 RGBA block.
//   dest  - receives 8 bytes when alpha == 0, otherwise 16 bytes
//           (8-byte alpha block followed by the 8-byte color block)
//   src   - 16 pixels, 4 bytes each (R,G,B,A)
//   alpha - nonzero to also encode the alpha channel
//   mode  - STB_DXT_* flags, forwarded to the color encoder
// NOTE: the first call fills the file-static lookup tables, guarded only by a
// plain static flag; that initialization is not synchronized between threads.
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
{
   unsigned char data[16][4];
   static int init=1;
   if (init) {
      stb__InitDXT();
      init=0;
   }

   if (alpha) {
      int i;
      stb__CompressAlphaBlock(dest,(unsigned char*) src+3, 4);
      dest += 8;
      // make a new copy of the data in which alpha is opaque,
      // because code uses a fast test for color constancy.
      // (copied by hand rather than with memcpy, so that nothing from
      // <string.h> is needed when STBD_MEMSET has been overridden)
      for (i=0; i < 16; ++i) {
         data[i][0] = src[i*4+0];
         data[i][1] = src[i*4+1];
         data[i][2] = src[i*4+2];
         data[i][3] = 255;
      }
      src = &data[0][0];
   }

   stb__CompressColorBlock(dest,(unsigned char*) src,mode);
}
677 
// Compresses 16 consecutive single-byte samples from src into one 8-byte
// block at dest.
void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *samples = (unsigned char*) src;

   stb__CompressAlphaBlock(dest, samples, 1);
}
682 
// Compresses 16 two-byte pixels from src into 16 bytes at dest: the first byte
// of every pixel goes into the first 8-byte block, the second byte into the
// following 8-byte block.
void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *firstChannel  = (unsigned char*) src;
   unsigned char *secondChannel = (unsigned char*) src+1;

   stb__CompressAlphaBlock(dest,     firstChannel,  2);
   stb__CompressAlphaBlock(dest + 8, secondChannel, 2);
}
688 #endif // STB_DXT_IMPLEMENTATION
689 
690 /*
691 ------------------------------------------------------------------------------
692 This software is available under 2 licenses -- choose whichever you prefer.
693 ------------------------------------------------------------------------------
694 ALTERNATIVE A - MIT License
695 Copyright (c) 2017 Sean Barrett
696 Permission is hereby granted, free of charge, to any person obtaining a copy of
697 this software and associated documentation files (the "Software"), to deal in
698 the Software without restriction, including without limitation the rights to
699 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
700 of the Software, and to permit persons to whom the Software is furnished to do
701 so, subject to the following conditions:
702 The above copyright notice and this permission notice shall be included in all
703 copies or substantial portions of the Software.
704 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
705 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
706 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
707 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
708 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
709 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
710 SOFTWARE.
711 ------------------------------------------------------------------------------
712 ALTERNATIVE B - Public Domain (www.unlicense.org)
713 This is free and unencumbered software released into the public domain.
714 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
715 software, either in source code form or as a compiled binary, for any purpose,
716 commercial or non-commercial, and by any means.
717 In jurisdictions that recognize copyright laws, the author or authors of this
718 software dedicate any and all copyright interest in the software to the public
719 domain. We make this dedication for the benefit of the public at large and to
720 the detriment of our heirs and successors. We intend this dedication to be an
721 overt act of relinquishment in perpetuity of all present and future rights to
722 this software under copyright law.
723 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
724 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
725 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
726 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
727 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
728 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
729 ------------------------------------------------------------------------------
730 */
731