1 // stb_dxt.h - v1.09 - DXT1/DXT5 compressor - public domain
2 // original by fabian "ryg" giesen - ported to C by stb
3 // use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
4 //
5 // USAGE:
6 // call stb_compress_dxt_block() for every block (you must pad)
7 // source should be a 4x4 block of RGBA data in row-major order;
8 // Alpha channel is not stored if you specify alpha=0 (but you
9 // must supply some constant alpha in the alpha channel).
10 // You can turn on dithering and "high quality" using mode.
11 //
12 // version history:
13 // v1.09 - (stb) update documentation re: surprising alpha channel requirement
14 // v1.08 - (stb) fix bug in dxt-with-alpha block
15 // v1.07 - (stb) bc4; allow not using libc; add STB_DXT_STATIC
16 // v1.06 - (stb) fix to known-broken 1.05
17 // v1.05 - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski)
18 // v1.04 - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
19 // single color match fix (allow for inexact color interpolation);
20 // optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
21 // v1.03 - (stb) endianness support
22 // v1.02 - (stb) fix alpha encoding bug
23 // v1.01 - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
24 // v1.00 - (stb) first release
25 //
26 // contributors:
27 // Kevin Schmidt (#defines for "freestanding" compilation)
28 // github:ppiastucki (BC4 support)
29 //
30 // LICENSE
31 //
32 // See end of file for license information.
33
#ifndef STB_INCLUDE_STB_DXT_H
#define STB_INCLUDE_STB_DXT_H

#ifdef __cplusplus
extern "C" {
#endif

// STBDDEF is the storage class applied to the public declarations below:
// 'static' when STB_DXT_STATIC is defined, 'extern' otherwise.
#ifdef STB_DXT_STATIC
#define STBDDEF static
#else
#define STBDDEF extern
#endif

// compression mode (bitflags)
#define STB_DXT_NORMAL 0
#define STB_DXT_DITHER 1 // use dithering. dubious win. never use for normal maps and the like!
#define STB_DXT_HIGHQUAL 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower.

// Compress one 4x4 block of RGBA pixels (64 bytes, 4 bytes per pixel).
//   dest  - output buffer. 8 bytes are written when alpha == 0; when alpha != 0
//           an 8-byte alpha block is written first, followed by the 8-byte
//           color block (16 bytes total).
//   alpha - nonzero to also encode the alpha channel.
//   mode  - bitwise OR of the STB_DXT_* flags above.
STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode);

// Compress one 4x4 block of single-channel pixels (16 bytes in) into 8 bytes.
STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel);

// Compress one 4x4 block of two-channel pixels (32 bytes in) into 16 bytes:
// the first channel goes to dest[0..7], the second to dest[8..15].
STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel);

// Defined so that other code can detect that these declarations are available.
#define STB_COMPRESS_DXT_BLOCK

#ifdef __cplusplus
}
#endif
#endif // STB_INCLUDE_STB_DXT_H
62
63 #ifdef STB_DXT_IMPLEMENTATION
64
65 // configuration options for DXT encoder. set them in the project/makefile or just define
66 // them at the top.
67
68 // STB_DXT_USE_ROUNDING_BIAS
69 // use a rounding bias during color interpolation. this is closer to what "ideal"
70 // interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
71 // implicitly had this turned on.
72 //
73 // in case you're targeting a specific type of hardware (e.g. console programmers):
74 // NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
75 // to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
76 // you also see "(a*5 + b*3) / 8" on some old GPU designs.
77 // #define STB_DXT_USE_ROUNDING_BIAS
78
// libc hooks. Define STBD_ABS, STBD_FABS and/or STBD_MEMSET before including
// the implementation to replace the default libc-backed versions below.

#include <stdlib.h>

// <math.h> is only wanted while at least one of the default math macros is in
// use. The guard previously tested STBI_FABS, a misspelling that is never
// defined, which made the include unconditional even when the user supplied
// both STBD_ABS and STBD_FABS.
#if !defined(STBD_ABS) || !defined(STBD_FABS)
#include <math.h>
#endif

#ifndef STBD_ABS
#define STBD_ABS(i) abs(i)
#endif

#ifndef STBD_FABS
#define STBD_FABS(x) fabs(x)
#endif

#ifndef STBD_MEMSET
#include <string.h>
#define STBD_MEMSET memset
#endif
97
// Lookup tables, all filled in by stb__InitDXT() before first use.

// 5-bit and 6-bit channel values expanded to 8 bits.
static unsigned char stb__Expand5[32];
static unsigned char stb__Expand6[64];
// For each 8-bit value: [0] = max endpoint index, [1] = min endpoint index
// whose 1/3-point interpolation best reproduces that value
// (see stb__PrepareOptTable).
static unsigned char stb__OMatch5[256][2];
static unsigned char stb__OMatch6[256][2];
// Quantize-and-expand tables for dithering. They are indexed with value+8,
// so there are 8 extra entries at each end that clamp to 0 / 255.
static unsigned char stb__QuantRBTab[256+16];
static unsigned char stb__QuantGTab[256+16];
104
// Integer approximation of a*b/255 with rounding, using shifts instead of a
// division.
static int stb__Mul8Bit(int a, int b)
{
   const int biased = a*b + 128;
   const int carry  = biased >> 8;
   return (biased + carry) >> 8;
}
110
stb__From16Bit(unsigned char * out,unsigned short v)111 static void stb__From16Bit(unsigned char *out, unsigned short v)
112 {
113 int rv = (v & 0xf800) >> 11;
114 int gv = (v & 0x07e0) >> 5;
115 int bv = (v & 0x001f) >> 0;
116
117 out[0] = stb__Expand5[rv];
118 out[1] = stb__Expand6[gv];
119 out[2] = stb__Expand5[bv];
120 out[3] = 0;
121 }
122
// Pack 8-bit r, g, b into a 5:6:5 value, scaling each channel down with
// stb__Mul8Bit.
static unsigned short stb__As16Bit(int r, int g, int b)
{
   const int r5 = stb__Mul8Bit(r, 31);
   const int g6 = stb__Mul8Bit(g, 63);
   const int b5 = stb__Mul8Bit(b, 31);

   return (unsigned short)((r5 << 11) + (g6 << 5) + b5);
}
127
// Interpolate one third of the way from a towards b. The rounding behaviour
// is chosen at compile time by STB_DXT_USE_ROUNDING_BIAS.
static int stb__Lerp13(int a, int b)
{
#ifdef STB_DXT_USE_ROUNDING_BIAS
   // biased variant: a + (b-a)*0x55/255, rounded
   const int delta = b - a;
   return a + stb__Mul8Bit(delta, 0x55);
#else
   // unbiased variant: plain truncating division
   // replace "/ 3" by "* 0xaaab) >> 17" if your compiler sucks or you really need every ounce of speed.
   const int weighted = 2*a + b;
   return weighted / 3;
#endif
}
140
// Apply stb__Lerp13 to the first three bytes (R, G, B) of p1 and p2 and store
// the results in out[0..2]. A fourth byte, if any, is left untouched.
static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
{
   int ch;
   for (ch = 0; ch < 3; ++ch)
      out[ch] = (unsigned char)stb__Lerp13(p1[ch], p2[ch]);
}
148
149 /****************************************************************************/
150
// Build the single-color match table: for every 8-bit target value, search
// all (min, max) endpoint pairs and record the pair whose 1/3-point
// interpolation lands closest to the target.
//   Table  - 256 entries of two bytes each: [0] = max index, [1] = min index
//   expand - quantized index -> 8-bit value
//   size   - number of entries in expand
static void stb__PrepareOptTable(unsigned char *Table,const unsigned char *expand,int size)
{
   int target;

   for (target = 0; target < 256; ++target) {
      unsigned char *entry = Table + target*2;
      int lowest = 256;
      int lo, hi;

      for (lo = 0; lo < size; ++lo) {
         const int loVal = expand[lo];

         for (hi = 0; hi < size; ++hi) {
            const int hiVal = expand[hi];
            int cost = STBD_ABS(stb__Lerp13(hiVal, loVal) - target);

            // DX10 spec says that interpolation must be within 3% of "correct" result,
            // add this as error term. (normally we'd expect a random distribution of
            // +-1.5% error, but nowhere in the spec does it say that the error has to be
            // unbiased - better safe than sorry).
            cost += STBD_ABS(hiVal - loVal) * 3 / 100;

            // only a strictly better pair replaces the current one
            if (cost >= lowest)
               continue;

            entry[0] = (unsigned char)hi;
            entry[1] = (unsigned char)lo;
            lowest = cost;
         }
      }
   }
}
179
// Expand the two 5:6:5 endpoints into the four-entry palette of a color block.
// color holds 4 entries of 4 bytes: c0, c1, then the two interpolated colors
// (1/3 of the way from c0 to c1, and 1/3 of the way from c1 to c0).
static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1)
{
   unsigned char *first   = color;
   unsigned char *second  = color + 4;
   unsigned char *nearC0  = color + 8;
   unsigned char *nearC1  = color + 12;

   stb__From16Bit(first,  c0);
   stb__From16Bit(second, c1);
   stb__Lerp13RGB(nearC0, first,  second);
   stb__Lerp13RGB(nearC1, second, first);
}
187
// Block dithering function. Simply dithers a block to 565 RGB.
// (Floyd-Steinberg)
//
//   dest  - receives the dithered 4x4 block, 4 bytes per pixel; only bytes
//           0..2 of each pixel are written, the fourth byte is left alone
//   block - source 4x4 block, 4 bytes per pixel
static void stb__DitherBlock(unsigned char *dest, unsigned char *block)
{
   // err holds two rows of four error values; ep1 is the row being produced,
   // ep2 the row above it.
   int err[8],*ep1 = err,*ep2 = err+4, *et;
   int ch,y;

   // process channels separately
   for (ch=0; ch<3; ++ch) {
      unsigned char *bp = block+ch, *dp = dest+ch;
      // the tables are indexed with value+8 so the diffused error can push
      // the lookup a little below 0 or above 255
      unsigned char *quant = (ch == 1) ? stb__QuantGTab+8 : stb__QuantRBTab+8;
      STBD_MEMSET(err, 0, sizeof(err));
      for(y=0; y<4; ++y) {
         // each pixel: quantize source + weighted neighbor errors (weights in
         // sixteenths), then record the error against the original value
         dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
         ep1[0] = bp[ 0] - dp[ 0];
         dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
         ep1[1] = bp[ 4] - dp[ 4];
         dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
         ep1[2] = bp[ 8] - dp[ 8];
         dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
         ep1[3] = bp[12] - dp[12];
         // advance to the next row of four pixels
         bp += 16;
         dp += 16;
         et = ep1, ep1 = ep2, ep2 = et; // swap
      }
   }
}
215
// Pick the palette index for a projected value, given the three crossover
// points between neighboring palette entries along the c0-c1 axis.
static int stb__SelectColorIndex(int dot, int c0Point, int halfPoint, int c3Point)
{
   if (dot < halfPoint)
      return (dot < c0Point) ? 1 : 3;
   return (dot < c3Point) ? 2 : 0;
}

// The color matching function
//
// Assigns one of the four palette entries to each of the 16 pixels.
//   block  - 4x4 block, 4 bytes per pixel (only R, G, B are read)
//   color  - palette: 4 entries of 4 bytes as produced by stb__EvalColors
//   dither - nonzero to diffuse the matching error (Floyd-Steinberg weights)
// Returns the 32-bit index mask, 2 bits per pixel, pixel 0 in the lowest bits.
//
// All shifts that build the mask are done on unsigned values, and the
// fixed-point scaling uses "* 16" rather than "<< 4": left-shifting a
// negative int, or shifting a signed value into the sign bit, is undefined
// behavior in C.
static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color,int dither)
{
   unsigned int mask = 0;
   int dirr = color[0*4+0] - color[1*4+0];
   int dirg = color[0*4+1] - color[1*4+1];
   int dirb = color[0*4+2] - color[1*4+2];
   int dots[16];
   int stops[4];
   int i;
   int c0Point, halfPoint, c3Point;

   // project every pixel and every palette entry onto the c0-c1 direction
   for(i=0;i<16;i++)
      dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb;

   for(i=0;i<4;i++)
      stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb;

   // think of the colors as arranged on a line; project point onto that line, then choose
   // next color out of available ones. we compute the crossover points for "best color in top
   // half"/"best in bottom half" and then the same inside that subinterval.
   //
   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
   // but it's very close and a lot faster.
   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html

   c0Point   = (stops[1] + stops[3]) >> 1;
   halfPoint = (stops[3] + stops[2]) >> 1;
   c3Point   = (stops[2] + stops[0]) >> 1;

   if(!dither) {
      // the version without dithering is straightforward
      for (i=15;i>=0;i--) {
         mask <<= 2;
         mask |= (unsigned int) stb__SelectColorIndex(dots[i], c0Point, halfPoint, c3Point);
      }
   } else {
      // with floyd-steinberg dithering
      // err holds two rows of errors: ep1 = row being produced, ep2 = row above
      int err[8],*ep1 = err,*ep2 = err+4;
      int *dp = dots, y;

      // thresholds and dots are compared in 1/16 units so the error weights
      // (which are sixteenths) can be added without a division
      c0Point   *= 16;
      halfPoint *= 16;
      c3Point   *= 16;
      for(i=0;i<8;i++)
         err[i] = 0;

      for(y=0;y<4;y++)
      {
         int dot,step;
         unsigned int lmask;

         dot = dp[0]*16 + (3*ep2[1] + 5*ep2[0]);
         step = stb__SelectColorIndex(dot, c0Point, halfPoint, c3Point);
         ep1[0] = dp[0] - stops[step];
         lmask = (unsigned int) step;

         dot = dp[1]*16 + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]);
         step = stb__SelectColorIndex(dot, c0Point, halfPoint, c3Point);
         ep1[1] = dp[1] - stops[step];
         lmask |= (unsigned int) step << 2;

         dot = dp[2]*16 + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]);
         step = stb__SelectColorIndex(dot, c0Point, halfPoint, c3Point);
         ep1[2] = dp[2] - stops[step];
         lmask |= (unsigned int) step << 4;

         dot = dp[3]*16 + (7*ep1[2] + 5*ep2[3] + ep2[2]);
         step = stb__SelectColorIndex(dot, c0Point, halfPoint, c3Point);
         ep1[3] = dp[3] - stops[step];
         lmask |= (unsigned int) step << 6;

         dp += 4;
         mask |= lmask << (y*8);
         { int *et = ep1; ep1 = ep2; ep2 = et; } // swap
      }
   }

   return mask;
}
312
// The color optimization function. (Clever code, part 1)
//
// Chooses initial endpoints for a block: estimates the principal axis of the
// block's colors, then takes the two pixels that project furthest apart on it.
//   block  - 4x4 block, 4 bytes per pixel (only R, G, B are read)
//   pmax16 - receives the 5:6:5 color of the pixel with the largest projection
//   pmin16 - receives the 5:6:5 color of the pixel with the smallest projection
static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
{
   int mind = 0x7fffffff,maxd = -0x7fffffff;
   // minp/maxp are assigned in the final loop below before they are read
   unsigned char *minp, *maxp;
   double magn;
   int v_r,v_g,v_b;
   static const int nIterPower = 4;
   float covf[6],vfr,vfg,vfb;

   // determine color distribution
   int cov[6];
   int mu[3],min[3],max[3];
   int ch,i,iter;

   // per channel: rounded mean plus minimum and maximum over the 16 pixels
   for(ch=0;ch<3;ch++)
   {
      const unsigned char *bp = ((const unsigned char *) block) + ch;
      int muv,minv,maxv;

      muv = minv = maxv = bp[0];
      for(i=4;i<64;i+=4)
      {
         muv += bp[i];
         if (bp[i] < minv) minv = bp[i];
         else if (bp[i] > maxv) maxv = bp[i];
      }

      mu[ch] = (muv + 8) >> 4;
      min[ch] = minv;
      max[ch] = maxv;
   }

   // determine covariance matrix
   // (symmetric 3x3, stored as its 6 unique entries: rr, rg, rb, gg, gb, bb)
   for (i=0;i<6;i++)
      cov[i] = 0;

   for (i=0;i<16;i++)
   {
      int r = block[i*4+0] - mu[0];
      int g = block[i*4+1] - mu[1];
      int b = block[i*4+2] - mu[2];

      cov[0] += r*r;
      cov[1] += r*g;
      cov[2] += r*b;
      cov[3] += g*g;
      cov[4] += g*b;
      cov[5] += b*b;
   }

   // convert covariance matrix to float, find principal axis via power iter
   for(i=0;i<6;i++)
      covf[i] = cov[i] / 255.0f;

   // start the iteration from the per-channel extent of the block
   vfr = (float) (max[0] - min[0]);
   vfg = (float) (max[1] - min[1]);
   vfb = (float) (max[2] - min[2]);

   // each pass multiplies the current vector by the covariance matrix
   for(iter=0;iter<nIterPower;iter++)
   {
      float r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2];
      float g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
      float b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];

      vfr = r;
      vfg = g;
      vfb = b;
   }

   // magn = largest absolute component of the resulting vector
   magn = STBD_FABS(vfr);
   if (STBD_FABS(vfg) > magn) magn = STBD_FABS(vfg);
   if (STBD_FABS(vfb) > magn) magn = STBD_FABS(vfb);

   if(magn < 4.0f) { // too small, default to luminance
      v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
      v_g = 587;
      v_b = 114;
   } else {
      // rescale so the largest component has magnitude 512, then go to ints
      magn = 512.0 / magn;
      v_r = (int) (vfr * magn);
      v_g = (int) (vfg * magn);
      v_b = (int) (vfb * magn);
   }

   // Pick colors at extreme points
   for(i=0;i<16;i++)
   {
      int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b;

      if (dot < mind) {
         mind = dot;
         minp = block+i*4;
      }

      if (dot > maxd) {
         maxd = dot;
         maxp = block+i*4;
      }
   }

   *pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]);
   *pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]);
}
417
// Convert y to int (truncating toward zero) and clamp the result to [p0, p1].
static int stb__sclamp(float y, int p0, int p1)
{
   const int truncated = (int) y;

   if (truncated < p0)
      return p0;
   return (truncated > p1) ? p1 : truncated;
}
425
// The refinement function. (Clever code, part 2)
// Tries to optimize colors to suit block contents better.
// (By solving a least squares system via normal equations+Cramer's rule)
//
//   block  - 4x4 block, 4 bytes per pixel (only R, G, B are read)
//   pmax16 - in: current max endpoint (5:6:5); out: refined endpoint
//   pmin16 - in: current min endpoint (5:6:5); out: refined endpoint
//   mask   - current per-pixel palette indices, 2 bits per pixel
// Returns nonzero if either endpoint changed.
static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
{
   // w1Tab[index] = weight (in thirds) of the max endpoint for that palette
   // index; the min endpoint's weight is 3 - w1.
   static const int w1Tab[4] = { 3,0,2,1 };
   // prods[index] packs w1*w1 in bits 16+, w2*w2 in bits 8..15 and w1*w2 in
   // bits 0..7, with w2 = 3 - w1.
   static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
   // ^some magic to save a lot of multiplies in the accumulating loop...
   // (precomputed products of weights for least squares system, accumulated inside one 32-bit register)

   float frb,fg;
   unsigned short oldMin, oldMax, min16, max16;
   int i, akku = 0, xx,xy,yy;
   int At1_r,At1_g,At1_b;
   int At2_r,At2_g,At2_b;
   unsigned int cm = mask;

   oldMin = *pmin16;
   oldMax = *pmax16;

   if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
   {
      // yes, linear system would be singular; solve using optimal
      // single-color match on average color
      int r = 8, g = 8, b = 8; // start at 8 so the >> 4 below rounds to nearest
      for (i=0;i<16;++i) {
         r += block[i*4+0];
         g += block[i*4+1];
         b += block[i*4+2];
      }

      r >>= 4; g >>= 4; b >>= 4;

      max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
      min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
   } else {
      // accumulate the right-hand sides: At1 = sum(w1 * color),
      // At2 = sum(color) for now (turned into sum(w2 * color) after the loop)
      At1_r = At1_g = At1_b = 0;
      At2_r = At2_g = At2_b = 0;
      for (i=0;i<16;++i,cm>>=2) {
         int step = cm&3;
         int w1 = w1Tab[step];
         int r = block[i*4+0];
         int g = block[i*4+1];
         int b = block[i*4+2];

         akku += prods[step];
         At1_r += w1*r;
         At1_g += w1*g;
         At1_b += w1*b;
         At2_r += r;
         At2_g += g;
         At2_b += b;
      }

      // since w2 = 3 - w1: sum(w2 * c) = 3*sum(c) - sum(w1 * c)
      At2_r = 3*At2_r - At1_r;
      At2_g = 3*At2_g - At1_g;
      At2_b = 3*At2_b - At1_b;

      // extract solutions and decide solvability
      xx = akku >> 16;
      yy = (akku >> 8) & 0xff;
      xy = (akku >> 0) & 0xff;

      // scale factors: divide by the determinant and convert from 8-bit
      // range (with weights in thirds) to 5-bit (frb) or 6-bit (fg) range
      frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy);
      fg = frb * 63.0f / 31.0f;

      // solve.
      max16 = (unsigned short)(stb__sclamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11);
      max16 |= (unsigned short)(stb__sclamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5);
      max16 |= (unsigned short)(stb__sclamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0);

      min16 = (unsigned short)(stb__sclamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11);
      min16 |= (unsigned short)(stb__sclamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5);
      min16 |= (unsigned short)(stb__sclamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0);
   }

   *pmin16 = min16;
   *pmax16 = max16;
   return oldMin != min16 || oldMax != max16;
}
506
507 // Color block compression
stb__CompressColorBlock(unsigned char * dest,unsigned char * block,int mode)508 static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
509 {
510 unsigned int mask;
511 int i;
512 int dither;
513 int refinecount;
514 unsigned short max16, min16;
515 unsigned char dblock[16*4],color[4*4];
516
517 dither = mode & STB_DXT_DITHER;
518 refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
519
520 // check if block is constant
521 for (i=1;i<16;i++)
522 if (((unsigned int *) block)[i] != ((unsigned int *) block)[0])
523 break;
524
525 if(i == 16) { // constant color
526 int r = block[0], g = block[1], b = block[2];
527 mask = 0xaaaaaaaa;
528 max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
529 min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
530 } else {
531 // first step: compute dithered version for PCA if desired
532 if(dither)
533 stb__DitherBlock(dblock,block);
534
535 // second step: pca+map along principal axis
536 stb__OptimizeColorsBlock(dither ? dblock : block,&max16,&min16);
537 if (max16 != min16) {
538 stb__EvalColors(color,max16,min16);
539 mask = stb__MatchColorsBlock(block,color,dither);
540 } else
541 mask = 0;
542
543 // third step: refine (multiple times if requested)
544 for (i=0;i<refinecount;i++) {
545 unsigned int lastmask = mask;
546
547 if (stb__RefineBlock(dither ? dblock : block,&max16,&min16,mask)) {
548 if (max16 != min16) {
549 stb__EvalColors(color,max16,min16);
550 mask = stb__MatchColorsBlock(block,color,dither);
551 } else {
552 mask = 0;
553 break;
554 }
555 }
556
557 if(mask == lastmask)
558 break;
559 }
560 }
561
562 // write the color block
563 if(max16 < min16)
564 {
565 unsigned short t = min16;
566 min16 = max16;
567 max16 = t;
568 mask ^= 0x55555555;
569 }
570
571 dest[0] = (unsigned char) (max16);
572 dest[1] = (unsigned char) (max16 >> 8);
573 dest[2] = (unsigned char) (min16);
574 dest[3] = (unsigned char) (min16 >> 8);
575 dest[4] = (unsigned char) (mask);
576 dest[5] = (unsigned char) (mask >> 8);
577 dest[6] = (unsigned char) (mask >> 16);
578 dest[7] = (unsigned char) (mask >> 24);
579 }
580
// Alpha block compression (this is easy for a change)
//
// Encodes 16 single-channel samples into 8 bytes: the maximum value, the
// minimum value, then sixteen 3-bit indices packed starting at the low bits.
//   dest   - receives 8 bytes
//   src    - first sample
//   stride - distance in bytes between consecutive samples
static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src, int stride)
{
   int lo, hi;
   int range, range2, range4, offset;
   int pending = 0;   // bits not yet written out
   int nbits = 0;     // number of valid bits in 'pending'
   int px;

   // find the smallest and largest sample
   lo = hi = src[0];
   for (px = 1; px < 16; ++px) {
      const int v = src[px*stride];
      if (v < lo)
         lo = v;
      else if (v > hi)
         hi = v;
   }

   // endpoints come first
   dest[0] = (unsigned char)hi;
   dest[1] = (unsigned char)lo;
   dest += 2;

   // determine bias and emit color indices
   // given the choice of mx/mn, these indices are optimal:
   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
   range  = hi - lo;
   range2 = range*2;
   range4 = range*4;
   offset = (range < 8) ? (range - 1) : (range/2 + 2);
   offset -= lo * 7;

   for (px = 0; px < 16; ++px) {
      int scaled = src[px*stride]*7 + offset;
      int level = 0;
      int code;

      // level is a "linear scale" lerp factor between 0 (val=min) and 7
      // (val=max), built one binary digit at a time
      if (scaled >= range4) { level  = 4; scaled -= range4; }
      if (scaled >= range2) { level += 2; scaled -= range2; }
      if (scaled >= range)    level += 1;

      // turn linear scale into DXT index (0/1 are extremal pts)
      code = -level & 7;
      if (code < 2)
         code ^= 1;

      // append the 3-bit index and flush every completed byte
      pending |= code << nbits;
      nbits += 3;
      if (nbits >= 8) {
         *dest++ = (unsigned char)pending;
         pending >>= 8;
         nbits -= 8;
      }
   }
}
633
stb__InitDXT()634 static void stb__InitDXT()
635 {
636 int i;
637 for(i=0;i<32;i++)
638 stb__Expand5[i] = (unsigned char)((i<<3)|(i>>2));
639
640 for(i=0;i<64;i++)
641 stb__Expand6[i] = (unsigned char)((i<<2)|(i>>4));
642
643 for(i=0;i<256+16;i++)
644 {
645 int v = i-8 < 0 ? 0 : i-8 > 255 ? 255 : i-8;
646 stb__QuantRBTab[i] = stb__Expand5[stb__Mul8Bit(v,31)];
647 stb__QuantGTab[i] = stb__Expand6[stb__Mul8Bit(v,63)];
648 }
649
650 stb__PrepareOptTable(&stb__OMatch5[0][0],stb__Expand5,32);
651 stb__PrepareOptTable(&stb__OMatch6[0][0],stb__Expand6,64);
652 }
653
// Compress one 4x4 RGBA block.
//   dest  - output: 8 bytes when alpha == 0; 16 bytes when alpha != 0
//           (8-byte alpha block first, then the 8-byte color block)
//   src   - 16 pixels, 4 bytes each (R, G, B, A)
//   alpha - nonzero to encode the alpha channel as well
//   mode  - bitwise OR of STB_DXT_DITHER / STB_DXT_HIGHQUAL
//
// The lookup tables are built on the first call, guarded by a plain static
// flag; nothing here synchronizes concurrent first calls.
void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
{
   unsigned char data[16][4];
   static int init=1;
   if (init) {
      stb__InitDXT();
      init=0;
   }

   if (alpha) {
      int i;
      stb__CompressAlphaBlock(dest,(unsigned char*) src+3, 4);
      dest += 8;
      // make a new copy of the data in which alpha is opaque,
      // because code uses a fast test for color constancy
      // (copied by hand rather than with memcpy: <string.h> is only included
      // when STBD_MEMSET is not supplied by the user, so memcpy may not be
      // declared here)
      for (i=0; i < 16; ++i) {
         data[i][0] = src[i*4+0];
         data[i][1] = src[i*4+1];
         data[i][2] = src[i*4+2];
         data[i][3] = 255;
      }
      src = &data[0][0];
   }

   stb__CompressColorBlock(dest,(unsigned char*) src,mode);
}
677
// Compress one 4x4 block of single-channel samples (16 consecutive bytes)
// into 8 bytes using the alpha block encoder.
void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *samples = (unsigned char*) src;

   stb__CompressAlphaBlock(dest, samples, 1);
}
682
// Compress one 4x4 block of two-channel samples (2 bytes per pixel) into
// 16 bytes: one 8-byte alpha-style block per channel, first channel first.
void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src)
{
   unsigned char *samples = (unsigned char*) src;
   int channel;

   for (channel = 0; channel < 2; ++channel)
      stb__CompressAlphaBlock(dest + channel*8, samples + channel, 2);
}
688 #endif // STB_DXT_IMPLEMENTATION
689
690 /*
691 ------------------------------------------------------------------------------
692 This software is available under 2 licenses -- choose whichever you prefer.
693 ------------------------------------------------------------------------------
694 ALTERNATIVE A - MIT License
695 Copyright (c) 2017 Sean Barrett
696 Permission is hereby granted, free of charge, to any person obtaining a copy of
697 this software and associated documentation files (the "Software"), to deal in
698 the Software without restriction, including without limitation the rights to
699 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
700 of the Software, and to permit persons to whom the Software is furnished to do
701 so, subject to the following conditions:
702 The above copyright notice and this permission notice shall be included in all
703 copies or substantial portions of the Software.
704 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
705 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
706 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
707 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
708 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
709 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
710 SOFTWARE.
711 ------------------------------------------------------------------------------
712 ALTERNATIVE B - Public Domain (www.unlicense.org)
713 This is free and unencumbered software released into the public domain.
714 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
715 software, either in source code form or as a compiled binary, for any purpose,
716 commercial or non-commercial, and by any means.
717 In jurisdictions that recognize copyright laws, the author or authors of this
718 software dedicate any and all copyright interest in the software to the public
719 domain. We make this dedication for the benefit of the public at large and to
720 the detriment of our heirs and successors. We intend this dedication to be an
721 overt act of relinquishment in perpetuity of all present and future rights to
722 this software under copyright law.
723 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
724 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
725 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
726 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
727 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
728 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
729 ------------------------------------------------------------------------------
730 */
731