1 /*
2 * Copyright (C) 2011 Rudolf Polzer All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * RUDOLF POLZER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21 #define S2TC_LICENSE_IDENTIFIER s2tc_algorithm_license
22 #include "s2tc_license.h"
23
24 #include <math.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <stdio.h>
28 #include <stdint.h>
29
30 #include "s2tc_algorithm.h"
31 #include "s2tc_common.h"
32
33 namespace
34 {
// Exchanges the values of a and b by way of one temporary copy.
// T must be copy-constructible and copy-assignable.
template<class T> void swap(T& a, T& b)
{
    T saved = a;
    a = b;
    b = saved;
}
// Limits of a color/alpha value type. The primary template is deliberately
// empty: only specializations provide min_value and max_value.
template<class T> struct color_type_info
{
};

// Limits for plain 8-bit values (the type used for alpha in this file).
template<> struct color_type_info<unsigned char>
{
    static const unsigned char min_value = 0x00;
    static const unsigned char max_value = 0xFF;
};
49
// One color with three signed 8-bit components, stored in r, g, b order.
// It is a plain aggregate, so it can be brace-initialised as {r, g, b}.
struct color_t
{
    signed char r;
    signed char g;
    signed char b;
};
make_color_t()54 inline color_t make_color_t()
55 {
56 return (color_t) {0, 0, 0};
57 }
make_color_t(signed char r_,signed char g_,signed char b_)58 inline color_t make_color_t(signed char r_, signed char g_, signed char b_)
59 {
60 return (color_t) {r_, g_, b_};
61 }
make_color_t(int i)62 inline color_t make_color_t(int i)
63 {
64 return (color_t) {(signed char)(i >> 3), (signed char)(i >> 2), (signed char)(i >> 3)};
65 }
operator ==(const color_t & a,const color_t & b)66 inline bool operator==(const color_t &a, const color_t &b)
67 {
68 return a.r == b.r && a.g == b.g && a.b == b.b;
69 }
operator <(const color_t & a,const color_t & b)70 inline bool operator<(const color_t &a, const color_t &b)
71 {
72 signed char d;
73 d = a.r - b.r;
74 if(d)
75 return d < 0;
76 d = a.g - b.g;
77 if(d)
78 return d < 0;
79 d = a.b - b.b;
80 return d < 0;
81 }
operator --(color_t & c)82 inline color_t &operator--(color_t &c)
83 {
84 if(c.b > 0)
85 {
86 --c.b;
87 }
88 else if(c.g > 0)
89 {
90 c.b = 31;
91 --c.g;
92 }
93 else if(c.r > 0)
94 {
95 c.b = 31;
96 c.g = 63;
97 --c.r;
98 }
99 else
100 {
101 c.b = 31;
102 c.g = 63;
103 c.r = 31;
104 }
105 return c;
106 }
operator ++(color_t & c)107 inline color_t &operator++(color_t &c)
108 {
109 if(c.b < 31)
110 {
111 ++c.b;
112 }
113 else if(c.g < 63)
114 {
115 c.b = 0;
116 ++c.g;
117 }
118 else if(c.r < 31)
119 {
120 c.b = 0;
121 c.g = 0;
122 ++c.r;
123 }
124 else
125 {
126 c.b = 0;
127 c.g = 0;
128 c.r = 0;
129 }
130 return c;
131 }
// Limits for color_t: the smallest and largest color the encoder works with.
// The members are declared here and defined right below the struct.
template<> struct color_type_info<color_t>
{
    static const color_t min_value;
    static const color_t max_value;
};
// Component order is {r, g, b}; r and b top out at 31, g at 63.
const color_t color_type_info<color_t>::min_value = { 0, 0, 0 };
const color_t color_type_info<color_t>::max_value = { 31, 63, 31 };
139
140 struct bigcolor_t
141 {
142 int r, g, b;
143
bigcolor_t__anon57fb91a40111::bigcolor_t144 inline bigcolor_t(): r(0), g(0), b(0)
145 {
146 }
147
operator +=__anon57fb91a40111::bigcolor_t148 inline bigcolor_t &operator+=(const color_t &c)
149 {
150 r += c.r;
151 g += c.g;
152 b += c.b;
153 return *this;
154 }
155
operator +=__anon57fb91a40111::bigcolor_t156 inline bigcolor_t &operator+=(int v)
157 {
158 r += v;
159 g += v;
160 b += v;
161 return *this;
162 }
163
operator +__anon57fb91a40111::bigcolor_t164 inline bigcolor_t operator+(int v)
165 {
166 bigcolor_t out = *this;
167 out += v;
168 return out;
169 }
170
operator /=__anon57fb91a40111::bigcolor_t171 inline bigcolor_t &operator/=(int v)
172 {
173 r /= v;
174 g /= v;
175 b /= v;
176 return *this;
177 }
178
operator /__anon57fb91a40111::bigcolor_t179 inline bigcolor_t operator/(int v)
180 {
181 bigcolor_t out = *this;
182 out /= v;
183 return out;
184 }
185
operator <<=__anon57fb91a40111::bigcolor_t186 inline bigcolor_t &operator<<=(int v)
187 {
188 r <<= v;
189 g <<= v;
190 b <<= v;
191 return *this;
192 }
193
operator <<__anon57fb91a40111::bigcolor_t194 inline bigcolor_t operator<<(int v)
195 {
196 bigcolor_t out = *this;
197 out <<= v;
198 return out;
199 }
200
operator color_t__anon57fb91a40111::bigcolor_t201 inline operator color_t()
202 {
203 color_t out;
204 out.r = r & 31;
205 out.g = g & 63;
206 out.b = b & 31;
207 return out;
208 }
209 };
210
211 // 16 differences must fit in int
212 // i.e. a difference must be lower than 2^27
213
// SHRR(a, n): shift a right by n bits, rounding to nearest. Half of the
// divisor, 1 << (n-1), is added before the shift, so n must be at least 1.
#define SHRR(a, n) (((a) + (1 << ((n) - 1))) >> (n))
216
color_dist_avg(const color_t & a,const color_t & b)217 inline int color_dist_avg(const color_t &a, const color_t &b)
218 {
219 int dr = a.r - b.r; // multiplier: 31 (-1..1)
220 int dg = a.g - b.g; // multiplier: 63 (-1..1)
221 int db = a.b - b.b; // multiplier: 31 (-1..1)
222 return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
223 }
224
color_dist_w0avg(const color_t & a,const color_t & b)225 inline int color_dist_w0avg(const color_t &a, const color_t &b)
226 {
227 int dr = a.r - b.r; // multiplier: 31 (-1..1)
228 int dg = a.g - b.g; // multiplier: 63 (-1..1)
229 int db = a.b - b.b; // multiplier: 31 (-1..1)
230 return dr*dr + dg*dg + db*db;
231 // weighted 1:4:1
232 }
233
color_dist_wavg(const color_t & a,const color_t & b)234 inline int color_dist_wavg(const color_t &a, const color_t &b)
235 {
236 int dr = a.r - b.r; // multiplier: 31 (-1..1)
237 int dg = a.g - b.g; // multiplier: 63 (-1..1)
238 int db = a.b - b.b; // multiplier: 31 (-1..1)
239 return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
240 // weighted 4:16:1
241 }
242
color_dist_yuv(const color_t & a,const color_t & b)243 inline int color_dist_yuv(const color_t &a, const color_t &b)
244 {
245 int dr = a.r - b.r; // multiplier: 31 (-1..1)
246 int dg = a.g - b.g; // multiplier: 63 (-1..1)
247 int db = a.b - b.b; // multiplier: 31 (-1..1)
248 int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
249 int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
250 int v = db * 202 - y; // * 0.5 / (1 - 0.11)
251 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
252 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
253 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
254 }
255
color_dist_rgb(const color_t & a,const color_t & b)256 inline int color_dist_rgb(const color_t &a, const color_t &b)
257 {
258 int dr = a.r - b.r; // multiplier: 31 (-1..1)
259 int dg = a.g - b.g; // multiplier: 63 (-1..1)
260 int db = a.b - b.b; // multiplier: 31 (-1..1)
261 int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
262 int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
263 int v = db * 202 - y; // * 0.5 / (1 - 0.07)
264 return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
265 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
266 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
267 }
268
color_dist_srgb(const color_t & a,const color_t & b)269 inline int color_dist_srgb(const color_t &a, const color_t &b)
270 {
271 int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
272 int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
273 int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
274 int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
275 int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
276 int v = db * 409 - y; // * 0.5 / (1 - 0.11)
277 int sy = SHRR(y, 3) * SHRR(y, 4);
278 int su = SHRR(u, 3) * SHRR(u, 4);
279 int sv = SHRR(v, 3) * SHRR(v, 4);
280 return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
281 // weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
282 // weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
283 }
284
srgb_get_y(const color_t & a)285 inline int srgb_get_y(const color_t &a)
286 {
287 // convert to linear
288 int r = a.r * (int) a.r;
289 int g = a.g * (int) a.g;
290 int b = a.b * (int) a.b;
291 // find luminance
292 int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
293 // square root it (!)
294 y = sqrtf(y) + 0.5f; // now in range 0 to 3815
295 return y;
296 }
297
color_dist_srgb_mixed(const color_t & a,const color_t & b)298 inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
299 {
300 // get Y
301 int ay = srgb_get_y(a);
302 int by = srgb_get_y(b);
303 // get UV
304 int au = a.r * 191 - ay;
305 int av = a.b * 191 - ay;
306 int bu = b.r * 191 - by;
307 int bv = b.b * 191 - by;
308 // get differences
309 int y = ay - by;
310 int u = au - bu;
311 int v = av - bv;
312 return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
313 // weight for u: ???
314 // weight for v: ???
315 }
316
color_dist_normalmap(const color_t & a,const color_t & b)317 inline int color_dist_normalmap(const color_t &a, const color_t &b)
318 {
319 float ca[3], cb[3], n;
320 ca[0] = a.r / 31.0f * 2 - 1;
321 ca[1] = a.g / 63.0f * 2 - 1;
322 ca[2] = a.b / 31.0f * 2 - 1;
323 cb[0] = b.r / 31.0f * 2 - 1;
324 cb[1] = b.g / 63.0f * 2 - 1;
325 cb[2] = b.b / 31.0f * 2 - 1;
326 n = ca[0] * ca[0] + ca[1] * ca[1] + ca[2] * ca[2];
327 if(n > 0)
328 {
329 n = 1.0f / sqrtf(n);
330 ca[0] *= n;
331 ca[1] *= n;
332 ca[2] *= n;
333 }
334 n = cb[0] * cb[0] + cb[1] * cb[1] + cb[2] * cb[2];
335 if(n > 0)
336 {
337 n = 1.0f / sqrtf(n);
338 cb[0] *= n;
339 cb[1] *= n;
340 cb[2] *= n;
341 }
342
343 return
344 100000 *
345 (
346 (cb[0] - ca[0]) * (cb[0] - ca[0])
347 +
348 (cb[1] - ca[1]) * (cb[1] - ca[1])
349 +
350 (cb[2] - ca[2]) * (cb[2] - ca[2])
351 )
352 ;
353 // max value: 1000 * (4 + 4 + 4) = 6000
354 }
355
// Signature shared by all color_dist_* functions: takes two colors and
// returns their distance as an int. Used as a non-type template parameter
// to select the distance function at compile time.
typedef int ColorDistFunc(const color_t &a, const color_t &b);
357
// Squared difference of two 8-bit values, computed in int.
inline int alpha_dist(unsigned char a, unsigned char b)
{
    const int delta = (int) a - (int) b;
    return delta * delta;
}
362
// Picks the pair of candidates that best represents the inputs and moves it
// to c[0] and c[1].
//   c:    candidate array; the first n entries are also the inputs to match
//   n:    input count
//   m:    total candidate count (including non-counted inputs), m >= n
//   dist: callable returning an int distance between two T values
// For every pair (i, j) with i < j < m, the score is the sum over all n
// inputs of the distance to the nearer of c[i] and c[j]. The first pair with
// the lowest score wins.
template <class T, class F>
inline void reduce_colors_inplace(T *c, int n, int m, F dist)
{
    int dists[m][n];

    // Inputs against inputs: symmetric, so each pair is evaluated once.
    for(int row = 0; row < n; ++row)
    {
        dists[row][row] = 0;
        for(int col = row + 1; col < n; ++col)
            dists[row][col] = dists[col][row] = dist(c[row], c[col]);
    }

    // Extra candidates against inputs.
    for(int row = n; row < m; ++row)
        for(int col = 0; col < n; ++col)
            dists[row][col] = dist(c[row], c[col]);

    int lowest = -1;
    int pick0 = 0;
    int pick1 = 1;
    for(int i = 0; i < m; ++i)
    {
        for(int j = i + 1; j < m; ++j)
        {
            int total = 0;
            for(int k = 0; k < n; ++k)
                total += min(dists[i][k], dists[j][k]);

            if(lowest < 0 || total < lowest)
            {
                lowest = total;
                pick0 = i;
                pick1 = j;
            }
        }
    }

    // Read c[pick0] before c[1] is overwritten, in case pick0 == 1.
    const T first = c[pick0];
    c[1] = c[pick1];
    c[0] = first;
}
// Like reduce_colors_inplace(), but two fixed values fix0 and fix1 are
// always available in addition to the chosen pair: each input is scored by
// its distance to the nearest of c[i], c[j], fix0 and fix1.
//   c:    candidate array; the first n entries are also the inputs to match
//   n:    input count
//   m:    total candidate count, m >= n
//   dist: callable returning an int distance between two T values
// The first pair with the lowest total is copied to c[0] and c[1].
// TODO fix this for ramp encoding!
template <class T, class F>
inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
{
    int dists[m+2][n];
    const int fix0_row = m;
    const int fix1_row = m + 1;

    // Inputs against inputs: symmetric, so each pair is evaluated once.
    for(int row = 0; row < n; ++row)
    {
        dists[row][row] = 0;
        for(int col = row + 1; col < n; ++col)
            dists[row][col] = dists[col][row] = dist(c[row], c[col]);
    }

    // Extra candidates against inputs.
    for(int row = n; row < m; ++row)
        for(int col = 0; col < n; ++col)
            dists[row][col] = dist(c[row], c[col]);

    // The two fixed values against inputs, stored in two extra rows.
    for(int col = 0; col < n; ++col)
        dists[fix0_row][col] = dist(fix0, c[col]);
    for(int col = 0; col < n; ++col)
        dists[fix1_row][col] = dist(fix1, c[col]);

    int lowest = -1;
    int pick0 = 0;
    int pick1 = 1;
    for(int i = 0; i < m; ++i)
    {
        for(int j = i + 1; j < m; ++j)
        {
            int total = 0;
            for(int k = 0; k < n; ++k)
            {
                const int nearest_pair = min(dists[i][k], dists[j][k]);
                const int nearest_fixed = min(dists[fix0_row][k], dists[fix1_row][k]);
                total += min(nearest_pair, nearest_fixed);
            }

            if(lowest < 0 || total < lowest)
            {
                lowest = total;
                pick0 = i;
                pick1 = j;
            }
        }
    }

    // pick1 > pick0, so when pick0 != 0 the source of c[1] (index >= 2) is
    // untouched by the first copy.
    if(pick0 != 0)
        c[0] = c[pick0];
    if(pick1 != 1)
        c[1] = c[pick1];
}
479
// Selects how s2tc_encode_block() picks its two starting endpoint colors.
enum CompressionMode
{
    // collect the block's colors and search for the best pair
    MODE_NORMAL,
    // take the colors nearest to and farthest from black
    MODE_FAST
};
485
refine_component_encode(int comp)486 template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
487 {
488 return comp;
489 }
refine_component_encode(int comp)490 template<> inline int refine_component_encode<color_dist_srgb>(int comp)
491 {
492 return comp * comp;
493 }
refine_component_encode(int comp)494 template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
495 {
496 return comp * comp;
497 }
498
refine_component_decode(int comp)499 template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
500 {
501 return comp;
502 }
refine_component_decode(int comp)503 template<> inline int refine_component_decode<color_dist_srgb>(int comp)
504 {
505 return sqrtf(comp) + 0.5f;
506 }
refine_component_decode(int comp)507 template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
508 {
509 return sqrtf(comp) + 0.5f;
510 }
511
template <class T, class Big, int scale_l>
struct s2tc_evaluate_colors_result_t;

// Accumulates the values assigned to each of two endpoints (index 0 and any
// non-zero index) and computes their rounded means.
// Requirements on the types:
//   Big << int, Big / int, Big + int, Big += T, and Big convertible to T.
template <class T, class Big>
struct s2tc_evaluate_colors_result_t<T, Big, 1>
{
    int n0, n1;   // number of values added for endpoint 0 / endpoint 1
    Big S0, S1;   // running sums for endpoint 0 / endpoint 1

    inline s2tc_evaluate_colors_result_t():
        n0(), n1(), S0(), S1()
    {
    }

    // Records value a for endpoint 0 when l is zero, otherwise for endpoint 1.
    inline void add(int l, T a)
    {
        if(l == 0)
        {
            ++n0;
            S0 += a;
            return;
        }
        ++n1;
        S1 += a;
    }

    // Writes the rounded mean of each non-empty group to a (endpoint 0) and
    // b (endpoint 1); an empty group leaves its output untouched.
    // Returns false, changing nothing, when no value was added at all.
    inline bool evaluate(T &a, T &b)
    {
        if(n0 == 0 && n1 == 0)
            return false;
        // (2*S + n) / (2*n) is S / n rounded to nearest
        if(n0 != 0)
            a = ((S0 << 1) + n0) / (n0 << 1);
        if(n1 != 0)
            b = ((S1 << 1) + n1) / (n1 << 1);
        return true;
    }
};
553
// Primary template, used for every scale_l other than 1. It is intentionally
// empty and offers neither add() nor evaluate().
template <class T, class Big, int scale_l>
struct s2tc_evaluate_colors_result_t
{
    // a possible implementation of inferred color/alpha values
    // refining would go here
};
560
// Drop-in accumulator that discards everything: add() does nothing. Used
// where the encoder's per-pixel results are not needed (no refinement).
template <class T>
struct s2tc_evaluate_colors_result_null_t
{
    inline void add(int, T)
    {
        // intentionally empty
    }
};
568
// Reads one value of type T from a 4-byte pixel at buf.
// Generic version: T must have r, g and b members, filled from bytes 0, 1
// and 2 in that order.
template<class T> T get(const unsigned char *buf)
{
    T pixel;
    pixel.r = buf[0];
    pixel.g = buf[1];
    pixel.b = buf[2];
    return pixel;
}

// unsigned char version: returns byte 3, the alpha value.
template<> unsigned char get<unsigned char>(const unsigned char *buf)
{
    const unsigned char alpha = buf[3];
    return alpha;
}
581
582 template<class T, class Big, int bpp, bool have_trans, bool have_0_255, int n_input, class Dist, class Eval, class Arr>
s2tc_try_encode_block(Arr & out,Eval & res,Dist ColorDist,const unsigned char * in,int iw,int w,int h,const T colors_ref[])583 inline unsigned int s2tc_try_encode_block(
584 Arr &out,
585 Eval &res,
586 Dist ColorDist,
587 const unsigned char *in, int iw, int w, int h,
588 const T colors_ref[])
589 {
590 unsigned int score = 0;
591 for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
592 {
593 int i = y * 4 + x;
594 const unsigned char *pix = &in[(y * iw + x) * 4];
595
596 if(have_trans)
597 {
598 if(pix[3] == 0)
599 {
600 out.do_or(i, (1 << bpp) - 1);
601 continue;
602 }
603 }
604
605 T color(get<T>(pix));
606 int best = 0;
607 int bestdist = ColorDist(color, colors_ref[0]);
608 for(int k = 1; k < n_input; ++k)
609 {
610 int dist = ColorDist(color, colors_ref[k]);
611 if(dist < bestdist)
612 {
613 bestdist = dist;
614 best = k;
615 }
616 }
617 if(have_0_255)
618 {
619 int dist_0 = ColorDist(color, color_type_info<T>::min_value);
620 if(dist_0 <= bestdist)
621 {
622 bestdist = dist_0;
623 out.do_or(i, (1 << bpp) - 2);
624 score += bestdist;
625 continue;
626 }
627 int dist_255 = ColorDist(color, color_type_info<T>::max_value);
628 if(dist_255 <= bestdist)
629 {
630 bestdist = dist_255;
631 out.do_or(i, (1 << bpp) - 1);
632 score += bestdist;
633 continue;
634 }
635 }
636
637 // record
638 res.add(best, color);
639 out.do_or(i, best);
640 score += bestdist;
641 }
642 return score;
643 }
644
645 // REFINE_LOOP: refine, take result over only if score improved, loop until it did not
s2tc_dxt5_encode_alpha_refine_loop(bitarray<uint64_t,16,3> & out,const unsigned char * in,int iw,int w,int h,unsigned char & a0,unsigned char & a1)646 inline void s2tc_dxt5_encode_alpha_refine_loop(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
647 {
648 bitarray<uint64_t, 16, 3> out2;
649 unsigned char a0next = a0, a1next = a1;
650 unsigned int s = 0x7FFFFFFF;
651 for(;;)
652 {
653 unsigned char ramp[2] = {
654 a0next,
655 a1next
656 };
657 s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
658 unsigned int s2 = s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out2, r2, alpha_dist, in, iw, w, h, ramp);
659 if(s2 < s)
660 {
661 out = out2;
662 s = s2;
663 a0 = a0next;
664 a1 = a1next;
665 if(!r2.evaluate(a0next, a1next))
666 break;
667 }
668 else
669 break;
670 out2.clear();
671 }
672
673 if(a1 == a0)
674 {
675 if(a0 == 255)
676 --a1;
677 else
678 ++a1;
679 for(int i = 0; i < 16; ++i) switch(out.get(i))
680 {
681 case 1:
682 out.set(i, 0);
683 break;
684 }
685 }
686
687 if(a1 < a0)
688 {
689 swap(a0, a1);
690 for(int i = 0; i < 16; ++i) switch(out.get(i))
691 {
692 case 0:
693 out.set(i, 1);
694 break;
695 case 1:
696 out.set(i, 0);
697 break;
698 case 6:
699 case 7:
700 break;
701 default:
702 out.set(i, 7 - out.get(i));
703 break;
704 }
705 }
706 }
707
708 // REFINE_ALWAYS: refine, do not check
s2tc_dxt5_encode_alpha_refine_always(bitarray<uint64_t,16,3> & out,const unsigned char * in,int iw,int w,int h,unsigned char & a0,unsigned char & a1)709 inline void s2tc_dxt5_encode_alpha_refine_always(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
710 {
711 unsigned char ramp[2] = {
712 a0,
713 a1
714 };
715 s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
716 s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out, r2, alpha_dist, in, iw, w, h, ramp);
717 r2.evaluate(a0, a1);
718
719 if(a1 == a0)
720 {
721 if(a0 == 255)
722 --a1;
723 else
724 ++a1;
725 for(int i = 0; i < 16; ++i) switch(out.get(i))
726 {
727 case 1:
728 out.set(i, 0);
729 break;
730 }
731 }
732
733 if(a1 < a0)
734 {
735 swap(a0, a1);
736 for(int i = 0; i < 16; ++i) switch(out.get(i))
737 {
738 case 0:
739 out.set(i, 1);
740 break;
741 case 1:
742 out.set(i, 0);
743 break;
744 case 6:
745 case 7:
746 break;
747 default:
748 out.set(i, 7 - out.get(i));
749 break;
750 }
751 }
752 }
753
754 // REFINE_NEVER: do not refine
s2tc_dxt5_encode_alpha_refine_never(bitarray<uint64_t,16,3> & out,const unsigned char * in,int iw,int w,int h,unsigned char & a0,unsigned char & a1)755 inline void s2tc_dxt5_encode_alpha_refine_never(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
756 {
757 if(a1 < a0)
758 swap(a0, a1);
759 unsigned char ramp[6] = {
760 a0,
761 a1
762 };
763 s2tc_evaluate_colors_result_null_t<unsigned char> r2;
764 s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out, r2, alpha_dist, in, iw, w, h, ramp);
765 }
766
767 // REFINE_LOOP: refine, take result over only if score improved, loop until it did not
768 template<ColorDistFunc ColorDist, bool have_trans>
s2tc_dxt1_encode_color_refine_loop(bitarray<uint32_t,16,2> & out,const unsigned char * in,int iw,int w,int h,color_t & c0,color_t & c1)769 inline void s2tc_dxt1_encode_color_refine_loop(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
770 {
771 bitarray<uint32_t, 16, 2> out2;
772 color_t c0next = c0, c1next = c1;
773 unsigned int s = 0x7FFFFFFF;
774 for(;;)
775 {
776 color_t ramp[2] = {
777 c0next,
778 c1next
779 };
780 s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
781 unsigned int s2 = s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out2, r2, ColorDist, in, iw, w, h, ramp);
782 if(s2 < s)
783 {
784 out = out2;
785 s = s2;
786 c0 = c0next;
787 c1 = c1next;
788 if(!r2.evaluate(c0next, c1next))
789 break;
790 }
791 else
792 break;
793 out2.clear();
794 }
795
796 if(c0 == c1)
797 {
798 if(c0 == color_type_info<color_t>::max_value)
799 --c1;
800 else
801 ++c1;
802 for(int i = 0; i < 16; ++i)
803 if(!(out.get(i) == 1))
804 out.set(i, 0);
805 }
806
807 if(have_trans ? c1 < c0 : c0 < c1)
808 {
809 swap(c0, c1);
810 for(int i = 0; i < 16; ++i)
811 if(!(out.get(i) & 2))
812 out.do_xor(i, 1);
813 }
814 }
815
816 // REFINE_ALWAYS: refine, do not check
817 template<ColorDistFunc ColorDist, bool have_trans>
s2tc_dxt1_encode_color_refine_always(bitarray<uint32_t,16,2> & out,const unsigned char * in,int iw,int w,int h,color_t & c0,color_t & c1)818 inline void s2tc_dxt1_encode_color_refine_always(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
819 {
820 color_t ramp[2] = {
821 c0,
822 c1
823 };
824 s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
825 s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
826 r2.evaluate(c0, c1);
827
828 if(c0 == c1)
829 {
830 if(c0 == color_type_info<color_t>::max_value)
831 --c1;
832 else
833 ++c1;
834 for(int i = 0; i < 16; ++i)
835 if(!(out.get(i) == 1))
836 out.set(i, 0);
837 }
838
839 if(have_trans ? c1 < c0 : c0 < c1)
840 {
841 swap(c0, c1);
842 for(int i = 0; i < 16; ++i)
843 if(!(out.get(i) & 2))
844 out.do_xor(i, 1);
845 }
846 }
847
848 // REFINE_NEVER: do not refine
849 template<ColorDistFunc ColorDist, bool have_trans>
s2tc_dxt1_encode_color_refine_never(bitarray<uint32_t,16,2> & out,const unsigned char * in,int iw,int w,int h,color_t & c0,color_t & c1)850 inline void s2tc_dxt1_encode_color_refine_never(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
851 {
852 if(have_trans ? c1 < c0 : c0 < c1)
853 swap(c0, c1);
854 color_t ramp[2] = {
855 c0,
856 c1
857 };
858 s2tc_evaluate_colors_result_null_t<color_t> r2;
859 s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
860 }
861
s2tc_dxt3_encode_alpha(bitarray<uint64_t,16,4> & out,const unsigned char * in,int iw,int w,int h)862 inline void s2tc_dxt3_encode_alpha(bitarray<uint64_t, 16, 4> &out, const unsigned char *in, int iw, int w, int h)
863 {
864 for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
865 {
866 int i = y * 4 + x;
867 const unsigned char *pix = &in[(y * iw + x) * 4];
868 out.do_or(i, pix[3]);
869 }
870 }
871
// Encodes one block of w x h pixels (each at most 4) into out.
//   out:     destination; DXT1 writes the two endpoint colors to out[0..3]
//            and the color indices from out[4]; DXT3/DXT5 write alpha data
//            from out[0] (DXT5: two endpoints, then indices from out[2]),
//            color endpoints to out[8..11] and color indices from out[12]
//   rgba:    source pixels, 4 bytes each, iw pixels per row
//   nrandom: number of extra random candidate colors tried in MODE_NORMAL;
//            values <= 0 add none
// Template parameters select the format (dxt), the distance function
// (ColorDist), how the starting endpoints are found (mode) and whether they
// are refined afterwards (refine).
// NOTE(review): components are stored in signed char and compared against
// the 31/63/31 limits, so rgba is presumably already reduced to 5/6/5-bit
// color - confirm against the caller.
template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
{
    // candidate colors / alphas: up to 16 pixels plus the random extras
    color_t c[16 + (nrandom >= 0 ? nrandom : 0)];
    unsigned char ca[16 + (nrandom >= 0 ? nrandom : 0)];
    int x, y;

    if(mode == MODE_FAST)
    {
        // FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these

        color_t c0 = make_color_t(0, 0, 0);

        // dummy values because we don't know whether the first pixel will write
        c[0].r = 31;
        c[0].g = 63;
        c[0].b = 31;
        c[1].r = 0;
        c[1].g = 0;
        c[1].b = 0;
        int dmin = 0x7FFFFFFF;
        int dmax = 0;
        if(dxt == DXT5)
        {
            ca[0] = rgba[3];
            ca[1] = ca[0];
        }

        for(x = 0; x < w; ++x)
            for(y = 0; y < h; ++y)
            {
                // c[2] / ca[2] serve as scratch space for the current pixel
                c[2].r = rgba[(x + y * iw) * 4 + 0];
                c[2].g = rgba[(x + y * iw) * 4 + 1];
                c[2].b = rgba[(x + y * iw) * 4 + 2];
                ca[2] = rgba[(x + y * iw) * 4 + 3];
                // fully transparent DXT1 pixels do not influence the endpoints
                if (dxt == DXT1)
                    if(ca[2] == 0)
                        continue;
                // MODE_FAST doesn't work for normalmaps, so this works

                // distance from black: c[1] tracks the farthest, c[0] the nearest
                int d = ColorDist(c[2], c0);
                if(d > dmax)
                {
                    dmax = d;
                    c[1] = c[2];
                }
                if(d < dmin)
                {
                    dmin = d;
                    c[0] = c[2];
                }

                if(dxt == DXT5)
                {
                    // alpha range, ignoring 255
                    if(ca[2] != 255)
                    {
                        if(ca[2] > ca[1])
                            ca[1] = ca[2];
                        if(ca[2] < ca[0])
                            ca[0] = ca[2];
                    }
                }
            }
    }
    else
    {
        // n: pixels collected as inputs; m: inputs plus random candidates
        int n = 0, m = 0;

        for(x = 0; x < w; ++x)
            for(y = 0; y < h; ++y)
            {
                c[n].r = rgba[(x + y * iw) * 4 + 0];
                c[n].g = rgba[(x + y * iw) * 4 + 1];
                c[n].b = rgba[(x + y * iw) * 4 + 2];
                ca[n] = rgba[(x + y * iw) * 4 + 3];
                // fully transparent DXT1 pixels are not collected
                if (dxt == DXT1)
                    if(ca[n] == 0)
                        continue;
                ++n;
            }
        // nothing collected: fall back to a single all-zero entry
        if(n == 0)
        {
            n = 1;
            c[0].r = 0;
            c[0].g = 0;
            c[0].b = 0;
            ca[0] = 0;
        }
        m = n;

        if(nrandom > 0)
        {
            // per-component bounding box of the inputs
            color_t mins = c[0];
            color_t maxs = c[0];
            unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
            unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
            for(x = 1; x < n; ++x)
            {
                mins.r = min(mins.r, c[x].r);
                mins.g = min(mins.g, c[x].g);
                mins.b = min(mins.b, c[x].b);
                maxs.r = max(maxs.r, c[x].r);
                maxs.g = max(maxs.g, c[x].g);
                maxs.b = max(maxs.b, c[x].b);
                if(dxt == DXT5)
                {
                    mina = min(mina, ca[x]);
                    maxa = max(maxa, ca[x]);
                }
            }
            // extents are at least 1, so the modulo below never divides by zero
            color_t len = make_color_t(maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1);
            int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
            // append nrandom candidates drawn from inside the bounding box
            for(x = 0; x < nrandom; ++x)
            {
                c[m].r = mins.r + rand() % len.r;
                c[m].g = mins.g + rand() % len.g;
                c[m].b = mins.b + rand() % len.b;
                if(dxt == DXT5)
                    ca[m] = mina + rand() % lena;
                ++m;
            }
        }
        else
        {
            // hack for last miplevel
            if(n == 1)
            {
                c[1] = c[0];
                m = n = 2;
            }
        }

        // leave the best pair of candidates in c[0], c[1] (and ca[0], ca[1])
        reduce_colors_inplace(c, n, m, ColorDist);
        if(dxt == DXT5)
            reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
    }

    // equal colors are BAD
    if(c[0] == c[1])
    {
        if(c[0] == color_type_info<color_t>::max_value)
            --c[1];
        else
            ++c[1];
    }

    if(dxt == DXT5)
    {
        // same for the alpha endpoints
        if(ca[0] == ca[1])
        {
            if(ca[0] == 255)
                --ca[1];
            else
                ++ca[1];
        }
    }

    // Assign indices (optionally refining the endpoints) and write the block.
    // Each endpoint color is packed as 16 bits, r:5 g:6 b:5, low byte first.
    switch(dxt)
    {
        case DXT1:
            {
                bitarray<uint32_t, 16, 2> colorblock;
                switch(refine)
                {
                    case REFINE_NEVER:
                        s2tc_dxt1_encode_color_refine_never<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
                        break;
                    case REFINE_ALWAYS:
                        s2tc_dxt1_encode_color_refine_always<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
                        break;
                    case REFINE_LOOP:
                        s2tc_dxt1_encode_color_refine_loop<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
                        break;
                }
                out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
                out[1] = (c[0].r << 3) | (c[0].g >> 3);
                out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
                out[3] = (c[1].r << 3) | (c[1].g >> 3);
                colorblock.tobytes(&out[4]);
            }
            break;
        case DXT3:
            {
                bitarray<uint32_t, 16, 2> colorblock;
                bitarray<uint64_t, 16, 4> alphablock;
                switch(refine)
                {
                    case REFINE_NEVER:
                        s2tc_dxt1_encode_color_refine_never<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
                        break;
                    case REFINE_ALWAYS:
                        s2tc_dxt1_encode_color_refine_always<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
                        break;
                    case REFINE_LOOP:
                        s2tc_dxt1_encode_color_refine_loop<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
                        break;
                }
                // explicit per-pixel alpha first, then the color part
                s2tc_dxt3_encode_alpha(alphablock, rgba, iw, w, h);
                alphablock.tobytes(&out[0]);
                out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
                out[9] = (c[0].r << 3) | (c[0].g >> 3);
                out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
                out[11] = (c[1].r << 3) | (c[1].g >> 3);
                colorblock.tobytes(&out[12]);
            }
            break;
        case DXT5:
            {
                bitarray<uint32_t, 16, 2> colorblock;
                bitarray<uint64_t, 16, 3> alphablock;
                switch(refine)
                {
                    case REFINE_NEVER:
                        s2tc_dxt1_encode_color_refine_never<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
                        s2tc_dxt5_encode_alpha_refine_never(alphablock, rgba, iw, w, h, ca[0], ca[1]);
                        break;
                    case REFINE_ALWAYS:
                        s2tc_dxt1_encode_color_refine_always<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
                        s2tc_dxt5_encode_alpha_refine_always(alphablock, rgba, iw, w, h, ca[0], ca[1]);
                        break;
                    case REFINE_LOOP:
                        s2tc_dxt1_encode_color_refine_loop<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
                        s2tc_dxt5_encode_alpha_refine_loop(alphablock, rgba, iw, w, h, ca[0], ca[1]);
                        break;
                }
                // two alpha endpoints, alpha indices, then the color part
                out[0] = ca[0];
                out[1] = ca[1];
                alphablock.tobytes(&out[2]);
                out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
                out[9] = (c[0].r << 3) | (c[0].g >> 3);
                out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
                out[11] = (c[1].r << 3) | (c[1].g >> 3);
                colorblock.tobytes(&out[12]);
            }
            break;
    }
}
1109
1110 // compile time dispatch magic
1111 template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
s2tc_encode_block_func(RefinementMode refine)1112 inline s2tc_encode_block_func_t s2tc_encode_block_func(RefinementMode refine)
1113 {
1114 switch(refine)
1115 {
1116 case REFINE_NEVER:
1117 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_NEVER>;
1118 case REFINE_LOOP:
1119 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_LOOP>;
1120 default:
1121 case REFINE_ALWAYS:
1122 return s2tc_encode_block<dxt, ColorDist, mode, REFINE_ALWAYS>;
1123 }
1124 }
1125
// Trait telling whether MODE_FAST may be used with a distance function.
// s2tc_encode_block_func() always selects MODE_NORMAL when value is false.
// True for every distance function except color_dist_normalmap.
template<ColorDistFunc ColorDist> struct supports_fast
{
    static const bool value = true;
};
template<> struct supports_fast<color_dist_normalmap>
{
    static const bool value = false;
};
1135
1136 template<DxtMode dxt, ColorDistFunc ColorDist>
s2tc_encode_block_func(int nrandom,RefinementMode refine)1137 inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, RefinementMode refine)
1138 {
1139 if(!supports_fast<ColorDist>::value || nrandom >= 0)
1140 return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
1141 else
1142 return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
1143 }
1144
1145 template<ColorDistFunc ColorDist>
s2tc_encode_block_func(DxtMode dxt,int nrandom,RefinementMode refine)1146 inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, RefinementMode refine)
1147 {
1148 switch(dxt)
1149 {
1150 case DXT1:
1151 return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
1152 break;
1153 case DXT3:
1154 return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
1155 break;
1156 default:
1157 case DXT5:
1158 return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
1159 break;
1160 }
1161 }
1162 };
1163
s2tc_encode_block_func(DxtMode dxt,ColorDistMode cd,int nrandom,RefinementMode refine)1164 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, RefinementMode refine)
1165 {
1166 switch(cd)
1167 {
1168 case RGB:
1169 return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
1170 break;
1171 case YUV:
1172 return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
1173 break;
1174 case SRGB:
1175 return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
1176 break;
1177 case SRGB_MIXED:
1178 return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
1179 break;
1180 case AVG:
1181 return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
1182 break;
1183 default:
1184 case WAVG:
1185 return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
1186 break;
1187 case W0AVG:
1188 return s2tc_encode_block_func<color_dist_w0avg>(dxt, nrandom, refine);
1189 break;
1190 case NORMALMAP:
1191 return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
1192 break;
1193 }
1194 }
1195
1196 namespace
1197 {
// Quantizes one channel value to (8 - shift) bits while carrying the
// quantization error along in *diff.
//   diff  - in: error carried over from the previous call; out: error left by this one
//   src   - source channel value
//   shift - number of low bits to drop
// Returns the quantized value, clamped to [0, 2^(8 - shift) - 1].
inline int diffuse(int *diff, int src, int shift)
{
	const int wanted = src + *diff;
	const int top = (1 << (8 - shift)) - 1;
	int quantized = wanted >> shift;
	if(quantized > top)
		quantized = top;
	if(quantized < 0)
		quantized = 0;
	// simulate decoding ("loop filter"): shift back up and refill the low bits
	// from the top bits of the quantized value
	const int decoded = (quantized << shift) | (quantized >> (8 - 2 * shift));
	*diff = wanted - decoded;
	return quantized;
}
// One-bit variant of diffuse(): thresholds the error-corrected value at 128.
//   diff - in: error carried over from the previous call; out: error left by this one
//   src  - source channel value
// Returns 1 when src plus the carried error is at least 128, otherwise 0.
inline int diffuse1(int *diff, int src)
{
	const int wanted = src + *diff;
	// simulate decoding ("loop filter"): a set bit decodes to 255, a clear bit to 0
	if(wanted >= 128)
	{
		*diff = wanted - 255;
		return 1;
	}
	*diff = wanted;
	return 0;
}
1217
// Quantizes one channel value to (8 - shift) bits and spreads the resulting
// error over four neighbouring cells of two error rows.
//   thisrow - error row of the current line; thisrow[1] is read as the error
//             for this pixel, thisrow[2] receives the first share
//   downrow - error row of the next line; downrow[0], [1] and [2] receive the
//             remaining shares
//   src     - source channel value (the 4095 full-scale constant suggests 8 bits in — confirm)
//   shift   - number of low bits to drop
// Returns the quantized value, clamped to [0, 2^(8 - shift) - 1].
inline int floyd(int *thisrow, int *downrow, int src, int shift)
{
	const int top = (1 << (8 - shift)) - 1;
	// work with four extra fractional bits, then add the incoming error
	const int wanted = ((src << 4) | (src >> 4)) + thisrow[1];
	int quantized = wanted >> (shift + 4);
	if(quantized > top)
		quantized = top;
	if(quantized < 0)
		quantized = 0;
	// simulate decoding ("loop filter"): full scale is 4095
	int remaining = wanted - quantized * 4095 / top;
	// each share is carved out of what is still left, so the four shares
	// always add up to the whole error
	const int share_right = (remaining * 7 + 8) / 16;
	remaining -= share_right;
	const int share_down_left = (remaining * 3 + 4) / 9;
	remaining -= share_down_left;
	const int share_down = (remaining * 5 + 3) / 6;
	remaining -= share_down;
	thisrow[2] += share_right;
	downrow[0] += share_down_left;
	downrow[1] += share_down;
	downrow[2] += remaining;
	return quantized;
}
1240
// One-bit variant of floyd(): thresholds the error-corrected value and spreads
// the resulting error over four neighbouring cells of two error rows.
//   thisrow - error row of the current line; thisrow[1] is read as the error
//             for this pixel, thisrow[2] receives the first share
//   downrow - error row of the next line; downrow[0], [1] and [2] receive the
//             remaining shares
//   src     - source channel value
// Returns 1 when the corrected value reaches 2048 (half of the 4095 full scale), else 0.
inline int floyd1(int *thisrow, int *downrow, int src)
{
	// work with four extra fractional bits, then add the incoming error
	const int wanted = ((src << 4) | (src >> 4)) + thisrow[1];
	const int bit = (wanted >= 2048) ? 1 : 0;
	// simulate decoding ("loop filter"): a set bit decodes to 4095, a clear bit to 0
	int remaining = wanted - (bit ? 4095 : 0);
	// each share is carved out of what is still left, so the four shares
	// always add up to the whole error
	const int share_right = (remaining * 7 + 8) / 16;
	remaining -= share_right;
	const int share_down_left = (remaining * 3 + 4) / 9;
	remaining -= share_down_left;
	const int share_down = (remaining * 5 + 3) / 6;
	remaining -= share_down;
	thisrow[2] += share_right;
	downrow[0] += share_down_left;
	downrow[1] += share_down;
	downrow[2] += remaining;
	return bit;
}
1262
1263 template<int srccomps, int alphabits, DitherMode dither>
rgb565_image(unsigned char * out,const unsigned char * rgba,int w,int h)1264 inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h)
1265 {
1266 int x, y;
1267 switch(dither)
1268 {
1269 case DITHER_NONE:
1270 {
1271 for(y = 0; y < h; ++y)
1272 for(x = 0; x < w; ++x)
1273 {
1274 out[(x + y * w) * 4 + 0] = rgba[(x + y * w) * srccomps + 0] >> 3;
1275 out[(x + y * w) * 4 + 1] = rgba[(x + y * w) * srccomps + 1] >> 2;
1276 out[(x + y * w) * 4 + 2] = rgba[(x + y * w) * srccomps + 2] >> 3;
1277 }
1278 if(srccomps == 4)
1279 {
1280 if(alphabits == 1)
1281 {
1282 for(y = 0; y < h; ++y)
1283 for(x = 0; x < w; ++x)
1284 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> 7;
1285 }
1286 else if(alphabits == 8)
1287 {
1288 for(y = 0; y < h; ++y)
1289 for(x = 0; x < w; ++x)
1290 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1291 }
1292 else
1293 {
1294 for(y = 0; y < h; ++y)
1295 for(x = 0; x < w; ++x)
1296 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> (8 - alphabits);
1297 }
1298 }
1299 else
1300 {
1301 for(y = 0; y < h; ++y)
1302 for(x = 0; x < w; ++x)
1303 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1304 }
1305 }
1306 break;
1307 case DITHER_SIMPLE:
1308 {
1309 int x, y;
1310 int diffuse_r = 0;
1311 int diffuse_g = 0;
1312 int diffuse_b = 0;
1313 int diffuse_a = 0;
1314 for(y = 0; y < h; ++y)
1315 for(x = 0; x < w; ++x)
1316 {
1317 out[(x + y * w) * 4 + 0] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
1318 out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1319 out[(x + y * w) * 4 + 2] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
1320 }
1321 if(srccomps == 4)
1322 {
1323 if(alphabits == 1)
1324 {
1325 for(y = 0; y < h; ++y)
1326 for(x = 0; x < w; ++x)
1327 out[(x + y * w) * 4 + 3] = diffuse1(&diffuse_a, rgba[(x + y * w) * srccomps + 3]);
1328 }
1329 else if(alphabits == 8)
1330 {
1331 for(y = 0; y < h; ++y)
1332 for(x = 0; x < w; ++x)
1333 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1334 }
1335 else
1336 {
1337 for(y = 0; y < h; ++y)
1338 for(x = 0; x < w; ++x)
1339 out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1340 }
1341 }
1342 else
1343 {
1344 for(y = 0; y < h; ++y)
1345 for(x = 0; x < w; ++x)
1346 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1347 }
1348 }
1349 break;
1350 case DITHER_FLOYDSTEINBERG:
1351 {
1352 int x, y;
1353 int pw = w+2;
1354 int downrow[6*pw];
1355 memset(downrow, 0, sizeof(downrow));
1356 int *thisrow_r, *thisrow_g, *thisrow_b, *thisrow_a;
1357 int *downrow_r, *downrow_g, *downrow_b, *downrow_a;
1358 for(y = 0; y < h; ++y)
1359 {
1360 thisrow_r = downrow + ((y&1)?3:0) * pw;
1361 downrow_r = downrow + ((y&1)?0:3) * pw;
1362 memset(downrow_r, 0, sizeof(*downrow_r) * (3*pw));
1363 thisrow_g = thisrow_r + pw;
1364 thisrow_b = thisrow_g + pw;
1365 downrow_g = downrow_r + pw;
1366 downrow_b = downrow_g + pw;
1367 for(x = 0; x < w; ++x)
1368 {
1369 out[(x + y * w) * 4 + 0] = floyd(&thisrow_r[x], &downrow_r[x], rgba[(x + y * w) * srccomps + 0], 3);
1370 out[(x + y * w) * 4 + 1] = floyd(&thisrow_g[x], &downrow_g[x], rgba[(x + y * w) * srccomps + 1], 2);
1371 out[(x + y * w) * 4 + 2] = floyd(&thisrow_b[x], &downrow_b[x], rgba[(x + y * w) * srccomps + 2], 3);
1372 }
1373 }
1374 if(srccomps == 4)
1375 {
1376 if(alphabits == 1)
1377 {
1378 for(y = 0; y < h; ++y)
1379 {
1380 thisrow_a = downrow + (y&1) * pw;
1381 downrow_a = downrow + !(y&1) * pw;
1382 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1383 for(x = 0; x < w; ++x)
1384 out[(x + y * w) * 4 + 3] = floyd1(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3]);
1385 }
1386 }
1387 else if(alphabits == 8)
1388 {
1389 for(y = 0; y < h; ++y)
1390 for(x = 0; x < w; ++x)
1391 out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1392 }
1393 else
1394 {
1395 for(y = 0; y < h; ++y)
1396 {
1397 thisrow_a = downrow + (y&1) * pw;
1398 downrow_a = downrow + !(y&1) * pw;
1399 memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1400 for(x = 0; x < w; ++x)
1401 out[(x + y * w) * 4 + 3] = floyd(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1402 }
1403 }
1404 }
1405 else
1406 {
1407 for(y = 0; y < h; ++y)
1408 for(x = 0; x < w; ++x)
1409 out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1410 }
1411 }
1412 break;
1413 }
1414 }
1415
1416 template<int srccomps, int alphabits>
rgb565_image(unsigned char * out,const unsigned char * rgba,int w,int h,DitherMode dither)1417 inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, DitherMode dither)
1418 {
1419 switch(dither)
1420 {
1421 case DITHER_NONE:
1422 rgb565_image<srccomps, alphabits, DITHER_NONE>(out, rgba, w, h);
1423 break;
1424 default:
1425 case DITHER_SIMPLE:
1426 rgb565_image<srccomps, alphabits, DITHER_SIMPLE>(out, rgba, w, h);
1427 break;
1428 case DITHER_FLOYDSTEINBERG:
1429 rgb565_image<srccomps, alphabits, DITHER_FLOYDSTEINBERG>(out, rgba, w, h);
1430 break;
1431 }
1432 }
1433
1434 template<int srccomps>
rgb565_image(unsigned char * out,const unsigned char * rgba,int w,int h,int alphabits,DitherMode dither)1435 inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int alphabits, DitherMode dither)
1436 {
1437 switch(alphabits)
1438 {
1439 case 1:
1440 rgb565_image<srccomps, 1>(out, rgba, w, h, dither);
1441 break;
1442 case 4:
1443 rgb565_image<srccomps, 4>(out, rgba, w, h, dither);
1444 break;
1445 default:
1446 case 8:
1447 rgb565_image<srccomps, 8>(out, rgba, w, h, dither);
1448 break;
1449 }
1450 }
1451 };
1452
rgb565_image(unsigned char * out,const unsigned char * rgba,int w,int h,int srccomps,int alphabits,DitherMode dither)1453 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int alphabits, DitherMode dither)
1454 {
1455 switch(srccomps)
1456 {
1457 case 3:
1458 rgb565_image<3>(out, rgba, w, h, alphabits, dither);
1459 break;
1460 case 4:
1461 default:
1462 rgb565_image<4>(out, rgba, w, h, alphabits, dither);
1463 break;
1464 }
1465 }
1466