1 /*
2  * Copyright (C) 2011  Rudolf Polzer   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * RUDOLF POLZER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20  */
21 #define S2TC_LICENSE_IDENTIFIER s2tc_algorithm_license
22 #include "s2tc_license.h"
23 
24 #include <math.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <stdio.h>
28 #include <stdint.h>
29 
30 #include "s2tc_algorithm.h"
31 #include "s2tc_common.h"
32 
33 namespace
34 {
swap(T & a,T & b)35 	template<class T> void swap(T& a, T& b)
36 	{
37 		T h = a;
38 		a = b;
39 		b = h;
40 	}
41 	template<class T> struct color_type_info
42 	{
43 	};
44 	template<> struct color_type_info<unsigned char>
45 	{
46 		static const unsigned char min_value = 0;
47 		static const unsigned char max_value = 255;
48 	};
49 
50 	struct color_t
51 	{
52 		signed char r, g, b;
53 	};
make_color_t()54 	inline color_t make_color_t()
55 	{
56 		return (color_t) {0, 0, 0};
57 	}
make_color_t(signed char r_,signed char g_,signed char b_)58 	inline color_t make_color_t(signed char r_, signed char g_, signed char b_)
59 	{
60 		return (color_t) {r_, g_, b_};
61 	}
make_color_t(int i)62 	inline color_t make_color_t(int i)
63 	{
64 		return (color_t) {(signed char)(i >> 3), (signed char)(i >> 2), (signed char)(i >> 3)};
65 	}
operator ==(const color_t & a,const color_t & b)66 	inline bool operator==(const color_t &a, const color_t &b)
67 	{
68 		return a.r == b.r && a.g == b.g && a.b == b.b;
69 	}
operator <(const color_t & a,const color_t & b)70 	inline bool operator<(const color_t &a, const color_t &b)
71 	{
72 		signed char d;
73 		d = a.r - b.r;
74 		if(d)
75 			return d < 0;
76 		d = a.g - b.g;
77 		if(d)
78 			return d < 0;
79 		d = a.b - b.b;
80 		return d < 0;
81 	}
operator --(color_t & c)82 	inline color_t &operator--(color_t &c)
83 	{
84 		if(c.b > 0)
85 		{
86 			--c.b;
87 		}
88 		else if(c.g > 0)
89 		{
90 			c.b = 31;
91 			--c.g;
92 		}
93 		else if(c.r > 0)
94 		{
95 			c.b = 31;
96 			c.g = 63;
97 			--c.r;
98 		}
99 		else
100 		{
101 			c.b = 31;
102 			c.g = 63;
103 			c.r = 31;
104 		}
105 		return c;
106 	}
operator ++(color_t & c)107 	inline color_t &operator++(color_t &c)
108 	{
109 		if(c.b < 31)
110 		{
111 			++c.b;
112 		}
113 		else if(c.g < 63)
114 		{
115 			c.b = 0;
116 			++c.g;
117 		}
118 		else if(c.r < 31)
119 		{
120 			c.b = 0;
121 			c.g = 0;
122 			++c.r;
123 		}
124 		else
125 		{
126 			c.b = 0;
127 			c.g = 0;
128 			c.r = 0;
129 		}
130 		return c;
131 	}
132 	template<> struct color_type_info<color_t>
133 	{
134 		static const color_t min_value;
135 		static const color_t max_value;
136 	};
137 	const color_t color_type_info<color_t>::min_value = { 0, 0, 0 };
138 	const color_t color_type_info<color_t>::max_value = { 31, 63, 31 };
139 
140 	struct bigcolor_t
141 	{
142 		int r, g, b;
143 
bigcolor_t__anon57fb91a40111::bigcolor_t144 		inline bigcolor_t(): r(0), g(0), b(0)
145 		{
146 		}
147 
operator +=__anon57fb91a40111::bigcolor_t148 		inline bigcolor_t &operator+=(const color_t &c)
149 		{
150 			r += c.r;
151 			g += c.g;
152 			b += c.b;
153 			return *this;
154 		}
155 
operator +=__anon57fb91a40111::bigcolor_t156 		inline bigcolor_t &operator+=(int v)
157 		{
158 			r += v;
159 			g += v;
160 			b += v;
161 			return *this;
162 		}
163 
operator +__anon57fb91a40111::bigcolor_t164 		inline bigcolor_t operator+(int v)
165 		{
166 			bigcolor_t out = *this;
167 			out += v;
168 			return out;
169 		}
170 
operator /=__anon57fb91a40111::bigcolor_t171 		inline bigcolor_t &operator/=(int v)
172 		{
173 			r /= v;
174 			g /= v;
175 			b /= v;
176 			return *this;
177 		}
178 
operator /__anon57fb91a40111::bigcolor_t179 		inline bigcolor_t operator/(int v)
180 		{
181 			bigcolor_t out = *this;
182 			out /= v;
183 			return out;
184 		}
185 
operator <<=__anon57fb91a40111::bigcolor_t186 		inline bigcolor_t &operator<<=(int v)
187 		{
188 			r <<= v;
189 			g <<= v;
190 			b <<= v;
191 			return *this;
192 		}
193 
operator <<__anon57fb91a40111::bigcolor_t194 		inline bigcolor_t operator<<(int v)
195 		{
196 			bigcolor_t out = *this;
197 			out <<= v;
198 			return out;
199 		}
200 
operator color_t__anon57fb91a40111::bigcolor_t201 		inline operator color_t()
202 		{
203 			color_t out;
204 			out.r = r & 31;
205 			out.g = g & 63;
206 			out.b = b & 31;
207 			return out;
208 		}
209 	};
210 
211 	// 16 differences must fit in int
212 	// i.e. a difference must be lower than 2^27
213 
	// shift right, rounded: adds half of 2^n (1 << (n-1)) before shifting
	// right by n. n must be at least 1. The macro expands `n` twice, so
	// arguments should be free of side effects.
#define SHRR(a,n) (((a) + (1 << ((n)-1))) >> (n))
216 
color_dist_avg(const color_t & a,const color_t & b)217 	inline int color_dist_avg(const color_t &a, const color_t &b)
218 	{
219 		int dr = a.r - b.r; // multiplier: 31 (-1..1)
220 		int dg = a.g - b.g; // multiplier: 63 (-1..1)
221 		int db = a.b - b.b; // multiplier: 31 (-1..1)
222 		return ((dr*dr) << 2) + dg*dg + ((db*db) << 2);
223 	}
224 
color_dist_w0avg(const color_t & a,const color_t & b)225 	inline int color_dist_w0avg(const color_t &a, const color_t &b)
226 	{
227 		int dr = a.r - b.r; // multiplier: 31 (-1..1)
228 		int dg = a.g - b.g; // multiplier: 63 (-1..1)
229 		int db = a.b - b.b; // multiplier: 31 (-1..1)
230 		return dr*dr + dg*dg + db*db;
231 		// weighted 1:4:1
232 	}
233 
color_dist_wavg(const color_t & a,const color_t & b)234 	inline int color_dist_wavg(const color_t &a, const color_t &b)
235 	{
236 		int dr = a.r - b.r; // multiplier: 31 (-1..1)
237 		int dg = a.g - b.g; // multiplier: 63 (-1..1)
238 		int db = a.b - b.b; // multiplier: 31 (-1..1)
239 		return ((dr*dr) << 2) + ((dg*dg) << 2) + (db*db);
240 		// weighted 4:16:1
241 	}
242 
color_dist_yuv(const color_t & a,const color_t & b)243 	inline int color_dist_yuv(const color_t &a, const color_t &b)
244 	{
245 		int dr = a.r - b.r; // multiplier: 31 (-1..1)
246 		int dg = a.g - b.g; // multiplier: 63 (-1..1)
247 		int db = a.b - b.b; // multiplier: 31 (-1..1)
248 		int y = dr * 30*2 + dg * 59 + db * 11*2; // multiplier: 6259
249 		int u = dr * 202 - y; // * 0.5 / (1 - 0.30)
250 		int v = db * 202 - y; // * 0.5 / (1 - 0.11)
251 		return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
252 		// weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
253 		// weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
254 	}
255 
color_dist_rgb(const color_t & a,const color_t & b)256 	inline int color_dist_rgb(const color_t &a, const color_t &b)
257 	{
258 		int dr = a.r - b.r; // multiplier: 31 (-1..1)
259 		int dg = a.g - b.g; // multiplier: 63 (-1..1)
260 		int db = a.b - b.b; // multiplier: 31 (-1..1)
261 		int y = dr * 21*2 + dg * 72 + db * 7*2; // multiplier: 6272
262 		int u = dr * 202 - y; // * 0.5 / (1 - 0.21)
263 		int v = db * 202 - y; // * 0.5 / (1 - 0.07)
264 		return ((y*y) << 1) + SHRR(u*u, 3) + SHRR(v*v, 4);
265 		// weight for u: sqrt(2^-4) / (0.5 / (1 - 0.21)) = 0.395
266 		// weight for v: sqrt(2^-5) / (0.5 / (1 - 0.07)) = 0.328
267 	}
268 
color_dist_srgb(const color_t & a,const color_t & b)269 	inline int color_dist_srgb(const color_t &a, const color_t &b)
270 	{
271 		int dr = a.r * (int) a.r - b.r * (int) b.r; // multiplier: 31*31
272 		int dg = a.g * (int) a.g - b.g * (int) b.g; // multiplier: 63*63
273 		int db = a.b * (int) a.b - b.b * (int) b.b; // multiplier: 31*31
274 		int y = dr * 21*2*2 + dg * 72 + db * 7*2*2; // multiplier: 393400
275 		int u = dr * 409 - y; // * 0.5 / (1 - 0.30)
276 		int v = db * 409 - y; // * 0.5 / (1 - 0.11)
277 		int sy = SHRR(y, 3) * SHRR(y, 4);
278 		int su = SHRR(u, 3) * SHRR(u, 4);
279 		int sv = SHRR(v, 3) * SHRR(v, 4);
280 		return SHRR(sy, 4) + SHRR(su, 8) + SHRR(sv, 9);
281 		// weight for u: sqrt(2^-4) / (0.5 / (1 - 0.30)) = 0.350
282 		// weight for v: sqrt(2^-5) / (0.5 / (1 - 0.11)) = 0.315
283 	}
284 
srgb_get_y(const color_t & a)285 	inline int srgb_get_y(const color_t &a)
286 	{
287 		// convert to linear
288 		int r = a.r * (int) a.r;
289 		int g = a.g * (int) a.g;
290 		int b = a.b * (int) a.b;
291 		// find luminance
292 		int y = 37 * (r * 21*2*2 + g * 72 + b * 7*2*2); // multiplier: 14555800
293 		// square root it (!)
294 		y = sqrtf(y) + 0.5f; // now in range 0 to 3815
295 		return y;
296 	}
297 
color_dist_srgb_mixed(const color_t & a,const color_t & b)298 	inline int color_dist_srgb_mixed(const color_t &a, const color_t &b)
299 	{
300 		// get Y
301 		int ay = srgb_get_y(a);
302 		int by = srgb_get_y(b);
303 		// get UV
304 		int au = a.r * 191 - ay;
305 		int av = a.b * 191 - ay;
306 		int bu = b.r * 191 - by;
307 		int bv = b.b * 191 - by;
308 		// get differences
309 		int y = ay - by;
310 		int u = au - bu;
311 		int v = av - bv;
312 		return ((y*y) << 3) + SHRR(u*u, 1) + SHRR(v*v, 2);
313 		// weight for u: ???
314 		// weight for v: ???
315 	}
316 
color_dist_normalmap(const color_t & a,const color_t & b)317 	inline int color_dist_normalmap(const color_t &a, const color_t &b)
318 	{
319 		float ca[3], cb[3], n;
320 		ca[0] = a.r / 31.0f * 2 - 1;
321 		ca[1] = a.g / 63.0f * 2 - 1;
322 		ca[2] = a.b / 31.0f * 2 - 1;
323 		cb[0] = b.r / 31.0f * 2 - 1;
324 		cb[1] = b.g / 63.0f * 2 - 1;
325 		cb[2] = b.b / 31.0f * 2 - 1;
326 		n = ca[0] * ca[0] + ca[1] * ca[1] + ca[2] * ca[2];
327 		if(n > 0)
328 		{
329 			n = 1.0f / sqrtf(n);
330 			ca[0] *= n;
331 			ca[1] *= n;
332 			ca[2] *= n;
333 		}
334 		n = cb[0] * cb[0] + cb[1] * cb[1] + cb[2] * cb[2];
335 		if(n > 0)
336 		{
337 			n = 1.0f / sqrtf(n);
338 			cb[0] *= n;
339 			cb[1] *= n;
340 			cb[2] *= n;
341 		}
342 
343 		return
344 			100000 *
345 			(
346 				(cb[0] - ca[0]) * (cb[0] - ca[0])
347 				+
348 				(cb[1] - ca[1]) * (cb[1] - ca[1])
349 				+
350 				(cb[2] - ca[2]) * (cb[2] - ca[2])
351 			)
352 			;
353 		// max value: 1000 * (4 + 4 + 4) = 6000
354 	}
355 
	// Signature shared by the color_dist_* functions; used as a non-type
	// template parameter to select the distance function at compile time.
	typedef int ColorDistFunc(const color_t &a, const color_t &b);
357 
alpha_dist(unsigned char a,unsigned char b)358 	inline int alpha_dist(unsigned char a, unsigned char b)
359 	{
360 		return (a - (int) b) * (a - (int) b);
361 	}
362 
363 	template <class T, class F>
364 	// n: input count
365 	// m: total color count (including non-counted inputs)
366 	// m >= n
reduce_colors_inplace(T * c,int n,int m,F dist)367 	inline void reduce_colors_inplace(T *c, int n, int m, F dist)
368 	{
369 		int i, j, k;
370 		int bestsum = -1;
371 		int besti = 0;
372 		int bestj = 1;
373 		int dists[m][n];
374 		// first the square
375 		for(i = 0; i < n; ++i)
376 		{
377 			dists[i][i] = 0;
378 			for(j = i+1; j < n; ++j)
379 			{
380 				int d = dist(c[i], c[j]);
381 				dists[i][j] = dists[j][i] = d;
382 			}
383 		}
384 		// then the box
385 		for(; i < m; ++i)
386 		{
387 			for(j = 0; j < n; ++j)
388 			{
389 				int d = dist(c[i], c[j]);
390 				dists[i][j] = d;
391 			}
392 		}
393 		for(i = 0; i < m; ++i)
394 			for(j = i+1; j < m; ++j)
395 			{
396 				int sum = 0;
397 				for(k = 0; k < n; ++k)
398 				{
399 					int di = dists[i][k];
400 					int dj = dists[j][k];
401 					int m  = min(di, dj);
402 					sum += m;
403 				}
404 				if(bestsum < 0 || sum < bestsum)
405 				{
406 					bestsum = sum;
407 					besti = i;
408 					bestj = j;
409 				}
410 			}
411 		T c0 = c[besti];
412 		c[1] = c[bestj];
413 		c[0] = c0;
414 	}
415 	template <class T, class F>
reduce_colors_inplace_2fixpoints(T * c,int n,int m,F dist,const T & fix0,const T & fix1)416 	inline void reduce_colors_inplace_2fixpoints(T *c, int n, int m, F dist, const T &fix0, const T &fix1)
417 	{
418 		// TODO fix this for ramp encoding!
419 		int i, j, k;
420 		int bestsum = -1;
421 		int besti = 0;
422 		int bestj = 1;
423 		int dists[m+2][n];
424 		// first the square
425 		for(i = 0; i < n; ++i)
426 		{
427 			dists[i][i] = 0;
428 			for(j = i+1; j < n; ++j)
429 			{
430 				int d = dist(c[i], c[j]);
431 				dists[i][j] = dists[j][i] = d;
432 			}
433 		}
434 		// then the box
435 		for(; i < m; ++i)
436 		{
437 			for(j = 0; j < n; ++j)
438 			{
439 				int d = dist(c[i], c[j]);
440 				dists[i][j] = d;
441 			}
442 		}
443 		// then the two extra rows
444 		for(j = 0; j < n; ++j)
445 		{
446 			int d = dist(fix0, c[j]);
447 			dists[m][j] = d;
448 		}
449 		for(j = 0; j < n; ++j)
450 		{
451 			int d = dist(fix1, c[j]);
452 			dists[m+1][j] = d;
453 		}
454 		for(i = 0; i < m; ++i)
455 			for(j = i+1; j < m; ++j)
456 			{
457 				int sum = 0;
458 				for(k = 0; k < n; ++k)
459 				{
460 					int di = dists[i][k];
461 					int dj = dists[j][k];
462 					int d0 = dists[m][k];
463 					int d1 = dists[m+1][k];
464 					int m  = min(min(di, dj), min(d0, d1));
465 					sum += m;
466 				}
467 				if(bestsum < 0 || sum < bestsum)
468 				{
469 					bestsum = sum;
470 					besti = i;
471 					bestj = j;
472 				}
473 			}
474 		if(besti != 0)
475 			c[0] = c[besti];
476 		if(bestj != 1)
477 			c[1] = c[bestj];
478 	}
479 
	// Strategy s2tc_encode_block uses to choose the two initial endpoints.
	enum CompressionMode
	{
		// collect the block's values (plus optional random candidates) and
		// search for the best pair
		MODE_NORMAL,
		// take the colors with the smallest and largest distance to black
		MODE_FAST
	};
485 
refine_component_encode(int comp)486 	template<ColorDistFunc ColorDist> inline int refine_component_encode(int comp)
487 	{
488 		return comp;
489 	}
refine_component_encode(int comp)490 	template<> inline int refine_component_encode<color_dist_srgb>(int comp)
491 	{
492 		return comp * comp;
493 	}
refine_component_encode(int comp)494 	template<> inline int refine_component_encode<color_dist_srgb_mixed>(int comp)
495 	{
496 		return comp * comp;
497 	}
498 
refine_component_decode(int comp)499 	template<ColorDistFunc ColorDist> inline int refine_component_decode(int comp)
500 	{
501 		return comp;
502 	}
refine_component_decode(int comp)503 	template<> inline int refine_component_decode<color_dist_srgb>(int comp)
504 	{
505 		return sqrtf(comp) + 0.5f;
506 	}
refine_component_decode(int comp)507 	template<> inline int refine_component_decode<color_dist_srgb_mixed>(int comp)
508 	{
509 		return sqrtf(comp) + 0.5f;
510 	}
511 
512 	template <class T, class Big, int scale_l>
513 	struct s2tc_evaluate_colors_result_t;
514 
515 	template <class T, class Big>
516 	struct s2tc_evaluate_colors_result_t<T, Big, 1>
517 	{
518 		// uses:
519 		//   Big << int
520 		//   Big / int
521 		//   Big + int
522 		//   Big += T
523 		int n0, n1;
524 		Big S0, S1;
s2tc_evaluate_colors_result_t__anon57fb91a40111::s2tc_evaluate_colors_result_t525 		inline s2tc_evaluate_colors_result_t():
526 			n0(), n1(), S0(), S1()
527 		{
528 		}
add__anon57fb91a40111::s2tc_evaluate_colors_result_t529 		inline void add(int l, T a)
530 		{
531 			if(l)
532 			{
533 				++n1;
534 				S1 += a;
535 			}
536 			else
537 			{
538 				++n0;
539 				S0 += a;
540 			}
541 		}
evaluate__anon57fb91a40111::s2tc_evaluate_colors_result_t542 		inline bool evaluate(T &a, T &b)
543 		{
544 			if(!n0 && !n1)
545 				return false;
546 			if(n0)
547 				a = ((S0 << 1) + n0) / (n0 << 1);
548 			if(n1)
549 				b = ((S1 << 1) + n1) / (n1 << 1);
550 			return true;
551 		}
552 	};
553 
554 	template <class T, class Big, int scale_l>
555 	struct s2tc_evaluate_colors_result_t
556 	{
557 		// a possible implementation of inferred color/alpha values
558 		// refining would go here
559 	};
560 
561 	template <class T>
562 	struct s2tc_evaluate_colors_result_null_t
563 	{
add__anon57fb91a40111::s2tc_evaluate_colors_result_null_t564 		inline void add(int l, T a)
565 		{
566 		}
567 	};
568 
get(const unsigned char * buf)569 	template<class T> T get(const unsigned char *buf)
570 	{
571 		T c;
572 		c.r = buf[0];
573 		c.g = buf[1];
574 		c.b = buf[2];
575 		return c;
576 	}
get(const unsigned char * buf)577 	template<> unsigned char get<unsigned char>(const unsigned char *buf)
578 	{
579 		return buf[3]; // extract alpha
580 	}
581 
582 	template<class T, class Big, int bpp, bool have_trans, bool have_0_255, int n_input, class Dist, class Eval, class Arr>
s2tc_try_encode_block(Arr & out,Eval & res,Dist ColorDist,const unsigned char * in,int iw,int w,int h,const T colors_ref[])583 	inline unsigned int s2tc_try_encode_block(
584 			Arr &out,
585 			Eval &res,
586 			Dist ColorDist,
587 			const unsigned char *in, int iw, int w, int h,
588 			const T colors_ref[])
589 	{
590 		unsigned int score = 0;
591 		for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
592 		{
593 			int i = y * 4 + x;
594 			const unsigned char *pix = &in[(y * iw + x) * 4];
595 
596 			if(have_trans)
597 			{
598 				if(pix[3] == 0)
599 				{
600 					out.do_or(i, (1 << bpp) - 1);
601 					continue;
602 				}
603 			}
604 
605 			T color(get<T>(pix));
606 			int best = 0;
607 			int bestdist = ColorDist(color, colors_ref[0]);
608 			for(int k = 1; k < n_input; ++k)
609 			{
610 				int dist = ColorDist(color, colors_ref[k]);
611 				if(dist < bestdist)
612 				{
613 					bestdist = dist;
614 					best = k;
615 				}
616 			}
617 			if(have_0_255)
618 			{
619 				int dist_0 = ColorDist(color, color_type_info<T>::min_value);
620 				if(dist_0 <= bestdist)
621 				{
622 					bestdist = dist_0;
623 					out.do_or(i, (1 << bpp) - 2);
624 					score += bestdist;
625 					continue;
626 				}
627 				int dist_255 = ColorDist(color, color_type_info<T>::max_value);
628 				if(dist_255 <= bestdist)
629 				{
630 					bestdist = dist_255;
631 					out.do_or(i, (1 << bpp) - 1);
632 					score += bestdist;
633 					continue;
634 				}
635 			}
636 
637 			// record
638 			res.add(best, color);
639 			out.do_or(i, best);
640 			score += bestdist;
641 		}
642 		return score;
643 	}
644 
645 	// REFINE_LOOP: refine, take result over only if score improved, loop until it did not
s2tc_dxt5_encode_alpha_refine_loop(bitarray<uint64_t,16,3> & out,const unsigned char * in,int iw,int w,int h,unsigned char & a0,unsigned char & a1)646 	inline void s2tc_dxt5_encode_alpha_refine_loop(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
647 	{
648 		bitarray<uint64_t, 16, 3> out2;
649 		unsigned char a0next = a0, a1next = a1;
650 		unsigned int s = 0x7FFFFFFF;
651 		for(;;)
652 		{
653 			unsigned char ramp[2] = {
654 				a0next,
655 				a1next
656 			};
657 			s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
658 			unsigned int s2 = s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out2, r2, alpha_dist, in, iw, w, h, ramp);
659 			if(s2 < s)
660 			{
661 				out = out2;
662 				s = s2;
663 				a0 = a0next;
664 				a1 = a1next;
665 				if(!r2.evaluate(a0next, a1next))
666 					break;
667 			}
668 			else
669 				break;
670 			out2.clear();
671 		}
672 
673 		if(a1 == a0)
674 		{
675 			if(a0 == 255)
676 				--a1;
677 			else
678 				++a1;
679 			for(int i = 0; i < 16; ++i) switch(out.get(i))
680 			{
681 				case 1:
682 					out.set(i, 0);
683 					break;
684 			}
685 		}
686 
687 		if(a1 < a0)
688 		{
689 			swap(a0, a1);
690 			for(int i = 0; i < 16; ++i) switch(out.get(i))
691 			{
692 				case 0:
693 					out.set(i, 1);
694 					break;
695 				case 1:
696 					out.set(i, 0);
697 					break;
698 				case 6:
699 				case 7:
700 					break;
701 				default:
702 					out.set(i, 7 - out.get(i));
703 					break;
704 			}
705 		}
706 	}
707 
708 	// REFINE_ALWAYS: refine, do not check
s2tc_dxt5_encode_alpha_refine_always(bitarray<uint64_t,16,3> & out,const unsigned char * in,int iw,int w,int h,unsigned char & a0,unsigned char & a1)709 	inline void s2tc_dxt5_encode_alpha_refine_always(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
710 	{
711 		unsigned char ramp[2] = {
712 			a0,
713 			a1
714 		};
715 		s2tc_evaluate_colors_result_t<unsigned char, int, 1> r2;
716 		s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out, r2, alpha_dist, in, iw, w, h, ramp);
717 		r2.evaluate(a0, a1);
718 
719 		if(a1 == a0)
720 		{
721 			if(a0 == 255)
722 				--a1;
723 			else
724 				++a1;
725 			for(int i = 0; i < 16; ++i) switch(out.get(i))
726 			{
727 				case 1:
728 					out.set(i, 0);
729 					break;
730 			}
731 		}
732 
733 		if(a1 < a0)
734 		{
735 			swap(a0, a1);
736 			for(int i = 0; i < 16; ++i) switch(out.get(i))
737 			{
738 				case 0:
739 					out.set(i, 1);
740 					break;
741 				case 1:
742 					out.set(i, 0);
743 					break;
744 				case 6:
745 				case 7:
746 					break;
747 				default:
748 					out.set(i, 7 - out.get(i));
749 					break;
750 			}
751 		}
752 	}
753 
754 	// REFINE_NEVER: do not refine
s2tc_dxt5_encode_alpha_refine_never(bitarray<uint64_t,16,3> & out,const unsigned char * in,int iw,int w,int h,unsigned char & a0,unsigned char & a1)755 	inline void s2tc_dxt5_encode_alpha_refine_never(bitarray<uint64_t, 16, 3> &out, const unsigned char *in, int iw, int w, int h, unsigned char &a0, unsigned char &a1)
756 	{
757 		if(a1 < a0)
758 			swap(a0, a1);
759 		unsigned char ramp[6] = {
760 			a0,
761 			a1
762 		};
763 		s2tc_evaluate_colors_result_null_t<unsigned char> r2;
764 		s2tc_try_encode_block<unsigned char, int, 3, false, true, 2>(out, r2, alpha_dist, in, iw, w, h, ramp);
765 	}
766 
767 	// REFINE_LOOP: refine, take result over only if score improved, loop until it did not
768 	template<ColorDistFunc ColorDist, bool have_trans>
s2tc_dxt1_encode_color_refine_loop(bitarray<uint32_t,16,2> & out,const unsigned char * in,int iw,int w,int h,color_t & c0,color_t & c1)769 	inline void s2tc_dxt1_encode_color_refine_loop(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
770 	{
771 		bitarray<uint32_t, 16, 2> out2;
772 		color_t c0next = c0, c1next = c1;
773 		unsigned int s = 0x7FFFFFFF;
774 		for(;;)
775 		{
776 			color_t ramp[2] = {
777 				c0next,
778 				c1next
779 			};
780 			s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
781 			unsigned int s2 = s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out2, r2, ColorDist, in, iw, w, h, ramp);
782 			if(s2 < s)
783 			{
784 				out = out2;
785 				s = s2;
786 				c0 = c0next;
787 				c1 = c1next;
788 				if(!r2.evaluate(c0next, c1next))
789 					break;
790 			}
791 			else
792 				break;
793 			out2.clear();
794 		}
795 
796 		if(c0 == c1)
797 		{
798 			if(c0 == color_type_info<color_t>::max_value)
799 				--c1;
800 			else
801 				++c1;
802 			for(int i = 0; i < 16; ++i)
803 				if(!(out.get(i) == 1))
804 					out.set(i, 0);
805 		}
806 
807 		if(have_trans ? c1 < c0 : c0 < c1)
808 		{
809 			swap(c0, c1);
810 			for(int i = 0; i < 16; ++i)
811 				if(!(out.get(i) & 2))
812 					out.do_xor(i, 1);
813 		}
814 	}
815 
816 	// REFINE_ALWAYS: refine, do not check
817 	template<ColorDistFunc ColorDist, bool have_trans>
s2tc_dxt1_encode_color_refine_always(bitarray<uint32_t,16,2> & out,const unsigned char * in,int iw,int w,int h,color_t & c0,color_t & c1)818 	inline void s2tc_dxt1_encode_color_refine_always(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
819 	{
820 		color_t ramp[2] = {
821 			c0,
822 			c1
823 		};
824 		s2tc_evaluate_colors_result_t<color_t, bigcolor_t, 1> r2;
825 		s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
826 		r2.evaluate(c0, c1);
827 
828 		if(c0 == c1)
829 		{
830 			if(c0 == color_type_info<color_t>::max_value)
831 				--c1;
832 			else
833 				++c1;
834 			for(int i = 0; i < 16; ++i)
835 				if(!(out.get(i) == 1))
836 					out.set(i, 0);
837 		}
838 
839 		if(have_trans ? c1 < c0 : c0 < c1)
840 		{
841 			swap(c0, c1);
842 			for(int i = 0; i < 16; ++i)
843 				if(!(out.get(i) & 2))
844 					out.do_xor(i, 1);
845 		}
846 	}
847 
848 	// REFINE_NEVER: do not refine
849 	template<ColorDistFunc ColorDist, bool have_trans>
s2tc_dxt1_encode_color_refine_never(bitarray<uint32_t,16,2> & out,const unsigned char * in,int iw,int w,int h,color_t & c0,color_t & c1)850 	inline void s2tc_dxt1_encode_color_refine_never(bitarray<uint32_t, 16, 2> &out, const unsigned char *in, int iw, int w, int h, color_t &c0, color_t &c1)
851 	{
852 		if(have_trans ? c1 < c0 : c0 < c1)
853 			swap(c0, c1);
854 		color_t ramp[2] = {
855 			c0,
856 			c1
857 		};
858 		s2tc_evaluate_colors_result_null_t<color_t> r2;
859 		s2tc_try_encode_block<color_t, bigcolor_t, 2, have_trans, false, 2>(out, r2, ColorDist, in, iw, w, h, ramp);
860 	}
861 
s2tc_dxt3_encode_alpha(bitarray<uint64_t,16,4> & out,const unsigned char * in,int iw,int w,int h)862 	inline void s2tc_dxt3_encode_alpha(bitarray<uint64_t, 16, 4> &out, const unsigned char *in, int iw, int w, int h)
863 	{
864 		for(int x = 0; x < w; ++x) for(int y = 0; y < h; ++y)
865 		{
866 			int i = y * 4 + x;
867 			const unsigned char *pix = &in[(y * iw + x) * 4];
868 			out.do_or(i, pix[3]);
869 		}
870 	}
871 
872 	template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode, RefinementMode refine>
s2tc_encode_block(unsigned char * out,const unsigned char * rgba,int iw,int w,int h,int nrandom)873 	inline void s2tc_encode_block(unsigned char *out, const unsigned char *rgba, int iw, int w, int h, int nrandom)
874 	{
875 		color_t c[16 + (nrandom >= 0 ? nrandom : 0)];
876 		unsigned char ca[16 + (nrandom >= 0 ? nrandom : 0)];
877 		int x, y;
878 
879 		if(mode == MODE_FAST)
880 		{
881 			// FAST: trick from libtxc_dxtn: just get brightest and darkest colors, and encode using these
882 
883 			color_t c0 = make_color_t(0, 0, 0);
884 
885 			// dummy values because we don't know whether the first pixel will write
886 			c[0].r = 31;
887 			c[0].g = 63;
888 			c[0].b = 31;
889 			c[1].r = 0;
890 			c[1].g = 0;
891 			c[1].b = 0;
892 			int dmin = 0x7FFFFFFF;
893 			int dmax = 0;
894 			if(dxt == DXT5)
895 			{
896 				ca[0] = rgba[3];
897 				ca[1] = ca[0];
898 			}
899 
900 			for(x = 0; x < w; ++x)
901 				for(y = 0; y < h; ++y)
902 				{
903 					c[2].r = rgba[(x + y * iw) * 4 + 0];
904 					c[2].g = rgba[(x + y * iw) * 4 + 1];
905 					c[2].b = rgba[(x + y * iw) * 4 + 2];
906 					ca[2]  = rgba[(x + y * iw) * 4 + 3];
907 					if (dxt == DXT1)
908 						if(ca[2] == 0)
909 							continue;
910 					// MODE_FAST doesn't work for normalmaps, so this works
911 
912 					int d = ColorDist(c[2], c0);
913 					if(d > dmax)
914 					{
915 						dmax = d;
916 						c[1] = c[2];
917 					}
918 					if(d < dmin)
919 					{
920 						dmin = d;
921 						c[0] = c[2];
922 					}
923 
924 					if(dxt == DXT5)
925 					{
926 						if(ca[2] != 255)
927 						{
928 							if(ca[2] > ca[1])
929 								ca[1] = ca[2];
930 							if(ca[2] < ca[0])
931 								ca[0] = ca[2];
932 						}
933 					}
934 				}
935 		}
936 		else
937 		{
938 			int n = 0, m = 0;
939 
940 			for(x = 0; x < w; ++x)
941 				for(y = 0; y < h; ++y)
942 				{
943 					c[n].r = rgba[(x + y * iw) * 4 + 0];
944 					c[n].g = rgba[(x + y * iw) * 4 + 1];
945 					c[n].b = rgba[(x + y * iw) * 4 + 2];
946 					ca[n]  = rgba[(x + y * iw) * 4 + 3];
947 					if (dxt == DXT1)
948 						if(ca[n] == 0)
949 							continue;
950 					++n;
951 				}
952 			if(n == 0)
953 			{
954 				n = 1;
955 				c[0].r = 0;
956 				c[0].g = 0;
957 				c[0].b = 0;
958 				ca[0] = 0;
959 			}
960 			m = n;
961 
962 			if(nrandom > 0)
963 			{
964 				color_t mins = c[0];
965 				color_t maxs = c[0];
966 				unsigned char mina = (dxt == DXT5) ? ca[0] : 0;
967 				unsigned char maxa = (dxt == DXT5) ? ca[0] : 0;
968 				for(x = 1; x < n; ++x)
969 				{
970 					mins.r = min(mins.r, c[x].r);
971 					mins.g = min(mins.g, c[x].g);
972 					mins.b = min(mins.b, c[x].b);
973 					maxs.r = max(maxs.r, c[x].r);
974 					maxs.g = max(maxs.g, c[x].g);
975 					maxs.b = max(maxs.b, c[x].b);
976 					if(dxt == DXT5)
977 					{
978 						mina = min(mina, ca[x]);
979 						maxa = max(maxa, ca[x]);
980 					}
981 				}
982 				color_t len = make_color_t(maxs.r - mins.r + 1, maxs.g - mins.g + 1, maxs.b - mins.b + 1);
983 				int lena = (dxt == DXT5) ? (maxa - (int) mina + 1) : 0;
984 				for(x = 0; x < nrandom; ++x)
985 				{
986 					c[m].r = mins.r + rand() % len.r;
987 					c[m].g = mins.g + rand() % len.g;
988 					c[m].b = mins.b + rand() % len.b;
989 					if(dxt == DXT5)
990 						ca[m] = mina + rand() % lena;
991 					++m;
992 				}
993 			}
994 			else
995 			{
996 				// hack for last miplevel
997 				if(n == 1)
998 				{
999 					c[1] = c[0];
1000 					m = n = 2;
1001 				}
1002 			}
1003 
1004 			reduce_colors_inplace(c, n, m, ColorDist);
1005 			if(dxt == DXT5)
1006 				reduce_colors_inplace_2fixpoints(ca, n, m, alpha_dist, (unsigned char) 0, (unsigned char) 255);
1007 		}
1008 
1009 		// equal colors are BAD
1010 		if(c[0] == c[1])
1011 		{
1012 			if(c[0] == color_type_info<color_t>::max_value)
1013 				--c[1];
1014 			else
1015 				++c[1];
1016 		}
1017 
1018 		if(dxt == DXT5)
1019 		{
1020 			if(ca[0] == ca[1])
1021 			{
1022 				if(ca[0] == 255)
1023 					--ca[1];
1024 				else
1025 					++ca[1];
1026 			}
1027 		}
1028 
1029 		switch(dxt)
1030 		{
1031 			case DXT1:
1032 				{
1033 					bitarray<uint32_t, 16, 2> colorblock;
1034 					switch(refine)
1035 					{
1036 						case REFINE_NEVER:
1037 							s2tc_dxt1_encode_color_refine_never<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
1038 							break;
1039 						case REFINE_ALWAYS:
1040 							s2tc_dxt1_encode_color_refine_always<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
1041 							break;
1042 						case REFINE_LOOP:
1043 							s2tc_dxt1_encode_color_refine_loop<ColorDist, true>(colorblock, rgba, iw, w, h, c[0], c[1]);
1044 							break;
1045 					}
1046 					out[0] = ((c[0].g & 0x07) << 5) | c[0].b;
1047 					out[1] = (c[0].r << 3) | (c[0].g >> 3);
1048 					out[2] = ((c[1].g & 0x07) << 5) | c[1].b;
1049 					out[3] = (c[1].r << 3) | (c[1].g >> 3);
1050 					colorblock.tobytes(&out[4]);
1051 				}
1052 				break;
1053 			case DXT3:
1054 				{
1055 					bitarray<uint32_t, 16, 2> colorblock;
1056 					bitarray<uint64_t, 16, 4> alphablock;
1057 					switch(refine)
1058 					{
1059 						case REFINE_NEVER:
1060 							s2tc_dxt1_encode_color_refine_never<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1061 							break;
1062 						case REFINE_ALWAYS:
1063 							s2tc_dxt1_encode_color_refine_always<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1064 							break;
1065 						case REFINE_LOOP:
1066 							s2tc_dxt1_encode_color_refine_loop<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1067 							break;
1068 					}
1069 					s2tc_dxt3_encode_alpha(alphablock, rgba, iw, w, h);
1070 					alphablock.tobytes(&out[0]);
1071 					out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
1072 					out[9] = (c[0].r << 3) | (c[0].g >> 3);
1073 					out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
1074 					out[11] = (c[1].r << 3) | (c[1].g >> 3);
1075 					colorblock.tobytes(&out[12]);
1076 				}
1077 				break;
1078 			case DXT5:
1079 				{
1080 					bitarray<uint32_t, 16, 2> colorblock;
1081 					bitarray<uint64_t, 16, 3> alphablock;
1082 					switch(refine)
1083 					{
1084 						case REFINE_NEVER:
1085 							s2tc_dxt1_encode_color_refine_never<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1086 							s2tc_dxt5_encode_alpha_refine_never(alphablock, rgba, iw, w, h, ca[0], ca[1]);
1087 							break;
1088 						case REFINE_ALWAYS:
1089 							s2tc_dxt1_encode_color_refine_always<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1090 							s2tc_dxt5_encode_alpha_refine_always(alphablock, rgba, iw, w, h, ca[0], ca[1]);
1091 							break;
1092 						case REFINE_LOOP:
1093 							s2tc_dxt1_encode_color_refine_loop<ColorDist, false>(colorblock, rgba, iw, w, h, c[0], c[1]);
1094 							s2tc_dxt5_encode_alpha_refine_loop(alphablock, rgba, iw, w, h, ca[0], ca[1]);
1095 							break;
1096 					}
1097 					out[0] = ca[0];
1098 					out[1] = ca[1];
1099 					alphablock.tobytes(&out[2]);
1100 					out[8] = ((c[0].g & 0x07) << 5) | c[0].b;
1101 					out[9] = (c[0].r << 3) | (c[0].g >> 3);
1102 					out[10] = ((c[1].g & 0x07) << 5) | c[1].b;
1103 					out[11] = (c[1].r << 3) | (c[1].g >> 3);
1104 					colorblock.tobytes(&out[12]);
1105 				}
1106 				break;
1107 		}
1108 	}
1109 
1110 	// compile time dispatch magic
1111 	template<DxtMode dxt, ColorDistFunc ColorDist, CompressionMode mode>
s2tc_encode_block_func(RefinementMode refine)1112 	inline s2tc_encode_block_func_t s2tc_encode_block_func(RefinementMode refine)
1113 	{
1114 		switch(refine)
1115 		{
1116 			case REFINE_NEVER:
1117 				return s2tc_encode_block<dxt, ColorDist, mode, REFINE_NEVER>;
1118 			case REFINE_LOOP:
1119 				return s2tc_encode_block<dxt, ColorDist, mode, REFINE_LOOP>;
1120 			default:
1121 			case REFINE_ALWAYS:
1122 				return s2tc_encode_block<dxt, ColorDist, mode, REFINE_ALWAYS>;
1123 		}
1124 	}
1125 
1126 	// these color dist functions do not need the refinement check, as they always improve the situation
1127 	template<ColorDistFunc ColorDist> struct supports_fast
1128 	{
1129 		static const bool value = true;
1130 	};
1131 	template<> struct supports_fast<color_dist_normalmap>
1132 	{
1133 		static const bool value = false;
1134 	};
1135 
1136 	template<DxtMode dxt, ColorDistFunc ColorDist>
s2tc_encode_block_func(int nrandom,RefinementMode refine)1137 	inline s2tc_encode_block_func_t s2tc_encode_block_func(int nrandom, RefinementMode refine)
1138 	{
1139 		if(!supports_fast<ColorDist>::value || nrandom >= 0)
1140 			return s2tc_encode_block_func<dxt, ColorDist, MODE_NORMAL>(refine);
1141 		else
1142 			return s2tc_encode_block_func<dxt, ColorDist, MODE_FAST>(refine);
1143 	}
1144 
1145 	template<ColorDistFunc ColorDist>
s2tc_encode_block_func(DxtMode dxt,int nrandom,RefinementMode refine)1146 	inline s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, int nrandom, RefinementMode refine)
1147 	{
1148 		switch(dxt)
1149 		{
1150 			case DXT1:
1151 				return s2tc_encode_block_func<DXT1, ColorDist>(nrandom, refine);
1152 				break;
1153 			case DXT3:
1154 				return s2tc_encode_block_func<DXT3, ColorDist>(nrandom, refine);
1155 				break;
1156 			default:
1157 			case DXT5:
1158 				return s2tc_encode_block_func<DXT5, ColorDist>(nrandom, refine);
1159 				break;
1160 		}
1161 	}
1162 };
1163 
s2tc_encode_block_func(DxtMode dxt,ColorDistMode cd,int nrandom,RefinementMode refine)1164 s2tc_encode_block_func_t s2tc_encode_block_func(DxtMode dxt, ColorDistMode cd, int nrandom, RefinementMode refine)
1165 {
1166 	switch(cd)
1167 	{
1168 		case RGB:
1169 			return s2tc_encode_block_func<color_dist_rgb>(dxt, nrandom, refine);
1170 			break;
1171 		case YUV:
1172 			return s2tc_encode_block_func<color_dist_yuv>(dxt, nrandom, refine);
1173 			break;
1174 		case SRGB:
1175 			return s2tc_encode_block_func<color_dist_srgb>(dxt, nrandom, refine);
1176 			break;
1177 		case SRGB_MIXED:
1178 			return s2tc_encode_block_func<color_dist_srgb_mixed>(dxt, nrandom, refine);
1179 			break;
1180 		case AVG:
1181 			return s2tc_encode_block_func<color_dist_avg>(dxt, nrandom, refine);
1182 			break;
1183 		default:
1184 		case WAVG:
1185 			return s2tc_encode_block_func<color_dist_wavg>(dxt, nrandom, refine);
1186 			break;
1187 		case W0AVG:
1188 			return s2tc_encode_block_func<color_dist_w0avg>(dxt, nrandom, refine);
1189 			break;
1190 		case NORMALMAP:
1191 			return s2tc_encode_block_func<color_dist_normalmap>(dxt, nrandom, refine);
1192 			break;
1193 	}
1194 }
1195 
1196 namespace
1197 {
diffuse(int * diff,int src,int shift)1198 	inline int diffuse(int *diff, int src, int shift)
1199 	{
1200 		const int maxval = (1 << (8 - shift)) - 1;
1201 		src += *diff;
1202 		int ret = max(0, min(src >> shift, maxval));
1203 		// simulate decoding ("loop filter")
1204 		int loop = (ret << shift) | (ret >> (8 - 2 * shift));
1205 		*diff = src - loop;
1206 		return ret;
1207 	}
diffuse1(int * diff,int src)1208 	inline int diffuse1(int *diff, int src)
1209 	{
1210 		src += *diff;
1211 		int ret = (src >= 128);
1212 		// simulate decoding ("loop filter")
1213 		int loop = ret ? 255 : 0;
1214 		*diff = src - loop;
1215 		return ret;
1216 	}
1217 
floyd(int * thisrow,int * downrow,int src,int shift)1218 	inline int floyd(int *thisrow, int *downrow, int src, int shift)
1219 	{
1220 		const int maxval = (1 << (8 - shift)) - 1;
1221 		src = (src << 4) | (src >> 4);
1222 		src += thisrow[1];
1223 		int ret = max(0, min(src >> (shift + 4), maxval));
1224 		// simulate decoding ("loop filter")
1225 		int loop = (ret * 4095 / maxval);
1226 		int err = src - loop;
1227 		int e7 = (err * 7 + 8) / 16;
1228 		err -= e7;
1229 		int e3 = (err * 3 + 4) / 9;
1230 		err -= e3;
1231 		int e5 = (err * 5 + 3) / 6;
1232 		err -= e5;
1233 		int e1 = err;
1234 		thisrow[2] += e7;
1235 		downrow[0] += e3;
1236 		downrow[1] += e5;
1237 		downrow[2] += e1;
1238 		return ret;
1239 	}
1240 
floyd1(int * thisrow,int * downrow,int src)1241 	inline int floyd1(int *thisrow, int *downrow, int src)
1242 	{
1243 		src = (src << 4) | (src >> 4);
1244 		src += thisrow[1];
1245 		int ret = (src >= 2048);
1246 		// simulate decoding ("loop filter")
1247 		int loop = ret ? 4095 : 0;
1248 		int err = src - loop;
1249 		int e7 = (err * 7 + 8) / 16;
1250 		err -= e7;
1251 		int e3 = (err * 3 + 4) / 9;
1252 		err -= e3;
1253 		int e5 = (err * 5 + 3) / 6;
1254 		err -= e5;
1255 		int e1 = err;
1256 		thisrow[2] += e7;
1257 		downrow[0] += e3;
1258 		downrow[1] += e5;
1259 		downrow[2] += e1;
1260 		return ret;
1261 	}
1262 
1263 	template<int srccomps, int alphabits, DitherMode dither>
rgb565_image(unsigned char * out,const unsigned char * rgba,int w,int h)1264 	inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h)
1265 	{
1266 		int x, y;
1267 		switch(dither)
1268 		{
1269 			case DITHER_NONE:
1270 				{
1271 					for(y = 0; y < h; ++y)
1272 						for(x = 0; x < w; ++x)
1273 						{
1274 							out[(x + y * w) * 4 + 0] = rgba[(x + y * w) * srccomps + 0] >> 3;
1275 							out[(x + y * w) * 4 + 1] = rgba[(x + y * w) * srccomps + 1] >> 2;
1276 							out[(x + y * w) * 4 + 2] = rgba[(x + y * w) * srccomps + 2] >> 3;
1277 						}
1278 					if(srccomps == 4)
1279 					{
1280 						if(alphabits == 1)
1281 						{
1282 							for(y = 0; y < h; ++y)
1283 								for(x = 0; x < w; ++x)
1284 									out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> 7;
1285 						}
1286 						else if(alphabits == 8)
1287 						{
1288 							for(y = 0; y < h; ++y)
1289 								for(x = 0; x < w; ++x)
1290 									out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1291 						}
1292 						else
1293 						{
1294 							for(y = 0; y < h; ++y)
1295 								for(x = 0; x < w; ++x)
1296 									out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3] >> (8 - alphabits);
1297 						}
1298 					}
1299 					else
1300 					{
1301 						for(y = 0; y < h; ++y)
1302 							for(x = 0; x < w; ++x)
1303 								out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1304 					}
1305 				}
1306 				break;
1307 			case DITHER_SIMPLE:
1308 				{
1309 					int x, y;
1310 					int diffuse_r = 0;
1311 					int diffuse_g = 0;
1312 					int diffuse_b = 0;
1313 					int diffuse_a = 0;
1314 					for(y = 0; y < h; ++y)
1315 						for(x = 0; x < w; ++x)
1316 						{
1317 							out[(x + y * w) * 4 + 0] = diffuse(&diffuse_r, rgba[(x + y * w) * srccomps + 0], 3);
1318 							out[(x + y * w) * 4 + 1] = diffuse(&diffuse_g, rgba[(x + y * w) * srccomps + 1], 2);
1319 							out[(x + y * w) * 4 + 2] = diffuse(&diffuse_b, rgba[(x + y * w) * srccomps + 2], 3);
1320 						}
1321 					if(srccomps == 4)
1322 					{
1323 						if(alphabits == 1)
1324 						{
1325 							for(y = 0; y < h; ++y)
1326 								for(x = 0; x < w; ++x)
1327 									out[(x + y * w) * 4 + 3] = diffuse1(&diffuse_a, rgba[(x + y * w) * srccomps + 3]);
1328 						}
1329 						else if(alphabits == 8)
1330 						{
1331 							for(y = 0; y < h; ++y)
1332 								for(x = 0; x < w; ++x)
1333 									out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1334 						}
1335 						else
1336 						{
1337 							for(y = 0; y < h; ++y)
1338 								for(x = 0; x < w; ++x)
1339 									out[(x + y * w) * 4 + 3] = diffuse(&diffuse_a, rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1340 						}
1341 					}
1342 					else
1343 					{
1344 						for(y = 0; y < h; ++y)
1345 							for(x = 0; x < w; ++x)
1346 								out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1347 					}
1348 				}
1349 				break;
1350 			case DITHER_FLOYDSTEINBERG:
1351 				{
1352 					int x, y;
1353 					int pw = w+2;
1354 					int downrow[6*pw];
1355 					memset(downrow, 0, sizeof(downrow));
1356 					int *thisrow_r, *thisrow_g, *thisrow_b, *thisrow_a;
1357 					int *downrow_r, *downrow_g, *downrow_b, *downrow_a;
1358 					for(y = 0; y < h; ++y)
1359 					{
1360 						thisrow_r = downrow + ((y&1)?3:0) * pw;
1361 						downrow_r = downrow + ((y&1)?0:3) * pw;
1362 						memset(downrow_r, 0, sizeof(*downrow_r) * (3*pw));
1363 						thisrow_g = thisrow_r + pw;
1364 						thisrow_b = thisrow_g + pw;
1365 						downrow_g = downrow_r + pw;
1366 						downrow_b = downrow_g + pw;
1367 						for(x = 0; x < w; ++x)
1368 						{
1369 							out[(x + y * w) * 4 + 0] = floyd(&thisrow_r[x], &downrow_r[x], rgba[(x + y * w) * srccomps + 0], 3);
1370 							out[(x + y * w) * 4 + 1] = floyd(&thisrow_g[x], &downrow_g[x], rgba[(x + y * w) * srccomps + 1], 2);
1371 							out[(x + y * w) * 4 + 2] = floyd(&thisrow_b[x], &downrow_b[x], rgba[(x + y * w) * srccomps + 2], 3);
1372 						}
1373 					}
1374 					if(srccomps == 4)
1375 					{
1376 						if(alphabits == 1)
1377 						{
1378 							for(y = 0; y < h; ++y)
1379 							{
1380 								thisrow_a = downrow + (y&1) * pw;
1381 								downrow_a = downrow + !(y&1) * pw;
1382 								memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1383 								for(x = 0; x < w; ++x)
1384 									out[(x + y * w) * 4 + 3] = floyd1(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3]);
1385 							}
1386 						}
1387 						else if(alphabits == 8)
1388 						{
1389 							for(y = 0; y < h; ++y)
1390 								for(x = 0; x < w; ++x)
1391 									out[(x + y * w) * 4 + 3] = rgba[(x + y * w) * srccomps + 3]; // no conversion
1392 						}
1393 						else
1394 						{
1395 							for(y = 0; y < h; ++y)
1396 							{
1397 								thisrow_a = downrow + (y&1) * pw;
1398 								downrow_a = downrow + !(y&1) * pw;
1399 								memset(downrow_a, 0, sizeof(*downrow_a) * pw);
1400 								for(x = 0; x < w; ++x)
1401 									out[(x + y * w) * 4 + 3] = floyd(&thisrow_a[x], &downrow_a[x], rgba[(x + y * w) * srccomps + 3], 8 - alphabits);
1402 							}
1403 						}
1404 					}
1405 					else
1406 					{
1407 						for(y = 0; y < h; ++y)
1408 							for(x = 0; x < w; ++x)
1409 								out[(x + y * w) * 4 + 3] = (1 << alphabits) - 1;
1410 					}
1411 				}
1412 				break;
1413 		}
1414 	}
1415 
1416 	template<int srccomps, int alphabits>
rgb565_image(unsigned char * out,const unsigned char * rgba,int w,int h,DitherMode dither)1417 	inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, DitherMode dither)
1418 	{
1419 		switch(dither)
1420 		{
1421 			case DITHER_NONE:
1422 				rgb565_image<srccomps, alphabits, DITHER_NONE>(out, rgba, w, h);
1423 				break;
1424 			default:
1425 			case DITHER_SIMPLE:
1426 				rgb565_image<srccomps, alphabits, DITHER_SIMPLE>(out, rgba, w, h);
1427 				break;
1428 			case DITHER_FLOYDSTEINBERG:
1429 				rgb565_image<srccomps, alphabits, DITHER_FLOYDSTEINBERG>(out, rgba, w, h);
1430 				break;
1431 		}
1432 	}
1433 
1434 	template<int srccomps>
rgb565_image(unsigned char * out,const unsigned char * rgba,int w,int h,int alphabits,DitherMode dither)1435 	inline void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int alphabits, DitherMode dither)
1436 	{
1437 		switch(alphabits)
1438 		{
1439 			case 1:
1440 				rgb565_image<srccomps, 1>(out, rgba, w, h, dither);
1441 				break;
1442 			case 4:
1443 				rgb565_image<srccomps, 4>(out, rgba, w, h, dither);
1444 				break;
1445 			default:
1446 			case 8:
1447 				rgb565_image<srccomps, 8>(out, rgba, w, h, dither);
1448 				break;
1449 		}
1450 	}
1451 };
1452 
rgb565_image(unsigned char * out,const unsigned char * rgba,int w,int h,int srccomps,int alphabits,DitherMode dither)1453 void rgb565_image(unsigned char *out, const unsigned char *rgba, int w, int h, int srccomps, int alphabits, DitherMode dither)
1454 {
1455 	switch(srccomps)
1456 	{
1457 		case 3:
1458 			rgb565_image<3>(out, rgba, w, h, alphabits, dither);
1459 			break;
1460 		case 4:
1461 		default:
1462 			rgb565_image<4>(out, rgba, w, h, alphabits, dither);
1463 			break;
1464 	}
1465 }
1466