1 /*----------------------------------------------------------------------------*/
2 /**
3  *	This confidential and proprietary software may be used only as
4  *	authorised by a licensing agreement from ARM Limited
5  *	(C) COPYRIGHT 2011-2012 ARM Limited
6  *	ALL RIGHTS RESERVED
7  *
8  *	The entire notice above must be reproduced on all authorised
9  *	copies and copies may only be made to the extent permitted
10  *	by a licensing agreement from ARM Limited.
11  *
12  *	@brief	Functions to pick the best ASTC endpoint format for a given block.
13  */
14 /*----------------------------------------------------------------------------*/
15 #include "astc_codec_internals.h"
16 
17 #ifdef DEBUG_PRINT_DIAGNOSTICS
18 	#include <stdio.h>
19 #endif
20 
21 #include <math.h>
22 
23 /*
24    functions to determine, for a given partitioning, which color endpoint formats are the best to use.
25 
26  */
27 
28 
29 // for a given partition, compute for every (integer-component-count, quantization-level)
30 // the color error.
31 
32 
compute_color_error_for_every_integer_count_and_quantization_level(int encode_hdr_rgb,int encode_hdr_alpha,int partition_index,const partition_info * pi,const encoding_choice_errors * eci,const endpoints * ep,float4 error_weightings[4],float best_error[21][4],int format_of_choice[21][4])33 static void compute_color_error_for_every_integer_count_and_quantization_level(int encode_hdr_rgb,	// 1 = perform HDR encoding, 0 = perform LDR encoding.
34 																			   int encode_hdr_alpha, int partition_index, const partition_info * pi,
35 																				const encoding_choice_errors * eci,	// pointer to the structure for the CURRENT partition.
36 																			   const endpoints * ep, float4 error_weightings[4],
37 																			   // arrays to return results back through.
38 																			   float best_error[21][4], int format_of_choice[21][4])
39 {
40 	int i, j;
41 	int partition_size = pi->texels_per_partition[partition_index];
42 
43 	static const float baseline_quant_error[21] = {
44 		(65536.0f * 65536.0f / 18.0f),				// 2 values, 1 step
45 		(65536.0f * 65536.0f / 18.0f) / (2 * 2),	// 3 values, 2 steps
46 		(65536.0f * 65536.0f / 18.0f) / (3 * 3),	// 4 values, 3 steps
47 		(65536.0f * 65536.0f / 18.0f) / (4 * 4),	// 5 values
48 		(65536.0f * 65536.0f / 18.0f) / (5 * 5),
49 		(65536.0f * 65536.0f / 18.0f) / (7 * 7),
50 		(65536.0f * 65536.0f / 18.0f) / (9 * 9),
51 		(65536.0f * 65536.0f / 18.0f) / (11 * 11),
52 		(65536.0f * 65536.0f / 18.0f) / (15 * 15),
53 		(65536.0f * 65536.0f / 18.0f) / (19 * 19),
54 		(65536.0f * 65536.0f / 18.0f) / (23 * 23),
55 		(65536.0f * 65536.0f / 18.0f) / (31 * 31),
56 		(65536.0f * 65536.0f / 18.0f) / (39 * 39),
57 		(65536.0f * 65536.0f / 18.0f) / (47 * 47),
58 		(65536.0f * 65536.0f / 18.0f) / (63 * 63),
59 		(65536.0f * 65536.0f / 18.0f) / (79 * 79),
60 		(65536.0f * 65536.0f / 18.0f) / (95 * 95),
61 		(65536.0f * 65536.0f / 18.0f) / (127 * 127),
62 		(65536.0f * 65536.0f / 18.0f) / (159 * 159),
63 		(65536.0f * 65536.0f / 18.0f) / (191 * 191),
64 		(65536.0f * 65536.0f / 18.0f) / (255 * 255)
65 	};
66 
67 	float4 ep0 = ep->endpt0[partition_index];
68 	float4 ep1 = ep->endpt1[partition_index];
69 
70 	float ep0_max = MAX(MAX(ep0.x, ep0.y), ep0.z);
71 	float ep0_min = MIN(MIN(ep0.x, ep0.y), ep0.z);
72 	float ep1_max = MAX(MAX(ep1.x, ep1.y), ep1.z);
73 	float ep1_min = MIN(MIN(ep1.x, ep1.y), ep1.z);
74 
75 	ep0_min = MAX(ep0_min, 0.0f);
76 	ep1_min = MAX(ep1_min, 0.0f);
77 	ep0_max = MAX(ep0_max, 1e-10f);
78 	ep1_max = MAX(ep1_max, 1e-10f);
79 
80 	float4 error_weight = error_weightings[partition_index];
81 
82 	float error_weight_rgbsum = error_weight.x + error_weight.y + error_weight.z;
83 
84 	float range_upper_limit_rgb = encode_hdr_rgb ? 61440.0f : 65535.0f;
85 	float range_upper_limit_alpha = encode_hdr_alpha ? 61440.0f : 65535.0f;
86 
87 	// it is possible to get endpoint colors significantly outside [0,upper-limit]
88 	// even if the input data are safely contained in [0,upper-limit];
89 	// we need to add an error term for this situation,
90 	float4 ep0_range_error_high;
91 	float4 ep1_range_error_high;
92 	float4 ep0_range_error_low;
93 	float4 ep1_range_error_low;
94 
95 	ep0_range_error_high.x = MAX(0.0f, ep0.x - range_upper_limit_rgb);
96 	ep0_range_error_high.y = MAX(0.0f, ep0.y - range_upper_limit_rgb);
97 	ep0_range_error_high.z = MAX(0.0f, ep0.z - range_upper_limit_rgb);
98 	ep0_range_error_high.w = MAX(0.0f, ep0.w - range_upper_limit_alpha);
99 	ep1_range_error_high.x = MAX(0.0f, ep1.x - range_upper_limit_rgb);
100 	ep1_range_error_high.y = MAX(0.0f, ep1.y - range_upper_limit_rgb);
101 	ep1_range_error_high.z = MAX(0.0f, ep1.z - range_upper_limit_rgb);
102 	ep1_range_error_high.w = MAX(0.0f, ep1.w - range_upper_limit_alpha);
103 
104 	ep0_range_error_low.x = MIN(0.0f, ep0.x);
105 	ep0_range_error_low.y = MIN(0.0f, ep0.y);
106 	ep0_range_error_low.z = MIN(0.0f, ep0.z);
107 	ep0_range_error_low.w = MIN(0.0f, ep0.w);
108 	ep1_range_error_low.x = MIN(0.0f, ep1.x);
109 	ep1_range_error_low.y = MIN(0.0f, ep1.y);
110 	ep1_range_error_low.z = MIN(0.0f, ep1.z);
111 	ep1_range_error_low.w = MIN(0.0f, ep1.w);
112 
113 	float4 sum_range_error =
114 		(ep0_range_error_low * ep0_range_error_low) + (ep1_range_error_low * ep1_range_error_low) + (ep0_range_error_high * ep0_range_error_high) + (ep1_range_error_high * ep1_range_error_high);
115 	float rgb_range_error = dot(sum_range_error.xyz, error_weight.xyz) * 0.5f * partition_size;
116 	float alpha_range_error = sum_range_error.w * error_weight.w * 0.5f * partition_size;
117 
118 
119 	#ifdef DEBUG_PRINT_DIAGNOSTICS
120 		if (print_diagnostics)
121 		{
122 			printf("%s : partition=%d\nrgb-error_wt=%f  alpha_error_wt=%f\n", __func__, partition_index, error_weight_rgbsum, error_weight.w);
123 
124 			printf("ep0 = %f %f %f %f\n", ep0.x, ep0.y, ep0.z, ep0.w);
125 			printf("ep1 = %f %f %f %f\n", ep1.x, ep1.y, ep1.z, ep1.w);
126 
127 
128 			printf("rgb_range_error = %f, alpha_range_error = %f\n", rgb_range_error, alpha_range_error);
129 
130 			printf("rgb-luma-error: %f\n", eci->rgb_luma_error);
131 		}
132 	#endif
133 
134 	if (encode_hdr_rgb)
135 	{
136 
137 		// collect some statistics
138 		float af, cf;
139 		if (ep1.x > ep1.y && ep1.x > ep1.z)
140 		{
141 			af = ep1.x;
142 			cf = ep1.x - ep0.x;
143 		}
144 		else if (ep1.y > ep1.z)
145 		{
146 			af = ep1.y;
147 			cf = ep1.y - ep0.y;
148 		}
149 		else
150 		{
151 			af = ep1.z;
152 			cf = ep1.z - ep0.z;
153 		}
154 
155 		float bf = af - ep1_min;	// estimate of color-component spread in high endpoint color
156 		float3 prd = ep1.xyz - float3(cf, cf, cf);
157 		float3 pdif = prd - ep0.xyz;
158 		// estimate of color-component spread in low endpoint color
159 		float df = MAX(MAX(fabs(pdif.x), fabs(pdif.y)), fabs(pdif.z));
160 
161 		int b = (int)bf;
162 		int c = (int)cf;
163 		int d = (int)df;
164 
165 
166 		// determine which one of the 6 submodes is likely to be used in
167 		// case of an RGBO-mode
168 		int rgbo_mode = 5;		// 7 bits per component
169 		// mode 4: 8 7 6
170 		if (b < 32768 && c < 16384)
171 			rgbo_mode = 4;
172 		// mode 3: 9 6 7
173 		if (b < 8192 && c < 16384)
174 			rgbo_mode = 3;
175 		// mode 2: 10 5 8
176 		if (b < 2048 && c < 16384)
177 			rgbo_mode = 2;
178 		// mode 1: 11 6 5
179 		if (b < 2048 && c < 1024)
180 			rgbo_mode = 1;
181 		// mode 0: 11 5 7
182 		if (b < 1024 && c < 4096)
183 			rgbo_mode = 0;
184 
185 		// determine which one of the 9 submodes is likely to be used in
186 		// case of an RGB-mode.
187 		int rgb_mode = 8;		// 8 bits per component, except 7 bits for blue
188 
189 		// mode 0: 9 7 6 7
190 		if (b < 16384 && c < 8192 && d < 8192)
191 			rgb_mode = 0;
192 		// mode 1: 9 8 6 6
193 		if (b < 32768 && c < 8192 && d < 4096)
194 			rgb_mode = 1;
195 		// mode 2: 10 6 7 7
196 		if (b < 4096 && c < 8192 && d < 4096)
197 			rgb_mode = 2;
198 		// mode 3: 10 7 7 6
199 		if (b < 8192 && c < 8192 && d < 2048)
200 			rgb_mode = 3;
201 		// mode 4: 11 8 6 5
202 		if (b < 8192 && c < 2048 && d < 512)
203 			rgb_mode = 4;
204 		// mode 5: 11 6 8 6
205 		if (b < 2048 && c < 8192 && d < 1024)
206 			rgb_mode = 5;
207 		// mode 6: 12 7 7 5
208 		if (b < 2048 && c < 2048 && d < 256)
209 			rgb_mode = 6;
210 		// mode 7: 12 6 7 6
211 		if (b < 1024 && c < 2048 && d < 512)
212 			rgb_mode = 7;
213 
214 
215 		static const float rgbo_error_scales[6] = { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f };
216 		static const float rgb_error_scales[9] = { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f };
217 
218 		float mode7mult = rgbo_error_scales[rgbo_mode] * 0.0015f;	// empirically determined ....
219 		float mode11mult = rgb_error_scales[rgb_mode] * 0.010f;	// empirically determined ....
220 
221 
222 		float lum_high = (ep1.x + ep1.y + ep1.z) * (1.0f / 3.0f);
223 		float lum_low = (ep0.x + ep0.y + ep0.z) * (1.0f / 3.0f);
224 		float lumdif = lum_high - lum_low;
225 		float mode23mult = lumdif < 960 ? 4.0f : lumdif < 3968 ? 16.0f : 128.0f;
226 
227 		mode23mult *= 0.0005f;	// empirically determined ....
228 
229 
230 
231 		// pick among the available HDR endpoint modes
232 		for (i = 0; i < 8; i++)
233 		{
234 			best_error[i][3] = 1e30f;
235 			format_of_choice[i][3] = encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA;
236 			best_error[i][2] = 1e30f;
237 			format_of_choice[i][2] = FMT_HDR_RGB;
238 			best_error[i][1] = 1e30f;
239 			format_of_choice[i][1] = FMT_HDR_RGB_SCALE;
240 			best_error[i][0] = 1e30f;
241 			format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE;
242 		}
243 
244 
245 		for (i = 8; i < 21; i++)
246 		{
247 			// base_quant_error should depend on the scale-factor that would be used
248 			// during actual encode of the color value.
249 
250 			float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f;
251 			float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f;
252 			float alpha_quantization_error = error_weight.w * base_quant_error * 2.0f;
253 			float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
254 
255 			#ifdef DEBUG_PRINT_DIAGNOSTICS
256 				if (print_diagnostics)
257 					printf("rgba-quant = %f can_offset_encode=%d\n", rgba_quantization_error, eci->can_offset_encode);
258 			#endif
259 
260 			// for 8 integers, we have two encodings: one with HDR alpha and another one
261 			// with LDR alpha.
262 
263 			float full_hdr_rgba_error = rgba_quantization_error + rgb_range_error + alpha_range_error;
264 			best_error[i][3] = full_hdr_rgba_error;
265 			format_of_choice[i][3] = encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA;
266 
267 			// for 6 integers, we have one HDR-RGB encoding
268 			float full_hdr_rgb_error = (rgb_quantization_error * mode11mult) + rgb_range_error + eci->alpha_drop_error;
269 			best_error[i][2] = full_hdr_rgb_error;
270 			format_of_choice[i][2] = FMT_HDR_RGB;
271 
272 			// for 4 integers, we have one HDR-RGB-Scale encoding
273 			float hdr_rgb_scale_error = (rgb_quantization_error * mode7mult) + rgb_range_error + eci->alpha_drop_error + eci->rgb_luma_error;
274 
275 			best_error[i][1] = hdr_rgb_scale_error;
276 			format_of_choice[i][1] = FMT_HDR_RGB_SCALE;
277 
278 			// for 2 integers, we assume luminance-with-large-range
279 			float hdr_luminance_error = (rgb_quantization_error * mode23mult) + rgb_range_error + eci->alpha_drop_error + eci->luminance_error;
280 			best_error[i][0] = hdr_luminance_error;
281 			format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE;
282 
283 			#ifdef DEBUG_PRINT_DIAGNOSTICS
284 				if (print_diagnostics)
285 				{
286 					for (j = 0; j < 4; j++)
287 					{
288 						printf("(hdr) quant-level=%d ints=%d format=%d error=%f\n", i, j, format_of_choice[i][j], best_error[i][j]);
289 					}
290 				}
291 			#endif
292 		}
293 	}
294 
295 
296 	else
297 	{
298 		for (i = 0; i < 4; i++)
299 		{
300 			best_error[i][3] = 1e30f;
301 			best_error[i][2] = 1e30f;
302 			best_error[i][1] = 1e30f;
303 			best_error[i][0] = 1e30f;
304 
305 			format_of_choice[i][3] = FMT_RGBA;
306 			format_of_choice[i][2] = FMT_RGB;
307 			format_of_choice[i][1] = FMT_RGB_SCALE;
308 			format_of_choice[i][0] = FMT_LUMINANCE;
309 		}
310 
311 
312 		// pick among the available LDR endpoint modes
313 		for (i = 4; i < 21; i++)
314 		{
315 			float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f;
316 			float rgb_quantization_error = error_weight_rgbsum * base_quant_error;
317 			float alpha_quantization_error = error_weight.w * base_quant_error;
318 			float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
319 
320 			#ifdef DEBUG_PRINT_DIAGNOSTICS
321 				if (print_diagnostics)
322 					printf("rgba-quant = %f can_offset_encode=%d\n", rgba_quantization_error, eci->can_offset_encode);
323 			#endif
324 
325 			// for 8 integers, the available encodings are:
326 			// full LDR RGB-Alpha
327 			float full_ldr_rgba_error = rgba_quantization_error;
328 			if (eci->can_blue_contract)
329 				full_ldr_rgba_error *= 0.625f;
330 			if (eci->can_offset_encode && i <= 18)
331 				full_ldr_rgba_error *= 0.5f;
332 			full_ldr_rgba_error += rgb_range_error + alpha_range_error;
333 
334 			best_error[i][3] = full_ldr_rgba_error;
335 			format_of_choice[i][3] = FMT_RGBA;
336 
337 			// for 6 integers, we have:
338 			// - an LDR-RGB encoding
339 			// - an RGBS + Alpha encoding (LDR)
340 
341 			float full_ldr_rgb_error = rgb_quantization_error;
342 			if (eci->can_blue_contract)
343 				full_ldr_rgb_error *= 0.5f;
344 			if (eci->can_offset_encode && i <= 18)
345 				full_ldr_rgb_error *= 0.25f;
346 			full_ldr_rgb_error += eci->alpha_drop_error + rgb_range_error;
347 
348 			float rgbs_alpha_error = rgba_quantization_error + eci->rgb_scale_error + rgb_range_error + alpha_range_error;
349 
350 			if (rgbs_alpha_error < full_ldr_rgb_error)
351 			{
352 				best_error[i][2] = rgbs_alpha_error;
353 				format_of_choice[i][2] = FMT_RGB_SCALE_ALPHA;
354 			}
355 			else
356 			{
357 				best_error[i][2] = full_ldr_rgb_error;
358 				format_of_choice[i][2] = FMT_RGB;
359 			}
360 
361 
362 			// for 4 integers, we have a Luminance-Alpha encoding and the RGBS encoding
363 			float ldr_rgbs_error = rgb_quantization_error + eci->alpha_drop_error + eci->rgb_scale_error + rgb_range_error;
364 
365 			float lum_alpha_error = rgba_quantization_error + eci->luminance_error + rgb_range_error + alpha_range_error;
366 
367 			if (ldr_rgbs_error < lum_alpha_error)
368 			{
369 				best_error[i][1] = ldr_rgbs_error;
370 				format_of_choice[i][1] = FMT_RGB_SCALE;
371 			}
372 			else
373 			{
374 				best_error[i][1] = lum_alpha_error;
375 				format_of_choice[i][1] = FMT_LUMINANCE_ALPHA;
376 			}
377 
378 
379 			// for 2 integers, we have a Luminance-encoding and an Alpha-encoding.
380 			float luminance_error = rgb_quantization_error + eci->alpha_drop_error + eci->luminance_error + rgb_range_error;
381 
382 			best_error[i][0] = luminance_error;
383 			format_of_choice[i][0] = FMT_LUMINANCE;
384 
385 			#ifdef DEBUG_PRINT_DIAGNOSTICS
386 				if (print_diagnostics)
387 				{
388 					for (j = 0; j < 4; j++)
389 					{
390 						printf(" (ldr) quant-level=%d ints=%d format=%d error=%f\n", i, j, format_of_choice[i][j], best_error[i][j]);
391 					}
392 				}
393 			#endif
394 		}
395 	}
396 }
397 
398 
399 
400 // for 1 partition, find the best combination (one format + a quantization level) for a given bitcount
401 
one_partition_find_best_combination_for_bitcount(float combined_best_error[21][4],int formats_of_choice[21][4],int bits_available,int * best_quantization_level,int * best_formats,float * error_of_best_combination)402 static void one_partition_find_best_combination_for_bitcount(float combined_best_error[21][4],
403 															 int formats_of_choice[21][4], int bits_available, int *best_quantization_level, int *best_formats, float *error_of_best_combination)
404 {
405 	int i;
406 	int best_integer_count = -1;
407 	float best_integer_count_error = 1e20f;
408 	for (i = 0; i < 4; i++)
409 	{
410 		// compute the quantization level for a given number of integers and a given number of bits.
411 		int quantization_level = quantization_mode_table[i + 1][bits_available];
412 		if (quantization_level == -1)
413 			continue;			// used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
414 		if (combined_best_error[quantization_level][i] < best_integer_count_error)
415 		{
416 			best_integer_count_error = combined_best_error[quantization_level][i];
417 			best_integer_count = i;
418 		}
419 	}
420 
421 	int ql = quantization_mode_table[best_integer_count + 1][bits_available];
422 
423 	*best_quantization_level = ql;
424 	*error_of_best_combination = best_integer_count_error;
425 	if (ql >= 0)
426 		*best_formats = formats_of_choice[ql][best_integer_count];
427 	else
428 		*best_formats = FMT_LUMINANCE;
429 
430 }
431 
432 
433 
434 // for 2 partitions, find the best format combinations for every (quantization-mode, integer-count) combination
435 
/*
	Two-partition case: for every quantization level and every total
	integer count, find the pair of per-partition encodings with the lowest
	summed error.

	best_error / format_of_choice are indexed by
	(partition, quant-level, integer-pair-count-minus-1).
	combined_best_error / formats_of_choice are indexed by
	(quant-level, integer-pair-count-minus-2).

	Only combinations whose two per-partition indices differ by at most one
	are considered. Summed errors are capped at 1e10f. Quantization levels
	below 5 are left at 1e30f and their format entries are not written.
	On equal error the combination visited last is kept.
 */
static void two_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[2][21][4],
		int format_of_choice[2][21][4],
		float combined_best_error[21][7],
		int formats_of_choice[21][7][2])
{
	int level, first, second;

	for (level = 0; level < 21; level++)
		for (first = 0; first < 7; first++)
			combined_best_error[level][first] = 1e30f;

	for (level = 5; level < 21; level++)
	{
		for (first = 0; first < 4; first++)	// integer-count index of the first endpoint pair
		{
			// the second index may differ from the first by at most one
			for (second = first - 1; second <= first + 1; second++)
			{
				if (second < 0 || second > 3)
					continue;

				float total = best_error[0][level][first] + best_error[1][level][second];
				float capped = (total < 1e10f) ? total : 1e10f;
				int slot = first + second;

				if (capped <= combined_best_error[level][slot])
				{
					combined_best_error[level][slot] = capped;
					formats_of_choice[level][slot][0] = format_of_choice[0][level][first];
					formats_of_choice[level][slot][1] = format_of_choice[1][level][second];
				}
			}
		}
	}
}
471 
472 
473 // for 2 partitions, find the best combination (two formats + a quantization level) for a given bitcount
474 
two_partitions_find_best_combination_for_bitcount(float combined_best_error[21][7],int formats_of_choice[21][7][2],int bits_available,int * best_quantization_level,int * best_quantization_level_mod,int * best_formats,float * error_of_best_combination)475 static void two_partitions_find_best_combination_for_bitcount(float combined_best_error[21][7],
476 															  int formats_of_choice[21][7][2],
477 															  int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
478 {
479 	int i;
480 
481 	int best_integer_count = 0;
482 	float best_integer_count_error = 1e20f;
483 	int integer_count;
484 
485 	for (integer_count = 2; integer_count <= 8; integer_count++)
486 	{
487 		// compute the quantization level for a given number of integers and a given number of bits.
488 		int quantization_level = quantization_mode_table[integer_count][bits_available];
489 		if (quantization_level == -1)
490 			break;				// used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
491 		float integer_count_error = combined_best_error[quantization_level][integer_count - 2];
492 		if (integer_count_error < best_integer_count_error)
493 		{
494 			best_integer_count_error = integer_count_error;
495 			best_integer_count = integer_count;
496 		}
497 	}
498 
499 	int ql = quantization_mode_table[best_integer_count][bits_available];
500 	int ql_mod = quantization_mode_table[best_integer_count][bits_available + 2];
501 
502 	*best_quantization_level = ql;
503 	*best_quantization_level_mod = ql_mod;
504 	*error_of_best_combination = best_integer_count_error;
505 	if (ql >= 0)
506 	{
507 		for (i = 0; i < 2; i++)
508 			best_formats[i] = formats_of_choice[ql][best_integer_count - 2][i];
509 	}
510 	else
511 	{
512 		for (i = 0; i < 2; i++)
513 			best_formats[i] = FMT_LUMINANCE;
514 	}
515 }
516 
517 
518 
519 
520 // for 3 partitions, find the best format combinations for every (quantization-mode, integer-count) combination
521 
/*
	Three-partition case: for every quantization level and every total
	integer count, find the triple of per-partition encodings with the
	lowest summed error.

	best_error / format_of_choice are indexed by
	(partition, quant-level, integer-count index 0..3).
	combined_best_error / formats_of_choice are indexed by
	(quant-level, sum of the three integer-count indices).

	Only combinations whose largest and smallest per-partition index differ
	by at most one are considered. Summed errors are capped at 1e10f.
	Quantization levels below 5 are left at 1e30f and their format entries
	are not written. On equal error the combination visited last is kept.
 */
static void three_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[3][21][4],
		int format_of_choice[3][21][4], float combined_best_error[21][10], int formats_of_choice[21][10][3])
{
	int level, first, second, third;

	for (level = 0; level < 21; level++)
		for (first = 0; first < 10; first++)
			combined_best_error[level][first] = 1e30f;

	for (level = 5; level < 21; level++)
	{
		for (first = 0; first < 4; first++)	// index for the first endpoint pair
		{
			for (second = first - 1; second <= first + 1; second++)	// within one of the first
			{
				if (second < 0 || second > 3)
					continue;

				int lo2 = (first < second) ? first : second;
				int hi2 = (first < second) ? second : first;

				// the third index must keep (max - min) <= 1, i.e. lie in [hi2 - 1, lo2 + 1]
				for (third = hi2 - 1; third <= lo2 + 1; third++)
				{
					if (third < 0 || third > 3)
						continue;

					float total = best_error[0][level][first] + best_error[1][level][second] + best_error[2][level][third];
					float capped = (total < 1e10f) ? total : 1e10f;
					int slot = first + second + third;

					if (capped <= combined_best_error[level][slot])
					{
						combined_best_error[level][slot] = capped;
						formats_of_choice[level][slot][0] = format_of_choice[0][level][first];
						formats_of_choice[level][slot][1] = format_of_choice[1][level][second];
						formats_of_choice[level][slot][2] = format_of_choice[2][level][third];
					}
				}
			}
		}
	}
}
563 
564 
565 // for 3 partitions, find the best combination (three formats + a quantization level) for a given bitcount
566 
three_partitions_find_best_combination_for_bitcount(float combined_best_error[21][10],int formats_of_choice[21][10][3],int bits_available,int * best_quantization_level,int * best_quantization_level_mod,int * best_formats,float * error_of_best_combination)567 static void three_partitions_find_best_combination_for_bitcount(float combined_best_error[21][10],
568 																int formats_of_choice[21][10][3],
569 																int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
570 {
571 	int i;
572 
573 	int best_integer_count = 0;
574 	float best_integer_count_error = 1e20f;
575 	int integer_count;
576 
577 	for (integer_count = 3; integer_count <= 9; integer_count++)
578 	{
579 		// compute the quantization level for a given number of integers and a given number of bits.
580 		int quantization_level = quantization_mode_table[integer_count][bits_available];
581 		if (quantization_level == -1)
582 			break;				// used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
583 		float integer_count_error = combined_best_error[quantization_level][integer_count - 3];
584 		if (integer_count_error < best_integer_count_error)
585 		{
586 			best_integer_count_error = integer_count_error;
587 			best_integer_count = integer_count;
588 		}
589 	}
590 
591 	int ql = quantization_mode_table[best_integer_count][bits_available];
592 	int ql_mod = quantization_mode_table[best_integer_count][bits_available + 5];
593 
594 	*best_quantization_level = ql;
595 	*best_quantization_level_mod = ql_mod;
596 	*error_of_best_combination = best_integer_count_error;
597 	if (ql >= 0)
598 	{
599 		for (i = 0; i < 3; i++)
600 			best_formats[i] = formats_of_choice[ql][best_integer_count - 3][i];
601 	}
602 	else
603 	{
604 		for (i = 0; i < 3; i++)
605 			best_formats[i] = FMT_LUMINANCE;
606 	}
607 }
608 
609 
610 
611 
612 // for 4 partitions, find the best format combinations for every (quantization-mode, integer-count) combination
613 
/*
	Four-partition case: for every quantization level and every total
	integer count, find the quadruple of per-partition encodings with the
	lowest summed error.

	best_error / format_of_choice are indexed by
	(partition, quant-level, integer-count index 0..3).
	combined_best_error / formats_of_choice are indexed by
	(quant-level, sum of the four integer-count indices).

	Only combinations whose largest and smallest per-partition index differ
	by at most one are considered. Summed errors are capped at 1e10f.
	Quantization levels below 5 are left at 1e30f and their format entries
	are not written. On equal error the combination visited last is kept.
 */
static void four_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[4][21][4],
		int format_of_choice[4][21][4], float combined_best_error[21][13], int formats_of_choice[21][13][4])
{
	int level, first, second, third, fourth;

	for (level = 0; level < 21; level++)
		for (first = 0; first < 13; first++)
			combined_best_error[level][first] = 1e30f;

	for (level = 5; level < 21; level++)
	{
		for (first = 0; first < 4; first++)	// index for the first endpoint pair
		{
			for (second = first - 1; second <= first + 1; second++)	// within one of the first
			{
				if (second < 0 || second > 3)
					continue;

				int lo2 = (first < second) ? first : second;
				int hi2 = (first < second) ? second : first;

				// each further index must keep (max - min) <= 1, i.e. lie in [hi - 1, lo + 1]
				for (third = hi2 - 1; third <= lo2 + 1; third++)
				{
					if (third < 0 || third > 3)
						continue;

					int lo3 = (third < lo2) ? third : lo2;
					int hi3 = (third > hi2) ? third : hi2;

					for (fourth = hi3 - 1; fourth <= lo3 + 1; fourth++)
					{
						if (fourth < 0 || fourth > 3)
							continue;

						float total = best_error[0][level][first] + best_error[1][level][second] + best_error[2][level][third] + best_error[3][level][fourth];
						float capped = (total < 1e10f) ? total : 1e10f;
						int slot = first + second + third + fourth;

						if (capped <= combined_best_error[level][slot])
						{
							combined_best_error[level][slot] = capped;
							formats_of_choice[level][slot][0] = format_of_choice[0][level][first];
							formats_of_choice[level][slot][1] = format_of_choice[1][level][second];
							formats_of_choice[level][slot][2] = format_of_choice[2][level][third];
							formats_of_choice[level][slot][3] = format_of_choice[3][level][fourth];
						}
					}
				}
			}
		}
	}
}
663 
664 
665 
666 
667 
668 
669 // for 4 partitions, find the best combination (four formats + a quantization level) for a given bitcount
670 
four_partitions_find_best_combination_for_bitcount(float combined_best_error[21][13],int formats_of_choice[21][13][4],int bits_available,int * best_quantization_level,int * best_quantization_level_mod,int * best_formats,float * error_of_best_combination)671 static void four_partitions_find_best_combination_for_bitcount(float combined_best_error[21][13],
672 															   int formats_of_choice[21][13][4],
673 															   int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
674 {
675 	int i;
676 	int best_integer_count = 0;
677 	float best_integer_count_error = 1e20f;
678 	int integer_count;
679 
680 	for (integer_count = 4; integer_count <= 9; integer_count++)
681 	{
682 		// compute the quantization level for a given number of integers and a given number of bits.
683 		int quantization_level = quantization_mode_table[integer_count][bits_available];
684 		if (quantization_level == -1)
685 			break;				// used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
686 		float integer_count_error = combined_best_error[quantization_level][integer_count - 4];
687 		if (integer_count_error < best_integer_count_error)
688 		{
689 			best_integer_count_error = integer_count_error;
690 			best_integer_count = integer_count;
691 		}
692 	}
693 
694 	int ql = quantization_mode_table[best_integer_count][bits_available];
695 	int ql_mod = quantization_mode_table[best_integer_count][bits_available + 8];
696 
697 	*best_quantization_level = ql;
698 	*best_quantization_level_mod = ql_mod;
699 	*error_of_best_combination = best_integer_count_error;
700 	if (ql >= 0)
701 	{
702 		for (i = 0; i < 4; i++)
703 			best_formats[i] = formats_of_choice[ql][best_integer_count - 4][i];
704 	}
705 	else
706 	{
707 		for (i = 0; i < 4; i++)
708 			best_formats[i] = FMT_LUMINANCE;
709 	}
710 }
711 
712 
713 
/*
	The determine_optimal_set_of_endpoint_formats_to_use() function.

	It identifies, for each mode, which set of color endpoint encodings
	produces the best overall result. It then reports back which 4 modes
	look best, along with the ideal color encoding combination for each.

	It takes as input:
		a partitioning, an imageblock,
		a set of color endpoints,
		for each mode, the number of bits available for color encoding and the error incurred by quantization,
		in case of 2 planes of weights, a specifier for which color component to use for the second plane of weights.

	It delivers as output for each of the 4 selected modes:
		format specifier
		for each partition
			quantization level to use
			modified quantization level to use
		(when all format specifiers are equal)
 */
734 
determine_optimal_set_of_endpoint_formats_to_use(int xdim,int ydim,int zdim,const partition_info * pt,const imageblock * blk,const error_weight_block * ewb,const endpoints * ep,int separate_component,const int * qwt_bitcounts,const float * qwt_errors,int partition_format_specifiers[4][4],int quantized_weight[4],int quantization_level[4],int quantization_level_mod[4])735 void determine_optimal_set_of_endpoint_formats_to_use(int xdim, int ydim, int zdim,
736 													  const partition_info * pt, const imageblock * blk, const error_weight_block * ewb,
737 													  const endpoints * ep,
738 													  int separate_component,	// separate color component for 2-plane mode; -1 for single-plane mode
739 													  // bitcounts and errors computed for the various quantization methods
740 													  const int *qwt_bitcounts, const float *qwt_errors,
741 													  // output data
742 													  int partition_format_specifiers[4][4], int quantized_weight[4],
743 													  int quantization_level[4], int quantization_level_mod[4])
744 {
745 	int i, j;
746 	int partition_count = pt->partition_count;
747 
748 	int encode_hdr_rgb = blk->rgb_lns[0];
749 	int encode_hdr_alpha = blk->alpha_lns[0];
750 
751 
752 	// call a helper function to compute the errors that result from various
753 	// encoding choices (such as using luminance instead of RGB, discarding Alpha,
754 	// using RGB-scale in place of two separate RGB endpoints and so on)
755 	encoding_choice_errors eci[4];
756 	compute_encoding_choice_errors(xdim, ydim, zdim, blk, pt, ewb, separate_component, eci);
757 
758 	// for each partition, compute the error weights to apply for that partition.
759 	float4 error_weightings[4];
760 	float4 dummied_color_scalefactors[4];	// only used to receive data
761 	compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, pt, error_weightings, dummied_color_scalefactors);
762 
763 
764 	float best_error[4][21][4];
765 	int format_of_choice[4][21][4];
766 	for (i = 0; i < partition_count; i++)
767 		compute_color_error_for_every_integer_count_and_quantization_level(encode_hdr_rgb, encode_hdr_alpha, i, pt, &(eci[i]), ep, error_weightings, best_error[i], format_of_choice[i]);
768 
769 	float errors_of_best_combination[MAX_WEIGHT_MODES];
770 	int best_quantization_levels[MAX_WEIGHT_MODES];
771 	int best_quantization_levels_mod[MAX_WEIGHT_MODES];
772 	int best_ep_formats[MAX_WEIGHT_MODES][4];
773 
774 	// code for the case where the block contains 1 partition
775 	if (partition_count == 1)
776 	{
777 		int best_quantization_level;
778 		int best_format;
779 		float error_of_best_combination;
780 		for (i = 0; i < MAX_WEIGHT_MODES; i++)
781 		{
782 			if (qwt_errors[i] >= 1e29f)
783 			{
784 				errors_of_best_combination[i] = 1e30f;
785 				continue;
786 			}
787 
788 			one_partition_find_best_combination_for_bitcount(best_error[0], format_of_choice[0], qwt_bitcounts[i], &best_quantization_level, &best_format, &error_of_best_combination);
789 			error_of_best_combination += qwt_errors[i];
790 
791 			errors_of_best_combination[i] = error_of_best_combination;
792 			best_quantization_levels[i] = best_quantization_level;
793 			best_quantization_levels_mod[i] = best_quantization_level;
794 			best_ep_formats[i][0] = best_format;
795 		}
796 	}
797 
798 	// code for the case where the block contains 2 partitions
799 	else if (partition_count == 2)
800 	{
801 		int best_quantization_level;
802 		int best_quantization_level_mod;
803 		int best_formats[2];
804 		float error_of_best_combination;
805 
806 		float combined_best_error[21][7];
807 		int formats_of_choice[21][7][2];
808 
809 		two_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
810 
811 
812 		for (i = 0; i < MAX_WEIGHT_MODES; i++)
813 		{
814 			if (qwt_errors[i] >= 1e29f)
815 			{
816 				errors_of_best_combination[i] = 1e30f;
817 				continue;
818 			}
819 
820 			two_partitions_find_best_combination_for_bitcount(combined_best_error, formats_of_choice, qwt_bitcounts[i],
821 															  &best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
822 
823 			error_of_best_combination += qwt_errors[i];
824 
825 			errors_of_best_combination[i] = error_of_best_combination;
826 			best_quantization_levels[i] = best_quantization_level;
827 			best_quantization_levels_mod[i] = best_quantization_level_mod;
828 			best_ep_formats[i][0] = best_formats[0];
829 			best_ep_formats[i][1] = best_formats[1];
830 		}
831 	}
832 
833 	// code for the case where the block contains 3 partitions
834 	else if (partition_count == 3)
835 	{
836 		int best_quantization_level;
837 		int best_quantization_level_mod;
838 		int best_formats[3];
839 		float error_of_best_combination;
840 
841 		float combined_best_error[21][10];
842 		int formats_of_choice[21][10][3];
843 
844 		three_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
845 
846 		for (i = 0; i < MAX_WEIGHT_MODES; i++)
847 		{
848 			if (qwt_errors[i] >= 1e29f)
849 			{
850 				errors_of_best_combination[i] = 1e30f;
851 				continue;
852 			}
853 
854 			three_partitions_find_best_combination_for_bitcount(combined_best_error,
855 																formats_of_choice, qwt_bitcounts[i], &best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
856 			error_of_best_combination += qwt_errors[i];
857 
858 			errors_of_best_combination[i] = error_of_best_combination;
859 			best_quantization_levels[i] = best_quantization_level;
860 			best_quantization_levels_mod[i] = best_quantization_level_mod;
861 			best_ep_formats[i][0] = best_formats[0];
862 			best_ep_formats[i][1] = best_formats[1];
863 			best_ep_formats[i][2] = best_formats[2];
864 		}
865 	}
866 
867 	// code for the case where the block contains 4 partitions
868 	else if (partition_count == 4)
869 	{
870 		int best_quantization_level;
871 		int best_quantization_level_mod;
872 		int best_formats[4];
873 		float error_of_best_combination;
874 
875 		float combined_best_error[21][13];
876 		int formats_of_choice[21][13][4];
877 
878 		four_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
879 
880 		for (i = 0; i < MAX_WEIGHT_MODES; i++)
881 		{
882 			if (qwt_errors[i] >= 1e29f)
883 			{
884 				errors_of_best_combination[i] = 1e30f;
885 				continue;
886 			}
887 			four_partitions_find_best_combination_for_bitcount(combined_best_error,
888 															   formats_of_choice, qwt_bitcounts[i], &best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
889 			error_of_best_combination += qwt_errors[i];
890 
891 			errors_of_best_combination[i] = error_of_best_combination;
892 			best_quantization_levels[i] = best_quantization_level;
893 			best_quantization_levels_mod[i] = best_quantization_level_mod;
894 			best_ep_formats[i][0] = best_formats[0];
895 			best_ep_formats[i][1] = best_formats[1];
896 			best_ep_formats[i][2] = best_formats[2];
897 			best_ep_formats[i][3] = best_formats[3];
898 		}
899 	}
900 
901 	// finally, go through the results and pick the 4 best-looking modes.
902 
903 	int best_error_weights[4];
904 
905 	for (i = 0; i < 4; i++)
906 	{
907 		float best_ep_error = 1e30f;
908 		int best_error_index = -1;
909 		for (j = 0; j < MAX_WEIGHT_MODES; j++)
910 		{
911 			if (errors_of_best_combination[j] < best_ep_error && best_quantization_levels[j] >= 5)
912 			{
913 				best_ep_error = errors_of_best_combination[j];
914 				best_error_index = j;
915 			}
916 		}
917 		best_error_weights[i] = best_error_index;
918 
919 		if(best_error_index >= 0)
920 		{
921 			errors_of_best_combination[best_error_index] = 1e30f;
922 		}
923 	}
924 
925 	for (i = 0; i < 4; i++)
926 	{
927 		quantized_weight[i] = best_error_weights[i];
928 		if (quantized_weight[i] >= 0)
929 		{
930 			quantization_level[i] = best_quantization_levels[best_error_weights[i]];
931 			quantization_level_mod[i] = best_quantization_levels_mod[best_error_weights[i]];
932 			for (j = 0; j < partition_count; j++)
933 			{
934 				partition_format_specifiers[i][j] = best_ep_formats[best_error_weights[i]][j];
935 			}
936 		}
937 	}
938 }
939