1 /*----------------------------------------------------------------------------*/
2 /**
3 * This confidential and proprietary software may be used only as
4 * authorised by a licensing agreement from ARM Limited
5 * (C) COPYRIGHT 2011-2012 ARM Limited
6 * ALL RIGHTS RESERVED
7 *
8 * The entire notice above must be reproduced on all authorised
9 * copies and copies may only be made to the extent permitted
10 * by a licensing agreement from ARM Limited.
11 *
12 * @brief Functions to pick the best ASTC endpoint format for a given block.
13 */
14 /*----------------------------------------------------------------------------*/
15 #include "astc_codec_internals.h"
16
17 #ifdef DEBUG_PRINT_DIAGNOSTICS
18 #include <stdio.h>
19 #endif
20
21 #include <math.h>
22
23 /*
24 functions to determine, for a given partitioning, which color endpoint formats are the best to use.
25
26 */
27
28
29 // for a given partition, compute for every (integer-component-count, quantization-level)
30 // the color error.
31
32
compute_color_error_for_every_integer_count_and_quantization_level(int encode_hdr_rgb,int encode_hdr_alpha,int partition_index,const partition_info * pi,const encoding_choice_errors * eci,const endpoints * ep,float4 error_weightings[4],float best_error[21][4],int format_of_choice[21][4])33 static void compute_color_error_for_every_integer_count_and_quantization_level(int encode_hdr_rgb, // 1 = perform HDR encoding, 0 = perform LDR encoding.
34 int encode_hdr_alpha, int partition_index, const partition_info * pi,
35 const encoding_choice_errors * eci, // pointer to the structure for the CURRENT partition.
36 const endpoints * ep, float4 error_weightings[4],
37 // arrays to return results back through.
38 float best_error[21][4], int format_of_choice[21][4])
39 {
40 int i, j;
41 int partition_size = pi->texels_per_partition[partition_index];
42
43 static const float baseline_quant_error[21] = {
44 (65536.0f * 65536.0f / 18.0f), // 2 values, 1 step
45 (65536.0f * 65536.0f / 18.0f) / (2 * 2), // 3 values, 2 steps
46 (65536.0f * 65536.0f / 18.0f) / (3 * 3), // 4 values, 3 steps
47 (65536.0f * 65536.0f / 18.0f) / (4 * 4), // 5 values
48 (65536.0f * 65536.0f / 18.0f) / (5 * 5),
49 (65536.0f * 65536.0f / 18.0f) / (7 * 7),
50 (65536.0f * 65536.0f / 18.0f) / (9 * 9),
51 (65536.0f * 65536.0f / 18.0f) / (11 * 11),
52 (65536.0f * 65536.0f / 18.0f) / (15 * 15),
53 (65536.0f * 65536.0f / 18.0f) / (19 * 19),
54 (65536.0f * 65536.0f / 18.0f) / (23 * 23),
55 (65536.0f * 65536.0f / 18.0f) / (31 * 31),
56 (65536.0f * 65536.0f / 18.0f) / (39 * 39),
57 (65536.0f * 65536.0f / 18.0f) / (47 * 47),
58 (65536.0f * 65536.0f / 18.0f) / (63 * 63),
59 (65536.0f * 65536.0f / 18.0f) / (79 * 79),
60 (65536.0f * 65536.0f / 18.0f) / (95 * 95),
61 (65536.0f * 65536.0f / 18.0f) / (127 * 127),
62 (65536.0f * 65536.0f / 18.0f) / (159 * 159),
63 (65536.0f * 65536.0f / 18.0f) / (191 * 191),
64 (65536.0f * 65536.0f / 18.0f) / (255 * 255)
65 };
66
67 float4 ep0 = ep->endpt0[partition_index];
68 float4 ep1 = ep->endpt1[partition_index];
69
70 float ep0_max = MAX(MAX(ep0.x, ep0.y), ep0.z);
71 float ep0_min = MIN(MIN(ep0.x, ep0.y), ep0.z);
72 float ep1_max = MAX(MAX(ep1.x, ep1.y), ep1.z);
73 float ep1_min = MIN(MIN(ep1.x, ep1.y), ep1.z);
74
75 ep0_min = MAX(ep0_min, 0.0f);
76 ep1_min = MAX(ep1_min, 0.0f);
77 ep0_max = MAX(ep0_max, 1e-10f);
78 ep1_max = MAX(ep1_max, 1e-10f);
79
80 float4 error_weight = error_weightings[partition_index];
81
82 float error_weight_rgbsum = error_weight.x + error_weight.y + error_weight.z;
83
84 float range_upper_limit_rgb = encode_hdr_rgb ? 61440.0f : 65535.0f;
85 float range_upper_limit_alpha = encode_hdr_alpha ? 61440.0f : 65535.0f;
86
87 // it is possible to get endpoint colors significantly outside [0,upper-limit]
88 // even if the input data are safely contained in [0,upper-limit];
89 // we need to add an error term for this situation,
90 float4 ep0_range_error_high;
91 float4 ep1_range_error_high;
92 float4 ep0_range_error_low;
93 float4 ep1_range_error_low;
94
95 ep0_range_error_high.x = MAX(0.0f, ep0.x - range_upper_limit_rgb);
96 ep0_range_error_high.y = MAX(0.0f, ep0.y - range_upper_limit_rgb);
97 ep0_range_error_high.z = MAX(0.0f, ep0.z - range_upper_limit_rgb);
98 ep0_range_error_high.w = MAX(0.0f, ep0.w - range_upper_limit_alpha);
99 ep1_range_error_high.x = MAX(0.0f, ep1.x - range_upper_limit_rgb);
100 ep1_range_error_high.y = MAX(0.0f, ep1.y - range_upper_limit_rgb);
101 ep1_range_error_high.z = MAX(0.0f, ep1.z - range_upper_limit_rgb);
102 ep1_range_error_high.w = MAX(0.0f, ep1.w - range_upper_limit_alpha);
103
104 ep0_range_error_low.x = MIN(0.0f, ep0.x);
105 ep0_range_error_low.y = MIN(0.0f, ep0.y);
106 ep0_range_error_low.z = MIN(0.0f, ep0.z);
107 ep0_range_error_low.w = MIN(0.0f, ep0.w);
108 ep1_range_error_low.x = MIN(0.0f, ep1.x);
109 ep1_range_error_low.y = MIN(0.0f, ep1.y);
110 ep1_range_error_low.z = MIN(0.0f, ep1.z);
111 ep1_range_error_low.w = MIN(0.0f, ep1.w);
112
113 float4 sum_range_error =
114 (ep0_range_error_low * ep0_range_error_low) + (ep1_range_error_low * ep1_range_error_low) + (ep0_range_error_high * ep0_range_error_high) + (ep1_range_error_high * ep1_range_error_high);
115 float rgb_range_error = dot(sum_range_error.xyz, error_weight.xyz) * 0.5f * partition_size;
116 float alpha_range_error = sum_range_error.w * error_weight.w * 0.5f * partition_size;
117
118
119 #ifdef DEBUG_PRINT_DIAGNOSTICS
120 if (print_diagnostics)
121 {
122 printf("%s : partition=%d\nrgb-error_wt=%f alpha_error_wt=%f\n", __func__, partition_index, error_weight_rgbsum, error_weight.w);
123
124 printf("ep0 = %f %f %f %f\n", ep0.x, ep0.y, ep0.z, ep0.w);
125 printf("ep1 = %f %f %f %f\n", ep1.x, ep1.y, ep1.z, ep1.w);
126
127
128 printf("rgb_range_error = %f, alpha_range_error = %f\n", rgb_range_error, alpha_range_error);
129
130 printf("rgb-luma-error: %f\n", eci->rgb_luma_error);
131 }
132 #endif
133
134 if (encode_hdr_rgb)
135 {
136
137 // collect some statistics
138 float af, cf;
139 if (ep1.x > ep1.y && ep1.x > ep1.z)
140 {
141 af = ep1.x;
142 cf = ep1.x - ep0.x;
143 }
144 else if (ep1.y > ep1.z)
145 {
146 af = ep1.y;
147 cf = ep1.y - ep0.y;
148 }
149 else
150 {
151 af = ep1.z;
152 cf = ep1.z - ep0.z;
153 }
154
155 float bf = af - ep1_min; // estimate of color-component spread in high endpoint color
156 float3 prd = ep1.xyz - float3(cf, cf, cf);
157 float3 pdif = prd - ep0.xyz;
158 // estimate of color-component spread in low endpoint color
159 float df = MAX(MAX(fabs(pdif.x), fabs(pdif.y)), fabs(pdif.z));
160
161 int b = (int)bf;
162 int c = (int)cf;
163 int d = (int)df;
164
165
166 // determine which one of the 6 submodes is likely to be used in
167 // case of an RGBO-mode
168 int rgbo_mode = 5; // 7 bits per component
169 // mode 4: 8 7 6
170 if (b < 32768 && c < 16384)
171 rgbo_mode = 4;
172 // mode 3: 9 6 7
173 if (b < 8192 && c < 16384)
174 rgbo_mode = 3;
175 // mode 2: 10 5 8
176 if (b < 2048 && c < 16384)
177 rgbo_mode = 2;
178 // mode 1: 11 6 5
179 if (b < 2048 && c < 1024)
180 rgbo_mode = 1;
181 // mode 0: 11 5 7
182 if (b < 1024 && c < 4096)
183 rgbo_mode = 0;
184
185 // determine which one of the 9 submodes is likely to be used in
186 // case of an RGB-mode.
187 int rgb_mode = 8; // 8 bits per component, except 7 bits for blue
188
189 // mode 0: 9 7 6 7
190 if (b < 16384 && c < 8192 && d < 8192)
191 rgb_mode = 0;
192 // mode 1: 9 8 6 6
193 if (b < 32768 && c < 8192 && d < 4096)
194 rgb_mode = 1;
195 // mode 2: 10 6 7 7
196 if (b < 4096 && c < 8192 && d < 4096)
197 rgb_mode = 2;
198 // mode 3: 10 7 7 6
199 if (b < 8192 && c < 8192 && d < 2048)
200 rgb_mode = 3;
201 // mode 4: 11 8 6 5
202 if (b < 8192 && c < 2048 && d < 512)
203 rgb_mode = 4;
204 // mode 5: 11 6 8 6
205 if (b < 2048 && c < 8192 && d < 1024)
206 rgb_mode = 5;
207 // mode 6: 12 7 7 5
208 if (b < 2048 && c < 2048 && d < 256)
209 rgb_mode = 6;
210 // mode 7: 12 6 7 6
211 if (b < 1024 && c < 2048 && d < 512)
212 rgb_mode = 7;
213
214
215 static const float rgbo_error_scales[6] = { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f };
216 static const float rgb_error_scales[9] = { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f };
217
218 float mode7mult = rgbo_error_scales[rgbo_mode] * 0.0015f; // empirically determined ....
219 float mode11mult = rgb_error_scales[rgb_mode] * 0.010f; // empirically determined ....
220
221
222 float lum_high = (ep1.x + ep1.y + ep1.z) * (1.0f / 3.0f);
223 float lum_low = (ep0.x + ep0.y + ep0.z) * (1.0f / 3.0f);
224 float lumdif = lum_high - lum_low;
225 float mode23mult = lumdif < 960 ? 4.0f : lumdif < 3968 ? 16.0f : 128.0f;
226
227 mode23mult *= 0.0005f; // empirically determined ....
228
229
230
231 // pick among the available HDR endpoint modes
232 for (i = 0; i < 8; i++)
233 {
234 best_error[i][3] = 1e30f;
235 format_of_choice[i][3] = encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA;
236 best_error[i][2] = 1e30f;
237 format_of_choice[i][2] = FMT_HDR_RGB;
238 best_error[i][1] = 1e30f;
239 format_of_choice[i][1] = FMT_HDR_RGB_SCALE;
240 best_error[i][0] = 1e30f;
241 format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE;
242 }
243
244
245 for (i = 8; i < 21; i++)
246 {
247 // base_quant_error should depend on the scale-factor that would be used
248 // during actual encode of the color value.
249
250 float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f;
251 float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f;
252 float alpha_quantization_error = error_weight.w * base_quant_error * 2.0f;
253 float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
254
255 #ifdef DEBUG_PRINT_DIAGNOSTICS
256 if (print_diagnostics)
257 printf("rgba-quant = %f can_offset_encode=%d\n", rgba_quantization_error, eci->can_offset_encode);
258 #endif
259
260 // for 8 integers, we have two encodings: one with HDR alpha and another one
261 // with LDR alpha.
262
263 float full_hdr_rgba_error = rgba_quantization_error + rgb_range_error + alpha_range_error;
264 best_error[i][3] = full_hdr_rgba_error;
265 format_of_choice[i][3] = encode_hdr_alpha ? FMT_HDR_RGBA : FMT_HDR_RGB_LDR_ALPHA;
266
267 // for 6 integers, we have one HDR-RGB encoding
268 float full_hdr_rgb_error = (rgb_quantization_error * mode11mult) + rgb_range_error + eci->alpha_drop_error;
269 best_error[i][2] = full_hdr_rgb_error;
270 format_of_choice[i][2] = FMT_HDR_RGB;
271
272 // for 4 integers, we have one HDR-RGB-Scale encoding
273 float hdr_rgb_scale_error = (rgb_quantization_error * mode7mult) + rgb_range_error + eci->alpha_drop_error + eci->rgb_luma_error;
274
275 best_error[i][1] = hdr_rgb_scale_error;
276 format_of_choice[i][1] = FMT_HDR_RGB_SCALE;
277
278 // for 2 integers, we assume luminance-with-large-range
279 float hdr_luminance_error = (rgb_quantization_error * mode23mult) + rgb_range_error + eci->alpha_drop_error + eci->luminance_error;
280 best_error[i][0] = hdr_luminance_error;
281 format_of_choice[i][0] = FMT_HDR_LUMINANCE_LARGE_RANGE;
282
283 #ifdef DEBUG_PRINT_DIAGNOSTICS
284 if (print_diagnostics)
285 {
286 for (j = 0; j < 4; j++)
287 {
288 printf("(hdr) quant-level=%d ints=%d format=%d error=%f\n", i, j, format_of_choice[i][j], best_error[i][j]);
289 }
290 }
291 #endif
292 }
293 }
294
295
296 else
297 {
298 for (i = 0; i < 4; i++)
299 {
300 best_error[i][3] = 1e30f;
301 best_error[i][2] = 1e30f;
302 best_error[i][1] = 1e30f;
303 best_error[i][0] = 1e30f;
304
305 format_of_choice[i][3] = FMT_RGBA;
306 format_of_choice[i][2] = FMT_RGB;
307 format_of_choice[i][1] = FMT_RGB_SCALE;
308 format_of_choice[i][0] = FMT_LUMINANCE;
309 }
310
311
312 // pick among the available LDR endpoint modes
313 for (i = 4; i < 21; i++)
314 {
315 float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f;
316 float rgb_quantization_error = error_weight_rgbsum * base_quant_error;
317 float alpha_quantization_error = error_weight.w * base_quant_error;
318 float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error;
319
320 #ifdef DEBUG_PRINT_DIAGNOSTICS
321 if (print_diagnostics)
322 printf("rgba-quant = %f can_offset_encode=%d\n", rgba_quantization_error, eci->can_offset_encode);
323 #endif
324
325 // for 8 integers, the available encodings are:
326 // full LDR RGB-Alpha
327 float full_ldr_rgba_error = rgba_quantization_error;
328 if (eci->can_blue_contract)
329 full_ldr_rgba_error *= 0.625f;
330 if (eci->can_offset_encode && i <= 18)
331 full_ldr_rgba_error *= 0.5f;
332 full_ldr_rgba_error += rgb_range_error + alpha_range_error;
333
334 best_error[i][3] = full_ldr_rgba_error;
335 format_of_choice[i][3] = FMT_RGBA;
336
337 // for 6 integers, we have:
338 // - an LDR-RGB encoding
339 // - an RGBS + Alpha encoding (LDR)
340
341 float full_ldr_rgb_error = rgb_quantization_error;
342 if (eci->can_blue_contract)
343 full_ldr_rgb_error *= 0.5f;
344 if (eci->can_offset_encode && i <= 18)
345 full_ldr_rgb_error *= 0.25f;
346 full_ldr_rgb_error += eci->alpha_drop_error + rgb_range_error;
347
348 float rgbs_alpha_error = rgba_quantization_error + eci->rgb_scale_error + rgb_range_error + alpha_range_error;
349
350 if (rgbs_alpha_error < full_ldr_rgb_error)
351 {
352 best_error[i][2] = rgbs_alpha_error;
353 format_of_choice[i][2] = FMT_RGB_SCALE_ALPHA;
354 }
355 else
356 {
357 best_error[i][2] = full_ldr_rgb_error;
358 format_of_choice[i][2] = FMT_RGB;
359 }
360
361
362 // for 4 integers, we have a Luminance-Alpha encoding and the RGBS encoding
363 float ldr_rgbs_error = rgb_quantization_error + eci->alpha_drop_error + eci->rgb_scale_error + rgb_range_error;
364
365 float lum_alpha_error = rgba_quantization_error + eci->luminance_error + rgb_range_error + alpha_range_error;
366
367 if (ldr_rgbs_error < lum_alpha_error)
368 {
369 best_error[i][1] = ldr_rgbs_error;
370 format_of_choice[i][1] = FMT_RGB_SCALE;
371 }
372 else
373 {
374 best_error[i][1] = lum_alpha_error;
375 format_of_choice[i][1] = FMT_LUMINANCE_ALPHA;
376 }
377
378
379 // for 2 integers, we have a Luminance-encoding and an Alpha-encoding.
380 float luminance_error = rgb_quantization_error + eci->alpha_drop_error + eci->luminance_error + rgb_range_error;
381
382 best_error[i][0] = luminance_error;
383 format_of_choice[i][0] = FMT_LUMINANCE;
384
385 #ifdef DEBUG_PRINT_DIAGNOSTICS
386 if (print_diagnostics)
387 {
388 for (j = 0; j < 4; j++)
389 {
390 printf(" (ldr) quant-level=%d ints=%d format=%d error=%f\n", i, j, format_of_choice[i][j], best_error[i][j]);
391 }
392 }
393 #endif
394 }
395 }
396 }
397
398
399
400 // for 1 partition, find the best combination (one format + a quantization level) for a given bitcount
401
one_partition_find_best_combination_for_bitcount(float combined_best_error[21][4],int formats_of_choice[21][4],int bits_available,int * best_quantization_level,int * best_formats,float * error_of_best_combination)402 static void one_partition_find_best_combination_for_bitcount(float combined_best_error[21][4],
403 int formats_of_choice[21][4], int bits_available, int *best_quantization_level, int *best_formats, float *error_of_best_combination)
404 {
405 int i;
406 int best_integer_count = -1;
407 float best_integer_count_error = 1e20f;
408 for (i = 0; i < 4; i++)
409 {
410 // compute the quantization level for a given number of integers and a given number of bits.
411 int quantization_level = quantization_mode_table[i + 1][bits_available];
412 if (quantization_level == -1)
413 continue; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
414 if (combined_best_error[quantization_level][i] < best_integer_count_error)
415 {
416 best_integer_count_error = combined_best_error[quantization_level][i];
417 best_integer_count = i;
418 }
419 }
420
421 int ql = quantization_mode_table[best_integer_count + 1][bits_available];
422
423 *best_quantization_level = ql;
424 *error_of_best_combination = best_integer_count_error;
425 if (ql >= 0)
426 *best_formats = formats_of_choice[ql][best_integer_count];
427 else
428 *best_formats = FMT_LUMINANCE;
429
430 }
431
432
433
434 // for 2 partitions, find the best format combinations for every (quantization-mode, integer-count) combination
435
/*
	For a 2-partition block: for every quantization level and every total format-class
	count, find the pair of per-partition format classes with the lowest summed error.

	The format class of a partition is the second index (0..3) of its best_error /
	format_of_choice tables. The two classes of a pair may differ by at most 1; the
	pair is stored in the slot given by the sum of the two classes (0..6).

	Every slot of combined_best_error is first set to 1e30f. Only quantization levels
	5..20 are then filled in; for lower levels formats_of_choice is not written.
	Summed errors are capped at 1e10f, and on equal error the pair visited last wins.
*/
static void two_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[2][21][4],	// indexed by (partition, quant-level, integer-pair-count-minus-1)
	int format_of_choice[2][21][4],
	float combined_best_error[21][7],	// indexed by (quant-level, integer-pair-count-minus-2)
	int formats_of_choice[21][7][2])
{
	// start with every slot marked as unreachable
	for (int level = 0; level < 21; level++)
	{
		for (int slot = 0; slot < 7; slot++)
		{
			combined_best_error[level][slot] = 1e30f;
		}
	}

	for (int level = 5; level < 21; level++)
	{
		for (int first = 0; first < 4; first++)
		{
			// the second class has to stay within one step of the first class
			const int second_begin = (first > 0) ? first - 1 : 0;
			const int second_end = (first < 3) ? first + 1 : 3;

			for (int second = second_begin; second <= second_end; second++)
			{
				const int slot = first + second;

				// summed error of the pair, capped at 1e10f
				float pair_error = (best_error[0][level][first] + best_error[1][level][second]) < (1e10f)
					? (best_error[0][level][first] + best_error[1][level][second])
					: (1e10f);

				// "<=" on purpose: a later pair replaces an earlier one of equal error
				if (pair_error <= combined_best_error[level][slot])
				{
					combined_best_error[level][slot] = pair_error;
					formats_of_choice[level][slot][0] = format_of_choice[0][level][first];
					formats_of_choice[level][slot][1] = format_of_choice[1][level][second];
				}
			}
		}
	}
}
471
472
473 // for 2 partitions, find the best combination (two formats + a quantization level) for a given bitcount
474
two_partitions_find_best_combination_for_bitcount(float combined_best_error[21][7],int formats_of_choice[21][7][2],int bits_available,int * best_quantization_level,int * best_quantization_level_mod,int * best_formats,float * error_of_best_combination)475 static void two_partitions_find_best_combination_for_bitcount(float combined_best_error[21][7],
476 int formats_of_choice[21][7][2],
477 int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
478 {
479 int i;
480
481 int best_integer_count = 0;
482 float best_integer_count_error = 1e20f;
483 int integer_count;
484
485 for (integer_count = 2; integer_count <= 8; integer_count++)
486 {
487 // compute the quantization level for a given number of integers and a given number of bits.
488 int quantization_level = quantization_mode_table[integer_count][bits_available];
489 if (quantization_level == -1)
490 break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
491 float integer_count_error = combined_best_error[quantization_level][integer_count - 2];
492 if (integer_count_error < best_integer_count_error)
493 {
494 best_integer_count_error = integer_count_error;
495 best_integer_count = integer_count;
496 }
497 }
498
499 int ql = quantization_mode_table[best_integer_count][bits_available];
500 int ql_mod = quantization_mode_table[best_integer_count][bits_available + 2];
501
502 *best_quantization_level = ql;
503 *best_quantization_level_mod = ql_mod;
504 *error_of_best_combination = best_integer_count_error;
505 if (ql >= 0)
506 {
507 for (i = 0; i < 2; i++)
508 best_formats[i] = formats_of_choice[ql][best_integer_count - 2][i];
509 }
510 else
511 {
512 for (i = 0; i < 2; i++)
513 best_formats[i] = FMT_LUMINANCE;
514 }
515 }
516
517
518
519
520 // for 3 partitions, find the best format combinations for every (quantization-mode, integer-count) combination
521
/*
	For a 3-partition block: for every quantization level and every total format-class
	count, find the triple of per-partition format classes with the lowest summed error.

	The format class of a partition is the second index (0..3) of its best_error /
	format_of_choice tables. Within a triple, the largest and smallest class may
	differ by at most 1; the triple is stored in the slot given by the sum of the
	three classes (0..9).

	Every slot of combined_best_error is first set to 1e30f. Only quantization levels
	5..20 are then filled in; for lower levels formats_of_choice is not written.
	Summed errors are capped at 1e10f, and on equal error the triple visited last wins.
*/
static void three_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[3][21][4],	// indexed by (partition, quant-level, integer-count)
	int format_of_choice[3][21][4], float combined_best_error[21][10], int formats_of_choice[21][10][3])
{
	// start with every slot marked as unreachable
	for (int level = 0; level < 21; level++)
	{
		for (int slot = 0; slot < 10; slot++)
		{
			combined_best_error[level][slot] = 1e30f;
		}
	}

	for (int level = 5; level < 21; level++)
	{
		for (int first = 0; first < 4; first++)
		{
			// the second class has to stay within one step of the first class
			const int second_begin = (first > 0) ? first - 1 : 0;
			const int second_end = (first < 3) ? first + 1 : 3;

			for (int second = second_begin; second <= second_end; second++)
			{
				const int low2 = (first < second) ? first : second;
				const int high2 = (first > second) ? first : second;

				// the third class must keep the overall spread at one step or less,
				// i.e. lie in [high2 - 1, low2 + 1] clipped to 0..3
				const int third_begin = (high2 > 0) ? high2 - 1 : 0;
				const int third_end = (low2 < 3) ? low2 + 1 : 3;

				for (int third = third_begin; third <= third_end; third++)
				{
					const int slot = first + second + third;

					// summed error of the triple, capped at 1e10f
					float triple_error = (best_error[0][level][first] + best_error[1][level][second] + best_error[2][level][third]) < (1e10f)
						? (best_error[0][level][first] + best_error[1][level][second] + best_error[2][level][third])
						: (1e10f);

					// "<=" on purpose: a later triple replaces an earlier one of equal error
					if (triple_error <= combined_best_error[level][slot])
					{
						combined_best_error[level][slot] = triple_error;
						formats_of_choice[level][slot][0] = format_of_choice[0][level][first];
						formats_of_choice[level][slot][1] = format_of_choice[1][level][second];
						formats_of_choice[level][slot][2] = format_of_choice[2][level][third];
					}
				}
			}
		}
	}
}
563
564
565 // for 3 partitions, find the best combination (three formats + a quantization level) for a given bitcount
566
three_partitions_find_best_combination_for_bitcount(float combined_best_error[21][10],int formats_of_choice[21][10][3],int bits_available,int * best_quantization_level,int * best_quantization_level_mod,int * best_formats,float * error_of_best_combination)567 static void three_partitions_find_best_combination_for_bitcount(float combined_best_error[21][10],
568 int formats_of_choice[21][10][3],
569 int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
570 {
571 int i;
572
573 int best_integer_count = 0;
574 float best_integer_count_error = 1e20f;
575 int integer_count;
576
577 for (integer_count = 3; integer_count <= 9; integer_count++)
578 {
579 // compute the quantization level for a given number of integers and a given number of bits.
580 int quantization_level = quantization_mode_table[integer_count][bits_available];
581 if (quantization_level == -1)
582 break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
583 float integer_count_error = combined_best_error[quantization_level][integer_count - 3];
584 if (integer_count_error < best_integer_count_error)
585 {
586 best_integer_count_error = integer_count_error;
587 best_integer_count = integer_count;
588 }
589 }
590
591 int ql = quantization_mode_table[best_integer_count][bits_available];
592 int ql_mod = quantization_mode_table[best_integer_count][bits_available + 5];
593
594 *best_quantization_level = ql;
595 *best_quantization_level_mod = ql_mod;
596 *error_of_best_combination = best_integer_count_error;
597 if (ql >= 0)
598 {
599 for (i = 0; i < 3; i++)
600 best_formats[i] = formats_of_choice[ql][best_integer_count - 3][i];
601 }
602 else
603 {
604 for (i = 0; i < 3; i++)
605 best_formats[i] = FMT_LUMINANCE;
606 }
607 }
608
609
610
611
612 // for 4 partitions, find the best format combinations for every (quantization-mode, integer-count) combination
613
/*
	For a 4-partition block: for every quantization level and every total format-class
	count, find the quadruple of per-partition format classes with the lowest summed
	error.

	The format class of a partition is the second index (0..3) of its best_error /
	format_of_choice tables. Within a quadruple, the largest and smallest class may
	differ by at most 1; the quadruple is stored in the slot given by the sum of the
	four classes (0..12).

	Every slot of combined_best_error is first set to 1e30f. Only quantization levels
	5..20 are then filled in; for lower levels formats_of_choice is not written.
	Summed errors are capped at 1e10f, and on equal error the quadruple visited last
	wins.
*/
static void four_partitions_find_best_combination_for_every_quantization_and_integer_count(float best_error[4][21][4],	// indexed by (partition, quant-level, integer-count)
	int format_of_choice[4][21][4], float combined_best_error[21][13], int formats_of_choice[21][13][4])
{
	// start with every slot marked as unreachable
	for (int level = 0; level < 21; level++)
	{
		for (int slot = 0; slot < 13; slot++)
		{
			combined_best_error[level][slot] = 1e30f;
		}
	}

	for (int level = 5; level < 21; level++)
	{
		for (int first = 0; first < 4; first++)
		{
			// the second class has to stay within one step of the first class
			const int second_begin = (first > 0) ? first - 1 : 0;
			const int second_end = (first < 3) ? first + 1 : 3;

			for (int second = second_begin; second <= second_end; second++)
			{
				const int low2 = (first < second) ? first : second;
				const int high2 = (first > second) ? first : second;

				// each further class must keep the overall spread at one step or
				// less, i.e. lie in [high - 1, low + 1] clipped to 0..3
				const int third_begin = (high2 > 0) ? high2 - 1 : 0;
				const int third_end = (low2 < 3) ? low2 + 1 : 3;

				for (int third = third_begin; third <= third_end; third++)
				{
					const int low3 = (third < low2) ? third : low2;
					const int high3 = (third > high2) ? third : high2;

					const int fourth_begin = (high3 > 0) ? high3 - 1 : 0;
					const int fourth_end = (low3 < 3) ? low3 + 1 : 3;

					for (int fourth = fourth_begin; fourth <= fourth_end; fourth++)
					{
						const int slot = first + second + third + fourth;

						// summed error of the quadruple, capped at 1e10f
						float quad_error = (best_error[0][level][first] + best_error[1][level][second] + best_error[2][level][third] + best_error[3][level][fourth]) < (1e10f)
							? (best_error[0][level][first] + best_error[1][level][second] + best_error[2][level][third] + best_error[3][level][fourth])
							: (1e10f);

						// "<=" on purpose: a later quadruple replaces an earlier one of equal error
						if (quad_error <= combined_best_error[level][slot])
						{
							combined_best_error[level][slot] = quad_error;
							formats_of_choice[level][slot][0] = format_of_choice[0][level][first];
							formats_of_choice[level][slot][1] = format_of_choice[1][level][second];
							formats_of_choice[level][slot][2] = format_of_choice[2][level][third];
							formats_of_choice[level][slot][3] = format_of_choice[3][level][fourth];
						}
					}
				}
			}
		}
	}
}
663
664
665
666
667
668
669 // for 4 partitions, find the best combination (four formats + a quantization level) for a given bitcount
670
four_partitions_find_best_combination_for_bitcount(float combined_best_error[21][13],int formats_of_choice[21][13][4],int bits_available,int * best_quantization_level,int * best_quantization_level_mod,int * best_formats,float * error_of_best_combination)671 static void four_partitions_find_best_combination_for_bitcount(float combined_best_error[21][13],
672 int formats_of_choice[21][13][4],
673 int bits_available, int *best_quantization_level, int *best_quantization_level_mod, int *best_formats, float *error_of_best_combination)
674 {
675 int i;
676 int best_integer_count = 0;
677 float best_integer_count_error = 1e20f;
678 int integer_count;
679
680 for (integer_count = 4; integer_count <= 9; integer_count++)
681 {
682 // compute the quantization level for a given number of integers and a given number of bits.
683 int quantization_level = quantization_mode_table[integer_count][bits_available];
684 if (quantization_level == -1)
685 break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all.
686 float integer_count_error = combined_best_error[quantization_level][integer_count - 4];
687 if (integer_count_error < best_integer_count_error)
688 {
689 best_integer_count_error = integer_count_error;
690 best_integer_count = integer_count;
691 }
692 }
693
694 int ql = quantization_mode_table[best_integer_count][bits_available];
695 int ql_mod = quantization_mode_table[best_integer_count][bits_available + 8];
696
697 *best_quantization_level = ql;
698 *best_quantization_level_mod = ql_mod;
699 *error_of_best_combination = best_integer_count_error;
700 if (ql >= 0)
701 {
702 for (i = 0; i < 4; i++)
703 best_formats[i] = formats_of_choice[ql][best_integer_count - 4][i];
704 }
705 else
706 {
707 for (i = 0; i < 4; i++)
708 best_formats[i] = FMT_LUMINANCE;
709 }
710 }
711
712
713
714 /*
715 The determine_optimal_set_of_endpoint_formats_to_use() function.
716
717 It identifies, for each mode, which set of color endpoint encodings
718 produces the best overall result. It then reports back which 4 modes
719 look best, along with the ideal color encoding combination for each.
720
721 It takes as input:
	a partitioning and an imageblock,
	a set of color endpoints,
	for each mode, the number of bits available for color encoding and the error incurred by quantization,
	in case of 2 planes of weights, a specifier for which color component to use for the second plane of weights.
726
727 It delivers as output for each of the 4 selected modes:
728 format specifier
729 for each partition
730 quantization level to use
731 modified quantization level to use
732 (when all format specifiers are equal)
733 */
734
determine_optimal_set_of_endpoint_formats_to_use(int xdim,int ydim,int zdim,const partition_info * pt,const imageblock * blk,const error_weight_block * ewb,const endpoints * ep,int separate_component,const int * qwt_bitcounts,const float * qwt_errors,int partition_format_specifiers[4][4],int quantized_weight[4],int quantization_level[4],int quantization_level_mod[4])735 void determine_optimal_set_of_endpoint_formats_to_use(int xdim, int ydim, int zdim,
736 const partition_info * pt, const imageblock * blk, const error_weight_block * ewb,
737 const endpoints * ep,
738 int separate_component, // separate color component for 2-plane mode; -1 for single-plane mode
739 // bitcounts and errors computed for the various quantization methods
740 const int *qwt_bitcounts, const float *qwt_errors,
741 // output data
742 int partition_format_specifiers[4][4], int quantized_weight[4],
743 int quantization_level[4], int quantization_level_mod[4])
744 {
745 int i, j;
746 int partition_count = pt->partition_count;
747
748 int encode_hdr_rgb = blk->rgb_lns[0];
749 int encode_hdr_alpha = blk->alpha_lns[0];
750
751
752 // call a helper function to compute the errors that result from various
753 // encoding choices (such as using luminance instead of RGB, discarding Alpha,
754 // using RGB-scale in place of two separate RGB endpoints and so on)
755 encoding_choice_errors eci[4];
756 compute_encoding_choice_errors(xdim, ydim, zdim, blk, pt, ewb, separate_component, eci);
757
758 // for each partition, compute the error weights to apply for that partition.
759 float4 error_weightings[4];
760 float4 dummied_color_scalefactors[4]; // only used to receive data
761 compute_partition_error_color_weightings(xdim, ydim, zdim, ewb, pt, error_weightings, dummied_color_scalefactors);
762
763
764 float best_error[4][21][4];
765 int format_of_choice[4][21][4];
766 for (i = 0; i < partition_count; i++)
767 compute_color_error_for_every_integer_count_and_quantization_level(encode_hdr_rgb, encode_hdr_alpha, i, pt, &(eci[i]), ep, error_weightings, best_error[i], format_of_choice[i]);
768
769 float errors_of_best_combination[MAX_WEIGHT_MODES];
770 int best_quantization_levels[MAX_WEIGHT_MODES];
771 int best_quantization_levels_mod[MAX_WEIGHT_MODES];
772 int best_ep_formats[MAX_WEIGHT_MODES][4];
773
774 // code for the case where the block contains 1 partition
775 if (partition_count == 1)
776 {
777 int best_quantization_level;
778 int best_format;
779 float error_of_best_combination;
780 for (i = 0; i < MAX_WEIGHT_MODES; i++)
781 {
782 if (qwt_errors[i] >= 1e29f)
783 {
784 errors_of_best_combination[i] = 1e30f;
785 continue;
786 }
787
788 one_partition_find_best_combination_for_bitcount(best_error[0], format_of_choice[0], qwt_bitcounts[i], &best_quantization_level, &best_format, &error_of_best_combination);
789 error_of_best_combination += qwt_errors[i];
790
791 errors_of_best_combination[i] = error_of_best_combination;
792 best_quantization_levels[i] = best_quantization_level;
793 best_quantization_levels_mod[i] = best_quantization_level;
794 best_ep_formats[i][0] = best_format;
795 }
796 }
797
798 // code for the case where the block contains 2 partitions
799 else if (partition_count == 2)
800 {
801 int best_quantization_level;
802 int best_quantization_level_mod;
803 int best_formats[2];
804 float error_of_best_combination;
805
806 float combined_best_error[21][7];
807 int formats_of_choice[21][7][2];
808
809 two_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
810
811
812 for (i = 0; i < MAX_WEIGHT_MODES; i++)
813 {
814 if (qwt_errors[i] >= 1e29f)
815 {
816 errors_of_best_combination[i] = 1e30f;
817 continue;
818 }
819
820 two_partitions_find_best_combination_for_bitcount(combined_best_error, formats_of_choice, qwt_bitcounts[i],
821 &best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
822
823 error_of_best_combination += qwt_errors[i];
824
825 errors_of_best_combination[i] = error_of_best_combination;
826 best_quantization_levels[i] = best_quantization_level;
827 best_quantization_levels_mod[i] = best_quantization_level_mod;
828 best_ep_formats[i][0] = best_formats[0];
829 best_ep_formats[i][1] = best_formats[1];
830 }
831 }
832
833 // code for the case where the block contains 3 partitions
834 else if (partition_count == 3)
835 {
836 int best_quantization_level;
837 int best_quantization_level_mod;
838 int best_formats[3];
839 float error_of_best_combination;
840
841 float combined_best_error[21][10];
842 int formats_of_choice[21][10][3];
843
844 three_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
845
846 for (i = 0; i < MAX_WEIGHT_MODES; i++)
847 {
848 if (qwt_errors[i] >= 1e29f)
849 {
850 errors_of_best_combination[i] = 1e30f;
851 continue;
852 }
853
854 three_partitions_find_best_combination_for_bitcount(combined_best_error,
855 formats_of_choice, qwt_bitcounts[i], &best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
856 error_of_best_combination += qwt_errors[i];
857
858 errors_of_best_combination[i] = error_of_best_combination;
859 best_quantization_levels[i] = best_quantization_level;
860 best_quantization_levels_mod[i] = best_quantization_level_mod;
861 best_ep_formats[i][0] = best_formats[0];
862 best_ep_formats[i][1] = best_formats[1];
863 best_ep_formats[i][2] = best_formats[2];
864 }
865 }
866
867 // code for the case where the block contains 4 partitions
868 else if (partition_count == 4)
869 {
870 int best_quantization_level;
871 int best_quantization_level_mod;
872 int best_formats[4];
873 float error_of_best_combination;
874
875 float combined_best_error[21][13];
876 int formats_of_choice[21][13][4];
877
878 four_partitions_find_best_combination_for_every_quantization_and_integer_count(best_error, format_of_choice, combined_best_error, formats_of_choice);
879
880 for (i = 0; i < MAX_WEIGHT_MODES; i++)
881 {
882 if (qwt_errors[i] >= 1e29f)
883 {
884 errors_of_best_combination[i] = 1e30f;
885 continue;
886 }
887 four_partitions_find_best_combination_for_bitcount(combined_best_error,
888 formats_of_choice, qwt_bitcounts[i], &best_quantization_level, &best_quantization_level_mod, best_formats, &error_of_best_combination);
889 error_of_best_combination += qwt_errors[i];
890
891 errors_of_best_combination[i] = error_of_best_combination;
892 best_quantization_levels[i] = best_quantization_level;
893 best_quantization_levels_mod[i] = best_quantization_level_mod;
894 best_ep_formats[i][0] = best_formats[0];
895 best_ep_formats[i][1] = best_formats[1];
896 best_ep_formats[i][2] = best_formats[2];
897 best_ep_formats[i][3] = best_formats[3];
898 }
899 }
900
901 // finally, go through the results and pick the 4 best-looking modes.
902
903 int best_error_weights[4];
904
905 for (i = 0; i < 4; i++)
906 {
907 float best_ep_error = 1e30f;
908 int best_error_index = -1;
909 for (j = 0; j < MAX_WEIGHT_MODES; j++)
910 {
911 if (errors_of_best_combination[j] < best_ep_error && best_quantization_levels[j] >= 5)
912 {
913 best_ep_error = errors_of_best_combination[j];
914 best_error_index = j;
915 }
916 }
917 best_error_weights[i] = best_error_index;
918
919 if(best_error_index >= 0)
920 {
921 errors_of_best_combination[best_error_index] = 1e30f;
922 }
923 }
924
925 for (i = 0; i < 4; i++)
926 {
927 quantized_weight[i] = best_error_weights[i];
928 if (quantized_weight[i] >= 0)
929 {
930 quantization_level[i] = best_quantization_levels[best_error_weights[i]];
931 quantization_level_mod[i] = best_quantization_levels_mod[best_error_weights[i]];
932 for (j = 0; j < partition_count; j++)
933 {
934 partition_format_specifiers[i][j] = best_ep_formats[best_error_weights[i]][j];
935 }
936 }
937 }
938 }
939