1 /*----------------------------------------------------------------------------*/
2 /**
3 * This confidential and proprietary software may be used only as
4 * authorised by a licensing agreement from ARM Limited
5 * (C) COPYRIGHT 2011-2012 ARM Limited
6 * ALL RIGHTS RESERVED
7 *
8 * The entire notice above must be reproduced on all authorised
9 * copies and copies may only be made to the extent permitted
10 * by a licensing agreement from ARM Limited.
11 *
12 * @brief Compress a block of colors, expressed as a symbolic block, for ASTC.
13 */
14 /*----------------------------------------------------------------------------*/
15
16 #include "astc_codec_internals.h"
17
18 #include "softfloat.h"
19 #include <math.h>
20 #include <string.h>
21 #include <stdio.h>
22
23 #ifdef DEBUG_CAPTURE_NAN
24 #ifndef _GNU_SOURCE
25 #define _GNU_SOURCE
26 #endif
27
28 #include <fenv.h>
29 #endif
30
31 #include <stdio.h>
32
realign_weights(astc_decode_mode decode_mode,int xdim,int ydim,int zdim,const imageblock * blk,const error_weight_block * ewb,symbolic_compressed_block * scb,uint8_t * weight_set8,uint8_t * plane2_weight_set8)33 int realign_weights(astc_decode_mode decode_mode,
34 int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, symbolic_compressed_block * scb, uint8_t * weight_set8, uint8_t * plane2_weight_set8)
35 {
36 int i, j;
37
38 // get the appropriate partition descriptor.
39 int partition_count = scb->partition_count;
40 const partition_info *pt = get_partition_table(xdim, ydim, zdim, partition_count);
41 pt += scb->partition_index;
42
43 // get the appropriate block descriptor
44 const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
45 const decimation_table *const *ixtab2 = bsd->decimation_tables;
46
47 const decimation_table *it = ixtab2[bsd->block_modes[scb->block_mode].decimation_mode];
48
49 int is_dual_plane = bsd->block_modes[scb->block_mode].is_dual_plane;
50
51 // get quantization-parameters
52 int weight_quantization_level = bsd->block_modes[scb->block_mode].quantization_mode;
53
54
55 // decode the color endpoints
56 ushort4 color_endpoint0[4];
57 ushort4 color_endpoint1[4];
58 int rgb_hdr[4];
59 int alpha_hdr[4];
60 int nan_endpoint[4];
61
62
63 for (i = 0; i < partition_count; i++)
64 unpack_color_endpoints(decode_mode,
65 scb->color_formats[i], scb->color_quantization_level, scb->color_values[i], &rgb_hdr[i], &alpha_hdr[i], &nan_endpoint[i], &(color_endpoint0[i]), &(color_endpoint1[i]));
66
67
68 float uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK];
69 float uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK];
70 int weight_count = it->num_weights;
71
72 // read and unquantize the weights.
73
74 const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]);
75
76 for (i = 0; i < weight_count; i++)
77 {
78 uq_plane1_weights[i] = qat->unquantized_value_flt[weight_set8[i]];
79 }
80 if (is_dual_plane)
81 {
82 for (i = 0; i < weight_count; i++)
83 uq_plane2_weights[i] = qat->unquantized_value_flt[plane2_weight_set8[i]];
84 }
85
86
87 int plane2_color_component = is_dual_plane ? scb->plane2_color_component : -1;
88
89 // for each weight, unquantize the weight, use it to compute a color and a color error.
90 // then, increment the weight until the color error stops decreasing
91 // then, decrement the weight until the color error stops increasing
92
93 #define COMPUTE_ERROR( errorvar ) \
94 errorvar = 0.0f; \
95 for(j=0;j<texels_to_evaluate;j++) \
96 { \
97 int texel = it->weight_texel[i][j]; \
98 int partition = pt->partition_of_texel[texel]; \
99 float plane1_weight = compute_value_of_texel_flt( texel, it, uq_plane1_weights ); \
100 float plane2_weight = 0.0f; \
101 if( is_dual_plane ) \
102 plane2_weight = compute_value_of_texel_flt( texel, it, uq_plane2_weights ); \
103 int int_plane1_weight = static_cast<int>(floor( plane1_weight*64.0f + 0.5f ) ); \
104 int int_plane2_weight = static_cast<int>(floor( plane2_weight*64.0f + 0.5f ) ); \
105 ushort4 lrp_color = lerp_color_int( \
106 decode_mode, \
107 color_endpoint0[partition], \
108 color_endpoint1[partition], \
109 int_plane1_weight, \
110 int_plane2_weight, \
111 plane2_color_component ); \
112 float4 color = float4( lrp_color.x, lrp_color.y, lrp_color.z, lrp_color.w ); \
113 float4 origcolor = float4( \
114 blk->work_data[4*texel], \
115 blk->work_data[4*texel+1], \
116 blk->work_data[4*texel+2], \
117 blk->work_data[4*texel+3] ); \
118 float4 error_weight = ewb->error_weights[texel]; \
119 float4 colordiff = color - origcolor; \
120 errorvar += dot( colordiff*colordiff, error_weight ); \
121 }
122
123
124 int adjustments = 0;
125
126 for (i = 0; i < weight_count; i++)
127 {
128 int current_wt = weight_set8[i];
129 int texels_to_evaluate = it->weight_num_texels[i];
130
131 float current_error;
132
133 COMPUTE_ERROR(current_error);
134
135 // increment until error starts increasing.
136 while (1)
137 {
138 int next_wt = qat->next_quantized_value[current_wt];
139 if (next_wt == current_wt)
140 break;
141 uq_plane1_weights[i] = qat->unquantized_value_flt[next_wt];
142 float next_error;
143 COMPUTE_ERROR(next_error);
144 if (next_error < current_error)
145 {
146 // succeeded, increment the weight
147 current_wt = next_wt;
148 current_error = next_error;
149 adjustments++;
150 }
151 else
152 {
153 // failed, back out the attempted increment
154 uq_plane1_weights[i] = qat->unquantized_value_flt[current_wt];
155 break;
156 }
157 }
158 // decrement until error starts increasing
159 while (1)
160 {
161 int prev_wt = qat->prev_quantized_value[current_wt];
162 if (prev_wt == current_wt)
163 break;
164 uq_plane1_weights[i] = qat->unquantized_value_flt[prev_wt];
165 float prev_error;
166 COMPUTE_ERROR(prev_error);
167 if (prev_error < current_error)
168 {
169 // succeeded, decrement the weight
170 current_wt = prev_wt;
171 current_error = prev_error;
172 adjustments++;
173 }
174 else
175 {
176 // failed, back out the attempted decrement
177 uq_plane1_weights[i] = qat->unquantized_value_flt[current_wt];
178 break;
179 }
180 }
181
182 weight_set8[i] = current_wt;
183 }
184
185 if (!is_dual_plane)
186 return adjustments;
187
188 // processing of the second plane of weights
189 for (i = 0; i < weight_count; i++)
190 {
191 int current_wt = plane2_weight_set8[i];
192 int texels_to_evaluate = it->weight_num_texels[i];
193
194 float current_error;
195
196 COMPUTE_ERROR(current_error);
197
198 // increment until error starts increasing.
199 while (1)
200 {
201 int next_wt = qat->next_quantized_value[current_wt];
202 if (next_wt == current_wt)
203 break;
204 uq_plane2_weights[i] = qat->unquantized_value_flt[next_wt];
205 float next_error;
206 COMPUTE_ERROR(next_error);
207 if (next_error < current_error)
208 {
209 // succeeded, increment the weight
210 current_wt = next_wt;
211 current_error = next_error;
212 adjustments++;
213 }
214 else
215 {
216 // failed, back out the attempted increment
217 uq_plane2_weights[i] = qat->unquantized_value_flt[current_wt];
218 break;
219 }
220 }
221 // decrement until error starts increasing
222 while (1)
223 {
224 int prev_wt = qat->prev_quantized_value[current_wt];
225 if (prev_wt == current_wt)
226 break;
227 uq_plane2_weights[i] = qat->unquantized_value_flt[prev_wt];
228 float prev_error;
229 COMPUTE_ERROR(prev_error);
230 if (prev_error < current_error)
231 {
232 // succeeded, decrement the weight
233 current_wt = prev_wt;
234 current_error = prev_error;
235 adjustments++;
236 }
237 else
238 {
239 // failed, back out the attempted decrement
240 uq_plane2_weights[i] = qat->unquantized_value_flt[current_wt];
241 break;
242 }
243 }
244
245 plane2_weight_set8[i] = current_wt;
246 }
247
248 return adjustments;
249 }
250
251 /*
252 function for compressing a block symbolically, given that we have already decided on a partition
253 */
254
255
256
compress_symbolic_block_fixed_partition_1_plane(astc_decode_mode decode_mode,float mode_cutoff,int max_refinement_iters,int xdim,int ydim,int zdim,int partition_count,int partition_index,const imageblock * blk,const error_weight_block * ewb,symbolic_compressed_block * scb,compress_fixed_partition_buffers * tmpbuf)257 static void compress_symbolic_block_fixed_partition_1_plane(astc_decode_mode decode_mode,
258 float mode_cutoff,
259 int max_refinement_iters,
260 int xdim, int ydim, int zdim,
261 int partition_count, int partition_index,
262 const imageblock * blk, const error_weight_block * ewb, symbolic_compressed_block * scb,
263 compress_fixed_partition_buffers * tmpbuf)
264 {
265 int i, j, k;
266
267
268 static const int free_bits_for_partition_count[5] = { 0, 115 - 4, 111 - 4 - PARTITION_BITS, 108 - 4 - PARTITION_BITS, 105 - 4 - PARTITION_BITS };
269
270 const partition_info *pi = get_partition_table(xdim, ydim, zdim, partition_count);
271 pi += partition_index;
272
273 // first, compute ideal weights and endpoint colors, under thre assumption that
274 // there is no quantization or decimation going on.
275 endpoints_and_weights *ei = tmpbuf->ei1;
276 endpoints_and_weights *eix = tmpbuf->eix1;
277 compute_endpoints_and_ideal_weights_1_plane(xdim, ydim, zdim, pi, blk, ewb, ei);
278
279 // next, compute ideal weights and endpoint colors for every decimation.
280 const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
281 const decimation_table *const *ixtab2 = bsd->decimation_tables;
282 // int block_mode_count = bsd->single_plane_block_mode_count;
283
284
285 float *decimated_quantized_weights = tmpbuf->decimated_quantized_weights;
286 float *decimated_weights = tmpbuf->decimated_weights;
287 float *flt_quantized_decimated_quantized_weights = tmpbuf->flt_quantized_decimated_quantized_weights;
288 uint8_t *u8_quantized_decimated_quantized_weights = tmpbuf->u8_quantized_decimated_quantized_weights;
289
290 // for each decimation mode, compute an ideal set of weights
291 // (that is, weights computed with the assumption that they are not quantized)
292 for (i = 0; i < MAX_DECIMATION_MODES; i++)
293 {
294 if (bsd->permit_encode[i] == 0 || bsd->decimation_mode_maxprec_1plane[i] < 0 || bsd->decimation_mode_percentile[i] > mode_cutoff)
295 continue;
296 eix[i] = *ei;
297 compute_ideal_weights_for_decimation_table(&(eix[i]), ixtab2[i], decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, decimated_weights + i * MAX_WEIGHTS_PER_BLOCK);
298
299 }
300
301 // compute maximum colors for the endpoints and ideal weights.
302 // for each endpoint-and-ideal-weight pair, compute the smallest weight value
303 // that will result in a color value greater than 1.
304
305
306 float4 min_ep = float4(10, 10, 10, 10);
307 for (i = 0; i < partition_count; i++)
308 {
309 #ifdef DEBUG_CAPTURE_NAN
310 fedisableexcept(FE_DIVBYZERO | FE_INVALID);
311 #endif
312
313 float4 ep = (float4(1, 1, 1, 1) - ei->ep.endpt0[i]) / (ei->ep.endpt1[i] - ei->ep.endpt0[i]);
314 if (ep.x > 0.5f && ep.x < min_ep.x)
315 min_ep.x = ep.x;
316 if (ep.y > 0.5f && ep.y < min_ep.y)
317 min_ep.y = ep.y;
318 if (ep.z > 0.5f && ep.z < min_ep.z)
319 min_ep.z = ep.z;
320 if (ep.w > 0.5f && ep.w < min_ep.w)
321 min_ep.w = ep.w;
322
323 #ifdef DEBUG_CAPTURE_NAN
324 feenableexcept(FE_DIVBYZERO | FE_INVALID);
325 #endif
326 }
327
328 float min_wt_cutoff = MIN(MIN(min_ep.x, min_ep.y), MIN(min_ep.z, min_ep.w));
329
330 // for each mode, use the angular method to compute a shift.
331 float weight_low_value[MAX_WEIGHT_MODES];
332 float weight_high_value[MAX_WEIGHT_MODES];
333
334 compute_angular_endpoints_1plane(mode_cutoff, bsd, decimated_quantized_weights, decimated_weights, weight_low_value, weight_high_value);
335
336 // for each mode (which specifies a decimation and a quantization):
337 // * compute number of bits needed for the quantized weights.
338 // * generate an optimized set of quantized weights.
339 // * compute quantization errors for the mode.
340
341 int qwt_bitcounts[MAX_WEIGHT_MODES];
342 float qwt_errors[MAX_WEIGHT_MODES];
343
344 for (i = 0; i < MAX_WEIGHT_MODES; i++)
345 {
346 if (bsd->block_modes[i].permit_encode == 0 || bsd->block_modes[i].is_dual_plane != 0 || bsd->block_modes[i].percentile > mode_cutoff)
347 {
348 qwt_errors[i] = 1e38f;
349 continue;
350 }
351 if (weight_high_value[i] > 1.02f * min_wt_cutoff)
352 weight_high_value[i] = 1.0f;
353
354 int decimation_mode = bsd->block_modes[i].decimation_mode;
355 if (bsd->decimation_mode_percentile[decimation_mode] > mode_cutoff)
356 ASTC_CODEC_INTERNAL_ERROR;
357
358
359 // compute weight bitcount for the mode
360 int bits_used_by_weights = compute_ise_bitcount(ixtab2[decimation_mode]->num_weights,
361 (quantization_method) bsd->block_modes[i].quantization_mode);
362 int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights;
363 if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96)
364 {
365 qwt_errors[i] = 1e38f;
366 continue;
367 }
368 qwt_bitcounts[i] = bitcount;
369
370
371 // then, generate the optimized set of weights for the weight mode.
372 compute_ideal_quantized_weights_for_decimation_table(&(eix[decimation_mode]),
373 ixtab2[decimation_mode],
374 weight_low_value[i], weight_high_value[i],
375 decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * decimation_mode,
376 flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i,
377 u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i,
378 bsd->block_modes[i].quantization_mode);
379
380 // then, compute weight-errors for the weight mode.
381 qwt_errors[i] = compute_error_of_weight_set(&(eix[decimation_mode]), ixtab2[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i);
382
383 #ifdef DEBUG_PRINT_DIAGNOSTICS
384 if (print_diagnostics)
385 printf("Block mode %d -> weight error = %f\n", i, qwt_errors[i]);
386 #endif
387 }
388
389 // for each weighting mode, determine the optimal combination of color endpoint encodings
390 // and weight encodings; return results for the 4 best-looking modes.
391
392 int partition_format_specifiers[4][4];
393 int quantized_weight[4];
394 int color_quantization_level[4];
395 int color_quantization_level_mod[4];
396 determine_optimal_set_of_endpoint_formats_to_use(xdim, ydim, zdim, pi, blk, ewb, &(ei->ep), -1, // used to flag that we are in single-weight mode
397 qwt_bitcounts, qwt_errors, partition_format_specifiers, quantized_weight, color_quantization_level, color_quantization_level_mod);
398
399
400 // then iterate over the 4 believed-to-be-best modes to find out which one is
401 // actually best.
402 for (i = 0; i < 4; i++)
403 {
404 uint8_t *u8_weight_src;
405 int weights_to_copy;
406
407 if (quantized_weight[i] < 0)
408 {
409 scb->error_block = 1;
410 scb++;
411 continue;
412 }
413
414 int decimation_mode = bsd->block_modes[quantized_weight[i]].decimation_mode;
415 int weight_quantization_mode = bsd->block_modes[quantized_weight[i]].quantization_mode;
416 const decimation_table *it = ixtab2[decimation_mode];
417
418 #ifdef DEBUG_PRINT_DIAGNOSTICS
419 if (print_diagnostics)
420 {
421 printf("Selected mode = %d\n", quantized_weight[i]);
422 printf("Selected decimation mode = %d\n", decimation_mode);
423 printf("Selected weight-quantization mode = %d\n", weight_quantization_mode);
424 }
425 #endif
426
427 u8_weight_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * quantized_weight[i];
428
429 weights_to_copy = it->num_weights;
430
431 // recompute the ideal color endpoints before storing them.
432 float4 rgbs_colors[4];
433 float4 rgbo_colors[4];
434 float2 lum_intervals[4];
435
436 int l;
437 for (l = 0; l < max_refinement_iters; l++)
438 {
439 recompute_ideal_colors(xdim, ydim, zdim, weight_quantization_mode, &(eix[decimation_mode].ep), rgbs_colors, rgbo_colors, lum_intervals, u8_weight_src, NULL, -1, pi, it, blk, ewb);
440
441 // quantize the chosen color
442
443 // store the colors for the block
444 for (j = 0; j < partition_count; j++)
445 {
446 scb->color_formats[j] = pack_color_endpoints(decode_mode,
447 eix[decimation_mode].ep.endpt0[j],
448 eix[decimation_mode].ep.endpt1[j],
449 rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], scb->color_values[j], color_quantization_level[i]);
450 }
451
452
453 // if all the color endpoint modes are the same, we get a few more
454 // bits to store colors; let's see if we can take advantage of this:
455 // requantize all the colors and see if the endpoint modes remain the same;
456 // if they do, then exploit it.
457 scb->color_formats_matched = 0;
458
459 if ((partition_count >= 2 && scb->color_formats[0] == scb->color_formats[1]
460 && color_quantization_level != color_quantization_level_mod)
461 && (partition_count == 2 || (scb->color_formats[0] == scb->color_formats[2] && (partition_count == 3 || (scb->color_formats[0] == scb->color_formats[3])))))
462 {
463 int colorvals[4][12];
464 int color_formats_mod[4];
465 for (j = 0; j < partition_count; j++)
466 {
467 color_formats_mod[j] = pack_color_endpoints(decode_mode,
468 eix[decimation_mode].ep.endpt0[j],
469 eix[decimation_mode].ep.endpt1[j],
470 rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], colorvals[j], color_quantization_level_mod[i]);
471 }
472 if (color_formats_mod[0] == color_formats_mod[1]
473 && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3])))))
474 {
475 scb->color_formats_matched = 1;
476 for (j = 0; j < 4; j++)
477 for (k = 0; k < 12; k++)
478 scb->color_values[j][k] = colorvals[j][k];
479 for (j = 0; j < 4; j++)
480 scb->color_formats[j] = color_formats_mod[j];
481 }
482 }
483
484
485 // store header fields
486 scb->partition_count = partition_count;
487 scb->partition_index = partition_index;
488 scb->color_quantization_level = scb->color_formats_matched ? color_quantization_level_mod[i] : color_quantization_level[i];
489 scb->block_mode = quantized_weight[i];
490 scb->error_block = 0;
491
492 if (scb->color_quantization_level < 4)
493 {
494 scb->error_block = 1; // should never happen, but cannot prove it impossible.
495 }
496
497 // perform a final pass over the weights to try to improve them.
498 int adjustments = realign_weights(decode_mode,
499 xdim, ydim, zdim,
500 blk, ewb, scb,
501 u8_weight_src,
502 NULL);
503
504 if (adjustments == 0)
505 break;
506 }
507
508 for (j = 0; j < weights_to_copy; j++)
509 scb->plane1_weights[j] = u8_weight_src[j];
510
511 scb++;
512 }
513
514 }
515
516
517
518
519
520
compress_symbolic_block_fixed_partition_2_planes(astc_decode_mode decode_mode,float mode_cutoff,int max_refinement_iters,int xdim,int ydim,int zdim,int partition_count,int partition_index,int separate_component,const imageblock * blk,const error_weight_block * ewb,symbolic_compressed_block * scb,compress_fixed_partition_buffers * tmpbuf)521 static void compress_symbolic_block_fixed_partition_2_planes(astc_decode_mode decode_mode,
522 float mode_cutoff,
523 int max_refinement_iters,
524 int xdim, int ydim, int zdim,
525 int partition_count, int partition_index,
526 int separate_component, const imageblock * blk, const error_weight_block * ewb,
527 symbolic_compressed_block * scb,
528 compress_fixed_partition_buffers * tmpbuf)
529 {
530 int i, j, k;
531
532 static const int free_bits_for_partition_count[5] =
533 { 0, 113 - 4, 109 - 4 - PARTITION_BITS, 106 - 4 - PARTITION_BITS, 103 - 4 - PARTITION_BITS };
534
535 const partition_info *pi = get_partition_table(xdim, ydim, zdim, partition_count);
536 pi += partition_index;
537
538 // first, compute ideal weights and endpoint colors
539 endpoints_and_weights *ei1 = tmpbuf->ei1;
540 endpoints_and_weights *ei2 = tmpbuf->ei2;
541 endpoints_and_weights *eix1 = tmpbuf->eix1;
542 endpoints_and_weights *eix2 = tmpbuf->eix2;
543 compute_endpoints_and_ideal_weights_2_planes(xdim, ydim, zdim, pi, blk, ewb, separate_component, ei1, ei2);
544
545 // next, compute ideal weights and endpoint colors for every decimation.
546 const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
547 const decimation_table *const *ixtab2 = bsd->decimation_tables;
548
549
550 float *decimated_quantized_weights = tmpbuf->decimated_quantized_weights;
551 float *decimated_weights = tmpbuf->decimated_weights;
552 float *flt_quantized_decimated_quantized_weights = tmpbuf->flt_quantized_decimated_quantized_weights;
553 uint8_t *u8_quantized_decimated_quantized_weights = tmpbuf->u8_quantized_decimated_quantized_weights;
554
555 // for each decimation mode, compute an ideal set of weights
556 for (i = 0; i < MAX_DECIMATION_MODES; i++)
557 {
558 if (bsd->permit_encode[i] == 0 || bsd->decimation_mode_maxprec_2planes[i] < 0 || bsd->decimation_mode_percentile[i] > mode_cutoff)
559 continue;
560
561 eix1[i] = *ei1;
562 eix2[i] = *ei2;
563 compute_ideal_weights_for_decimation_table(&(eix1[i]), ixtab2[i], decimated_quantized_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK, decimated_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK);
564 compute_ideal_weights_for_decimation_table(&(eix2[i]), ixtab2[i], decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK);
565 }
566
567 // compute maximum colors for the endpoints and ideal weights.
568 // for each endpoint-and-ideal-weight pair, compute the smallest weight value
569 // that will result in a color value greater than 1.
570
571 float4 min_ep1 = float4(10, 10, 10, 10);
572 float4 min_ep2 = float4(10, 10, 10, 10);
573 for (i = 0; i < partition_count; i++)
574 {
575
576 #ifdef DEBUG_CAPTURE_NAN
577 fedisableexcept(FE_DIVBYZERO | FE_INVALID);
578 #endif
579
580 float4 ep1 = (float4(1, 1, 1, 1) - ei1->ep.endpt0[i]) / (ei1->ep.endpt1[i] - ei1->ep.endpt0[i]);
581 if (ep1.x > 0.5f && ep1.x < min_ep1.x)
582 min_ep1.x = ep1.x;
583 if (ep1.y > 0.5f && ep1.y < min_ep1.y)
584 min_ep1.y = ep1.y;
585 if (ep1.z > 0.5f && ep1.z < min_ep1.z)
586 min_ep1.z = ep1.z;
587 if (ep1.w > 0.5f && ep1.w < min_ep1.w)
588 min_ep1.w = ep1.w;
589 float4 ep2 = (float4(1, 1, 1, 1) - ei2->ep.endpt0[i]) / (ei2->ep.endpt1[i] - ei2->ep.endpt0[i]);
590 if (ep2.x > 0.5f && ep2.x < min_ep2.x)
591 min_ep2.x = ep2.x;
592 if (ep2.y > 0.5f && ep2.y < min_ep2.y)
593 min_ep2.y = ep2.y;
594 if (ep2.z > 0.5f && ep2.z < min_ep2.z)
595 min_ep2.z = ep2.z;
596 if (ep2.w > 0.5f && ep2.w < min_ep2.w)
597 min_ep2.w = ep2.w;
598
599 #ifdef DEBUG_CAPTURE_NAN
600 feenableexcept(FE_DIVBYZERO | FE_INVALID);
601 #endif
602 }
603
604 float min_wt_cutoff1, min_wt_cutoff2;
605 switch (separate_component)
606 {
607 case 0:
608 min_wt_cutoff2 = min_ep2.x;
609 min_ep1.x = 1e30f;
610 break;
611 case 1:
612 min_wt_cutoff2 = min_ep2.y;
613 min_ep1.y = 1e30f;
614 break;
615 case 2:
616 min_wt_cutoff2 = min_ep2.z;
617 min_ep1.z = 1e30f;
618 break;
619 case 3:
620 min_wt_cutoff2 = min_ep2.w;
621 min_ep1.w = 1e30f;
622 break;
623 default:
624 min_wt_cutoff2 = 1e30f;
625 }
626
627 min_wt_cutoff1 = MIN(MIN(min_ep1.x, min_ep1.y), MIN(min_ep1.z, min_ep1.w));
628
629 float weight_low_value1[MAX_WEIGHT_MODES];
630 float weight_high_value1[MAX_WEIGHT_MODES];
631 float weight_low_value2[MAX_WEIGHT_MODES];
632 float weight_high_value2[MAX_WEIGHT_MODES];
633
634 compute_angular_endpoints_2planes(mode_cutoff, bsd, decimated_quantized_weights, decimated_weights, weight_low_value1, weight_high_value1, weight_low_value2, weight_high_value2);
635
636 // for each mode (which specifies a decimation and a quantization):
637 // * generate an optimized set of quantized weights.
638 // * compute quantization errors for each mode
639 // * compute number of bits needed for the quantized weights.
640
641 int qwt_bitcounts[MAX_WEIGHT_MODES];
642 float qwt_errors[MAX_WEIGHT_MODES];
643 for (i = 0; i < MAX_WEIGHT_MODES; i++)
644 {
645 if (bsd->block_modes[i].permit_encode == 0 || bsd->block_modes[i].is_dual_plane != 1 || bsd->block_modes[i].percentile > mode_cutoff)
646 {
647 qwt_errors[i] = 1e38f;
648 continue;
649 }
650 int decimation_mode = bsd->block_modes[i].decimation_mode;
651
652 if (weight_high_value1[i] > 1.02f * min_wt_cutoff1)
653 weight_high_value1[i] = 1.0f;
654 if (weight_high_value2[i] > 1.02f * min_wt_cutoff2)
655 weight_high_value2[i] = 1.0f;
656
657 // compute weight bitcount for the mode
658 int bits_used_by_weights = compute_ise_bitcount(2 * ixtab2[decimation_mode]->num_weights,
659 (quantization_method) bsd->block_modes[i].quantization_mode);
660 int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights;
661 if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96)
662 {
663 qwt_errors[i] = 1e38f;
664 continue;
665 }
666 qwt_bitcounts[i] = bitcount;
667
668
669 // then, generate the optimized set of weights for the mode.
670 compute_ideal_quantized_weights_for_decimation_table(&(eix1[decimation_mode]),
671 ixtab2[decimation_mode],
672 weight_low_value1[i],
673 weight_high_value1[i],
674 decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode),
675 flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i),
676 u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), bsd->block_modes[i].quantization_mode);
677 compute_ideal_quantized_weights_for_decimation_table(&(eix2[decimation_mode]),
678 ixtab2[decimation_mode],
679 weight_low_value2[i],
680 weight_high_value2[i],
681 decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode + 1),
682 flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1),
683 u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), bsd->block_modes[i].quantization_mode);
684
685
686 // then, compute quantization errors for the block mode.
687 qwt_errors[i] =
688 compute_error_of_weight_set(&(eix1[decimation_mode]),
689 ixtab2[decimation_mode],
690 flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i))
691 + compute_error_of_weight_set(&(eix2[decimation_mode]), ixtab2[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1));
692 }
693
694
695 // decide the optimal combination of color endpoint encodings and weight encoodings.
696 int partition_format_specifiers[4][4];
697 int quantized_weight[4];
698 int color_quantization_level[4];
699 int color_quantization_level_mod[4];
700
701 endpoints epm;
702 merge_endpoints(&(ei1->ep), &(ei2->ep), separate_component, &epm);
703
704 determine_optimal_set_of_endpoint_formats_to_use(xdim, ydim, zdim,
705 pi,
706 blk,
707 ewb,
708 &epm, separate_component, qwt_bitcounts, qwt_errors, partition_format_specifiers, quantized_weight, color_quantization_level, color_quantization_level_mod);
709
710 for (i = 0; i < 4; i++)
711 {
712 if (quantized_weight[i] < 0)
713 {
714 scb->error_block = 1;
715 scb++;
716 continue;
717 }
718
719 uint8_t *u8_weight1_src;
720 uint8_t *u8_weight2_src;
721 int weights_to_copy;
722
723 int decimation_mode = bsd->block_modes[quantized_weight[i]].decimation_mode;
724 int weight_quantization_mode = bsd->block_modes[quantized_weight[i]].quantization_mode;
725 const decimation_table *it = ixtab2[decimation_mode];
726
727 u8_weight1_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * quantized_weight[i]);
728 u8_weight2_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * quantized_weight[i] + 1);
729
730
731 weights_to_copy = it->num_weights;
732
733 // recompute the ideal color endpoints before storing them.
734 merge_endpoints(&(eix1[decimation_mode].ep), &(eix2[decimation_mode].ep), separate_component, &epm);
735
736 float4 rgbs_colors[4];
737 float4 rgbo_colors[4];
738 float2 lum_intervals[4];
739
740 int l;
741 for (l = 0; l < max_refinement_iters; l++)
742 {
743 recompute_ideal_colors(xdim, ydim, zdim, weight_quantization_mode, &epm, rgbs_colors, rgbo_colors, lum_intervals, u8_weight1_src, u8_weight2_src, separate_component, pi, it, blk, ewb);
744
745 // store the colors for the block
746 for (j = 0; j < partition_count; j++)
747 {
748 scb->color_formats[j] = pack_color_endpoints(decode_mode,
749 epm.endpt0[j],
750 epm.endpt1[j],
751 rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], scb->color_values[j], color_quantization_level[i]);
752 }
753 scb->color_formats_matched = 0;
754
755 if ((partition_count >= 2 && scb->color_formats[0] == scb->color_formats[1]
756 && color_quantization_level != color_quantization_level_mod)
757 && (partition_count == 2 || (scb->color_formats[0] == scb->color_formats[2] && (partition_count == 3 || (scb->color_formats[0] == scb->color_formats[3])))))
758 {
759 int colorvals[4][12];
760 int color_formats_mod[4];
761 for (j = 0; j < partition_count; j++)
762 {
763 color_formats_mod[j] = pack_color_endpoints(decode_mode,
764 epm.endpt0[j],
765 epm.endpt1[j],
766 rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], colorvals[j], color_quantization_level_mod[i]);
767 }
768 if (color_formats_mod[0] == color_formats_mod[1]
769 && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3])))))
770 {
771 scb->color_formats_matched = 1;
772 for (j = 0; j < 4; j++)
773 for (k = 0; k < 12; k++)
774 scb->color_values[j][k] = colorvals[j][k];
775 for (j = 0; j < 4; j++)
776 scb->color_formats[j] = color_formats_mod[j];
777 }
778 }
779
780
781 // store header fields
782 scb->partition_count = partition_count;
783 scb->partition_index = partition_index;
784 scb->color_quantization_level = scb->color_formats_matched ? color_quantization_level_mod[i] : color_quantization_level[i];
785 scb->block_mode = quantized_weight[i];
786 scb->plane2_color_component = separate_component;
787 scb->error_block = 0;
788
789 if (scb->color_quantization_level < 4)
790 {
791 scb->error_block = 1; // should never happen, but cannot prove it impossible
792 }
793
794 int adjustments = realign_weights(decode_mode,
795 xdim, ydim, zdim,
796 blk, ewb, scb,
797 u8_weight1_src,
798 u8_weight2_src);
799
800 if (adjustments == 0)
801 break;
802 }
803
804 for (j = 0; j < weights_to_copy; j++)
805 {
806 scb->plane1_weights[j] = u8_weight1_src[j];
807 scb->plane2_weights[j] = u8_weight2_src[j];
808 }
809
810 scb++;
811 }
812
813 }
814
815
816
817
818
expand_block_artifact_suppression(int xdim,int ydim,int zdim,error_weighting_params * ewp)819 void expand_block_artifact_suppression(int xdim, int ydim, int zdim, error_weighting_params * ewp)
820 {
821 int x, y, z;
822 float centerpos_x = (xdim - 1) * 0.5f;
823 float centerpos_y = (ydim - 1) * 0.5f;
824 float centerpos_z = (zdim - 1) * 0.5f;
825 float *bef = ewp->block_artifact_suppression_expanded;
826
827 for (z = 0; z < zdim; z++)
828 for (y = 0; y < ydim; y++)
829 for (x = 0; x < xdim; x++)
830 {
831 float xdif = (x - centerpos_x) / xdim;
832 float ydif = (y - centerpos_y) / ydim;
833 float zdif = (z - centerpos_z) / zdim;
834
835 float wdif = 0.36f;
836 float dist = sqrt(xdif * xdif + ydif * ydif + zdif * zdif + wdif * wdif);
837 *bef = pow(dist, ewp->block_artifact_suppression);
838 bef++;
839 }
840 }
841
842
843
844 // Function to set error weights for each color component for each texel in a block.
845 // Returns the sum of all the error values set.
846
prepare_error_weight_block(const astc_codec_image * input_image,int xdim,int ydim,int zdim,const error_weighting_params * ewp,const imageblock * blk,error_weight_block * ewb,error_weight_block_orig * ewbo)847 float prepare_error_weight_block(const astc_codec_image * input_image,
848 int xdim, int ydim, int zdim, const error_weighting_params * ewp, const imageblock * blk, error_weight_block * ewb, error_weight_block_orig * ewbo)
849 {
850
851 int x, y, z;
852 int idx = 0;
853
854 int any_mean_stdev_weight =
855 ewp->rgb_base_weight != 1.0 || ewp->alpha_base_weight != 1.0 || ewp->rgb_mean_weight != 0.0 || ewp->rgb_stdev_weight != 0.0 || ewp->alpha_mean_weight != 0.0 || ewp->alpha_stdev_weight != 0.0;
856
857 float4 color_weights = float4(ewp->rgba_weights[0],
858 ewp->rgba_weights[1],
859 ewp->rgba_weights[2],
860 ewp->rgba_weights[3]);
861
862 ewb->contains_zeroweight_texels = 0;
863
864 for (z = 0; z < zdim; z++)
865 for (y = 0; y < ydim; y++)
866 for (x = 0; x < xdim; x++)
867 {
868 int xpos = x + blk->xpos;
869 int ypos = y + blk->ypos;
870 int zpos = z + blk->zpos;
871
872 if (xpos >= input_image->xsize || ypos >= input_image->ysize || zpos >= input_image->zsize)
873 {
874 float4 weights = float4(1e-11f, 1e-11f, 1e-11f, 1e-11f);
875 ewb->error_weights[idx] = weights;
876 ewb->contains_zeroweight_texels = 1;
877 }
878 else
879 {
880 float4 error_weight = float4(ewp->rgb_base_weight,
881 ewp->rgb_base_weight,
882 ewp->rgb_base_weight,
883 ewp->alpha_base_weight);
884
885 if (any_mean_stdev_weight)
886 {
887 float4 avg = input_averages[zpos][ypos][xpos];
888 if (avg.x < 6e-5f)
889 avg.x = 6e-5f;
890 if (avg.y < 6e-5f)
891 avg.y = 6e-5f;
892 if (avg.z < 6e-5f)
893 avg.z = 6e-5f;
894 if (avg.w < 6e-5f)
895 avg.w = 6e-5f;
896 /*
897 printf("avg: %f %f %f %f\n", avg.x, avg.y, avg.z, avg.w ); */
898 avg = avg * avg;
899
900 float4 variance = input_variances[zpos][ypos][xpos];
901 variance = variance * variance;
902
903 float favg = (avg.x + avg.y + avg.z) * (1.0f / 3.0f);
904 float fvar = (variance.x + variance.y + variance.z) * (1.0f / 3.0f);
905
906 float mixing = ewp->rgb_mean_and_stdev_mixing;
907 avg.xyz = float3(favg, favg, favg) * mixing + avg.xyz * (1.0f - mixing);
908 variance.xyz = float3(fvar, fvar, fvar) * mixing + variance.xyz * (1.0f - mixing);
909
910 float4 stdev = float4(sqrt(MAX(variance.x, 0.0f)),
911 sqrt(MAX(variance.y, 0.0f)),
912 sqrt(MAX(variance.z, 0.0f)),
913 sqrt(MAX(variance.w, 0.0f)));
914
915 avg.xyz = avg.xyz * ewp->rgb_mean_weight;
916 avg.w = avg.w * ewp->alpha_mean_weight;
917 stdev.xyz = stdev.xyz * ewp->rgb_stdev_weight;
918 stdev.w = stdev.w * ewp->alpha_stdev_weight;
919 error_weight = error_weight + avg + stdev;
920
921 error_weight = float4(1.0f, 1.0f, 1.0f, 1.0f) / error_weight;
922 }
923
924 if (ewp->ra_normal_angular_scale)
925 {
926 float x = (blk->orig_data[4 * idx] - 0.5f) * 2.0f;
927 float y = (blk->orig_data[4 * idx + 3] - 0.5f) * 2.0f;
928 float denom = 1.0f - x * x - y * y;
929 if (denom < 0.1f)
930 denom = 0.1f;
931 denom = 1.0f / denom;
932 error_weight.x *= 1.0f + x * x * denom;
933 error_weight.w *= 1.0f + y * y * denom;
934 }
935
936 if (ewp->enable_rgb_scale_with_alpha)
937 {
938 float alpha_scale;
939 if (ewp->alpha_radius != 0)
940 alpha_scale = input_alpha_averages[zpos][ypos][xpos];
941 else
942 alpha_scale = blk->orig_data[4 * idx + 3];
943 if (alpha_scale < 0.0001f)
944 alpha_scale = 0.0001f;
945 alpha_scale *= alpha_scale;
946 error_weight.xyz = error_weight.xyz * alpha_scale;
947 }
948 error_weight = error_weight * color_weights;
949 error_weight = error_weight * ewp->block_artifact_suppression_expanded[idx];
950
951 // if we perform a conversion from linear to sRGB, then we multiply
952 // the weight with the derivative of the linear->sRGB transform function.
953 if (perform_srgb_transform)
954 {
955 float r = blk->orig_data[4 * idx];
956 float g = blk->orig_data[4 * idx + 1];
957 float b = blk->orig_data[4 * idx + 2];
958 if (r < 0.0031308f)
959 r = 12.92f;
960 else
961 r = 0.4396f * pow(r, -0.58333f);
962 if (g < 0.0031308f)
963 g = 12.92f;
964 else
965 g = 0.4396f * pow(g, -0.58333f);
966 if (b < 0.0031308f)
967 b = 12.92f;
968 else
969 b = 0.4396f * pow(b, -0.58333f);
970 error_weight.x *= r;
971 error_weight.y *= g;
972 error_weight.z *= b;
973 }
974
975 /*
976 printf("%f %f %f %f\n", error_weight.x, error_weight.y, error_weight.z, error_weight.w );
977 */
978
979 // when we loaded the block to begin with, we applied a transfer function
980 // and computed the derivative of the transfer function. However, the
981 // error-weight computation so far is based on the original color values,
982 // not the transfer-function values. As such, we must multiply the
983 // error weights by the derivative of the inverse of the transfer function,
984 // which is equivalent to dividing by the derivative of the transfer
985 // function.
986
987 ewbo->error_weights[idx] = error_weight;
988
989 error_weight.x /= (blk->deriv_data[4 * idx] * blk->deriv_data[4 * idx] * 1e-10f);
990 error_weight.y /= (blk->deriv_data[4 * idx + 1] * blk->deriv_data[4 * idx + 1] * 1e-10f);
991 error_weight.z /= (blk->deriv_data[4 * idx + 2] * blk->deriv_data[4 * idx + 2] * 1e-10f);
992 error_weight.w /= (blk->deriv_data[4 * idx + 3] * blk->deriv_data[4 * idx + 3] * 1e-10f);
993
994 /*
995 printf("--> %f %f %f %f\n", error_weight.x, error_weight.y, error_weight.z, error_weight.w );
996 */
997
998 ewb->error_weights[idx] = error_weight;
999 if (dot(error_weight, float4(1, 1, 1, 1)) < 1e-10f)
1000 ewb->contains_zeroweight_texels = 1;
1001 }
1002 idx++;
1003 }
1004
1005 int i;
1006
1007 float4 error_weight_sum = float4(0, 0, 0, 0);
1008 int texels_per_block = xdim * ydim * zdim;
1009
1010 for (i = 0; i < texels_per_block; i++)
1011 {
1012 error_weight_sum = error_weight_sum + ewb->error_weights[i];
1013
1014 ewb->texel_weight_r[i] = ewb->error_weights[i].x;
1015 ewb->texel_weight_g[i] = ewb->error_weights[i].y;
1016 ewb->texel_weight_b[i] = ewb->error_weights[i].z;
1017 ewb->texel_weight_a[i] = ewb->error_weights[i].w;
1018
1019 ewb->texel_weight_rg[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y) * 0.5f;
1020 ewb->texel_weight_rb[i] = (ewb->error_weights[i].x + ewb->error_weights[i].z) * 0.5f;
1021 ewb->texel_weight_gb[i] = (ewb->error_weights[i].y + ewb->error_weights[i].z) * 0.5f;
1022 ewb->texel_weight_ra[i] = (ewb->error_weights[i].x + ewb->error_weights[i].w) * 0.5f;
1023
1024 ewb->texel_weight_gba[i] = (ewb->error_weights[i].y + ewb->error_weights[i].z + ewb->error_weights[i].w) * 0.333333f;
1025 ewb->texel_weight_rba[i] = (ewb->error_weights[i].x + ewb->error_weights[i].z + ewb->error_weights[i].w) * 0.333333f;
1026 ewb->texel_weight_rga[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y + ewb->error_weights[i].w) * 0.333333f;
1027 ewb->texel_weight_rgb[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y + ewb->error_weights[i].z) * 0.333333f;
1028 ewb->texel_weight[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y + ewb->error_weights[i].z + ewb->error_weights[i].w) * 0.25f;
1029 }
1030
1031 return dot(error_weight_sum, float4(1, 1, 1, 1));
1032 }
1033
1034
/*
   Functions to analyze the statistical properties of a block:
   * simple properties: mean and variance
   * correlation coefficients derived from the covariance matrix
*/
1040
1041
1042 // compute averages and covariance matrices for 4 components
compute_covariance_matrix(int xdim,int ydim,int zdim,const imageblock * blk,const error_weight_block * ewb,mat4 * cov_matrix)1043 static void compute_covariance_matrix(int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, mat4 * cov_matrix)
1044 {
1045 int i;
1046
1047 int texels_per_block = xdim * ydim * zdim;
1048
1049 float r_sum = 0.0f;
1050 float g_sum = 0.0f;
1051 float b_sum = 0.0f;
1052 float a_sum = 0.0f;
1053 float rr_sum = 0.0f;
1054 float gg_sum = 0.0f;
1055 float bb_sum = 0.0f;
1056 float aa_sum = 0.0f;
1057 float rg_sum = 0.0f;
1058 float rb_sum = 0.0f;
1059 float ra_sum = 0.0f;
1060 float gb_sum = 0.0f;
1061 float ga_sum = 0.0f;
1062 float ba_sum = 0.0f;
1063
1064 float weight_sum = 0.0f;
1065
1066 for (i = 0; i < texels_per_block; i++)
1067 {
1068 float weight = ewb->texel_weight[i];
1069 if (weight < 0.0f)
1070 ASTC_CODEC_INTERNAL_ERROR;
1071 weight_sum += weight;
1072 float r = blk->work_data[4 * i];
1073 float g = blk->work_data[4 * i + 1];
1074 float b = blk->work_data[4 * i + 2];
1075 float a = blk->work_data[4 * i + 3];
1076 r_sum += r * weight;
1077 rr_sum += r * (r * weight);
1078 rg_sum += g * (r * weight);
1079 rb_sum += b * (r * weight);
1080 ra_sum += a * (r * weight);
1081 g_sum += g * weight;
1082 gg_sum += g * (g * weight);
1083 gb_sum += b * (g * weight);
1084 ga_sum += a * (g * weight);
1085 b_sum += b * weight;
1086 bb_sum += b * (b * weight);
1087 ba_sum += a * (b * weight);
1088 a_sum += a * weight;
1089 aa_sum += a * (a * weight);
1090 }
1091
1092 float rpt = 1.0f / MAX(weight_sum, 1e-7f);
1093 float rs = r_sum;
1094 float gs = g_sum;
1095 float bs = b_sum;
1096 float as = a_sum;
1097
1098 cov_matrix->v[0] = float4(rr_sum - rs * rs * rpt, rg_sum - rs * gs * rpt, rb_sum - rs * bs * rpt, ra_sum - rs * as * rpt);
1099 cov_matrix->v[1] = float4(rg_sum - rs * gs * rpt, gg_sum - gs * gs * rpt, gb_sum - gs * bs * rpt, ga_sum - gs * as * rpt);
1100 cov_matrix->v[2] = float4(rb_sum - rs * bs * rpt, gb_sum - gs * bs * rpt, bb_sum - bs * bs * rpt, ba_sum - bs * as * rpt);
1101 cov_matrix->v[3] = float4(ra_sum - rs * as * rpt, ga_sum - gs * as * rpt, ba_sum - bs * as * rpt, aa_sum - as * as * rpt);
1102
1103 }
1104
1105
1106
prepare_block_statistics(int xdim,int ydim,int zdim,const imageblock * blk,const error_weight_block * ewb,int * is_normal_map,float * lowest_correl)1107 void prepare_block_statistics(int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, int *is_normal_map, float *lowest_correl)
1108 {
1109 int i;
1110
1111 mat4 cov_matrix;
1112
1113 compute_covariance_matrix(xdim, ydim, zdim, blk, ewb, &cov_matrix);
1114
1115 // use the covariance matrix to compute
1116 // correllation coefficients
1117 float rr_var = cov_matrix.v[0].x;
1118 float gg_var = cov_matrix.v[1].y;
1119 float bb_var = cov_matrix.v[2].z;
1120 float aa_var = cov_matrix.v[3].w;
1121
1122 float rg_correlation = cov_matrix.v[0].y / sqrt(MAX(rr_var * gg_var, 1e-30f));
1123 float rb_correlation = cov_matrix.v[0].z / sqrt(MAX(rr_var * bb_var, 1e-30f));
1124 float ra_correlation = cov_matrix.v[0].w / sqrt(MAX(rr_var * aa_var, 1e-30f));
1125 float gb_correlation = cov_matrix.v[1].z / sqrt(MAX(gg_var * bb_var, 1e-30f));
1126 float ga_correlation = cov_matrix.v[1].w / sqrt(MAX(gg_var * aa_var, 1e-30f));
1127 float ba_correlation = cov_matrix.v[2].w / sqrt(MAX(bb_var * aa_var, 1e-30f));
1128
1129 if (astc_isnan(rg_correlation))
1130 rg_correlation = 1.0f;
1131 if (astc_isnan(rb_correlation))
1132 rb_correlation = 1.0f;
1133 if (astc_isnan(ra_correlation))
1134 ra_correlation = 1.0f;
1135 if (astc_isnan(gb_correlation))
1136 gb_correlation = 1.0f;
1137 if (astc_isnan(ga_correlation))
1138 ga_correlation = 1.0f;
1139 if (astc_isnan(ba_correlation))
1140 ba_correlation = 1.0f;
1141
1142 float lowest_correlation = MIN(fabs(rg_correlation), fabs(rb_correlation));
1143 lowest_correlation = MIN(lowest_correlation, fabs(ra_correlation));
1144 lowest_correlation = MIN(lowest_correlation, fabs(gb_correlation));
1145 lowest_correlation = MIN(lowest_correlation, fabs(ga_correlation));
1146 lowest_correlation = MIN(lowest_correlation, fabs(ba_correlation));
1147 *lowest_correl = lowest_correlation;
1148
1149 // compute a "normal-map" factor
1150 // this factor should be exactly 0.0 for a normal map, while it may be all over the
1151 // place for anything that is NOT a normal map. We can probably assume that a factor
1152 // of less than 0.2f represents a normal map.
1153
1154 float nf_sum = 0.0f;
1155
1156 int texels_per_block = xdim * ydim * zdim;
1157
1158 for (i = 0; i < texels_per_block; i++)
1159 {
1160 float3 val = float3(blk->orig_data[4 * i],
1161 blk->orig_data[4 * i + 1],
1162 blk->orig_data[4 * i + 2]);
1163 val = (val - float3(0.5f, 0.5f, 0.5f)) * 2.0f;
1164 float length_squared = dot(val, val);
1165 float nf = fabs(length_squared - 1.0f);
1166 nf_sum += nf;
1167 }
1168 float nf_avg = nf_sum / texels_per_block;
1169 *is_normal_map = nf_avg < 0.2;
1170 }
1171
1172
1173
1174
1175
compress_constant_color_block(int xdim,int ydim,int zdim,const imageblock * blk,const error_weight_block * ewb,symbolic_compressed_block * scb)1176 void compress_constant_color_block(int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, symbolic_compressed_block * scb)
1177 {
1178 int texel_count = xdim * ydim * zdim;
1179 int i;
1180
1181 float4 color_sum = float4(0, 0, 0, 0);
1182 float4 color_weight_sum = float4(0, 0, 0, 0);
1183
1184 const float *clp = blk->work_data;
1185 for (i = 0; i < texel_count; i++)
1186 {
1187 float4 weights = ewb->error_weights[i];
1188 float4 color_data = float4(clp[4 * i], clp[4 * i + 1], clp[4 * i + 2], clp[4 * i + 3]);
1189 color_sum = color_sum + (color_data * weights);
1190 color_weight_sum = color_weight_sum + weights;
1191 }
1192
1193 float4 avg_color = color_sum / color_weight_sum;
1194
1195 int use_fp16 = blk->rgb_lns[0];
1196
1197 #ifdef DEBUG_PRINT_DIAGNOSTICS
1198 if (print_diagnostics)
1199 {
1200 printf("Averaged color: %f %f %f %f\n", avg_color.x, avg_color.y, avg_color.z, avg_color.w);
1201 }
1202 #endif
1203
1204 // convert the color
1205 if (blk->rgb_lns[0])
1206 {
1207 int avg_red = static_cast < int >(floor(avg_color.x + 0.5f));
1208 int avg_green = static_cast < int >(floor(avg_color.y + 0.5f));
1209 int avg_blue = static_cast < int >(floor(avg_color.z + 0.5f));
1210
1211 if (avg_red < 0)
1212 avg_red = 0;
1213 else if (avg_red > 65535)
1214 avg_red = 65535;
1215
1216 if (avg_green < 0)
1217 avg_green = 0;
1218 else if (avg_green > 65535)
1219 avg_green = 65535;
1220
1221 if (avg_blue < 0)
1222 avg_blue = 0;
1223 else if (avg_blue > 65535)
1224 avg_blue = 65535;
1225
1226 avg_color.x = sf16_to_float(lns_to_sf16(avg_red));
1227 avg_color.y = sf16_to_float(lns_to_sf16(avg_green));
1228 avg_color.z = sf16_to_float(lns_to_sf16(avg_blue));
1229 }
1230 else
1231 {
1232 avg_color.x *= (1.0f / 65535.0f);
1233 avg_color.y *= (1.0f / 65535.0f);
1234 avg_color.z *= (1.0f / 65535.0f);
1235 }
1236 if (blk->alpha_lns[0])
1237 {
1238 int avg_alpha = static_cast < int >(floor(avg_color.w + 0.5f));
1239
1240 if (avg_alpha < 0)
1241 avg_alpha = 0;
1242 else if (avg_alpha > 65535)
1243 avg_alpha = 65535;
1244
1245 avg_color.w = sf16_to_float(lns_to_sf16(avg_alpha));
1246 }
1247 else
1248 {
1249 avg_color.w *= (1.0f / 65535.0f);
1250 }
1251
1252 #ifdef DEBUG_PRINT_DIAGNOSTICS
1253 if (print_diagnostics)
1254 {
1255 printf("Averaged color: %f %f %f %f (%d)\n", avg_color.x, avg_color.y, avg_color.z, avg_color.w, use_fp16);
1256
1257 }
1258 #endif
1259
1260 if (use_fp16)
1261 {
1262 scb->error_block = 0;
1263 scb->block_mode = -1;
1264 scb->partition_count = 0;
1265 scb->constant_color[0] = float_to_sf16(avg_color.x, SF_NEARESTEVEN);
1266 scb->constant_color[1] = float_to_sf16(avg_color.y, SF_NEARESTEVEN);
1267 scb->constant_color[2] = float_to_sf16(avg_color.z, SF_NEARESTEVEN);
1268 scb->constant_color[3] = float_to_sf16(avg_color.w, SF_NEARESTEVEN);
1269 }
1270
1271 else
1272 {
1273 scb->error_block = 0;
1274 scb->block_mode = -2;
1275 scb->partition_count = 0;
1276 float red = avg_color.x;
1277 float green = avg_color.y;
1278 float blue = avg_color.z;
1279 float alpha = avg_color.w;
1280 if (red < 0)
1281 red = 0;
1282 else if (red > 1)
1283 red = 1;
1284 if (green < 0)
1285 green = 0;
1286 else if (green > 1)
1287 green = 1;
1288 if (blue < 0)
1289 blue = 0;
1290 else if (blue > 1)
1291 blue = 1;
1292 if (alpha < 0)
1293 alpha = 0;
1294 else if (alpha > 1)
1295 alpha = 1;
1296 scb->constant_color[0] = static_cast < int >(floor(red * 65535.0f + 0.5f));
1297 scb->constant_color[1] = static_cast < int >(floor(green * 65535.0f + 0.5f));
1298 scb->constant_color[2] = static_cast < int >(floor(blue * 65535.0f + 0.5f));
1299 scb->constant_color[3] = static_cast < int >(floor(alpha * 65535.0f + 0.5f));
1300 }
1301 }
1302
// Usage counter per block mode. compress_symbolic_block() increments the
// entry at index (block_mode & 0x7ff) for every block whose selected
// block_mode is >= 0 (i.e. not one of the negative constant-color modes).
int block_mode_histogram[2048];
1304
compress_symbolic_block(const astc_codec_image * input_image,astc_decode_mode decode_mode,int xdim,int ydim,int zdim,const error_weighting_params * ewp,const imageblock * blk,symbolic_compressed_block * scb,compress_symbolic_block_buffers * tmpbuf)1305 float compress_symbolic_block(const astc_codec_image * input_image,
1306 astc_decode_mode decode_mode, int xdim, int ydim, int zdim, const error_weighting_params * ewp, const imageblock * blk, symbolic_compressed_block * scb,
1307 compress_symbolic_block_buffers * tmpbuf)
1308 {
1309 int i, j;
1310 int xpos = blk->xpos;
1311 int ypos = blk->ypos;
1312 int zpos = blk->zpos;
1313
1314 int x, y, z;
1315
1316
1317 #ifdef DEBUG_PRINT_DIAGNOSTICS
1318 if (print_diagnostics)
1319 {
1320 printf("Diagnostics of block of dimension %d x %d x %d\n\n", xdim, ydim, zdim);
1321
1322 printf("XPos: %d YPos: %d ZPos: %d\n", xpos, ypos, zpos);
1323
1324 printf("Red-min: %f Red-max: %f\n", blk->red_min, blk->red_max);
1325 printf("Green-min: %f Green-max: %f\n", blk->green_min, blk->green_max);
1326 printf("Blue-min: %f Blue-max: %f\n", blk->blue_min, blk->blue_max);
1327 printf("Alpha-min: %f Alpha-max: %f\n", blk->alpha_min, blk->alpha_max);
1328 printf("Grayscale: %d\n", blk->grayscale);
1329
1330 for (z = 0; z < zdim; z++)
1331 for (y = 0; y < ydim; y++)
1332 for (x = 0; x < xdim; x++)
1333 {
1334 int idx = ((z * ydim + y) * xdim + x) * 4;
1335 printf("Texel (%d %d %d) : orig=< %g, %g, %g, %g >, work=< %g, %g, %g, %g >\n",
1336 x, y, z,
1337 blk->orig_data[idx],
1338 blk->orig_data[idx + 1], blk->orig_data[idx + 2], blk->orig_data[idx + 3], blk->work_data[idx], blk->work_data[idx + 1], blk->work_data[idx + 2], blk->work_data[idx + 3]);
1339 }
1340 printf("\n");
1341 }
1342 #endif
1343
1344
1345 if (blk->red_min == blk->red_max && blk->green_min == blk->green_max && blk->blue_min == blk->blue_max && blk->alpha_min == blk->alpha_max)
1346 {
1347
1348 // detected a constant-color block. Encode as FP16 if using HDR
1349 scb->error_block = 0;
1350
1351 if (rgb_force_use_of_hdr)
1352 {
1353 scb->block_mode = -1;
1354 scb->partition_count = 0;
1355 scb->constant_color[0] = float_to_sf16(blk->orig_data[0], SF_NEARESTEVEN);
1356 scb->constant_color[1] = float_to_sf16(blk->orig_data[1], SF_NEARESTEVEN);
1357 scb->constant_color[2] = float_to_sf16(blk->orig_data[2], SF_NEARESTEVEN);
1358 scb->constant_color[3] = float_to_sf16(blk->orig_data[3], SF_NEARESTEVEN);
1359 }
1360 else
1361 {
1362 // Encode as UNORM16 if NOT using HDR.
1363 scb->block_mode = -2;
1364 scb->partition_count = 0;
1365 float red = blk->orig_data[0];
1366 float green = blk->orig_data[1];
1367 float blue = blk->orig_data[2];
1368 float alpha = blk->orig_data[3];
1369 if (red < 0)
1370 red = 0;
1371 else if (red > 1)
1372 red = 1;
1373 if (green < 0)
1374 green = 0;
1375 else if (green > 1)
1376 green = 1;
1377 if (blue < 0)
1378 blue = 0;
1379 else if (blue > 1)
1380 blue = 1;
1381 if (alpha < 0)
1382 alpha = 0;
1383 else if (alpha > 1)
1384 alpha = 1;
1385 scb->constant_color[0] = (int)floor(red * 65535.0f + 0.5f);
1386 scb->constant_color[1] = (int)floor(green * 65535.0f + 0.5f);
1387 scb->constant_color[2] = (int)floor(blue * 65535.0f + 0.5f);
1388 scb->constant_color[3] = (int)floor(alpha * 65535.0f + 0.5f);
1389 }
1390
1391 #ifdef DEBUG_PRINT_DIAGNOSTICS
1392 if (print_diagnostics)
1393 {
1394 printf("Block is single-color <%4.4X %4.4X %4.4X %4.4X>\n", scb->constant_color[0], scb->constant_color[1], scb->constant_color[2], scb->constant_color[3]);
1395 }
1396 #endif
1397
1398 if (print_tile_errors)
1399 printf("0\n");
1400
1401 physical_compressed_block psb = symbolic_to_physical(xdim, ydim, zdim, scb);
1402 physical_to_symbolic(xdim, ydim, zdim, psb, scb);
1403
1404 return 0.0f;
1405 }
1406
1407 error_weight_block *ewb = tmpbuf->ewb;
1408 error_weight_block_orig *ewbo = tmpbuf->ewbo;
1409
1410 float error_weight_sum = prepare_error_weight_block(input_image,
1411 xdim, ydim, zdim,
1412 ewp, blk, ewb, ewbo);
1413
1414 #ifdef DEBUG_PRINT_DIAGNOSTICS
1415 if (print_diagnostics)
1416 {
1417 printf("\n");
1418 for (z = 0; z < zdim; z++)
1419 for (y = 0; y < ydim; y++)
1420 for (x = 0; x < xdim; x++)
1421 {
1422 int idx = (z * ydim + y) * xdim + x;
1423 printf("ErrorWeight (%d %d %d) : < %g, %g, %g, %g >\n", x, y, z, ewb->error_weights[idx].x, ewb->error_weights[idx].y, ewb->error_weights[idx].z, ewb->error_weights[idx].w);
1424 }
1425 printf("\n");
1426 }
1427 #endif
1428
1429 symbolic_compressed_block *tempblocks = tmpbuf->tempblocks;
1430
1431 float error_of_best_block = 1e20f;
1432 // int modesel=0;
1433
1434 imageblock *temp = tmpbuf->temp;
1435
1436 float best_errorvals_in_modes[17];
1437 for (i = 0; i < 17; i++)
1438 best_errorvals_in_modes[i] = 1e30f;
1439
1440 int uses_alpha = imageblock_uses_alpha(xdim, ydim, zdim, blk);
1441
1442
1443 // compression of average-color blocks disabled for the time being;
1444 // they produce extremely severe block artifacts.
1445 #if 0
1446 // first, compress an averaged-color block
1447 compress_constant_color_block(xdim, ydim, zdim, blk, ewb, scb);
1448
1449 decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, scb, temp);
1450
1451 float avgblock_errorval = compute_imageblock_difference(xdim, ydim, zdim,
1452 blk, temp, ewb) * 4.0f; // bias somewhat against the average-color block.
1453
1454 #ifdef DEBUG_PRINT_DIAGNOSTICS
1455 if (print_diagnostics)
1456 {
1457 printf("\n-----------------------------------\n");
1458 printf("Average-color block test completed\n");
1459 printf("Resulting error value: %g\n", avgblock_errorval);
1460 }
1461 #endif
1462
1463
1464 if (avgblock_errorval < error_of_best_block)
1465 {
1466 #ifdef DEBUG_PRINT_DIAGNOSTICS
1467 if (print_diagnostics)
1468 printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
1469 #endif
1470
1471 error_of_best_block = avgblock_errorval;
1472 // *scb = tempblocks[j];
1473 modesel = 0;
1474 }
1475
1476 #ifdef DEBUG_PRINT_DIAGNOSTICS
1477 if (print_diagnostics)
1478 {
1479 printf("-----------------------------------\n");
1480 }
1481 #endif
1482 #endif
1483
1484
1485 float mode_cutoff = ewp->block_mode_cutoff;
1486
1487 // next, test mode #0. This mode uses 1 plane of weights and 1 partition.
1488 // we test it twice, first with a modecutoff of 0, then with the specified mode-cutoff.
1489 // This causes an early-out that speeds up encoding of "easy" content.
1490
1491 float modecutoffs[2];
1492 float errorval_mult[2] = { 2.5, 1 };
1493 modecutoffs[0] = 0;
1494 modecutoffs[1] = mode_cutoff;
1495
1496 #if 0
1497 if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
1498 goto END_OF_TESTS;
1499 #endif
1500
1501 float best_errorval_in_mode;
1502 for (i = 0; i < 2; i++)
1503 {
1504 compress_symbolic_block_fixed_partition_1_plane(decode_mode, modecutoffs[i], ewp->max_refinement_iters, xdim, ydim, zdim, 1, // partition count
1505 0, // partition index
1506 blk, ewb, tempblocks, tmpbuf->plane1);
1507
1508 best_errorval_in_mode = 1e30f;
1509 for (j = 0; j < 4; j++)
1510 {
1511 if (tempblocks[j].error_block)
1512 continue;
1513 decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
1514 float errorval = compute_imageblock_difference(xdim, ydim, zdim,
1515 blk, temp, ewb) * errorval_mult[i];
1516
1517 #ifdef DEBUG_PRINT_DIAGNOSTICS
1518 if (print_diagnostics)
1519 {
1520 printf("\n-----------------------------------\n");
1521 printf("Single-weight partition test 0 (1 partition) completed\n");
1522 printf("Resulting error value: %g\n", errorval);
1523 }
1524 #endif
1525
1526 if (errorval < best_errorval_in_mode)
1527 best_errorval_in_mode = errorval;
1528
1529 if (errorval < error_of_best_block)
1530 {
1531 #ifdef DEBUG_PRINT_DIAGNOSTICS
1532 if (print_diagnostics)
1533 printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
1534 #endif
1535
1536 error_of_best_block = errorval;
1537 *scb = tempblocks[j];
1538
1539 // modesel = 0;
1540 }
1541
1542 #ifdef DEBUG_PRINT_DIAGNOSTICS
1543 if (print_diagnostics)
1544 {
1545 printf("-----------------------------------\n");
1546 }
1547 #endif
1548 }
1549
1550 best_errorvals_in_modes[0] = best_errorval_in_mode;
1551 if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
1552 goto END_OF_TESTS;
1553 }
1554
1555 int is_normal_map;
1556 float lowest_correl;
1557 prepare_block_statistics(xdim, ydim, zdim, blk, ewb, &is_normal_map, &lowest_correl);
1558
1559 if (is_normal_map && lowest_correl < 0.99f)
1560 lowest_correl = 0.99f;
1561
1562 // next, test the four possible 1-partition, 2-planes modes
1563 for (i = 0; i < 4; i++)
1564 {
1565
1566 if (lowest_correl > ewp->lowest_correlation_cutoff)
1567 continue;
1568
1569 if (blk->grayscale && i != 3)
1570 continue;
1571
1572 if (!uses_alpha && i == 3)
1573 continue;
1574
1575 compress_symbolic_block_fixed_partition_2_planes(decode_mode, mode_cutoff, ewp->max_refinement_iters, xdim, ydim, zdim, 1, // partition count
1576 0, // partition index
1577 i, // the color component to test a separate plane of weights for.
1578 blk, ewb, tempblocks, tmpbuf->planes2);
1579
1580 best_errorval_in_mode = 1e30f;
1581 for (j = 0; j < 4; j++)
1582 {
1583 if (tempblocks[j].error_block)
1584 continue;
1585 decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
1586 float errorval = compute_imageblock_difference(xdim, ydim, zdim,
1587 blk, temp, ewb);
1588
1589 #ifdef DEBUG_PRINT_DIAGNOSTICS
1590 if (print_diagnostics)
1591 {
1592 printf("\n-----------------------------------\n");
1593 printf("Dual-weight partition test %d (1 partition) completed\n", i);
1594 printf("Resulting error value: %g\n", errorval);
1595 }
1596 #endif
1597
1598 if (errorval < best_errorval_in_mode)
1599 best_errorval_in_mode = errorval;
1600
1601 if (errorval < error_of_best_block)
1602 {
1603 #ifdef DEBUG_PRINT_DIAGNOSTICS
1604 if (print_diagnostics)
1605 printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
1606 #endif
1607
1608 error_of_best_block = errorval;
1609 *scb = tempblocks[j];
1610
1611 // modesel = i+1;
1612 }
1613
1614 #ifdef DEBUG_PRINT_DIAGNOSTICS
1615 if (print_diagnostics)
1616 {
1617 printf("-----------------------------------\n");
1618 }
1619 #endif
1620
1621 best_errorvals_in_modes[i + 1] = best_errorval_in_mode;
1622 }
1623
1624 if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
1625 goto END_OF_TESTS;
1626 }
1627
1628 // find best blocks for 2, 3 and 4 partitions
1629 int partition_count;
1630 for (partition_count = 2; partition_count <= 4; partition_count++)
1631 {
1632 int partition_indices_1plane[2];
1633 int partition_indices_2planes[2];
1634
1635 find_best_partitionings(ewp->partition_search_limit,
1636 xdim, ydim, zdim, partition_count, blk, ewb, 1,
1637 &(partition_indices_1plane[0]), &(partition_indices_1plane[1]), &(partition_indices_2planes[0]));
1638
1639 for (i = 0; i < 2; i++)
1640 {
1641 compress_symbolic_block_fixed_partition_1_plane(decode_mode, mode_cutoff, ewp->max_refinement_iters, xdim, ydim, zdim, partition_count, partition_indices_1plane[i], blk, ewb, tempblocks, tmpbuf->plane1);
1642
1643 best_errorval_in_mode = 1e30f;
1644 for (j = 0; j < 4; j++)
1645 {
1646 if (tempblocks[j].error_block)
1647 continue;
1648 decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
1649 float errorval = compute_imageblock_difference(xdim, ydim, zdim,
1650 blk, temp, ewb);
1651
1652 #ifdef DEBUG_PRINT_DIAGNOSTICS
1653 if (print_diagnostics)
1654 {
1655 printf("\n-----------------------------------\n");
1656 printf("Single-weight partition test %d (%d partitions) completed\n", i, partition_count);
1657 printf("Resulting error value: %g\n", errorval);
1658 }
1659 #endif
1660
1661 if (errorval < best_errorval_in_mode)
1662 best_errorval_in_mode = errorval;
1663
1664 if (errorval < error_of_best_block)
1665 {
1666 #ifdef DEBUG_PRINT_DIAGNOSTICS
1667 if (print_diagnostics)
1668 printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
1669 #endif
1670
1671 error_of_best_block = errorval;
1672 *scb = tempblocks[j];
1673
1674 // modesel = 4*(partition_count-2) + 5 + i;
1675 }
1676 }
1677
1678 best_errorvals_in_modes[4 * (partition_count - 2) + 5 + i] = best_errorval_in_mode;
1679
1680 #ifdef DEBUG_PRINT_DIAGNOSTICS
1681 if (print_diagnostics)
1682 {
1683 printf("-----------------------------------\n");
1684 }
1685 #endif
1686
1687 if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
1688 goto END_OF_TESTS;
1689 }
1690
1691
1692 if (partition_count == 2 && !is_normal_map && MIN(best_errorvals_in_modes[5], best_errorvals_in_modes[6]) > (best_errorvals_in_modes[0] * ewp->partition_1_to_2_limit))
1693 goto END_OF_TESTS;
1694
1695 // don't bother to check 4 partitions for dual plane of weightss, ever.
1696 if (partition_count == 4)
1697 break;
1698
1699 for (i = 0; i < 2; i++)
1700 {
1701 if (lowest_correl > ewp->lowest_correlation_cutoff)
1702 continue;
1703 compress_symbolic_block_fixed_partition_2_planes(decode_mode,
1704 mode_cutoff,
1705 ewp->max_refinement_iters,
1706 xdim, ydim, zdim,
1707 partition_count,
1708 partition_indices_2planes[i] & (PARTITION_COUNT - 1), partition_indices_2planes[i] >> PARTITION_BITS,
1709 blk, ewb, tempblocks, tmpbuf->planes2);
1710
1711 best_errorval_in_mode = 1e30f;
1712 for (j = 0; j < 4; j++)
1713 {
1714 if (tempblocks[j].error_block)
1715 continue;
1716 decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
1717
1718 float errorval = compute_imageblock_difference(xdim, ydim, zdim,
1719 blk, temp, ewb);
1720
1721 #ifdef DEBUG_PRINT_DIAGNOSTICS
1722 if (print_diagnostics)
1723 {
1724 printf("\n-----------------------------------\n");
1725 printf("Dual-weight partition test %d (%d partitions) completed\n", i, partition_count);
1726 printf("Resulting error value: %g\n", errorval);
1727 }
1728 #endif
1729
1730 if (errorval < best_errorval_in_mode)
1731 best_errorval_in_mode = errorval;
1732
1733 if (errorval < error_of_best_block)
1734 {
1735 #ifdef DEBUG_PRINT_DIAGNOSTICS
1736 if (print_diagnostics)
1737 printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
1738 #endif
1739
1740 error_of_best_block = errorval;
1741 *scb = tempblocks[j];
1742
1743 // modesel = 4*(partition_count-2) + 5 + 2 + i;
1744 }
1745 }
1746
1747 best_errorvals_in_modes[4 * (partition_count - 2) + 5 + 2 + i] = best_errorval_in_mode;
1748
1749 #ifdef DEBUG_PRINT_DIAGNOSTICS
1750 if (print_diagnostics)
1751 {
1752 printf("-----------------------------------\n");
1753 }
1754 #endif
1755
1756 if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
1757 goto END_OF_TESTS;
1758 }
1759 }
1760
1761 END_OF_TESTS:
1762
1763 #if 0
1764 if (print_statistics)
1765 {
1766 for (i = 0; i < 13; i++)
1767 printf("%f ", best_errorvals_in_modes[i]);
1768
1769 printf("%d %f %f %f ", modesel, error_of_best_block,
1770 MIN(best_errorvals_in_modes[1], best_errorvals_in_modes[2]) / best_errorvals_in_modes[0],
1771 MIN(MIN(best_errorvals_in_modes[7], best_errorvals_in_modes[8]), best_errorvals_in_modes[9]) / best_errorvals_in_modes[0]);
1772
1773 printf("\n");
1774 }
1775 #endif
1776
1777 if (scb->block_mode >= 0)
1778 block_mode_histogram[scb->block_mode & 0x7ff]++;
1779
1780
1781 // compress/decompress to a physical block
1782 physical_compressed_block psb = symbolic_to_physical(xdim, ydim, zdim, scb);
1783 physical_to_symbolic(xdim, ydim, zdim, psb, scb);
1784
1785
1786 if (print_tile_errors)
1787 printf("%g\n", error_of_best_block);
1788
1789
1790 // mean squared error per color component.
1791 return error_of_best_block / ((float)xdim * ydim * zdim);
1792 }
1793