1 /*----------------------------------------------------------------------------*/
2 /**
3  *	This confidential and proprietary software may be used only as
4  *	authorised by a licensing agreement from ARM Limited
5  *	(C) COPYRIGHT 2011-2012 ARM Limited
6  *	ALL RIGHTS RESERVED
7  *
8  *	The entire notice above must be reproduced on all authorised
9  *	copies and copies may only be made to the extent permitted
10  *	by a licensing agreement from ARM Limited.
11  *
12  *	@brief	Compress a block of colors, expressed as a symbolic block, for ASTC.
13  */
14 /*----------------------------------------------------------------------------*/
15 
16 #include "astc_codec_internals.h"
17 
18 #include "softfloat.h"
19 #include <math.h>
20 #include <string.h>
21 #include <stdio.h>
22 
23 #ifdef DEBUG_CAPTURE_NAN
24 	#ifndef _GNU_SOURCE
25 		#define _GNU_SOURCE
26 	#endif
27 
28 	#include <fenv.h>
29 #endif
30 
31 #include <stdio.h>
32 
realign_weights(astc_decode_mode decode_mode,int xdim,int ydim,int zdim,const imageblock * blk,const error_weight_block * ewb,symbolic_compressed_block * scb,uint8_t * weight_set8,uint8_t * plane2_weight_set8)33 int realign_weights(astc_decode_mode decode_mode,
34 					int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, symbolic_compressed_block * scb, uint8_t * weight_set8, uint8_t * plane2_weight_set8)
35 {
36 	int i, j;
37 
38 	// get the appropriate partition descriptor.
39 	int partition_count = scb->partition_count;
40 	const partition_info *pt = get_partition_table(xdim, ydim, zdim, partition_count);
41 	pt += scb->partition_index;
42 
43 	// get the appropriate block descriptor
44 	const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
45 	const decimation_table *const *ixtab2 = bsd->decimation_tables;
46 
47 	const decimation_table *it = ixtab2[bsd->block_modes[scb->block_mode].decimation_mode];
48 
49 	int is_dual_plane = bsd->block_modes[scb->block_mode].is_dual_plane;
50 
51 	// get quantization-parameters
52 	int weight_quantization_level = bsd->block_modes[scb->block_mode].quantization_mode;
53 
54 
55 	// decode the color endpoints
56 	ushort4 color_endpoint0[4];
57 	ushort4 color_endpoint1[4];
58 	int rgb_hdr[4];
59 	int alpha_hdr[4];
60 	int nan_endpoint[4];
61 
62 
63 	for (i = 0; i < partition_count; i++)
64 		unpack_color_endpoints(decode_mode,
65 							   scb->color_formats[i], scb->color_quantization_level, scb->color_values[i], &rgb_hdr[i], &alpha_hdr[i], &nan_endpoint[i], &(color_endpoint0[i]), &(color_endpoint1[i]));
66 
67 
68 	float uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK];
69 	float uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK];
70 	int weight_count = it->num_weights;
71 
72 	// read and unquantize the weights.
73 
74 	const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]);
75 
76 	for (i = 0; i < weight_count; i++)
77 	{
78 		uq_plane1_weights[i] = qat->unquantized_value_flt[weight_set8[i]];
79 	}
80 	if (is_dual_plane)
81 	{
82 		for (i = 0; i < weight_count; i++)
83 			uq_plane2_weights[i] = qat->unquantized_value_flt[plane2_weight_set8[i]];
84 	}
85 
86 
87 	int plane2_color_component = is_dual_plane ? scb->plane2_color_component : -1;
88 
89 	// for each weight, unquantize the weight, use it to compute a color and a color error.
90 	// then, increment the weight until the color error stops decreasing
91 	// then, decrement the weight until the color error stops increasing
92 
93 	#define COMPUTE_ERROR( errorvar ) \
94 		errorvar = 0.0f; \
95 		for(j=0;j<texels_to_evaluate;j++) \
96 			{ \
97 			int texel = it->weight_texel[i][j]; \
98 			int partition = pt->partition_of_texel[texel]; \
99 			float plane1_weight = compute_value_of_texel_flt( texel, it, uq_plane1_weights ); \
100 			float plane2_weight = 0.0f; \
101 			if( is_dual_plane ) \
102 				plane2_weight = compute_value_of_texel_flt( texel, it, uq_plane2_weights ); \
103 			int int_plane1_weight = static_cast<int>(floor( plane1_weight*64.0f + 0.5f ) ); \
104 			int int_plane2_weight = static_cast<int>(floor( plane2_weight*64.0f + 0.5f ) ); \
105 			ushort4 lrp_color = lerp_color_int( \
106 				decode_mode, \
107 				color_endpoint0[partition], \
108 				color_endpoint1[partition], \
109 				int_plane1_weight, \
110 				int_plane2_weight, \
111 				plane2_color_component ); \
112 			float4 color = float4( lrp_color.x, lrp_color.y, lrp_color.z, lrp_color.w ); \
113 			float4 origcolor = float4( \
114 				blk->work_data[4*texel], \
115 				blk->work_data[4*texel+1], \
116 				blk->work_data[4*texel+2], \
117 				blk->work_data[4*texel+3] ); \
118 			float4 error_weight = ewb->error_weights[texel]; \
119 			float4 colordiff = color - origcolor; \
120 			errorvar += dot( colordiff*colordiff, error_weight ); \
121 			}
122 
123 
124 	int adjustments = 0;
125 
126 	for (i = 0; i < weight_count; i++)
127 	{
128 		int current_wt = weight_set8[i];
129 		int texels_to_evaluate = it->weight_num_texels[i];
130 
131 		float current_error;
132 
133 		COMPUTE_ERROR(current_error);
134 
135 		// increment until error starts increasing.
136 		while (1)
137 		{
138 			int next_wt = qat->next_quantized_value[current_wt];
139 			if (next_wt == current_wt)
140 				break;
141 			uq_plane1_weights[i] = qat->unquantized_value_flt[next_wt];
142 			float next_error;
143 			COMPUTE_ERROR(next_error);
144 			if (next_error < current_error)
145 			{
146 				// succeeded, increment the weight
147 				current_wt = next_wt;
148 				current_error = next_error;
149 				adjustments++;
150 			}
151 			else
152 			{
153 				// failed, back out the attempted increment
154 				uq_plane1_weights[i] = qat->unquantized_value_flt[current_wt];
155 				break;
156 			}
157 		}
158 		// decrement until error starts increasing
159 		while (1)
160 		{
161 			int prev_wt = qat->prev_quantized_value[current_wt];
162 			if (prev_wt == current_wt)
163 				break;
164 			uq_plane1_weights[i] = qat->unquantized_value_flt[prev_wt];
165 			float prev_error;
166 			COMPUTE_ERROR(prev_error);
167 			if (prev_error < current_error)
168 			{
169 				// succeeded, decrement the weight
170 				current_wt = prev_wt;
171 				current_error = prev_error;
172 				adjustments++;
173 			}
174 			else
175 			{
176 				// failed, back out the attempted decrement
177 				uq_plane1_weights[i] = qat->unquantized_value_flt[current_wt];
178 				break;
179 			}
180 		}
181 
182 		weight_set8[i] = current_wt;
183 	}
184 
185 	if (!is_dual_plane)
186 		return adjustments;
187 
188 	// processing of the second plane of weights
189 	for (i = 0; i < weight_count; i++)
190 	{
191 		int current_wt = plane2_weight_set8[i];
192 		int texels_to_evaluate = it->weight_num_texels[i];
193 
194 		float current_error;
195 
196 		COMPUTE_ERROR(current_error);
197 
198 		// increment until error starts increasing.
199 		while (1)
200 		{
201 			int next_wt = qat->next_quantized_value[current_wt];
202 			if (next_wt == current_wt)
203 				break;
204 			uq_plane2_weights[i] = qat->unquantized_value_flt[next_wt];
205 			float next_error;
206 			COMPUTE_ERROR(next_error);
207 			if (next_error < current_error)
208 			{
209 				// succeeded, increment the weight
210 				current_wt = next_wt;
211 				current_error = next_error;
212 				adjustments++;
213 			}
214 			else
215 			{
216 				// failed, back out the attempted increment
217 				uq_plane2_weights[i] = qat->unquantized_value_flt[current_wt];
218 				break;
219 			}
220 		}
221 		// decrement until error starts increasing
222 		while (1)
223 		{
224 			int prev_wt = qat->prev_quantized_value[current_wt];
225 			if (prev_wt == current_wt)
226 				break;
227 			uq_plane2_weights[i] = qat->unquantized_value_flt[prev_wt];
228 			float prev_error;
229 			COMPUTE_ERROR(prev_error);
230 			if (prev_error < current_error)
231 			{
232 				// succeeded, decrement the weight
233 				current_wt = prev_wt;
234 				current_error = prev_error;
235 				adjustments++;
236 			}
237 			else
238 			{
239 				// failed, back out the attempted decrement
240 				uq_plane2_weights[i] = qat->unquantized_value_flt[current_wt];
241 				break;
242 			}
243 		}
244 
245 		plane2_weight_set8[i] = current_wt;
246 	}
247 
248 	return adjustments;
249 }
250 
251 /*
252 	function for compressing a block symbolically, given that we have already decided on a partition
253 */
254 
255 
256 
compress_symbolic_block_fixed_partition_1_plane(astc_decode_mode decode_mode,float mode_cutoff,int max_refinement_iters,int xdim,int ydim,int zdim,int partition_count,int partition_index,const imageblock * blk,const error_weight_block * ewb,symbolic_compressed_block * scb,compress_fixed_partition_buffers * tmpbuf)257 static void compress_symbolic_block_fixed_partition_1_plane(astc_decode_mode decode_mode,
258 															float mode_cutoff,
259 															int max_refinement_iters,
260 															int xdim, int ydim, int zdim,
261 															int partition_count, int partition_index,
262 															const imageblock * blk, const error_weight_block * ewb, symbolic_compressed_block * scb,
263 															compress_fixed_partition_buffers * tmpbuf)
264 {
265 	int i, j, k;
266 
267 
268 	static const int free_bits_for_partition_count[5] = { 0, 115 - 4, 111 - 4 - PARTITION_BITS, 108 - 4 - PARTITION_BITS, 105 - 4 - PARTITION_BITS };
269 
270 	const partition_info *pi = get_partition_table(xdim, ydim, zdim, partition_count);
271 	pi += partition_index;
272 
273 	// first, compute ideal weights and endpoint colors, under thre assumption that
274 	// there is no quantization or decimation going on.
275 	endpoints_and_weights *ei = tmpbuf->ei1;
276 	endpoints_and_weights *eix = tmpbuf->eix1;
277 	compute_endpoints_and_ideal_weights_1_plane(xdim, ydim, zdim, pi, blk, ewb, ei);
278 
279 	// next, compute ideal weights and endpoint colors for every decimation.
280 	const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
281 	const decimation_table *const *ixtab2 = bsd->decimation_tables;
282 	// int block_mode_count = bsd->single_plane_block_mode_count;
283 
284 
285 	float *decimated_quantized_weights = tmpbuf->decimated_quantized_weights;
286 	float *decimated_weights = tmpbuf->decimated_weights;
287 	float *flt_quantized_decimated_quantized_weights = tmpbuf->flt_quantized_decimated_quantized_weights;
288 	uint8_t *u8_quantized_decimated_quantized_weights = tmpbuf->u8_quantized_decimated_quantized_weights;
289 
290 	// for each decimation mode, compute an ideal set of weights
291 	// (that is, weights computed with the assumption that they are not quantized)
292 	for (i = 0; i < MAX_DECIMATION_MODES; i++)
293 	{
294 		if (bsd->permit_encode[i] == 0 || bsd->decimation_mode_maxprec_1plane[i] < 0 || bsd->decimation_mode_percentile[i] > mode_cutoff)
295 			continue;
296 		eix[i] = *ei;
297 		compute_ideal_weights_for_decimation_table(&(eix[i]), ixtab2[i], decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, decimated_weights + i * MAX_WEIGHTS_PER_BLOCK);
298 
299 	}
300 
301 	// compute maximum colors for the endpoints and ideal weights.
302 	// for each endpoint-and-ideal-weight pair, compute the smallest weight value
303 	// that will result in a color value greater than 1.
304 
305 
306 	float4 min_ep = float4(10, 10, 10, 10);
307 	for (i = 0; i < partition_count; i++)
308 	{
309 		#ifdef DEBUG_CAPTURE_NAN
310 			fedisableexcept(FE_DIVBYZERO | FE_INVALID);
311 		#endif
312 
313 		float4 ep = (float4(1, 1, 1, 1) - ei->ep.endpt0[i]) / (ei->ep.endpt1[i] - ei->ep.endpt0[i]);
314 		if (ep.x > 0.5f && ep.x < min_ep.x)
315 			min_ep.x = ep.x;
316 		if (ep.y > 0.5f && ep.y < min_ep.y)
317 			min_ep.y = ep.y;
318 		if (ep.z > 0.5f && ep.z < min_ep.z)
319 			min_ep.z = ep.z;
320 		if (ep.w > 0.5f && ep.w < min_ep.w)
321 			min_ep.w = ep.w;
322 
323 		#ifdef DEBUG_CAPTURE_NAN
324 			feenableexcept(FE_DIVBYZERO | FE_INVALID);
325 		#endif
326 	}
327 
328 	float min_wt_cutoff = MIN(MIN(min_ep.x, min_ep.y), MIN(min_ep.z, min_ep.w));
329 
330 	// for each mode, use the angular method to compute a shift.
331 	float weight_low_value[MAX_WEIGHT_MODES];
332 	float weight_high_value[MAX_WEIGHT_MODES];
333 
334 	compute_angular_endpoints_1plane(mode_cutoff, bsd, decimated_quantized_weights, decimated_weights, weight_low_value, weight_high_value);
335 
336 	// for each mode (which specifies a decimation and a quantization):
337 	// * compute number of bits needed for the quantized weights.
338 	// * generate an optimized set of quantized weights.
339 	// * compute quantization errors for the mode.
340 
341 	int qwt_bitcounts[MAX_WEIGHT_MODES];
342 	float qwt_errors[MAX_WEIGHT_MODES];
343 
344 	for (i = 0; i < MAX_WEIGHT_MODES; i++)
345 	{
346 		if (bsd->block_modes[i].permit_encode == 0 || bsd->block_modes[i].is_dual_plane != 0 || bsd->block_modes[i].percentile > mode_cutoff)
347 		{
348 			qwt_errors[i] = 1e38f;
349 			continue;
350 		}
351 		if (weight_high_value[i] > 1.02f * min_wt_cutoff)
352 			weight_high_value[i] = 1.0f;
353 
354 		int decimation_mode = bsd->block_modes[i].decimation_mode;
355 		if (bsd->decimation_mode_percentile[decimation_mode] > mode_cutoff)
356 			ASTC_CODEC_INTERNAL_ERROR;
357 
358 
359 		// compute weight bitcount for the mode
360 		int bits_used_by_weights = compute_ise_bitcount(ixtab2[decimation_mode]->num_weights,
361 														(quantization_method) bsd->block_modes[i].quantization_mode);
362 		int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights;
363 		if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96)
364 		{
365 			qwt_errors[i] = 1e38f;
366 			continue;
367 		}
368 		qwt_bitcounts[i] = bitcount;
369 
370 
371 		// then, generate the optimized set of weights for the weight mode.
372 		compute_ideal_quantized_weights_for_decimation_table(&(eix[decimation_mode]),
373 															 ixtab2[decimation_mode],
374 															 weight_low_value[i], weight_high_value[i],
375 															 decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * decimation_mode,
376 															 flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i,
377 															 u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i,
378 															 bsd->block_modes[i].quantization_mode);
379 
380 		// then, compute weight-errors for the weight mode.
381 		qwt_errors[i] = compute_error_of_weight_set(&(eix[decimation_mode]), ixtab2[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i);
382 
383 		#ifdef DEBUG_PRINT_DIAGNOSTICS
384 			if (print_diagnostics)
385 				printf("Block mode %d -> weight error = %f\n", i, qwt_errors[i]);
386 		#endif
387 	}
388 
389 	// for each weighting mode, determine the optimal combination of color endpoint encodings
390 	// and weight encodings; return results for the 4 best-looking modes.
391 
392 	int partition_format_specifiers[4][4];
393 	int quantized_weight[4];
394 	int color_quantization_level[4];
395 	int color_quantization_level_mod[4];
396 	determine_optimal_set_of_endpoint_formats_to_use(xdim, ydim, zdim, pi, blk, ewb, &(ei->ep), -1,	// used to flag that we are in single-weight mode
397 													 qwt_bitcounts, qwt_errors, partition_format_specifiers, quantized_weight, color_quantization_level, color_quantization_level_mod);
398 
399 
400 	// then iterate over the 4 believed-to-be-best modes to find out which one is
401 	// actually best.
402 	for (i = 0; i < 4; i++)
403 	{
404 		uint8_t *u8_weight_src;
405 		int weights_to_copy;
406 
407 		if (quantized_weight[i] < 0)
408 		{
409 			scb->error_block = 1;
410 			scb++;
411 			continue;
412 		}
413 
414 		int decimation_mode = bsd->block_modes[quantized_weight[i]].decimation_mode;
415 		int weight_quantization_mode = bsd->block_modes[quantized_weight[i]].quantization_mode;
416 		const decimation_table *it = ixtab2[decimation_mode];
417 
418 		#ifdef DEBUG_PRINT_DIAGNOSTICS
419 			if (print_diagnostics)
420 			{
421 				printf("Selected mode = %d\n", quantized_weight[i]);
422 				printf("Selected decimation mode = %d\n", decimation_mode);
423 				printf("Selected weight-quantization mode = %d\n", weight_quantization_mode);
424 			}
425 		#endif
426 
427 		u8_weight_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * quantized_weight[i];
428 
429 		weights_to_copy = it->num_weights;
430 
431 		// recompute the ideal color endpoints before storing them.
432 		float4 rgbs_colors[4];
433 		float4 rgbo_colors[4];
434 		float2 lum_intervals[4];
435 
436 		int l;
437 		for (l = 0; l < max_refinement_iters; l++)
438 		{
439 			recompute_ideal_colors(xdim, ydim, zdim, weight_quantization_mode, &(eix[decimation_mode].ep), rgbs_colors, rgbo_colors, lum_intervals, u8_weight_src, NULL, -1, pi, it, blk, ewb);
440 
441 			// quantize the chosen color
442 
443 			// store the colors for the block
444 			for (j = 0; j < partition_count; j++)
445 			{
446 				scb->color_formats[j] = pack_color_endpoints(decode_mode,
447 															 eix[decimation_mode].ep.endpt0[j],
448 															 eix[decimation_mode].ep.endpt1[j],
449 															 rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], scb->color_values[j], color_quantization_level[i]);
450 			}
451 
452 
453 			// if all the color endpoint modes are the same, we get a few more
454 			// bits to store colors; let's see if we can take advantage of this:
455 			// requantize all the colors and see if the endpoint modes remain the same;
456 			// if they do, then exploit it.
457 			scb->color_formats_matched = 0;
458 
459 			if ((partition_count >= 2 && scb->color_formats[0] == scb->color_formats[1]
460 				 && color_quantization_level != color_quantization_level_mod)
461 				&& (partition_count == 2 || (scb->color_formats[0] == scb->color_formats[2] && (partition_count == 3 || (scb->color_formats[0] == scb->color_formats[3])))))
462 			{
463 				int colorvals[4][12];
464 				int color_formats_mod[4];
465 				for (j = 0; j < partition_count; j++)
466 				{
467 					color_formats_mod[j] = pack_color_endpoints(decode_mode,
468 																eix[decimation_mode].ep.endpt0[j],
469 																eix[decimation_mode].ep.endpt1[j],
470 																rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], colorvals[j], color_quantization_level_mod[i]);
471 				}
472 				if (color_formats_mod[0] == color_formats_mod[1]
473 					&& (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3])))))
474 				{
475 					scb->color_formats_matched = 1;
476 					for (j = 0; j < 4; j++)
477 						for (k = 0; k < 12; k++)
478 							scb->color_values[j][k] = colorvals[j][k];
479 					for (j = 0; j < 4; j++)
480 						scb->color_formats[j] = color_formats_mod[j];
481 				}
482 			}
483 
484 
485 			// store header fields
486 			scb->partition_count = partition_count;
487 			scb->partition_index = partition_index;
488 			scb->color_quantization_level = scb->color_formats_matched ? color_quantization_level_mod[i] : color_quantization_level[i];
489 			scb->block_mode = quantized_weight[i];
490 			scb->error_block = 0;
491 
492 			if (scb->color_quantization_level < 4)
493 			{
494 				scb->error_block = 1;	// should never happen, but cannot prove it impossible.
495 			}
496 
497 			// perform a final pass over the weights to try to improve them.
498 			int adjustments = realign_weights(decode_mode,
499 											  xdim, ydim, zdim,
500 											  blk, ewb, scb,
501 											  u8_weight_src,
502 											  NULL);
503 
504 			if (adjustments == 0)
505 				break;
506 		}
507 
508 		for (j = 0; j < weights_to_copy; j++)
509 			scb->plane1_weights[j] = u8_weight_src[j];
510 
511 		scb++;
512 	}
513 
514 }
515 
516 
517 
518 
519 
520 
compress_symbolic_block_fixed_partition_2_planes(astc_decode_mode decode_mode,float mode_cutoff,int max_refinement_iters,int xdim,int ydim,int zdim,int partition_count,int partition_index,int separate_component,const imageblock * blk,const error_weight_block * ewb,symbolic_compressed_block * scb,compress_fixed_partition_buffers * tmpbuf)521 static void compress_symbolic_block_fixed_partition_2_planes(astc_decode_mode decode_mode,
522 															 float mode_cutoff,
523 															 int max_refinement_iters,
524 															 int xdim, int ydim, int zdim,
525 															 int partition_count, int partition_index,
526 															 int separate_component, const imageblock * blk, const error_weight_block * ewb,
527 															 symbolic_compressed_block * scb,
528 															 compress_fixed_partition_buffers * tmpbuf)
529 {
530 	int i, j, k;
531 
532 	static const int free_bits_for_partition_count[5] =
533 		{ 0, 113 - 4, 109 - 4 - PARTITION_BITS, 106 - 4 - PARTITION_BITS, 103 - 4 - PARTITION_BITS };
534 
535 	const partition_info *pi = get_partition_table(xdim, ydim, zdim, partition_count);
536 	pi += partition_index;
537 
538 	// first, compute ideal weights and endpoint colors
539 	endpoints_and_weights *ei1 = tmpbuf->ei1;
540 	endpoints_and_weights *ei2 = tmpbuf->ei2;
541 	endpoints_and_weights *eix1 = tmpbuf->eix1;
542 	endpoints_and_weights *eix2 = tmpbuf->eix2;
543 	compute_endpoints_and_ideal_weights_2_planes(xdim, ydim, zdim, pi, blk, ewb, separate_component, ei1, ei2);
544 
545 	// next, compute ideal weights and endpoint colors for every decimation.
546 	const block_size_descriptor *bsd = get_block_size_descriptor(xdim, ydim, zdim);
547 	const decimation_table *const *ixtab2 = bsd->decimation_tables;
548 
549 
550 	float *decimated_quantized_weights = tmpbuf->decimated_quantized_weights;
551 	float *decimated_weights = tmpbuf->decimated_weights;
552 	float *flt_quantized_decimated_quantized_weights = tmpbuf->flt_quantized_decimated_quantized_weights;
553 	uint8_t *u8_quantized_decimated_quantized_weights = tmpbuf->u8_quantized_decimated_quantized_weights;
554 
555 	// for each decimation mode, compute an ideal set of weights
556 	for (i = 0; i < MAX_DECIMATION_MODES; i++)
557 	{
558 		if (bsd->permit_encode[i] == 0 || bsd->decimation_mode_maxprec_2planes[i] < 0 || bsd->decimation_mode_percentile[i] > mode_cutoff)
559 			continue;
560 
561 		eix1[i] = *ei1;
562 		eix2[i] = *ei2;
563 		compute_ideal_weights_for_decimation_table(&(eix1[i]), ixtab2[i], decimated_quantized_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK, decimated_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK);
564 		compute_ideal_weights_for_decimation_table(&(eix2[i]), ixtab2[i], decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK);
565 	}
566 
567 	// compute maximum colors for the endpoints and ideal weights.
568 	// for each endpoint-and-ideal-weight pair, compute the smallest weight value
569 	// that will result in a color value greater than 1.
570 
571 	float4 min_ep1 = float4(10, 10, 10, 10);
572 	float4 min_ep2 = float4(10, 10, 10, 10);
573 	for (i = 0; i < partition_count; i++)
574 	{
575 
576 		#ifdef DEBUG_CAPTURE_NAN
577 			fedisableexcept(FE_DIVBYZERO | FE_INVALID);
578 		#endif
579 
580 		float4 ep1 = (float4(1, 1, 1, 1) - ei1->ep.endpt0[i]) / (ei1->ep.endpt1[i] - ei1->ep.endpt0[i]);
581 		if (ep1.x > 0.5f && ep1.x < min_ep1.x)
582 			min_ep1.x = ep1.x;
583 		if (ep1.y > 0.5f && ep1.y < min_ep1.y)
584 			min_ep1.y = ep1.y;
585 		if (ep1.z > 0.5f && ep1.z < min_ep1.z)
586 			min_ep1.z = ep1.z;
587 		if (ep1.w > 0.5f && ep1.w < min_ep1.w)
588 			min_ep1.w = ep1.w;
589 		float4 ep2 = (float4(1, 1, 1, 1) - ei2->ep.endpt0[i]) / (ei2->ep.endpt1[i] - ei2->ep.endpt0[i]);
590 		if (ep2.x > 0.5f && ep2.x < min_ep2.x)
591 			min_ep2.x = ep2.x;
592 		if (ep2.y > 0.5f && ep2.y < min_ep2.y)
593 			min_ep2.y = ep2.y;
594 		if (ep2.z > 0.5f && ep2.z < min_ep2.z)
595 			min_ep2.z = ep2.z;
596 		if (ep2.w > 0.5f && ep2.w < min_ep2.w)
597 			min_ep2.w = ep2.w;
598 
599 		#ifdef DEBUG_CAPTURE_NAN
600 			feenableexcept(FE_DIVBYZERO | FE_INVALID);
601 		#endif
602 	}
603 
604 	float min_wt_cutoff1, min_wt_cutoff2;
605 	switch (separate_component)
606 	{
607 	case 0:
608 		min_wt_cutoff2 = min_ep2.x;
609 		min_ep1.x = 1e30f;
610 		break;
611 	case 1:
612 		min_wt_cutoff2 = min_ep2.y;
613 		min_ep1.y = 1e30f;
614 		break;
615 	case 2:
616 		min_wt_cutoff2 = min_ep2.z;
617 		min_ep1.z = 1e30f;
618 		break;
619 	case 3:
620 		min_wt_cutoff2 = min_ep2.w;
621 		min_ep1.w = 1e30f;
622 		break;
623 	default:
624 		min_wt_cutoff2 = 1e30f;
625 	}
626 
627 	min_wt_cutoff1 = MIN(MIN(min_ep1.x, min_ep1.y), MIN(min_ep1.z, min_ep1.w));
628 
629 	float weight_low_value1[MAX_WEIGHT_MODES];
630 	float weight_high_value1[MAX_WEIGHT_MODES];
631 	float weight_low_value2[MAX_WEIGHT_MODES];
632 	float weight_high_value2[MAX_WEIGHT_MODES];
633 
634 	compute_angular_endpoints_2planes(mode_cutoff, bsd, decimated_quantized_weights, decimated_weights, weight_low_value1, weight_high_value1, weight_low_value2, weight_high_value2);
635 
636 	// for each mode (which specifies a decimation and a quantization):
637 	// * generate an optimized set of quantized weights.
638 	// * compute quantization errors for each mode
639 	// * compute number of bits needed for the quantized weights.
640 
641 	int qwt_bitcounts[MAX_WEIGHT_MODES];
642 	float qwt_errors[MAX_WEIGHT_MODES];
643 	for (i = 0; i < MAX_WEIGHT_MODES; i++)
644 	{
645 		if (bsd->block_modes[i].permit_encode == 0 || bsd->block_modes[i].is_dual_plane != 1 || bsd->block_modes[i].percentile > mode_cutoff)
646 		{
647 			qwt_errors[i] = 1e38f;
648 			continue;
649 		}
650 		int decimation_mode = bsd->block_modes[i].decimation_mode;
651 
652 		if (weight_high_value1[i] > 1.02f * min_wt_cutoff1)
653 			weight_high_value1[i] = 1.0f;
654 		if (weight_high_value2[i] > 1.02f * min_wt_cutoff2)
655 			weight_high_value2[i] = 1.0f;
656 
657 		// compute weight bitcount for the mode
658 		int bits_used_by_weights = compute_ise_bitcount(2 * ixtab2[decimation_mode]->num_weights,
659 														(quantization_method) bsd->block_modes[i].quantization_mode);
660 		int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights;
661 		if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96)
662 		{
663 			qwt_errors[i] = 1e38f;
664 			continue;
665 		}
666 		qwt_bitcounts[i] = bitcount;
667 
668 
669 		// then, generate the optimized set of weights for the mode.
670 		compute_ideal_quantized_weights_for_decimation_table(&(eix1[decimation_mode]),
671 															 ixtab2[decimation_mode],
672 															 weight_low_value1[i],
673 															 weight_high_value1[i],
674 															 decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode),
675 															 flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i),
676 															 u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), bsd->block_modes[i].quantization_mode);
677 		compute_ideal_quantized_weights_for_decimation_table(&(eix2[decimation_mode]),
678 															 ixtab2[decimation_mode],
679 															 weight_low_value2[i],
680 															 weight_high_value2[i],
681 															 decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode + 1),
682 															 flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1),
683 															 u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), bsd->block_modes[i].quantization_mode);
684 
685 
686 		// then, compute quantization errors for the block mode.
687 		qwt_errors[i] =
688 			compute_error_of_weight_set(&(eix1[decimation_mode]),
689 									   ixtab2[decimation_mode],
690 									   flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i))
691 			+ compute_error_of_weight_set(&(eix2[decimation_mode]), ixtab2[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1));
692 	}
693 
694 
695 	// decide the optimal combination of color endpoint encodings and weight encoodings.
696 	int partition_format_specifiers[4][4];
697 	int quantized_weight[4];
698 	int color_quantization_level[4];
699 	int color_quantization_level_mod[4];
700 
701 	endpoints epm;
702 	merge_endpoints(&(ei1->ep), &(ei2->ep), separate_component, &epm);
703 
704 	determine_optimal_set_of_endpoint_formats_to_use(xdim, ydim, zdim,
705 													 pi,
706 													 blk,
707 													 ewb,
708 													 &epm, separate_component, qwt_bitcounts, qwt_errors, partition_format_specifiers, quantized_weight, color_quantization_level, color_quantization_level_mod);
709 
710 	for (i = 0; i < 4; i++)
711 	{
712 		if (quantized_weight[i] < 0)
713 		{
714 			scb->error_block = 1;
715 			scb++;
716 			continue;
717 		}
718 
719 		uint8_t *u8_weight1_src;
720 		uint8_t *u8_weight2_src;
721 		int weights_to_copy;
722 
723 		int decimation_mode = bsd->block_modes[quantized_weight[i]].decimation_mode;
724 		int weight_quantization_mode = bsd->block_modes[quantized_weight[i]].quantization_mode;
725 		const decimation_table *it = ixtab2[decimation_mode];
726 
727 		u8_weight1_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * quantized_weight[i]);
728 		u8_weight2_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * quantized_weight[i] + 1);
729 
730 
731 		weights_to_copy = it->num_weights;
732 
733 		// recompute the ideal color endpoints before storing them.
734 		merge_endpoints(&(eix1[decimation_mode].ep), &(eix2[decimation_mode].ep), separate_component, &epm);
735 
736 		float4 rgbs_colors[4];
737 		float4 rgbo_colors[4];
738 		float2 lum_intervals[4];
739 
740 		int l;
741 		for (l = 0; l < max_refinement_iters; l++)
742 		{
743 			recompute_ideal_colors(xdim, ydim, zdim, weight_quantization_mode, &epm, rgbs_colors, rgbo_colors, lum_intervals, u8_weight1_src, u8_weight2_src, separate_component, pi, it, blk, ewb);
744 
745 			// store the colors for the block
746 			for (j = 0; j < partition_count; j++)
747 			{
748 				scb->color_formats[j] = pack_color_endpoints(decode_mode,
749 															 epm.endpt0[j],
750 															 epm.endpt1[j],
751 															 rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], scb->color_values[j], color_quantization_level[i]);
752 			}
753 			scb->color_formats_matched = 0;
754 
755 			if ((partition_count >= 2 && scb->color_formats[0] == scb->color_formats[1]
756 				 && color_quantization_level != color_quantization_level_mod)
757 				&& (partition_count == 2 || (scb->color_formats[0] == scb->color_formats[2] && (partition_count == 3 || (scb->color_formats[0] == scb->color_formats[3])))))
758 			{
759 				int colorvals[4][12];
760 				int color_formats_mod[4];
761 				for (j = 0; j < partition_count; j++)
762 				{
763 					color_formats_mod[j] = pack_color_endpoints(decode_mode,
764 																epm.endpt0[j],
765 																epm.endpt1[j],
766 																rgbs_colors[j], rgbo_colors[j], lum_intervals[j], partition_format_specifiers[i][j], colorvals[j], color_quantization_level_mod[i]);
767 				}
768 				if (color_formats_mod[0] == color_formats_mod[1]
769 					&& (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3])))))
770 				{
771 					scb->color_formats_matched = 1;
772 					for (j = 0; j < 4; j++)
773 						for (k = 0; k < 12; k++)
774 							scb->color_values[j][k] = colorvals[j][k];
775 					for (j = 0; j < 4; j++)
776 						scb->color_formats[j] = color_formats_mod[j];
777 				}
778 			}
779 
780 
781 			// store header fields
782 			scb->partition_count = partition_count;
783 			scb->partition_index = partition_index;
784 			scb->color_quantization_level = scb->color_formats_matched ? color_quantization_level_mod[i] : color_quantization_level[i];
785 			scb->block_mode = quantized_weight[i];
786 			scb->plane2_color_component = separate_component;
787 			scb->error_block = 0;
788 
789 			if (scb->color_quantization_level < 4)
790 			{
791 				scb->error_block = 1;	// should never happen, but cannot prove it impossible
792 			}
793 
794 			int adjustments = realign_weights(decode_mode,
795 											  xdim, ydim, zdim,
796 											  blk, ewb, scb,
797 											  u8_weight1_src,
798 											  u8_weight2_src);
799 
800 			if (adjustments == 0)
801 				break;
802 		}
803 
804 		for (j = 0; j < weights_to_copy; j++)
805 		{
806 			scb->plane1_weights[j] = u8_weight1_src[j];
807 			scb->plane2_weights[j] = u8_weight2_src[j];
808 		}
809 
810 		scb++;
811 	}
812 
813 }
814 
815 
816 
817 
818 
expand_block_artifact_suppression(int xdim,int ydim,int zdim,error_weighting_params * ewp)819 void expand_block_artifact_suppression(int xdim, int ydim, int zdim, error_weighting_params * ewp)
820 {
821 	int x, y, z;
822 	float centerpos_x = (xdim - 1) * 0.5f;
823 	float centerpos_y = (ydim - 1) * 0.5f;
824 	float centerpos_z = (zdim - 1) * 0.5f;
825 	float *bef = ewp->block_artifact_suppression_expanded;
826 
827 	for (z = 0; z < zdim; z++)
828 		for (y = 0; y < ydim; y++)
829 			for (x = 0; x < xdim; x++)
830 			{
831 				float xdif = (x - centerpos_x) / xdim;
832 				float ydif = (y - centerpos_y) / ydim;
833 				float zdif = (z - centerpos_z) / zdim;
834 
835 				float wdif = 0.36f;
836 				float dist = sqrt(xdif * xdif + ydif * ydif + zdif * zdif + wdif * wdif);
837 				*bef = pow(dist, ewp->block_artifact_suppression);
838 				bef++;
839 			}
840 }
841 
842 
843 
844 // Function to set error weights for each color component for each texel in a block.
845 // Returns the sum of all the error values set.
846 
prepare_error_weight_block(const astc_codec_image * input_image,int xdim,int ydim,int zdim,const error_weighting_params * ewp,const imageblock * blk,error_weight_block * ewb,error_weight_block_orig * ewbo)847 float prepare_error_weight_block(const astc_codec_image * input_image,
848 								 int xdim, int ydim, int zdim, const error_weighting_params * ewp, const imageblock * blk, error_weight_block * ewb, error_weight_block_orig * ewbo)
849 {
850 
851 	int x, y, z;
852 	int idx = 0;
853 
854 	int any_mean_stdev_weight =
855 		ewp->rgb_base_weight != 1.0 || ewp->alpha_base_weight != 1.0 || ewp->rgb_mean_weight != 0.0 || ewp->rgb_stdev_weight != 0.0 || ewp->alpha_mean_weight != 0.0 || ewp->alpha_stdev_weight != 0.0;
856 
857 	float4 color_weights = float4(ewp->rgba_weights[0],
858 								  ewp->rgba_weights[1],
859 								  ewp->rgba_weights[2],
860 								  ewp->rgba_weights[3]);
861 
862 	ewb->contains_zeroweight_texels = 0;
863 
864 	for (z = 0; z < zdim; z++)
865 		for (y = 0; y < ydim; y++)
866 			for (x = 0; x < xdim; x++)
867 			{
868 				int xpos = x + blk->xpos;
869 				int ypos = y + blk->ypos;
870 				int zpos = z + blk->zpos;
871 
872 				if (xpos >= input_image->xsize || ypos >= input_image->ysize || zpos >= input_image->zsize)
873 				{
874 					float4 weights = float4(1e-11f, 1e-11f, 1e-11f, 1e-11f);
875 					ewb->error_weights[idx] = weights;
876 					ewb->contains_zeroweight_texels = 1;
877 				}
878 				else
879 				{
880 					float4 error_weight = float4(ewp->rgb_base_weight,
881 												 ewp->rgb_base_weight,
882 												 ewp->rgb_base_weight,
883 												 ewp->alpha_base_weight);
884 
885 					if (any_mean_stdev_weight)
886 					{
887 						float4 avg = input_averages[zpos][ypos][xpos];
888 						if (avg.x < 6e-5f)
889 							avg.x = 6e-5f;
890 						if (avg.y < 6e-5f)
891 							avg.y = 6e-5f;
892 						if (avg.z < 6e-5f)
893 							avg.z = 6e-5f;
894 						if (avg.w < 6e-5f)
895 							avg.w = 6e-5f;
896 						/*
897 						   printf("avg: %f %f %f %f\n", avg.x, avg.y, avg.z, avg.w ); */
898 						avg = avg * avg;
899 
900 						float4 variance = input_variances[zpos][ypos][xpos];
901 						variance = variance * variance;
902 
903 						float favg = (avg.x + avg.y + avg.z) * (1.0f / 3.0f);
904 						float fvar = (variance.x + variance.y + variance.z) * (1.0f / 3.0f);
905 
906 						float mixing = ewp->rgb_mean_and_stdev_mixing;
907 						avg.xyz = float3(favg, favg, favg) * mixing + avg.xyz * (1.0f - mixing);
908 						variance.xyz = float3(fvar, fvar, fvar) * mixing + variance.xyz * (1.0f - mixing);
909 
910 						float4 stdev = float4(sqrt(MAX(variance.x, 0.0f)),
911 											  sqrt(MAX(variance.y, 0.0f)),
912 											  sqrt(MAX(variance.z, 0.0f)),
913 											  sqrt(MAX(variance.w, 0.0f)));
914 
915 						avg.xyz = avg.xyz * ewp->rgb_mean_weight;
916 						avg.w = avg.w * ewp->alpha_mean_weight;
917 						stdev.xyz = stdev.xyz * ewp->rgb_stdev_weight;
918 						stdev.w = stdev.w * ewp->alpha_stdev_weight;
919 						error_weight = error_weight + avg + stdev;
920 
921 						error_weight = float4(1.0f, 1.0f, 1.0f, 1.0f) / error_weight;
922 					}
923 
924 					if (ewp->ra_normal_angular_scale)
925 					{
926 						float x = (blk->orig_data[4 * idx] - 0.5f) * 2.0f;
927 						float y = (blk->orig_data[4 * idx + 3] - 0.5f) * 2.0f;
928 						float denom = 1.0f - x * x - y * y;
929 						if (denom < 0.1f)
930 							denom = 0.1f;
931 						denom = 1.0f / denom;
932 						error_weight.x *= 1.0f + x * x * denom;
933 						error_weight.w *= 1.0f + y * y * denom;
934 					}
935 
936 					if (ewp->enable_rgb_scale_with_alpha)
937 					{
938 						float alpha_scale;
939 						if (ewp->alpha_radius != 0)
940 							alpha_scale = input_alpha_averages[zpos][ypos][xpos];
941 						else
942 							alpha_scale = blk->orig_data[4 * idx + 3];
943 						if (alpha_scale < 0.0001f)
944 							alpha_scale = 0.0001f;
945 						alpha_scale *= alpha_scale;
946 						error_weight.xyz = error_weight.xyz * alpha_scale;
947 					}
948 					error_weight = error_weight * color_weights;
949 					error_weight = error_weight * ewp->block_artifact_suppression_expanded[idx];
950 
951 					// if we perform a conversion from linear to sRGB, then we multiply
952 					// the weight with the derivative of the linear->sRGB transform function.
953 					if (perform_srgb_transform)
954 					{
955 						float r = blk->orig_data[4 * idx];
956 						float g = blk->orig_data[4 * idx + 1];
957 						float b = blk->orig_data[4 * idx + 2];
958 						if (r < 0.0031308f)
959 							r = 12.92f;
960 						else
961 							r = 0.4396f * pow(r, -0.58333f);
962 						if (g < 0.0031308f)
963 							g = 12.92f;
964 						else
965 							g = 0.4396f * pow(g, -0.58333f);
966 						if (b < 0.0031308f)
967 							b = 12.92f;
968 						else
969 							b = 0.4396f * pow(b, -0.58333f);
970 						error_weight.x *= r;
971 						error_weight.y *= g;
972 						error_weight.z *= b;
973 					}
974 
975 					/*
976 						printf("%f %f %f %f\n", error_weight.x, error_weight.y, error_weight.z, error_weight.w );
977 					*/
978 
979 					// when we loaded the block to begin with, we applied a transfer function
980 					// and computed the derivative of the transfer function. However, the
981 					// error-weight computation so far is based on the original color values,
982 					// not the transfer-function values. As such, we must multiply the
983 					// error weights by the derivative of the inverse of the transfer function,
984 					// which is equivalent to dividing by the derivative of the transfer
985 					// function.
986 
987 					ewbo->error_weights[idx] = error_weight;
988 
989 					error_weight.x /= (blk->deriv_data[4 * idx] * blk->deriv_data[4 * idx] * 1e-10f);
990 					error_weight.y /= (blk->deriv_data[4 * idx + 1] * blk->deriv_data[4 * idx + 1] * 1e-10f);
991 					error_weight.z /= (blk->deriv_data[4 * idx + 2] * blk->deriv_data[4 * idx + 2] * 1e-10f);
992 					error_weight.w /= (blk->deriv_data[4 * idx + 3] * blk->deriv_data[4 * idx + 3] * 1e-10f);
993 
994 					/*
995 						printf("--> %f %f %f %f\n", error_weight.x, error_weight.y, error_weight.z, error_weight.w );
996 					*/
997 
998 					ewb->error_weights[idx] = error_weight;
999 					if (dot(error_weight, float4(1, 1, 1, 1)) < 1e-10f)
1000 						ewb->contains_zeroweight_texels = 1;
1001 				}
1002 				idx++;
1003 			}
1004 
1005 	int i;
1006 
1007 	float4 error_weight_sum = float4(0, 0, 0, 0);
1008 	int texels_per_block = xdim * ydim * zdim;
1009 
1010 	for (i = 0; i < texels_per_block; i++)
1011 	{
1012 		error_weight_sum = error_weight_sum + ewb->error_weights[i];
1013 
1014 		ewb->texel_weight_r[i] = ewb->error_weights[i].x;
1015 		ewb->texel_weight_g[i] = ewb->error_weights[i].y;
1016 		ewb->texel_weight_b[i] = ewb->error_weights[i].z;
1017 		ewb->texel_weight_a[i] = ewb->error_weights[i].w;
1018 
1019 		ewb->texel_weight_rg[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y) * 0.5f;
1020 		ewb->texel_weight_rb[i] = (ewb->error_weights[i].x + ewb->error_weights[i].z) * 0.5f;
1021 		ewb->texel_weight_gb[i] = (ewb->error_weights[i].y + ewb->error_weights[i].z) * 0.5f;
1022 		ewb->texel_weight_ra[i] = (ewb->error_weights[i].x + ewb->error_weights[i].w) * 0.5f;
1023 
1024 		ewb->texel_weight_gba[i] = (ewb->error_weights[i].y + ewb->error_weights[i].z + ewb->error_weights[i].w) * 0.333333f;
1025 		ewb->texel_weight_rba[i] = (ewb->error_weights[i].x + ewb->error_weights[i].z + ewb->error_weights[i].w) * 0.333333f;
1026 		ewb->texel_weight_rga[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y + ewb->error_weights[i].w) * 0.333333f;
1027 		ewb->texel_weight_rgb[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y + ewb->error_weights[i].z) * 0.333333f;
1028 		ewb->texel_weight[i] = (ewb->error_weights[i].x + ewb->error_weights[i].y + ewb->error_weights[i].z + ewb->error_weights[i].w) * 0.25f;
1029 	}
1030 
1031 	return dot(error_weight_sum, float4(1, 1, 1, 1));
1032 }
1033 
1034 
/*
	functions to analyze block statistical properties:
		* simple properties: * mean * variance
		* covariance-matrix correlation coefficients
 */
1040 
1041 
1042 // compute averages and covariance matrices for 4 components
compute_covariance_matrix(int xdim,int ydim,int zdim,const imageblock * blk,const error_weight_block * ewb,mat4 * cov_matrix)1043 static void compute_covariance_matrix(int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, mat4 * cov_matrix)
1044 {
1045 	int i;
1046 
1047 	int texels_per_block = xdim * ydim * zdim;
1048 
1049 	float r_sum = 0.0f;
1050 	float g_sum = 0.0f;
1051 	float b_sum = 0.0f;
1052 	float a_sum = 0.0f;
1053 	float rr_sum = 0.0f;
1054 	float gg_sum = 0.0f;
1055 	float bb_sum = 0.0f;
1056 	float aa_sum = 0.0f;
1057 	float rg_sum = 0.0f;
1058 	float rb_sum = 0.0f;
1059 	float ra_sum = 0.0f;
1060 	float gb_sum = 0.0f;
1061 	float ga_sum = 0.0f;
1062 	float ba_sum = 0.0f;
1063 
1064 	float weight_sum = 0.0f;
1065 
1066 	for (i = 0; i < texels_per_block; i++)
1067 	{
1068 		float weight = ewb->texel_weight[i];
1069 		if (weight < 0.0f)
1070 			ASTC_CODEC_INTERNAL_ERROR;
1071 		weight_sum += weight;
1072 		float r = blk->work_data[4 * i];
1073 		float g = blk->work_data[4 * i + 1];
1074 		float b = blk->work_data[4 * i + 2];
1075 		float a = blk->work_data[4 * i + 3];
1076 		r_sum += r * weight;
1077 		rr_sum += r * (r * weight);
1078 		rg_sum += g * (r * weight);
1079 		rb_sum += b * (r * weight);
1080 		ra_sum += a * (r * weight);
1081 		g_sum += g * weight;
1082 		gg_sum += g * (g * weight);
1083 		gb_sum += b * (g * weight);
1084 		ga_sum += a * (g * weight);
1085 		b_sum += b * weight;
1086 		bb_sum += b * (b * weight);
1087 		ba_sum += a * (b * weight);
1088 		a_sum += a * weight;
1089 		aa_sum += a * (a * weight);
1090 	}
1091 
1092 	float rpt = 1.0f / MAX(weight_sum, 1e-7f);
1093 	float rs = r_sum;
1094 	float gs = g_sum;
1095 	float bs = b_sum;
1096 	float as = a_sum;
1097 
1098 	cov_matrix->v[0] = float4(rr_sum - rs * rs * rpt, rg_sum - rs * gs * rpt, rb_sum - rs * bs * rpt, ra_sum - rs * as * rpt);
1099 	cov_matrix->v[1] = float4(rg_sum - rs * gs * rpt, gg_sum - gs * gs * rpt, gb_sum - gs * bs * rpt, ga_sum - gs * as * rpt);
1100 	cov_matrix->v[2] = float4(rb_sum - rs * bs * rpt, gb_sum - gs * bs * rpt, bb_sum - bs * bs * rpt, ba_sum - bs * as * rpt);
1101 	cov_matrix->v[3] = float4(ra_sum - rs * as * rpt, ga_sum - gs * as * rpt, ba_sum - bs * as * rpt, aa_sum - as * as * rpt);
1102 
1103 }
1104 
1105 
1106 
prepare_block_statistics(int xdim,int ydim,int zdim,const imageblock * blk,const error_weight_block * ewb,int * is_normal_map,float * lowest_correl)1107 void prepare_block_statistics(int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, int *is_normal_map, float *lowest_correl)
1108 {
1109 	int i;
1110 
1111 	mat4 cov_matrix;
1112 
1113 	compute_covariance_matrix(xdim, ydim, zdim, blk, ewb, &cov_matrix);
1114 
1115 	// use the covariance matrix to compute
1116 	// correllation coefficients
1117 	float rr_var = cov_matrix.v[0].x;
1118 	float gg_var = cov_matrix.v[1].y;
1119 	float bb_var = cov_matrix.v[2].z;
1120 	float aa_var = cov_matrix.v[3].w;
1121 
1122 	float rg_correlation = cov_matrix.v[0].y / sqrt(MAX(rr_var * gg_var, 1e-30f));
1123 	float rb_correlation = cov_matrix.v[0].z / sqrt(MAX(rr_var * bb_var, 1e-30f));
1124 	float ra_correlation = cov_matrix.v[0].w / sqrt(MAX(rr_var * aa_var, 1e-30f));
1125 	float gb_correlation = cov_matrix.v[1].z / sqrt(MAX(gg_var * bb_var, 1e-30f));
1126 	float ga_correlation = cov_matrix.v[1].w / sqrt(MAX(gg_var * aa_var, 1e-30f));
1127 	float ba_correlation = cov_matrix.v[2].w / sqrt(MAX(bb_var * aa_var, 1e-30f));
1128 
1129 	if (astc_isnan(rg_correlation))
1130 		rg_correlation = 1.0f;
1131 	if (astc_isnan(rb_correlation))
1132 		rb_correlation = 1.0f;
1133 	if (astc_isnan(ra_correlation))
1134 		ra_correlation = 1.0f;
1135 	if (astc_isnan(gb_correlation))
1136 		gb_correlation = 1.0f;
1137 	if (astc_isnan(ga_correlation))
1138 		ga_correlation = 1.0f;
1139 	if (astc_isnan(ba_correlation))
1140 		ba_correlation = 1.0f;
1141 
1142 	float lowest_correlation = MIN(fabs(rg_correlation), fabs(rb_correlation));
1143 	lowest_correlation = MIN(lowest_correlation, fabs(ra_correlation));
1144 	lowest_correlation = MIN(lowest_correlation, fabs(gb_correlation));
1145 	lowest_correlation = MIN(lowest_correlation, fabs(ga_correlation));
1146 	lowest_correlation = MIN(lowest_correlation, fabs(ba_correlation));
1147 	*lowest_correl = lowest_correlation;
1148 
1149 	// compute a "normal-map" factor
1150 	// this factor should be exactly 0.0 for a normal map, while it may be all over the
1151 	// place for anything that is NOT a normal map. We can probably assume that a factor
1152 	// of less than 0.2f represents a normal map.
1153 
1154 	float nf_sum = 0.0f;
1155 
1156 	int texels_per_block = xdim * ydim * zdim;
1157 
1158 	for (i = 0; i < texels_per_block; i++)
1159 	{
1160 		float3 val = float3(blk->orig_data[4 * i],
1161 							blk->orig_data[4 * i + 1],
1162 							blk->orig_data[4 * i + 2]);
1163 		val = (val - float3(0.5f, 0.5f, 0.5f)) * 2.0f;
1164 		float length_squared = dot(val, val);
1165 		float nf = fabs(length_squared - 1.0f);
1166 		nf_sum += nf;
1167 	}
1168 	float nf_avg = nf_sum / texels_per_block;
1169 	*is_normal_map = nf_avg < 0.2;
1170 }
1171 
1172 
1173 
1174 
1175 
compress_constant_color_block(int xdim,int ydim,int zdim,const imageblock * blk,const error_weight_block * ewb,symbolic_compressed_block * scb)1176 void compress_constant_color_block(int xdim, int ydim, int zdim, const imageblock * blk, const error_weight_block * ewb, symbolic_compressed_block * scb)
1177 {
1178 	int texel_count = xdim * ydim * zdim;
1179 	int i;
1180 
1181 	float4 color_sum = float4(0, 0, 0, 0);
1182 	float4 color_weight_sum = float4(0, 0, 0, 0);
1183 
1184 	const float *clp = blk->work_data;
1185 	for (i = 0; i < texel_count; i++)
1186 	{
1187 		float4 weights = ewb->error_weights[i];
1188 		float4 color_data = float4(clp[4 * i], clp[4 * i + 1], clp[4 * i + 2], clp[4 * i + 3]);
1189 		color_sum = color_sum + (color_data * weights);
1190 		color_weight_sum = color_weight_sum + weights;
1191 	}
1192 
1193 	float4 avg_color = color_sum / color_weight_sum;
1194 
1195 	int use_fp16 = blk->rgb_lns[0];
1196 
1197 	#ifdef DEBUG_PRINT_DIAGNOSTICS
1198 		if (print_diagnostics)
1199 		{
1200 			printf("Averaged color: %f %f %f %f\n", avg_color.x, avg_color.y, avg_color.z, avg_color.w);
1201 		}
1202 	#endif
1203 
1204 	// convert the color
1205 	if (blk->rgb_lns[0])
1206 	{
1207 		int avg_red = static_cast < int >(floor(avg_color.x + 0.5f));
1208 		int avg_green = static_cast < int >(floor(avg_color.y + 0.5f));
1209 		int avg_blue = static_cast < int >(floor(avg_color.z + 0.5f));
1210 
1211 		if (avg_red < 0)
1212 			avg_red = 0;
1213 		else if (avg_red > 65535)
1214 			avg_red = 65535;
1215 
1216 		if (avg_green < 0)
1217 			avg_green = 0;
1218 		else if (avg_green > 65535)
1219 			avg_green = 65535;
1220 
1221 		if (avg_blue < 0)
1222 			avg_blue = 0;
1223 		else if (avg_blue > 65535)
1224 			avg_blue = 65535;
1225 
1226 		avg_color.x = sf16_to_float(lns_to_sf16(avg_red));
1227 		avg_color.y = sf16_to_float(lns_to_sf16(avg_green));
1228 		avg_color.z = sf16_to_float(lns_to_sf16(avg_blue));
1229 	}
1230 	else
1231 	{
1232 		avg_color.x *= (1.0f / 65535.0f);
1233 		avg_color.y *= (1.0f / 65535.0f);
1234 		avg_color.z *= (1.0f / 65535.0f);
1235 	}
1236 	if (blk->alpha_lns[0])
1237 	{
1238 		int avg_alpha = static_cast < int >(floor(avg_color.w + 0.5f));
1239 
1240 		if (avg_alpha < 0)
1241 			avg_alpha = 0;
1242 		else if (avg_alpha > 65535)
1243 			avg_alpha = 65535;
1244 
1245 		avg_color.w = sf16_to_float(lns_to_sf16(avg_alpha));
1246 	}
1247 	else
1248 	{
1249 		avg_color.w *= (1.0f / 65535.0f);
1250 	}
1251 
1252 #ifdef DEBUG_PRINT_DIAGNOSTICS
1253 	if (print_diagnostics)
1254 	{
1255 		printf("Averaged color: %f %f %f %f   (%d)\n", avg_color.x, avg_color.y, avg_color.z, avg_color.w, use_fp16);
1256 
1257 	}
1258 #endif
1259 
1260 	if (use_fp16)
1261 	{
1262 		scb->error_block = 0;
1263 		scb->block_mode = -1;
1264 		scb->partition_count = 0;
1265 		scb->constant_color[0] = float_to_sf16(avg_color.x, SF_NEARESTEVEN);
1266 		scb->constant_color[1] = float_to_sf16(avg_color.y, SF_NEARESTEVEN);
1267 		scb->constant_color[2] = float_to_sf16(avg_color.z, SF_NEARESTEVEN);
1268 		scb->constant_color[3] = float_to_sf16(avg_color.w, SF_NEARESTEVEN);
1269 	}
1270 
1271 	else
1272 	{
1273 		scb->error_block = 0;
1274 		scb->block_mode = -2;
1275 		scb->partition_count = 0;
1276 		float red = avg_color.x;
1277 		float green = avg_color.y;
1278 		float blue = avg_color.z;
1279 		float alpha = avg_color.w;
1280 		if (red < 0)
1281 			red = 0;
1282 		else if (red > 1)
1283 			red = 1;
1284 		if (green < 0)
1285 			green = 0;
1286 		else if (green > 1)
1287 			green = 1;
1288 		if (blue < 0)
1289 			blue = 0;
1290 		else if (blue > 1)
1291 			blue = 1;
1292 		if (alpha < 0)
1293 			alpha = 0;
1294 		else if (alpha > 1)
1295 			alpha = 1;
1296 		scb->constant_color[0] = static_cast < int >(floor(red * 65535.0f + 0.5f));
1297 		scb->constant_color[1] = static_cast < int >(floor(green * 65535.0f + 0.5f));
1298 		scb->constant_color[2] = static_cast < int >(floor(blue * 65535.0f + 0.5f));
1299 		scb->constant_color[3] = static_cast < int >(floor(alpha * 65535.0f + 0.5f));
1300 	}
1301 }
1302 
// File-scope table of 2048 integer counters (zero-initialized, external linkage).
// NOTE(review): the name suggests a usage count per block mode - confirm
// against the code that updates it.
int block_mode_histogram[2048];
1304 
compress_symbolic_block(const astc_codec_image * input_image,astc_decode_mode decode_mode,int xdim,int ydim,int zdim,const error_weighting_params * ewp,const imageblock * blk,symbolic_compressed_block * scb,compress_symbolic_block_buffers * tmpbuf)1305 float compress_symbolic_block(const astc_codec_image * input_image,
1306 							  astc_decode_mode decode_mode, int xdim, int ydim, int zdim, const error_weighting_params * ewp, const imageblock * blk, symbolic_compressed_block * scb,
1307 							  compress_symbolic_block_buffers * tmpbuf)
1308 {
1309 	int i, j;
1310 	int xpos = blk->xpos;
1311 	int ypos = blk->ypos;
1312 	int zpos = blk->zpos;
1313 
1314 	int x, y, z;
1315 
1316 
1317 	#ifdef DEBUG_PRINT_DIAGNOSTICS
1318 		if (print_diagnostics)
1319 		{
1320 			printf("Diagnostics of block of dimension %d x %d x %d\n\n", xdim, ydim, zdim);
1321 
1322 			printf("XPos: %d  YPos: %d  ZPos: %d\n", xpos, ypos, zpos);
1323 
1324 			printf("Red-min: %f   Red-max: %f\n", blk->red_min, blk->red_max);
1325 			printf("Green-min: %f   Green-max: %f\n", blk->green_min, blk->green_max);
1326 			printf("Blue-min: %f   Blue-max: %f\n", blk->blue_min, blk->blue_max);
1327 			printf("Alpha-min: %f   Alpha-max: %f\n", blk->alpha_min, blk->alpha_max);
1328 			printf("Grayscale: %d\n", blk->grayscale);
1329 
1330 			for (z = 0; z < zdim; z++)
1331 				for (y = 0; y < ydim; y++)
1332 					for (x = 0; x < xdim; x++)
1333 					{
1334 						int idx = ((z * ydim + y) * xdim + x) * 4;
1335 						printf("Texel (%d %d %d) : orig=< %g, %g, %g, %g >, work=< %g, %g, %g, %g >\n",
1336 							x, y, z,
1337 							blk->orig_data[idx],
1338 							blk->orig_data[idx + 1], blk->orig_data[idx + 2], blk->orig_data[idx + 3], blk->work_data[idx], blk->work_data[idx + 1], blk->work_data[idx + 2], blk->work_data[idx + 3]);
1339 					}
1340 			printf("\n");
1341 		}
1342 	#endif
1343 
1344 
1345 	if (blk->red_min == blk->red_max && blk->green_min == blk->green_max && blk->blue_min == blk->blue_max && blk->alpha_min == blk->alpha_max)
1346 	{
1347 
1348 		// detected a constant-color block. Encode as FP16 if using HDR
1349 		scb->error_block = 0;
1350 
1351 		if (rgb_force_use_of_hdr)
1352 		{
1353 			scb->block_mode = -1;
1354 			scb->partition_count = 0;
1355 			scb->constant_color[0] = float_to_sf16(blk->orig_data[0], SF_NEARESTEVEN);
1356 			scb->constant_color[1] = float_to_sf16(blk->orig_data[1], SF_NEARESTEVEN);
1357 			scb->constant_color[2] = float_to_sf16(blk->orig_data[2], SF_NEARESTEVEN);
1358 			scb->constant_color[3] = float_to_sf16(blk->orig_data[3], SF_NEARESTEVEN);
1359 		}
1360 		else
1361 		{
1362 			// Encode as UNORM16 if NOT using HDR.
1363 			scb->block_mode = -2;
1364 			scb->partition_count = 0;
1365 			float red = blk->orig_data[0];
1366 			float green = blk->orig_data[1];
1367 			float blue = blk->orig_data[2];
1368 			float alpha = blk->orig_data[3];
1369 			if (red < 0)
1370 				red = 0;
1371 			else if (red > 1)
1372 				red = 1;
1373 			if (green < 0)
1374 				green = 0;
1375 			else if (green > 1)
1376 				green = 1;
1377 			if (blue < 0)
1378 				blue = 0;
1379 			else if (blue > 1)
1380 				blue = 1;
1381 			if (alpha < 0)
1382 				alpha = 0;
1383 			else if (alpha > 1)
1384 				alpha = 1;
1385 			scb->constant_color[0] = (int)floor(red * 65535.0f + 0.5f);
1386 			scb->constant_color[1] = (int)floor(green * 65535.0f + 0.5f);
1387 			scb->constant_color[2] = (int)floor(blue * 65535.0f + 0.5f);
1388 			scb->constant_color[3] = (int)floor(alpha * 65535.0f + 0.5f);
1389 		}
1390 
1391 		#ifdef DEBUG_PRINT_DIAGNOSTICS
1392 			if (print_diagnostics)
1393 			{
1394 				printf("Block is single-color <%4.4X %4.4X %4.4X %4.4X>\n", scb->constant_color[0], scb->constant_color[1], scb->constant_color[2], scb->constant_color[3]);
1395 			}
1396 		#endif
1397 
1398 		if (print_tile_errors)
1399 			printf("0\n");
1400 
1401 		physical_compressed_block psb = symbolic_to_physical(xdim, ydim, zdim, scb);
1402 		physical_to_symbolic(xdim, ydim, zdim, psb, scb);
1403 
1404 		return 0.0f;
1405 	}
1406 
1407 	error_weight_block *ewb = tmpbuf->ewb;
1408 	error_weight_block_orig *ewbo = tmpbuf->ewbo;
1409 
1410 	float error_weight_sum = prepare_error_weight_block(input_image,
1411 														xdim, ydim, zdim,
1412 														ewp, blk, ewb, ewbo);
1413 
1414 	#ifdef DEBUG_PRINT_DIAGNOSTICS
1415 		if (print_diagnostics)
1416 		{
1417 			printf("\n");
1418 			for (z = 0; z < zdim; z++)
1419 				for (y = 0; y < ydim; y++)
1420 					for (x = 0; x < xdim; x++)
1421 					{
1422 						int idx = (z * ydim + y) * xdim + x;
1423 						printf("ErrorWeight (%d %d %d) : < %g, %g, %g, %g >\n", x, y, z, ewb->error_weights[idx].x, ewb->error_weights[idx].y, ewb->error_weights[idx].z, ewb->error_weights[idx].w);
1424 					}
1425 			printf("\n");
1426 		}
1427 	#endif
1428 
1429 	symbolic_compressed_block *tempblocks = tmpbuf->tempblocks;
1430 
1431 	float error_of_best_block = 1e20f;
1432 	// int modesel=0;
1433 
1434 	imageblock *temp = tmpbuf->temp;
1435 
1436 	float best_errorvals_in_modes[17];
1437 	for (i = 0; i < 17; i++)
1438 		best_errorvals_in_modes[i] = 1e30f;
1439 
1440 	int uses_alpha = imageblock_uses_alpha(xdim, ydim, zdim, blk);
1441 
1442 
1443 	// compression of average-color blocks disabled for the time being;
1444 	// they produce extremely severe block artifacts.
1445 #if 0
1446 	// first, compress an averaged-color block
1447 	compress_constant_color_block(xdim, ydim, zdim, blk, ewb, scb);
1448 
1449 	decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, scb, temp);
1450 
1451 	float avgblock_errorval = compute_imageblock_difference(xdim, ydim, zdim,
1452 															blk, temp, ewb) * 4.0f;	// bias somewhat against the average-color block.
1453 
1454 	#ifdef DEBUG_PRINT_DIAGNOSTICS
1455 		if (print_diagnostics)
1456 		{
1457 			printf("\n-----------------------------------\n");
1458 			printf("Average-color block test completed\n");
1459 			printf("Resulting error value: %g\n", avgblock_errorval);
1460 		}
1461 	#endif
1462 
1463 
1464 	if (avgblock_errorval < error_of_best_block)
1465 	{
1466 		#ifdef DEBUG_PRINT_DIAGNOSTICS
1467 			if (print_diagnostics)
1468 				printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
1469 		#endif
1470 
1471 		error_of_best_block = avgblock_errorval;
1472 		// *scb = tempblocks[j];
1473 		modesel = 0;
1474 	}
1475 
1476 	#ifdef DEBUG_PRINT_DIAGNOSTICS
1477 		if (print_diagnostics)
1478 		{
1479 			printf("-----------------------------------\n");
1480 		}
1481 	#endif
1482 #endif
1483 
1484 
1485 	float mode_cutoff = ewp->block_mode_cutoff;
1486 
1487 	// next, test mode #0. This mode uses 1 plane of weights and 1 partition.
1488 	// we test it twice, first with a modecutoff of 0, then with the specified mode-cutoff.
1489 	// This causes an early-out that speeds up encoding of "easy" content.
1490 
1491 	float modecutoffs[2];
1492 	float errorval_mult[2] = { 2.5, 1 };
1493 	modecutoffs[0] = 0;
1494 	modecutoffs[1] = mode_cutoff;
1495 
1496 	#if 0
1497 		if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
1498 			goto END_OF_TESTS;
1499 	#endif
1500 
1501 	float best_errorval_in_mode;
1502 	for (i = 0; i < 2; i++)
1503 	{
1504 		compress_symbolic_block_fixed_partition_1_plane(decode_mode, modecutoffs[i], ewp->max_refinement_iters, xdim, ydim, zdim, 1,	// partition count
1505 														0,	// partition index
1506 														blk, ewb, tempblocks, tmpbuf->plane1);
1507 
1508 		best_errorval_in_mode = 1e30f;
1509 		for (j = 0; j < 4; j++)
1510 		{
1511 			if (tempblocks[j].error_block)
1512 				continue;
1513 			decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
1514 			float errorval = compute_imageblock_difference(xdim, ydim, zdim,
1515 														   blk, temp, ewb) * errorval_mult[i];
1516 
1517 			#ifdef DEBUG_PRINT_DIAGNOSTICS
1518 				if (print_diagnostics)
1519 				{
1520 					printf("\n-----------------------------------\n");
1521 					printf("Single-weight partition test 0 (1 partition) completed\n");
1522 					printf("Resulting error value: %g\n", errorval);
1523 				}
1524 			#endif
1525 
1526 			if (errorval < best_errorval_in_mode)
1527 				best_errorval_in_mode = errorval;
1528 
1529 			if (errorval < error_of_best_block)
1530 			{
1531 				#ifdef DEBUG_PRINT_DIAGNOSTICS
1532 					if (print_diagnostics)
1533 						printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
1534 				#endif
1535 
1536 				error_of_best_block = errorval;
1537 				*scb = tempblocks[j];
1538 
1539 				// modesel = 0;
1540 			}
1541 
1542 			#ifdef DEBUG_PRINT_DIAGNOSTICS
1543 				if (print_diagnostics)
1544 				{
1545 					printf("-----------------------------------\n");
1546 				}
1547 			#endif
1548 		}
1549 
1550 		best_errorvals_in_modes[0] = best_errorval_in_mode;
1551 		if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
1552 			goto END_OF_TESTS;
1553 	}
1554 
1555 	int is_normal_map;
1556 	float lowest_correl;
1557 	prepare_block_statistics(xdim, ydim, zdim, blk, ewb, &is_normal_map, &lowest_correl);
1558 
1559 	if (is_normal_map && lowest_correl < 0.99f)
1560 		lowest_correl = 0.99f;
1561 
1562 	// next, test the four possible 1-partition, 2-planes modes
1563 	for (i = 0; i < 4; i++)
1564 	{
1565 
1566 		if (lowest_correl > ewp->lowest_correlation_cutoff)
1567 			continue;
1568 
1569 		if (blk->grayscale && i != 3)
1570 			continue;
1571 
1572 		if (!uses_alpha && i == 3)
1573 			continue;
1574 
1575 		compress_symbolic_block_fixed_partition_2_planes(decode_mode, mode_cutoff, ewp->max_refinement_iters, xdim, ydim, zdim, 1,	// partition count
1576 														 0,	// partition index
1577 														 i,	// the color component to test a separate plane of weights for.
1578 														 blk, ewb, tempblocks, tmpbuf->planes2);
1579 
1580 		best_errorval_in_mode = 1e30f;
1581 		for (j = 0; j < 4; j++)
1582 		{
1583 			if (tempblocks[j].error_block)
1584 				continue;
1585 			decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
1586 			float errorval = compute_imageblock_difference(xdim, ydim, zdim,
1587 														   blk, temp, ewb);
1588 
1589 			#ifdef DEBUG_PRINT_DIAGNOSTICS
1590 				if (print_diagnostics)
1591 				{
1592 					printf("\n-----------------------------------\n");
1593 					printf("Dual-weight partition test %d (1 partition) completed\n", i);
1594 					printf("Resulting error value: %g\n", errorval);
1595 				}
1596 			#endif
1597 
1598 			if (errorval < best_errorval_in_mode)
1599 				best_errorval_in_mode = errorval;
1600 
1601 			if (errorval < error_of_best_block)
1602 			{
1603 				#ifdef DEBUG_PRINT_DIAGNOSTICS
1604 					if (print_diagnostics)
1605 						printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
1606 				#endif
1607 
1608 				error_of_best_block = errorval;
1609 				*scb = tempblocks[j];
1610 
1611 				// modesel = i+1;
1612 			}
1613 
1614 			#ifdef DEBUG_PRINT_DIAGNOSTICS
1615 				if (print_diagnostics)
1616 				{
1617 					printf("-----------------------------------\n");
1618 				}
1619 			#endif
1620 
1621 			best_errorvals_in_modes[i + 1] = best_errorval_in_mode;
1622 		}
1623 
1624 		if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
1625 			goto END_OF_TESTS;
1626 	}
1627 
1628 	// find best blocks for 2, 3 and 4 partitions
1629 	int partition_count;
1630 	for (partition_count = 2; partition_count <= 4; partition_count++)
1631 	{
1632 		int partition_indices_1plane[2];
1633 		int partition_indices_2planes[2];
1634 
1635 		find_best_partitionings(ewp->partition_search_limit,
1636 								xdim, ydim, zdim, partition_count, blk, ewb, 1,
1637 								&(partition_indices_1plane[0]), &(partition_indices_1plane[1]), &(partition_indices_2planes[0]));
1638 
1639 		for (i = 0; i < 2; i++)
1640 		{
1641 			compress_symbolic_block_fixed_partition_1_plane(decode_mode, mode_cutoff, ewp->max_refinement_iters, xdim, ydim, zdim, partition_count, partition_indices_1plane[i], blk, ewb, tempblocks, tmpbuf->plane1);
1642 
1643 			best_errorval_in_mode = 1e30f;
1644 			for (j = 0; j < 4; j++)
1645 			{
1646 				if (tempblocks[j].error_block)
1647 					continue;
1648 				decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
1649 				float errorval = compute_imageblock_difference(xdim, ydim, zdim,
1650 															   blk, temp, ewb);
1651 
1652 				#ifdef DEBUG_PRINT_DIAGNOSTICS
1653 					if (print_diagnostics)
1654 					{
1655 						printf("\n-----------------------------------\n");
1656 						printf("Single-weight partition test %d (%d partitions) completed\n", i, partition_count);
1657 						printf("Resulting error value: %g\n", errorval);
1658 					}
1659 				#endif
1660 
1661 				if (errorval < best_errorval_in_mode)
1662 					best_errorval_in_mode = errorval;
1663 
1664 				if (errorval < error_of_best_block)
1665 				{
1666 					#ifdef DEBUG_PRINT_DIAGNOSTICS
1667 						if (print_diagnostics)
1668 							printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
1669 					#endif
1670 
1671 					error_of_best_block = errorval;
1672 					*scb = tempblocks[j];
1673 
1674 					// modesel = 4*(partition_count-2) + 5 + i;
1675 				}
1676 			}
1677 
1678 			best_errorvals_in_modes[4 * (partition_count - 2) + 5 + i] = best_errorval_in_mode;
1679 
1680 			#ifdef DEBUG_PRINT_DIAGNOSTICS
1681 				if (print_diagnostics)
1682 				{
1683 					printf("-----------------------------------\n");
1684 				}
1685 			#endif
1686 
1687 			if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
1688 				goto END_OF_TESTS;
1689 		}
1690 
1691 
1692 		if (partition_count == 2 && !is_normal_map && MIN(best_errorvals_in_modes[5], best_errorvals_in_modes[6]) > (best_errorvals_in_modes[0] * ewp->partition_1_to_2_limit))
1693 			goto END_OF_TESTS;
1694 
		// don't bother to check 4 partitions for dual plane of weights, ever.
1696 		if (partition_count == 4)
1697 			break;
1698 
1699 		for (i = 0; i < 2; i++)
1700 		{
1701 			if (lowest_correl > ewp->lowest_correlation_cutoff)
1702 				continue;
1703 			compress_symbolic_block_fixed_partition_2_planes(decode_mode,
1704 															 mode_cutoff,
1705 															 ewp->max_refinement_iters,
1706 															 xdim, ydim, zdim,
1707 															 partition_count,
1708 															 partition_indices_2planes[i] & (PARTITION_COUNT - 1), partition_indices_2planes[i] >> PARTITION_BITS,
1709 															 blk, ewb, tempblocks, tmpbuf->planes2);
1710 
1711 			best_errorval_in_mode = 1e30f;
1712 			for (j = 0; j < 4; j++)
1713 			{
1714 				if (tempblocks[j].error_block)
1715 					continue;
1716 				decompress_symbolic_block(decode_mode, xdim, ydim, zdim, xpos, ypos, zpos, tempblocks + j, temp);
1717 
1718 				float errorval = compute_imageblock_difference(xdim, ydim, zdim,
1719 															   blk, temp, ewb);
1720 
1721 				#ifdef DEBUG_PRINT_DIAGNOSTICS
1722 					if (print_diagnostics)
1723 					{
1724 						printf("\n-----------------------------------\n");
1725 						printf("Dual-weight partition test %d (%d partitions) completed\n", i, partition_count);
1726 						printf("Resulting error value: %g\n", errorval);
1727 					}
1728 				#endif
1729 
1730 				if (errorval < best_errorval_in_mode)
1731 					best_errorval_in_mode = errorval;
1732 
1733 				if (errorval < error_of_best_block)
1734 				{
1735 					#ifdef DEBUG_PRINT_DIAGNOSTICS
1736 						if (print_diagnostics)
1737 							printf("Accepted as better than previous-best-error, which was %g\n", error_of_best_block);
1738 					#endif
1739 
1740 					error_of_best_block = errorval;
1741 					*scb = tempblocks[j];
1742 
1743 					// modesel = 4*(partition_count-2) + 5 + 2 + i;
1744 				}
1745 			}
1746 
1747 			best_errorvals_in_modes[4 * (partition_count - 2) + 5 + 2 + i] = best_errorval_in_mode;
1748 
1749 			#ifdef DEBUG_PRINT_DIAGNOSTICS
1750 				if (print_diagnostics)
1751 				{
1752 					printf("-----------------------------------\n");
1753 				}
1754 			#endif
1755 
1756 			if ((error_of_best_block / error_weight_sum) < ewp->texel_avg_error_limit)
1757 				goto END_OF_TESTS;
1758 		}
1759 	}
1760 
1761   END_OF_TESTS:
1762 
1763 	#if 0
1764 		if (print_statistics)
1765 		{
1766 			for (i = 0; i < 13; i++)
1767 				printf("%f ", best_errorvals_in_modes[i]);
1768 
1769 			printf("%d  %f  %f  %f ", modesel, error_of_best_block,
1770 				MIN(best_errorvals_in_modes[1], best_errorvals_in_modes[2]) / best_errorvals_in_modes[0],
1771 				MIN(MIN(best_errorvals_in_modes[7], best_errorvals_in_modes[8]), best_errorvals_in_modes[9]) / best_errorvals_in_modes[0]);
1772 
1773 			printf("\n");
1774 		}
1775 	#endif
1776 
1777 	if (scb->block_mode >= 0)
1778 		block_mode_histogram[scb->block_mode & 0x7ff]++;
1779 
1780 
1781 	// compress/decompress to a physical block
1782 	physical_compressed_block psb = symbolic_to_physical(xdim, ydim, zdim, scb);
1783 	physical_to_symbolic(xdim, ydim, zdim, psb, scb);
1784 
1785 
1786 	if (print_tile_errors)
1787 		printf("%g\n", error_of_best_block);
1788 
1789 
1790 	// mean squared error per color component.
1791 	return error_of_best_block / ((float)xdim * ydim * zdim);
1792 }
1793