1 /*
2 Copyright 2007 nVidia, Inc.
3 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
4 
5 You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
6 
7 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
8 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 
10 See the License for the specific language governing permissions and limitations under the License.
11 */
12 
13 // Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
14 
15 // x1000 777.1x4 64p 2bi (30b)
16 
17 #include "bits.h"
18 #include "tile.h"
19 #include "avpcl.h"
20 #include "nvcore/debug.h"
21 #include "nvmath/vector.inl"
22 #include "nvmath/matrix.inl"
23 #include "nvmath/fitting.h"
24 #include "avpcl_utils.h"
25 #include "endpts.h"
26 #include <string.h>
27 #include <float.h>
28 
29 #include "shapes_two.h"
30 
31 using namespace nv;
32 using namespace AVPCL;
33 
#define	NLSBMODES	4		// number of different lsb modes per region. since we have two .1 per region, that can have 4 values

#define NINDICES	4			// 2-bit indices select among 4 palette entries
#define	INDEXBITS	2			// bits per (non-anchor) index
#define	HIGH_INDEXBIT	(1<<(INDEXBITS-1))	// high bit of an index; must be 0 at anchor positions
#define	DENOM		(NINDICES-1)		// interpolation denominator for Utils::lerp
#define	BIAS		(DENOM/2)		// rounding bias used with DENOM

// WORK: determine optimal traversal pattern to search for best shape -- what does the error curve look like?
// i.e. can we search shapes in a particular order so we can see the global error minima easily and
// stop without having to touch all shapes?

// linear tile position <-> 4x4 coordinates
#define	POS_TO_X(pos)	((pos)&3)
#define	POS_TO_Y(pos)	(((pos)>>2)&3)

// two bitsize entries (endpoint A and endpoint B) per region
#define	NBITSIZES	(NREGIONS*2)
#define	ABITINDEX(region)	(2*(region)+0)
#define	BBITINDEX(region)	(2*(region)+1)
52 
// Per-channel endpoint bit widths, indexed via ABITINDEX/BBITINDEX.
struct ChanBits
{
	int nbitsizes[NBITSIZES];	// bitsizes for one channel
};
57 
// Description of one encoding pattern (bit layout) for this mode.
// NOTE: member order matters -- "patterns" below uses positional aggregate init.
struct Pattern
{
	ChanBits chan[NCHANNELS_RGB];//  bit patterns used per channel
	int transformed;		// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
	int mode;				// associated mode value
	int modebits;			// number of mode bits
	const char *encoding;			// verilog description of encoding for this mode
};
66 
#define	NPATTERNS 1
#define	NREGIONS  2

// Mode 3's single pattern: 7-bit RGB endpoints for both regions, untransformed,
// mode value 0x8 written in 4 mode bits. Keep in sync with pattern_precs below.
static Pattern patterns[NPATTERNS] =
{
	// red		green		blue		xfm	mode  mb
	7,7,7,7,	7,7,7,7,	7,7,7,7,	0,	0x8, 4, "",
};
75 
// Per-region endpoint precision (bits per channel) in compressed space.
struct RegionPrec
{
	int	endpt_a_prec[NCHANNELS_RGB];
	int endpt_b_prec[NCHANNELS_RGB];
};
81 
// Precision info for every region of one pattern.
struct PatternPrec
{
	RegionPrec region_precs[NREGIONS];
};
86 
87 
// this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
// (2 regions x 2 endpoints x 3 channels, all 7 bits for mode 3)
static PatternPrec pattern_precs[NPATTERNS] =
{
	7,7,7, 7,7,7, 7,7,7, 7,7,7,
};
94 
95 // return # of bits needed to store n. handle signed or unsigned cases properly
nbits(int n,bool issigned)96 static int nbits(int n, bool issigned)
97 {
98 	int nb;
99 	if (n==0)
100 		return 0;	// no bits needed for 0, signed or not
101 	else if (n > 0)
102 	{
103 		for (nb=0; n; ++nb, n>>=1) ;
104 		return nb + (issigned?1:0);
105 	}
106 	else
107 	{
108 		nvAssert (issigned);
109 		for (nb=0; n<-1; ++nb, n>>=1) ;
110 		return nb + 1;
111 	}
112 }
113 
// Mode 3 stores raw (untransformed) endpoints; patterns[].transformed is 0,
// so this must never be reached.
static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])
{
	nvUnreachable();
}
118 
// Inverse of transform_forward; likewise unused for mode 3.
static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])
{
	nvUnreachable();
}
123 
124 // endpoints are 888,888; reduce to 777,777 and put the lsb bit majority in compr_bits
compress_one(const IntEndptsRGB & endpts,IntEndptsRGB_2 & compr_endpts)125 static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts)
126 {
127 	int onescnt;
128 
129 	onescnt = 0;
130 	for (int j=0; j<NCHANNELS_RGB; ++j)
131 	{
132 		onescnt += endpts.A[j] & 1;
133 		compr_endpts.A[j] = endpts.A[j] >> 1;
134 		nvAssert (compr_endpts.A[j] < 128);
135 	}
136 	compr_endpts.a_lsb = onescnt >= 2;
137 
138 	onescnt = 0;
139 	for (int j=0; j<NCHANNELS_RGB; ++j)
140 	{
141 		onescnt += endpts.B[j] & 1;
142 		compr_endpts.B[j] = endpts.B[j] >> 1;
143 		nvAssert (compr_endpts.B[j] < 128);
144 	}
145 	compr_endpts.b_lsb = onescnt >= 2;
146 }
147 
uncompress_one(const IntEndptsRGB_2 & compr_endpts,IntEndptsRGB & endpts)148 static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts)
149 {
150 	for (int j=0; j<NCHANNELS_RGB; ++j)
151 	{
152 		endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.a_lsb;
153 		endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.b_lsb;
154 	}
155 }
156 
uncompress_endpoints(const IntEndptsRGB_2 compr_endpts[NREGIONS],IntEndptsRGB endpts[NREGIONS])157 static void uncompress_endpoints(const IntEndptsRGB_2 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
158 {
159 	for (int i=0; i<NREGIONS; ++i)
160 		uncompress_one(compr_endpts[i], endpts[i]);
161 }
162 
compress_endpoints(const IntEndptsRGB endpts[NREGIONS],IntEndptsRGB_2 compr_endpts[NREGIONS])163 static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_2 compr_endpts[NREGIONS])
164 {
165 	for (int i=0; i<NREGIONS; ++i)
166 		compress_one(endpts[i], compr_endpts[i]);
167 }
168 
169 
// quantize each region's float endpoints to the pattern's precision and
// compress the result to 7-bit + shared-lsb form
static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_2 q_endpts[NREGIONS])
{
	IntEndptsRGB unc[NREGIONS];	// quantized endpoints in uncompressed (8-bit) space

	for (int r = 0; r < NREGIONS; ++r)
	{
		const RegionPrec &prec = pattern_prec.region_precs[r];

		// +1 on each precision since quantization happens in uncompressed space
		unc[r].A[0] = Utils::quantize(endpts[r].A.x, prec.endpt_a_prec[0]+1);
		unc[r].A[1] = Utils::quantize(endpts[r].A.y, prec.endpt_a_prec[1]+1);
		unc[r].A[2] = Utils::quantize(endpts[r].A.z, prec.endpt_a_prec[2]+1);
		unc[r].B[0] = Utils::quantize(endpts[r].B.x, prec.endpt_b_prec[0]+1);
		unc[r].B[1] = Utils::quantize(endpts[r].B.y, prec.endpt_b_prec[1]+1);
		unc[r].B[2] = Utils::quantize(endpts[r].B.z, prec.endpt_b_prec[2]+1);

		compress_one(unc[r], q_endpts[r]);
	}
}
185 
186 // swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
swap_indices(IntEndptsRGB_2 endpts[NREGIONS],int indices[Tile::TILE_H][Tile::TILE_W],int shapeindex)187 static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
188 {
189 	for (int region = 0; region < NREGIONS; ++region)
190 	{
191 		int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,region);
192 
193 		int x = POS_TO_X(position);
194 		int y = POS_TO_Y(position);
195 		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table
196 		if (indices[y][x] & HIGH_INDEXBIT)
197 		{
198 			// high bit is set, swap the endpts and indices for this region
199 			int t;
200 			for (int i=0; i<NCHANNELS_RGB; ++i)
201 			{
202 				t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
203 			}
204 			t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;
205 
206 			for (int y = 0; y < Tile::TILE_H; y++)
207 			for (int x = 0; x < Tile::TILE_W; x++)
208 				if (REGION(x,y,shapeindex) == region)
209 					indices[y][x] = NINDICES - 1 - indices[y][x];
210 		}
211 	}
212 }
213 
// Mode 3 endpoints are stored raw at the pattern's precision (no delta
// transform), so any quantized endpoint set fits by construction.
static bool endpts_fit(IntEndptsRGB_2 endpts[NREGIONS], const Pattern &p)
{
	return true;
}
218 
// Serialize the block header: mode bits, shape index, endpoints
// (channel-major, region-minor, A before B), then one lsb bit per endpoint.
// Bit order is the wire format -- do not reorder these writes.
static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
{
	out.write(p.mode, p.modebits);
	out.write(shapeindex, SHAPEBITS);

	for (int j=0; j<NCHANNELS_RGB; ++j)
		for (int i=0; i<NREGIONS; ++i)
		{
			out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
			out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
		}

	for (int i=0; i<NREGIONS; ++i)
	{
		out.write(endpts[i].a_lsb, 1);
		out.write(endpts[i].b_lsb, 1);
	}

	// header is exactly 98 bits, leaving 30 for the indices (see file banner)
	nvAssert (out.getptr() == 98);
}
239 
// Parse the block header written by write_header; field order must match
// exactly. Outputs the endpoints, shape index, pattern, and pattern index.
static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
{
	int mode = AVPCL::getmode(in);

	// mode 3 has a single pattern, so the pattern index is fixed
	pat_index = 0;
	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
	nvAssert (in.getptr() == patterns[pat_index].modebits);

	shapeindex = in.read(SHAPEBITS);
	p = patterns[pat_index];

	for (int j=0; j<NCHANNELS_RGB; ++j)
		for (int i=0; i<NREGIONS; ++i)
		{
			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
		}

	for (int i=0; i<NREGIONS; ++i)
	{
		endpts[i].a_lsb  = in.read(1);
		endpts[i].b_lsb  = in.read(1);
	}

	// header is exactly 98 bits; the remaining 30 are index data
	nvAssert (in.getptr() == 98);
}
266 
// write all 16 indices in raster order; anchor positions (one per region)
// are written with one less bit since their high bit is guaranteed zero
static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
{
	int anchors[NREGIONS];

	for (int r = 0; r < NREGIONS; ++r)
		anchors[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);

	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
	{
		bool is_anchor = false;
		for (int r = 0; r < NREGIONS; ++r)
			is_anchor = is_anchor || (anchors[r] == pos);

		out.write(indices[POS_TO_Y(pos)][POS_TO_X(pos)], is_anchor ? INDEXBITS-1 : INDEXBITS);
	}
}
287 
read_indices(Bits & in,int shapeindex,int indices[Tile::TILE_H][Tile::TILE_W])288 static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
289 {
290 	int positions[NREGIONS];
291 
292 	for (int r = 0; r < NREGIONS; ++r)
293 		positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r);
294 
295 	for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
296 	{
297 		int x = POS_TO_X(pos);
298 		int y = POS_TO_Y(pos);
299 
300 		bool match = false;
301 
302 		for (int r = 0; r < NREGIONS; ++r)
303 			if (positions[r] == pos) { match = true; break; }
304 
305 		indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0));
306 	}
307 }
308 
// pack header (mode, shape, endpoints, lsbs) followed by indices into one block
static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
{
	Bits bits(block, AVPCL::BITSIZE);

	write_header(endpts, shapeindex, p, bits);
	write_indices(indices, shapeindex, bits);

	// every bit of the block must have been written
	nvAssert(bits.getptr() == AVPCL::BITSIZE);
}
319 
generate_palette_quantized(const IntEndptsRGB_2 & endpts_2,const RegionPrec & region_prec,Vector4 palette[NINDICES])320 static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
321 {
322 	IntEndptsRGB endpts;
323 
324 	uncompress_one(endpts_2, endpts);
325 
326 	// scale endpoints
327 	int a, b;			// really need a IntVec4...
328 
329 	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1);	// +1 since we are in uncompressed space
330 	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);
331 
332 	// interpolate
333 	for (int i = 0; i < NINDICES; ++i)
334 		palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
335 
336 	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1);
337 	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
338 
339 	// interpolate
340 	for (int i = 0; i < NINDICES; ++i)
341 		palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
342 
343 	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1);
344 	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
345 
346 	// interpolate
347 	for (int i = 0; i < NINDICES; ++i)
348 		palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
349 
350 	// constant alpha
351 	for (int i = 0; i < NINDICES; ++i)
352 		palette[i].w = 255.0f;
353 }
354 
// Only needed for transformed (signed-delta) patterns; mode 3 has none,
// so this must never be reached.
static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])
{
	nvUnreachable();
}
359 
// Decode one mode-3 block into a tile: read the header and indices, rebuild
// each region's 4-color palette from the quantized endpoints, then look up
// every texel's color by its region and index.
void AVPCL::decompress_mode3(const char *block, Tile &t)
{
	Bits in(block, AVPCL::BITSIZE);

	Pattern p;
	IntEndptsRGB_2 endpts[NREGIONS];
	int shapeindex, pat_index;

	read_header(in, endpts, shapeindex, p, pat_index);

	// dead for mode 3 (transformed==0), kept for structural parity with other modes
	if (p.transformed)
	{
		sign_extend(p, endpts);
		transform_inverse(endpts);
	}

	Vector4 palette[NREGIONS][NINDICES];
	for (int r = 0; r < NREGIONS; ++r)
		generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);

	int indices[Tile::TILE_H][Tile::TILE_W];

	read_indices(in, shapeindex, indices);

	// all 128 bits must have been consumed
	nvAssert(in.getptr() == AVPCL::BITSIZE);

	// lookup
	for (int y = 0; y < Tile::TILE_H; y++)
	for (int x = 0; x < Tile::TILE_W; x++)
		t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
}
391 
392 // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
map_colors(const Vector4 colors[],const float importance[],int np,const IntEndptsRGB_2 & endpts,const RegionPrec & region_prec,float current_err,int indices[Tile::TILE_TOTAL])393 static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
394 {
395 	Vector4 palette[NINDICES];
396 	float toterr = 0;
397 	Vector4 err;
398 
399 	generate_palette_quantized(endpts, region_prec, palette);
400 
401 	for (int i = 0; i < np; ++i)
402 	{
403 		float besterr = FLT_MAX;
404 
405 		for (int j = 0; j < NINDICES && besterr > 0; ++j)
406 		{
407             float err = Utils::metric4(colors[i], palette[j]) * importance[i];
408 
409 			if (err > besterr)	// error increased, so we're done searching
410 				break;
411 			if (err < besterr)
412 			{
413 				besterr = err;
414 				indices[i] = j;
415 			}
416 		}
417 		toterr += besterr;
418 
419 		// check for early exit
420 		if (toterr > current_err)
421 		{
422 			// fill out bogus index values so it's initialized at least
423 			for (int k = i; k < np; ++k)
424 				indices[k] = -1;
425 
426 			return FLT_MAX;
427 		}
428 	}
429 	return toterr;
430 }
431 
// Build each region's palette from its quantized endpoints, then assign every
// tile texel the best palette index and accumulate per-region error.
// (Fix: removed an unused local `Vector4 err` that was shadowed by the
// inner `float err`.)
static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec,
						   int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{
	// build list of possibles
	Vector4 palette[NREGIONS][NINDICES];

	for (int region = 0; region < NREGIONS; ++region)
	{
		generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
		toterr[region] = 0;
	}

	for (int y = 0; y < tile.size_y; y++)
	for (int x = 0; x < tile.size_x; x++)
	{
		int region = REGION(x,y,shapeindex);
		float err, besterr = FLT_MAX;

		for (int i = 0; i < NINDICES && besterr > 0; ++i)
		{
			err = Utils::metric4(tile.data[y][x], palette[region][i]);

			if (err > besterr)	// error increased, so we're done searching
				break;
			if (err < besterr)
			{
				besterr = err;
				indices[y][x] = i;
			}
		}
		toterr[region] += besterr;
	}
}
467 
468 // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
469 // this function returns either old_err or a value smaller (if it was successful in improving the error)
perturb_one(const Vector4 colors[],const float importance[],int np,int ch,const RegionPrec & region_prec,const IntEndptsRGB_2 & old_endpts,IntEndptsRGB_2 & new_endpts,float old_err,int do_b,int indices[Tile::TILE_TOTAL])470 static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
471 						  float old_err, int do_b, int indices[Tile::TILE_TOTAL])
472 {
473 	// we have the old endpoints: old_endpts
474 	// we have the perturbed endpoints: new_endpts
475 	// we have the temporary endpoints: temp_endpts
476 
477 	IntEndptsRGB_2 temp_endpts;
478 	float min_err = old_err;		// start with the best current error
479 	int beststep;
480 	int temp_indices[Tile::TILE_TOTAL];
481 
482 	for (int i=0; i<np; ++i)
483 		indices[i] = -1;
484 
485 	// copy real endpoints so we can perturb them
486 	temp_endpts = new_endpts = old_endpts;
487 
488 	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];
489 
490 	// do a logarithmic search for the best error for this endpoint (which)
491 	for (int step = 1 << (prec-1); step; step >>= 1)
492 	{
493 		bool improved = false;
494 		for (int sign = -1; sign <= 1; sign += 2)
495 		{
496 			if (do_b == 0)
497 			{
498 				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
499 				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
500 					continue;
501 			}
502 			else
503 			{
504 				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
505 				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
506 					continue;
507 			}
508 
509             float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
510 
511 			if (err < min_err)
512 			{
513 				improved = true;
514 				min_err = err;
515 				beststep = sign * step;
516 				for (int i=0; i<np; ++i)
517 					indices[i] = temp_indices[i];
518 			}
519 		}
520 		// if this was an improvement, move the endpoint and continue search from there
521 		if (improved)
522 		{
523 			if (do_b == 0)
524 				new_endpts.A[ch] += beststep;
525 			else
526 				new_endpts.B[ch] += beststep;
527 		}
528 	}
529 	return min_err;
530 }
531 
532 // the larger the error the more time it is worth spending on an exhaustive search.
533 // perturb the endpoints at least -3 to 3.
534 // if err > 5000 perturb endpoints 50% of precision
535 // if err > 1000 25%
536 // if err > 200 12.5%
537 // if err > 40  6.25%
538 // for np = 16 -- adjust error thresholds as a function of np
539 // always ensure endpoint ordering is preserved (no need to overlap the scan)
540 // if orig_err returned from this is less than its input value, then indices[] will contain valid indices
exhaustive(const Vector4 colors[],const float importance[],int np,int ch,const RegionPrec & region_prec,float & orig_err,IntEndptsRGB_2 & opt_endpts,int indices[Tile::TILE_TOTAL])541 static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
542 {
543 	IntEndptsRGB_2 temp_endpts;
544 	float best_err = orig_err;
545 	int aprec = region_prec.endpt_a_prec[ch];
546 	int bprec = region_prec.endpt_b_prec[ch];
547 	int good_indices[Tile::TILE_TOTAL];
548 	int temp_indices[Tile::TILE_TOTAL];
549 
550 	for (int i=0; i<np; ++i)
551 		indices[i] = -1;
552 
553 	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
554 
555 	if (orig_err == 0) return orig_err;
556 
557 	int adelta = 0, bdelta = 0;
558 	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
559 	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
560 	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
561 	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
562 	adelta = max(adelta, 3);
563 	bdelta = max(bdelta, 3);
564 
565 #ifdef	DISABLE_EXHAUSTIVE
566 	adelta = bdelta = 3;
567 #endif
568 
569 	temp_endpts = opt_endpts;
570 
571 	// ok figure out the range of A and B
572 	int alow = max(0, opt_endpts.A[ch] - adelta);
573 	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
574 	int blow = max(0, opt_endpts.B[ch] - bdelta);
575 	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
576 
577 	// now there's no need to swap the ordering of A and B
578 	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
579 
580 	int amin, bmin;
581 
582 	if (opt_endpts.A[ch] <= opt_endpts.B[ch])
583 	{
584 		// keep a <= b
585 		for (int a = alow; a <= ahigh; ++a)
586 		for (int b = max(a, blow); b < bhigh; ++b)
587 		{
588 			temp_endpts.A[ch] = a;
589 			temp_endpts.B[ch] = b;
590 
591             float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
592 			if (err < best_err)
593 			{
594 				amin = a;
595 				bmin = b;
596 				best_err = err;
597 				for (int i=0; i<np; ++i)
598 					good_indices[i] = temp_indices[i];
599 			}
600 		}
601 	}
602 	else
603 	{
604 		// keep b <= a
605 		for (int b = blow; b < bhigh; ++b)
606 		for (int a = max(b, alow); a <= ahigh; ++a)
607 		{
608 			temp_endpts.A[ch] = a;
609 			temp_endpts.B[ch] = b;
610 
611             float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
612 			if (err < best_err)
613 			{
614 				amin = a;
615 				bmin = b;
616 				best_err = err;
617 				for (int i=0; i<np; ++i)
618 					good_indices[i] = temp_indices[i];
619 			}
620 		}
621 	}
622 	if (best_err < orig_err)
623 	{
624 		opt_endpts.A[ch] = amin;
625 		opt_endpts.B[ch] = bmin;
626 		orig_err = best_err;
627 		// if we actually improved, update the indices
628 		for (int i=0; i<np; ++i)
629 			indices[i] = good_indices[i];
630 	}
631 	return best_err;
632 }
633 
// Iteratively refine one region's endpoints for one lsb mode: per channel,
// perturb endpoint A and B, keep alternating endpoints while the error drops,
// then run a bounded exhaustive search. Whenever an improvement changes the
// index assignment, the channel loop restarts from ch=0 (via ch = -1).
// Returns the best error found; opt_endpts always holds valid endpoints.
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
{
	float opt_err = orig_err;

	opt_endpts = orig_endpts;

	/*
		err0 = perturb(rgb0, delta0)
		err1 = perturb(rgb1, delta1)
		if (err0 < err1)
			if (err0 >= initial_error) break
			rgb0 += delta0
			next = 1
		else
			if (err1 >= initial_error) break
			rgb1 += delta1
			next = 0
		initial_err = map()
		for (;;)
			err = perturb(next ? rgb1:rgb0, delta)
			if (err >= initial_err) break
			next? rgb1 : rgb0 += delta
			initial_err = err
	*/
	IntEndptsRGB_2 new_a, new_b;
	IntEndptsRGB_2 new_endpt;
	int do_b;
	int orig_indices[Tile::TILE_TOTAL];
	int new_indices[Tile::TILE_TOTAL];
	int temp_indices0[Tile::TILE_TOTAL];
	int temp_indices1[Tile::TILE_TOTAL];

	// now optimize each channel separately
	// for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
	// if they differ, we restart the loop (which then falls back to looking for a first improvement.)
	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
	{
		// figure out which endpoint when perturbed gives the most improvement and start there
		// if we just alternate, we can easily end up in a local minima
		float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
		float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B

		if (err0 < err1)
		{
			if (err0 >= opt_err)
				continue;	// neither perturbation helped this channel

			for (int i=0; i<np; ++i)
			{
				new_indices[i] = orig_indices[i] = temp_indices0[i];
				nvAssert (orig_indices[i] != -1);
			}

			opt_endpts.A[ch] = new_a.A[ch];
			opt_err = err0;
			do_b = 1;		// do B next
		}
		else
		{
			if (err1 >= opt_err)
				continue;

			for (int i=0; i<np; ++i)
			{
				new_indices[i] = orig_indices[i] = temp_indices1[i];
				nvAssert (orig_indices[i] != -1);
			}

			opt_endpts.B[ch] = new_b.B[ch];
			opt_err = err1;
			do_b = 0;		// do A next
		}

		// now alternate endpoints and keep trying until there is no improvement
		for (;;)
		{
			float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			if (err >= opt_err)
				break;

			for (int i=0; i<np; ++i)
			{
				new_indices[i] = temp_indices0[i];
				nvAssert (new_indices[i] != -1);
			}

			if (do_b == 0)
				opt_endpts.A[ch] = new_endpt.A[ch];
			else
				opt_endpts.B[ch] = new_endpt.B[ch];
			opt_err = err;
			do_b = 1 - do_b;	// now move the other endpoint
		}

		// see if the indices have changed
		int i;
		for (i=0; i<np; ++i)
			if (orig_indices[i] != new_indices[i])
				break;

		if (i<np)
			ch = -1;	// start over
	}

	// finally, do a small exhaustive search around what we think is the global minima to be sure
	// note this is independent of the above search, so we don't care about the indices from the above
	// we don't care about the above because if they differ, so what? we've already started at ch=0
	bool first = true;
	for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
	{
		float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);

		if (new_err < opt_err)
		{
			opt_err = new_err;

			if (first)
			{
				for (int i=0; i<np; ++i)
				{
					orig_indices[i] = temp_indices0[i];
					nvAssert (orig_indices[i] != -1);
				}
				first = false;
			}
			else
			{
				// see if the indices have changed
				int i;
				for (i=0; i<np; ++i)
					if (orig_indices[i] != temp_indices0[i])
						break;

				if (i<np)
				{
					ch = -1;	// start over
					first = true;
				}
			}
		}
	}

	return opt_err;
}
778 
// this will return a valid set of endpoints in opt_endpts regardless of whether it improve orig_endpts or not
// For each region: gather its pixels and importance weights, then try all four
// lsb-bit combinations (NLSBMODES), optimizing endpoints for each and keeping
// the combination with the lowest error.
static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS],
							const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
{
	Vector4 pixels[Tile::TILE_TOTAL];
	float importance[Tile::TILE_TOTAL];
	IntEndptsRGB_2 temp_in, temp_out;
	int temp_indices[Tile::TILE_TOTAL];

	for (int region=0; region<NREGIONS; ++region)
	{
		// collect the pixels in the region
		int np = 0;

		for (int y = 0; y < tile.size_y; y++) {
			for (int x = 0; x < tile.size_x; x++) {
				if (REGION(x, y, shapeindex) == region) {
					pixels[np] = tile.data[y][x];
					importance[np] = tile.importance_map[y][x];
					np++;
				}
			}
		}

		opt_endpts[region] = temp_in = orig_endpts[region];
		opt_err[region] = orig_err[region];

		float best_err = orig_err[region];

		// try all four (a_lsb, b_lsb) combinations
		for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
		{
			temp_in.a_lsb = lsbmode & 1;
			temp_in.b_lsb = (lsbmode >> 1) & 1;

			// make sure we have a valid error for temp_in
			// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
			// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
			float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);

			// now try to optimize these endpoints
			float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);

			// if we find an improvement, update the best so far and correct the output endpoints and errors
			if (temp_out_err < best_err)
			{
				best_err = temp_out_err;
				opt_err[region] = temp_out_err;
				opt_endpts[region] = temp_out;
			}
		}
	}
}
831 
832 /* optimization algorithm
833 	for each pattern
834 		convert endpoints using pattern precision
835 		assign indices and get initial error
836 		compress indices (and possibly reorder endpoints)
837 		transform endpoints
838 		if transformed endpoints fit pattern
839 			get original endpoints back
840 			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
841 			compress new indices
842 			transform new endpoints
843 			if new endpoints fit pattern AND if error is improved
844 				emit compressed block with new data
845 			else
846 				emit compressed block with original data // to try to preserve maximum endpoint precision
847 */
848 
// Top of the per-shape compression pipeline (see the algorithm comment above):
// quantize the float endpoints, assign indices, optimize, and emit whichever
// of the optimized or original encodings has lower total error. Returns that
// error, or FLT_MAX if no pattern fit (cannot happen for mode 3's one pattern).
static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
{
	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
	IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];

	for (int sp = 0; sp < NPATTERNS; ++sp)
	{
		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
		assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
		swap_indices(orig_endpts, orig_indices, shapeindex_best);
		if (patterns[sp].transformed)	// dead for mode 3 (transformed==0)
			transform_forward(orig_endpts);
		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
		// the assumption made is that if they don't fit now, they won't fit after optimizing.
		if (endpts_fit(orig_endpts, patterns[sp]))
		{
			if (patterns[sp].transformed)
				transform_inverse(orig_endpts);
			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
			assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			// (nreed) Commented out asserts because they go off all the time...not sure why
			//for (int i=0; i<NREGIONS; ++i)
			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			swap_indices(opt_endpts, opt_indices, shapeindex_best);
			if (patterns[sp].transformed)
				transform_forward(opt_endpts);
			orig_toterr = opt_toterr = 0;
			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			{
				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
				return opt_toterr;
			}
			else
			{
				// either it stopped fitting when we optimized it, or there was no improvement
				// so go back to the unoptimized endpoints which we know will fit
				if (patterns[sp].transformed)
					transform_forward(orig_endpts);
				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
				return orig_toterr;
			}
		}
	}
	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 3).";
	return FLT_MAX;
}
897 
clamp(Vector4 & v)898 static void clamp(Vector4 &v)
899 {
900 	if (v.x < 0.0f) v.x = 0.0f;
901 	if (v.x > 255.0f) v.x = 255.0f;
902 	if (v.y < 0.0f) v.y = 0.0f;
903 	if (v.y > 255.0f) v.y = 255.0f;
904 	if (v.z < 0.0f) v.z = 0.0f;
905 	if (v.z > 255.0f) v.z = 255.0f;
906 	v.w = 255.0f;
907 }
908 
generate_palette_unquantized(const FltEndpts endpts[NREGIONS],Vector4 palette[NREGIONS][NINDICES])909 static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
910 {
911 	for (int region = 0; region < NREGIONS; ++region)
912 	for (int i = 0; i < NINDICES; ++i)
913 		palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
914 }
915 
916 // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
map_colors(const Tile & tile,int shapeindex,const FltEndpts endpts[NREGIONS])917 static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
918 {
919 	// build list of possibles
920 	Vector4 palette[NREGIONS][NINDICES];
921 
922 	generate_palette_unquantized(endpts, palette);
923 
924 	float toterr = 0;
925 	Vector4 err;
926 
927 	for (int y = 0; y < tile.size_y; y++)
928 	for (int x = 0; x < tile.size_x; x++)
929 	{
930 		int region = REGION(x,y,shapeindex);
931 		float err, besterr = FLT_MAX;
932 
933 		for (int i = 0; i < NINDICES && besterr > 0; ++i)
934 		{
935 			err = Utils::metric4(tile.data[y][x], palette[region][i]);
936 
937 			if (err > besterr)	// error increased, so we're done searching. this works for most norms.
938 				break;
939 			if (err < besterr)
940 				besterr = err;
941 		}
942 		toterr += besterr;
943 	}
944 	return toterr;
945 }
946 
// Produce a fast, rough pair of endpoints for each region of the given shape
// (principal-component fit through the region's pixels, clamped to [0,255]),
// then return the total mapping error of those endpoints via map_colors().
static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
{
	for (int region=0; region<NREGIONS; ++region)
	{
		int np = 0;
		Vector3 colors[Tile::TILE_TOTAL];
		float alphas[2];			// only the first two pixels' alphas are kept; used by the np==1 and np==2 shortcuts below
		Vector4 mean(0,0,0,0);

		// gather the pixels belonging to this region
		for (int y = 0; y < tile.size_y; y++)
		for (int x = 0; x < tile.size_x; x++)
			if (REGION(x,y,shapeindex) == region)
			{
				colors[np] = tile.data[y][x].xyz();
				if (np < 2) alphas[np] = tile.data[y][x].w;
				mean += tile.data[y][x];
				++np;
			}

		// handle simple cases
		if (np == 0)
		{
			// empty region: degenerate endpoints, opaque black
			Vector4 zero(0,0,0,255.0f);
			endpts[region].A = zero;
			endpts[region].B = zero;
			continue;
		}
		else if (np == 1)
		{
			// single pixel: both endpoints are that pixel
			endpts[region].A = Vector4(colors[0], alphas[0]);
			endpts[region].B = Vector4(colors[0], alphas[0]);
			continue;
		}
		else if (np == 2)
		{
			// two pixels: use them directly as the endpoints
			endpts[region].A = Vector4(colors[0], alphas[0]);
			endpts[region].B = Vector4(colors[1], alphas[1]);
			continue;
		}

		mean /= float(np);

		// principal axis of the region's RGB values (see note at top of file re: SVD/PCA)
		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);

		// project each pixel value along the principal direction
		float minp = FLT_MAX, maxp = -FLT_MAX;
		for (int i = 0; i < np; i++)
		{
			float dp = dot(colors[i]-mean.xyz(), direction);
			if (dp < minp) minp = dp;
			if (dp > maxp) maxp = dp;
		}

		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
		endpts[region].A = mean + minp*Vector4(direction, 0);
		endpts[region].B = mean + maxp*Vector4(direction, 0);

		// clamp endpoints
		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
		// shape based on endpoints being clamped
		clamp(endpts[region].A);
		clamp(endpts[region].B);
	}

	return map_colors(tile, shapeindex, endpts);
}
1013 
// Exchange elements i and j in two parallel arrays (keeps the score list and
// its companion index list in step).
static void swap(float *list1, int *list2, int i, int j)
{
	const float ftmp = list1[j];
	const int   itmp = list2[j];
	list1[j] = list1[i];
	list2[j] = list2[i];
	list1[i] = ftmp;
	list2[i] = itmp;
}
1019 
compress_mode3(const Tile & t,char * block)1020 float AVPCL::compress_mode3(const Tile &t, char *block)
1021 {
1022 	// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
1023 	// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
1024 	const int NITEMS=NSHAPES/4;
1025 
1026 	// pick the best NITEMS shapes and refine these.
1027 	struct {
1028 		FltEndpts endpts[NREGIONS];
1029 	} all[NSHAPES];
1030 	float roughmse[NSHAPES];
1031 	int index[NSHAPES];
1032 	char tempblock[AVPCL::BLOCKSIZE];
1033 	float msebest = FLT_MAX;
1034 
1035 	for (int i=0; i<NSHAPES; ++i)
1036 	{
1037 		roughmse[i] = rough(t, i, &all[i].endpts[0]);
1038 		index[i] = i;
1039 	}
1040 
1041 	// bubble sort -- only need to bubble up the first NITEMS items
1042 	for (int i=0; i<NITEMS; ++i)
1043 	for (int j=i+1; j<NSHAPES; ++j)
1044 		if (roughmse[i] > roughmse[j])
1045 			swap(roughmse, index, i, j);
1046 
1047 	for (int i=0; i<NITEMS && msebest>0; ++i)
1048 	{
1049 		int shape = index[i];
1050 		float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
1051 		if (mse < msebest)
1052 		{
1053 			memcpy(block, tempblock, sizeof(tempblock));
1054 			msebest = mse;
1055 		}
1056 	}
1057 	return msebest;
1058 }
1059 
1060