/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.

You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

See the License for the specific language governing permissions and limitations under the License.
*/

// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis

// x100000 2r 777x2 8x2 2bi 2bi

#include "bits.h"
#include "tile.h"
#include "avpcl.h"
#include "nvcore/debug.h"
#include "nvmath/vector.inl"
#include "nvmath/matrix.inl"
#include "nvmath/fitting.h"
#include "avpcl_utils.h"
#include "endpts.h"
#include <string.h>
#include <float.h>

using namespace nv;
using namespace AVPCL;

// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits
// array 0 is always the RGB array and array 1 is always the A array
#define	NINDEXARRAYS	2
#define	INDEXARRAY_RGB	0
#define INDEXARRAY_A	1
#define INDEXARRAY_2BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
#define INDEXARRAY_3BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)

#define NINDICES3	4
#define	INDEXBITS3	2
#define	HIGH_INDEXBIT3	(1<<(INDEXBITS3-1))
#define	DENOM3		(NINDICES3-1)
#define	BIAS3		(DENOM3/2)

#define NINDICES2	4
#define	INDEXBITS2	2
#define	HIGH_INDEXBIT2	(1<<(INDEXBITS2-1))
#define	DENOM2		(NINDICES2-1)
#define	BIAS2		(DENOM2/2)

#define	NINDICES_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2)
#define	INDEXBITS_RGB(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2)
#define	HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2)
#define	DENOM_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2)
#define	BIAS_RGB(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2)

#define	NINDICES_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3)
#define	INDEXBITS_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3)
#define	HIGH_INDEXBIT_A(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3)
#define	DENOM_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3)
#define	BIAS_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3)
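
// NOTE: in this mode both the "2-bit" and "3-bit" sets resolve to the same values
// (NINDICES3 == NINDICES2 == 4, INDEXBITS3 == INDEXBITS2 == 2), so INDEXMODE never
// changes the bit counts here; the macros above are kept for symmetry with the
// other modes, which do use a genuine 2-bit/3-bit split.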

#define	NSHAPES	1

static int shapes[NSHAPES] =
{
	0x0000,
};

#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
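
// e.g. REGION(3,0,shapeindex) tests bit 12 of the shape word; with the single
// shape 0x0000 every bit is clear, so every pixel of the tile lands in region 0.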

#define NREGIONS	1			// keep the region stuff in just in case...

// encoded index compression location: region 0 is always at 0,0.

#define	NBITSIZES	2			// one endpoint pair

struct ChanBits
{
	int nbitsizes[NBITSIZES];	// bitsizes for one channel
};

struct Pattern
{
	ChanBits chan[NCHANNELS_RGBA];//  bit patterns used per channel
	int transform_mode;		// bit 0 (TRANSFORM_MODE_ALPHA) set means the alpha endpoints are transformed; bit 1 (TRANSFORM_MODE_RGB) set means the RGB endpoints are
	int mode;				// associated mode value
	int modebits;			// number of mode bits
	const char *encoding;			// verilog description of encoding for this mode
};

#define	TRANSFORM_MODE_ALPHA	1
#define	TRANSFORM_MODE_RGB	2

#define	NPATTERNS 1

static Pattern patterns[NPATTERNS] =
{
	// red		green		blue		alpha	xfm	mode  mb encoding
	7,7,		7,7,		7,7,		8,8,	0x0, 0x20, 6, "",
};
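
// the single mode 5 pattern: 7-bit RGB endpoints, 8-bit alpha endpoints, no endpoint
// transform (xfm 0x0), and a 6-bit mode field whose value 0x20 is the "x100000"
// bit string noted at the top of this file.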

struct RegionPrec
{
	int	endpt_a_prec[NCHANNELS_RGBA];
	int endpt_b_prec[NCHANNELS_RGBA];
};

struct PatternPrec
{
	RegionPrec region_precs[NREGIONS];
};

// this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] =
{
	7,7,7,8,	7,7,7,8,
};

// return # of bits needed to store n. handles signed and unsigned cases properly
static int nbits(int n, bool issigned)
{
	int nb;
	if (n==0)
		return 0;	// no bits needed for 0, signed or not
	else if (n > 0)
	{
		for (nb=0; n; ++nb, n>>=1) ;
		return nb + (issigned?1:0);
	}
	else
	{
		nvAssert (issigned);
		for (nb=0; n<-1; ++nb, n>>=1) ;
		return nb + 1;
	}
}
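
// for example: nbits(7, false) == 3 (111), nbits(7, true) == 4 (0111), and
// nbits(-2, true) == 2 (10); the negative case assumes >> is an arithmetic
// shift on signed ints, as this code does throughout.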

#define	R_0	ep[0].A[i]
#define	R_1 ep[0].B[i]

static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS])
{
	int i;

	if (transform_mode & TRANSFORM_MODE_RGB)
		for (i=CHANNEL_R; i<CHANNEL_A; ++i)
			R_1 -= R_0;
	if (transform_mode & TRANSFORM_MODE_ALPHA)
	{
		i = CHANNEL_A;
		R_1 -= R_0;
	}
}

static void transform_inverse(int transform_mode, IntEndptsRGBA ep[NREGIONS])
{
	int i;

	if (transform_mode & TRANSFORM_MODE_RGB)
		for (i=CHANNEL_R; i<CHANNEL_A; ++i)
			R_1 += R_0;
	if (transform_mode & TRANSFORM_MODE_ALPHA)
	{
		i = CHANNEL_A;
		R_1 += R_0;
	}
}
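
// a minimal sketch of the delta transform the two functions above implement: with
// TRANSFORM_MODE_RGB set, each RGB channel stores B-A in place of B, and
// transform_inverse recovers B as (B-A)+A; likewise for the alpha bit. mode 5's
// only pattern has transform_mode 0x0, so here both functions are effectively no-ops.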

static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA q_endpts[NREGIONS])
{
	for (int region = 0; region < NREGIONS; ++region)
	{
		q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
		q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
		q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
		q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]);

		q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
		q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
		q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
		q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]);
	}
}
// swap endpoints as needed to ensure that the anchor index of each region (in each index array) has a 0 high-order bit
// an index position is 0 at x=0 y=0 and 15 at x=3 y=3, so y = (index >> 2) & 3 and x = index & 3
static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
{
	int index_positions[NREGIONS];

	index_positions[0] = 0;			// since WLOG we have the high bit of the shapes at 0

	for (int region = 0; region < NREGIONS; ++region)
	{
		int x = index_positions[region] & 3;
		int y = (index_positions[region] >> 2) & 3;
		nvAssert(REGION(x,y,shapeindex) == region);		// double check the table

		// swap RGB
		if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
		{
			// high bit is set, swap the endpts and indices for this region
			int t;
			for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }

			for (int y = 0; y < Tile::TILE_H; y++)
			for (int x = 0; x < Tile::TILE_W; x++)
				if (REGION(x,y,shapeindex) == region)
					indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x];
		}

		// swap A
		if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode))
		{
			// high bit is set, swap the endpts and indices for this region
			int t;
			for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }

			for (int y = 0; y < Tile::TILE_H; y++)
			for (int x = 0; x < Tile::TILE_W; x++)
				if (REGION(x,y,shapeindex) == region)
					indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x];
		}
	}
}
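
// flipping an index i to NINDICES-1-i while swapping endpoints A and B should leave
// the decoded color unchanged, assuming Utils::lerp weights its endpoints
// symmetrically: with 4 indices, entry 3 of the (A,B) ramp equals entry 0 of the
// (B,A) ramp, entry 2 equals entry 1, and so on. that is what lets the encoder
// force each anchor index's high bit to 0 so write_indices can drop that bit.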

static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p)
{
	return true;
}

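// mode 5 block layout as written below: 6 mode bits + 2 rotate bits +
// (7+7)*3 RGB endpoint bits + (8+8) alpha endpoint bits = 66 header bits,
// then 31 + 31 index bits (16 2-bit indices per array, each array losing one
// anchor bit) for a 128-bit block.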
static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out)
{
	// ignore shapeindex
	out.write(p.mode, p.modebits);
	out.write(rotatemode, ROTATEMODE_BITS);
//	out.write(indexmode, INDEXMODE_BITS);
	for (int i=0; i<NREGIONS; ++i)
		for (int j=0; j<NCHANNELS_RGBA; ++j)
		{
			out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]);
			out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]);
		}
	nvAssert (out.getptr() == 66);
}

static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)
{
	int mode = AVPCL::getmode(in);	// reads (and skips past) the mode bits; the value itself is unused since mode 5 has one pattern

	pat_index = 0;

	nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
	nvAssert (in.getptr() == patterns[pat_index].modebits);

	p = patterns[pat_index];

	shapeindex = 0;		// we don't have any

	rotatemode = in.read(ROTATEMODE_BITS);

	indexmode = 0;		// we don't have any

	for (int i=0; i<NREGIONS; ++i)
		for (int j=0; j<NCHANNELS_RGBA; ++j)
		{
			endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);
			endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
		}
	nvAssert (in.getptr() == 66);
}

static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)
{
	// the index we shorten by one bit is always at position 0

	// do the 2 bit indices first
	nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0);
	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
		out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0));	// write i..[1:0] or i..[0]

	// then the 3 bit indices (also 2 bits wide in this mode)
	nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0);
	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
		out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0));	// write i..[1:0] or i..[0]
}

static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
{
	// the index we shorten by one bit is always at position 0

	// do the 2 bit indices first
	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
		indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0));		// read i..[1:0] or i..[0]

	// then the 3 bit indices (also 2 bits wide in this mode)
	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
		indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0));		// read i..[1:0] or i..[0]
}

static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block)
{
	Bits out(block, AVPCL::BITSIZE);

	write_header(endpts, shapeindex, p, rotatemode, indexmode, out);

	write_indices(indices, shapeindex, indexmode, out);

	nvAssert(out.getptr() == AVPCL::BITSIZE);
}

static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])
{
	// scale endpoints for RGB
	int a, b;

	a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]);
	b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);

	// interpolate R
	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
		palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));

	a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]);
	b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);

	// interpolate G
	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
		palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));

	a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]);
	b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);

	// interpolate B
	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
		palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));

	a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]);
	b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);

	// interpolate A
	for (int i = 0; i < NINDICES_A(indexmode); ++i)
		palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)));
}
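
// the palette ramps above are built with Utils::lerp(a, b, i, bias, denom); given
// how BIAS/DENOM are defined here (denom = nindices-1, bias = denom/2), it
// presumably computes a rounded integer interpolation along the lines of
// (a*(denom-i) + b*i + bias) / denom, running from endpoint a at i=0 to endpoint b
// at i=denom. the exact weights live in avpcl_utils, not in this file.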

static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
{
	for (int i=0; i<NCHANNELS_RGBA; ++i)
	{
		if (p.transform_mode)
		{
			// dead code for this mode: the only pattern has transform_mode 0x0.
			// note the endpts[1] accesses below would be out of bounds for NREGIONS==1.
			// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);	// always positive here
			endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
			endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
			endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
		}
	}
}

static void rotate_tile(const Tile &in, int rotatemode, Tile &out)
{
	out.size_x = in.size_x;
	out.size_y = in.size_y;

	for (int y=0; y<in.size_y; ++y)
	for (int x=0; x<in.size_x; ++x)
	{
		float t;
		out.data[y][x] = in.data[y][x];

		switch(rotatemode)
		{
		case ROTATEMODE_RGBA_RGBA: break;
		case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
		case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
		case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
		default: nvUnreachable();
		}
	}
}

void AVPCL::decompress_mode5(const char *block, Tile &t)
{
	Bits in(block, AVPCL::BITSIZE);

	Pattern p;
	IntEndptsRGBA endpts[NREGIONS];
	int shapeindex, pat_index, rotatemode, indexmode;

	read_header(in, endpts, shapeindex, rotatemode, indexmode, p, pat_index);

	sign_extend(p, endpts);

	if (p.transform_mode)
		transform_inverse(p.transform_mode, endpts);

	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
	float palette_a[NREGIONS][NINDICES3];	// could be nindices2

	for (int region = 0; region < NREGIONS; ++region)
		generate_palette_quantized_rgb_a(endpts[region], pattern_precs[pat_index].region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);

	int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];

	read_indices(in, shapeindex, indexmode, indices);

	nvAssert(in.getptr() == AVPCL::BITSIZE);

	Tile temp(t.size_x, t.size_y);

	// lookup
	for (int y = 0; y < Tile::TILE_H; y++)
	for (int x = 0; x < Tile::TILE_W; x++)
		temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);

	rotate_tile(temp, rotatemode, t);
}

// given a collection of colors and quantized endpoints, generate a palette, choose the best entries, and return a single toterr
// we already have a candidate mapping (and hence an error) when this is called; take an early exit if the accumulated error
// so far exceeds what we already have
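// note the inner searches below also break as soon as the per-entry error starts
// increasing; this relies on the palette entries being ordered along the A-to-B
// ramp, so the squared error as a function of the index is unimodal.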
static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
	Vector3 palette_rgb[NINDICES3];	// could be nindices2
	float palette_a[NINDICES3];	// could be nindices2
	float toterr = 0;

	generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);

	Vector3 rgb;
	float a;

	for (int i = 0; i < np; ++i)
	{
		float err, besterr;
		float palette_alpha = 0, tile_alpha = 0;

		if (AVPCL::flag_premult)
				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :
							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;

		rgb.x = (colors[i]).x;
		rgb.y = (colors[i]).y;
		rgb.z = (colors[i]).z;
		a = (colors[i]).w;

		// compute the two indices separately
		// if we're doing premultiplied alpha, we need to choose first the index that
		// determines the alpha value, and then do the other index

		if (rotatemode == ROTATEMODE_RGBA_RGBA)
		{
			// do A index first as it has the alpha
			besterr = FLT_MAX;
			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
			{
				err = Utils::metric1(a, palette_a[j], rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					palette_alpha = palette_a[j];
					indices[INDEXARRAY_A][i] = j;
				}
			}
			toterr += besterr;		// squared-error norms are additive since we don't do the square root

			// do RGB index
			besterr = FLT_MAX;
			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
			{
				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_RGB][i] = j;
				}
			}
			toterr += besterr;
			if (toterr > current_besterr)
			{
				// fill out bogus index values so it's initialized at least
				for (int k = i; k < np; ++k)
				{
					indices[INDEXARRAY_RGB][k] = -1;
					indices[INDEXARRAY_A][k] = -1;
				}
				return FLT_MAX;
			}
		}
		else
		{
			// do RGB index
			besterr = FLT_MAX;
			int bestindex = 0;	// the first pass of the loop always improves besterr, but initialize to be safe
			for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
			{
				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
											 Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					bestindex = j;
					indices[INDEXARRAY_RGB][i] = j;
				}
			}
			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
			toterr += besterr;

			// do A index
			besterr = FLT_MAX;
			for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
			{
				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
											 Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_A][i] = j;
				}
			}
			toterr += besterr;		// squared-error norms are additive since we don't do the square root
			if (toterr > current_besterr)
			{
				// fill out bogus index values so it's initialized at least
				for (int k = i; k < np; ++k)
				{
					indices[INDEXARRAY_RGB][k] = -1;
					indices[INDEXARRAY_A][k] = -1;
				}
				return FLT_MAX;
			}
		}
	}
	return toterr;
}

// assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec,
						   int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{
	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
	float palette_a[NREGIONS][NINDICES3];	// could be nindices2

	for (int region = 0; region < NREGIONS; ++region)
	{
		generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
		toterr[region] = 0;
	}

	Vector3 rgb;
	float a;

	for (int y = 0; y < tile.size_y; y++)
	for (int x = 0; x < tile.size_x; x++)
	{
		int region = REGION(x,y,shapeindex);
		float err, besterr;
		float palette_alpha = 0, tile_alpha = 0;

		rgb.x = (tile.data[y][x]).x;
		rgb.y = (tile.data[y][x]).y;
		rgb.z = (tile.data[y][x]).z;
		a = (tile.data[y][x]).w;

		if (AVPCL::flag_premult)
				tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :
							 (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
							 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;

		// compute the two indices separately
		// if we're doing premultiplied alpha, we need to choose first the index that
		// determines the alpha value, and then do the other index

		if (rotatemode == ROTATEMODE_RGBA_RGBA)
		{
			// do A index first as it has the alpha
			besterr = FLT_MAX;
			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
			{
				err = Utils::metric1(a, palette_a[region][i], rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_A][y][x] = i;
					palette_alpha = palette_a[region][i];
				}
			}
			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root

			// do RGB index
			besterr = FLT_MAX;
			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
			{
				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
											 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_RGB][y][x] = i;
				}
			}
			toterr[region] += besterr;
		}
		else
		{
			// do RGB index first as it has the alpha
			besterr = FLT_MAX;
			int bestindex = 0;	// the first pass of the loop always improves besterr, but initialize to be safe
			for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
			{
				err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
											 Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_RGB][y][x] = i;
					bestindex = i;
				}
			}
			palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
							(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
							(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
			toterr[region] += besterr;

			// do A index
			besterr = FLT_MAX;
			for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
			{
				err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
											 Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode);

				if (err > besterr)	// error increased, so we're done searching
					break;
				if (err < besterr)
				{
					besterr = err;
					indices[INDEXARRAY_A][y][x] = i;
				}
			}
			toterr[region] += besterr;		// squared-error norms are additive since we don't do the square root
		}
	}
}

// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
						  float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
	// we have the old endpoints: old_endpts
	// we have the perturbed endpoints: new_endpts
	// we have the temporary endpoints: temp_endpts

	IntEndptsRGBA temp_endpts;
	float min_err = old_err;		// start with the best current error
	int beststep = 0;				// only consumed after an improvement sets it, but initialize to be safe
	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];

	for (int j=0; j<NINDEXARRAYS; ++j)
	for (int i=0; i<np; ++i)
		indices[j][i] = -1;

	// copy real endpoints so we can perturb them
	temp_endpts = new_endpts = old_endpts;

	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];

	// do a logarithmic search for the best error for this endpoint (A or B, selected by do_b)
	for (int step = 1 << (prec-1); step; step >>= 1)
	{
		bool improved = false;
		for (int sign = -1; sign <= 1; sign += 2)
		{
			if (do_b == 0)
			{
				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
					continue;
			}
			else
			{
				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
					continue;
			}

			float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);

			if (err < min_err)
			{
				improved = true;
				min_err = err;
				beststep = sign * step;
				for (int j=0; j<NINDEXARRAYS; ++j)
				for (int i=0; i<np; ++i)
					indices[j][i] = temp_indices[j][i];
			}
		}
		// if this was an improvement, move the endpoint and continue search from there
		if (improved)
		{
			if (do_b == 0)
				new_endpts.A[ch] += beststep;
			else
				new_endpts.B[ch] += beststep;
		}
	}
	return min_err;
}
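
// with an 8-bit endpoint (prec == 8) the loop above tries steps 128, 64, 32, 16,
// 8, 4, 2, 1 in both directions, re-basing on any improvement -- at most 2*prec
// calls to map_colors instead of a full scan of all 256 endpoint values.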

// the larger the error the more time it is worth spending on an exhaustive search.
// perturb the endpoints at least -3 to 3.
// if err > 5000 perturb endpoints 50% of precision
// if err > 1000 25%
// if err > 200 12.5%
// if err > 40  6.25%
// for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan)
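// for example, with a full 16-pixel tile (thr_scale == 1) and orig_err == 1500,
// a 7-bit channel gets adelta = bdelta = 128/4 = 32, so the scan below covers
// roughly a 65x65 neighborhood of (A,B) pairs, minus the part excluded by the
// ordering constraint and the [0, 2^prec) clamp.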
static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
	IntEndptsRGBA temp_endpts;
	float best_err = orig_err;
	int aprec = region_prec.endpt_a_prec[ch];
	int bprec = region_prec.endpt_b_prec[ch];
	int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];

	for (int j=0; j<NINDEXARRAYS; ++j)
	for (int i=0; i<np; ++i)
		indices[j][i] = -1;

	float thr_scale = (float)np / (float)Tile::TILE_TOTAL;

	if (orig_err == 0) return orig_err;

	int adelta = 0, bdelta = 0;
	if (orig_err > 5000.0*thr_scale)		{ adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
	else if (orig_err > 1000.0*thr_scale)	{ adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
	else if (orig_err > 200.0*thr_scale)	{ adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
	else if (orig_err > 40.0*thr_scale)		{ adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
	adelta = max(adelta, 3);
	bdelta = max(bdelta, 3);

#ifdef	DISABLE_EXHAUSTIVE
	adelta = bdelta = 3;
#endif

	temp_endpts = opt_endpts;

	// ok figure out the range of A and B
	int alow = max(0, opt_endpts.A[ch] - adelta);
	int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
	int blow = max(0, opt_endpts.B[ch] - bdelta);
	int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);

	// preserve the relative ordering of A and B so there's no need to swap them afterwards
	bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];

	int amin = opt_endpts.A[ch], bmin = opt_endpts.B[ch];	// only consumed after an improvement, but initialize to be safe

	if (a_le_b)
	{
		// keep a <= b
		for (int a = alow; a <= ahigh; ++a)
		for (int b = max(a, blow); b <= bhigh; ++b)
		{
			temp_endpts.A[ch] = a;
			temp_endpts.B[ch] = b;

			float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
			if (err < best_err)
			{
				amin = a;
				bmin = b;
				best_err = err;
				for (int j=0; j<NINDEXARRAYS; ++j)
				for (int i=0; i<np; ++i)
					good_indices[j][i] = temp_indices[j][i];
			}
		}
	}
	else
	{
		// keep b <= a
		for (int b = blow; b <= bhigh; ++b)
		for (int a = max(b, alow); a <= ahigh; ++a)
		{
			temp_endpts.A[ch] = a;
			temp_endpts.B[ch] = b;

			float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
			if (err < best_err)
			{
				amin = a;
				bmin = b;
				best_err = err;
				for (int j=0; j<NINDEXARRAYS; ++j)
				for (int i=0; i<np; ++i)
					good_indices[j][i] = temp_indices[j][i];
			}
		}
	}
	if (best_err < orig_err)
	{
		opt_endpts.A[ch] = amin;
		opt_endpts.B[ch] = bmin;
		orig_err = best_err;
		for (int j=0; j<NINDEXARRAYS; ++j)
		for (int i=0; i<np; ++i)
			indices[j][i] = good_indices[j][i];
	}

	return best_err;
}

static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
{
	float opt_err = orig_err;

	opt_endpts = orig_endpts;

	/*
		err0 = perturb(rgb0, delta0)
		err1 = perturb(rgb1, delta1)
		if (err0 < err1)
			if (err0 >= initial_error) break
			rgb0 += delta0
			next = 1
		else
			if (err1 >= initial_error) break
			rgb1 += delta1
			next = 0
		initial_err = map()
		for (;;)
			err = perturb(next ? rgb1:rgb0, delta)
			if (err >= initial_err) break
			next? rgb1 : rgb0 += delta
			initial_err = err
	*/
	IntEndptsRGBA new_a, new_b;
	IntEndptsRGBA new_endpt;
	int do_b;
	int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
	int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
	int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL];
	int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL];

	// now optimize each channel separately
	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
	{
		// figure out which endpoint when perturbed gives the most improvement and start there
		// if we just alternate, we can easily end up in a local minimum
		float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
		float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B

		if (err0 < err1)
		{
			if (err0 >= opt_err)
				continue;

			for (int j=0; j<NINDEXARRAYS; ++j)
			for (int i=0; i<np; ++i)
			{
				new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
				nvAssert (orig_indices[j][i] != -1);
			}

			opt_endpts.A[ch] = new_a.A[ch];
			opt_err = err0;
			do_b = 1;		// do B next
		}
		else
		{
			if (err1 >= opt_err)
				continue;

			for (int j=0; j<NINDEXARRAYS; ++j)
			for (int i=0; i<np; ++i)
			{
				new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
				nvAssert (orig_indices[j][i] != -1);
			}

			opt_endpts.B[ch] = new_b.B[ch];
			opt_err = err1;
			do_b = 0;		// do A next
		}

		// now alternate endpoints and keep trying until there is no improvement
		for (;;)
		{
			float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			if (err >= opt_err)
				break;

			for (int j=0; j<NINDEXARRAYS; ++j)
			for (int i=0; i<np; ++i)
			{
				new_indices[j][i] = temp_indices0[j][i];
				nvAssert (orig_indices[j][i] != -1);
			}

			if (do_b == 0)
				opt_endpts.A[ch] = new_endpt.A[ch];
			else
				opt_endpts.B[ch] = new_endpt.B[ch];
			opt_err = err;
			do_b = 1 - do_b;	// now move the other endpoint
		}

		// see if the indices have changed
		int i;
		for (i=0; i<np; ++i)
			if (orig_indices[INDEXARRAY_RGB][i] != new_indices[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != new_indices[INDEXARRAY_A][i])
				break;

		if (i<np)
			ch = -1;	// start over
	}

	// finally, do a small exhaustive search around what we think is the global minimum to be sure
	bool first = true;
	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
	{
		float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);

		if (new_err < opt_err)
		{
			opt_err = new_err;

			if (first)
			{
				for (int j=0; j<NINDEXARRAYS; ++j)
				for (int i=0; i<np; ++i)
				{
					orig_indices[j][i] = temp_indices0[j][i];
					nvAssert (orig_indices[j][i] != -1);
				}
				first = false;
			}
			else
			{
				// see if the indices have changed
				int i;
				for (i=0; i<np; ++i)
					if (orig_indices[INDEXARRAY_RGB][i] != temp_indices0[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != temp_indices0[INDEXARRAY_A][i])
						break;

				if (i<np)
				{
					ch = -1;	// start over
					first = true;
				}
			}
		}
	}

	return opt_err;
}

static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS],
							const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
{
	Vector4 pixels[Tile::TILE_TOTAL];
	float importance[Tile::TILE_TOTAL];
	IntEndptsRGBA temp_in, temp_out;

	for (int region=0; region<NREGIONS; ++region)
	{
		// collect the pixels in the region
		int np = 0;

		for (int y = 0; y < tile.size_y; y++) {
			for (int x = 0; x < tile.size_x; x++) {
				if (REGION(x, y, shapeindex) == region) {
					pixels[np] = tile.data[y][x];
					importance[np] = tile.importance_map[y][x];
					np++;
				}
			}
		}

		opt_endpts[region] = temp_in = orig_endpts[region];
		opt_err[region] = orig_err[region];

		float best_err = orig_err[region];

		// make sure we have a valid error for temp_in
		// we didn't change temp_in, so orig_err[region] is still valid
		float temp_in_err = orig_err[region];

		// now try to optimize these endpoints
		float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);

		// if we find an improvement, update the best so far and correct the output endpoints and errors
		if (temp_out_err < best_err)
		{
			best_err = temp_out_err;
			opt_err[region] = temp_out_err;
			opt_endpts[region] = temp_out;
		}
	}
}

/* optimization algorithm
	for each pattern
		convert endpoints using pattern precision
		assign indices and get initial error
		compress indices (and possibly reorder endpoints)
		transform endpoints
		if transformed endpoints fit pattern
			get original endpoints back
			optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
			compress new indices
			transform new endpoints
			if new endpoints fit pattern AND if error is improved
				emit compressed block with new data
			else
				emit compressed block with original data // to try to preserve maximum endpoint precision
*/

static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
{
	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
	IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
	int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];

	for (int sp = 0; sp < NPATTERNS; ++sp)
	{
		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);

		assign_indices(tile, shapeindex_best, rotatemode, indexmode, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
		swap_indices(shapeindex_best, indexmode, orig_endpts, orig_indices);

		if (patterns[sp].transform_mode)
			transform_forward(patterns[sp].transform_mode, orig_endpts);

		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
		// the assumption made is that if they don't fit now, they won't fit after optimizing.
		if (endpts_fit(orig_endpts, patterns[sp]))
		{
			if (patterns[sp].transform_mode)
				transform_inverse(patterns[sp].transform_mode, orig_endpts);

			optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);

			assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			// (nreed) Commented out asserts because they go off all the time...not sure why
			//for (int i=0; i<NREGIONS; ++i)
			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);

			if (patterns[sp].transform_mode)
				transform_forward(patterns[sp].transform_mode, opt_endpts);

			orig_toterr = opt_toterr = 0;
			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			{
				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, rotatemode, indexmode, block);
				return opt_toterr;
			}
			else
			{
				// either it stopped fitting when we optimized it, or there was no improvement
				// so go back to the unoptimized endpoints which we know will fit
				if (patterns[sp].transform_mode)
					transform_forward(patterns[sp].transform_mode, orig_endpts);
				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, rotatemode, indexmode, block);
				return orig_toterr;
			}
		}
	}
	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 5).";
	return FLT_MAX;
}

static void clamp(Vector4 &v)
{
	if (v.x < 0.0f) v.x = 0.0f;
	if (v.x > 255.0f) v.x = 255.0f;
	if (v.y < 0.0f) v.y = 0.0f;
	if (v.y > 255.0f) v.y = 255.0f;
	if (v.z < 0.0f) v.z = 0.0f;
	if (v.z > 255.0f) v.z = 255.0f;
	if (v.w < 0.0f) v.w = 0.0f;
	if (v.w > 255.0f) v.w = 255.0f;
}

// compute initial endpoints for the "RGB" portion and the "A" portion.
// Note these channels may have been rotated.
static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
{
	for (int region=0; region<NREGIONS; ++region)
	{
		int np = 0;
		Vector3 colors[Tile::TILE_TOTAL];
		float alphas[Tile::TILE_TOTAL];
		Vector4 mean(0,0,0,0);

		for (int y = 0; y < tile.size_y; y++)
		for (int x = 0; x < tile.size_x; x++)
			if (REGION(x,y,shapeindex) == region)
			{
				colors[np] = tile.data[y][x].xyz();
				alphas[np] = tile.data[y][x].w;
				mean += tile.data[y][x];
				++np;
			}

		// handle simple cases
		if (np == 0)
		{
			Vector4 zero(0,0,0,255.0f);
			endpts[region].A = zero;
			endpts[region].B = zero;
			continue;
		}
		else if (np == 1)
		{
			endpts[region].A = Vector4(colors[0], alphas[0]);
			endpts[region].B = Vector4(colors[0], alphas[0]);
			continue;
		}
		else if (np == 2)
		{
			endpts[region].A = Vector4(colors[0], alphas[0]);
			endpts[region].B = Vector4(colors[1], alphas[1]);
			continue;
		}

		mean /= float(np);

		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);

		// project each pixel value along the principal direction
		float minp = FLT_MAX, maxp = -FLT_MAX;
		float mina = FLT_MAX, maxa = -FLT_MAX;
		for (int i = 0; i < np; i++)
		{
			float dp = dot(colors[i]-mean.xyz(), direction);
			if (dp < minp) minp = dp;
			if (dp > maxp) maxp = dp;

			dp = alphas[i] - mean.w;
			if (dp < mina) mina = dp;
			if (dp > maxa) maxa = dp;
		}

		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
		endpts[region].A = mean + Vector4(minp*direction, mina);
		endpts[region].B = mean + Vector4(maxp*direction, maxa);

		// clamp endpoints
		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
		// shape based on endpoints being clamped
		clamp(endpts[region].A);
		clamp(endpts[region].B);
	}
}

float AVPCL::compress_mode5(const Tile &t, char *block)
{
	FltEndpts endpts[NREGIONS];
	char tempblock[AVPCL::BLOCKSIZE];
	float msebest = FLT_MAX;
	int shape = 0;
	Tile t1;

	// try all rotations. the indexmode loop below is disabled since mode 5 uses only one indexing.
	for (int r = 0; r < NROTATEMODES && msebest > 0; ++r)
	{
		rotate_tile(t, r, t1);
		rough(t1, shape, endpts);
//		for (int i = 0; i < NINDEXMODES && msebest > 0; ++i)
		for (int i = 0; i < 1 && msebest > 0; ++i)
		{
			float mse = refine(t1, shape, r, i, endpts, tempblock);
			if (mse < msebest)
			{
				memcpy(block, tempblock, sizeof(tempblock));
				msebest = mse;
			}
		}
	}
	return msebest;
}
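
// usage sketch (hypothetical caller, using this file's entry points):
//   Tile t(4, 4);                       // fill t.data and t.importance_map first
//   char block[AVPCL::BLOCKSIZE];
//   float mse = AVPCL::compress_mode5(t, block);
//   Tile out(4, 4);
//   AVPCL::decompress_mode5(block, out);   // out now holds the decoded tile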