1 /*
2 Copyright 2007 nVidia, Inc.
3 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
4
5 You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
6
7 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
8 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
10 See the License for the specific language governing permissions and limitations under the License.
11 */
12
13 // Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
14
15 // x100000 2r 777x2 8x2 2bi 2bi
16
17 #include "bits.h"
18 #include "tile.h"
19 #include "avpcl.h"
20 #include "nvcore/debug.h"
21 #include "nvmath/vector.inl"
22 #include "nvmath/matrix.inl"
23 #include "nvmath/fitting.h"
24 #include "avpcl_utils.h"
25 #include "endpts.h"
26 #include <string.h>
27 #include <float.h>
28
29 using namespace nv;
30 using namespace AVPCL;
31
// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits
// array 0 is always the RGB array and array 1 is always the A array
#define	NINDEXARRAYS	2
#define	INDEXARRAY_RGB	0
#define INDEXARRAY_A	1
// map an indexmode to whichever array carries 2 (resp. 3) bits per index
#define INDEXARRAY_2BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)
#define INDEXARRAY_3BITS(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_3BITS) ? INDEXARRAY_A : INDEXARRAY_RGB)

// palette parameters for the "3-bit-style" index array
// NOTE(review): in this mode both arrays are configured as 2-bit/4-entry (see values below);
// the 3/2 naming is shared with the other mode files.
#define NINDICES3	4
#define	INDEXBITS3	2
#define	HIGH_INDEXBIT3	(1<<(INDEXBITS3-1))
#define	DENOM3		(NINDICES3-1)
#define	BIAS3		(DENOM3/2)

// palette parameters for the "2-bit-style" index array
#define NINDICES2	4
#define	INDEXBITS2	2
#define	HIGH_INDEXBIT2	(1<<(INDEXBITS2-1))
#define	DENOM2		(NINDICES2-1)
#define	BIAS2		(DENOM2/2)

// select palette parameters for the RGB index array given the indexmode
#define	NINDICES_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES3 : NINDICES2)
#define	INDEXBITS_RGB(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS3 : INDEXBITS2)
#define	HIGH_INDEXBIT_RGB(indexmode)((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT3 : HIGH_INDEXBIT2)
#define	DENOM_RGB(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM3 : DENOM2)
#define	BIAS_RGB(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS3 : BIAS2)

// select palette parameters for the A index array given the indexmode
#define	NINDICES_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? NINDICES2 : NINDICES3)
#define	INDEXBITS_A(indexmode)		((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? INDEXBITS2 : INDEXBITS3)
#define	HIGH_INDEXBIT_A(indexmode)	((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? HIGH_INDEXBIT2 : HIGH_INDEXBIT3)
#define	DENOM_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? DENOM2 : DENOM3)
#define	BIAS_A(indexmode)			((indexmode == INDEXMODE_ALPHA_IS_2BITS) ? BIAS2 : BIAS3)

// this mode has a single partition (the whole tile), hence one all-zero shape
#define	NSHAPES	1

static int shapes[NSHAPES] =
{
	0x0000,
};

// region (0 or 1) of pixel (x,y) for the given shape; bit 15 is pixel (0,0)
#define	REGION(x,y,shapeindex)	((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)

#define NREGIONS	1			// keep the region stuff in just in case...

// encoded index compression location: region 0 is always at 0,0.

#define	NBITSIZES	2			// one endpoint pair
78
// per-channel endpoint bit widths: [0] is endpoint A, [1] is endpoint B
struct ChanBits
{
	int nbitsizes[NBITSIZES];	// bitsizes for one channel
};

struct Pattern
{
	ChanBits chan[NCHANNELS_RGBA];// bit patterns used per channel
	int transform_mode;		// x0 means alpha channel not transformed, x1 otherwise. 0x rgb not transformed, 1x otherwise.
	int mode;			// associated mode value
	int modebits;			// number of mode bits
	const char *encoding;		// verilog description of encoding for this mode
};

// bit flags tested against Pattern::transform_mode
#define	TRANSFORM_MODE_ALPHA	1
#define	TRANSFORM_MODE_RGB	2

#define NPATTERNS 1

// the single mode-5 pattern: 7-bit RGB endpoints, 8-bit alpha endpoints,
// no endpoint transform, mode value 0x20 written in 6 mode bits
static Pattern patterns[NPATTERNS] =
{
	// red		green		blue		alpha	xfm	mode  mb encoding
	7,7,		7,7,		7,7,		8,8,	0x0,	0x20, 6, "",
};

// per-region, per-channel quantization precision for one pattern
struct RegionPrec
{
	int	endpt_a_prec[NCHANNELS_RGBA];
	int	endpt_b_prec[NCHANNELS_RGBA];
};

struct PatternPrec
{
	RegionPrec region_precs[NREGIONS];
};

// this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] =
{
	7,7,7,8,	7,7,7,8,
};
121
122
123 // return # of bits needed to store n. handle signed or unsigned cases properly
nbits(int n,bool issigned)124 static int nbits(int n, bool issigned)
125 {
126 int nb;
127 if (n==0)
128 return 0; // no bits needed for 0, signed or not
129 else if (n > 0)
130 {
131 for (nb=0; n; ++nb, n>>=1) ;
132 return nb + (issigned?1:0);
133 }
134 else
135 {
136 nvAssert (issigned);
137 for (nb=0; n<-1; ++nb, n>>=1) ;
138 return nb + 1;
139 }
140 }
141
142 #define R_0 ep[0].A[i]
143 #define R_1 ep[0].B[i]
144
transform_forward(int transform_mode,IntEndptsRGBA ep[NREGIONS])145 static void transform_forward(int transform_mode, IntEndptsRGBA ep[NREGIONS])
146 {
147 int i;
148
149 if (transform_mode & TRANSFORM_MODE_RGB)
150 for (i=CHANNEL_R; i<CHANNEL_A; ++i)
151 R_1 -= R_0;
152 if (transform_mode & TRANSFORM_MODE_ALPHA)
153 {
154 i = CHANNEL_A;
155 R_1 -= R_0;
156 }
157 }
158
transform_inverse(int transform_mode,IntEndptsRGBA ep[NREGIONS])159 static void transform_inverse(int transform_mode, IntEndptsRGBA ep[NREGIONS])
160 {
161 int i;
162
163 if (transform_mode & TRANSFORM_MODE_RGB)
164 for (i=CHANNEL_R; i<CHANNEL_A; ++i)
165 R_1 += R_0;
166 if (transform_mode & TRANSFORM_MODE_ALPHA)
167 {
168 i = CHANNEL_A;
169 R_1 += R_0;
170 }
171 }
172
// quantize the floating-point endpoints of each region down to the
// per-channel bit precisions given by pattern_prec
static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGBA q_endpts[NREGIONS])
{
	for (int region = 0; region < NREGIONS; ++region)
	{
		const RegionPrec &prec = pattern_prec.region_precs[region];
		const FltEndpts &e = endpts[region];
		IntEndptsRGBA &q = q_endpts[region];

		q.A[0] = Utils::quantize(e.A.x, prec.endpt_a_prec[0]);
		q.A[1] = Utils::quantize(e.A.y, prec.endpt_a_prec[1]);
		q.A[2] = Utils::quantize(e.A.z, prec.endpt_a_prec[2]);
		q.A[3] = Utils::quantize(e.A.w, prec.endpt_a_prec[3]);

		q.B[0] = Utils::quantize(e.B.x, prec.endpt_b_prec[0]);
		q.B[1] = Utils::quantize(e.B.y, prec.endpt_b_prec[1]);
		q.B[2] = Utils::quantize(e.B.z, prec.endpt_b_prec[2]);
		q.B[3] = Utils::quantize(e.B.w, prec.endpt_b_prec[3]);
	}
}
188
189 // swap endpoints as needed to ensure that the indices at index_one and index_two have a 0 high-order bit
190 // index_two is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
swap_indices(int shapeindex,int indexmode,IntEndptsRGBA endpts[NREGIONS],int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])191 static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NREGIONS], int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
192 {
193 int index_positions[NREGIONS];
194
195 index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0
196
197 for (int region = 0; region < NREGIONS; ++region)
198 {
199 int x = index_positions[region] & 3;
200 int y = (index_positions[region] >> 2) & 3;
201 nvAssert(REGION(x,y,shapeindex) == region); // double check the table
202
203 // swap RGB
204 if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
205 {
206 // high bit is set, swap the endpts and indices for this region
207 int t;
208 for (int i=CHANNEL_R; i<=CHANNEL_B; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
209
210 for (int y = 0; y < Tile::TILE_H; y++)
211 for (int x = 0; x < Tile::TILE_W; x++)
212 if (REGION(x,y,shapeindex) == region)
213 indices[INDEXARRAY_RGB][y][x] = NINDICES_RGB(indexmode) - 1 - indices[INDEXARRAY_RGB][y][x];
214 }
215
216 // swap A
217 if (indices[INDEXARRAY_A][y][x] & HIGH_INDEXBIT_A(indexmode))
218 {
219 // high bit is set, swap the endpts and indices for this region
220 int t;
221 for (int i=CHANNEL_A; i<=CHANNEL_A; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
222
223 for (int y = 0; y < Tile::TILE_H; y++)
224 for (int x = 0; x < Tile::TILE_W; x++)
225 if (REGION(x,y,shapeindex) == region)
226 indices[INDEXARRAY_A][y][x] = NINDICES_A(indexmode) - 1 - indices[INDEXARRAY_A][y][x];
227 }
228 }
229 }
230
// mode 5 stores endpoints untransformed at full channel precision, so any
// quantized endpoint pair fits; kept as a hook for symmetry with the other modes
static bool endpts_fit(IntEndptsRGBA endpts[NREGIONS], const Pattern &p)
{
	return true;
}
235
// serialize the block header: mode bits, rotate mode, then the endpoint pair
// channel by channel. indexmode is not encoded for this mode.
static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, int rotatemode, int indexmode, Bits &out)
{
	// ignore shapeindex
	out.write(p.mode, p.modebits);
	out.write(rotatemode, ROTATEMODE_BITS);
//	out.write(indexmode, INDEXMODE_BITS);

	for (int region = 0; region < NREGIONS; ++region)
		for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
		{
			out.write(endpts[region].A[ch], p.chan[ch].nbitsizes[0]);
			out.write(endpts[region].B[ch], p.chan[ch].nbitsizes[1]);
		}

	// header is a fixed 66 bits for this mode
	nvAssert (out.getptr() == 66);
}
250
read_header(Bits & in,IntEndptsRGBA endpts[NREGIONS],int & shapeindex,int & rotatemode,int & indexmode,Pattern & p,int & pat_index)251 static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)
252 {
253 int mode = AVPCL::getmode(in);
254
255 pat_index = 0;
256
257 nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
258 nvAssert (in.getptr() == patterns[pat_index].modebits);
259
260 p = patterns[pat_index];
261
262 shapeindex = 0; // we don't have any
263
264 rotatemode = in.read(ROTATEMODE_BITS);
265
266 indexmode = 0; // we don't have any
267
268 for (int i=0; i<NREGIONS; ++i)
269 for (int j=0; j<NCHANNELS_RGBA; ++j)
270 {
271 endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);
272 endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
273 }
274 nvAssert (in.getptr() == 66);
275 }
276
// serialize both index arrays. the anchor pixel (0,0) of each array has its
// high bit guaranteed zero by swap_indices, so it is written one bit short.
static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)
{
	// the indices we shorten is always index 0
	int arr2 = INDEXARRAY_2BITS(indexmode);
	int arr3 = INDEXARRAY_3BITS(indexmode);

	// the 2 bit indices go first
	nvAssert ((indices[arr2][0][0] & HIGH_INDEXBIT2) == 0);
	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
		out.write(indices[arr2][i>>2][i&3], INDEXBITS2 - (i==0?1:0));	// write i..[1:0] or i..[0]

	// then the 3 bit indices
	nvAssert ((indices[arr3][0][0] & HIGH_INDEXBIT3) == 0);
	for (int i = 0; i < Tile::TILE_TOTAL; ++i)
		out.write(indices[arr3][i>>2][i&3], INDEXBITS3 - (i==0?1:0));	// write i..[2:0] or i..[1:0]
}
291
read_indices(Bits & in,int shapeindex,int indexmode,int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])292 static void read_indices(Bits &in, int shapeindex, int indexmode, int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W])
293 {
294 // the indices we shorten is always index 0
295
296 // do the 2 bit indices first
297 for (int i = 0; i < Tile::TILE_TOTAL; ++i)
298 indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS2 - (i==0?1:0)); // read i..[1:0] or i..[0]
299
300 // then the 3 bit indices
301 for (int i = 0; i < Tile::TILE_TOTAL; ++i)
302 indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3] = in.read(INDEXBITS3 - (i==0?1:0)); // read i..[1:0] or i..[0]
303 }
304
// emit one complete compressed block: header followed by the two index arrays.
// the result must land exactly on the block size.
static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int rotatemode, int indexmode, char *block)
{
	Bits stream(block, AVPCL::BITSIZE);

	write_header(endpts, shapeindex, p, rotatemode, indexmode, stream);
	write_indices(indices, shapeindex, indexmode, stream);

	nvAssert(stream.getptr() == AVPCL::BITSIZE);
}
315
// expand one quantized endpoint pair into the interpolated palettes:
// palette_rgb gets NINDICES_RGB entries, palette_a gets NINDICES_A entries.
static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])
{
	int lo, hi;

	// R: unquantize both endpoints then interpolate
	lo = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]);
	hi = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]);
	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
		palette_rgb[i].x = float(Utils::lerp(lo, hi, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));

	// G
	lo = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]);
	hi = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
		palette_rgb[i].y = float(Utils::lerp(lo, hi, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));

	// B
	lo = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]);
	hi = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
	for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
		palette_rgb[i].z = float(Utils::lerp(lo, hi, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));

	// A uses its own index count/weights
	lo = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]);
	hi = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);
	for (int i = 0; i < NINDICES_A(indexmode); ++i)
		palette_a[i] = float(Utils::lerp(lo, hi, i, BIAS_A(indexmode), DENOM_A(indexmode)));
}
349
sign_extend(Pattern & p,IntEndptsRGBA endpts[NREGIONS])350 static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
351 {
352 for (int i=0; i<NCHANNELS_RGBA; ++i)
353 {
354 if (p.transform_mode)
355 {
356 // endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]); // always positive here
357 endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
358 endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
359 endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
360 }
361 }
362 }
363
rotate_tile(const Tile & in,int rotatemode,Tile & out)364 static void rotate_tile(const Tile &in, int rotatemode, Tile &out)
365 {
366 out.size_x = in.size_x;
367 out.size_y = in.size_y;
368
369 for (int y=0; y<in.size_y; ++y)
370 for (int x=0; x<in.size_x; ++x)
371 {
372 float t;
373 out.data[y][x] = in.data[y][x];
374
375 switch(rotatemode)
376 {
377 case ROTATEMODE_RGBA_RGBA: break;
378 case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
379 case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
380 case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
381 default: nvUnreachable();
382 }
383 }
384 }
385
// decompress one mode-5 block into tile t: read the header and index arrays,
// rebuild the interpolated palettes, look up each pixel, then undo the
// channel rotation that was applied at compression time.
void AVPCL::decompress_mode5(const char *block, Tile &t)
{
	Bits in(block, AVPCL::BITSIZE);

	Pattern p;
	IntEndptsRGBA endpts[NREGIONS];
	int shapeindex, pat_index, rotatemode, indexmode;

	read_header(in, endpts, shapeindex, rotatemode, indexmode, p, pat_index);

	// the single mode-5 pattern has transform_mode 0x0, so these are normally no-ops
	sign_extend(p, endpts);

	if (p.transform_mode)
		transform_inverse(p.transform_mode, endpts);

	Vector3 palette_rgb[NREGIONS][NINDICES3];	// could be nindices2
	float palette_a[NREGIONS][NINDICES3];		// could be nindices2

	for (int region = 0; region < NREGIONS; ++region)
		generate_palette_quantized_rgb_a(endpts[region], pattern_precs[pat_index].region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);

	int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];

	read_indices(in, shapeindex, indexmode, indices);

	// every bit of the block must have been consumed
	nvAssert(in.getptr() == AVPCL::BITSIZE);

	Tile temp(t.size_x, t.size_y);

	// lookup: RGB and A come from their separate index arrays
	for (int y = 0; y < Tile::TILE_H; y++)
		for (int x = 0; x < Tile::TILE_W; x++)
			temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);

	// undo the channel rotation (the swap is self-inverse)
	rotate_tile(temp, rotatemode, t);
}
422
423 // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
424 // we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
425 // exceeds what we already have
map_colors(const Vector4 colors[],const float importance[],int np,int rotatemode,int indexmode,const IntEndptsRGBA & endpts,const RegionPrec & region_prec,float current_besterr,int indices[NINDEXARRAYS][Tile::TILE_TOTAL])426 static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec ®ion_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
427 {
428 Vector3 palette_rgb[NINDICES3]; // could be nindices2
429 float palette_a[NINDICES3]; // could be nindices2
430 float toterr = 0;
431
432 generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);
433
434 Vector3 rgb;
435 float a;
436
437 for (int i = 0; i < np; ++i)
438 {
439 float err, besterr;
440 float palette_alpha = 0, tile_alpha = 0;
441
442 if(AVPCL::flag_premult)
443 tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :
444 (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
445 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;
446
447 rgb.x = (colors[i]).x;
448 rgb.y = (colors[i]).y;
449 rgb.z = (colors[i]).z;
450 a = (colors[i]).w;
451
452 // compute the two indices separately
453 // if we're doing premultiplied alpha, we need to choose first the index that
454 // determines the alpha value, and then do the other index
455
456 if (rotatemode == ROTATEMODE_RGBA_RGBA)
457 {
458 // do A index first as it has the alpha
459 besterr = FLT_MAX;
460 for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
461 {
462 err = Utils::metric1(a, palette_a[j], rotatemode);
463
464 if (err > besterr) // error increased, so we're done searching
465 break;
466 if (err < besterr)
467 {
468 besterr = err;
469 palette_alpha = palette_a[j];
470 indices[INDEXARRAY_A][i] = j;
471 }
472 }
473 toterr += besterr; // squared-error norms are additive since we don't do the square root
474
475 // do RGB index
476 besterr = FLT_MAX;
477 for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
478 {
479 err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
480 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[j], palette_alpha);
481
482 if (err > besterr) // error increased, so we're done searching
483 break;
484 if (err < besterr)
485 {
486 besterr = err;
487 indices[INDEXARRAY_RGB][i] = j;
488 }
489 }
490 toterr += besterr;
491 if (toterr > current_besterr)
492 {
493 // fill out bogus index values so it's initialized at least
494 for (int k = i; k < np; ++k)
495 {
496 indices[INDEXARRAY_RGB][k] = -1;
497 indices[INDEXARRAY_A][k] = -1;
498 }
499 return FLT_MAX;
500 }
501 }
502 else
503 {
504 // do RGB index
505 besterr = FLT_MAX;
506 int bestindex;
507 for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
508 {
509 err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
510 Utils::metric3premult_alphain(rgb, palette_rgb[j], rotatemode);
511
512 if (err > besterr) // error increased, so we're done searching
513 break;
514 if (err < besterr)
515 {
516 besterr = err;
517 bestindex = j;
518 indices[INDEXARRAY_RGB][i] = j;
519 }
520 }
521 palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
522 (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
523 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
524 toterr += besterr;
525
526 // do A index
527 besterr = FLT_MAX;
528 for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
529 {
530 err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
531 Utils::metric1premult(a, tile_alpha, palette_a[j], palette_alpha, rotatemode);
532
533 if (err > besterr) // error increased, so we're done searching
534 break;
535 if (err < besterr)
536 {
537 besterr = err;
538 indices[INDEXARRAY_A][i] = j;
539 }
540 }
541 toterr += besterr; // squared-error norms are additive since we don't do the square root
542 if (toterr > current_besterr)
543 {
544 // fill out bogus index values so it's initialized at least
545 for (int k = i; k < np; ++k)
546 {
547 indices[INDEXARRAY_RGB][k] = -1;
548 indices[INDEXARRAY_A][k] = -1;
549 }
550 return FLT_MAX;
551 }
552 }
553 }
554 return toterr;
555 }
556
557 // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
assign_indices(const Tile & tile,int shapeindex,int rotatemode,int indexmode,IntEndptsRGBA endpts[NREGIONS],const PatternPrec & pattern_prec,int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W],float toterr[NREGIONS])558 static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec,
559 int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
560 {
561 Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2
562 float palette_a[NREGIONS][NINDICES3]; // could be nindices2
563
564 for (int region = 0; region < NREGIONS; ++region)
565 {
566 generate_palette_quantized_rgb_a(endpts[region], pattern_prec.region_precs[region], indexmode, &palette_rgb[region][0], &palette_a[region][0]);
567 toterr[region] = 0;
568 }
569
570 Vector3 rgb;
571 float a;
572
573 for (int y = 0; y < tile.size_y; y++)
574 for (int x = 0; x < tile.size_x; x++)
575 {
576 int region = REGION(x,y,shapeindex);
577 float err, besterr;
578 float palette_alpha = 0, tile_alpha = 0;
579
580 rgb.x = (tile.data[y][x]).x;
581 rgb.y = (tile.data[y][x]).y;
582 rgb.z = (tile.data[y][x]).z;
583 a = (tile.data[y][x]).w;
584
585 if(AVPCL::flag_premult)
586 tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :
587 (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
588 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;
589
590 // compute the two indices separately
591 // if we're doing premultiplied alpha, we need to choose first the index that
592 // determines the alpha value, and then do the other index
593
594 if (rotatemode == ROTATEMODE_RGBA_RGBA)
595 {
596 // do A index first as it has the alpha
597 besterr = FLT_MAX;
598 for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
599 {
600 err = Utils::metric1(a, palette_a[region][i], rotatemode);
601
602 if (err > besterr) // error increased, so we're done searching
603 break;
604 if (err < besterr)
605 {
606 besterr = err;
607 indices[INDEXARRAY_A][y][x] = i;
608 palette_alpha = palette_a[region][i];
609 }
610 }
611 toterr[region] += besterr; // squared-error norms are additive since we don't do the square root
612
613 // do RGB index
614 besterr = FLT_MAX;
615 for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
616 {
617 err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
618 Utils::metric3premult_alphaout(rgb, tile_alpha, palette_rgb[region][i], palette_alpha);
619
620 if (err > besterr) // error increased, so we're done searching
621 break;
622 if (err < besterr)
623 {
624 besterr = err;
625 indices[INDEXARRAY_RGB][y][x] = i;
626 }
627 }
628 toterr[region] += besterr;
629 }
630 else
631 {
632 // do RGB index first as it has the alpha
633 besterr = FLT_MAX;
634 int bestindex;
635 for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
636 {
637 err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
638 Utils::metric3premult_alphain(rgb, palette_rgb[region][i], rotatemode);
639
640 if (err > besterr) // error increased, so we're done searching
641 break;
642 if (err < besterr)
643 {
644 besterr = err;
645 indices[INDEXARRAY_RGB][y][x] = i;
646 bestindex = i;
647 }
648 }
649 palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
650 (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
651 (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
652 toterr[region] += besterr;
653
654 // do A index
655 besterr = FLT_MAX;
656 for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
657 {
658 err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
659 Utils::metric1premult(a, tile_alpha, palette_a[region][i], palette_alpha, rotatemode);
660
661 if (err > besterr) // error increased, so we're done searching
662 break;
663 if (err < besterr)
664 {
665 besterr = err;
666 indices[INDEXARRAY_A][y][x] = i;
667 }
668 }
669 toterr[region] += besterr; // squared-error norms are additive since we don't do the square root
670 }
671 }
672 }
673
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
						 float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{
	// we have the old endpoints: old_endpts
	// we have the perturbed endpoints: new_endpts
	// we have the temporary endpoints: temp_endpts

	IntEndptsRGBA temp_endpts;
	float min_err = old_err;		// start with the best current error
	int beststep;					// only read when an improvement was found, which also sets it
	int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];

	// invalidate the output indices until an improvement fills them in
	for (int j=0; j<NINDEXARRAYS; ++j)
		for (int i=0; i<np; ++i)
			indices[j][i] = -1;

	// copy real endpoints so we can perturb them
	temp_endpts = new_endpts = old_endpts;

	int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];

	// do a logarithmic search for the best error for this endpoint (which)
	// step halves each round: coarse-to-fine over the channel's value range
	for (int step = 1 << (prec-1); step; step >>= 1)
	{
		bool improved = false;
		for (int sign = -1; sign <= 1; sign += 2)
		{
			// try the candidate value; skip it if it leaves the representable range
			if (do_b == 0)
			{
				temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
				if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
					continue;
			}
			else
			{
				temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
				if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
					continue;
			}

			// min_err doubles as the early-out threshold inside map_colors
			float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);

			if (err < min_err)
			{
				improved = true;
				min_err = err;
				beststep = sign * step;
				for (int j=0; j<NINDEXARRAYS; ++j)
					for (int i=0; i<np; ++i)
						indices[j][i] = temp_indices[j][i];
			}
		}
		// if this was an improvement, move the endpoint and continue search from there
		if (improved)
		{
			if (do_b == 0)
				new_endpts.A[ch] += beststep;
			else
				new_endpts.B[ch] += beststep;
		}
	}
	return min_err;
}
739
740 // the larger the error the more time it is worth spending on an exhaustive search.
741 // perturb the endpoints at least -3 to 3.
742 // if err > 5000 perturb endpoints 50% of precision
743 // if err > 1000 25%
744 // if err > 200 12.5%
745 // if err > 40 6.25%
746 // for np = 16 -- adjust error thresholds as a function of np
747 // always ensure endpoint ordering is preserved (no need to overlap the scan)
exhaustive(const Vector4 colors[],const float importance[],int np,int rotatemode,int indexmode,int ch,const RegionPrec & region_prec,float orig_err,IntEndptsRGBA & opt_endpts,int indices[NINDEXARRAYS][Tile::TILE_TOTAL])748 static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec ®ion_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
749 {
750 IntEndptsRGBA temp_endpts;
751 float best_err = orig_err;
752 int aprec = region_prec.endpt_a_prec[ch];
753 int bprec = region_prec.endpt_b_prec[ch];
754 int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
755 int temp_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
756
757 for (int j=0; j<NINDEXARRAYS; ++j)
758 for (int i=0; i<np; ++i)
759 indices[j][i] = -1;
760
761 float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
762
763 if (orig_err == 0) return orig_err;
764
765 int adelta = 0, bdelta = 0;
766 if (orig_err > 5000.0*thr_scale) { adelta = (1 << aprec)/2; bdelta = (1 << bprec)/2; }
767 else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
768 else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
769 else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
770 adelta = max(adelta, 3);
771 bdelta = max(bdelta, 3);
772
773 #ifdef DISABLE_EXHAUSTIVE
774 adelta = bdelta = 3;
775 #endif
776
777 temp_endpts = opt_endpts;
778
779 // ok figure out the range of A and B
780 int alow = max(0, opt_endpts.A[ch] - adelta);
781 int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
782 int blow = max(0, opt_endpts.B[ch] - bdelta);
783 int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
784
785 // now there's no need to swap the ordering of A and B
786 bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
787
788 int amin, bmin;
789
790 if (opt_endpts.A[ch] <= opt_endpts.B[ch])
791 {
792 // keep a <= b
793 for (int a = alow; a <= ahigh; ++a)
794 for (int b = max(a, blow); b < bhigh; ++b)
795 {
796 temp_endpts.A[ch] = a;
797 temp_endpts.B[ch] = b;
798
799 float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
800 if (err < best_err)
801 {
802 amin = a;
803 bmin = b;
804 best_err = err;
805 for (int j=0; j<NINDEXARRAYS; ++j)
806 for (int i=0; i<np; ++i)
807 good_indices[j][i] = temp_indices[j][i];
808 }
809 }
810 }
811 else
812 {
813 // keep b <= a
814 for (int b = blow; b < bhigh; ++b)
815 for (int a = max(b, alow); a <= ahigh; ++a)
816 {
817 temp_endpts.A[ch] = a;
818 temp_endpts.B[ch] = b;
819
820 float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
821 if (err < best_err)
822 {
823 amin = a;
824 bmin = b;
825 best_err = err;
826 for (int j=0; j<NINDEXARRAYS; ++j)
827 for (int i=0; i<np; ++i)
828 good_indices[j][i] = temp_indices[j][i];
829 }
830 }
831 }
832 if (best_err < orig_err)
833 {
834 opt_endpts.A[ch] = amin;
835 opt_endpts.B[ch] = bmin;
836 orig_err = best_err;
837 for (int j=0; j<NINDEXARRAYS; ++j)
838 for (int i=0; i<np; ++i)
839 indices[j][i] = good_indices[j][i];
840 }
841
842 return best_err;
843 }
844
// Optimize the quantized endpoints of one region, one channel at a time.
//
// colors/importance: the np pixels belonging to this region (channels may be rotated).
// rotatemode/indexmode: current block configuration, passed through to the error metric.
// orig_err: error of orig_endpts; every accepted perturbation must beat the running best.
// region_prec: per-channel endpoint precisions for the current pattern.
// opt_endpts (out): best endpoints found. Returns the corresponding error.
//
// Strategy: for each channel, perturb endpoint A and endpoint B independently and
// start with whichever helps more (blindly alternating can get stuck in a local
// minimum), then alternate endpoints until no further improvement. If the index
// assignment changed for any pixel, restart from channel 0, since channels interact
// through the shared indices. Finally run a bounded exhaustive search per channel
// around the presumed minimum (restarting again if it moves the indices).
static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
{
	float opt_err = orig_err;

	opt_endpts = orig_endpts;

	/*
		err0 = perturb(rgb0, delta0)
		err1 = perturb(rgb1, delta1)
		if (err0 < err1)
			if (err0 >= initial_error) break
			rgb0 += delta0
			next = 1
		else
			if (err1 >= initial_error) break
			rgb1 += delta1
			next = 0
		initial_err = map()
		for (;;)
			err = perturb(next ? rgb1:rgb0, delta)
			if (err >= initial_err) break
			next? rgb1 : rgb0 += delta
			initial_err = err
	*/
	IntEndptsRGBA new_a, new_b;
	IntEndptsRGBA new_endpt;
	int do_b;
	int orig_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
	int new_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
	int temp_indices0[NINDEXARRAYS][Tile::TILE_TOTAL];
	int temp_indices1[NINDEXARRAYS][Tile::TILE_TOTAL];

	// now optimize each channel separately
	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
	{
		// figure out which endpoint when perturbed gives the most improvement and start there
		// if we just alternate, we can easily end up in a local minima
		float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0);	// perturb endpt A
		float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1);	// perturb endpt B

		if (err0 < err1)
		{
			// neither perturbation improved this channel; skip it
			// (note: this also skips the changed-index check below for this channel)
			if (err0 >= opt_err)
				continue;

			// remember the indices that produced this improvement
			for (int j=0; j<NINDEXARRAYS; ++j)
			for (int i=0; i<np; ++i)
			{
				new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
				nvAssert (orig_indices[j][i] != -1);
			}

			opt_endpts.A[ch] = new_a.A[ch];
			opt_err = err0;
			do_b = 1;		// do B next
		}
		else
		{
			if (err1 >= opt_err)
				continue;

			for (int j=0; j<NINDEXARRAYS; ++j)
			for (int i=0; i<np; ++i)
			{
				new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
				nvAssert (orig_indices[j][i] != -1);
			}

			opt_endpts.B[ch] = new_b.B[ch];
			opt_err = err1;
			do_b = 0;		// do A next
		}

		// now alternate endpoints and keep trying until there is no improvement
		for (;;)
		{
			float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
			if (err >= opt_err)
				break;

			for (int j=0; j<NINDEXARRAYS; ++j)
			for (int i=0; i<np; ++i)
			{
				new_indices[j][i] = temp_indices0[j][i];
				nvAssert (orig_indices[j][i] != -1);
			}

			// fold the accepted perturbation into the working endpoints
			if (do_b == 0)
				opt_endpts.A[ch] = new_endpt.A[ch];
			else
				opt_endpts.B[ch] = new_endpt.B[ch];
			opt_err = err;
			do_b = 1 - do_b;	// now move the other endpoint
		}

		// see if the indices have changed
		int i;
		for (i=0; i<np; ++i)
			if (orig_indices[INDEXARRAY_RGB][i] != new_indices[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != new_indices[INDEXARRAY_A][i])
				break;

		// indices moved: earlier channels were optimized against stale indices,
		// so restart the channel loop from the beginning
		if (i<np)
			ch = -1;	// start over
	}

	// finally, do a small exhaustive search around what we think is the global minima to be sure
	bool first = true;
	for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
	{
		float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);

		if (new_err < opt_err)
		{
			opt_err = new_err;

			if (first)
			{
				// first improvement in this pass: record the indices as the new baseline
				for (int j=0; j<NINDEXARRAYS; ++j)
				for (int i=0; i<np; ++i)
				{
					orig_indices[j][i] = temp_indices0[j][i];
					nvAssert (orig_indices[j][i] != -1);
				}
				first = false;
			}
			else
			{
				// see if the indices have changed
				int i;
				for (i=0; i<np; ++i)
					if (orig_indices[INDEXARRAY_RGB][i] != temp_indices0[INDEXARRAY_RGB][i] || orig_indices[INDEXARRAY_A][i] != temp_indices0[INDEXARRAY_A][i])
						break;

				// indices moved again: rerun the exhaustive pass over all channels
				if (i<np)
				{
					ch = -1;	// start over
					first = true;
				}
			}
		}
	}

	return opt_err;
}
989
// For each region of the shape, gather the region's pixels and try to improve
// its quantized endpoints via optimize_one(). Writes the (possibly unchanged)
// endpoints into opt_endpts[] and the corresponding error into opt_err[].
static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS],
							const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
{
	Vector4 pixels[Tile::TILE_TOTAL];
	float importance[Tile::TILE_TOTAL];

	for (int region = 0; region < NREGIONS; ++region)
	{
		// gather the pixels belonging to this region
		int np = 0;
		for (int y = 0; y < tile.size_y; y++)
		{
			for (int x = 0; x < tile.size_x; x++)
			{
				if (REGION(x, y, shapeindex) != region)
					continue;

				pixels[np] = tile.data[y][x];
				importance[np] = tile.importance_map[y][x];
				np++;
			}
		}

		// default to the unoptimized endpoints and their (already valid) error
		opt_endpts[region] = orig_endpts[region];
		opt_err[region] = orig_err[region];

		// now try to optimize these endpoints
		IntEndptsRGBA candidate;
		float candidate_err = optimize_one(pixels, importance, np, rotatemode, indexmode,
										   orig_err[region], orig_endpts[region],
										   pattern_prec.region_precs[region], candidate);

		// keep the optimized endpoints only if they actually reduce the error
		if (candidate_err < orig_err[region])
		{
			opt_err[region] = candidate_err;
			opt_endpts[region] = candidate;
		}
	}
}
1033
1034 /* optimization algorithm
1035 for each pattern
1036 convert endpoints using pattern precision
1037 assign indices and get initial error
1038 compress indices (and possibly reorder endpoints)
1039 transform endpoints
1040 if transformed endpoints fit pattern
1041 get original endpoints back
1042 optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
1043 compress new indices
1044 transform new endpoints
1045 if new endpoints fit pattern AND if error is improved
1046 emit compressed block with new data
1047 else
1048 emit compressed block with original data // to try to preserve maximum endpoint precision
1049 */
1050
// Quantize, optimize and emit a compressed block for the given tile/shape/rotation/
// index mode. Tries each endpoint-precision pattern; for the first pattern whose
// (transformed) endpoints fit, emits either the optimized or the original encoding,
// whichever has lower total error, and returns that error.
// Returns FLT_MAX (after asserting) if no pattern fits — not expected to happen.
static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
{
	float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
	IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
	int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];

	for (int sp = 0; sp < NPATTERNS; ++sp)
	{
		// quantize the float endpoints at this pattern's precision and measure the error
		quantize_endpts(endpts, pattern_precs[sp], orig_endpts);

		assign_indices(tile, shapeindex_best, rotatemode, indexmode, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
		swap_indices(shapeindex_best, indexmode, orig_endpts, orig_indices);

		if (patterns[sp].transform_mode)
			transform_forward(patterns[sp].transform_mode, orig_endpts);

		// apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
		// the assumption made is that if they don't fit now, they won't fit after optimizing.
		if (endpts_fit(orig_endpts, patterns[sp]))
		{
			// undo the transform so optimization works on the raw endpoint values
			if (patterns[sp].transform_mode)
				transform_inverse(patterns[sp].transform_mode, orig_endpts);

			optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);

			// recompute indices/error for the optimized endpoints
			assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
			// (nreed) Commented out asserts because they go off all the time...not sure why
			//for (int i=0; i<NREGIONS; ++i)
			//	nvAssert(expected_opt_err[i] == opt_err[i]);
			swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);

			if (patterns[sp].transform_mode)
				transform_forward(patterns[sp].transform_mode, opt_endpts);

			orig_toterr = opt_toterr = 0;
			for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
			if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
			{
				emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, rotatemode, indexmode, block);
				return opt_toterr;
			}
			else
			{
				// either it stopped fitting when we optimized it, or there was no improvement
				// so go back to the unoptimized endpoints which we know will fit
				// (they were inverse-transformed above, so re-apply the transform before emitting)
				if (patterns[sp].transform_mode)
					transform_forward(patterns[sp].transform_mode, orig_endpts);
				emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, rotatemode, indexmode, block);
				return orig_toterr;
			}
		}
	}
	nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 5).";
	return FLT_MAX;
}
1106
clamp(Vector4 & v)1107 static void clamp(Vector4 &v)
1108 {
1109 if (v.x < 0.0f) v.x = 0.0f;
1110 if (v.x > 255.0f) v.x = 255.0f;
1111 if (v.y < 0.0f) v.y = 0.0f;
1112 if (v.y > 255.0f) v.y = 255.0f;
1113 if (v.z < 0.0f) v.z = 0.0f;
1114 if (v.z > 255.0f) v.z = 255.0f;
1115 if (v.w < 0.0f) v.w = 0.0f;
1116 if (v.w > 255.0f) v.w = 255.0f;
1117 }
1118
1119 // compute initial endpoints for the "RGB" portion and the "A" portion.
1120 // Note these channels may have been rotated.
rough(const Tile & tile,int shapeindex,FltEndpts endpts[NREGIONS])1121 static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
1122 {
1123 for (int region=0; region<NREGIONS; ++region)
1124 {
1125 int np = 0;
1126 Vector3 colors[Tile::TILE_TOTAL];
1127 float alphas[Tile::TILE_TOTAL];
1128 Vector4 mean(0,0,0,0);
1129
1130 for (int y = 0; y < tile.size_y; y++)
1131 for (int x = 0; x < tile.size_x; x++)
1132 if (REGION(x,y,shapeindex) == region)
1133 {
1134 colors[np] = tile.data[y][x].xyz();
1135 alphas[np] = tile.data[y][x].w;
1136 mean += tile.data[y][x];
1137 ++np;
1138 }
1139
1140 // handle simple cases
1141 if (np == 0)
1142 {
1143 Vector4 zero(0,0,0,255.0f);
1144 endpts[region].A = zero;
1145 endpts[region].B = zero;
1146 continue;
1147 }
1148 else if (np == 1)
1149 {
1150 endpts[region].A = Vector4(colors[0], alphas[0]);
1151 endpts[region].B = Vector4(colors[0], alphas[0]);
1152 continue;
1153 }
1154 else if (np == 2)
1155 {
1156 endpts[region].A = Vector4(colors[0], alphas[0]);
1157 endpts[region].B = Vector4(colors[1], alphas[1]);
1158 continue;
1159 }
1160
1161 mean /= float(np);
1162
1163 Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
1164
1165 // project each pixel value along the principal direction
1166 float minp = FLT_MAX, maxp = -FLT_MAX;
1167 float mina = FLT_MAX, maxa = -FLT_MAX;
1168 for (int i = 0; i < np; i++)
1169 {
1170 float dp = dot(colors[i]-mean.xyz(), direction);
1171 if (dp < minp) minp = dp;
1172 if (dp > maxp) maxp = dp;
1173
1174 dp = alphas[i] - mean.w;
1175 if (dp < mina) mina = dp;
1176 if (dp > maxa) maxa = dp;
1177 }
1178
1179 // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
1180 endpts[region].A = mean + Vector4(minp*direction, mina);
1181 endpts[region].B = mean + Vector4(maxp*direction, maxa);
1182
1183 // clamp endpoints
1184 // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
1185 // shape based on endpoints being clamped
1186 clamp(endpts[region].A);
1187 clamp(endpts[region].B);
1188 }
1189 }
1190
compress_mode5(const Tile & t,char * block)1191 float AVPCL::compress_mode5(const Tile &t, char *block)
1192 {
1193 FltEndpts endpts[NREGIONS];
1194 char tempblock[AVPCL::BLOCKSIZE];
1195 float msebest = FLT_MAX;
1196 int shape = 0;
1197 Tile t1;
1198
1199 // try all rotations. refine tries the 2 different indexings.
1200 for (int r = 0; r < NROTATEMODES && msebest > 0; ++r)
1201 {
1202 rotate_tile(t, r, t1);
1203 rough(t1, shape, endpts);
1204 // for (int i = 0; i < NINDEXMODES && msebest > 0; ++i)
1205 for (int i = 0; i < 1 && msebest > 0; ++i)
1206 {
1207 float mse = refine(t1, shape, r, i, endpts, tempblock);
1208 if (mse < msebest)
1209 {
1210 memcpy(block, tempblock, sizeof(tempblock));
1211 msebest = mse;
1212 }
1213 }
1214 }
1215 return msebest;
1216 }
1217