/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.

You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

See the License for the specific language governing permissions and limitations under the License.
*/

// Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis

// x1000 777.1x4 64p 2bi (30b)
// mode shorthand: "x1000" is the 4-bit mode field (mode value 0x8), "777.1x4" means each of the
// four endpoints carries 7-bit RGB components plus one shared LSB, "64p" is 64 partition shapes,
// "2bi" is 2-bit indices, and "(30b)" is the 30 bits of index data (anchor indices drop one bit).

#include "bits.h"
#include "tile.h"
#include "avpcl.h"
#include "nvcore/debug.h"
#include "nvmath/vector.inl"
#include "nvmath/matrix.inl"
#include "nvmath/fitting.h"
#include "avpcl_utils.h"
#include "endpts.h"
#include <string.h>
#include <float.h>

#include "shapes_two.h"

using namespace nv;
using namespace AVPCL;

#define NLSBMODES 4 // number of distinct LSB modes per region: two 1-bit LSBs (one per endpoint) give 4 combinations

#define NINDICES 4
#define INDEXBITS 2
#define HIGH_INDEXBIT (1<<(INDEXBITS-1))
#define DENOM (NINDICES-1)
#define BIAS (DENOM/2)

// WORK: determine the optimal traversal pattern to search for the best shape -- what does the error curve look like?
// i.e. can we search shapes in a particular order so we can find the global error minimum easily and
// stop without having to touch all shapes?

#define POS_TO_X(pos) ((pos)&3)
#define POS_TO_Y(pos) (((pos)>>2)&3)

#define NBITSIZES (NREGIONS*2)
#define ABITINDEX(region) (2*(region)+0)
#define BBITINDEX(region) (2*(region)+1)

struct ChanBits
{
    int nbitsizes[NBITSIZES]; // bitsizes for one channel
};

struct Pattern
{
    ChanBits chan[NCHANNELS_RGB]; // bit patterns used per channel
    int transformed;              // if 0, deltas are unsigned and no transform; otherwise, signed and transformed
    int mode;                     // associated mode value
    int modebits;                 // number of mode bits
    const char *encoding;         // verilog description of encoding for this mode
};

#define NPATTERNS 1
#define NREGIONS 2

static Pattern patterns[NPATTERNS] =
{
    // red      green    blue     xfm mode mb
    7,7,7,7,    7,7,7,7, 7,7,7,7, 0,  0x8, 4, "",
};

struct RegionPrec
{
    int endpt_a_prec[NCHANNELS_RGB];
    int endpt_b_prec[NCHANNELS_RGB];
};

struct PatternPrec
{
    RegionPrec region_precs[NREGIONS];
};


// this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] =
{
    7,7,7, 7,7,7, 7,7,7, 7,7,7,
};
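
// a minimal sanity-check sketch (not in the original source): verify that pattern_precs
// agrees with the bit sizes in "patterns" above. the helper name is illustrative; it could
// be wired to an nvAssert in a debug build.
static bool pattern_precs_match_patterns()
{
    for (int p = 0; p < NPATTERNS; ++p)
        for (int j = 0; j < NCHANNELS_RGB; ++j)
            for (int i = 0; i < NREGIONS; ++i)
            {
                if (patterns[p].chan[j].nbitsizes[ABITINDEX(i)] != pattern_precs[p].region_precs[i].endpt_a_prec[j]) return false;
                if (patterns[p].chan[j].nbitsizes[BBITINDEX(i)] != pattern_precs[p].region_precs[i].endpt_b_prec[j]) return false;
            }
    return true;
}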

// return # of bits needed to store n. handle signed or unsigned cases properly
static int nbits(int n, bool issigned)
{
    int nb;
    if (n == 0)
        return 0; // no bits needed for 0, signed or not
    else if (n > 0)
    {
        for (nb = 0; n; ++nb, n >>= 1) ;
        return nb + (issigned ? 1 : 0);
    }
    else
    {
        nvAssert(issigned);
        for (nb = 0; n < -1; ++nb, n >>= 1) ;
        return nb + 1;
    }
}
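
// worked examples for nbits (illustrative, not part of the original source):
//   nbits(7, false) == 3 (binary 111)       nbits(7, true) == 4 (binary 0111)
//   nbits(-1, true) == 1 (binary 1)         nbits(-4, true) == 3 (binary 100)
// negative values shift right until only the sign remains, counting two's-complement bits.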

static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])
{
    nvUnreachable();
}

static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])
{
    nvUnreachable();
}

// endpoints are 888,888; reduce to 777,777 and store the majority of the dropped LSBs in the compressed endpoints' lsb fields
static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpts)
{
    int onescnt;

    onescnt = 0;
    for (int j = 0; j < NCHANNELS_RGB; ++j)
    {
        onescnt += endpts.A[j] & 1;
        compr_endpts.A[j] = endpts.A[j] >> 1;
        nvAssert(compr_endpts.A[j] < 128);
    }
    compr_endpts.a_lsb = onescnt >= 2;

    onescnt = 0;
    for (int j = 0; j < NCHANNELS_RGB; ++j)
    {
        onescnt += endpts.B[j] & 1;
        compr_endpts.B[j] = endpts.B[j] >> 1;
        nvAssert(compr_endpts.B[j] < 128);
    }
    compr_endpts.b_lsb = onescnt >= 2;
}

static void uncompress_one(const IntEndptsRGB_2& compr_endpts, IntEndptsRGB& endpts)
{
    for (int j = 0; j < NCHANNELS_RGB; ++j)
    {
        endpts.A[j] = (compr_endpts.A[j] << 1) | compr_endpts.a_lsb;
        endpts.B[j] = (compr_endpts.B[j] << 1) | compr_endpts.b_lsb;
    }
}
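
// worked round-trip example (illustrative): A = (255,254,253) has LSBs (1,0,1), so the majority
// LSB is 1; compress_one stores A = (127,127,126) with a_lsb = 1, and uncompress_one
// reconstructs (255,255,253) -- each channel is off by at most 1 from the original 8-bit value.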

static void uncompress_endpoints(const IntEndptsRGB_2 compr_endpts[NREGIONS], IntEndptsRGB endpts[NREGIONS])
{
    for (int i = 0; i < NREGIONS; ++i)
        uncompress_one(compr_endpts[i], endpts[i]);
}

static void compress_endpoints(const IntEndptsRGB endpts[NREGIONS], IntEndptsRGB_2 compr_endpts[NREGIONS])
{
    for (int i = 0; i < NREGIONS; ++i)
        compress_one(endpts[i], compr_endpts[i]);
}


static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec &pattern_prec, IntEndptsRGB_2 q_endpts[NREGIONS])
{
    IntEndptsRGB full_endpts[NREGIONS];

    for (int region = 0; region < NREGIONS; ++region)
    {
        full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space
        full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
        full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
        full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
        full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
        full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
        compress_one(full_endpts[region], q_endpts[region]);
    }
}

// swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
{
    for (int region = 0; region < NREGIONS; ++region)
    {
        int position = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex, region);

        int x = POS_TO_X(position);
        int y = POS_TO_Y(position);
        nvAssert(REGION(x, y, shapeindex) == region); // double check the table
        if (indices[y][x] & HIGH_INDEXBIT)
        {
            // high bit is set, swap the endpts and indices for this region
            int t;
            for (int i = 0; i < NCHANNELS_RGB; ++i)
            {
                t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t;
            }
            t = endpts[region].a_lsb; endpts[region].a_lsb = endpts[region].b_lsb; endpts[region].b_lsb = t;

            for (int y = 0; y < Tile::TILE_H; y++)
                for (int x = 0; x < Tile::TILE_W; x++)
                    if (REGION(x, y, shapeindex) == region)
                        indices[y][x] = NINDICES - 1 - indices[y][x];
        }
    }
}
static bool endpts_fit(IntEndptsRGB_2 endpts[NREGIONS], const Pattern &p)
{
    // mode 3 endpoints are untransformed, so any quantized endpoints fit the pattern
    return true;
}

static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, Bits &out)
{
    out.write(p.mode, p.modebits);
    out.write(shapeindex, SHAPEBITS);

    for (int j = 0; j < NCHANNELS_RGB; ++j)
        for (int i = 0; i < NREGIONS; ++i)
        {
            out.write(endpts[i].A[j], p.chan[j].nbitsizes[ABITINDEX(i)]);
            out.write(endpts[i].B[j], p.chan[j].nbitsizes[BBITINDEX(i)]);
        }

    for (int i = 0; i < NREGIONS; ++i)
    {
        out.write(endpts[i].a_lsb, 1);
        out.write(endpts[i].b_lsb, 1);
    }

    nvAssert(out.getptr() == 98);
}
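
// bit budget sketch (derived from the writes above, assuming SHAPEBITS == 6 for the 64 shapes):
// 4 mode bits + 6 shape bits + 3 channels * 2 regions * 2 endpoints * 7 bits (84) + 4 lsb bits = 98,
// matching the assert.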

static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
{
    int mode = AVPCL::getmode(in);

    pat_index = 0;
    nvAssert(pat_index >= 0 && pat_index < NPATTERNS);
    nvAssert(in.getptr() == patterns[pat_index].modebits);

    shapeindex = in.read(SHAPEBITS);
    p = patterns[pat_index];

    for (int j = 0; j < NCHANNELS_RGB; ++j)
        for (int i = 0; i < NREGIONS; ++i)
        {
            endpts[i].A[j] = in.read(p.chan[j].nbitsizes[ABITINDEX(i)]);
            endpts[i].B[j] = in.read(p.chan[j].nbitsizes[BBITINDEX(i)]);
        }

    for (int i = 0; i < NREGIONS; ++i)
    {
        endpts[i].a_lsb = in.read(1);
        endpts[i].b_lsb = in.read(1);
    }

    nvAssert(in.getptr() == 98);
}

static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
{
    int positions[NREGIONS];

    for (int r = 0; r < NREGIONS; ++r)
        positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex, r);

    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
    {
        int x = POS_TO_X(pos);
        int y = POS_TO_Y(pos);

        bool match = false;

        for (int r = 0; r < NREGIONS; ++r)
            if (positions[r] == pos) { match = true; break; }

        out.write(indices[y][x], INDEXBITS - (match ? 1 : 0));
    }
}
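
// note: the anchor index of each region is written with one less bit because swap_indices
// guarantees its high-order bit is 0; 16 indices * 2 bits - 2 anchor bits = 30 index bits,
// which together with the 98-bit header fills the 128-bit block.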

static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
{
    int positions[NREGIONS];

    for (int r = 0; r < NREGIONS; ++r)
        positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex, r);

    for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
    {
        int x = POS_TO_X(pos);
        int y = POS_TO_Y(pos);

        bool match = false;

        for (int r = 0; r < NREGIONS; ++r)
            if (positions[r] == pos) { match = true; break; }

        indices[y][x] = in.read(INDEXBITS - (match ? 1 : 0));
    }
}

static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
{
    Bits out(block, AVPCL::BITSIZE);

    write_header(endpts, shapeindex, p, out);

    write_indices(indices, shapeindex, out);

    nvAssert(out.getptr() == AVPCL::BITSIZE);
}

static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
{
    IntEndptsRGB endpts;

    uncompress_one(endpts_2, endpts);

    // scale endpoints
    int a, b; // really need an IntVec4...

    a = Utils::unquantize(endpts.A[0], region_prec.endpt_a_prec[0]+1); // +1 since we are in uncompressed space
    b = Utils::unquantize(endpts.B[0], region_prec.endpt_b_prec[0]+1);

    // interpolate
    for (int i = 0; i < NINDICES; ++i)
        palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));

    a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1);
    b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);

    // interpolate
    for (int i = 0; i < NINDICES; ++i)
        palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));

    a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1);
    b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);

    // interpolate
    for (int i = 0; i < NINDICES; ++i)
        palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));

    // constant alpha
    for (int i = 0; i < NINDICES; ++i)
        palette[i].w = 255.0f;
}
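
// a sketch of the 2-bit interpolation Utils::lerp is expected to perform for DENOM == 3; the BC7
// format interpolates with the fixed weights {0,21,43,64} (not exact thirds). this helper is
// illustrative only and is not called by the original code.
static inline int bc7_lerp2(int a, int b, int i)
{
    static const int w[NINDICES] = { 0, 21, 43, 64 };
    return (a * (64 - w[i]) + b * w[i] + 32) >> 6;
}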

static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])
{
    nvUnreachable();
}

void AVPCL::decompress_mode3(const char *block, Tile &t)
{
    Bits in(block, AVPCL::BITSIZE);

    Pattern p;
    IntEndptsRGB_2 endpts[NREGIONS];
    int shapeindex, pat_index;

    read_header(in, endpts, shapeindex, p, pat_index);

    if (p.transformed)
    {
        sign_extend(p, endpts);
        transform_inverse(endpts);
    }

    Vector4 palette[NREGIONS][NINDICES];
    for (int r = 0; r < NREGIONS; ++r)
        generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);

    int indices[Tile::TILE_H][Tile::TILE_W];

    read_indices(in, shapeindex, indices);

    nvAssert(in.getptr() == AVPCL::BITSIZE);

    // lookup
    for (int y = 0; y < Tile::TILE_H; y++)
        for (int x = 0; x < Tile::TILE_W; x++)
            t.data[y][x] = palette[REGION(x, y, shapeindex)][indices[y][x]];
}

// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{
    Vector4 palette[NINDICES];
    float toterr = 0;

    generate_palette_quantized(endpts, region_prec, palette);

    for (int i = 0; i < np; ++i)
    {
        float besterr = FLT_MAX;

        for (int j = 0; j < NINDICES && besterr > 0; ++j)
        {
            float err = Utils::metric4(colors[i], palette[j]) * importance[i];

            if (err > besterr) // error increased, so we're done searching
                break;
            if (err < besterr)
            {
                besterr = err;
                indices[i] = j;
            }
        }
        toterr += besterr;

        // check for early exit
        if (toterr > current_err)
        {
            // fill out bogus index values so it's initialized at least
            for (int k = i; k < np; ++k)
                indices[k] = -1;

            return FLT_MAX;
        }
    }
    return toterr;
}

static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec,
                           int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{
    // build list of possibles
    Vector4 palette[NREGIONS][NINDICES];

    for (int region = 0; region < NREGIONS; ++region)
    {
        generate_palette_quantized(endpts[region], pattern_prec.region_precs[region], &palette[region][0]);
        toterr[region] = 0;
    }

    for (int y = 0; y < tile.size_y; y++)
        for (int x = 0; x < tile.size_x; x++)
        {
            int region = REGION(x, y, shapeindex);
            float err, besterr = FLT_MAX;

            for (int i = 0; i < NINDICES && besterr > 0; ++i)
            {
                err = Utils::metric4(tile.data[y][x], palette[region][i]);

                if (err > besterr) // error increased, so we're done searching
                    break;
                if (err < besterr)
                {
                    besterr = err;
                    indices[y][x] = i;
                }
            }
            toterr[region] += besterr;
        }
}

// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error)
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
                         float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{
    // we have the old endpoints: old_endpts
    // we have the perturbed endpoints: new_endpts
    // we have the temporary endpoints: temp_endpts

    IntEndptsRGB_2 temp_endpts;
    float min_err = old_err; // start with the best current error
    int beststep;
    int temp_indices[Tile::TILE_TOTAL];

    for (int i = 0; i < np; ++i)
        indices[i] = -1;

    // copy real endpoints so we can perturb them
    temp_endpts = new_endpts = old_endpts;

    int prec = do_b ? region_prec.endpt_b_prec[ch] : region_prec.endpt_a_prec[ch];

    // do a logarithmic search for the best error for this endpoint (A if do_b == 0, B otherwise)
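    // the search starts at half the representable range and halves the step each iteration,
    // trying +step and -step around the current best endpoint; this costs about 2*prec calls
    // to map_colors per invocation instead of scanning all 2^prec possible endpoint values.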
    for (int step = 1 << (prec-1); step; step >>= 1)
    {
        bool improved = false;
        for (int sign = -1; sign <= 1; sign += 2)
        {
            if (do_b == 0)
            {
                temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
                if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
                    continue;
            }
            else
            {
                temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
                if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
                    continue;
            }

            float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);

            if (err < min_err)
            {
                improved = true;
                min_err = err;
                beststep = sign * step;
                for (int i = 0; i < np; ++i)
                    indices[i] = temp_indices[i];
            }
        }
        // if this was an improvement, move the endpoint and continue search from there
        if (improved)
        {
            if (do_b == 0)
                new_endpts.A[ch] += beststep;
            else
                new_endpts.B[ch] += beststep;
        }
    }
    return min_err;
}

// the larger the error the more time it is worth spending on an exhaustive search.
// perturb the endpoints at least -3 to 3.
// if err > 5000 perturb endpoints 50% of precision
// if err > 1000 25%
// if err > 200 12.5%
// if err > 40 6.25%
// for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
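// rough cost sketch (illustrative): with 7-bit endpoints and err > 5000, adelta = bdelta = 64,
// so up to ~129 values of each endpoint are scanned; the a<=b (or b<=a) constraint roughly
// halves the number of (a,b) pairs, each costing one map_colors evaluation.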
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
{
    IntEndptsRGB_2 temp_endpts;
    float best_err = orig_err;
    int aprec = region_prec.endpt_a_prec[ch];
    int bprec = region_prec.endpt_b_prec[ch];
    int good_indices[Tile::TILE_TOTAL];
    int temp_indices[Tile::TILE_TOTAL];

    for (int i = 0; i < np; ++i)
        indices[i] = -1;

    float thr_scale = (float)np / (float)Tile::TILE_TOTAL;

    if (orig_err == 0) return orig_err;

    int adelta = 0, bdelta = 0;
    if (orig_err > 5000.0*thr_scale)      { adelta = (1 << aprec)/2;  bdelta = (1 << bprec)/2; }
    else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4;  bdelta = (1 << bprec)/4; }
    else if (orig_err > 200.0*thr_scale)  { adelta = (1 << aprec)/8;  bdelta = (1 << bprec)/8; }
    else if (orig_err > 40.0*thr_scale)   { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
    adelta = max(adelta, 3);
    bdelta = max(bdelta, 3);

#ifdef DISABLE_EXHAUSTIVE
    adelta = bdelta = 3;
#endif

    temp_endpts = opt_endpts;

    // ok figure out the range of A and B
    int alow  = max(0, opt_endpts.A[ch] - adelta);
    int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
    int blow  = max(0, opt_endpts.B[ch] - bdelta);
    int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);

    // now there's no need to swap the ordering of A and B
    bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];

    int amin, bmin;

    if (a_le_b)
    {
        // keep a <= b
        for (int a = alow; a <= ahigh; ++a)
            for (int b = max(a, blow); b <= bhigh; ++b) // inclusive upper bound, matching the scan over a
            {
                temp_endpts.A[ch] = a;
                temp_endpts.B[ch] = b;

                float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
                if (err < best_err)
                {
                    amin = a;
                    bmin = b;
                    best_err = err;
                    for (int i = 0; i < np; ++i)
                        good_indices[i] = temp_indices[i];
                }
            }
    }
    else
    {
        // keep b <= a
        for (int b = blow; b <= bhigh; ++b) // inclusive upper bound, matching the scan over a
            for (int a = max(b, alow); a <= ahigh; ++a)
            {
                temp_endpts.A[ch] = a;
                temp_endpts.B[ch] = b;

                float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
                if (err < best_err)
                {
                    amin = a;
                    bmin = b;
                    best_err = err;
                    for (int i = 0; i < np; ++i)
                        good_indices[i] = temp_indices[i];
                }
            }
    }
    if (best_err < orig_err)
    {
        opt_endpts.A[ch] = amin;
        opt_endpts.B[ch] = bmin;
        orig_err = best_err;
        // if we actually improved, update the indices
        for (int i = 0; i < np; ++i)
            indices[i] = good_indices[i];
    }
    return best_err;
}

static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
{
    float opt_err = orig_err;

    opt_endpts = orig_endpts;

    /*
        err0 = perturb(rgb0, delta0)
        err1 = perturb(rgb1, delta1)
        if (err0 < err1)
            if (err0 >= initial_error) break
            rgb0 += delta0
            next = 1
        else
            if (err1 >= initial_error) break
            rgb1 += delta1
            next = 0
        initial_err = map()
        for (;;)
            err = perturb(next ? rgb1 : rgb0, delta)
            if (err >= initial_err) break
            next ? rgb1 : rgb0 += delta
            initial_err = err
    */
    IntEndptsRGB_2 new_a, new_b;
    IntEndptsRGB_2 new_endpt;
    int do_b;
    int orig_indices[Tile::TILE_TOTAL];
    int new_indices[Tile::TILE_TOTAL];
    int temp_indices0[Tile::TILE_TOTAL];
    int temp_indices1[Tile::TILE_TOTAL];

    // now optimize each channel separately
    // for the first error improvement, we save the indices. then, for any later improvement, we compare the indices
    // if they differ, we restart the loop (which then falls back to looking for a first improvement.)
    for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
    {
        // figure out which endpoint when perturbed gives the most improvement and start there
        // if we just alternate, we can easily end up in a local minimum
        float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
        float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B

        if (err0 < err1)
        {
            if (err0 >= opt_err)
                continue;

            for (int i = 0; i < np; ++i)
            {
                new_indices[i] = orig_indices[i] = temp_indices0[i];
                nvAssert(orig_indices[i] != -1);
            }

            opt_endpts.A[ch] = new_a.A[ch];
            opt_err = err0;
            do_b = 1; // do B next
        }
        else
        {
            if (err1 >= opt_err)
                continue;

            for (int i = 0; i < np; ++i)
            {
                new_indices[i] = orig_indices[i] = temp_indices1[i];
                nvAssert(orig_indices[i] != -1);
            }

            opt_endpts.B[ch] = new_b.B[ch];
            opt_err = err1;
            do_b = 0; // do A next
        }

        // now alternate endpoints and keep trying until there is no improvement
        for (;;)
        {
            float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
            if (err >= opt_err)
                break;

            for (int i = 0; i < np; ++i)
            {
                new_indices[i] = temp_indices0[i];
                nvAssert(new_indices[i] != -1);
            }

            if (do_b == 0)
                opt_endpts.A[ch] = new_endpt.A[ch];
            else
                opt_endpts.B[ch] = new_endpt.B[ch];
            opt_err = err;
            do_b = 1 - do_b; // now move the other endpoint
        }

        // see if the indices have changed
        int i;
        for (i = 0; i < np; ++i)
            if (orig_indices[i] != new_indices[i])
                break;

        if (i < np)
            ch = -1; // start over
    }

    // finally, do a small exhaustive search around what we think is the global minimum to be sure
    // note this is independent of the above search, so we don't care about the indices from the above;
    // if they differ it doesn't matter, since we're starting over from ch=0
    bool first = true;
    for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
    {
        float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);

        if (new_err < opt_err)
        {
            opt_err = new_err;

            if (first)
            {
                for (int i = 0; i < np; ++i)
                {
                    orig_indices[i] = temp_indices0[i];
                    nvAssert(orig_indices[i] != -1);
                }
                first = false;
            }
            else
            {
                // see if the indices have changed
                int i;
                for (i = 0; i < np; ++i)
                    if (orig_indices[i] != temp_indices0[i])
                        break;

                if (i < np)
                {
                    ch = -1; // start over
                    first = true;
                }
            }
        }
    }

    return opt_err;
}

// this will return a valid set of endpoints in opt_endpts regardless of whether it improves on orig_endpts or not
static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS],
                            const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
{
    Vector4 pixels[Tile::TILE_TOTAL];
    float importance[Tile::TILE_TOTAL];
    IntEndptsRGB_2 temp_in, temp_out;
    int temp_indices[Tile::TILE_TOTAL];

    for (int region = 0; region < NREGIONS; ++region)
    {
        // collect the pixels in the region
        int np = 0;

        for (int y = 0; y < tile.size_y; y++) {
            for (int x = 0; x < tile.size_x; x++) {
                if (REGION(x, y, shapeindex) == region) {
                    pixels[np] = tile.data[y][x];
                    importance[np] = tile.importance_map[y][x];
                    np++;
                }
            }
        }

        opt_endpts[region] = temp_in = orig_endpts[region];
        opt_err[region] = orig_err[region];

        float best_err = orig_err[region];

        for (int lsbmode = 0; lsbmode < NLSBMODES; ++lsbmode)
        {
            temp_in.a_lsb = lsbmode & 1;
            temp_in.b_lsb = (lsbmode >> 1) & 1;

            // make sure we have a valid error for temp_in
            // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
            // (map_colors will compute a mapping but will stop if the error exceeds the value passed in the current_err position)
            float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);

            // now try to optimize these endpoints
            float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);

            // if we find an improvement, update the best so far and correct the output endpoints and errors
            if (temp_out_err < best_err)
            {
                best_err = temp_out_err;
                opt_err[region] = temp_out_err;
                opt_endpts[region] = temp_out;
            }
        }
    }
}

/* optimization algorithm
    for each pattern
        convert endpoints using pattern precision
        assign indices and get initial error
        compress indices (and possibly reorder endpoints)
        transform endpoints
        if transformed endpoints fit pattern
            get original endpoints back
            optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
            compress new indices
            transform new endpoints
            if new endpoints fit pattern AND if error is improved
                emit compressed block with new data
            else
                emit compressed block with original data // to try to preserve maximum endpoint precision
*/

static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
{
    float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
    IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
    int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];

    for (int sp = 0; sp < NPATTERNS; ++sp)
    {
        quantize_endpts(endpts, pattern_precs[sp], orig_endpts);
        assign_indices(tile, shapeindex_best, orig_endpts, pattern_precs[sp], orig_indices, orig_err);
        swap_indices(orig_endpts, orig_indices, shapeindex_best);
        if (patterns[sp].transformed)
            transform_forward(orig_endpts);
        // apply a heuristic here -- we check if the endpoints fit before we try to optimize them.
        // the assumption made is that if they don't fit now, they won't fit after optimizing.
        if (endpts_fit(orig_endpts, patterns[sp]))
        {
            if (patterns[sp].transformed)
                transform_inverse(orig_endpts);
            optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
            assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
            // (nreed) Commented out asserts because they go off all the time...not sure why
            //for (int i=0; i<NREGIONS; ++i)
            //    nvAssert(expected_opt_err[i] == opt_err[i]);
            swap_indices(opt_endpts, opt_indices, shapeindex_best);
            if (patterns[sp].transformed)
                transform_forward(opt_endpts);
            orig_toterr = opt_toterr = 0;
            for (int i = 0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
            if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
            {
                emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
                return opt_toterr;
            }
            else
            {
                // either it stopped fitting when we optimized it, or there was no improvement
                // so go back to the unoptimized endpoints which we know will fit
                if (patterns[sp].transformed)
                    transform_forward(orig_endpts);
                emit_block(orig_endpts, shapeindex_best, patterns[sp], orig_indices, block);
                return orig_toterr;
            }
        }
    }
    nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 3).";
    return FLT_MAX;
}

static void clamp(Vector4 &v)
{
    if (v.x < 0.0f) v.x = 0.0f;
    if (v.x > 255.0f) v.x = 255.0f;
    if (v.y < 0.0f) v.y = 0.0f;
    if (v.y > 255.0f) v.y = 255.0f;
    if (v.z < 0.0f) v.z = 0.0f;
    if (v.z > 255.0f) v.z = 255.0f;
    v.w = 255.0f;
}

static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
{
    for (int region = 0; region < NREGIONS; ++region)
        for (int i = 0; i < NINDICES; ++i)
            palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
}

// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
{
    // build list of possibles
    Vector4 palette[NREGIONS][NINDICES];

    generate_palette_unquantized(endpts, palette);

    float toterr = 0;

    for (int y = 0; y < tile.size_y; y++)
        for (int x = 0; x < tile.size_x; x++)
        {
            int region = REGION(x, y, shapeindex);
            float err, besterr = FLT_MAX;

            for (int i = 0; i < NINDICES && besterr > 0; ++i)
            {
                err = Utils::metric4(tile.data[y][x], palette[region][i]);

                if (err > besterr) // error increased, so we're done searching. this works for most norms.
                    break;
                if (err < besterr)
                    besterr = err;
            }
            toterr += besterr;
        }
    return toterr;
}

static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
{
    for (int region = 0; region < NREGIONS; ++region)
    {
        int np = 0;
        Vector3 colors[Tile::TILE_TOTAL];
        float alphas[2];
        Vector4 mean(0, 0, 0, 0);

        for (int y = 0; y < tile.size_y; y++)
            for (int x = 0; x < tile.size_x; x++)
                if (REGION(x, y, shapeindex) == region)
                {
                    colors[np] = tile.data[y][x].xyz();
                    if (np < 2) alphas[np] = tile.data[y][x].w;
                    mean += tile.data[y][x];
                    ++np;
                }

        // handle simple cases
        if (np == 0)
        {
            Vector4 zero(0, 0, 0, 255.0f);
            endpts[region].A = zero;
            endpts[region].B = zero;
            continue;
        }
        else if (np == 1)
        {
            endpts[region].A = Vector4(colors[0], alphas[0]);
            endpts[region].B = Vector4(colors[0], alphas[0]);
            continue;
        }
        else if (np == 2)
        {
            endpts[region].A = Vector4(colors[0], alphas[0]);
            endpts[region].B = Vector4(colors[1], alphas[1]);
            continue;
        }

        mean /= float(np);

        Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);

        // project each pixel value along the principal direction
        float minp = FLT_MAX, maxp = -FLT_MAX;
        for (int i = 0; i < np; i++)
        {
            float dp = dot(colors[i] - mean.xyz(), direction);
            if (dp < minp) minp = dp;
            if (dp > maxp) maxp = dp;
        }

        // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
        endpts[region].A = mean + minp * Vector4(direction, 0);
        endpts[region].B = mean + maxp * Vector4(direction, 0);

        // clamp endpoints
        // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
        // shape based on endpoints being clamped
        clamp(endpts[region].A);
        clamp(endpts[region].B);
    }

    return map_colors(tile, shapeindex, endpts);
}

// swap the ith and jth entries of both lists in tandem (used by the rough-mse sort below)
static void swap(float *list1, int *list2, int i, int j)
{
    float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
    int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
}

float AVPCL::compress_mode3(const Tile &t, char *block)
{
    // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
    // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
    const int NITEMS = NSHAPES/4;

    // pick the best NITEMS shapes and refine these.
    struct {
        FltEndpts endpts[NREGIONS];
    } all[NSHAPES];
    float roughmse[NSHAPES];
    int index[NSHAPES];
    char tempblock[AVPCL::BLOCKSIZE];
    float msebest = FLT_MAX;

    for (int i = 0; i < NSHAPES; ++i)
    {
        roughmse[i] = rough(t, i, &all[i].endpts[0]);
        index[i] = i;
    }

    // bubble sort -- only need to bubble up the first NITEMS items
    for (int i = 0; i < NITEMS; ++i)
        for (int j = i+1; j < NSHAPES; ++j)
            if (roughmse[i] > roughmse[j])
                swap(roughmse, index, i, j);

    for (int i = 0; i < NITEMS && msebest > 0; ++i)
    {
        int shape = index[i];
        float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
        if (mse < msebest)
        {
            memcpy(block, tempblock, sizeof(tempblock));
            msebest = mse;
        }
    }
    return msebest;
}