1 /*
2 Copyright 2007 nVidia, Inc.
3 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
4
5 You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
6
7 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
8 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
10 See the License for the specific language governing permissions and limitations under the License.
11 */
12
13 // one region zoh compress/decompress code
14 // Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis
15
16 #include "bits.h"
17 #include "tile.h"
18 #include "zoh.h"
19 #include "zoh_utils.h"
20
21 #include "nvmath/vector.inl"
22 #include "nvmath/fitting.h"
23
24 #include <string.h> // strlen
25 #include <float.h> // FLT_MAX
26
27 using namespace nv;
28 using namespace ZOH;
29
30 #define NINDICES 16
31 #define INDEXBITS 4
32 #define HIGH_INDEXBIT (1<<(INDEXBITS-1))
33 #define DENOM (NINDICES-1)
34
35 #define NSHAPES 1
36
// partition-shape bitmasks: bit (15 - x - 4*y) selects the region of pixel (x,y).
// the one-region case has a single all-zero shape (every pixel in region 0).
static const int shapes[NSHAPES] =
{
	0x0000
}; // only 1 shape
41
42 #define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0)
43
44 #define POS_TO_X(pos) ((pos)&3)
45 #define POS_TO_Y(pos) (((pos)>>2)&3)
46
47 #define NDELTA 2
48
// per-channel bit-precision pattern: prec[0] is the base endpoint precision,
// prec[1] the second endpoint (or delta, when transformed) precision.
struct Chanpat
{
	int prec[NDELTA];		// precision pattern for one channel
};
53
// one encoding mode: per-channel precisions, delta-transform flag, mode id,
// mode-field width, and a verilog-style string describing the bit layout.
struct Pattern
{
	Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel
	int transformed;	// if 0, deltas are unsigned and no transform; otherwise, signed and transformed
	int mode;		// associated mode value
	int modebits;		// number of mode bits
	const char *encoding;	// verilog description of encoding for this mode
};
62
63 #define MAXMODEBITS 5
64 #define MAXMODES (1<<MAXMODEBITS)
65
66 #define NPATTERNS 4
67
// the four one-region patterns, highest base precision first.
// each row: r(base,delta) g(base,delta) b(base,delta), transformed, mode, modebits, encoding.
static const Pattern patterns[NPATTERNS] =
{
	// 16-bit base endpoints, 4-bit signed deltas
	16,4,  16,4,  16,4,	 1, 0x0f, 5, "bw[10],bw[11],bw[12],bw[13],bw[14],bw[15],bx[3:0],gw[10],gw[11],gw[12],gw[13],gw[14],gw[15],gx[3:0],rw[10],rw[11],rw[12],rw[13],rw[14],rw[15],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
	// 12-bit base endpoints, 8-bit signed deltas
	12,8,  12,8,  12,8,	 1, 0x0b, 5, "bw[10],bw[11],bx[7:0],gw[10],gw[11],gx[7:0],rw[10],rw[11],rx[7:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
	// 11-bit base endpoints, 9-bit signed deltas
	11,9,  11,9,  11,9,	 1, 0x07, 5, "bw[10],bx[8:0],gw[10],gx[8:0],rw[10],rx[8:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
	// 10-bit endpoints, untransformed (no deltas)
	10,10, 10,10, 10,10, 0, 0x03, 5, "bx[9:0],gx[9:0],rx[9:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]",
};
75
76 // mapping of mode to the corresponding index in pattern
77 static const int mode_to_pat[MAXMODES] = {
78 -1,-1,-1,
79 3, // 0x03
80 -1,-1,-1,
81 2, // 0x07
82 -1,-1,-1,
83 1, // 0x0b
84 -1,-1,-1,
85 0, // 0x0f
86 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
87 };
88
89 #define R_0(ep) (ep)[0].A[i]
90 #define R_1(ep) (ep)[0].B[i]
91 #define MASK(n) ((1<<(n))-1)
92
93 // compress endpoints
compress_endpts(const IntEndpts in[NREGIONS_ONE],ComprEndpts out[NREGIONS_ONE],const Pattern & p)94 static void compress_endpts(const IntEndpts in[NREGIONS_ONE], ComprEndpts out[NREGIONS_ONE], const Pattern &p)
95 {
96 if (p.transformed)
97 {
98 for (int i=0; i<NCHANNELS; ++i)
99 {
100 R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
101 R_1(out) = (R_1(in) - R_0(in)) & MASK(p.chan[i].prec[1]);
102 }
103 }
104 else
105 {
106 for (int i=0; i<NCHANNELS; ++i)
107 {
108 R_0(out) = R_0(in) & MASK(p.chan[i].prec[0]);
109 R_1(out) = R_1(in) & MASK(p.chan[i].prec[1]);
110 }
111 }
112 }
113
114 // decompress endpoints
decompress_endpts(const ComprEndpts in[NREGIONS_ONE],IntEndpts out[NREGIONS_ONE],const Pattern & p)115 static void decompress_endpts(const ComprEndpts in[NREGIONS_ONE], IntEndpts out[NREGIONS_ONE], const Pattern &p)
116 {
117 bool issigned = Utils::FORMAT == SIGNED_F16;
118
119 if (p.transformed)
120 {
121 for (int i=0; i<NCHANNELS; ++i)
122 {
123 R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
124 int t;
125 t = SIGN_EXTEND(R_1(in), p.chan[i].prec[1]);
126 t = (t + R_0(in)) & MASK(p.chan[i].prec[0]);
127 R_1(out) = issigned ? SIGN_EXTEND(t,p.chan[i].prec[0]) : t;
128 }
129 }
130 else
131 {
132 for (int i=0; i<NCHANNELS; ++i)
133 {
134 R_0(out) = issigned ? SIGN_EXTEND(R_0(in),p.chan[i].prec[0]) : R_0(in);
135 R_1(out) = issigned ? SIGN_EXTEND(R_1(in),p.chan[i].prec[1]) : R_1(in);
136 }
137 }
138 }
139
// quantize the floating-point endpoints of every region to prec-bit integers
static void quantize_endpts(const FltEndpts endpts[NREGIONS_ONE], int prec, IntEndpts q_endpts[NREGIONS_ONE])
{
	for (int region = 0; region < NREGIONS_ONE; ++region)
	{
		const FltEndpts &src = endpts[region];
		IntEndpts &dst = q_endpts[region];

		dst.A[0] = Utils::quantize(src.A.x, prec);
		dst.A[1] = Utils::quantize(src.A.y, prec);
		dst.A[2] = Utils::quantize(src.A.z, prec);
		dst.B[0] = Utils::quantize(src.B.x, prec);
		dst.B[1] = Utils::quantize(src.B.y, prec);
		dst.B[2] = Utils::quantize(src.B.z, prec);
	}
}
152
153 // swap endpoints as needed to ensure that the indices at index_one and index_one have a 0 high-order bit
154 // index_one is 0 at x=0 y=0 and 15 at x=3 y=3 so y = (index >> 2) & 3 and x = index & 3
swap_indices(IntEndpts endpts[NREGIONS_ONE],int indices[Tile::TILE_H][Tile::TILE_W],int shapeindex)155 static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex)
156 {
157 int index_positions[NREGIONS_ONE];
158
159 index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0
160
161 for (int region = 0; region < NREGIONS_ONE; ++region)
162 {
163 int x = index_positions[region] & 3;
164 int y = (index_positions[region] >> 2) & 3;
165 nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table
166 if (indices[y][x] & HIGH_INDEXBIT)
167 {
168 // high bit is set, swap the endpts and indices for this region
169 int t;
170 for (int i=0; i<NCHANNELS; ++i) { t = endpts[region].A[i]; endpts[region].A[i] = endpts[region].B[i]; endpts[region].B[i] = t; }
171
172 for (int y = 0; y < Tile::TILE_H; y++)
173 for (int x = 0; x < Tile::TILE_W; x++)
174 if (REGION(x,y,shapeindex) == region)
175 indices[y][x] = NINDICES - 1 - indices[y][x];
176 }
177 }
178 }
179
180 // endpoints fit only if the compression was lossless
endpts_fit(const IntEndpts orig[NREGIONS_ONE],const ComprEndpts compressed[NREGIONS_ONE],const Pattern & p)181 static bool endpts_fit(const IntEndpts orig[NREGIONS_ONE], const ComprEndpts compressed[NREGIONS_ONE], const Pattern &p)
182 {
183 IntEndpts uncompressed[NREGIONS_ONE];
184
185 decompress_endpts(compressed, uncompressed, p);
186
187 for (int j=0; j<NREGIONS_ONE; ++j)
188 for (int i=0; i<NCHANNELS; ++i)
189 {
190 if (orig[j].A[i] != uncompressed[j].A[i]) return false;
191 if (orig[j].B[i] != uncompressed[j].B[i]) return false;
192 }
193 return true;
194 }
195
// write the mode field and compressed endpoints into the output bitstream.
// the bit layout is driven by the pattern's encoding string, which is
// consumed starting from its end (ptr begins at strlen and Utils::parse
// walks it backwards -- so fields are emitted in reverse string order).
static void write_header(const ComprEndpts endpts[NREGIONS_ONE], const Pattern &p, Bits &out)
{
	// interpret the verilog backwards and process it
	int m = p.mode;
	int rw = endpts[0].A[0], rx = endpts[0].B[0];
	int gw = endpts[0].A[1], gx = endpts[0].B[1];
	int bw = endpts[0].A[2], bx = endpts[0].B[2];
	int ptr = int(strlen(p.encoding));
	while (ptr)
	{
		Field field;
		int endbit, len;	// bit range within the source value: [endbit+len-1 : endbit]

		// !!!UNDONE: get rid of string parsing!!!
		Utils::parse(p.encoding, ptr, field, endbit, len);
		switch(field)
		{
		case FIELD_M:	out.write( m >> endbit, len); break;
		case FIELD_RW:	out.write(rw >> endbit, len); break;
		case FIELD_RX:	out.write(rx >> endbit, len); break;
		case FIELD_GW:	out.write(gw >> endbit, len); break;
		case FIELD_GX:	out.write(gx >> endbit, len); break;
		case FIELD_BW:	out.write(bw >> endbit, len); break;
		case FIELD_BX:	out.write(bx >> endbit, len); break;

		// one-region encodings have no partition or second-region fields
		case FIELD_D:
		case FIELD_RY:
		case FIELD_RZ:
		case FIELD_GY:
		case FIELD_GZ:
		case FIELD_BY:
		case FIELD_BZ:
		default: nvUnreachable();
		}
	}
}
232
// read the mode and compressed endpoints from the bitstream, returning the
// matching pattern.  reading isn't quite symmetric with writing -- we don't
// know the encoding until we decode the mode.
static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p)
{
	// reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode
	// modes 0x00 and 0x01 are fully identified by 2 bits; all others use 5 bits
	int mode = in.read(2);
	if (mode != 0x00 && mode != 0x01)
		mode = (in.read(3) << 2) | mode;

	int pat_index = mode_to_pat[mode];

	nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS);
	nvDebugCheck (in.getptr() == patterns[pat_index].modebits);

	p = patterns[pat_index];

	int d;
	int rw, rx;
	int gw, gx;
	int bw, bx;

	d = 0;
	rw = rx = 0;
	gw = gx = 0;
	bw = bx = 0;

	// walk the encoding string backwards, mirroring write_header
	int ptr = int(strlen(p.encoding));

	while (ptr)
	{
		Field field;
		int endbit, len;	// bit range being accumulated: value |= bits << endbit

		// !!!UNDONE: get rid of string parsing!!!
		Utils::parse(p.encoding, ptr, field, endbit, len);

		switch(field)
		{
		case FIELD_M:	break; // already processed so ignore
		case FIELD_RW:	rw |= in.read(len) << endbit; break;
		case FIELD_RX:	rx |= in.read(len) << endbit; break;
		case FIELD_GW:	gw |= in.read(len) << endbit; break;
		case FIELD_GX:	gx |= in.read(len) << endbit; break;
		case FIELD_BW:	bw |= in.read(len) << endbit; break;
		case FIELD_BX:	bx |= in.read(len) << endbit; break;

		// one-region encodings have no partition or second-region fields
		case FIELD_D:
		case FIELD_RY:
		case FIELD_RZ:
		case FIELD_GY:
		case FIELD_GZ:
		case FIELD_BY:
		case FIELD_BZ:
		default: nvUnreachable();
		}
	}

	// the one-region header occupies 65 bits; the remaining 63 bits are indices
	nvDebugCheck (in.getptr() == 128 - 63);

	endpts[0].A[0] = rw; endpts[0].B[0] = rx;
	endpts[0].A[1] = gw; endpts[0].B[1] = gx;
	endpts[0].A[2] = bw; endpts[0].B[2] = bx;
}
294
295 // compress index 0
write_indices(const int indices[Tile::TILE_H][Tile::TILE_W],int shapeindex,Bits & out)296 static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
297 {
298 for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
299 {
300 int x = POS_TO_X(pos);
301 int y = POS_TO_Y(pos);
302
303 out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0));
304 }
305 }
306
// pack the header (mode + endpoints) followed by the indices into one block
static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block)
{
	Bits out(block, ZOH::BITSIZE);

	write_header(endpts, p, out);
	write_indices(indices, shapeindex, out);

	nvDebugCheck(out.getptr() == ZOH::BITSIZE);	// the block must come out exactly full
}
317
generate_palette_quantized(const IntEndpts & endpts,int prec,Vector3 palette[NINDICES])318 static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES])
319 {
320 // scale endpoints
321 int a, b; // really need a IntVector3...
322
323 a = Utils::unquantize(endpts.A[0], prec);
324 b = Utils::unquantize(endpts.B[0], prec);
325
326 // interpolate
327 for (int i = 0; i < NINDICES; ++i)
328 palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
329
330 a = Utils::unquantize(endpts.A[1], prec);
331 b = Utils::unquantize(endpts.B[1], prec);
332
333 // interpolate
334 for (int i = 0; i < NINDICES; ++i)
335 palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
336
337 a = Utils::unquantize(endpts.A[2], prec);
338 b = Utils::unquantize(endpts.B[2], prec);
339
340 // interpolate
341 for (int i = 0; i < NINDICES; ++i)
342 palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
343 }
344
345 // position 0 was compressed
read_indices(Bits & in,int shapeindex,int indices[Tile::TILE_H][Tile::TILE_W])346 static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
347 {
348 for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos)
349 {
350 int x = POS_TO_X(pos);
351 int y = POS_TO_Y(pos);
352
353 indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0));
354 }
355 }
356
// decompress a single one-region block into the tile's pixel data:
// decode header, reconstruct endpoints, rebuild the palette, read indices,
// then look up each pixel's color.
void ZOH::decompressone(const char *block, Tile &t)
{
	Bits in(block, ZOH::BITSIZE);

	Pattern p;
	IntEndpts endpts[NREGIONS_ONE];
	ComprEndpts compr_endpts[NREGIONS_ONE];

	// decode mode and unpack the compressed endpoints
	read_header(in, compr_endpts, p);
	int shapeindex = 0;		// only one shape

	// undo the delta transform / sign extension
	decompress_endpts(compr_endpts, endpts, p);

	// rebuild the interpolated palette for each region
	// (all channels share the same precision, so chan[0].prec[0] suffices)
	Vector3 palette[NREGIONS_ONE][NINDICES];
	for (int r = 0; r < NREGIONS_ONE; ++r)
		generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]);

	// read indices
	int indices[Tile::TILE_H][Tile::TILE_W];

	read_indices(in, shapeindex, indices);

	nvDebugCheck(in.getptr() == ZOH::BITSIZE);	// entire 128-bit block consumed

	// lookup
	for (int y = 0; y < Tile::TILE_H; y++)
		for (int x = 0; x < Tile::TILE_W; x++)
			t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]];
}
386
387 // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
map_colors(const Vector3 colors[],const float importance[],int np,const IntEndpts & endpts,int prec)388 static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec)
389 {
390 Vector3 palette[NINDICES];
391 float toterr = 0;
392 Vector3 err;
393
394 generate_palette_quantized(endpts, prec, palette);
395
396 for (int i = 0; i < np; ++i)
397 {
398 float err, besterr;
399
400 besterr = Utils::norm(colors[i], palette[0]) * importance[i];
401
402 for (int j = 1; j < NINDICES && besterr > 0; ++j)
403 {
404 err = Utils::norm(colors[i], palette[j]) * importance[i];
405
406 if (err > besterr) // error increased, so we're done searching
407 break;
408 if (err < besterr)
409 besterr = err;
410 }
411 toterr += besterr;
412 }
413 return toterr;
414 }
415
416 // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
assign_indices(const Tile & tile,int shapeindex,IntEndpts endpts[NREGIONS_ONE],int prec,int indices[Tile::TILE_H][Tile::TILE_W],float toterr[NREGIONS_ONE])417 static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec,
418 int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_ONE])
419 {
420 // build list of possibles
421 Vector3 palette[NREGIONS_ONE][NINDICES];
422
423 for (int region = 0; region < NREGIONS_ONE; ++region)
424 {
425 generate_palette_quantized(endpts[region], prec, &palette[region][0]);
426 toterr[region] = 0;
427 }
428
429 Vector3 err;
430
431 for (int y = 0; y < tile.size_y; y++)
432 for (int x = 0; x < tile.size_x; x++)
433 {
434 int region = REGION(x,y,shapeindex);
435 float err, besterr;
436
437 besterr = Utils::norm(tile.data[y][x], palette[region][0]);
438 indices[y][x] = 0;
439
440 for (int i = 1; i < NINDICES && besterr > 0; ++i)
441 {
442 err = Utils::norm(tile.data[y][x], palette[region][i]);
443
444 if (err > besterr) // error increased, so we're done searching
445 break;
446 if (err < besterr)
447 {
448 besterr = err;
449 indices[y][x] = i;
450 }
451 }
452 toterr[region] += besterr;
453 }
454 }
455
perturb_one(const Vector3 colors[],const float importance[],int np,int ch,int prec,const IntEndpts & old_endpts,IntEndpts & new_endpts,float old_err,int do_b)456 static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts,
457 float old_err, int do_b)
458 {
459 // we have the old endpoints: old_endpts
460 // we have the perturbed endpoints: new_endpts
461 // we have the temporary endpoints: temp_endpts
462
463 IntEndpts temp_endpts;
464 float min_err = old_err; // start with the best current error
465 int beststep;
466
467 // copy real endpoints so we can perturb them
468 for (int i=0; i<NCHANNELS; ++i) { temp_endpts.A[i] = new_endpts.A[i] = old_endpts.A[i]; temp_endpts.B[i] = new_endpts.B[i] = old_endpts.B[i]; }
469
470 // do a logarithmic search for the best error for this endpoint (which)
471 for (int step = 1 << (prec-1); step; step >>= 1)
472 {
473 bool improved = false;
474 for (int sign = -1; sign <= 1; sign += 2)
475 {
476 if (do_b == 0)
477 {
478 temp_endpts.A[ch] = new_endpts.A[ch] + sign * step;
479 if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec))
480 continue;
481 }
482 else
483 {
484 temp_endpts.B[ch] = new_endpts.B[ch] + sign * step;
485 if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec))
486 continue;
487 }
488
489 float err = map_colors(colors, importance, np, temp_endpts, prec);
490
491 if (err < min_err)
492 {
493 improved = true;
494 min_err = err;
495 beststep = sign * step;
496 }
497 }
498 // if this was an improvement, move the endpoint and continue search from there
499 if (improved)
500 {
501 if (do_b == 0)
502 new_endpts.A[ch] += beststep;
503 else
504 new_endpts.B[ch] += beststep;
505 }
506 }
507 return min_err;
508 }
509
// optimize the endpoints of one region, one channel at a time: first find
// which endpoint gives the most improvement when perturbed (to avoid a local
// minimum), then alternate endpoints until no further improvement.
static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts)
{
	float opt_err = orig_err;
	// start from the original endpoints; perturb in place from there
	for (int ch = 0; ch < NCHANNELS; ++ch)
	{
		opt_endpts.A[ch] = orig_endpts.A[ch];
		opt_endpts.B[ch] = orig_endpts.B[ch];
	}
	/*
		err0 = perturb(rgb0, delta0)
		err1 = perturb(rgb1, delta1)
		if (err0 < err1)
			if (err0 >= initial_error) break
			rgb0 += delta0
			next = 1
		else
			if (err1 >= initial_error) break
			rgb1 += delta1
			next = 0
		initial_err = map()
		for (;;)
			err = perturb(next ? rgb1:rgb0, delta)
			if (err >= initial_err) break
			next? rgb1 : rgb0 += delta
			initial_err = err
	*/
	IntEndpts new_a, new_b;
	IntEndpts new_endpt;
	int do_b;

	// now optimize each channel separately
	for (int ch = 0; ch < NCHANNELS; ++ch)
	{
		// figure out which endpoint when perturbed gives the most improvement and start there
		// if we just alternate, we can easily end up in a local minima
		float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0);	// perturb endpt A
		float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1);	// perturb endpt B

		if (err0 < err1)
		{
			if (err0 >= opt_err)
				continue;	// no improvement on this channel, leave it as-is

			opt_endpts.A[ch] = new_a.A[ch];
			opt_err = err0;
			do_b = 1;		// do B next
		}
		else
		{
			if (err1 >= opt_err)
				continue;	// no improvement on this channel, leave it as-is
			opt_endpts.B[ch] = new_b.B[ch];
			opt_err = err1;
			do_b = 0;		// do A next
		}

		// now alternate endpoints and keep trying until there is no improvement
		for (;;)
		{
			float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b);
			if (err >= opt_err)
				break;
			if (do_b == 0)
				opt_endpts.A[ch] = new_endpt.A[ch];
			else
				opt_endpts.B[ch] = new_endpt.B[ch];
			opt_err = err;
			do_b = 1 - do_b;	// now move the other endpoint
		}
	}
}
581
// optimize the endpoints of every region against the pixels it contains.
// fix: removed the unused local `float err = 0;` -- it was never read or written.
static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_ONE],
							const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE])
{
	Vector3 pixels[Tile::TILE_TOTAL];
	float importance[Tile::TILE_TOTAL];

	for (int region=0; region<NREGIONS_ONE; ++region)
	{
		// collect the pixels (and their importance weights) in the region
		int np = 0;

		for (int y = 0; y < tile.size_y; y++) {
			for (int x = 0; x < tile.size_x; x++) {
				if (REGION(x, y, shapeindex) == region) {
					pixels[np] = tile.data[y][x];
					importance[np] = tile.importance_map[y][x];
					++np;
				}
			}
		}

		optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
	}
}
607
608 /* optimization algorithm
609 for each pattern
610 convert endpoints using pattern precision
611 assign indices and get initial error
612 compress indices (and possibly reorder endpoints)
613 transform endpoints
614 if transformed endpoints fit pattern
615 get original endpoints back
616 optimize endpoints, get new endpoints, new indices, and new error // new error will almost always be better
617 compress new indices
618 transform new endpoints
619 if new endpoints fit pattern AND if error is improved
620 emit compressed block with new data
621 else
622 emit compressed block with original data // to try to preserve maximum endpoint precision
623 */
624
// refine the rough endpoints against each pattern (highest precision first),
// emit a compressed block for the first pattern that fits, and return its
// error.  see the algorithm comment above for the overall strategy.
float ZOH::refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block)
{
	float orig_err[NREGIONS_ONE], opt_err[NREGIONS_ONE], orig_toterr, opt_toterr;
	IntEndpts orig_endpts[NREGIONS_ONE], opt_endpts[NREGIONS_ONE];
	ComprEndpts compr_orig[NREGIONS_ONE], compr_opt[NREGIONS_ONE];
	int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];

	for (int sp = 0; sp < NPATTERNS; ++sp)
	{
		// precisions for all channels need to be the same
		for (int i=1; i<NCHANNELS; ++i) nvDebugCheck (patterns[sp].chan[0].prec[0] == patterns[sp].chan[i].prec[0]);

		// quantize, index, and compress at this pattern's precision
		quantize_endpts(endpts, patterns[sp].chan[0].prec[0], orig_endpts);
		assign_indices(tile, shapeindex_best, orig_endpts, patterns[sp].chan[0].prec[0], orig_indices, orig_err);
		swap_indices(orig_endpts, orig_indices, shapeindex_best);
		compress_endpts(orig_endpts, compr_orig, patterns[sp]);
		if (endpts_fit(orig_endpts, compr_orig, patterns[sp]))
		{
			// the pattern fits losslessly; try to optimize the endpoints further
			optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, patterns[sp].chan[0].prec[0], opt_endpts);
			assign_indices(tile, shapeindex_best, opt_endpts, patterns[sp].chan[0].prec[0], opt_indices, opt_err);
			swap_indices(opt_endpts, opt_indices, shapeindex_best);
			compress_endpts(opt_endpts, compr_opt, patterns[sp]);
			orig_toterr = opt_toterr = 0;
			for (int i=0; i < NREGIONS_ONE; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }

			if (endpts_fit(opt_endpts, compr_opt, patterns[sp]) && opt_toterr < orig_toterr)
			{
				emit_block(compr_opt, shapeindex_best, patterns[sp], opt_indices, block);
				return opt_toterr;
			}
			else
			{
				// either it stopped fitting when we optimized it, or there was no improvement
				// so go back to the unoptimized endpoints which we know will fit
				emit_block(compr_orig, shapeindex_best, patterns[sp], orig_indices, block);
				return orig_toterr;
			}
		}
	}

	nvAssert (false); // "No candidate found, should never happen (refineone.)";
	return FLT_MAX;
}
668
generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE],Vector3 palette[NREGIONS_ONE][NINDICES])669 static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], Vector3 palette[NREGIONS_ONE][NINDICES])
670 {
671 for (int region = 0; region < NREGIONS_ONE; ++region)
672 for (int i = 0; i < NINDICES; ++i)
673 palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
674 }
675
676 // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
map_colors(const Tile & tile,int shapeindex,const FltEndpts endpts[NREGIONS_ONE])677 static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_ONE])
678 {
679 // build list of possibles
680 Vector3 palette[NREGIONS_ONE][NINDICES];
681
682 generate_palette_unquantized(endpts, palette);
683
684 float toterr = 0;
685 Vector3 err;
686
687 for (int y = 0; y < tile.size_y; y++)
688 for (int x = 0; x < tile.size_x; x++)
689 {
690 int region = REGION(x,y,shapeindex);
691 float err, besterr;
692
693 besterr = Utils::norm(tile.data[y][x], palette[region][0]) * tile.importance_map[y][x];
694
695 for (int i = 1; i < NINDICES && besterr > 0; ++i)
696 {
697 err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
698
699 if (err > besterr) // error increased, so we're done searching
700 break;
701 if (err < besterr)
702 besterr = err;
703 }
704 toterr += besterr;
705 }
706 return toterr;
707 }
708
// compute rough (unquantized) endpoints for each region of the given shape by
// projecting the region's pixels onto their principal axis, and return the
// resulting mapping error for the whole tile.
float ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE])
{
	for (int region=0; region<NREGIONS_ONE; ++region)
	{
		int np = 0;
		Vector3 colors[Tile::TILE_TOTAL];
		Vector3 mean(0,0,0);

		// gather this region's pixels and accumulate their mean
		for (int y = 0; y < tile.size_y; y++) {
			for (int x = 0; x < tile.size_x; x++) {
				if (REGION(x,y,shapeindex) == region)
				{
					colors[np] = tile.data[y][x];
					mean += tile.data[y][x];
					++np;
				}
			}
		}

		// handle simple cases -- 0, 1 or 2 pixels need no line fitting
		if (np == 0)
		{
			Vector3 zero(0,0,0);
			endpts[region].A = zero;
			endpts[region].B = zero;
			continue;
		}
		else if (np == 1)
		{
			endpts[region].A = colors[0];
			endpts[region].B = colors[0];
			continue;
		}
		else if (np == 2)
		{
			endpts[region].A = colors[0];
			endpts[region].B = colors[1];
			continue;
		}

		mean /= float(np);

		// principal component of the color cloud (SVD/eigen solve stands in for PCA)
		Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);

		// project each pixel value along the principal direction
		float minp = FLT_MAX, maxp = -FLT_MAX;
		for (int i = 0; i < np; i++)
		{
			float dp = dot(colors[i]-mean, direction);
			if (dp < minp) minp = dp;
			if (dp > maxp) maxp = dp;
		}

		// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
		endpts[region].A = mean + minp*direction;
		endpts[region].B = mean + maxp*direction;

		// clamp endpoints
		// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
		// shape based on endpoints being clamped
		Utils::clamp(endpts[region].A);
		Utils::clamp(endpts[region].B);
	}

	return map_colors(tile, shapeindex, endpts);
}
775
compressone(const Tile & t,char * block)776 float ZOH::compressone(const Tile &t, char *block)
777 {
778 int shapeindex_best = 0;
779 FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE];
780 float msebest = FLT_MAX;
781
782 /*
783 collect the mse values that are within 5% of the best values
784 optimize each one and choose the best
785 */
786 // hack for now -- just use the best value WORK
787 for (int i=0; i<NSHAPES && msebest>0.0; ++i)
788 {
789 float mse = roughone(t, i, tempendpts);
790 if (mse < msebest)
791 {
792 msebest = mse;
793 shapeindex_best = i;
794 memcpy(endptsbest, tempendpts, sizeof(endptsbest));
795 }
796
797 }
798 return refineone(t, shapeindex_best, endptsbest, block);
799 }
800