1 //
2 // SPDX-License-Identifier: BSD-3-Clause
3 // Copyright (c) Contributors to the OpenEXR Project.
4 //
5
6 #include <cstddef>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <math.h>
10 #include <vector>
11 #include <assert.h>
12
13 #include <OpenEXRConfig.h>
14 #include <OpenEXRConfigInternal.h>
15
16 #if __cplusplus >= 201103L
17 #include <thread>
18 #endif
19
20 #ifdef OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN
21 #include <unistd.h>
22 #endif
23
24 #include <half.h>
25 #include <IlmThread.h>
26 #include <IlmThreadSemaphore.h>
27 #include <ImfIO.h>
28 #include <ImfXdr.h>
29 #include "dwaLookups.h"
30 #include "ImfNamespace.h"
31
32 //
33 // This test uses the code that generates the dwaLookups.h header to
34 // validate the the values in the tables are correct.
35 //
36
37 using namespace OPENEXR_IMF_NAMESPACE;
38 using namespace OPENEXR_IMF_NAMESPACE;
39
40 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER
41
42 #include "dwaLookups.h"
43
44 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
45
46 namespace {
47
48 class LutHeaderWorker
49 {
50 public:
51 class Runner : public ILMTHREAD_NAMESPACE::Thread
52 {
53 public:
Runner(LutHeaderWorker & worker,bool output)54 Runner(LutHeaderWorker &worker, bool output):
55 ILMTHREAD_NAMESPACE::Thread(),
56 _worker(worker),
57 _output(output)
58 {
59 start();
60 }
61
~Runner()62 virtual ~Runner()
63 {
64 _semaphore.wait();
65 }
66
run()67 virtual void run()
68 {
69 _semaphore.post();
70 _worker.run(_output);
71 }
72
73 private:
74 LutHeaderWorker &_worker;
75 bool _output;
76 ILMTHREAD_NAMESPACE::Semaphore _semaphore;
77
78 }; // class LutHeaderWorker::Runner
79
80
LutHeaderWorker(size_t startValue,size_t endValue)81 LutHeaderWorker(size_t startValue,
82 size_t endValue):
83 _lastCandidateCount(0),
84 _startValue(startValue),
85 _endValue(endValue),
86 _numElements(0),
87 _offset(new size_t[numValues()]),
88 _elements(new unsigned short[1024*1024*2])
89 {
90 }
91
~LutHeaderWorker()92 ~LutHeaderWorker()
93 {
94 delete[] _offset;
95 delete[] _elements;
96 }
97
lastCandidateCount() const98 size_t lastCandidateCount() const
99 {
100 return _lastCandidateCount;
101 }
102
numValues() const103 size_t numValues() const
104 {
105 return _endValue - _startValue;
106 }
107
numElements() const108 size_t numElements() const
109 {
110 return _numElements;
111 }
112
offset() const113 const size_t* offset() const
114 {
115 return _offset;
116 }
117
elements() const118 const unsigned short* elements() const
119 {
120 return _elements;
121 }
122
run(bool outputProgress)123 void run(bool outputProgress)
124 {
125 half candidate[16];
126 int candidateCount = 0;
127
128 for (size_t input=_startValue; input<_endValue; ++input) {
129
130 int numSetBits = countSetBits(input);
131 half inputHalf, closestHalf;
132
133 inputHalf.setBits(input);
134 closestHalf.setBits(0);
135
136 _offset[input - _startValue] = _numElements;
137
138 // Gather candidates
139 candidateCount = 0;
140 for (int targetNumSetBits=numSetBits-1; targetNumSetBits>=0;
141 --targetNumSetBits) {
142 bool valueFound = false;
143
144 for (int i=0; i<65536; ++i) {
145 if (countSetBits(i) != targetNumSetBits) continue;
146
147 if (!valueFound) {
148 closestHalf.setBits(i);
149 valueFound = true;
150 } else {
151 half tmpHalf;
152
153 tmpHalf.setBits(i);
154
155 if (fabs((float)inputHalf - (float)tmpHalf) <
156 fabs((float)inputHalf - (float)closestHalf)) {
157 closestHalf = tmpHalf;
158 }
159 }
160 }
161
162 candidate[candidateCount] = closestHalf;
163 candidateCount++;
164 }
165
166 // Sort candidates by increasing number of bits set
167 for (int i=0; i<candidateCount; ++i) {
168 for (int j=i+1; j<candidateCount; ++j) {
169
170 int iCnt = countSetBits(candidate[i].bits());
171 int jCnt = countSetBits(candidate[j].bits());
172
173 if (jCnt < iCnt) {
174 half tmp = candidate[i];
175 candidate[i] = candidate[j];
176 candidate[j] = tmp;
177 }
178 }
179 }
180
181 // Copy candidates to the data buffer;
182 for (int i=0; i<candidateCount; ++i) {
183 _elements[_numElements] = candidate[i].bits();
184 _numElements++;
185 }
186
187 if (input == _endValue-1) {
188 _lastCandidateCount = candidateCount;
189 }
190 }
191 }
192
193 private:
194
195 size_t _lastCandidateCount;
196 size_t _startValue;
197 size_t _endValue;
198 size_t _numElements;
199 size_t *_offset;
200 unsigned short *_elements;
201
202 //
203 // Precomputing the bit count runs faster than using
204 // the builtin instruction, at least in one case..
205 //
206 // Precomputing 8-bits is no slower than 16-bits,
207 // and saves a fair bit of overhead..
208 //
countSetBits(unsigned short src)209 int countSetBits(unsigned short src)
210 {
211 static const unsigned short numBitsSet[256] =
212 {
213 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
214 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
215 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
216 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
217 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
218 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
219 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
220 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
221 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
222 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
223 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
224 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
225 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
226 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
227 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
228 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
229 };
230
231 return numBitsSet[src & 0xff] + numBitsSet[src >> 8];
232 }
233
234 }; // class LutHeaderWorker
235
236 } // namespace
237
238
239 //
240 // Generate a no-op LUT, to cut down in conditional branches
241 //
242 void
testNoop()243 testNoop()
244 {
245 printf("test dwaCompressorNoOp[] \n");
246
247 for (int i=0; i<65536; ++i)
248 {
249 unsigned short src = (unsigned short)i;
250 assert (src == OPENEXR_IMF_INTERNAL_NAMESPACE::dwaCompressorNoOp[i]);
251 }
252 }
253
254 //
255 // Nonlinearly encode luminance. For values below 1.0, we want
256 // to use a gamma 2.2 function to match what is fairly common
257 // for storing output referred. However, > 1, gamma functions blow up,
258 // and log functions are much better behaved. We could use a log
259 // function everywhere, but it tends to over-sample dark
260 // regions and undersample the brighter regions, when
261 // compared to the way real devices reproduce values.
262 //
263 // So, above 1, use a log function which is a smooth blend
264 // into the gamma function.
265 //
266 // Nonlinear(linear) =
267 //
268 // linear^(1./2.2) / linear <= 1.0
269 // |
270 // ln(linear)/ln(e^2.2) + 1 \ otherwise
271 //
272 //
273 // toNonlinear[] needs to take in XDR format half float values,
274 // and output NATIVE format float.
275 //
276 // toLinear[] does the opposite - takes in NATIVE half and
277 // outputs XDR half values.
278 //
279
280 void
testToLinear()281 testToLinear()
282 {
283 unsigned short toLinear[65536];
284
285 toLinear[0] = 0;
286
287 for (int i=1; i<65536; ++i) {
288 half h;
289 float sign = 1;
290 float logBase = pow(2.7182818, 2.2);
291
292 // map NaN and inf to 0
293 if ((i & 0x7c00) == 0x7c00) {
294 toLinear[i] = 0;
295 continue;
296 }
297
298 //
299 // _toLinear - assume i is NATIVE, but our output needs
300 // to get flipped to XDR
301 //
302 h.setBits(i);
303 sign = 1;
304 if ((float)h < 0) {
305 sign = -1;
306 }
307
308 if ( fabs( (float)h) <= 1.0 ) {
309 h = (half)(sign * pow((float)fabs((float)h), 2.2f));
310 } else {
311 h = (half)(sign * pow(logBase, (float)(fabs((float)h) - 1.0)));
312 }
313
314 {
315 char *tmp = (char *)(&toLinear[i]);
316
317 Xdr::write <CharPtrIO> ( tmp, h.bits());
318 }
319 }
320
321 printf("test dwaCompressorToLinear[]\n");
322 for (int i=0; i<65536; ++i)
323 assert (toLinear[i] == OPENEXR_IMF_INTERNAL_NAMESPACE::dwaCompressorToLinear[i]);
324 }
325
326
327 void
testToNonlinear()328 testToNonlinear()
329 {
330 unsigned short toNonlinear[65536];
331
332 toNonlinear[0] = 0;
333
334 for (int i=1; i<65536; ++i) {
335 unsigned short usNative, usXdr;
336 half h;
337 float sign = 1;
338 float logBase = pow(2.7182818, 2.2);
339
340 usXdr = i;
341
342 {
343 const char *tmp = (char *)(&usXdr);
344
345 Xdr::read<CharPtrIO>(tmp, usNative);
346 }
347
348 // map NaN and inf to 0
349 if ((usNative & 0x7c00) == 0x7c00) {
350 toNonlinear[i] = 0;
351 continue;
352 }
353
354 //
355 // toNonlinear - assume i is XDR
356 //
357 h.setBits(usNative);
358 sign = 1;
359 if ((float)h < 0) {
360 sign = -1;
361 }
362
363 if ( fabs( (float)h ) <= 1.0) {
364 h = (half)(sign * pow(fabs((float)h), 1.f/2.2f));
365 } else {
366 h = (half)(sign * ( log(fabs((float)h)) / log(logBase) + 1.0) );
367 }
368 toNonlinear[i] = h.bits();
369 }
370
371 printf("test dwaCompressorToNonlinear[]\n");
372 for (int i=0; i<65536; ++i)
373 assert (toNonlinear[i] == OPENEXR_IMF_INTERNAL_NAMESPACE::dwaCompressorToNonlinear[i]);
374 }
375
376 //
377 // Attempt to get available CPUs in a somewhat portable way.
378 //
379
380 int
cpuCount()381 cpuCount()
382 {
383 if (!ILMTHREAD_NAMESPACE::supportsThreads()) return 1;
384
385 int cpuCount = 1;
386
387 #if __cplusplus >= 201103L
388 cpuCount = std::thread::hardware_concurrency();
389 #else
390
391 #if defined (OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN)
392
393 cpuCount = sysconf(_SC_NPROCESSORS_ONLN);
394
395 #elif defined (_WIN32)
396
397 SYSTEM_INFO sysinfo;
398 GetSystemInfo( &sysinfo );
399 cpuCount = sysinfo.dwNumberOfProcessors;
400
401 #endif
402
403 #endif
404 if (cpuCount < 1) cpuCount = 1;
405 return cpuCount;
406 }
407
408 //
409 // Generate acceleration luts for the quantization.
410 //
411 // For each possible input value, we want to find the closest numbers
412 // which have one fewer bits set than before.
413 //
414 // This gives us num_bits(input)-1 values per input. If we alloc
415 // space for everything, that's like a 2MB table. We can do better
416 // by compressing all the values to be contigious and using offset
417 // pointers.
418 //
419 // After we've found the candidates with fewer bits set, sort them
420 // based on increasing numbers of bits set. This way, on quantize(),
421 // we can scan through the list and halt once we find the first
422 // candidate within the error range. For small values that can
423 // be quantized to 0, 0 is the first value tested and the search
424 // can exit fairly quickly.
425 //
426
427 void
testLutHeader()428 testLutHeader()
429 {
430 std::vector<LutHeaderWorker*> workers;
431
432 size_t numWorkers = cpuCount();
433 size_t workerInterval = 65536 / numWorkers;
434
435 for (size_t i=0; i<numWorkers; ++i) {
436 if (i != numWorkers-1) {
437 workers.push_back( new LutHeaderWorker( i *workerInterval,
438 (i+1)*workerInterval) );
439 } else {
440 workers.push_back( new LutHeaderWorker(i*workerInterval, 65536) );
441 }
442 }
443
444 if (ILMTHREAD_NAMESPACE::supportsThreads()) {
445 std::vector<LutHeaderWorker::Runner*> runners;
446 for (size_t i=0; i<workers.size(); ++i) {
447 runners.push_back( new LutHeaderWorker::Runner(*workers[i], (i==0)) );
448 }
449
450 for (size_t i=0; i<workers.size(); ++i) {
451 delete runners[i];
452 }
453 } else {
454 for (size_t i=0; i<workers.size(); ++i) {
455 workers[i]->run(i == 0);
456 }
457 }
458
459 printf("test closestDataOffset[]\n");
460 int offsetIdx = 0;
461 int offsetPrev = 0;
462 for (size_t i=0; i<workers.size(); ++i) {
463 for (size_t value=0; value<workers[i]->numValues(); ++value)
464 {
465 assert (OPENEXR_IMF_INTERNAL_NAMESPACE::closestDataOffset[offsetIdx] == workers[i]->offset()[value] + offsetPrev);
466 offsetIdx++;
467 }
468 offsetPrev += workers[i]->offset()[workers[i]->numValues()-1] +
469 workers[i]->lastCandidateCount();
470 }
471
472 printf("test closestData[]\n");
473 int elementIdx = 0;
474 for (size_t i=0; i<workers.size(); ++i) {
475 for (size_t element=0; element<workers[i]->numElements(); ++element)
476 {
477 assert (OPENEXR_IMF_INTERNAL_NAMESPACE::closestData[elementIdx] == workers[i]->elements()[element]);
478 elementIdx++;
479 }
480 }
481
482 for (size_t i=0; i<workers.size(); ++i) {
483 delete workers[i];
484 }
485 }
486
487 void
testDwaLookups(const std::string &)488 testDwaLookups(const std::string&)
489 {
490 testNoop();
491 testToLinear();
492 testToNonlinear();
493 testLutHeader();
494 }
495