1 /*****************************************************************************/
2 // Copyright 2006-2019 Adobe Systems Incorporated
3 // All Rights Reserved.
4 //
5 // NOTICE: Adobe permits you to use, modify, and distribute this file in
6 // accordance with the terms of the Adobe license agreement accompanying it.
7 /*****************************************************************************/
8
9 #include "dng_utils.h"
10
11 #include "dng_area_task.h"
12 #include "dng_assertions.h"
13 #include "dng_bottlenecks.h"
14 #include "dng_flags.h"
15 #include "dng_globals.h"
16 #include "dng_host.h"
17 #include "dng_image.h"
18 #include "dng_mutex.h"
19 #include "dng_point.h"
20 #include "dng_rect.h"
21 #include "dng_simd_type.h"
22 #include "dng_tile_iterator.h"
23
24 #if qMacOS
25 #include <CoreServices/CoreServices.h>
26 #endif
27
28 #if qiPhone || qMacOS
29 // these provide timers
30 #include <mach/mach.h>
31 #include <mach/mach_time.h>
32 #endif
33
34 #if qiPhone || qLinux
35 #include <signal.h> // for raise
36 #endif
37
38 #if qWinOS
39 #include <windows.h>
40 #else
41 #include <sys/time.h>
42 #include <stdarg.h> // for va_start/va_end
43 #endif
44
45 #include <atomic>
46
47 /*****************************************************************************/
48
49 #if qDNGDebug
50
51 /*****************************************************************************/
52
// DNG_DEBUG_BREAK expands to a statement that stops in an attached debugger.
// It expands to nothing on platforms that have no known mechanism.

#if qMacOS || (qiPhone && qiPhoneSimulator)

	// macOS and the iOS simulator can be hosted on either Intel or ARM
	// hardware. The "int3" instruction only assembles for x86 targets, so
	// pick the break mechanism from the target architecture rather than
	// assuming Intel.

	#if defined (__i386__) || defined (__x86_64__)
		#define DNG_DEBUG_BREAK __asm__ volatile ("int3")
	#elif defined (__clang__)
		#define DNG_DEBUG_BREAK __builtin_debugtrap ()
	#else
		// Last resort for other compilers; note that this trap is not resumable.
		#define DNG_DEBUG_BREAK __builtin_trap ()
	#endif

#elif qiPhone

	// Device build. You'll be one level deeper in __kill.
	#define DNG_DEBUG_BREAK raise(SIGTRAP)

#elif qWinOS

	// DebugBreak has to be emulated on WinRT
	#define DNG_DEBUG_BREAK DebugBreak()

#elif qAndroid || qLinux

	#define DNG_DEBUG_BREAK raise(SIGTRAP)

#else

	#define DNG_DEBUG_BREAK

#endif
73
74 /*****************************************************************************/
75
// Reports a debug/assert message using the mechanism appropriate for the
// build configuration: stderr output, a debugger break, or a message box.
//
// s: NUL-terminated message text. A null pointer is treated as an empty
//    message instead of being dereferenced.

void dng_show_message (const char *s)
	{

	// Be tolerant of a missing message; everything below reads through s.

	if (!s)
		s = "";

	// only append a newline if there isn't already one
	const char* nl = "\n";
	if (s[0] && (s[strlen(s)-1] == '\n'))
		nl = "";

	// Not every configuration below prints, so nl may otherwise be unused.
	(void) nl;

	#if qDNGPrintMessages

	// display the message
	if (gPrintAsserts)
		fprintf (stderr, "%s%s", s, nl);

	#elif qiPhone || qAndroid || qLinux

	if (gPrintAsserts)
		fprintf (stderr, "%s%s", s, nl);

	// iOS doesn't print a message to the console like DebugStr and MessageBox do, so we have to do both
	// You'll have to advance the program counter manually past this statement
	if (gBreakOnAsserts)
		DNG_DEBUG_BREAK;

	#elif qMacOS

	if (gBreakOnAsserts)
		{

		// DebugStr takes a Pascal string: a length byte followed by at most
		// 255 characters, so truncate the message to 255 chars.
		char ss [256];

		uint32 len = (uint32) strlen (s);
		if (len > 255)
			len = 255;

		// len never exceeds strlen (s), so this copies message bytes only;
		// no terminator is needed for a Pascal string.
		memcpy (&(ss [1]), s, len);
		ss [0] = (unsigned char) len;

		DebugStr ((unsigned char *) ss);

		}
	else if (gPrintAsserts)
		{
		fprintf (stderr, "%s%s", s, nl);
		}

	#elif qWinOS

	// display a dialog
	// This is not thread safe. Multiple message boxes can be launched.
	// Should also be launched in its own thread so main msg queue isn't thrown off.
	// MessageBoxA takes its text as a const string, so no cast is required.
	if (gBreakOnAsserts)
		MessageBoxA (NULL, s, NULL, MB_OK);
	else if (gPrintAsserts)
		fprintf (stderr, "%s%s", s, nl);

	#endif

	}
132
133 /*****************************************************************************/
134
dng_show_message_f(const char * fmt,...)135 void dng_show_message_f (const char *fmt, ... )
136 {
137
138 char buffer [2048];
139
140 va_list ap;
141 va_start (ap, fmt);
142
143 vsnprintf (buffer, sizeof (buffer), fmt, ap);
144
145 va_end (ap);
146
147 dng_show_message (buffer);
148
149 }
150
151 /*****************************************************************************/
152
153 #endif
154
155 /*****************************************************************************/
156
ComputeBufferSize(uint32 pixelType,const dng_point & tileSize,uint32 numPlanes,PaddingType paddingType)157 uint32 ComputeBufferSize (uint32 pixelType,
158 const dng_point &tileSize,
159 uint32 numPlanes,
160 PaddingType paddingType)
161 {
162
163 // Convert tile size to uint32.
164
165 if (tileSize.h < 0 || tileSize.v < 0)
166 {
167 ThrowMemoryFull ("Negative tile size");
168 }
169
170 const uint32 tileSizeH = static_cast<uint32> (tileSize.h);
171 const uint32 tileSizeV = static_cast<uint32> (tileSize.v);
172
173 const uint32 pixelSize = TagTypeSize (pixelType);
174
175 // Add padding to width if necessary.
176
177 uint32 paddedWidth = tileSizeH;
178
179 if (paddingType == padSIMDBytes)
180 {
181
182 if (!RoundUpForPixelSize (paddedWidth,
183 pixelSize,
184 &paddedWidth))
185 {
186 ThrowOverflow ("Arithmetic overflow computing buffer size");
187 }
188
189 }
190
191 // Compute buffer size.
192
193 uint32 bufferSize;
194
195 if (!SafeUint32Mult (paddedWidth, tileSizeV, &bufferSize) ||
196 !SafeUint32Mult (bufferSize, pixelSize, &bufferSize) ||
197 !SafeUint32Mult (bufferSize, numPlanes, &bufferSize))
198 {
199 ThrowOverflow ("Arithmetic overflow computing buffer size");
200 }
201
202 return bufferSize;
203
204 }
205
206 /*****************************************************************************/
207
TickTimeInSeconds()208 real64 TickTimeInSeconds ()
209 {
210
211 #if qWinOS
212
213 // One might think it prudent to cache the frequency here, however
214 // low-power CPU modes can, and do, change the value returned.
215 // Thus the frequencey needs to be retrieved each time.
216
217 // Note that the frequency changing can cause the return
218 // result to jump backwards, which is why the TickCountInSeconds
219 // (below) also exists.
220
221 // Just plug in laptop when doing timings to minimize this.
222 // QPC/QPH is a slow call compared to rtdsc.
223 // but QPC/QPF is not tied to speed step, it's the northbridge timer.
224 // caching the invFrequency also avoids a costly divide
225
226 static real64 freqMultiplier = 0.0;
227
228 if (freqMultiplier == 0.0)
229 {
230
231 LARGE_INTEGER freq;
232
233 QueryPerformanceFrequency (&freq);
234
235 freqMultiplier = 1.0 / (real64) freq.QuadPart;
236
237 }
238
239 LARGE_INTEGER cycles;
240
241 QueryPerformanceCounter (&cycles);
242
243 return (real64) cycles.QuadPart * freqMultiplier;
244
245 #elif qiPhone || qMacOS
246
247 // cache frequency of high-perf timer
248 static real64 freqMultiplier = 0.0;
249 if (freqMultiplier == 0.0)
250 {
251
252 mach_timebase_info_data_t freq;
253 mach_timebase_info(&freq);
254
255 // converts from nanos to micros
256 // numer = 125, denom = 3 * 1000
257 freqMultiplier = ((real64)freq.numer / (real64)freq.denom) * 1.0e-9;
258
259 }
260
261 return mach_absolute_time() * freqMultiplier;
262
263 #elif qAndroid || qLinux
264
265 //this is a fast timer to nanos
266 struct timespec now;
267 clock_gettime(CLOCK_MONOTONIC, &now);
268 return now.tv_sec + (real64)now.tv_nsec * 1.0e-9;
269
270 #else
271
272 // Perhaps a better call exists. (e.g. avoid adjtime effects)
273
274 struct timeval tv;
275
276 gettimeofday (&tv, NULL);
277
278 return tv.tv_sec + (real64)tv.tv_usec * 1.0e-6;
279
280 #endif
281
282 }
283
284 /*****************************************************************************/
285
TickCountInSeconds()286 real64 TickCountInSeconds ()
287 {
288
289 return TickTimeInSeconds ();
290
291 }
292
293 /*****************************************************************************/
294
// Nesting depth of timers, adjusted by DNGIncrementTimerLevel and
// DNGDecrementTimerLevel; starts at zero.
static std::atomic<int> sTimerLevel {0};
296
297 /*****************************************************************************/
298
DNGIncrementTimerLevel()299 void DNGIncrementTimerLevel ()
300 {
301
302 // This isn't thread coherent, multiple threads can create/destroy cr_timer
303 // causing the tabbing to be invalid. Imagecore disables this.
304
305 if (!gImagecore)
306 {
307
308 sTimerLevel++;
309
310 }
311
312 }
313
314 /*****************************************************************************/
315
DNGDecrementTimerLevel()316 int32 DNGDecrementTimerLevel ()
317 {
318
319 if (gImagecore)
320 {
321
322 return 0;
323
324 }
325
326 else
327 {
328
329 return (int32) (--sTimerLevel);
330
331 }
332
333 }
334
335 /*****************************************************************************/
336
dng_timer(const char * message)337 dng_timer::dng_timer (const char *message)
338
339 : fMessage (message )
340 , fStartTime (TickTimeInSeconds ())
341
342 {
343
344 DNGIncrementTimerLevel ();
345
346 }
347
348 /*****************************************************************************/
349
~dng_timer()350 dng_timer::~dng_timer ()
351 {
352
353 uint32 level = Pin_int32 (0, DNGDecrementTimerLevel (), 10);
354
355 if (!gDNGShowTimers)
356 return;
357
358 real64 totalTime = TickTimeInSeconds () - fStartTime;
359
360 #if defined(qCRLogging) && qCRLogging && defined(cr_logi)
361
362 if (gImagecore)
363 {
364 // Imagecore force includes cr_log and overrides DNG to go to its logging under a mutex.
365 // don't use indenting or fprintf to stderr, want these buffered
366 cr_logi("timer", "%s: %0.3f sec\n", fMessage, totalTime);
367 return;
368 }
369
370 #endif
371
372 fprintf (stderr, "%*s%s: %0.3f sec\n", level*2, "", fMessage, totalTime);
373
374 }
375
376 /*****************************************************************************/
377
MaxSquaredDistancePointToRect(const dng_point_real64 & point,const dng_rect_real64 & rect)378 real64 MaxSquaredDistancePointToRect (const dng_point_real64 &point,
379 const dng_rect_real64 &rect)
380 {
381
382 real64 distSqr = DistanceSquared (point,
383 rect.TL ());
384
385 distSqr = Max_real64 (distSqr,
386 DistanceSquared (point,
387 rect.BL ()));
388
389 distSqr = Max_real64 (distSqr,
390 DistanceSquared (point,
391 rect.BR ()));
392
393 distSqr = Max_real64 (distSqr,
394 DistanceSquared (point,
395 rect.TR ()));
396
397 return distSqr;
398
399 }
400
401 /*****************************************************************************/
402
MaxDistancePointToRect(const dng_point_real64 & point,const dng_rect_real64 & rect)403 real64 MaxDistancePointToRect (const dng_point_real64 &point,
404 const dng_rect_real64 &rect)
405 {
406
407 return sqrt (MaxSquaredDistancePointToRect (point,
408 rect));
409
410 }
411
412 /*****************************************************************************/
413
dng_dither()414 dng_dither::dng_dither ()
415
416 : fNoiseBuffer ()
417
418 {
419
420 const uint32 kSeed = 1;
421
422 fNoiseBuffer.Allocate (kRNGSize2D * sizeof (uint16));
423
424 uint16 *buffer = fNoiseBuffer.Buffer_uint16 ();
425
426 uint32 seed = kSeed;
427
428 for (uint32 i = 0; i < kRNGSize2D; i++)
429 {
430
431 // The correct math for 16 to 8-bit dither would be:
432 //
433 // y = (x * 255 + r) / 65535; (0 <= r <= 65534)
434 //
435 // The bottlnecks are using a faster approximation of
436 // this math (using a power of two for the division):
437 //
438 // y = (x * 255 + r) / 65536; (255 <= r <= 65535)
439 //
440 // To insure that all exact 8 bit values in 16 bit space
441 // round trip exactly to the same 8-bit, we need to limit
442 // r values to the range 255 to 65535.
443 //
444 // This results in the dither effect being slightly
445 // imperfect, but correct round-tripping of 8-bit values
446 // is far more important.
447
448 uint16 value;
449
450 do
451 {
452
453 seed = DNG_Random (seed);
454
455 value = (uint16) seed;
456
457 }
458 while (value < 255);
459
460 buffer [i] = value;
461
462 }
463
464 }
465
466 /******************************************************************************/
467
Get()468 const dng_dither & dng_dither::Get ()
469 {
470
471 static dng_dither dither;
472
473 return dither;
474
475 }
476
477 /*****************************************************************************/
478
HistogramArea(dng_host &,const dng_image & image,const dng_rect & area,uint32 * hist,uint32 maxValue,uint32 plane)479 void HistogramArea (dng_host & /* host */,
480 const dng_image &image,
481 const dng_rect &area,
482 uint32 *hist,
483 uint32 maxValue,
484 uint32 plane)
485 {
486
487 DNG_ASSERT (image.PixelType () == ttShort, "Unsupported pixel type");
488
489 DoZeroBytes (hist, (maxValue + 1) * (uint32) sizeof (uint32));
490
491 dng_rect tile;
492
493 dng_tile_iterator iter (image, area);
494
495 while (iter.GetOneTile (tile))
496 {
497
498 dng_const_tile_buffer buffer (image, tile);
499
500 const void *sPtr = buffer.ConstPixel (tile.t,
501 tile.l,
502 plane);
503
504 uint32 count0 = 1;
505 uint32 count1 = tile.H ();
506 uint32 count2 = tile.W ();
507
508 int32 step0 = 0;
509 int32 step1 = buffer.fRowStep;
510 int32 step2 = buffer.fColStep;
511
512 OptimizeOrder (sPtr,
513 buffer.fPixelSize,
514 count0,
515 count1,
516 count2,
517 step0,
518 step1,
519 step2);
520
521 DNG_ASSERT (count0 == 1, "OptimizeOrder logic error");
522
523 const uint16 *s1 = (const uint16 *) sPtr;
524
525 for (uint32 row = 0; row < count1; row++)
526 {
527
528 if (maxValue == 0x0FFFF && step2 == 1)
529 {
530
531 for (uint32 col = 0; col < count2; col++)
532 {
533
534 uint32 x = s1 [col];
535
536 hist [x] ++;
537
538 }
539
540 }
541
542 else
543 {
544
545 const uint16 *s2 = s1;
546
547 for (uint32 col = 0; col < count2; col++)
548 {
549
550 uint32 x = s2 [0];
551
552 if (x <= maxValue)
553 {
554
555 hist [x] ++;
556
557 }
558
559 s2 += step2;
560
561 }
562
563 }
564
565 s1 += step1;
566
567 }
568
569 }
570
571 }
572
573 /*****************************************************************************/
574
575 template <SIMDType simd>
576 class dng_limit_float_depth_task: public dng_area_task
577 {
578
579 private:
580
581 const dng_image &fSrcImage;
582
583 dng_image &fDstImage;
584
585 uint32 fBitDepth;
586
587 real32 fScale;
588
589 public:
590
591 dng_limit_float_depth_task (const dng_image &srcImage,
592 dng_image &dstImage,
593 uint32 bitDepth,
594 real32 scale);
595
RepeatingTile1() const596 virtual dng_rect RepeatingTile1 () const
597 {
598 return fSrcImage.RepeatingTile ();
599 }
600
RepeatingTile2() const601 virtual dng_rect RepeatingTile2 () const
602 {
603 return fDstImage.RepeatingTile ();
604 }
605
606 virtual void Process (uint32 threadIndex,
607 const dng_rect &tile,
608 dng_abort_sniffer *sniffer);
609
610 };
611
612 /*****************************************************************************/
613
614 template <SIMDType simd>
dng_limit_float_depth_task(const dng_image & srcImage,dng_image & dstImage,uint32 bitDepth,real32 scale)615 dng_limit_float_depth_task<simd>::dng_limit_float_depth_task
616 (const dng_image &srcImage,
617 dng_image &dstImage,
618 uint32 bitDepth,
619 real32 scale)
620
621 : dng_area_task ("dng_limit_float_depth_task")
622
623 , fSrcImage (srcImage)
624 , fDstImage (dstImage)
625 , fBitDepth (bitDepth)
626 , fScale (scale)
627
628 {
629
630 }
631
632 /*****************************************************************************/
633
634 template <SIMDType simd>
Process(uint32,const dng_rect & tile,dng_abort_sniffer *)635 void dng_limit_float_depth_task<simd>::Process (uint32 /* threadIndex */,
636 const dng_rect &tile,
637 dng_abort_sniffer * /* sniffer */)
638 {
639
640 INTEL_COMPILER_NEEDED_NOTE
641
642 SET_CPU_FEATURE (simd);
643
644 dng_const_tile_buffer srcBuffer (fSrcImage, tile);
645 dng_dirty_tile_buffer dstBuffer (fDstImage, tile);
646
647 uint32 count0 = tile.H ();
648 uint32 count1 = tile.W ();
649 uint32 count2 = fDstImage.Planes ();
650
651 int32 sStep0 = srcBuffer.fRowStep;
652 int32 sStep1 = srcBuffer.fColStep;
653 int32 sStep2 = srcBuffer.fPlaneStep;
654
655 int32 dStep0 = dstBuffer.fRowStep;
656 int32 dStep1 = dstBuffer.fColStep;
657 int32 dStep2 = dstBuffer.fPlaneStep;
658
659 const void *sPtr = srcBuffer.ConstPixel (tile.t,
660 tile.l,
661 0);
662
663 void *dPtr = dstBuffer.DirtyPixel (tile.t,
664 tile.l,
665 0);
666
667 OptimizeOrder (sPtr,
668 dPtr,
669 srcBuffer.fPixelSize,
670 dstBuffer.fPixelSize,
671 count0,
672 count1,
673 count2,
674 sStep0,
675 sStep1,
676 sStep2,
677 dStep0,
678 dStep1,
679 dStep2);
680
681 const real32 *sPtr0 = (const real32 *) sPtr;
682 real32 *dPtr0 = ( real32 *) dPtr;
683
684 real32 scale = fScale;
685
686 bool limit16 = (fBitDepth == 16);
687 bool limit24 = (fBitDepth == 24);
688
689 for (uint32 index0 = 0; index0 < count0; index0++)
690 {
691
692 const real32 *sPtr1 = sPtr0;
693 real32 *dPtr1 = dPtr0;
694
695 for (uint32 index1 = 0; index1 < count1; index1++)
696 {
697
698 // If the scale is a NOP, and the data is packed solid, we can just do memory
699 // copy.
700
701 if (scale == 1.0f && sStep2 == 1 && dStep2 == 1)
702 {
703
704 if (dPtr1 != sPtr1) // srcImage != dstImage
705 {
706
707 memcpy (dPtr1, sPtr1, count2 * (uint32) sizeof (real32));
708
709 }
710
711 }
712
713 else
714 {
715
716 const real32 *sPtr2 = sPtr1;
717 real32 *dPtr2 = dPtr1;
718 INTEL_PRAGMA_SIMD_ASSERT_VECLEN_FLOAT(simd)
719 for (uint32 index2 = 0; index2 < count2; index2++)
720 {
721
722 real32 x = sPtr2 [0];
723
724 x *= scale;
725
726 dPtr2 [0] = x;
727
728 sPtr2 += sStep2;
729 dPtr2 += dStep2;
730
731 }
732
733 }
734
735 // The data is now in the destination buffer.
736
737 if (limit16)
738 {
739
740 //start by using intrinsic __m256�_mm256_cvtph_ps�(__m128i�a)
741 //once the intrinsic is written, merge this branch with previous one
742
743 uint32 *dPtr2 = (uint32 *) dPtr1;
744
745 INTEL_PRAGMA_SIMD_ASSERT_VECLEN_INT32(simd)
746
747 for (uint32 index2 = 0; index2 < count2; index2++)
748 {
749
750 uint32 x = dPtr2 [0];
751
752 uint16 y = DNG_FloatToHalf (x);
753
754 x = DNG_HalfToFloat (y);
755
756 dPtr2 [0] = x;
757
758 dPtr2 += dStep2;
759
760 }
761
762 }
763
764 else if (limit24)
765 {
766
767 uint32 *dPtr2 = (uint32 *) dPtr1;
768
769 for (uint32 index2 = 0; index2 < count2; index2++)
770 {
771
772 uint32 x = dPtr2 [0];
773
774 uint8 temp [3];
775
776 DNG_FloatToFP24 (x, temp);
777
778 x = DNG_FP24ToFloat (temp);
779
780 dPtr2 [0] = x;
781
782 dPtr2 += dStep2;
783
784 }
785
786 }
787
788 sPtr1 += sStep1;
789 dPtr1 += dStep1;
790
791 }
792
793 sPtr0 += sStep0;
794 dPtr0 += dStep0;
795
796 }
797
798 }
799
800 /******************************************************************************/
801
802 template <SIMDType simd>
LimitFloatBitDepth(dng_host & host,const dng_image & srcImage,dng_image & dstImage,uint32 bitDepth,real32 scale)803 void LimitFloatBitDepth (dng_host &host,
804 const dng_image &srcImage,
805 dng_image &dstImage,
806 uint32 bitDepth,
807 real32 scale)
808 {
809
810 DNG_ASSERT (srcImage.PixelType () == ttFloat, "Floating point image expected");
811 DNG_ASSERT (dstImage.PixelType () == ttFloat, "Floating point image expected");
812
813 dng_limit_float_depth_task<simd> task (srcImage,
814 dstImage,
815 bitDepth,
816 scale);
817
818 host.PerformAreaTask (task, dstImage.Bounds ());
819
820 }
821
822 /*****************************************************************************/
823
824 template
825 void LimitFloatBitDepth<Scalar> (dng_host &host,
826 const dng_image &srcImage,
827 dng_image &dstImage,
828 uint32 bitDepth,
829 real32 scale);
830
831 /*****************************************************************************/
832
833 #if qDNGIntelCompiler
834
835 template
836 void LimitFloatBitDepth<AVX2> (dng_host &host,
837 const dng_image &srcImage,
838 dng_image &dstImage,
839 uint32 bitDepth,
840 real32 scale);
841
842 #endif // qDNGIntelCompiler
843
844 /*****************************************************************************/
845
LimitFloatBitDepth(dng_host & host,const dng_image & srcImage,dng_image & dstImage,uint32 bitDepth,real32 scale)846 void LimitFloatBitDepth (dng_host &host,
847 const dng_image &srcImage,
848 dng_image &dstImage,
849 uint32 bitDepth,
850 real32 scale)
851 {
852
853 // Kludge: Turning this off for now because the AVX2 path produces
854 // slightly different results from the Scalar routine causing a mis-match
855 // in raw digest values when building HDR merge result negatives which
856 // causes the client to display a "file appears to be damaged" warning.
857 // -bury 11/13/2017
858
859 #if (qDNGIntelCompiler && qDNGExperimental && 0)
860
861 if (gDNGMaxSIMD >= AVX2)
862 {
863
864 LimitFloatBitDepth<AVX2> (host,
865 srcImage,
866 dstImage,
867 bitDepth,
868 scale);
869
870 }
871
872 else
873
874 #endif // qDNGIntelCompiler && qDNGExperimental
875
876 {
877
878 LimitFloatBitDepth<Scalar> (host,
879 srcImage,
880 dstImage,
881 bitDepth,
882 scale);
883
884 }
885
886 }
887
888 /*****************************************************************************/
889