1 /*****************************************************************************/
2 // Copyright 2006-2019 Adobe Systems Incorporated
3 // All Rights Reserved.
4 //
5 // NOTICE:  Adobe permits you to use, modify, and distribute this file in
6 // accordance with the terms of the Adobe license agreement accompanying it.
7 /*****************************************************************************/
8 
9 #include "dng_utils.h"
10 
11 #include "dng_area_task.h"
12 #include "dng_assertions.h"
13 #include "dng_bottlenecks.h"
14 #include "dng_flags.h"
15 #include "dng_globals.h"
16 #include "dng_host.h"
17 #include "dng_image.h"
18 #include "dng_mutex.h"
19 #include "dng_point.h"
20 #include "dng_rect.h"
21 #include "dng_simd_type.h"
22 #include "dng_tile_iterator.h"
23 
24 #if qMacOS
25 #include <CoreServices/CoreServices.h>
26 #endif
27 
28 #if qiPhone || qMacOS
29 // these provide timers
30 #include <mach/mach.h>
31 #include <mach/mach_time.h>
32 #endif
33 
34 #if qiPhone || qLinux
35 #include <signal.h> // for raise
36 #endif
37 
38 #if qWinOS
39 #include <windows.h>
40 #else
41 #include <sys/time.h>
42 #include <stdarg.h> // for va_start/va_end
43 #endif
44 
45 #include <atomic>
46 
47 /*****************************************************************************/
48 
49 #if qDNGDebug
50 
51 /*****************************************************************************/
52 
// DNG_DEBUG_BREAK stops in the debugger at the point of use. It expands to
// nothing on platforms that have no known mechanism.

#if qMacOS
	// "int3" only exists on x86. macOS also targets arm64, where the
	// compiler's debug trap builtin is used instead.
	#if defined(__i386__) || defined(__x86_64__)
		#define DNG_DEBUG_BREAK __asm__ volatile ("int3")
	#else
		#define DNG_DEBUG_BREAK __builtin_debugtrap ()
	#endif
#elif qiPhone
	#if qiPhoneSimulator && (defined(__i386__) || defined(__x86_64__))
		// Simulator hosted on an Intel machine.
		#define DNG_DEBUG_BREAK __asm__ volatile ("int3")
	#else
		// Device builds, and simulators hosted on non-Intel machines.
		// You'll be one level deeper in __kill. Works on Linux, Android too.
		#define DNG_DEBUG_BREAK raise(SIGTRAP)
	#endif
#elif qWinOS
	// DebugBreak has to be emulated on WinRT
	#define DNG_DEBUG_BREAK DebugBreak()
#elif qAndroid
	#define DNG_DEBUG_BREAK raise(SIGTRAP)
#elif qLinux
	#define DNG_DEBUG_BREAK raise(SIGTRAP)
#else
	#define DNG_DEBUG_BREAK
#endif
73 
74 /*****************************************************************************/
75 
// Reports a debug message using whatever mechanism the build configuration
// selects below: printing to stderr, breaking into the debugger, a Pascal
// string handed to DebugStr, or a message box. Which of these happen is
// controlled by the gPrintAsserts and gBreakOnAsserts globals.
//
// s: NUL-terminated message text. A null pointer is treated as an empty
//    message.

void dng_show_message (const char *s)
	{

	// strlen and the indexing below are undefined for a null pointer.

	if (s == NULL)
		s = "";

	// Only append a newline if there isn't already one.

	const size_t length = strlen (s);

	const char *nl = (length > 0 && s [length - 1] == '\n') ? "" : "\n";

	// Not every configuration below prints, so nl may otherwise be unused.

	(void) nl;

	#if qDNGPrintMessages

	// display the message
	if (gPrintAsserts)
		fprintf (stderr, "%s%s", s, nl);

	#elif qiPhone || qAndroid || qLinux

	if (gPrintAsserts)
		fprintf (stderr, "%s%s", s, nl);

	// iOS doesn't print a message to the console like DebugStr and MessageBox do, so we have to do both
	// You'll have to advance the program counter manually past this statement
	if (gBreakOnAsserts)
		DNG_DEBUG_BREAK;

	#elif qMacOS

	if (gBreakOnAsserts)
		{

		// DebugStr takes a Pascal string: a length byte followed by at most
		// 255 characters, so truncate the message to 255 chars.

		char ss [256];

		const uint32 len = (length > 255) ? 255 : (uint32) length;

		strncpy (&(ss [1]), s, len);
		ss [0] = (unsigned char) len;

		DebugStr ((unsigned char *) ss);

		}
	else if (gPrintAsserts)
		{
		fprintf (stderr, "%s%s", s, nl);
		}

	#elif qWinOS

	// display a dialog
	// This is not thread safe.  Multiple message boxes can be launched.
	// Should also be launched in its own thread so main msg queue isn't thrown off.
	if (gBreakOnAsserts)
		MessageBoxA (NULL, s, NULL, MB_OK);		// lpText is LPCSTR; no cast needed
	else if (gPrintAsserts)
		fprintf (stderr, "%s%s", s, nl);

	#endif

	}
132 
133 /*****************************************************************************/
134 
dng_show_message_f(const char * fmt,...)135 void dng_show_message_f (const char *fmt, ... )
136 	{
137 
138 	char buffer [2048];
139 
140 	va_list ap;
141 	va_start (ap, fmt);
142 
143 	vsnprintf (buffer, sizeof (buffer), fmt, ap);
144 
145 	va_end (ap);
146 
147 	dng_show_message (buffer);
148 
149 	}
150 
151 /*****************************************************************************/
152 
153 #endif
154 
155 /*****************************************************************************/
156 
ComputeBufferSize(uint32 pixelType,const dng_point & tileSize,uint32 numPlanes,PaddingType paddingType)157 uint32 ComputeBufferSize (uint32 pixelType,
158 						  const dng_point &tileSize,
159 						  uint32 numPlanes,
160 						  PaddingType paddingType)
161 	{
162 
163 	// Convert tile size to uint32.
164 
165 	if (tileSize.h < 0 || tileSize.v < 0)
166 		{
167 		ThrowMemoryFull ("Negative tile size");
168 		}
169 
170 	const uint32 tileSizeH = static_cast<uint32> (tileSize.h);
171 	const uint32 tileSizeV = static_cast<uint32> (tileSize.v);
172 
173 	const uint32 pixelSize = TagTypeSize (pixelType);
174 
175 	// Add padding to width if necessary.
176 
177 	uint32 paddedWidth = tileSizeH;
178 
179 	if (paddingType == padSIMDBytes)
180 		{
181 
182 		if (!RoundUpForPixelSize (paddedWidth,
183 								  pixelSize,
184 								  &paddedWidth))
185 			{
186 			ThrowOverflow ("Arithmetic overflow computing buffer size");
187 			}
188 
189 		}
190 
191 	// Compute buffer size.
192 
193 	uint32 bufferSize;
194 
195 	if (!SafeUint32Mult (paddedWidth, tileSizeV, &bufferSize) ||
196 		!SafeUint32Mult (bufferSize,  pixelSize, &bufferSize) ||
197 		!SafeUint32Mult (bufferSize,  numPlanes, &bufferSize))
198 		{
199 		ThrowOverflow ("Arithmetic overflow computing buffer size");
200 		}
201 
202 	return bufferSize;
203 
204 	}
205 
206 /*****************************************************************************/
207 
TickTimeInSeconds()208 real64 TickTimeInSeconds ()
209 	{
210 
211 	#if qWinOS
212 
213 	// One might think it prudent to cache the frequency here, however
214 	// low-power CPU modes can, and do, change the value returned.
215 	// Thus the frequencey needs to be retrieved each time.
216 
217 	// Note that the frequency changing can cause the return
218 	// result to jump backwards, which is why the TickCountInSeconds
219 	// (below) also exists.
220 
221 	// Just plug in laptop when doing timings to minimize this.
222 	//  QPC/QPH is a slow call compared to rtdsc.
223     //  but QPC/QPF is not tied to speed step, it's the northbridge timer.
224     //  caching the invFrequency also avoids a costly divide
225 
226 	static real64 freqMultiplier = 0.0;
227 
228 	if (freqMultiplier == 0.0)
229 		{
230 
231 		LARGE_INTEGER freq;
232 
233 		QueryPerformanceFrequency (&freq);
234 
235 		freqMultiplier = 1.0 / (real64) freq.QuadPart;
236 
237 		}
238 
239 	LARGE_INTEGER cycles;
240 
241 	QueryPerformanceCounter (&cycles);
242 
243 	return (real64) cycles.QuadPart * freqMultiplier;
244 
245 	#elif qiPhone || qMacOS
246 
247     // cache frequency of high-perf timer
248 	static real64 freqMultiplier = 0.0;
249 	if (freqMultiplier == 0.0)
250 		{
251 
252 		mach_timebase_info_data_t freq;
253 		mach_timebase_info(&freq);
254 
255 		// converts from nanos to micros
256 		//  numer = 125, denom = 3 * 1000
257 		freqMultiplier = ((real64)freq.numer / (real64)freq.denom) * 1.0e-9;
258 
259         }
260 
261 	return mach_absolute_time() * freqMultiplier;
262 
263 	#elif qAndroid || qLinux
264 
265 	//this is a fast timer to nanos
266     struct timespec now;
267 	clock_gettime(CLOCK_MONOTONIC, &now);
268 	return now.tv_sec + (real64)now.tv_nsec * 1.0e-9;
269 
270 	#else
271 
272 	// Perhaps a better call exists. (e.g. avoid adjtime effects)
273 
274 	struct timeval tv;
275 
276 	gettimeofday (&tv, NULL);
277 
278 	return tv.tv_sec + (real64)tv.tv_usec * 1.0e-6;
279 
280 	#endif
281 
282 	}
283 
284 /*****************************************************************************/
285 
TickCountInSeconds()286 real64 TickCountInSeconds ()
287 	{
288 
289     return TickTimeInSeconds ();
290 
291 	}
292 
293 /*****************************************************************************/
294 
// Nesting depth of live dng_timer objects: incremented by
// DNGIncrementTimerLevel, decremented by DNGDecrementTimerLevel, and used by
// the dng_timer destructor to indent its output.

static std::atomic_int sTimerLevel (0);
296 
297 /*****************************************************************************/
298 
DNGIncrementTimerLevel()299 void DNGIncrementTimerLevel ()
300     {
301 
302     // This isn't thread coherent, multiple threads can create/destroy cr_timer
303     //   causing the tabbing to be invalid.  Imagecore disables this.
304 
305     if (!gImagecore)
306         {
307 
308         sTimerLevel++;
309 
310         }
311 
312     }
313 
314 /*****************************************************************************/
315 
DNGDecrementTimerLevel()316 int32 DNGDecrementTimerLevel ()
317     {
318 
319     if (gImagecore)
320         {
321 
322         return 0;
323 
324         }
325 
326     else
327         {
328 
329         return (int32) (--sTimerLevel);
330 
331         }
332 
333    }
334 
335 /*****************************************************************************/
336 
// Starts a timer: records the label and the current tick time, then bumps
// the nesting level that the destructor uses for indentation.
//
// NOTE(review): fMessage is initialized directly from the pointer, so the
// string presumably must outlive the timer; confirm against the member's
// declared type.

dng_timer::dng_timer (const char *message)

	:	fMessage   (message             )
	,	fStartTime (TickTimeInSeconds ())

	{

    DNGIncrementTimerLevel ();

	}
347 
348 /*****************************************************************************/
349 
~dng_timer()350 dng_timer::~dng_timer ()
351 	{
352 
353     uint32 level = Pin_int32 (0, DNGDecrementTimerLevel (), 10);
354 
355 	if (!gDNGShowTimers)
356 		return;
357 
358 	real64 totalTime = TickTimeInSeconds () - fStartTime;
359 
360     #if defined(qCRLogging) && qCRLogging && defined(cr_logi)
361 
362     if (gImagecore)
363         {
364         // Imagecore force includes cr_log and overrides DNG to go to its logging under a mutex.
365         // don't use indenting or fprintf to stderr, want these buffered
366         cr_logi("timer", "%s: %0.3f sec\n", fMessage, totalTime);
367         return;
368         }
369 
370     #endif
371 
372     fprintf (stderr, "%*s%s: %0.3f sec\n", level*2, "", fMessage, totalTime);
373 
374     }
375 
376 /*****************************************************************************/
377 
MaxSquaredDistancePointToRect(const dng_point_real64 & point,const dng_rect_real64 & rect)378 real64 MaxSquaredDistancePointToRect (const dng_point_real64 &point,
379 									  const dng_rect_real64 &rect)
380 	{
381 
382 	real64 distSqr = DistanceSquared (point,
383 									  rect.TL ());
384 
385 	distSqr = Max_real64 (distSqr,
386 						  DistanceSquared (point,
387 										   rect.BL ()));
388 
389 	distSqr = Max_real64 (distSqr,
390 						  DistanceSquared (point,
391 										   rect.BR ()));
392 
393 	distSqr = Max_real64 (distSqr,
394 						  DistanceSquared (point,
395 										   rect.TR ()));
396 
397 	return distSqr;
398 
399 	}
400 
401 /*****************************************************************************/
402 
MaxDistancePointToRect(const dng_point_real64 & point,const dng_rect_real64 & rect)403 real64 MaxDistancePointToRect (const dng_point_real64 &point,
404 							   const dng_rect_real64 &rect)
405 	{
406 
407 	return sqrt (MaxSquaredDistancePointToRect (point,
408 												rect));
409 
410 	}
411 
412 /*****************************************************************************/
413 
dng_dither()414 dng_dither::dng_dither ()
415 
416 	:	fNoiseBuffer ()
417 
418 	{
419 
420 	const uint32 kSeed = 1;
421 
422 	fNoiseBuffer.Allocate (kRNGSize2D * sizeof (uint16));
423 
424 	uint16 *buffer = fNoiseBuffer.Buffer_uint16 ();
425 
426 	uint32 seed = kSeed;
427 
428 	for (uint32 i = 0; i < kRNGSize2D; i++)
429 		{
430 
431         // The correct math for 16 to 8-bit dither would be:
432         //
433         // y = (x * 255 + r) / 65535;  (0 <= r <= 65534)
434         //
435         // The bottlnecks are using a faster approximation of
436         // this math (using a power of two for the division):
437         //
438         // y = (x * 255 + r) / 65536;  (255 <= r <= 65535)
439         //
440         // To insure that all exact 8 bit values in 16 bit space
441         // round trip exactly to the same 8-bit, we need to limit
442         // r values to the range 255 to 65535.
443         //
444         // This results in the dither effect being slightly
445         // imperfect, but correct round-tripping of 8-bit values
446         // is far more important.
447 
448         uint16 value;
449 
450         do
451             {
452 
453             seed = DNG_Random (seed);
454 
455             value = (uint16) seed;
456 
457             }
458         while (value < 255);
459 
460 		buffer [i] = value;
461 
462 		}
463 
464 	}
465 
466 /******************************************************************************/
467 
Get()468 const dng_dither & dng_dither::Get ()
469 	{
470 
471 	static dng_dither dither;
472 
473 	return dither;
474 
475 	}
476 
477 /*****************************************************************************/
478 
HistogramArea(dng_host &,const dng_image & image,const dng_rect & area,uint32 * hist,uint32 maxValue,uint32 plane)479 void HistogramArea (dng_host & /* host */,
480 					const dng_image &image,
481 					const dng_rect &area,
482 					uint32 *hist,
483 					uint32 maxValue,
484 					uint32 plane)
485 	{
486 
487 	DNG_ASSERT (image.PixelType () == ttShort, "Unsupported pixel type");
488 
489 	DoZeroBytes (hist, (maxValue + 1) * (uint32) sizeof (uint32));
490 
491 	dng_rect tile;
492 
493 	dng_tile_iterator iter (image, area);
494 
495 	while (iter.GetOneTile (tile))
496 		{
497 
498 		dng_const_tile_buffer buffer (image, tile);
499 
500 		const void *sPtr = buffer.ConstPixel (tile.t,
501 											  tile.l,
502 											  plane);
503 
504 		uint32 count0 = 1;
505 		uint32 count1 = tile.H ();
506 		uint32 count2 = tile.W ();
507 
508 		int32 step0 = 0;
509 		int32 step1 = buffer.fRowStep;
510 		int32 step2 = buffer.fColStep;
511 
512 		OptimizeOrder (sPtr,
513 					   buffer.fPixelSize,
514 					   count0,
515 					   count1,
516 					   count2,
517 					   step0,
518 					   step1,
519 					   step2);
520 
521 		DNG_ASSERT (count0 == 1, "OptimizeOrder logic error");
522 
523 		const uint16 *s1 = (const uint16 *) sPtr;
524 
525 		for (uint32 row = 0; row < count1; row++)
526 			{
527 
528 			if (maxValue == 0x0FFFF && step2 == 1)
529 				{
530 
531 				for (uint32 col = 0; col < count2; col++)
532 					{
533 
534 					uint32 x = s1 [col];
535 
536 					hist [x] ++;
537 
538 					}
539 
540 				}
541 
542 			else
543 				{
544 
545 				const uint16 *s2 = s1;
546 
547 				for (uint32 col = 0; col < count2; col++)
548 					{
549 
550 					uint32 x = s2 [0];
551 
552 					if (x <= maxValue)
553 						{
554 
555 						hist [x] ++;
556 
557 						}
558 
559 					s2 += step2;
560 
561 					}
562 
563 				}
564 
565 			s1 += step1;
566 
567 			}
568 
569 		}
570 
571 	}
572 
573 /*****************************************************************************/
574 
// Area task that copies a floating-point source image into a destination
// image, multiplying every sample by a scale factor and then optionally
// round-tripping each value through a 16-bit or 24-bit floating-point
// encoding. The template parameter selects the SIMD feature level that
// Process is compiled for.

template <SIMDType simd>
class dng_limit_float_depth_task: public dng_area_task
	{

	private:

		// Image the samples are read from.

		const dng_image &fSrcImage;

		// Image the results are written to.

		dng_image &fDstImage;

		// Process applies limiting only for the values 16 and 24; any other
		// value leaves the scaled samples as they are.

		uint32 fBitDepth;

		// Factor each sample is multiplied by.

		real32 fScale;

	public:

		dng_limit_float_depth_task (const dng_image &srcImage,
									dng_image &dstImage,
									uint32 bitDepth,
									real32 scale);

		// Reports the source image's repeating tile.

		virtual dng_rect RepeatingTile1 () const
			{
			return fSrcImage.RepeatingTile ();
			}

		// Reports the destination image's repeating tile.

		virtual dng_rect RepeatingTile2 () const
			{
			return fDstImage.RepeatingTile ();
			}

		// Processes one tile; threadIndex and sniffer are unused.

		virtual void Process (uint32 threadIndex,
							  const dng_rect &tile,
							  dng_abort_sniffer *sniffer);

	};
611 
612 /*****************************************************************************/
613 
// Stores references to the two images plus the bit depth and scale; the
// string passed to the base class is the task's name. The images are held
// by reference, so they must outlive the task.

template <SIMDType simd>
dng_limit_float_depth_task<simd>::dng_limit_float_depth_task
	(const dng_image &srcImage,
	 dng_image &dstImage,
	 uint32 bitDepth,
	 real32 scale)

	:	dng_area_task ("dng_limit_float_depth_task")

	,	fSrcImage (srcImage)
	,	fDstImage (dstImage)
	,	fBitDepth (bitDepth)
	,	fScale    (scale)

	{

	}
631 
632 /*****************************************************************************/
633 
// Processes one tile: scales every sample of the source tile into the
// destination tile, then, for bit depths 16 and 24, round-trips each
// destination value through the narrower floating-point encoding so that
// only the precision of that encoding remains.

template <SIMDType simd>
void dng_limit_float_depth_task<simd>::Process (uint32 /* threadIndex */,
												const dng_rect &tile,
												dng_abort_sniffer * /* sniffer */)
	{

	INTEL_COMPILER_NEEDED_NOTE

	SET_CPU_FEATURE (simd);

	dng_const_tile_buffer srcBuffer (fSrcImage, tile);
	dng_dirty_tile_buffer dstBuffer (fDstImage, tile);

	// Loop extents and strides start as rows / columns / planes; they are
	// passed to OptimizeOrder by reference below.

	uint32 count0 = tile.H ();
	uint32 count1 = tile.W ();
	uint32 count2 = fDstImage.Planes ();

	int32 sStep0 = srcBuffer.fRowStep;
	int32 sStep1 = srcBuffer.fColStep;
	int32 sStep2 = srcBuffer.fPlaneStep;

	int32 dStep0 = dstBuffer.fRowStep;
	int32 dStep1 = dstBuffer.fColStep;
	int32 dStep2 = dstBuffer.fPlaneStep;

	const void *sPtr = srcBuffer.ConstPixel (tile.t,
											 tile.l,
											 0);

		  void *dPtr = dstBuffer.DirtyPixel (tile.t,
											 tile.l,
											 0);

	// NOTE(review): OptimizeOrder presumably reorders the three dimensions
	// so that index2 becomes the most efficient innermost loop; after this
	// call the 0/1/2 suffixes need not mean row/column/plane any more.

	OptimizeOrder (sPtr,
			       dPtr,
				   srcBuffer.fPixelSize,
				   dstBuffer.fPixelSize,
				   count0,
				   count1,
				   count2,
				   sStep0,
				   sStep1,
				   sStep2,
				   dStep0,
				   dStep1,
				   dStep2);

	const real32 *sPtr0 = (const real32 *) sPtr;
		  real32 *dPtr0 = (      real32 *) dPtr;

	real32 scale = fScale;

	// Any bit depth other than 16 or 24 leaves the scaled values unlimited.

	bool limit16 = (fBitDepth == 16);
	bool limit24 = (fBitDepth == 24);

	for (uint32 index0 = 0; index0 < count0; index0++)
		{

		const real32 *sPtr1 = sPtr0;
			  real32 *dPtr1 = dPtr0;

		for (uint32 index1 = 0; index1 < count1; index1++)
			{

			// If the scale is a NOP, and the data is packed solid, we can just do memory
			// copy.

			if (scale == 1.0f && sStep2 == 1 && dStep2 == 1)
				{

				if (dPtr1 != sPtr1)			// srcImage != dstImage
					{

					memcpy (dPtr1, sPtr1, count2 * (uint32) sizeof (real32));

					}

				}

			else
				{

				// General case: scale one sample at a time, honoring the
				// innermost strides of both buffers.

				const real32 *sPtr2 = sPtr1;
					  real32 *dPtr2 = dPtr1;
				INTEL_PRAGMA_SIMD_ASSERT_VECLEN_FLOAT(simd)
				for (uint32 index2 = 0; index2 < count2; index2++)
					{

					real32 x = sPtr2 [0];

					x *= scale;

					dPtr2 [0] = x;

					sPtr2 += sStep2;
					dPtr2 += dStep2;

					}

				}

			// The data is now in the destination buffer.

			if (limit16)
				{

				// start by using intrinsic __m256 _mm256_cvtph_ps (__m128i a)
				// once the intrinsic is written, merge this branch with previous one

				// The destination floats are accessed as raw 32-bit words
				// and round-tripped through DNG_FloatToHalf / DNG_HalfToFloat.

				uint32 *dPtr2 = (uint32 *) dPtr1;

				INTEL_PRAGMA_SIMD_ASSERT_VECLEN_INT32(simd)

				for (uint32 index2 = 0; index2 < count2; index2++)
					{

					uint32 x = dPtr2 [0];

					uint16 y = DNG_FloatToHalf (x);

					x = DNG_HalfToFloat (y);

					dPtr2 [0] = x;

					dPtr2 += dStep2;

					}

				}

			else if (limit24)
				{

				// Same round trip, through the three-byte encoding written
				// by DNG_FloatToFP24 and read back by DNG_FP24ToFloat.

				uint32 *dPtr2 = (uint32 *) dPtr1;

				for (uint32 index2 = 0; index2 < count2; index2++)
					{

					uint32 x = dPtr2 [0];

					uint8 temp [3];

					DNG_FloatToFP24 (x, temp);

					x = DNG_FP24ToFloat (temp);

					dPtr2 [0] = x;

					dPtr2 += dStep2;

					}

				}

			sPtr1 += sStep1;
			dPtr1 += dStep1;

			}

		sPtr0 += sStep0;
		dPtr0 += dStep0;

		}

	}
799 
800 /******************************************************************************/
801 
802 template <SIMDType simd>
LimitFloatBitDepth(dng_host & host,const dng_image & srcImage,dng_image & dstImage,uint32 bitDepth,real32 scale)803 void LimitFloatBitDepth (dng_host &host,
804 						 const dng_image &srcImage,
805 						 dng_image &dstImage,
806 						 uint32 bitDepth,
807 						 real32 scale)
808 	{
809 
810 	DNG_ASSERT (srcImage.PixelType () == ttFloat, "Floating point image expected");
811 	DNG_ASSERT (dstImage.PixelType () == ttFloat, "Floating point image expected");
812 
813 	dng_limit_float_depth_task<simd> task (srcImage,
814 									 dstImage,
815 									 bitDepth,
816 									 scale);
817 
818 	host.PerformAreaTask (task, dstImage.Bounds ());
819 
820 	}
821 
822 /*****************************************************************************/
823 
// Explicit instantiation of the Scalar variant of the function template.

template
void LimitFloatBitDepth<Scalar> (dng_host &host,
								 const dng_image &srcImage,
								 dng_image &dstImage,
								 uint32 bitDepth,
								 real32 scale);
830 
831 /*****************************************************************************/
832 
#if qDNGIntelCompiler

// Explicit instantiation of the AVX2 variant of the function template; only
// compiled when qDNGIntelCompiler is set.

template
void LimitFloatBitDepth<AVX2> (dng_host &host,
							   const dng_image &srcImage,
							   dng_image &dstImage,
							   uint32 bitDepth,
							   real32 scale);

#endif	// qDNGIntelCompiler
843 
844 /*****************************************************************************/
845 
// Non-template entry point: picks the SIMD variant of LimitFloatBitDepth to
// run. With the "&& 0" in the condition below the AVX2 branch is compiled
// out, so the Scalar variant is always used.

void LimitFloatBitDepth (dng_host &host,
						 const dng_image &srcImage,
						 dng_image &dstImage,
						 uint32 bitDepth,
						 real32 scale)
	{

	// Kludge: Turning this off for now because the AVX2 path produces
	// slightly different results from the Scalar routine causing a mis-match
	// in raw digest values when building HDR merge result negatives which
	// causes the client to display a "file appears to be damaged" warning.
	// -bury 11/13/2017

	#if (qDNGIntelCompiler && qDNGExperimental && 0)

	if (gDNGMaxSIMD >= AVX2)
		{

		LimitFloatBitDepth<AVX2> (host,
								  srcImage,
								  dstImage,
								  bitDepth,
								  scale);

		}

	else

	#endif	// qDNGIntelCompiler && qDNGExperimental

		// When the conditional section above is compiled out, this block is
		// an unconditional statement rather than the body of the "else".

		{

		LimitFloatBitDepth<Scalar> (host,
									srcImage,
									dstImage,
									bitDepth,
									scale);

		}

	}
887 
888 /*****************************************************************************/
889