1 /******************************************************************************
2  * $Id$
3  *
4  * Project:  GDAL Core
5  * Purpose:  Inline C++ templates
6  * Author:   Phil Vachon, <philippe at cowpig.ca>
7  *
8  ******************************************************************************
9  * Copyright (c) 2009, Phil Vachon, <philippe at cowpig.ca>
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  ****************************************************************************/
29 
30 #ifndef GDAL_PRIV_TEMPLATES_HPP_INCLUDED
31 #define GDAL_PRIV_TEMPLATES_HPP_INCLUDED
32 
33 #include "cpl_port.h"
34 
35 #include <limits>
36 
37 /************************************************************************/
38 /*                        GDALGetDataLimits()                           */
39 /************************************************************************/
/**
 * Compute the limits of values that can be placed in Tout in terms of
 * Tin. Usually used for output clamping, when the output data type's
 * limits are stable relative to the input type (i.e. no roundoff error).
 *
 * The returned limits start as the full range of Tin (for floating point
 * Tin: [-max, max]) and are then narrowed as follows:
 * - Tout is a signed integer type with fewer value bits than Tin, or Tin is
 *   a floating point type: the maximum becomes the maximum of Tout, and the
 *   minimum becomes the minimum of Tout when Tin is signed.
 * - Tout is an unsigned integer type: the minimum becomes 0, and the maximum
 *   becomes the maximum of Tout when Tout has no more value bits than Tin.
 * - Tout is not an integer type: the limits are left untouched.
 *
 * Note that the comparisons rely on std::numeric_limits<>::digits, and that
 * the limits of Tout are converted to Tin with a plain static_cast, so the
 * result is only exact when that conversion is.
 *
 * @param tMaxValue the returned maximum value
 * @param tMinValue the returned minimum value
 */

template <class Tin, class Tout>
inline void GDALGetDataLimits(Tin &tMaxValue, Tin &tMinValue)
{
    tMaxValue = std::numeric_limits<Tin>::max();
    tMinValue = std::numeric_limits<Tin>::min();
    if (!std::numeric_limits<Tin>::is_integer)
    {
        // For floating point types, numeric_limits<>::min() is the smallest
        // positive normalized value and not the most negative one, so derive
        // the lower limit from the maximum instead.
        tMinValue = static_cast<Tin>(0 - tMaxValue);
    }

    // Compute the actual minimum value of Tout in terms of Tin.
    if (std::numeric_limits<Tout>::is_signed && std::numeric_limits<Tout>::is_integer)
    {
        // the minimum value is less than zero
        if (std::numeric_limits<Tout>::digits < std::numeric_limits<Tin>::digits ||
                        !std::numeric_limits<Tin>::is_integer)
        {
            // Tout is smaller than Tin, so we need to clamp values in input
            // to the range of Tout's min/max values
            if (std::numeric_limits<Tin>::is_signed)
            {
                tMinValue = static_cast<Tin>(std::numeric_limits<Tout>::min());
            }
            tMaxValue = static_cast<Tin>(std::numeric_limits<Tout>::max());
        }
    }
    else if (std::numeric_limits<Tout>::is_integer)
    {
        // the output is unsigned, so we just need to determine the max
        /* coverity[same_on_both_sides] */
        if (std::numeric_limits<Tout>::digits <= std::numeric_limits<Tin>::digits)
        {
            // Tout is smaller than Tin, so we need to clamp the input values
            // to the range of Tout's max
            tMaxValue = static_cast<Tin>(std::numeric_limits<Tout>::max());
        }
        // An unsigned output can never hold a negative value.
        tMinValue = 0;
    }

}
85 
86 /************************************************************************/
87 /*                          GDALClampValue()                            */
88 /************************************************************************/
/**
 * Restrict a value of type T to the range [tMin, tMax].
 *
 * The upper bound is tested before the lower one.
 *
 * @param tValue the value
 * @param tMax the max value
 * @param tMin the min value
 * @return tMax if tValue is greater than tMax, otherwise tMin if tValue is
 *         lower than tMin, otherwise tValue itself.
 */
template <class T>
inline T GDALClampValue(const T tValue, const T tMax, const T tMin)
{
    if (tValue > tMax)
    {
        return tMax;
    }
    if (tValue < tMin)
    {
        return tMin;
    }
    return tValue;
}
102 
103 /************************************************************************/
104 /*                         GDALIsValueInRange()                         */
105 /************************************************************************/
/**
 * Returns whether a value is in the type range, i.e. whether it lies
 * between std::numeric_limits<T>::min() and std::numeric_limits<T>::max(),
 * bounds included.
 * NaN is considered not to be in type range.
 *
 * @param dfValue the value
 * @return whether the value is in the type range.
 */
template <class T> inline bool GDALIsValueInRange(double dfValue)
{
    if (!(dfValue >= std::numeric_limits<T>::min()))
    {
        // Below the minimum, or NaN (for which every comparison is false).
        return false;
    }
    return dfValue <= std::numeric_limits<T>::max();
}
118 
119 template <> inline bool GDALIsValueInRange<double>(double dfValue)
120 {
121     return !CPLIsNan(dfValue);
122 }
123 
124 template <> inline bool GDALIsValueInRange<float>(double dfValue)
125 {
126     return CPLIsInf(dfValue) ||
127            (dfValue >= -std::numeric_limits<float>::max() &&
128             dfValue <= std::numeric_limits<float>::max());
129 }
130 
131 /************************************************************************/
132 /*                          GDALCopyWord()                              */
133 /************************************************************************/
134 
135 template<class Tin, class Tout> struct sGDALCopyWord
136 {
137     static inline void f(const Tin tValueIn, Tout &tValueOut)
138     {
139         Tin tMaxVal, tMinVal;
140         GDALGetDataLimits<Tin, Tout>(tMaxVal, tMinVal);
141         tValueOut = static_cast<Tout>(GDALClampValue(tValueIn, tMaxVal, tMinVal));
142     }
143 };
144 
145 template<class Tin> struct sGDALCopyWord<Tin, float>
146 {
147     static inline void f(const Tin tValueIn, float &fValueOut)
148     {
149         fValueOut = static_cast<float>(tValueIn);
150     }
151 };
152 
153 template<class Tin> struct sGDALCopyWord<Tin, double>
154 {
155     static inline void f(const Tin tValueIn, double &dfValueOut)
156     {
157         dfValueOut = tValueIn;
158     }
159 };
160 
161 template<> struct sGDALCopyWord<double, double>
162 {
163     static inline void f(const double dfValueIn, double &dfValueOut)
164     {
165         dfValueOut = dfValueIn;
166     }
167 };
168 
169 template<> struct sGDALCopyWord<float, float>
170 {
ToHandle(GDALMajorObject * poMajorObject)171     static inline void f(const float fValueIn, float &fValueOut)
172     {
173         fValueOut = fValueIn;
174     }
175 };
176 
FromHandle(GDALMajorObjectH hMajorObject)177 template<> struct sGDALCopyWord<float, double>
178 {
179     static inline void f(const float fValueIn, double &dfValueOut)
180     {
181         dfValueOut = fValueIn;
182     }
183 };
184 
185 template<> struct sGDALCopyWord<double, float>
186 {
187     static inline void f(const double dfValueIn, float &fValueOut)
188     {
189         if( dfValueIn > std::numeric_limits<float>::max() )
190         {
191             fValueOut = std::numeric_limits<float>::infinity();
192             return;
193         }
194         if( dfValueIn < -std::numeric_limits<float>::max() )
195         {
196             fValueOut = -std::numeric_limits<float>::infinity();
197             return;
198         }
199 
200         fValueOut = static_cast<float>(dfValueIn);
201     }
202 };
203 
204 template <class Tout> struct sGDALCopyWord<float, Tout>
205 {
206     static inline void f(const float fValueIn, Tout &tValueOut)
207     {
208         if( CPLIsNan(fValueIn) )
209         {
210             tValueOut = 0;
211             return;
212         }
213         float fMaxVal, fMinVal;
214         GDALGetDataLimits<float, Tout>(fMaxVal, fMinVal);
215         tValueOut = static_cast<Tout>(
216             GDALClampValue(fValueIn + 0.5f, fMaxVal, fMinVal));
217     }
218 };
219 
220 template<> struct sGDALCopyWord<float, short>
221 {
222     static inline void f(const float fValueIn, short &nValueOut)
223     {
224         if( CPLIsNan(fValueIn) )
225         {
226             nValueOut = 0;
227             return;
228         }
229         float fMaxVal, fMinVal;
230         GDALGetDataLimits<float, short>(fMaxVal, fMinVal);
231         float fValue = fValueIn >= 0.0f ? fValueIn + 0.5f :
232             fValueIn - 0.5f;
233         nValueOut = static_cast<short>(
234             GDALClampValue(fValue, fMaxVal, fMinVal));
235     }
236 };
237 
238 template<class Tout> struct sGDALCopyWord<double, Tout>
239 {
240     static inline void f(const double dfValueIn, Tout &tValueOut)
241     {
242         if( CPLIsNan(dfValueIn) )
243         {
244             tValueOut = 0;
245             return;
246         }
247         double dfMaxVal, dfMinVal;
248         GDALGetDataLimits<double, Tout>(dfMaxVal, dfMinVal);
249         tValueOut = static_cast<Tout>(
250             GDALClampValue(dfValueIn + 0.5, dfMaxVal, dfMinVal));
251     }
252 };
253 
254 template<> struct sGDALCopyWord<double, int>
255 {
GetSiblingFiles()256     static inline void f(const double dfValueIn, int &nValueOut)
257     {
258         if( CPLIsNan(dfValueIn) )
259         {
260             nValueOut = 0;
261             return;
262         }
263         double dfMaxVal, dfMinVal;
264         GDALGetDataLimits<double, int>(dfMaxVal, dfMinVal);
265         double dfValue = dfValueIn >= 0.0 ? dfValueIn + 0.5 :
266             dfValueIn - 0.5;
267         nValueOut = static_cast<int>(
268             GDALClampValue(dfValue, dfMaxVal, dfMinVal));
269     }
270 };
271 
272 template<> struct sGDALCopyWord<double, short>
273 {
274     static inline void f(const double dfValueIn, short &nValueOut)
275     {
276         if( CPLIsNan(dfValueIn) )
277         {
278             nValueOut = 0;
279             return;
280         }
281         double dfMaxVal, dfMinVal;
282         GDALGetDataLimits<double, short>(dfMaxVal, dfMinVal);
283         double dfValue = dfValueIn > 0.0 ? dfValueIn + 0.5 :
284             dfValueIn - 0.5;
285         nValueOut = static_cast<short>(
286             GDALClampValue(dfValue, dfMaxVal, dfMinVal));
287     }
288 };
289 
290 // Roundoff occurs for Float32 -> int32 for max/min. Overload GDALCopyWord
291 // specifically for this case.
292 template<> struct sGDALCopyWord<float, int>
293 {
294     static inline void f(const float fValueIn, int &nValueOut)
295     {
296         if (fValueIn >= static_cast<float>(std::numeric_limits<int>::max()))
297         {
298             nValueOut = std::numeric_limits<int>::max();
299         }
300         else if (fValueIn <= static_cast<float>(std::numeric_limits<int>::min()))
301         {
302             nValueOut = std::numeric_limits<int>::min();
303         }
304         else
305         {
306             nValueOut = static_cast<int>(fValueIn > 0.0f ?
307                 fValueIn + 0.5f : fValueIn - 0.5f);
308         }
309     }
310 };
311 
312 // Roundoff occurs for Float32 -> uint32 for max. Overload GDALCopyWord
313 // specifically for this case.
314 template<> struct sGDALCopyWord<float, unsigned int>
315 {
316     static inline void f(const float fValueIn, unsigned int &nValueOut)
317     {
318         if (fValueIn >= static_cast<float>(std::numeric_limits<unsigned int>::max()))
319         {
320             nValueOut = std::numeric_limits<unsigned int>::max();
321         }
322         else if (fValueIn <= static_cast<float>(std::numeric_limits<unsigned int>::min()))
323         {
324             nValueOut = std::numeric_limits<unsigned int>::min();
325         }
326         else
327         {
328             nValueOut = static_cast<unsigned int>(fValueIn + 0.5f);
329         }
330     }
331 };
332 
333 /**
334  * Copy a single word, optionally rounding if appropriate (i.e. going
335  * from the float to the integer case). Note that this is the function
336  * you should specialize if you're adding a new data type.
337  *
338  * @param tValueIn value of type Tin; the input value to be converted
339  * @param tValueOut value of type Tout; the output value
340  */
341 
342 template <class Tin, class Tout>
343 inline void GDALCopyWord(const Tin tValueIn, Tout &tValueOut)
344 {
345     sGDALCopyWord<Tin, Tout>::f(tValueIn, tValueOut);
346 }
347 
348 /************************************************************************/
349 /*                         GDALCopy4Words()                             */
350 /************************************************************************/
/**
 * Copy 4 packed words to 4 packed words, optionally rounding if appropriate
 * (i.e. going from the float to the integer case).
 *
 * This generic version converts the words one at a time with GDALCopyWord().
 *
 * @param pValueIn pointer to 4 input values of type Tin.
 * @param pValueOut pointer to 4 output values of type Tout.
 */

template <class Tin, class Tout>
inline void GDALCopy4Words(const Tin* pValueIn, Tout* const pValueOut)
{
    for( int iWord = 0; iWord < 4; ++iWord )
    {
        GDALCopyWord(pValueIn[iWord], pValueOut[iWord]);
    }
}
367 
368 /************************************************************************/
369 /*                         GDALCopy8Words()                             */
370 /************************************************************************/
/**
 * Copy 8 packed words to 8 packed words, optionally rounding if appropriate
 * (i.e. going from the float to the integer case).
 *
 * This generic version processes the words as two groups of 4 with
 * GDALCopy4Words().
 *
 * @param pValueIn pointer to 8 input values of type Tin.
 * @param pValueOut pointer to 8 output values of type Tout.
 */

template<class Tin, class Tout>
inline void GDALCopy8Words(const Tin* pValueIn, Tout* const pValueOut)
{
    const Tin* const pSecondHalfIn = pValueIn + 4;
    Tout* const pSecondHalfOut = pValueOut + 4;

    GDALCopy4Words(pValueIn, pValueOut);
    GDALCopy4Words(pSecondHalfIn, pSecondHalfOut);
}
385 
// Needs SSE2
// _mm_cvtsi128_si64 does not work with gcc 3.4
388 #if (defined(__x86_64) || defined(_M_X64)) && !(defined(__GNUC__) && __GNUC__ < 4)
389 
390 #include <emmintrin.h>
391 
392 static inline void GDALCopyXMMToInt32(const __m128i xmm, void* pDest)
393 {
394 #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
395     int n32 = _mm_cvtsi128_si32 (xmm);     // Extract lower 32 bit word
396     memcpy(pDest, &n32, sizeof(n32));
397 #else
398     *static_cast<int*>(pDest) = _mm_cvtsi128_si32 (xmm);
399 #endif
400 }
401 
402 static inline void GDALCopyXMMToInt64(const __m128i xmm, void* pDest)
403 {
404 #ifdef CPL_CPU_REQUIRES_ALIGNED_ACCESS
405     GInt64 n64 = _mm_cvtsi128_si64 (xmm);   // Extract lower 64 bit word
406     memcpy(pDest, &n64, sizeof(n64));
407 #else
408     *static_cast<GInt64*>(pDest) = _mm_cvtsi128_si64 (xmm);
409 #endif
410 }
411 
412 #if __SSSE3__
413 #include <tmmintrin.h>
414 #endif
415 
416 #if __SSE4_1__
417 #include <smmintrin.h>
418 #endif
419 
420 template<>
421 inline void GDALCopy4Words(const float* pValueIn, GByte* const pValueOut)
422 {
423     __m128 xmm = _mm_loadu_ps(pValueIn);
424 
425     // The following clamping would be useless due to the final saturating
426     // packing if we could guarantee the input range in [INT_MIN,INT_MAX]
427     const __m128 p0d5 = _mm_set1_ps(0.5f);
428     const __m128 xmm_max = _mm_set1_ps(255);
429     xmm = _mm_add_ps(xmm, p0d5);
430     xmm = _mm_min_ps(_mm_max_ps(xmm, p0d5), xmm_max);
431 
432     __m128i xmm_i = _mm_cvttps_epi32 (xmm);
433 
434 #if __SSSE3__
435     xmm_i = _mm_shuffle_epi8(xmm_i, _mm_cvtsi32_si128(0 | (4 << 8) | (8 << 16) | (12 << 24)));
436 #else
437     xmm_i = _mm_packs_epi32(xmm_i, xmm_i);   // Pack int32 to int16
438     xmm_i = _mm_packus_epi16(xmm_i, xmm_i);  // Pack int16 to uint8
439 #endif
440     GDALCopyXMMToInt32(xmm_i, pValueOut);
441 }
442 
443 template<>
444 inline void GDALCopy4Words(const float* pValueIn, GInt16* const pValueOut)
445 {
446     __m128 xmm = _mm_loadu_ps(pValueIn);
447 
448     const __m128 xmm_min = _mm_set1_ps(-32768);
449     const __m128 xmm_max = _mm_set1_ps(32767);
450     xmm = _mm_min_ps(_mm_max_ps(xmm, xmm_min), xmm_max);
451 
452     const __m128 p0d5 = _mm_set1_ps(0.5f);
453     const __m128 m0d5 = _mm_set1_ps(-0.5f);
454     const __m128 mask = _mm_cmpge_ps(xmm, p0d5);
455     // f >= 0.5f ? f + 0.5f : f - 0.5f
456     xmm = _mm_add_ps(xmm, _mm_or_ps(_mm_and_ps(mask, p0d5),
457                                     _mm_andnot_ps(mask, m0d5)));
458 
459     __m128i xmm_i = _mm_cvttps_epi32 (xmm);
460 
461     xmm_i = _mm_packs_epi32(xmm_i, xmm_i);   // Pack int32 to int16
462     GDALCopyXMMToInt64(xmm_i, pValueOut);
463 }
464 
465 template<>
466 inline void GDALCopy4Words(const float* pValueIn, GUInt16* const pValueOut)
467 {
468     __m128 xmm = _mm_loadu_ps(pValueIn);
469 
470     const __m128 p0d5 = _mm_set1_ps(0.5f);
471     const __m128 xmm_max = _mm_set1_ps(65535);
472     xmm = _mm_add_ps(xmm, p0d5);
473     xmm = _mm_min_ps(_mm_max_ps(xmm, p0d5), xmm_max);
474 
475     __m128i xmm_i = _mm_cvttps_epi32 (xmm);
476 
477 #if __SSE4_1__
Bands(GDALDataset * poSelf)478      xmm_i = _mm_packus_epi32(xmm_i, xmm_i);   // Pack int32 to uint16
479 #else
480     // Translate to int16 range because _mm_packus_epi32 is SSE4.1 only
481     xmm_i = _mm_add_epi32(xmm_i, _mm_set1_epi32(-32768));
482     xmm_i = _mm_packs_epi32(xmm_i, xmm_i);   // Pack int32 to int16
483     // Translate back to uint16 range (actually -32768==32768 in int16)
484     xmm_i = _mm_add_epi16(xmm_i, _mm_set1_epi16(-32768));
485 #endif
486     GDALCopyXMMToInt64(xmm_i, pValueOut);
487 }
488 
489 #ifdef __AVX2__
490 
491 #include <immintrin.h>
492 template<>
493 inline void GDALCopy8Words(const float* pValueIn, GByte* const pValueOut)
494 {
495     __m256 ymm = _mm256_loadu_ps(pValueIn);
496 
497     const __m256 p0d5 = _mm256_set1_ps(0.5f);
498     const __m256 ymm_max = _mm256_set1_ps(255);
499     ymm = _mm256_add_ps(ymm, p0d5);
500     ymm = _mm256_min_ps(_mm256_max_ps(ymm, p0d5), ymm_max);
501 
502     __m256i ymm_i = _mm256_cvttps_epi32 (ymm);
503 
504     ymm_i = _mm256_packus_epi32(ymm_i, ymm_i);   // Pack int32 to uint16
505     ymm_i = _mm256_permute4x64_epi64(ymm_i, 0 | (2 << 2)); // AVX2
506 
507     __m128i xmm_i = _mm256_castsi256_si128(ymm_i);
508     xmm_i = _mm_packus_epi16(xmm_i, xmm_i);
509     GDALCopyXMMToInt64(xmm_i, pValueOut);
510 }
511 
512 template<>
513 inline void GDALCopy8Words(const float* pValueIn, GUInt16* const pValueOut)
514 {
515     __m256 ymm = _mm256_loadu_ps(pValueIn);
516 
517     const __m256 p0d5 = _mm256_set1_ps(0.5f);
518     const __m256 ymm_max = _mm256_set1_ps(65535);
519     ymm = _mm256_add_ps(ymm, p0d5);
520     ymm = _mm256_min_ps(_mm256_max_ps(ymm, p0d5), ymm_max);
521 
522     __m256i ymm_i = _mm256_cvttps_epi32 (ymm);
523 
524     ymm_i = _mm256_packus_epi32(ymm_i, ymm_i);   // Pack int32 to uint16
525     ymm_i = _mm256_permute4x64_epi64(ymm_i, 0 | (2 << 2)); // AVX2
526 
527     _mm_storeu_si128( reinterpret_cast<__m128i*>(pValueOut), _mm256_castsi256_si128(ymm_i) );
528 }
529 #else
530 template<>
531 inline void GDALCopy8Words(const float* pValueIn, GUInt16* const pValueOut)
532 {
533     __m128 xmm = _mm_loadu_ps(pValueIn);
534     __m128 xmm1 = _mm_loadu_ps(pValueIn+4);
535 
536     const __m128 p0d5 = _mm_set1_ps(0.5f);
537     const __m128 xmm_max = _mm_set1_ps(65535);
538     xmm = _mm_add_ps(xmm, p0d5);
539     xmm1 = _mm_add_ps(xmm1, p0d5);
540     xmm = _mm_min_ps(_mm_max_ps(xmm, p0d5), xmm_max);
541     xmm1 = _mm_min_ps(_mm_max_ps(xmm1, p0d5), xmm_max);
542 
543     __m128i xmm_i = _mm_cvttps_epi32 (xmm);
544     __m128i xmm1_i = _mm_cvttps_epi32 (xmm1);
545 
546 #if __SSE4_1__
547     xmm_i = _mm_packus_epi32(xmm_i, xmm1_i);   // Pack int32 to uint16
548 #else
549     // Translate to int16 range because _mm_packus_epi32 is SSE4.1 only
550     xmm_i = _mm_add_epi32(xmm_i, _mm_set1_epi32(-32768));
551     xmm1_i = _mm_add_epi32(xmm1_i, _mm_set1_epi32(-32768));
552     xmm_i = _mm_packs_epi32(xmm_i, xmm1_i);   // Pack int32 to int16
553     // Translate back to uint16 range (actually -32768==32768 in int16)
554     xmm_i = _mm_add_epi16(xmm_i, _mm_set1_epi16(-32768));
555 #endif
556     _mm_storeu_si128( reinterpret_cast<__m128i*>(pValueOut), xmm_i );
557 }
558 #endif
559 
560 
561 #ifdef notdef_because_slightly_slower_than_default_implementation
562 template<>
563 inline void GDALCopy4Words(const double* pValueIn, float* const pValueOut)
564 {
565     __m128d float_posmax = _mm_set1_pd(std::numeric_limits<float>::max());
566     __m128d float_negmax = _mm_set1_pd(-std::numeric_limits<float>::max());
567     __m128d float_posinf = _mm_set1_pd(std::numeric_limits<float>::infinity());
568     __m128d float_neginf = _mm_set1_pd(-std::numeric_limits<float>::infinity());
569     __m128d val01 = _mm_loadu_pd(pValueIn);
570     __m128d val23 = _mm_loadu_pd(pValueIn+2);
571     __m128d mask_max = _mm_cmpge_pd( val01, float_posmax );
572     __m128d mask_max23 = _mm_cmpge_pd( val23, float_posmax );
573     val01 = _mm_or_pd(_mm_and_pd(mask_max, float_posinf), _mm_andnot_pd(mask_max, val01));
574     val23 = _mm_or_pd(_mm_and_pd(mask_max23, float_posinf), _mm_andnot_pd(mask_max23, val23));
575     __m128d mask_min = _mm_cmple_pd( val01, float_negmax );
576     __m128d mask_min23 = _mm_cmple_pd( val23, float_negmax );
577     val01 = _mm_or_pd(_mm_and_pd(mask_min, float_neginf), _mm_andnot_pd(mask_min, val01));
578     val23 = _mm_or_pd(_mm_and_pd(mask_min23, float_neginf), _mm_andnot_pd(mask_min23, val23));
579     __m128 val01_s =  _mm_cvtpd_ps ( val01);
580     __m128 val23_s =  _mm_cvtpd_ps ( val23);
581     __m128i val01_i = _mm_castps_si128(val01_s);
582     __m128i val23_i = _mm_castps_si128(val23_s);
583     GDALCopyXMMToInt64(val01_i, pValueOut);
584     GDALCopyXMMToInt64(val23_i, pValueOut+2);
585 }
586 #endif
587 
588 #endif //  defined(__x86_64) || defined(_M_X64)
589 
590 #endif // GDAL_PRIV_TEMPLATES_HPP_INCLUDED
591