1 /** 2 * Copyright 2015 Kurt Kanzenbach 3 * 4 * Distributed under the Boost Software License, Version 1.0. (See accompanying 5 * file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt) 6 */ 7 8 #ifndef FLAT_ARRAY_DETAIL_SHORT_VEC_HELPERS_HPP 9 #define FLAT_ARRAY_DETAIL_SHORT_VEC_HELPERS_HPP 10 11 #include <libflatarray/config.h> 12 #include <cassert> 13 14 // uintptr_t is only available through C++11 15 #ifdef LIBFLATARRAY_WITH_CPP14 16 #include <cstdint> 17 #define _SHORTVEC_UINTPTR_T std::uintptr_t 18 #else 19 #define _SHORTVEC_UINTPTR_T unsigned long long 20 #endif 21 22 #ifdef __SSE4_1__ 23 #include <smmintrin.h> 24 #endif 25 26 /** 27 * This macro asserts that the pointer is correctly aligned. 28 * 29 * @param ptr pointer to check 30 * @param alignment alignement 31 */ 32 #define SHORTVEC_ASSERT_ALIGNED(ptr, alignment) \ 33 do { \ 34 assert((reinterpret_cast<_SHORTVEC_UINTPTR_T>(ptr) % (alignment)) == 0); \ 35 } while (0) 36 37 /** 38 * For some implementations there is the problem, that the compiler does not 39 * see, that some variables should be used uninitialized. 40 * Therefore here are compiler specific macros to disable and enable this warning. 41 */ 42 #if defined(__GNUC__) && !defined(__clang__) 43 #define SHORTVEC_DISABLE_WARNING_UNINITIALIZED \ 44 _Pragma("GCC diagnostic push") \ 45 _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") 46 #define SHORTVEC_ENABLE_WARNING_UNINITIALIZED \ 47 _Pragma("GCC diagnostic pop") 48 #endif 49 50 #ifdef __clang__ 51 #define SHORTVEC_DISABLE_WARNING_UNINITIALIZED \ 52 _Pragma("clang diagnostic push") \ 53 _Pragma("clang diagnostic ignored \"-Wuninitialized\"") 54 #define SHORTVEC_ENABLE_WARNING_UNINITIALIZED \ 55 _Pragma("clang diagnostic pop") 56 #endif 57 58 /** 59 * If compiler is not gcc and not clang, just remove these macros. 60 */ 61 #ifndef SHORTVEC_DISABLE_WARNING_UNINITIALIZED 62 #define SHORTVEC_DISABLE_WARNING_UNINITIALIZED 63 #endif 64 #ifndef SHORTVEC_ENABLE_WARNING_UNINITIALIZED 65 #define SHORTVEC_ENABLE_WARNING_UNINITIALIZED 66 #endif 67 68 69 #ifdef __SSE4_1__ 70 71 /** 72 * Insertps instruction which allows to insert an memory location 73 * into a xmm register. 74 * Instruction: insertps xmm, xmm/m32, imm8 75 * 76 * @param a xmm register 77 * @param base base pointer 78 * @param offset offset 79 * @param idx index, has to be a constant number like 0x10, no variable 80 */ 81 #define SHORTVEC_INSERT_PS(a, base, offset, idx) \ 82 do { \ 83 asm volatile ("insertps %1, (%q2, %q3, 4), %0\n" \ 84 : "+x" (a) : "N" (idx), "r" (base), "r" (offset) : "memory"); \ 85 } while (0) 86 87 #endif 88 89 #ifdef __AVX__ 90 91 /** 92 * Same as above just for AVX. 93 * Instruction: vinsertps xmm, xmm, xmm/m32, imm8 94 * 95 * @param a xmm register 96 * @param base base pointer 97 * @param offset offset 98 * @param idx index, has to be a constant number like 0x10, no variable 99 */ 100 #define SHORTVEC_INSERT_PS_AVX(a, base, offset, idx) \ 101 do { \ 102 asm volatile ("vinsertps %1, (%q2, %q3, 4), %0, %0\n" \ 103 : "+x" (a) : "N" (idx), "r" (base), "r" (offset) : "memory"); \ 104 } while (0) 105 106 #endif 107 108 namespace LibFlatArray { 109 110 namespace ShortVecHelpers { 111 112 #ifdef __SSE4_1__ 113 114 /** 115 * _mm_extract_ps returns an integer, but we need a float. 116 * This union can be used to get a float back. 117 */ 118 union ExtractResult { 119 int i; 120 float f; 121 }; 122 123 #endif 124 125 } 126 127 } 128 129 #endif 130