1 /**
2  * Copyright 2015 Kurt Kanzenbach
3  *
4  * Distributed under the Boost Software License, Version 1.0. (See accompanying
5  * file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
6  */
7 
8 #ifndef FLAT_ARRAY_DETAIL_SHORT_VEC_HELPERS_HPP
9 #define FLAT_ARRAY_DETAIL_SHORT_VEC_HELPERS_HPP
10 
11 #include <libflatarray/config.h>
12 #include <cassert>
13 
// std::uintptr_t is declared in <cstdint>, which exists from C++11 onwards.
// It is used when the build announces C++14 through LIBFLATARRAY_WITH_CPP14
// or when the compiler itself reports C++11 or newer via __cplusplus.
// Otherwise unsigned long long serves as the integer type that pointers are
// converted to for the alignment check.
#if defined(LIBFLATARRAY_WITH_CPP14) || (defined(__cplusplus) && (__cplusplus >= 201103L))
#include <cstdint>
#define _SHORTVEC_UINTPTR_T std::uintptr_t
#else
#define _SHORTVEC_UINTPTR_T unsigned long long
#endif
21 
22 #ifdef __SSE4_1__
23 #include <smmintrin.h>
24 #endif
25 
/**
 * This macro asserts that the pointer is correctly aligned, i.e. that its
 * address is a multiple of the given alignment.
 *
 * An alignment of zero is reported as a failed assertion instead of being
 * used as the divisor of the modulo operation.
 *
 * The check is an assert(): it vanishes when NDEBUG is defined, and the
 * alignment argument appears twice in the asserted expression. Arguments
 * therefore must not have side effects.
 *
 * @param ptr pointer to check
 * @param alignment alignment the address has to be a multiple of
 */
#define SHORTVEC_ASSERT_ALIGNED(ptr, alignment)                         \
    do {                                                                \
        assert(((alignment) != 0) &&                                    \
               ((reinterpret_cast<_SHORTVEC_UINTPTR_T>(ptr) % (alignment)) == 0)); \
    } while (0)
36 
/**
 * For some implementations there is the problem that the compiler does not
 * see that some variables are meant to be used uninitialized.
 * Therefore here are compiler specific macros to disable and re-enable the
 * corresponding warnings:
 *
 *   SHORTVEC_DISABLE_WARNING_UNINITIALIZED  saves the diagnostic state and
 *                                           silences the warning(s)
 *   SHORTVEC_ENABLE_WARNING_UNINITIALIZED   restores the saved state
 *
 * The two macros have to be used as a pair, because the first one pushes
 * the diagnostic state and the second one pops it.
 */
#if defined(__clang__)

#define SHORTVEC_DISABLE_WARNING_UNINITIALIZED              \
    _Pragma("clang diagnostic push")                        \
    _Pragma("clang diagnostic ignored \"-Wuninitialized\"")
#define SHORTVEC_ENABLE_WARNING_UNINITIALIZED   \
    _Pragma("clang diagnostic pop")

#elif defined(__GNUC__)

// Since GCC 4.7 "may be used uninitialized" is reported under its own option
// -Wmaybe-uninitialized, which is not covered by ignoring -Wuninitialized.
// Other compilers that merely define __GNUC__ keep the original pragma set.
#if !defined(__INTEL_COMPILER) && \
    ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7)))
#define SHORTVEC_DISABLE_WARNING_UNINITIALIZED                    \
    _Pragma("GCC diagnostic push")                                \
    _Pragma("GCC diagnostic ignored \"-Wuninitialized\"")         \
    _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
#else
#define SHORTVEC_DISABLE_WARNING_UNINITIALIZED             \
    _Pragma("GCC diagnostic push")                         \
    _Pragma("GCC diagnostic ignored \"-Wuninitialized\"")
#endif
#define SHORTVEC_ENABLE_WARNING_UNINITIALIZED   \
    _Pragma("GCC diagnostic pop")

#endif

/**
 * If compiler is not gcc and not clang, the macros expand to nothing.
 */
#ifndef SHORTVEC_DISABLE_WARNING_UNINITIALIZED
#define SHORTVEC_DISABLE_WARNING_UNINITIALIZED
#endif
#ifndef SHORTVEC_ENABLE_WARNING_UNINITIALIZED
#define SHORTVEC_ENABLE_WARNING_UNINITIALIZED
#endif
67 
68 
#ifdef __SSE4_1__

/**
 * Insertps instruction which allows inserting a value from a memory location
 * into an xmm register.
 * Instruction: insertps xmm, xmm/m32, imm8
 *
 * The memory operand is addressed as (base, offset, 4), i.e. the element is
 * read from base + offset * 4 bytes.
 *
 * The assembler template refers to the 64 bit names of the base and offset
 * registers (%q2, %q3). The offset is therefore widened to 64 bit before it
 * is handed to the asm statement; otherwise a 32 bit offset would leave the
 * upper half of the register unspecified and a negative offset would not be
 * sign-extended.
 *
 * NOTE(review): the %q register names look x86-64 specific -- confirm that
 * this macro is never used in 32 bit builds.
 *
 * @param a xmm register, read and written
 * @param base base pointer
 * @param offset offset relative to base, counted in 4 byte elements
 * @param idx index, has to be a constant number like 0x10, no variable
 */
#define SHORTVEC_INSERT_PS(a, base, offset, idx)                        \
    do {                                                                \
        asm volatile ("insertps %1, (%q2, %q3, 4), %0\n"                \
                      : "+x" (a)                                        \
                      : "N" (idx),                                      \
                        "r" (base),                                     \
                        "r" (static_cast<long long>(offset))            \
                      : "memory");                                      \
    } while (0)

#endif
88 
#ifdef __AVX__

/**
 * Inserts a value from a memory location into an xmm register using the
 * AVX encoding of insertps.
 * Instruction: vinsertps xmm, xmm, xmm/m32, imm8
 *
 * The register a is used both as first source and as destination. The
 * memory operand is addressed as (base, offset, 4), i.e. the element is
 * read from base + offset * 4 bytes.
 *
 * The assembler template refers to the 64 bit names of the base and offset
 * registers (%q2, %q3). The offset is therefore widened to 64 bit before it
 * is handed to the asm statement; otherwise a 32 bit offset would leave the
 * upper half of the register unspecified and a negative offset would not be
 * sign-extended.
 *
 * NOTE(review): the %q register names look x86-64 specific -- confirm that
 * this macro is never used in 32 bit builds.
 *
 * @param a xmm register, read and written
 * @param base base pointer
 * @param offset offset relative to base, counted in 4 byte elements
 * @param idx index, has to be a constant number like 0x10, no variable
 */
#define SHORTVEC_INSERT_PS_AVX(a, base, offset, idx)                    \
    do {                                                                \
        asm volatile ("vinsertps %1, (%q2, %q3, 4), %0, %0\n"           \
                      : "+x" (a)                                        \
                      : "N" (idx),                                      \
                        "r" (base),                                     \
                        "r" (static_cast<long long>(offset))            \
                      : "memory");                                      \
    } while (0)

#endif
107 
namespace LibFlatArray {
namespace ShortVecHelpers {

#ifdef __SSE4_1__

/**
 * Helper for code that needs a float where _mm_extract_ps only hands back
 * an integer: the extracted value can be stored through the integer member
 * and picked up again through the float member.
 */
union ExtractResult
{
    int i;
    float f;
};

#endif

} // namespace ShortVecHelpers
} // namespace LibFlatArray
128 
129 #endif
130