1 /****************************  instrset.h   **********************************
2 * Author:        Agner Fog
3 * Date created:  2012-05-30
4 * Last modified: 2016-11-25
5 * Version:       1.25
6 * Project:       vector classes
7 * Description:
8 * Header file for various compiler-specific tasks and other common tasks to
9 * vector class library:
10 * > selects the supported instruction set
11 * > defines integer types
12 * > defines compiler version macros
13 * > undefines certain macros that prevent function overloading
14 * > defines template class to represent compile-time integer constant
15 * > defines template for compile-time error messages
16 *
17 * (c) Copyright 2012-2016 GNU General Public License www.gnu.org/licenses
18 ******************************************************************************/
19 
20 #ifndef INSTRSET_H
21 #define INSTRSET_H 125
22 
// Detect 64 bit mode.
// Compilers announce an x86-64 target under several different macro names.
// This header decides on __x86_64__ only, and defines it here whenever one of
// the other names is present without it.
#ifndef __x86_64__
  #if defined(_M_AMD64) || defined(_M_X64) || defined(__amd64)
    #define __x86_64__ 1
  #endif
#endif
27 
// Choose the instruction set from compiler macros, unless the user has already
// defined INSTRSET. The highest instruction set that the compiler reports wins:
//   9 = AVX512F, 8 = AVX2, 7 = AVX, 6 = SSE4.2, 5 = SSE4.1, 4 = SSSE3,
//   3 = SSE3, 2 = SSE2 (also chosen for any x86-64 target), 1 = SSE, 0 = none
// Note: Most of these macros are not defined in Microsoft compilers
#if !defined(INSTRSET)
  #if defined(__AVX512F__) || defined(__AVX512__)
    #define INSTRSET 9
  #elif defined(__AVX2__)
    #define INSTRSET 8
  #elif defined(__AVX__)
    #define INSTRSET 7
  #elif defined(__SSE4_2__)
    #define INSTRSET 6
  #elif defined(__SSE4_1__)
    #define INSTRSET 5
  #elif defined(__SSSE3__)
    #define INSTRSET 4
  #elif defined(__SSE3__)
    #define INSTRSET 3
  #elif defined(__SSE2__) || defined(__x86_64__)
    #define INSTRSET 2
  #elif defined(__SSE__)
    #define INSTRSET 1
  #elif defined(_M_IX86_FP)
    // Defined in MS compiler. 1: SSE, 2: SSE2
    #define INSTRSET _M_IX86_FP
  #else
    #define INSTRSET 0
  #endif
#endif // INSTRSET
55 
// Include the header file for intrinsic functions that matches INSTRSET
#if INSTRSET >= 8 && defined(__GNUC__) && !defined(__INTEL_COMPILER)
  // AVX2 and later, Gnu-compatible compiler other than Intel:
  // x86intrin.h includes header files for whatever instruction sets are
  // specified on the compiler command line, such as xopintrin.h, fma4intrin.h
  #include <x86intrin.h>
#elif INSTRSET >= 7
  // AVX with any compiler, and AVX2 or later with the remaining compilers.
  // The MS version of immintrin.h covers AVX, AVX2 and FMA3
  #include <immintrin.h>
#elif INSTRSET == 6
  #include <nmmintrin.h>               // SSE4.2
#elif INSTRSET == 5
  #include <smmintrin.h>               // SSE4.1
#elif INSTRSET == 4
  #include <tmmintrin.h>               // SSSE3
#elif INSTRSET == 3
  #include <pmmintrin.h>               // SSE3
#elif INSTRSET == 2
  #include <emmintrin.h>               // SSE2
#elif INSTRSET == 1
  #include <xmmintrin.h>               // SSE
#endif // INSTRSET
80 
// Assume that all processors that have AVX2 also have FMA3.
// When AVX2 or higher is selected and the compiler has not defined __FMA__:
//  - g++ gives an error message if FMA intrinsics are used without -mfma, so
//    __FMA__ is left undefined there and a recommendation is printed instead;
//  - every other compiler gets __FMA__ defined here.
#if INSTRSET >= 8 && !defined(__FMA__)
  #if !defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
    #define __FMA__  1
  #else
    #pragma message "It is recommended to specify also option -mfma when using -mavx2 or higher"
  #endif
#endif
90 
// AMD instruction sets: XOP, FMA4 and SSE4A
#if (defined(__XOP__) || defined(__FMA4__)) && defined(__GNUC__)
  #include <x86intrin.h>               // AMD XOP (Gnu)
#elif defined(__XOP__) || defined(__FMA4__) || defined(__SSE4A__)
  #include <ammintrin.h>               // AMD XOP (Microsoft), or AMD SSE4A with any compiler
#endif // AMD instruction sets
101 
// FMA3 instruction set
// fmaintrin.h is an internal sub-header: GCC and Clang stop with
// "Never use <fmaintrin.h> directly" unless immintrin.h has been included
// before it. This can happen when INSTRSET has been set below 7 by hand while
// FMA is enabled, so the umbrella header is requested first. immintrin.h is
// protected against repeated inclusion, so this costs nothing when it has
// already been included.
#if defined (__FMA__) && (defined(__GNUC__) || defined(__clang__))  && ! defined (__INTEL_COMPILER)
#include <immintrin.h>
#include <fmaintrin.h>
#endif // __FMA__
106 
// FMA4 instruction set
// Both x86intrin.h and fma4intrin.h are needed. fma4intrin.h is an internal
// sub-header that GCC and Clang reject with "Never use <fma4intrin.h>
// directly" unless x86intrin.h has been included before it, so x86intrin.h is
// requested here rather than relying on an earlier include. This matters for
// a clang that does not define __GNUC__. x86intrin.h is protected against
// repeated inclusion.
#if defined (__FMA4__) && (defined(__GNUC__) || defined(__clang__))
#include <x86intrin.h>
#include <fma4intrin.h>
#endif // __FMA4__
111 
112 
// Define integer types with known size: int8_t ... uint64_t and intptr_t
#if !defined(__GNUC__) && !defined(__clang__) && !defined(_MSC_VER)
  // Unknown compiler: these definitions work with most compilers
  typedef signed   char      int8_t;
  typedef unsigned char      uint8_t;
  typedef signed   short int int16_t;
  typedef unsigned short int uint16_t;
  typedef signed   int       int32_t;
  typedef unsigned int       uint32_t;
  typedef long long          int64_t;
  typedef unsigned long long uint64_t;
  // intptr_t follows the pointer size of the target
  #if defined(__x86_64__)
    typedef int64_t intptr_t;
  #else
    typedef int32_t intptr_t;
  #endif
#elif !defined(__GNUC__) && !defined(__clang__) && _MSC_VER < 1600
  // Older Microsoft compilers have no stdint.h; use their sized built-in types
  typedef signed   __int8  int8_t;
  typedef unsigned __int8  uint8_t;
  typedef signed   __int16 int16_t;
  typedef unsigned __int16 uint16_t;
  typedef signed   __int32 int32_t;
  typedef unsigned __int32 uint32_t;
  typedef signed   __int64 int64_t;
  typedef unsigned __int64 uint64_t;
  // Define intptr_t only if nothing has defined it yet, and record that it is defined
  #if !defined(_INTPTR_T_DEFINED)
    #define _INTPTR_T_DEFINED
    #if defined(__x86_64__)
      typedef int64_t intptr_t;
    #else
      typedef int32_t intptr_t;
    #endif
  #endif
#else
  // Gnu, Clang and Microsoft compilers from _MSC_VER 1600:
  // compilers supporting C99 or C++0x have stdint.h defining these integer types
  #include <stdint.h>
#endif
151 
152 #include <stdlib.h>                              // define abs(int)
153 
154 #ifdef _MSC_VER                                  // Microsoft compiler or compatible Intel compiler
155 #include <intrin.h>                              // define _BitScanReverse(int), __cpuid(int[4],int), _xgetbv(int)
156 #endif // _MSC_VER
157 
// Queries for supported instruction sets.
// Only declared here; the functions are in instrset_detect.cpp.
// Everything is placed in namespace VCL_NAMESPACE when that macro is defined.
#ifdef VCL_NAMESPACE
namespace VCL_NAMESPACE {
#endif

    // tells which instruction sets are supported
    int  instrset_detect(void);

    // each of these is true if the named instruction set is supported
    bool hasFMA3(void);                              // FMA3
    bool hasFMA4(void);                              // FMA4
    bool hasXOP(void);                               // XOP
    bool hasAVX512ER(void);                          // AVX512ER

#ifdef VCL_NAMESPACE
}   // namespace VCL_NAMESPACE
#endif
170 
// Compiler version as a single number: major * 10000 + minor * 100 + patchlevel
#if defined(__clang__)
  // Clang version
  // Problem: The version number is not consistent across platforms
  // http://llvm.org/bugs/show_bug.cgi?id=12643
  // Apple bug 18746972
  #define CLANG_VERSION  ((__clang_major__) * 10000 + (__clang_minor__) * 100 + (__clang_patchlevel__))
#elif defined(__GNUC__) && !defined(GCC_VERSION)
  // GCC version. Never defined here under clang, and an existing GCC_VERSION is kept
  #define GCC_VERSION  ((__GNUC__) * 10000 + (__GNUC_MINOR__) * 100 + (__GNUC_PATCHLEVEL__))
#endif
183 
// Fix problem with non-overloadable macros named min and max in Windows headers.
// A macro named min or max prevents functions with these names from being
// declared and overloaded. Each macro is removed on its own, so that the fix
// also works when only one of them is defined, or when they were defined by a
// header that does not set _WINDEF_. NOMINMAX is defined to ask Windows
// headers included later not to define the macros again.
#ifdef _MSC_VER
#ifdef min
#undef min
#endif
#ifdef max
#undef max
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#endif
194 
#ifdef VCL_NAMESPACE
namespace VCL_NAMESPACE {
#endif

    // Empty classes that represent a compile-time integer constant as a type
    template <int32_t n>
    class Const_int_t {};                             // signed integer constant
    template <uint32_t n>
    class Const_uint_t {};                            // unsigned integer constant

    // Make an object of the class above. n must be a compile-time constant
    #define const_int(n)  (Const_int_t <n>())         // n is a signed integer
    #define const_uint(n) (Const_uint_t<n>())         // n is an unsigned integer

    // Template for compile-time error messages.
    // An object of Static_error_check<true> can be constructed anywhere
    template <bool condition>
    class Static_error_check {
    public:
        Static_error_check() {}
    };

    // The constructor is private when the condition is false, so constructing
    // an object of Static_error_check<false> generates a compile-time error
    template <>
    class Static_error_check<false> {
    private:
        Static_error_check() {}
    };

#ifdef VCL_NAMESPACE
}   // namespace VCL_NAMESPACE
#endif
214 
215 
216 #endif // INSTRSET_H
217 
218 // Local Variables:
219 // mode: C++
220 // tab-width: 4
221 // indent-tabs-mode: nil
222 // c-basic-offset: 4
223 // End:
224 // vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
225