/****************************  instrset.h   **********************************
* Author:        Agner Fog
* Date created:  2012-05-30
* Last modified: 2016-11-25
* Version:       1.25
* Project:       vector classes
* Description:
* Header file for various compiler-specific tasks and other common tasks to
* vector class library:
* > selects the supported instruction set
* > defines integer types
* > defines compiler version macros
* > undefines certain macros that prevent function overloading
* > defines template class to represent compile-time integer constant
* > defines template for compile-time error messages
*
* (c) Copyright 2012-2016 GNU General Public License www.gnu.org/licenses
******************************************************************************/

#ifndef INSTRSET_H
#define INSTRSET_H  125   // include guard; value presumably encodes version 1.25

// Detect 64 bit mode
#if (defined(_M_AMD64) || defined(_M_X64) || defined(__amd64) ) && ! defined(__x86_64__)
#define __x86_64__ 1  // There are many different macros for this, decide on only one
#endif

// Find instruction set from compiler macros if INSTRSET not defined.
// Note: Most of these macros are not defined in Microsoft compilers.
// The first matching macro wins, so the highest instruction set is selected:
//   9: AVX512F   8: AVX2   7: AVX   6: SSE4.2   5: SSE4.1
//   4: SSSE3     3: SSE3   2: SSE2 (implied by 64 bit mode)   1: SSE
//   0: none of the macros below is defined
#ifndef INSTRSET
#if defined ( __AVX512F__ ) || defined ( __AVX512__ )
#define INSTRSET 9
#elif defined ( __AVX2__ )
#define INSTRSET 8
#elif defined ( __AVX__ )
#define INSTRSET 7
#elif defined ( __SSE4_2__ )
#define INSTRSET 6
#elif defined ( __SSE4_1__ )
#define INSTRSET 5
#elif defined ( __SSSE3__ )
#define INSTRSET 4
#elif defined ( __SSE3__ )
#define INSTRSET 3
#elif defined ( __SSE2__ ) || defined ( __x86_64__ )
#define INSTRSET 2
#elif defined ( __SSE__ )
#define INSTRSET 1
#elif defined ( _M_IX86_FP )           // Defined in MS compiler. 1: SSE, 2: SSE2
#define INSTRSET _M_IX86_FP
#else
#define INSTRSET 0
#endif // instruction set defines
#endif // INSTRSET

// Include the appropriate header file for intrinsic functions
#if INSTRSET > 7                       // AVX2 and later
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
#include <x86intrin.h>                 // x86intrin.h includes header files for whatever instruction
                                       // sets are specified on the compiler command line, such as:
                                       // xopintrin.h, fma4intrin.h
#else
#include <immintrin.h>                 // MS version of immintrin.h covers AVX, AVX2 and FMA3
#endif // __GNUC__
#elif INSTRSET == 7
#include <immintrin.h>                 // AVX
#elif INSTRSET == 6
#include <nmmintrin.h>                 // SSE4.2
#elif INSTRSET == 5
#include <smmintrin.h>                 // SSE4.1
#elif INSTRSET == 4
#include <tmmintrin.h>                 // SSSE3
#elif INSTRSET == 3
#include <pmmintrin.h>                 // SSE3
#elif INSTRSET == 2
#include <emmintrin.h>                 // SSE2
#elif INSTRSET == 1
#include <xmmintrin.h>                 // SSE
#endif // INSTRSET

#if INSTRSET >= 8 && !defined(__FMA__)
// Assume that all processors that have AVX2 also have FMA3
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) && ! defined (__clang__)
// Prevent error message in g++ when using FMA intrinsics with avx2:
// here __FMA__ is left undefined and the user is only advised to add -mfma
#pragma message "It is recommended to specify also option -mfma when using -mavx2 or higher"
#else
#define __FMA__  1
#endif
#endif

// AMD instruction sets
#if defined (__XOP__) || defined (__FMA4__)
#ifdef __GNUC__
#include <x86intrin.h>                 // AMD XOP (Gnu)
#else
#include <ammintrin.h>                 // AMD XOP (Microsoft)
#endif //  __GNUC__
#elif defined (__SSE4A__)              // AMD SSE4A
#include <ammintrin.h>
#endif // __XOP__

// FMA3 instruction set
#if defined (__FMA__) && (defined(__GNUC__) || defined(__clang__))  && ! defined (__INTEL_COMPILER)
#include <fmaintrin.h>
#endif // __FMA__

// FMA4 instruction set
#if defined (__FMA4__) && (defined(__GNUC__) || defined(__clang__))
#include <fma4intrin.h> // must have both x86intrin.h and fma4intrin.h, don't know why
#endif // __FMA4__


// Define integer types with known size
#if defined(__GNUC__) || defined(__clang__) || (defined(_MSC_VER) && _MSC_VER >= 1600)
  // Compilers supporting C99 or C++0x have stdint.h defining these integer types
  #include <stdint.h>
#elif defined(_MSC_VER)
  // Older Microsoft compilers have their own definitions
  typedef signed   __int8  int8_t;
  typedef unsigned __int8  uint8_t;
  typedef signed   __int16 int16_t;
  typedef unsigned __int16 uint16_t;
  typedef signed   __int32 int32_t;
  typedef unsigned __int32 uint32_t;
  typedef signed   __int64 int64_t;
  typedef unsigned __int64 uint64_t;
  #ifndef _INTPTR_T_DEFINED
    #define _INTPTR_T_DEFINED
    #ifdef  __x86_64__
      typedef int64_t intptr_t;
    #else
      typedef int32_t intptr_t;
    #endif
  #endif
#else
  // This works with most compilers
  typedef signed   char      int8_t;
  typedef unsigned char      uint8_t;
  typedef signed   short int int16_t;
  typedef unsigned short int uint16_t;
  typedef signed   int       int32_t;
  typedef unsigned int       uint32_t;
  typedef long long          int64_t;
  typedef unsigned long long uint64_t;
  #ifdef  __x86_64__
    typedef int64_t intptr_t;
  #else
    typedef int32_t intptr_t;
  #endif
#endif

#include <stdlib.h>                    // define abs(int)

#ifdef _MSC_VER                        // Microsoft compiler or compatible Intel compiler
#include <intrin.h>                    // define _BitScanReverse(int), __cpuid(int[4],int), _xgetbv(int)
#endif // _MSC_VER

// functions in instrset_detect.cpp
// (declared inside VCL_NAMESPACE when that macro is defined)
#ifdef VCL_NAMESPACE
namespace VCL_NAMESPACE {
#endif
    int  instrset_detect(void);        // tells which instruction sets are supported
    bool hasFMA3(void);                // true if FMA3 instructions supported
    bool hasFMA4(void);                // true if FMA4 instructions supported
    bool hasXOP(void);                 // true if XOP instructions supported
    bool hasAVX512ER(void);            // true if AVX512ER instructions supported
#ifdef VCL_NAMESPACE
}
#endif

// GCC version
#if defined(__GNUC__) && !defined (GCC_VERSION) && !defined (__clang__)
#define GCC_VERSION  ((__GNUC__) * 10000 + (__GNUC_MINOR__) * 100 + (__GNUC_PATCHLEVEL__))
#endif

// Clang version
#if defined (__clang__)
#define CLANG_VERSION  ((__clang_major__) * 10000 + (__clang_minor__) * 100 + (__clang_patchlevel__))
// Problem: The version number is not consistent across platforms
// http://llvm.org/bugs/show_bug.cgi?id=12643
// Apple bug 18746972
#endif

// Fix problem with non-overloadable macros named min and max in WinDef.h
#ifdef _MSC_VER
#if defined (_WINDEF_) && defined(min) && defined(max)
// WinDef.h was already included: remove its macros
#undef min
#undef max
#endif
#ifndef NOMINMAX
// Defined so that a later inclusion does not define the macros again
#define NOMINMAX
#endif
#endif

#ifdef VCL_NAMESPACE
namespace VCL_NAMESPACE {
#endif
    // Template class to represent compile-time integer constant
    template <int32_t  n> class Const_int_t  {};     // represent compile-time signed integer constant
    template <uint32_t n> class Const_uint_t {};     // represent compile-time unsigned integer constant
    #define const_int(n)  (Const_int_t <n>())        // n must be compile-time integer constant
    #define const_uint(n) (Const_uint_t<n>())        // n must be compile-time unsigned integer constant

    // Template for compile-time error messages.
    // Constructing Static_error_check<cond> compiles only when cond is true:
    // the specialization for false has a private constructor.
    template <bool> class Static_error_check {
    public:  Static_error_check() {}
    };
    template <> class Static_error_check<false> {    // generate compile-time error if false
    private: Static_error_check() {}
    };
#ifdef VCL_NAMESPACE
}
#endif


#endif // INSTRSET_H

// Local Variables:
// mode: C++
// tab-width: 4
// indent-tabs-mode: nil
// c-basic-offset: 4
// End:
// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s