1 #ifndef NPY_CPU_DISPATCH_H_ 2 #define NPY_CPU_DISPATCH_H_ 3 /** 4 * This file is part of the NumPy CPU dispatcher. Please have a look at doc/reference/simd-optimizations.html 5 * To get a better understanding of the mechanism behind it. 6 */ 7 #include "npy_cpu_features.h" // NPY_CPU_HAVE 8 #include "numpy/utils.h" // NPY_EXPAND, NPY_CAT 9 /** 10 * Including the main configuration header 'npy_cpu_dispatch_config.h'. 11 * 12 * This header is generated by the distutils module 'ccompiler_opt', 13 * and contains all the #definitions and headers for platform-specific instruction-sets 14 * that had been configured through command arguments '--cpu-baseline' and '--cpu-dispatch'. 15 * 16 * It also contains extra C #definitions and macros that are used for implementing 17 * NumPy module's attributes `__cpu_baseline__` and `__cpu_dispaٍtch__`. 18 */ 19 /** 20 * Note: Always guard the generated headers within 'NPY_DISABLE_OPTIMIZATION', 21 * due the nature of command argument '--disable-optimization', 22 * which is explicitly disabling the module ccompiler_opt. 23 */ 24 #ifndef NPY_DISABLE_OPTIMIZATION 25 #if defined(__powerpc64__) && !defined(__cplusplus) && defined(bool) 26 /** 27 * "altivec.h" header contains the definitions(bool, vector, pixel), 28 * usually in c++ we undefine them after including the header. 29 * It's better anyway to take them off and use built-in types(__vector, __pixel, __bool) instead, 30 * since c99 supports bool variables which may lead to ambiguous errors. 31 */ 32 // backup 'bool' before including '_cpu_dispatch.h', since it may not defined as a compiler token. 33 #define NPY__DISPATCH_DEFBOOL 34 typedef bool npy__dispatch_bkbool; 35 #endif 36 #include "npy_cpu_dispatch_config.h" 37 #ifdef NPY_HAVE_VSX 38 #undef bool 39 #undef vector 40 #undef pixel 41 #ifdef NPY__DISPATCH_DEFBOOL 42 #define bool npy__dispatch_bkbool 43 #endif 44 #endif 45 #endif // !NPY_DISABLE_OPTIMIZATION 46 /** 47 * Macro NPY_CPU_DISPATCH_CURFX(NAME) 48 * 49 * Returns @NAME suffixed with "_" + "the current target" during compiling 50 * the wrapped sources that generated from the dispatch-able sources according 51 * to the provided configuration statements. 52 * 53 * It also returns @NAME as-is without any suffix when it comes to the baseline or 54 * in case if the optimization is disabled. 55 * 56 * The idea behind this Macro is to allow exporting certain symbols and to 57 * avoid linking duplications due to the nature of the dispatch-able sources. 58 * 59 * Example: 60 * @targets baseline avx avx512_skx vsx3 asimdhp // configration statments 61 * 62 * void NPY_CPU_DISPATCH_CURFX(dispatch_me)(const int *src, int *dst) 63 * { 64 * // the kernel 65 * } 66 * 67 * By assuming the required optimizations are enabled via '--cpu-dspatch' and 68 * the compiler supported them too, then the generated symbols will be named as follows: 69 * 70 * - x86: 71 * dispatch_me(const int*, int*) // baseline 72 * dispatch_me_AVX(const int*, int*) 73 * dispatch_me_AVX512_SKX(const int*, int*) 74 * 75 * - ppc64: 76 * dispatch_me(const int*, int*) 77 * dispatch_me_VSX3(const int*, int*) 78 * 79 * - ARM: 80 * dispatch_me(const int*, int*) 81 * dispatch_me_ASIMHP(const int*, int*) 82 * 83 * - unsupported arch or when optimization is disabled: 84 * dispatch_me(const int*, int*) 85 * 86 * For forward declarations, see 'NPY_CPU_DISPATCH_DECLARE'. 87 */ 88 #ifdef NPY__CPU_TARGET_CURRENT 89 // 'NPY__CPU_TARGET_CURRENT': only defined by the dispatch-able sources 90 #define NPY_CPU_DISPATCH_CURFX(NAME) NPY_CAT(NPY_CAT(NAME, _), NPY__CPU_TARGET_CURRENT) 91 #else 92 #define NPY_CPU_DISPATCH_CURFX(NAME) NPY_EXPAND(NAME) 93 #endif 94 /** 95 * Defining the default behavior for the configurable macros of dispatch-able sources, 96 * 'NPY__CPU_DISPATCH_CALL(...)' and 'NPY__CPU_DISPATCH_BASELINE_CALL(...)' 97 * 98 * These macros are defined inside the generated config files that been derived from 99 * the configuration statements of the dispatch-able sources. 100 * 101 * The generated config file takes the same name of the dispatch-able source with replacing 102 * the extension to '.h' instead of '.c', and it should be treated as a header template. 103 * 104 * For more clarification, please have a look at doc/reference/simd-optimizations.html. 105 */ 106 #ifndef NPY_DISABLE_OPTIMIZATION 107 #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ 108 &&"Expected config header of the dispatch-able source"; 109 #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \ 110 &&"Expected config header of the dispatch-able source"; 111 #else 112 /** 113 * We assume by default that all configuration statements contains 'baseline' option however, 114 * if the dispatch-able source doesn't require it, then the dispatch-able source and following macros 115 * need to be guard it with '#ifndef NPY_DISABLE_OPTIMIZATION' 116 */ 117 #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ 118 NPY_EXPAND(CB(__VA_ARGS__)) 119 #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) 120 #endif // !NPY_DISABLE_OPTIMIZATION 121 /** 122 * Macro NPY_CPU_DISPATCH_DECLARE(LEFT, ...) is used to provide forward 123 * declarations for the exported variables and functions that defined inside 124 * the dispatch-able sources. 125 * 126 * The first argument should ends with the exported function or variable name, 127 * while the Macro pasting the extra arguments. 128 * 129 * Examples: 130 * #ifndef NPY_DISABLE_OPTIMIZATION 131 * #include "dispatchable_source_name.dispatch.h" 132 * #endif 133 * 134 * NPY_CPU_DISPATCH_DECLARE(void dispatch_me, (const int*, int*)) 135 * NPY_CPU_DISPATCH_DECLARE(extern cb_type callback_tab, [TAB_SIZE]) 136 * 137 * By assuming the provided config header derived from a dispatch-able source, 138 * that configured with "@targets baseline sse41 vsx3 asimdhp", 139 * they supported by the compiler and enabled via '--cpu-dspatch', 140 * then the prototype declrations at the above example will equivalent to the follows: 141 * 142 * - x86: 143 * void dispatch_me(const int*, int*); // baseline 144 * void dispatch_me_SSE41(const int*, int*); 145 * 146 * extern cb_type callback_tab[TAB_SIZE]; 147 * extern cb_type callback_tab_SSE41[TAB_SIZE]; 148 * 149 * - ppc64: 150 * void dispatch_me(const int*, int*); 151 * void dispatch_me_VSX3(const int*, int*); 152 * 153 * extern cb_type callback_tab[TAB_SIZE]; 154 * extern cb_type callback_tab_VSX3[TAB_SIZE]; 155 * 156 * - ARM: 157 * void dispatch_me(const int*, int*); 158 * void dispatch_me_ASIMDHP(const int*, int*); 159 * 160 * extern cb_type callback_tab[TAB_SIZE]; 161 * extern cb_type callback_tab_ASIMDHP[TAB_SIZE]; 162 * 163 * - unsupported arch or when optimization is disabled: 164 * void dispatch_me(const int*, int*); 165 * extern cb_type callback_tab[TAB_SIZE]; 166 * 167 * For runtime dispatching, see 'NPY_CPU_DISPATCH_CALL' 168 */ 169 #define NPY_CPU_DISPATCH_DECLARE(...) \ 170 NPY__CPU_DISPATCH_CALL(NPY_CPU_DISPATCH_DECLARE_CHK_, NPY_CPU_DISPATCH_DECLARE_CB_, __VA_ARGS__) \ 171 NPY__CPU_DISPATCH_BASELINE_CALL(NPY_CPU_DISPATCH_DECLARE_BASE_CB_, __VA_ARGS__) 172 // Preprocessor callbacks 173 #define NPY_CPU_DISPATCH_DECLARE_CB_(DUMMY, TARGET_NAME, LEFT, ...) \ 174 NPY_CAT(NPY_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__; 175 #define NPY_CPU_DISPATCH_DECLARE_BASE_CB_(LEFT, ...) \ 176 LEFT __VA_ARGS__; 177 // Dummy CPU runtime checking 178 #define NPY_CPU_DISPATCH_DECLARE_CHK_(FEATURE) 179 /** 180 * Macro NPY_CPU_DISPATCH_DECLARE_XB(LEFT, ...) 181 * 182 * Same as `NPY_CPU_DISPATCH_DECLARE` but exclude the baseline declaration even 183 * if it was provided within the configration statments. 184 */ 185 #define NPY_CPU_DISPATCH_DECLARE_XB(...) \ 186 NPY__CPU_DISPATCH_CALL(NPY_CPU_DISPATCH_DECLARE_CHK_, NPY_CPU_DISPATCH_DECLARE_CB_, __VA_ARGS__) 187 /** 188 * Macro NPY_CPU_DISPATCH_CALL(LEFT, ...) is used for runtime dispatching 189 * of the exported functions and variables within the dispatch-able sources 190 * according to the highested interesed CPU features that supported by the 191 * running machine depending on the required optimizations. 192 * 193 * The first argument should ends with the exported function or variable name, 194 * while the Macro pasting the extra arguments. 195 * 196 * Example: 197 * Assume we have a dispatch-able source exporting the following function: 198 * 199 * @targets baseline avx2 avx512_skx // configration statments 200 * 201 * void NPY_CPU_DISPATCH_CURFX(dispatch_me)(const int *src, int *dst) 202 * { 203 * // the kernel 204 * } 205 * 206 * In order to call or to assign the pointer of it from outside the dispatch-able source, 207 * you have to use this Macro as follows: 208 * 209 * // bring the generated config header of the dispatch-able source 210 * #ifndef NPY_DISABLE_OPTIMIZATION 211 * #include "dispatchable_source_name.dispatch.h" 212 * #endif 213 * // forward declaration 214 * NPY_CPU_DISPATCH_DECLARE(dispatch_me, (const int *src, int *dst)) 215 * 216 * typedef void(*func_type)(const int*, int*); 217 * func_type the_callee(const int *src, int *dst, func_type *cb) 218 * { 219 * // direct call 220 * NPY_CPU_DISPATCH_CALL(dispatch_me, (src, dst)); 221 * // assign the pointer 222 * *cb = NPY_CPU_DISPATCH_CALL(dispatch_me); 223 * // or 224 * NPY_CPU_DISPATCH_CALL(*cb = dispatch_me); 225 * // return the pointer 226 * return NPY_CPU_DISPATCH_CALL(dispatch_me); 227 * } 228 */ 229 #define NPY_CPU_DISPATCH_CALL(...) \ 230 NPY__CPU_DISPATCH_CALL(NPY_CPU_HAVE, NPY_CPU_DISPATCH_CALL_CB_, __VA_ARGS__) \ 231 NPY__CPU_DISPATCH_BASELINE_CALL(NPY_CPU_DISPATCH_CALL_BASE_CB_, __VA_ARGS__) 232 // Preprocessor callbacks 233 #define NPY_CPU_DISPATCH_CALL_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \ 234 (TESTED_FEATURES) ? (NPY_CAT(NPY_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) : 235 #define NPY_CPU_DISPATCH_CALL_BASE_CB_(LEFT, ...) \ 236 (LEFT __VA_ARGS__) 237 /** 238 * Macro NPY_CPU_DISPATCH_CALL_XB(LEFT, ...) 239 * 240 * Same as `NPY_CPU_DISPATCH_DECLARE` but exclude the baseline declaration even 241 * if it was provided within the configration statements. 242 * Returns void. 243 */ 244 #define NPY_CPU_DISPATCH_CALL_XB_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \ 245 (TESTED_FEATURES) ? (void) (NPY_CAT(NPY_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) : 246 #define NPY_CPU_DISPATCH_CALL_XB(...) \ 247 NPY__CPU_DISPATCH_CALL(NPY_CPU_HAVE, NPY_CPU_DISPATCH_CALL_XB_CB_, __VA_ARGS__) \ 248 ((void) 0 /* discarded expression value */) 249 /** 250 * Macro NPY_CPU_DISPATCH_CALL_ALL(LEFT, ...) 251 * 252 * Same as `NPY_CPU_DISPATCH_CALL` but dispatching all the required optimizations for 253 * the exported functions and variables instead of highest interested one. 254 * Returns void. 255 */ 256 #define NPY_CPU_DISPATCH_CALL_ALL(...) \ 257 (NPY__CPU_DISPATCH_CALL(NPY_CPU_HAVE, NPY_CPU_DISPATCH_CALL_ALL_CB_, __VA_ARGS__) \ 258 NPY__CPU_DISPATCH_BASELINE_CALL(NPY_CPU_DISPATCH_CALL_ALL_BASE_CB_, __VA_ARGS__)) 259 // Preprocessor callbacks 260 #define NPY_CPU_DISPATCH_CALL_ALL_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \ 261 ((TESTED_FEATURES) ? (NPY_CAT(NPY_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) : (void) 0), 262 #define NPY_CPU_DISPATCH_CALL_ALL_BASE_CB_(LEFT, ...) \ 263 ( LEFT __VA_ARGS__ ) 264 265 #endif // NPY_CPU_DISPATCH_H_ 266