1 #ifndef NPY_CPU_DISPATCH_H_
2 #define NPY_CPU_DISPATCH_H_
3 /**
4  * This file is part of the NumPy CPU dispatcher. Please have a look at doc/reference/simd-optimizations.html
5  * to get a better understanding of the mechanism behind it.
6  */
7 #include "npy_cpu_features.h" // NPY_CPU_HAVE
8 #include "numpy/utils.h" // NPY_EXPAND, NPY_CAT
9 /**
10  * Including the main configuration header 'npy_cpu_dispatch_config.h'.
11  *
12  * This header is generated by the distutils module 'ccompiler_opt',
13  * and contains all the #definitions and headers for platform-specific instruction-sets
14  * that had been configured through command arguments '--cpu-baseline' and '--cpu-dispatch'.
15  *
16  * It also contains extra C #definitions and macros that are used for implementing
17  * NumPy module's attributes `__cpu_baseline__` and `__cpu_dispatch__`.
18  */
19 /**
20  * Note: Always guard the generated headers within 'NPY_DISABLE_OPTIMIZATION',
21  * due to the nature of the command argument '--disable-optimization',
22  * which explicitly disables the module ccompiler_opt.
23  */
24 #ifndef NPY_DISABLE_OPTIMIZATION
25     #if defined(__powerpc64__) && !defined(__cplusplus) && defined(bool)
26         /**
27          * "altivec.h" header contains the definitions(bool, vector, pixel),
28          * usually in c++ we undefine them after including the header.
29          * It's better anyway to take them off and use built-in types(__vector, __pixel, __bool) instead,
30          * since c99 supports bool variables which may lead to ambiguous errors.
31         */
32         // backup 'bool' before including '_cpu_dispatch.h', since it may not defined as a compiler token.
33         #define NPY__DISPATCH_DEFBOOL
34         typedef bool npy__dispatch_bkbool;
35     #endif
36     #include "npy_cpu_dispatch_config.h"
37     #ifdef NPY_HAVE_VSX
38         #undef bool
39         #undef vector
40         #undef pixel
41         #ifdef NPY__DISPATCH_DEFBOOL
42             #define bool npy__dispatch_bkbool
43         #endif
44     #endif
45 #endif // !NPY_DISABLE_OPTIMIZATION
46 /**
47  * Macro NPY_CPU_DISPATCH_CURFX(NAME)
48  *
49  * Returns @NAME suffixed with "_" + "the current target" during compiling
50  * the wrapped sources that generated from the dispatch-able sources according
51  * to the provided configuration statements.
52  *
53  * It also returns @NAME as-is without any suffix when it comes to the baseline or
54  * in case if the optimization is disabled.
55  *
56  * The idea behind this Macro is to allow exporting certain symbols and to
57  * avoid linking duplications due to the nature of the dispatch-able sources.
58  *
59  * Example:
60  *    @targets baseline avx avx512_skx vsx3 asimdhp // configuration statements
61  *
62  *    void NPY_CPU_DISPATCH_CURFX(dispatch_me)(const int *src, int *dst)
63  *    {
64  *       // the kernel
65  *    }
66  *
67  * By assuming the required optimizations are enabled via '--cpu-dispatch' and
68  * the compiler supported them too, then the generated symbols will be named as follows:
69  *
70  * - x86:
71  *      dispatch_me(const int*, int*) // baseline
72  *      dispatch_me_AVX(const int*, int*)
73  *      dispatch_me_AVX512_SKX(const int*, int*)
74  *
75  * - ppc64:
76  *      dispatch_me(const int*, int*)
77  *      dispatch_me_VSX3(const int*, int*)
78  *
79  * - ARM:
80  *      dispatch_me(const int*, int*)
81  *      dispatch_me_ASIMDHP(const int*, int*)
82  *
83  * - unsupported arch or when optimization is disabled:
84  *      dispatch_me(const int*, int*)
85  *
86  * For forward declarations, see 'NPY_CPU_DISPATCH_DECLARE'.
87  */
#if !defined(NPY__CPU_TARGET_CURRENT)
    // no current target (baseline, or optimization disabled): the name is kept as-is
    #define NPY_CPU_DISPATCH_CURFX(NAME) NPY_EXPAND(NAME)
#else
    // 'NPY__CPU_TARGET_CURRENT' is only defined by the dispatch-able sources;
    // the name gets "_" plus the current target appended
    #define NPY_CPU_DISPATCH_CURFX(NAME) \
        NPY_CAT(NPY_CAT(NAME, _), NPY__CPU_TARGET_CURRENT)
#endif
94 /**
95  * Defining the default behavior for the configurable macros of dispatch-able sources,
96  * 'NPY__CPU_DISPATCH_CALL(...)' and 'NPY__CPU_DISPATCH_BASELINE_CALL(...)'
97  *
98  * These macros are defined inside the generated config files that have been derived from
99  * the configuration statements of the dispatch-able sources.
100  *
101  * The generated config file takes the same name of the dispatch-able source with replacing
102  * the extension to '.h' instead of '.c', and it should be treated as a header template.
103  *
104  * For more clarification, please have a look at doc/reference/simd-optimizations.html.
105  */
#ifdef NPY_DISABLE_OPTIMIZATION
    /**
     * By default every set of configuration statements is assumed to contain the
     * 'baseline' option. If a dispatch-able source doesn't require it, then that
     * source and the uses of the following macros need to be guarded with
     * '#ifndef NPY_DISABLE_OPTIMIZATION'.
     */
    // no dispatched targets when optimization is disabled
    #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...)
    // only the baseline callback gets invoked
    #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \
        NPY_EXPAND(CB(__VA_ARGS__))
#else
    // Placeholders carrying a reminder text; the real definitions come from the
    // generated config header of the dispatch-able source.
    #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) \
        &&"Expected config header of the dispatch-able source";
    #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \
        &&"Expected config header of the dispatch-able source";
#endif // NPY_DISABLE_OPTIMIZATION
121 /**
122  * Macro NPY_CPU_DISPATCH_DECLARE(LEFT, ...) is used to provide forward
123  * declarations for the exported variables and functions that defined inside
124  * the dispatch-able sources.
125  *
126  * The first argument should ends with the exported function or variable name,
127  * while the Macro pasting the extra arguments.
128  *
129  * Examples:
130  *    #ifndef NPY_DISABLE_OPTIMIZATION
131  *       #include "dispatchable_source_name.dispatch.h"
132  *    #endif
133  *
134  *    NPY_CPU_DISPATCH_DECLARE(void dispatch_me, (const int*, int*))
135  *    NPY_CPU_DISPATCH_DECLARE(extern cb_type callback_tab, [TAB_SIZE])
136  *
137  * By assuming the provided config header derived from a dispatch-able source,
138  * that configured with "@targets baseline sse41 vsx3 asimdhp",
139  * that they are supported by the compiler and enabled via '--cpu-dispatch',
140  * then the prototype declarations in the above example will be equivalent to the following:
141  *
142  * - x86:
143  *      void dispatch_me(const int*, int*); // baseline
144  *      void dispatch_me_SSE41(const int*, int*);
145  *
146  *      extern cb_type callback_tab[TAB_SIZE];
147  *      extern cb_type callback_tab_SSE41[TAB_SIZE];
148  *
149  * - ppc64:
150  *      void dispatch_me(const int*, int*);
151  *      void dispatch_me_VSX3(const int*, int*);
152  *
153  *      extern cb_type callback_tab[TAB_SIZE];
154  *      extern cb_type callback_tab_VSX3[TAB_SIZE];
155  *
156  * - ARM:
157  *     void dispatch_me(const int*, int*);
158  *     void dispatch_me_ASIMDHP(const int*, int*);
159  *
160  *     extern cb_type callback_tab[TAB_SIZE];
161  *     extern cb_type callback_tab_ASIMDHP[TAB_SIZE];
162  *
163  * - unsupported arch or when optimization is disabled:
164  *     void dispatch_me(const int*, int*);
165  *     extern cb_type callback_tab[TAB_SIZE];
166  *
167  * For runtime dispatching, see 'NPY_CPU_DISPATCH_CALL'
168  */
#define NPY_CPU_DISPATCH_DECLARE(...) \
    NPY__CPU_DISPATCH_CALL( \
        NPY_CPU_DISPATCH_DECLARE_CHK_, NPY_CPU_DISPATCH_DECLARE_CB_, __VA_ARGS__ \
    ) \
    NPY__CPU_DISPATCH_BASELINE_CALL( \
        NPY_CPU_DISPATCH_DECLARE_BASE_CB_, __VA_ARGS__ \
    )
// Preprocessor callback for a dispatched target:
// appends "_" + TARGET to the declared name, then pastes the remaining arguments
#define NPY_CPU_DISPATCH_DECLARE_CB_(UNUSED_CHK, TARGET, DECL, ...) \
    NPY_CAT(NPY_CAT(DECL, _), TARGET) __VA_ARGS__;
// Preprocessor callback for the baseline: the declared name stays untouched
#define NPY_CPU_DISPATCH_DECLARE_BASE_CB_(DECL, ...) \
    DECL __VA_ARGS__;
// Dummy CPU runtime checking, expands to nothing
#define NPY_CPU_DISPATCH_DECLARE_CHK_(FEATURE)
/**
 * Macro NPY_CPU_DISPATCH_DECLARE_XB(LEFT, ...)
 *
 * Works like `NPY_CPU_DISPATCH_DECLARE`, except that the baseline declaration
 * is left out even when 'baseline' is part of the configuration statements.
 */
#define NPY_CPU_DISPATCH_DECLARE_XB(...) \
    NPY__CPU_DISPATCH_CALL( \
        NPY_CPU_DISPATCH_DECLARE_CHK_, \
        NPY_CPU_DISPATCH_DECLARE_CB_, \
        __VA_ARGS__ \
    )
187 /**
188  * Macro NPY_CPU_DISPATCH_CALL(LEFT, ...) is used for runtime dispatching
189  * of the exported functions and variables within the dispatch-able sources
190  * according to the highest interested CPU features that are supported by the
191  * running machine depending on the required optimizations.
192  *
193  * The first argument should ends with the exported function or variable name,
194  * while the Macro pasting the extra arguments.
195  *
196  * Example:
197  *  Assume we have a dispatch-able source exporting the following function:
198  *
199  *    @targets baseline avx2 avx512_skx // configuration statements
200  *
201  *    void NPY_CPU_DISPATCH_CURFX(dispatch_me)(const int *src, int *dst)
202  *    {
203  *       // the kernel
204  *    }
205  *
206  *  In order to call or to assign the pointer of it from outside the dispatch-able source,
207  *  you have to use this Macro as follows:
208  *
209  *    // bring the generated config header of the dispatch-able source
210  *    #ifndef NPY_DISABLE_OPTIMIZATION
211  *        #include "dispatchable_source_name.dispatch.h"
212  *    #endif
213  *    // forward declaration
214  *    NPY_CPU_DISPATCH_DECLARE(void dispatch_me, (const int *src, int *dst))
215  *
216  *    typedef void(*func_type)(const int*, int*);
217  *    func_type the_callee(const int *src, int *dst, func_type *cb)
218  *    {
219  *        // direct call
220  *        NPY_CPU_DISPATCH_CALL(dispatch_me, (src, dst));
221  *        // assign the pointer
222  *        *cb = NPY_CPU_DISPATCH_CALL(dispatch_me);
223  *        // or
224  *        NPY_CPU_DISPATCH_CALL(*cb = dispatch_me);
225  *        // return the pointer
226  *        return NPY_CPU_DISPATCH_CALL(dispatch_me);
227  *    }
228  */
/*
 * Implementation note: the expansion is a chain of conditional operators,
 *     (features A) ? (name_A args) : (features B) ? (name_B args) : (name args)
 * built from one NPY_CPU_DISPATCH_CALL_CB_ per dispatched target followed by
 * NPY_CPU_DISPATCH_CALL_BASE_CB_ for the baseline.
 *
 * The whole chain is wrapped in parentheses so that it behaves as a single
 * operand inside a larger expression; otherwise a trailing operator such as
 * in 'NPY_CPU_DISPATCH_CALL(f, (x)) + 1' would bind to the baseline branch only.
 */
#define NPY_CPU_DISPATCH_CALL(...) \
    (NPY__CPU_DISPATCH_CALL(NPY_CPU_HAVE, NPY_CPU_DISPATCH_CALL_CB_, __VA_ARGS__) \
     NPY__CPU_DISPATCH_BASELINE_CALL(NPY_CPU_DISPATCH_CALL_BASE_CB_, __VA_ARGS__))
// Preprocessor callbacks
// One link of the chain: "(runtime test) ? (suffixed name + pasted arguments) :",
// deliberately left open so that the next link or the baseline completes it.
#define NPY_CPU_DISPATCH_CALL_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \
    (TESTED_FEATURES) ? (NPY_CAT(NPY_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) :
// Last operand of the chain: the unsuffixed (baseline) name + pasted arguments
#define NPY_CPU_DISPATCH_CALL_BASE_CB_(LEFT, ...) \
    (LEFT __VA_ARGS__)
/**
 * Macro NPY_CPU_DISPATCH_CALL_XB(LEFT, ...)
 *
 * Works like `NPY_CPU_DISPATCH_CALL`, except that the baseline is never called
 * even when 'baseline' is part of the configuration statements.
 * Returns void.
 */
#define NPY_CPU_DISPATCH_CALL_XB(...) \
    NPY__CPU_DISPATCH_CALL( \
        NPY_CPU_HAVE, NPY_CPU_DISPATCH_CALL_XB_CB_, __VA_ARGS__ \
    ) ((void) 0 /* discarded expression value */)
// Preprocessor callback: "(runtime test) ? (void) (suffixed name + pasted arguments) :",
// left open so that the next target or the final '(void) 0' completes it
#define NPY_CPU_DISPATCH_CALL_XB_CB_(FEATURES_OK, TARGET, CALLEE, ...) \
    (FEATURES_OK) ? (void) (NPY_CAT(NPY_CAT(CALLEE, _), TARGET) __VA_ARGS__) :
/**
 * Macro NPY_CPU_DISPATCH_CALL_ALL(LEFT, ...)
 *
 * Same as `NPY_CPU_DISPATCH_CALL` but dispatching all the required optimizations for
 * the exported functions and variables instead of the highest interested one.
 *
 * The expansion is one parenthesized comma expression: every dispatched target
 * whose runtime test succeeds is evaluated in turn, followed by the baseline.
 * It is meant to be used for its side effects only; do not rely on its value.
 */
#define NPY_CPU_DISPATCH_CALL_ALL(...) \
    (NPY__CPU_DISPATCH_CALL(NPY_CPU_HAVE, NPY_CPU_DISPATCH_CALL_ALL_CB_, __VA_ARGS__) \
    NPY__CPU_DISPATCH_BASELINE_CALL(NPY_CPU_DISPATCH_CALL_ALL_BASE_CB_, __VA_ARGS__))
// Preprocessor callbacks
// The dispatched expression is cast to void so that both operands of the
// conditional operator have type void, whatever the exported symbol returns
// (same form as NPY_CPU_DISPATCH_CALL_XB_CB_).
#define NPY_CPU_DISPATCH_CALL_ALL_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \
    ((TESTED_FEATURES) ? (void) (NPY_CAT(NPY_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__) : (void) 0),
// Baseline: the unsuffixed name + pasted arguments, closing the comma expression
#define NPY_CPU_DISPATCH_CALL_ALL_BASE_CB_(LEFT, ...) \
    ( LEFT __VA_ARGS__ )
264 
265 #endif // NPY_CPU_DISPATCH_H_
266