1 /*******************************************************************************
2 * Copyright 2017-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16 
17 #ifndef DNNL_COMMON_HPP
18 #define DNNL_COMMON_HPP
19 
20 #include <functional>
21 #include <stddef.h>
22 #include <stdint.h>
23 
24 #include <vector>
25 
26 #include "oneapi/dnnl/dnnl.h"
27 #include "src/common/bfloat16.hpp"
28 #include "src/common/float16.hpp"
29 #include "src/common/nstl.hpp"
30 
31 int check_pd_cache(dnnl_primitive_desc_t pd);
32 int check_primitive_cache(dnnl_primitive_t p);
33 
34 #include "common.hpp"
35 #include "dnn_types.hpp"
36 #include "dnnl_debug.hpp"
37 #include "utils/dims.hpp"
38 
39 #define for_ for
40 
// Evaluates the oneDNN call `f` once and, on any status other than
// dnnl_success, makes the enclosing function return FAIL.
// When the severity `s` is WARN or CRIT the failing expression and its status
// are printed first; CRIT additionally terminates the process with exit code 2.
#define DNN_SAFE(f, s) \
    do { \
        const dnnl_status_t status__ = f; \
        if (status__ != dnnl_success) { \
            if (s == CRIT || s == WARN) { \
                BENCHDNN_PRINT(0, "error [%s:%d]: '%s' -> %s(%d)\n", \
                        __PRETTY_FUNCTION__, __LINE__, #f, \
                        status2str(status__), static_cast<int>(status__)); \
                fflush(nullptr); \
                if (s == CRIT) exit(2); \
            } \
            return FAIL; \
        } \
    } while (0)
55 
// Variant of the status check for contexts that cannot return a value:
// evaluates the oneDNN call `f` once and, if it does not yield dnnl_success,
// prints the failing expression with its status and exits with code 2.
#define DNN_SAFE_V(f) \
    do { \
        const dnnl_status_t status__ = f; \
        if (status__ != dnnl_success) { \
            BENCHDNN_PRINT(0, "error [%s:%d]: '%s' -> %s(%d)\n", \
                    __PRETTY_FUNCTION__, __LINE__, STRINGIFY(f), \
                    status2str(status__), static_cast<int>(status__)); \
            fflush(nullptr); \
            exit(2); \
        } \
    } while (0)
67 
68 /* aux */
69 using bfloat16_t = dnnl::impl::bfloat16_t;
70 using float16_t = dnnl::impl::float16_t;
71 template <dnnl_data_type_t>
72 struct prec_traits;
73 template <>
74 struct prec_traits<dnnl_bf16> {
75     typedef bfloat16_t type;
76 };
77 template <>
78 struct prec_traits<dnnl_f16> {
79     typedef float16_t type;
80 };
81 template <>
82 struct prec_traits<dnnl_f32> {
83     typedef float type;
84 };
85 template <>
86 struct prec_traits<dnnl_s32> {
87     typedef int32_t type;
88 };
89 template <>
90 struct prec_traits<dnnl_s8> {
91     typedef int8_t type;
92 };
93 template <>
94 struct prec_traits<dnnl_u8> {
95     typedef uint8_t type;
96 };
97 
// Expands to a switch over `data_type` that applies the user-provided CASE()
// macro to every data type with a prec_traits specialization. Any other value
// trips the assert. CASE must be defined at the point of use.
#define CASE_ALL(data_type) \
    switch (data_type) { \
        CASE(dnnl_bf16); \
        CASE(dnnl_f16); \
        CASE(dnnl_f32); \
        CASE(dnnl_s32); \
        CASE(dnnl_s8); \
        CASE(dnnl_u8); \
        default: assert(!"bad data_type"); \
    }
108 
sizeof_dt(dnnl_data_type_t dt)109 inline size_t sizeof_dt(dnnl_data_type_t dt) {
110 #define CASE(dt) \
111     case dt: return sizeof(typename prec_traits<dt>::type);
112 
113     CASE_ALL(dt);
114 
115 #undef CASE
116     return 0;
117 }
118 
119 /* std::numeric_limits::digits functionality */
digits_dt(dnnl_data_type_t dt)120 inline int digits_dt(dnnl_data_type_t dt) {
121 #define CASE(dt) \
122     case dt: \
123         return dnnl::impl::nstl::numeric_limits< \
124                 typename prec_traits<dt>::type>::digits;
125 
126     CASE_ALL(dt);
127 
128 #undef CASE
129     return 0;
130 }
131 
epsilon_dt(dnnl_data_type_t dt)132 inline float epsilon_dt(dnnl_data_type_t dt) {
133 #define CASE(dt) \
134     case dt: \
135         return (float)dnnl::impl::nstl::numeric_limits< \
136                 typename prec_traits<dt>::type>::epsilon();
137 
138     CASE_ALL(dt);
139 
140 #undef CASE
141 
142     return 0;
143 }
144 
lowest_dt(dnnl_data_type_t dt)145 inline float lowest_dt(dnnl_data_type_t dt) {
146 #define CASE(dt) \
147     case dt: \
148         return (float)dnnl::impl::nstl::numeric_limits< \
149                 typename prec_traits<dt>::type>::lowest();
150 
151     CASE_ALL(dt);
152 
153 #undef CASE
154 
155     return 0;
156 }
157 
max_dt(dnnl_data_type_t dt)158 inline float max_dt(dnnl_data_type_t dt) {
159 #define CASE(dt) \
160     case dt: \
161         return (float)dnnl::impl::nstl::numeric_limits< \
162                 typename prec_traits<dt>::type>::max();
163 
164     CASE_ALL(dt);
165 
166 #undef CASE
167 
168     return 0;
169 }
170 
171 #undef CASE_ALL
172 
173 #define BENCHDNN_S32_TO_F32_SAT_CONST 2147483520.f
174 
175 template <dnnl_data_type_t dt>
saturate_and_round(float val)176 inline float saturate_and_round(float val) {
177     const float dt_max = max_dt(dt);
178     const float dt_min = (float)dnnl::impl::nstl::numeric_limits<
179             typename prec_traits<dt>::type>::lowest();
180     if (dt == dnnl_s32 && val >= max_dt(dnnl_s32)) return max_dt(dnnl_s32);
181     if (val > dt_max) val = dt_max;
182     if (val < dt_min) val = dt_min;
183     return mxcsr_cvt(val);
184 }
185 
is_integral_dt(dnnl_data_type_t dt)186 inline bool is_integral_dt(dnnl_data_type_t dt) {
187     return dt == dnnl_s32 || dt == dnnl_s8 || dt == dnnl_u8;
188 }
189 
maybe_saturate(dnnl_data_type_t dt,float value)190 inline float maybe_saturate(dnnl_data_type_t dt, float value) {
191     if (!is_integral_dt(dt)) return value;
192 
193     switch (dt) {
194 #define CASE(dt) \
195     case dt: return saturate_and_round<dt>(value);
196         CASE(dnnl_s32);
197         CASE(dnnl_s8);
198         CASE(dnnl_u8);
199 #undef CASE
200         default: assert(!"bad data_type");
201     }
202     return 0;
203 }
204 
205 float round_to_nearest_representable(dnnl_data_type_t dt, float value);
206 
207 extern dnnl_engine_kind_t engine_tgt_kind;
208 extern size_t engine_index;
209 extern isa_hints_t hints;
210 
// Memory kinds selectable in benchdnn: an extended version of the
// dnnl_sycl_interop_memory_kind_t enumeration.
enum class memory_kind_ext_t {
    // Same as dnnl_sycl_interop_usm.
    usm,
    // Same as dnnl_sycl_interop_buffer.
    buffer,
    // USM allocated via malloc_device().
    usm_device,
    // USM allocated via malloc_shared().
    usm_shared,
};
218 
219 const memory_kind_ext_t default_memory_kind = memory_kind_ext_t::usm;
220 
221 extern memory_kind_ext_t memory_kind;
222 
223 void init_isa_settings();
224 
query_impl_info(const_dnnl_primitive_desc_t pd)225 inline const char *query_impl_info(const_dnnl_primitive_desc_t pd) {
226     const char *str;
227     dnnl_primitive_desc_query(pd, dnnl_query_impl_info_str, 0, &str);
228     return str;
229 }
230 
231 struct dnn_mem_t;
232 
233 struct args_t {
234     args_t &set(int arg, const dnn_mem_t &mem);
235     args_t &set(
236             const std::vector<int> &args, const std::vector<dnn_mem_t> &mems);
clearargs_t237     void clear() { args_.clear(); }
238 
sizeargs_t239     int size() const { return (int)args_.size(); }
240 
241     const dnn_mem_t &find(int arg) const;
242 
argargs_t243     int arg(int index) const { return args_[index].first; }
dnn_memargs_t244     const dnn_mem_t &dnn_mem(int index) const { return *args_[index].second; }
245 
246 private:
247     std::vector<std::pair<int, const dnn_mem_t *>> args_;
248 };
249 
250 template <typename T>
251 struct dnnl_api_traits;
252 //{
253 //    static void destroy(T t) {}
254 //};
255 
256 template <>
257 struct dnnl_api_traits<dnnl_primitive_t> {
destroydnnl_api_traits258     static void destroy(dnnl_primitive_t t) {
259         DNN_SAFE_V(dnnl_primitive_destroy(t));
260     }
261 };
262 
263 template <>
264 struct dnnl_api_traits<dnnl_primitive_desc_t> {
destroydnnl_api_traits265     static void destroy(dnnl_primitive_desc_t t) {
266         DNN_SAFE_V(dnnl_primitive_desc_destroy(t));
267     }
268 };
269 
270 template <>
271 struct dnnl_api_traits<dnnl_primitive_attr_t> {
destroydnnl_api_traits272     static void destroy(dnnl_primitive_attr_t t) {
273         DNN_SAFE_V(dnnl_primitive_attr_destroy(t));
274     }
275 };
276 
277 // Generic class providing RAII support for DNNL objects in benchdnn
278 template <typename T>
279 struct benchdnn_dnnl_wrapper_t {
benchdnn_dnnl_wrapper_tbenchdnn_dnnl_wrapper_t280     benchdnn_dnnl_wrapper_t(T t = nullptr) : t_(t) {
281         static_assert(std::is_pointer<T>::value, "T is not a pointer type.");
282     }
283 
benchdnn_dnnl_wrapper_tbenchdnn_dnnl_wrapper_t284     benchdnn_dnnl_wrapper_t(benchdnn_dnnl_wrapper_t &&rhs) {
285         T t = rhs.release();
286         t_ = t;
287     }
288 
~benchdnn_dnnl_wrapper_tbenchdnn_dnnl_wrapper_t289     ~benchdnn_dnnl_wrapper_t() { do_destroy(); }
290 
releasebenchdnn_dnnl_wrapper_t291     T release() {
292         T tmp = t_;
293         t_ = nullptr;
294         return tmp;
295     }
296 
resetbenchdnn_dnnl_wrapper_t297     void reset(T t) {
298         do_destroy();
299         t_ = t;
300     }
301 
operator Tbenchdnn_dnnl_wrapper_t302     operator T() const { return t_; }
303 
304     BENCHDNN_DISALLOW_COPY_AND_ASSIGN(benchdnn_dnnl_wrapper_t);
305 
306 private:
307     T t_;
308 
do_destroybenchdnn_dnnl_wrapper_t309     void do_destroy() {
310         if (t_) { dnnl_api_traits<T>::destroy(t_); }
311     }
312 };
313 
314 // Constructs a wrapper object (providing RAII support)
315 template <typename T>
make_benchdnn_dnnl_wrapper(T t)316 benchdnn_dnnl_wrapper_t<T> make_benchdnn_dnnl_wrapper(T t) {
317     return benchdnn_dnnl_wrapper_t<T>(t);
318 }
319 
320 struct engine_t {
321     engine_t(dnnl_engine_kind_t engine_kind);
322     engine_t(dnnl_engine_t engine);
323     engine_t(const engine_t &other);
324     ~engine_t();
operator dnnl_engine_tengine_t325     operator dnnl_engine_t() const { return engine_; }
326 
327 private:
328     engine_t &operator=(engine_t &other) = delete;
329     dnnl_engine_t engine_;
330     bool is_owner_;
331 };
332 
333 struct stream_t {
334     stream_t(dnnl_engine_t engine);
335     ~stream_t();
operator dnnl_stream_tstream_t336     operator dnnl_stream_t() const { return stream_; }
337 
338 private:
339     BENCHDNN_DISALLOW_COPY_AND_ASSIGN(stream_t);
340     dnnl_stream_t stream_;
341 };
342 
343 // Engine used to run oneDNN primitives for testing.
get_test_engine()344 inline const engine_t &get_test_engine() {
345     static const engine_t instance(engine_tgt_kind);
346     return instance;
347 }
348 
349 // Engine used to run reference implementations (fast-ref-gpu option).
get_cpu_engine()350 inline const engine_t &get_cpu_engine() {
351 #if DNNL_CPU_RUNTIME == DNNL_RUNTIME_NONE
352     fprintf(stderr,
353             "CPU engine is not available for GPU only configurations\n");
354     SAFE_V(FAIL);
355     assert(!"unexpected");
356 #endif
357     static const engine_t instance(dnnl_cpu);
358     return instance;
359 }
360 
361 int get_memory_footprint(const_dnnl_primitive_desc_t pd, res_t *res);
362 int check_same_pd(res_t *res, const dnnl_primitive_desc_t &pd_no_attr);
363 
364 template <typename op_desc_t>
check_pd_w_and_wo_attr(res_t * res,const attr_t & attr,const op_desc_t & op_desc)365 int check_pd_w_and_wo_attr(
366         res_t *res, const attr_t &attr, const op_desc_t &op_desc) {
367     if (attr_same_pd_check && !attr.is_def()) {
368         dnnl_primitive_desc_t pd_no_attr {};
369         dnnl_primitive_attr_t dnnl_empty_attrs {};
370         DNN_SAFE(dnnl_primitive_desc_create(&pd_no_attr, &op_desc,
371                          dnnl_empty_attrs, get_test_engine(), nullptr),
372                 WARN);
373         auto pd_no_attr_wrapper = make_benchdnn_dnnl_wrapper(pd_no_attr);
374         SAFE(check_same_pd(res, pd_no_attr_wrapper), WARN);
375     }
376     return OK;
377 }
378 
379 template <typename func_t, typename prb_t>
init_prim(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> & user_prim,const func_t & init_pd_func,prb_t * prb,res_t * res,dir_t dir=FLAG_FWD,const_dnnl_primitive_desc_t hint=nullptr)380 int init_prim(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &user_prim,
381         const func_t &init_pd_func, prb_t *prb, res_t *res,
382         dir_t dir = FLAG_FWD, const_dnnl_primitive_desc_t hint = nullptr) {
383     dnnl_primitive_desc_t pd_ {};
384     dnnl_primitive_t prim_ {};
385     benchdnn_dnnl_wrapper_t<dnnl_primitive_desc_t> pd;
386     benchdnn_dnnl_wrapper_t<dnnl_primitive_t> prim;
387 
388 #ifndef DNNL_DISABLE_PRIMITIVE_CACHE
389 
390     // The first primitive creation using a temporary engine.
391 #ifdef DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE
392     // The idea is to create the requested primitive twice using different
393     // engines but the same device and context in the case of OpenCL and DPCPP.
394     // Rationale: make sure that the primitive cache is robust in the case
395     // where CPU and GPU engines are re-created because this is a commonly
396     // used scenario in the frameworks.
397     engine_t engine(get_test_engine());
398 #else
399     // The idea is to create the requested primitive twice using
400     // different engines.
401     // Rationale:
402     // 1. Make sure that the primitive cache is robust for the cases when:
403     //   - CPU engine is re-created
404     //   - GPU engine is re-created for the same device but different context
405     // These 2 cases are commonly used or expected to be used in the frameworks.
406     // 2. (for GPU only) Identify context dependent parts in primitive
407     // implementations, e.g. if a primitive implementation contains
408     // a memory_storage_t (for scales, zero points or buffers), which depends
409     // on a particular engine then it should fail at execution time.
410     engine_t engine(engine_tgt_kind);
411 #endif
412 
413     SAFE(init_pd_func(engine, prb, pd_, res, dir, hint), WARN);
414     if (res->state == SKIPPED || res->state == UNIMPLEMENTED) return OK;
415     DNN_SAFE(dnnl_primitive_create(&prim_, pd_), WARN);
416 
417     pd.reset(pd_);
418     prim.reset(prim_);
419 #endif
420     // The second (if the cache is enabled) primitive creation using
421     // the global test engine.
422     SAFE(init_pd_func(get_test_engine(), prb, pd_, res, dir, hint), WARN);
423     if (res->state == SKIPPED || res->state == UNIMPLEMENTED) return OK;
424     // This primitive is expected to come from the cache.
425     DNN_SAFE(dnnl_primitive_create(&prim_, pd_), WARN);
426 
427     pd.reset(pd_);
428     prim.reset(prim_);
429 
430     SAFE(check_pd_cache(pd), WARN);
431     SAFE(check_primitive_cache(prim), WARN);
432     // Collect memory footprint for a given primitive descriptor.
433     SAFE(get_memory_footprint(pd, res), WARN);
434 
435     user_prim.reset(prim.release());
436 
437     return OK;
438 }
439 
440 typedef std::function<dnnl_status_t(
441         const dnnl_stream_t &, const std::vector<dnnl_exec_arg_t> &)>
442         perf_function_t;
443 
444 int execute_and_wait(perf_function_t &exec_func, const dnnl_engine_t &engine,
445         const args_t &args);
446 int execute_and_wait(dnnl_primitive_t prim, const args_t &args);
447 
448 int measure_perf(res_t *res, perf_function_t &perf_func, args_t &args);
449 int measure_perf(res_t *res, dnnl_primitive_t prim, args_t &args);
450 
451 void maybe_prepare_runtime_scales(dnn_mem_t &scales_m,
452         const attr_t::scale_t &scale, int64_t scale_cnt, const float *scales);
453 
454 void maybe_prepare_runtime_zero_points(dnn_mem_t &zero_points_m,
455         const attr_t &attr, int arg, int64_t count, const int32_t *zero_points);
456 
457 std::vector<float> prepare_po_vals(const dnn_mem_t &dst_m, const args_t &args,
458         const std::vector<std::pair<int, int>> &v_po_masks,
459         const size_t dst_off);
460 
461 bool check_md_consistency_with_tag(
462         const dnnl_memory_desc_t &md, const std::string &tag);
463 
464 void check_known_skipped_case_common(
465         const std::vector<dnnl_data_type_t> &v_dt, dir_t dir, res_t *res);
466 void check_binary_post_ops(const attr_t &attr, res_t *res);
467 void check_sum_post_ops(const attr_t &attr, res_t *res,
468         dnnl_data_type_t dst_dt = dnnl_data_type_undef);
469 
470 bool is_cpu(const dnnl_engine_t &engine = get_test_engine());
471 bool is_gpu(const dnnl_engine_t &engine = get_test_engine());
472 bool is_sycl_engine(const dnnl_engine_t &engine = get_test_engine());
473 bool is_opencl_engine(const dnnl_engine_t &engine = get_test_engine());
474 bool is_nvidia_gpu(const dnnl_engine_t &engine = get_test_engine());
475 bool is_nvidia_eltwise_ok(
476         dir_t dir, attr_t::post_ops_t::kind_t alg, float alpha);
is_nvidia_eltwise_ok(dir_t dir,const attr_t::post_ops_t::entry_t & e)477 inline bool is_nvidia_eltwise_ok(
478         dir_t dir, const attr_t::post_ops_t::entry_t &e) {
479     return is_nvidia_eltwise_ok(dir, e.kind, e.eltwise.alpha);
480 }
481 
482 int init_md(dnnl_memory_desc_t *md, int ndims, const dnnl_dims_t dims,
483         dnnl_data_type_t data_type, const std::string &tag,
484         const dims_t &strides_ = {});
485 int check_mem_size(const dnnl_memory_desc_t &md);
486 int check_mem_size(const_dnnl_primitive_desc_t const_pd);
487 
488 memory_kind_ext_t str2memory_kind(const char *str);
489 
490 float reorder_rescale_factor();
491 dims_t md2dims(const dnnl_memory_desc_t &md);
492 
493 #endif
494