1 /*******************************************************************************
2 * Copyright 2017-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16
17 #ifndef DNNL_COMMON_HPP
18 #define DNNL_COMMON_HPP
19
20 #include <functional>
21 #include <stddef.h>
22 #include <stdint.h>
23
24 #include <vector>
25
26 #include "oneapi/dnnl/dnnl.h"
27 #include "src/common/bfloat16.hpp"
28 #include "src/common/float16.hpp"
29 #include "src/common/nstl.hpp"
30
31 int check_pd_cache(dnnl_primitive_desc_t pd);
32 int check_primitive_cache(dnnl_primitive_t p);
33
34 #include "common.hpp"
35 #include "dnn_types.hpp"
36 #include "dnnl_debug.hpp"
37 #include "utils/dims.hpp"
38
39 #define for_ for
40
// Executes a DNNL C API call `f`; on a non-success status, prints a
// diagnostic (for CRIT/WARN severity), exits the process on CRIT, and makes
// the enclosing function `return FAIL`. Must be used inside a function that
// returns an int status.
#define DNN_SAFE(f, s) \
    do { \
        dnnl_status_t status__ = f; \
        if (status__ != dnnl_success) { \
            if (s == CRIT || s == WARN) { \
                BENCHDNN_PRINT(0, "error [%s:%d]: '%s' -> %s(%d)\n", \
                        __PRETTY_FUNCTION__, __LINE__, #f, \
                        status2str(status__), (int)status__); \
                fflush(0); \
                if (s == CRIT) exit(2); \
            } \
            return FAIL; \
        } \
    } while (0)
55
// Like DNN_SAFE but unconditionally fatal: any non-success status prints a
// diagnostic and terminates the process. Safe to use in functions that do not
// return an int status (e.g. destructors).
#define DNN_SAFE_V(f) \
    do { \
        dnnl_status_t status__ = f; \
        if (status__ != dnnl_success) { \
            BENCHDNN_PRINT(0, "error [%s:%d]: '%s' -> %s(%d)\n", \
                    __PRETTY_FUNCTION__, __LINE__, STRINGIFY(f), \
                    status2str(status__), (int)status__); \
            fflush(0); \
            exit(2); \
        } \
    } while (0)
67
68 /* aux */
69 using bfloat16_t = dnnl::impl::bfloat16_t;
70 using float16_t = dnnl::impl::float16_t;
// Maps a dnnl_data_type_t enum value to the corresponding C++ storage type
// (e.g. dnnl_f32 -> float). Specialized for every data type benchdnn tests.
template <dnnl_data_type_t>
struct prec_traits;
template <>
struct prec_traits<dnnl_bf16> {
    typedef bfloat16_t type;
};
template <>
struct prec_traits<dnnl_f16> {
    typedef float16_t type;
};
template <>
struct prec_traits<dnnl_f32> {
    typedef float type;
};
template <>
struct prec_traits<dnnl_s32> {
    typedef int32_t type;
};
template <>
struct prec_traits<dnnl_s8> {
    typedef int8_t type;
};
template <>
struct prec_traits<dnnl_u8> {
    typedef uint8_t type;
};
97
// Expands to a switch over all supported data types; each arm is produced by
// a locally-defined CASE(dt) macro (defined/undefined around each use below).
#define CASE_ALL(dt) \
    switch (dt) { \
        CASE(dnnl_bf16); \
        CASE(dnnl_f16); \
        CASE(dnnl_f32); \
        CASE(dnnl_s32); \
        CASE(dnnl_s8); \
        CASE(dnnl_u8); \
        default: assert(!"bad data_type"); \
    }
108
sizeof_dt(dnnl_data_type_t dt)109 inline size_t sizeof_dt(dnnl_data_type_t dt) {
110 #define CASE(dt) \
111 case dt: return sizeof(typename prec_traits<dt>::type);
112
113 CASE_ALL(dt);
114
115 #undef CASE
116 return 0;
117 }
118
119 /* std::numeric_limits::digits functionality */
digits_dt(dnnl_data_type_t dt)120 inline int digits_dt(dnnl_data_type_t dt) {
121 #define CASE(dt) \
122 case dt: \
123 return dnnl::impl::nstl::numeric_limits< \
124 typename prec_traits<dt>::type>::digits;
125
126 CASE_ALL(dt);
127
128 #undef CASE
129 return 0;
130 }
131
epsilon_dt(dnnl_data_type_t dt)132 inline float epsilon_dt(dnnl_data_type_t dt) {
133 #define CASE(dt) \
134 case dt: \
135 return (float)dnnl::impl::nstl::numeric_limits< \
136 typename prec_traits<dt>::type>::epsilon();
137
138 CASE_ALL(dt);
139
140 #undef CASE
141
142 return 0;
143 }
144
lowest_dt(dnnl_data_type_t dt)145 inline float lowest_dt(dnnl_data_type_t dt) {
146 #define CASE(dt) \
147 case dt: \
148 return (float)dnnl::impl::nstl::numeric_limits< \
149 typename prec_traits<dt>::type>::lowest();
150
151 CASE_ALL(dt);
152
153 #undef CASE
154
155 return 0;
156 }
157
max_dt(dnnl_data_type_t dt)158 inline float max_dt(dnnl_data_type_t dt) {
159 #define CASE(dt) \
160 case dt: \
161 return (float)dnnl::impl::nstl::numeric_limits< \
162 typename prec_traits<dt>::type>::max();
163
164 CASE_ALL(dt);
165
166 #undef CASE
167
168 return 0;
169 }
170
171 #undef CASE_ALL
172
173 #define BENCHDNN_S32_TO_F32_SAT_CONST 2147483520.f
174
175 template <dnnl_data_type_t dt>
saturate_and_round(float val)176 inline float saturate_and_round(float val) {
177 const float dt_max = max_dt(dt);
178 const float dt_min = (float)dnnl::impl::nstl::numeric_limits<
179 typename prec_traits<dt>::type>::lowest();
180 if (dt == dnnl_s32 && val >= max_dt(dnnl_s32)) return max_dt(dnnl_s32);
181 if (val > dt_max) val = dt_max;
182 if (val < dt_min) val = dt_min;
183 return mxcsr_cvt(val);
184 }
185
is_integral_dt(dnnl_data_type_t dt)186 inline bool is_integral_dt(dnnl_data_type_t dt) {
187 return dt == dnnl_s32 || dt == dnnl_s8 || dt == dnnl_u8;
188 }
189
maybe_saturate(dnnl_data_type_t dt,float value)190 inline float maybe_saturate(dnnl_data_type_t dt, float value) {
191 if (!is_integral_dt(dt)) return value;
192
193 switch (dt) {
194 #define CASE(dt) \
195 case dt: return saturate_and_round<dt>(value);
196 CASE(dnnl_s32);
197 CASE(dnnl_s8);
198 CASE(dnnl_u8);
199 #undef CASE
200 default: assert(!"bad data_type");
201 }
202 return 0;
203 }
204
205 float round_to_nearest_representable(dnnl_data_type_t dt, float value);
206
207 extern dnnl_engine_kind_t engine_tgt_kind;
208 extern size_t engine_index;
209 extern isa_hints_t hints;
210
// Extended version of dnnl_sycl_interop_memory_kind_t enumeration.
// Adds explicit device/shared USM variants on top of the base USM/buffer
// kinds.
enum class memory_kind_ext_t {
    usm, // Same as dnnl_sycl_interop_usm
    buffer, // Same as dnnl_sycl_interop_buffer
    usm_device, // USM allocated via malloc_device()
    usm_shared, // USM allocated via malloc_shared()
};
218
219 const memory_kind_ext_t default_memory_kind = memory_kind_ext_t::usm;
220
221 extern memory_kind_ext_t memory_kind;
222
223 void init_isa_settings();
224
query_impl_info(const_dnnl_primitive_desc_t pd)225 inline const char *query_impl_info(const_dnnl_primitive_desc_t pd) {
226 const char *str;
227 dnnl_primitive_desc_query(pd, dnnl_query_impl_info_str, 0, &str);
228 return str;
229 }
230
231 struct dnn_mem_t;
232
233 struct args_t {
234 args_t &set(int arg, const dnn_mem_t &mem);
235 args_t &set(
236 const std::vector<int> &args, const std::vector<dnn_mem_t> &mems);
clearargs_t237 void clear() { args_.clear(); }
238
sizeargs_t239 int size() const { return (int)args_.size(); }
240
241 const dnn_mem_t &find(int arg) const;
242
argargs_t243 int arg(int index) const { return args_[index].first; }
dnn_memargs_t244 const dnn_mem_t &dnn_mem(int index) const { return *args_[index].second; }
245
246 private:
247 std::vector<std::pair<int, const dnn_mem_t *>> args_;
248 };
249
// Traits telling the RAII wrapper below how to destroy each DNNL C API
// handle type. Specialized per handle type; the expected interface is:
template <typename T>
struct dnnl_api_traits;
//{
//    static void destroy(T t) {}
//};
255
256 template <>
257 struct dnnl_api_traits<dnnl_primitive_t> {
destroydnnl_api_traits258 static void destroy(dnnl_primitive_t t) {
259 DNN_SAFE_V(dnnl_primitive_destroy(t));
260 }
261 };
262
263 template <>
264 struct dnnl_api_traits<dnnl_primitive_desc_t> {
destroydnnl_api_traits265 static void destroy(dnnl_primitive_desc_t t) {
266 DNN_SAFE_V(dnnl_primitive_desc_destroy(t));
267 }
268 };
269
270 template <>
271 struct dnnl_api_traits<dnnl_primitive_attr_t> {
destroydnnl_api_traits272 static void destroy(dnnl_primitive_attr_t t) {
273 DNN_SAFE_V(dnnl_primitive_attr_destroy(t));
274 }
275 };
276
277 // Generic class providing RAII support for DNNL objects in benchdnn
278 template <typename T>
279 struct benchdnn_dnnl_wrapper_t {
benchdnn_dnnl_wrapper_tbenchdnn_dnnl_wrapper_t280 benchdnn_dnnl_wrapper_t(T t = nullptr) : t_(t) {
281 static_assert(std::is_pointer<T>::value, "T is not a pointer type.");
282 }
283
benchdnn_dnnl_wrapper_tbenchdnn_dnnl_wrapper_t284 benchdnn_dnnl_wrapper_t(benchdnn_dnnl_wrapper_t &&rhs) {
285 T t = rhs.release();
286 t_ = t;
287 }
288
~benchdnn_dnnl_wrapper_tbenchdnn_dnnl_wrapper_t289 ~benchdnn_dnnl_wrapper_t() { do_destroy(); }
290
releasebenchdnn_dnnl_wrapper_t291 T release() {
292 T tmp = t_;
293 t_ = nullptr;
294 return tmp;
295 }
296
resetbenchdnn_dnnl_wrapper_t297 void reset(T t) {
298 do_destroy();
299 t_ = t;
300 }
301
operator Tbenchdnn_dnnl_wrapper_t302 operator T() const { return t_; }
303
304 BENCHDNN_DISALLOW_COPY_AND_ASSIGN(benchdnn_dnnl_wrapper_t);
305
306 private:
307 T t_;
308
do_destroybenchdnn_dnnl_wrapper_t309 void do_destroy() {
310 if (t_) { dnnl_api_traits<T>::destroy(t_); }
311 }
312 };
313
314 // Constructs a wrapper object (providing RAII support)
315 template <typename T>
make_benchdnn_dnnl_wrapper(T t)316 benchdnn_dnnl_wrapper_t<T> make_benchdnn_dnnl_wrapper(T t) {
317 return benchdnn_dnnl_wrapper_t<T>(t);
318 }
319
320 struct engine_t {
321 engine_t(dnnl_engine_kind_t engine_kind);
322 engine_t(dnnl_engine_t engine);
323 engine_t(const engine_t &other);
324 ~engine_t();
operator dnnl_engine_tengine_t325 operator dnnl_engine_t() const { return engine_; }
326
327 private:
328 engine_t &operator=(engine_t &other) = delete;
329 dnnl_engine_t engine_;
330 bool is_owner_;
331 };
332
333 struct stream_t {
334 stream_t(dnnl_engine_t engine);
335 ~stream_t();
operator dnnl_stream_tstream_t336 operator dnnl_stream_t() const { return stream_; }
337
338 private:
339 BENCHDNN_DISALLOW_COPY_AND_ASSIGN(stream_t);
340 dnnl_stream_t stream_;
341 };
342
343 // Engine used to run oneDNN primitives for testing.
get_test_engine()344 inline const engine_t &get_test_engine() {
345 static const engine_t instance(engine_tgt_kind);
346 return instance;
347 }
348
349 // Engine used to run reference implementations (fast-ref-gpu option).
get_cpu_engine()350 inline const engine_t &get_cpu_engine() {
351 #if DNNL_CPU_RUNTIME == DNNL_RUNTIME_NONE
352 fprintf(stderr,
353 "CPU engine is not available for GPU only configurations\n");
354 SAFE_V(FAIL);
355 assert(!"unexpected");
356 #endif
357 static const engine_t instance(dnnl_cpu);
358 return instance;
359 }
360
361 int get_memory_footprint(const_dnnl_primitive_desc_t pd, res_t *res);
362 int check_same_pd(res_t *res, const dnnl_primitive_desc_t &pd_no_attr);
363
364 template <typename op_desc_t>
check_pd_w_and_wo_attr(res_t * res,const attr_t & attr,const op_desc_t & op_desc)365 int check_pd_w_and_wo_attr(
366 res_t *res, const attr_t &attr, const op_desc_t &op_desc) {
367 if (attr_same_pd_check && !attr.is_def()) {
368 dnnl_primitive_desc_t pd_no_attr {};
369 dnnl_primitive_attr_t dnnl_empty_attrs {};
370 DNN_SAFE(dnnl_primitive_desc_create(&pd_no_attr, &op_desc,
371 dnnl_empty_attrs, get_test_engine(), nullptr),
372 WARN);
373 auto pd_no_attr_wrapper = make_benchdnn_dnnl_wrapper(pd_no_attr);
374 SAFE(check_same_pd(res, pd_no_attr_wrapper), WARN);
375 }
376 return OK;
377 }
378
379 template <typename func_t, typename prb_t>
init_prim(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> & user_prim,const func_t & init_pd_func,prb_t * prb,res_t * res,dir_t dir=FLAG_FWD,const_dnnl_primitive_desc_t hint=nullptr)380 int init_prim(benchdnn_dnnl_wrapper_t<dnnl_primitive_t> &user_prim,
381 const func_t &init_pd_func, prb_t *prb, res_t *res,
382 dir_t dir = FLAG_FWD, const_dnnl_primitive_desc_t hint = nullptr) {
383 dnnl_primitive_desc_t pd_ {};
384 dnnl_primitive_t prim_ {};
385 benchdnn_dnnl_wrapper_t<dnnl_primitive_desc_t> pd;
386 benchdnn_dnnl_wrapper_t<dnnl_primitive_t> prim;
387
388 #ifndef DNNL_DISABLE_PRIMITIVE_CACHE
389
390 // The first primitive creation using a temporary engine.
391 #ifdef DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE
392 // The idea is to create the requested primitive twice using different
393 // engines but the same device and context in the case of OpenCL and DPCPP.
394 // Rationale: make sure that the primitive cache is robust in the case
395 // where CPU and GPU engines are re-created because this is a commonly
396 // used scenario in the frameworks.
397 engine_t engine(get_test_engine());
398 #else
399 // The idea is to create the requested primitive twice using
400 // different engines.
401 // Rationale:
402 // 1. Make sure that the primitive cache is robust for the cases when:
403 // - CPU engine is re-created
404 // - GPU engine is re-created for the same device but different context
405 // These 2 cases are commonly used or expected to be used in the frameworks.
406 // 2. (for GPU only) Identify context dependent parts in primitive
407 // implementations, e.g. if a primitive implementation contains
408 // a memory_storage_t (for scales, zero points or buffers), which depends
409 // on a particular engine then it should fail at execution time.
410 engine_t engine(engine_tgt_kind);
411 #endif
412
413 SAFE(init_pd_func(engine, prb, pd_, res, dir, hint), WARN);
414 if (res->state == SKIPPED || res->state == UNIMPLEMENTED) return OK;
415 DNN_SAFE(dnnl_primitive_create(&prim_, pd_), WARN);
416
417 pd.reset(pd_);
418 prim.reset(prim_);
419 #endif
420 // The second (if the cache is enabled) primitive creation using
421 // the global test engine.
422 SAFE(init_pd_func(get_test_engine(), prb, pd_, res, dir, hint), WARN);
423 if (res->state == SKIPPED || res->state == UNIMPLEMENTED) return OK;
424 // This primitive is expected to come from the cache.
425 DNN_SAFE(dnnl_primitive_create(&prim_, pd_), WARN);
426
427 pd.reset(pd_);
428 prim.reset(prim_);
429
430 SAFE(check_pd_cache(pd), WARN);
431 SAFE(check_primitive_cache(prim), WARN);
432 // Collect memory footprint for a given primitive descriptor.
433 SAFE(get_memory_footprint(pd, res), WARN);
434
435 user_prim.reset(prim.release());
436
437 return OK;
438 }
439
440 typedef std::function<dnnl_status_t(
441 const dnnl_stream_t &, const std::vector<dnnl_exec_arg_t> &)>
442 perf_function_t;
443
444 int execute_and_wait(perf_function_t &exec_func, const dnnl_engine_t &engine,
445 const args_t &args);
446 int execute_and_wait(dnnl_primitive_t prim, const args_t &args);
447
448 int measure_perf(res_t *res, perf_function_t &perf_func, args_t &args);
449 int measure_perf(res_t *res, dnnl_primitive_t prim, args_t &args);
450
451 void maybe_prepare_runtime_scales(dnn_mem_t &scales_m,
452 const attr_t::scale_t &scale, int64_t scale_cnt, const float *scales);
453
454 void maybe_prepare_runtime_zero_points(dnn_mem_t &zero_points_m,
455 const attr_t &attr, int arg, int64_t count, const int32_t *zero_points);
456
457 std::vector<float> prepare_po_vals(const dnn_mem_t &dst_m, const args_t &args,
458 const std::vector<std::pair<int, int>> &v_po_masks,
459 const size_t dst_off);
460
461 bool check_md_consistency_with_tag(
462 const dnnl_memory_desc_t &md, const std::string &tag);
463
464 void check_known_skipped_case_common(
465 const std::vector<dnnl_data_type_t> &v_dt, dir_t dir, res_t *res);
466 void check_binary_post_ops(const attr_t &attr, res_t *res);
467 void check_sum_post_ops(const attr_t &attr, res_t *res,
468 dnnl_data_type_t dst_dt = dnnl_data_type_undef);
469
470 bool is_cpu(const dnnl_engine_t &engine = get_test_engine());
471 bool is_gpu(const dnnl_engine_t &engine = get_test_engine());
472 bool is_sycl_engine(const dnnl_engine_t &engine = get_test_engine());
473 bool is_opencl_engine(const dnnl_engine_t &engine = get_test_engine());
474 bool is_nvidia_gpu(const dnnl_engine_t &engine = get_test_engine());
475 bool is_nvidia_eltwise_ok(
476 dir_t dir, attr_t::post_ops_t::kind_t alg, float alpha);
is_nvidia_eltwise_ok(dir_t dir,const attr_t::post_ops_t::entry_t & e)477 inline bool is_nvidia_eltwise_ok(
478 dir_t dir, const attr_t::post_ops_t::entry_t &e) {
479 return is_nvidia_eltwise_ok(dir, e.kind, e.eltwise.alpha);
480 }
481
482 int init_md(dnnl_memory_desc_t *md, int ndims, const dnnl_dims_t dims,
483 dnnl_data_type_t data_type, const std::string &tag,
484 const dims_t &strides_ = {});
485 int check_mem_size(const dnnl_memory_desc_t &md);
486 int check_mem_size(const_dnnl_primitive_desc_t const_pd);
487
488 memory_kind_ext_t str2memory_kind(const char *str);
489
490 float reorder_rescale_factor();
491 dims_t md2dims(const dnnl_memory_desc_t &md);
492
493 #endif
494