1 /*******************************************************************************
2 * Copyright 2020-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16
17 #include "dnnl.h"
18
19 #include "dnnl_common.hpp"
20 #include "dnnl_memory.hpp"
21
22 #include "tests/test_thread.hpp"
23
24 #include "zeropad/zeropad.hpp"
25
26 extern "C" {
27 dnnl_status_t dnnl_impl_zero_pad(
28 const dnnl::impl::memory_t *memory, dnnl::impl::stream_t *stream);
29 }
30
31 namespace zeropad {
32
compare(const dnn_mem_t & test_mem,res_t * res)33 static int compare(const dnn_mem_t &test_mem, res_t *res) {
34 const int ndims = test_mem.md_.ndims;
35 const auto *dims = test_mem.md_.dims;
36
37 if (ndims == 0) return OK;
38 if (test_mem.md_.format_kind != dnnl_blocked) return OK;
39
40 std::atomic<int> ok(true);
41
42 const uint8_t *mem = (const uint8_t *)test_mem;
43 size_t type_size = test_mem.sizeof_dt();
44
45 const auto increment
46 = [&](dnnl_dims_t &pos, dnnl_dim_t &idx, bool &done, int stop_dim) {
47 for (int i = ndims - 1; i >= stop_dim; i--) {
48 pos[i]++;
49 if (pos[i] < dims[i]) {
50 break;
51 } else {
52 pos[i] = 0;
53 if (i == stop_dim) done = true;
54 }
55 }
56 idx = md_off_v(test_mem.md_, pos);
57 };
58
59 dnnl::impl::parallel_nd(dims[0], [&](dnnl_dim_t dim0) {
60 dnnl_dims_t pos = {0};
61 pos[0] = dim0;
62 dnnl_dim_t idx = md_off_v(test_mem.md_, pos);
63 bool done = false;
64
65 while (!done && ok) {
66 for (size_t i = 0; i < type_size; i++) {
67 uint8_t mem_value = mem[type_size * idx + i];
68 if (mem_value != dnnl_mem_default_value) ok = false;
69 }
70 increment(pos, idx, done, 1);
71 }
72 });
73
74 // Serially check for errors for data dumping purposes
75 if (!ok) {
76 int errors = 0;
77 dnnl_dims_t pos = {0};
78 dnnl_dim_t idx = md_off_v(test_mem.md_, pos);
79 bool done = false;
80 while (!done) {
81 for (size_t i = 0; i < type_size; i++) {
82 uint8_t mem_value = mem[type_size * idx + i];
83 bool idx_ok = (mem_value == dnnl_mem_default_value);
84 if (!idx_ok) errors++;
85 const bool dump = (!idx_ok && (errors < 10 || verbose >= 10))
86 || (verbose >= 99);
87 if (dump) {
88 BENCHDNN_PRINT(0,
89 "[%4ld][arg:%d]"
90 "[" IFMT "," IFMT "," IFMT "," IFMT "," IFMT
91 "," IFMT "] dt:% 9.6g \n",
92 (long)idx, test_mem.dt(), pos[0], pos[1], pos[2],
93 pos[3], pos[4], pos[5], test_mem.get_elem(idx));
94 break;
95 }
96 }
97 increment(pos, idx, done, 0);
98 }
99
100 BENCHDNN_PRINT(0, "@@@ [arg:%d] check_non_zeroed_elements failed\n",
101 test_mem.dt());
102 res->errors += errors;
103 }
104
105 int errors = 0;
106 auto status = check_zero_padding(test_mem, test_mem.dt(), &errors);
107 res->errors += errors;
108
109 bool passed = ok && (status == OK);
110 res->state = passed ? PASSED : FAILED;
111 return passed ? OK : FAIL;
112 }
113
perf_func(const dnnl_stream_t & stream,const std::vector<dnnl_exec_arg_t> & args)114 static dnnl_status_t perf_func(
115 const dnnl_stream_t &stream, const std::vector<dnnl_exec_arg_t> &args) {
116 return dnnl_impl_zero_pad(args[0].memory, stream);
117 }
118
check_known_skipped_case(const prb_t * prb,res_t * res)119 void check_known_skipped_case(const prb_t *prb, res_t *res) {
120 check_known_skipped_case_common({prb->dt}, FWD_D, res);
121 if (res->state == SKIPPED) return;
122
123 if (is_nvidia_gpu()) {
124 res->state = SKIPPED, res->reason = CASE_NOT_SUPPORTED;
125 return;
126 }
127 }
128
doit(const prb_t * prb,res_t * res)129 int doit(const prb_t *prb, res_t *res) {
130 if (bench_mode == LIST) return res->state = LISTED, OK;
131
132 check_known_skipped_case(prb, res);
133 if (res->state == SKIPPED) return OK;
134
135 dnnl_memory_desc_t data_md {};
136 SAFE(init_md(&data_md, prb->ndims, prb->dims.data(), prb->dt, prb->tag),
137 WARN);
138 if (res->state == SKIPPED || res->state == UNIMPLEMENTED) return OK;
139
140 if (check_mem_size(data_md) != OK) {
141 return res->state = SKIPPED, res->reason = NOT_ENOUGH_RAM, OK;
142 }
143
144 const auto &test_engine = get_test_engine();
145
146 dnn_mem_t test_mem(data_md, test_engine);
147
148 args_t args;
149 args.set(0, test_mem);
150 perf_function_t perf_func_ = &perf_func;
151
152 if (is_bench_mode(CORR)) {
153 execute_and_wait(perf_func_, test_engine, args);
154 SAFE(compare(test_mem, res), WARN);
155 }
156 if (is_bench_mode(PERF)) {
157 // Get plain memory desc size to have a proper padded area size.
158 dnnl_memory_desc_t plain_data_md {};
159 SAFE(init_md(&plain_data_md, prb->ndims, prb->dims.data(), prb->dt,
160 tag::abx),
161 WARN);
162 // Fill output bytes for perf_report.
163 res->ibytes = 0; // Since we don't read any data from padding.
164 res->obytes = dnnl_memory_desc_get_size(&data_md)
165 - dnnl_memory_desc_get_size(&plain_data_md);
166 }
167
168 measure_perf(res, perf_func_, args);
169
170 return OK;
171 }
172
173 } // namespace zeropad
174