1 /*******************************************************************************
2 * Copyright 2020-2021 Intel Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
16 
17 #include "dnnl.h"
18 
19 #include "dnnl_common.hpp"
20 #include "dnnl_memory.hpp"
21 
22 #include "tests/test_thread.hpp"
23 
24 #include "zeropad/zeropad.hpp"
25 
26 extern "C" {
27 dnnl_status_t dnnl_impl_zero_pad(
28         const dnnl::impl::memory_t *memory, dnnl::impl::stream_t *stream);
29 }
30 
31 namespace zeropad {
32 
compare(const dnn_mem_t & test_mem,res_t * res)33 static int compare(const dnn_mem_t &test_mem, res_t *res) {
34     const int ndims = test_mem.md_.ndims;
35     const auto *dims = test_mem.md_.dims;
36 
37     if (ndims == 0) return OK;
38     if (test_mem.md_.format_kind != dnnl_blocked) return OK;
39 
40     std::atomic<int> ok(true);
41 
42     const uint8_t *mem = (const uint8_t *)test_mem;
43     size_t type_size = test_mem.sizeof_dt();
44 
45     const auto increment
46             = [&](dnnl_dims_t &pos, dnnl_dim_t &idx, bool &done, int stop_dim) {
47                   for (int i = ndims - 1; i >= stop_dim; i--) {
48                       pos[i]++;
49                       if (pos[i] < dims[i]) {
50                           break;
51                       } else {
52                           pos[i] = 0;
53                           if (i == stop_dim) done = true;
54                       }
55                   }
56                   idx = md_off_v(test_mem.md_, pos);
57               };
58 
59     dnnl::impl::parallel_nd(dims[0], [&](dnnl_dim_t dim0) {
60         dnnl_dims_t pos = {0};
61         pos[0] = dim0;
62         dnnl_dim_t idx = md_off_v(test_mem.md_, pos);
63         bool done = false;
64 
65         while (!done && ok) {
66             for (size_t i = 0; i < type_size; i++) {
67                 uint8_t mem_value = mem[type_size * idx + i];
68                 if (mem_value != dnnl_mem_default_value) ok = false;
69             }
70             increment(pos, idx, done, 1);
71         }
72     });
73 
74     // Serially check for errors for data dumping purposes
75     if (!ok) {
76         int errors = 0;
77         dnnl_dims_t pos = {0};
78         dnnl_dim_t idx = md_off_v(test_mem.md_, pos);
79         bool done = false;
80         while (!done) {
81             for (size_t i = 0; i < type_size; i++) {
82                 uint8_t mem_value = mem[type_size * idx + i];
83                 bool idx_ok = (mem_value == dnnl_mem_default_value);
84                 if (!idx_ok) errors++;
85                 const bool dump = (!idx_ok && (errors < 10 || verbose >= 10))
86                         || (verbose >= 99);
87                 if (dump) {
88                     BENCHDNN_PRINT(0,
89                             "[%4ld][arg:%d]"
90                             "[" IFMT "," IFMT "," IFMT "," IFMT "," IFMT
91                             "," IFMT "] dt:% 9.6g \n",
92                             (long)idx, test_mem.dt(), pos[0], pos[1], pos[2],
93                             pos[3], pos[4], pos[5], test_mem.get_elem(idx));
94                     break;
95                 }
96             }
97             increment(pos, idx, done, 0);
98         }
99 
100         BENCHDNN_PRINT(0, "@@@ [arg:%d] check_non_zeroed_elements failed\n",
101                 test_mem.dt());
102         res->errors += errors;
103     }
104 
105     int errors = 0;
106     auto status = check_zero_padding(test_mem, test_mem.dt(), &errors);
107     res->errors += errors;
108 
109     bool passed = ok && (status == OK);
110     res->state = passed ? PASSED : FAILED;
111     return passed ? OK : FAIL;
112 }
113 
perf_func(const dnnl_stream_t & stream,const std::vector<dnnl_exec_arg_t> & args)114 static dnnl_status_t perf_func(
115         const dnnl_stream_t &stream, const std::vector<dnnl_exec_arg_t> &args) {
116     return dnnl_impl_zero_pad(args[0].memory, stream);
117 }
118 
check_known_skipped_case(const prb_t * prb,res_t * res)119 void check_known_skipped_case(const prb_t *prb, res_t *res) {
120     check_known_skipped_case_common({prb->dt}, FWD_D, res);
121     if (res->state == SKIPPED) return;
122 
123     if (is_nvidia_gpu()) {
124         res->state = SKIPPED, res->reason = CASE_NOT_SUPPORTED;
125         return;
126     }
127 }
128 
doit(const prb_t * prb,res_t * res)129 int doit(const prb_t *prb, res_t *res) {
130     if (bench_mode == LIST) return res->state = LISTED, OK;
131 
132     check_known_skipped_case(prb, res);
133     if (res->state == SKIPPED) return OK;
134 
135     dnnl_memory_desc_t data_md {};
136     SAFE(init_md(&data_md, prb->ndims, prb->dims.data(), prb->dt, prb->tag),
137             WARN);
138     if (res->state == SKIPPED || res->state == UNIMPLEMENTED) return OK;
139 
140     if (check_mem_size(data_md) != OK) {
141         return res->state = SKIPPED, res->reason = NOT_ENOUGH_RAM, OK;
142     }
143 
144     const auto &test_engine = get_test_engine();
145 
146     dnn_mem_t test_mem(data_md, test_engine);
147 
148     args_t args;
149     args.set(0, test_mem);
150     perf_function_t perf_func_ = &perf_func;
151 
152     if (is_bench_mode(CORR)) {
153         execute_and_wait(perf_func_, test_engine, args);
154         SAFE(compare(test_mem, res), WARN);
155     }
156     if (is_bench_mode(PERF)) {
157         // Get plain memory desc size to have a proper padded area size.
158         dnnl_memory_desc_t plain_data_md {};
159         SAFE(init_md(&plain_data_md, prb->ndims, prb->dims.data(), prb->dt,
160                      tag::abx),
161                 WARN);
162         // Fill output bytes for perf_report.
163         res->ibytes = 0; // Since we don't read any data from padding.
164         res->obytes = dnnl_memory_desc_get_size(&data_md)
165                 - dnnl_memory_desc_get_size(&plain_data_md);
166     }
167 
168     measure_perf(res->timer, perf_func_, args);
169 
170     return OK;
171 }
172 
173 } // namespace zeropad
174