1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
/*!
 * \file mkldnn_requantize-inl.h
 * \brief MKL-DNN implementation of the requantize operator (int32 -> int8/uint8).
 * \author Jin Huang, Xinyu Chen
 */
24
25 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
26 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
27 #if MXNET_USE_MKLDNN == 1
28 #include <string>
29 #include <algorithm>
30 #include <vector>
31 #include "../requantize-inl.h"
32 #include "../../nn/mkldnn/mkldnn_base-inl.h"
33
34 namespace mxnet {
35 namespace op {
36
37 template <typename DstType>
MKLDNNRequantizeForwardKer(const nnvm::NodeAttrs & attrs,const OpContext & ctx,const std::vector<NDArray> & inputs,const std::vector<OpReqType> & req,const std::vector<NDArray> & outputs,const float real_range)38 static void MKLDNNRequantizeForwardKer(const nnvm::NodeAttrs& attrs,
39 const OpContext& ctx,
40 const std::vector<NDArray>& inputs,
41 const std::vector<OpReqType>& req,
42 const std::vector<NDArray>& outputs,
43 const float real_range) {
44 using namespace mshadow;
45 using namespace mxnet_op;
46 using red::limits::MaxValue;
47 using red::limits::MinValue;
48 typedef int32_t SrcDType;
49 // check shapes
50 size_t i_dim = inputs[0].shape().ndim();
51 size_t o_dim = outputs[0].shape().ndim();
52 CHECK_EQ(i_dim, o_dim);
53 float first_quantized_range = MinAbs(MinValue<SrcDType>(),
54 MaxValue<SrcDType>());
55 float first_real_range = MaxAbs(*inputs[1].data().dptr<float>(),
56 *inputs[2].data().dptr<float>());
57 float first_scale = first_real_range / first_quantized_range;
58 float second_real_range = real_range;
59 float second_quantized_range = 0.f;
60 if (std::is_same<DstType, int8_t>::value) {
61 second_quantized_range = MinAbs(MaxValue<DstType>(), MinValue<DstType>());
62 *outputs[1].data().dptr<float>() = -second_real_range;
63 *outputs[2].data().dptr<float>() = second_real_range;
64 } else if (std::is_same<DstType, uint8_t>::value) {
65 second_quantized_range = MaxValue<DstType>();
66 *outputs[1].data().dptr<float>() = 0.f;
67 *outputs[2].data().dptr<float>() = second_real_range;
68 } else {
69 LOG(FATAL) << "Unsupported requantize output type";
70 }
71 float second_scale = second_quantized_range / second_real_range;
72 float scale = first_scale * second_scale;
73
74 mkldnn::primitive_attr attr;
75 const int mask = 0;
76 std::vector<float> scales = {scale};
77 attr.set_output_scales(mask, scales);
78 mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
79
80 NDArray in_buffer = inputs[0];
81 if (inputs[0].IsView() && inputs[0].IsMKLDNNData())
82 in_buffer = inputs[0].Reorder2Default();
83
84 auto i_mem = in_buffer.GetMKLDNNData();
85 auto i_desc = i_mem->get_desc();
86 auto o_desc = i_desc;
87 o_desc.data.data_type = get_mkldnn_type_t<DstType>();
88 auto reorder_pd = mkldnn::reorder::primitive_desc(cpu_engine, i_desc, cpu_engine, o_desc, attr);
89 auto o_mem = CreateMKLDNNMem(outputs[0], o_desc, req[0]);
90 MKLDNNStream::Get()->RegisterPrimArgs(
91 mkldnn::reorder(reorder_pd), {{MKLDNN_ARG_FROM, *i_mem}, {MKLDNN_ARG_TO, *o_mem.second}});
92 CommitOutput(outputs[0], o_mem);
93 MKLDNNStream::Get()->Submit();
94 }
95
MKLDNNRequantizeForward(const nnvm::NodeAttrs & attrs,const OpContext & ctx,const std::vector<NDArray> & inputs,const std::vector<OpReqType> & req,const std::vector<NDArray> & outputs)96 static void MKLDNNRequantizeForward(const nnvm::NodeAttrs& attrs,
97 const OpContext& ctx,
98 const std::vector<NDArray>& inputs,
99 const std::vector<OpReqType>& req,
100 const std::vector<NDArray>& outputs) {
101 using namespace mshadow;
102 using namespace mxnet_op;
103 using red::limits::MaxValue;
104 using red::limits::MinValue;
105 typedef int32_t SrcDType;
106 typedef int8_t DstDType;
107 const RequantizeParam& param = nnvm::get<RequantizeParam>(attrs.parsed);
108 float real_range;
109 // Model is calibrated
110 if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
111 real_range =
112 MaxAbs(param.min_calib_range.value(), param.max_calib_range.value());
113 // Model is not calibrated
114 } else {
115 NDArray in_buffer = inputs[0].Reorder2Default();
116 auto in_ptr = in_buffer.data().dptr<SrcDType>();
117 auto nthreads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
118 SrcDType data_min = MaxValue<SrcDType>();
119 SrcDType data_max = MinValue<SrcDType>();
120 std::vector<SrcDType> data_maxs(nthreads, data_max);
121 std::vector<SrcDType> data_mins(nthreads, data_min);
122 #pragma omp parallel for num_threads(nthreads)
123 for (index_t i = 0; i < static_cast<index_t>(in_buffer.shape().Size()); i++) {
124 int tid = omp_get_thread_num();
125 if (in_ptr[i] > data_maxs[tid]) data_maxs[tid] = in_ptr[i];
126 if (in_ptr[i] < data_mins[tid]) data_mins[tid] = in_ptr[i];
127 }
128 for (index_t i = 0; i < nthreads; i++) {
129 if (data_maxs[i] > data_max) data_max = data_maxs[i];
130 if (data_mins[i] < data_min) data_min = data_mins[i];
131 }
132 float src_range = MinAbs(MinValue<SrcDType>(), MaxValue<SrcDType>());
133 SrcDType data_range = MaxAbs(data_min, data_max);
134 float data_scale = MaxAbs(*inputs[1].data().dptr<float>(), *inputs[2].data().dptr<float>());
135 real_range = data_range * data_scale / src_range;
136 }
137 auto out_type = GetQuantizeOutputType(param);
138 if (out_type == mshadow::kUint8) {
139 MKLDNNRequantizeForwardKer<uint8_t>(attrs, ctx, inputs, req, outputs, real_range);
140 } else if (out_type == mshadow::kInt8) {
141 MKLDNNRequantizeForwardKer<int8_t>(attrs, ctx, inputs, req, outputs, real_range);
142 } else {
143 LOG(FATAL) << "mkldnn requantize op only supports int8 and uint8 as output type";
144 }
145 }
146
147 } // namespace op
148 } // namespace mxnet
149
150 #endif // MXNET_USE_MKLDNN == 1
151 #endif // MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
152