1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  *   http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied.  See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
/*!
 * \file mkldnn_requantize-inl.h
 * \brief MKL-DNN implementation of the requantize operator
 *        (int32 -> int8/uint8 re-scaling via a reorder primitive).
 * \author Jin Huang, Xinyu Chen
 */
24 
25 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
26 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
27 #if MXNET_USE_MKLDNN == 1
28 #include <string>
29 #include <algorithm>
30 #include <vector>
31 #include "../requantize-inl.h"
32 #include "../../nn/mkldnn/mkldnn_base-inl.h"
33 
34 namespace mxnet {
35 namespace op {
36 
37 template <typename DstType>
MKLDNNRequantizeForwardKer(const nnvm::NodeAttrs & attrs,const OpContext & ctx,const std::vector<NDArray> & inputs,const std::vector<OpReqType> & req,const std::vector<NDArray> & outputs,const float real_range)38 static void MKLDNNRequantizeForwardKer(const nnvm::NodeAttrs& attrs,
39                                        const OpContext& ctx,
40                                        const std::vector<NDArray>& inputs,
41                                        const std::vector<OpReqType>& req,
42                                        const std::vector<NDArray>& outputs,
43                                        const float real_range) {
44   using namespace mshadow;
45   using namespace mxnet_op;
46   using red::limits::MaxValue;
47   using red::limits::MinValue;
48   typedef int32_t SrcDType;
49   // check shapes
50   size_t i_dim = inputs[0].shape().ndim();
51   size_t o_dim = outputs[0].shape().ndim();
52   CHECK_EQ(i_dim, o_dim);
53   float first_quantized_range = MinAbs(MinValue<SrcDType>(),
54                                        MaxValue<SrcDType>());
55   float first_real_range = MaxAbs(*inputs[1].data().dptr<float>(),
56                                   *inputs[2].data().dptr<float>());
57   float first_scale = first_real_range / first_quantized_range;
58   float second_real_range = real_range;
59   float second_quantized_range = 0.f;
60   if (std::is_same<DstType, int8_t>::value) {
61     second_quantized_range = MinAbs(MaxValue<DstType>(), MinValue<DstType>());
62     *outputs[1].data().dptr<float>() = -second_real_range;
63     *outputs[2].data().dptr<float>() = second_real_range;
64   } else if (std::is_same<DstType, uint8_t>::value) {
65     second_quantized_range = MaxValue<DstType>();
66     *outputs[1].data().dptr<float>() = 0.f;
67     *outputs[2].data().dptr<float>() = second_real_range;
68   } else {
69     LOG(FATAL) << "Unsupported requantize output type";
70   }
71   float second_scale = second_quantized_range / second_real_range;
72   float scale = first_scale * second_scale;
73 
74   mkldnn::primitive_attr attr;
75   const int mask = 0;
76   std::vector<float> scales = {scale};
77   attr.set_output_scales(mask, scales);
78   mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
79 
80   NDArray in_buffer = inputs[0];
81   if (inputs[0].IsView() && inputs[0].IsMKLDNNData())
82     in_buffer = inputs[0].Reorder2Default();
83 
84   auto i_mem = in_buffer.GetMKLDNNData();
85   auto i_desc = i_mem->get_desc();
86   auto o_desc = i_desc;
87   o_desc.data.data_type = get_mkldnn_type_t<DstType>();
88   auto reorder_pd  = mkldnn::reorder::primitive_desc(cpu_engine, i_desc, cpu_engine, o_desc, attr);
89   auto o_mem = CreateMKLDNNMem(outputs[0], o_desc, req[0]);
90   MKLDNNStream::Get()->RegisterPrimArgs(
91       mkldnn::reorder(reorder_pd), {{MKLDNN_ARG_FROM, *i_mem}, {MKLDNN_ARG_TO, *o_mem.second}});
92   CommitOutput(outputs[0], o_mem);
93   MKLDNNStream::Get()->Submit();
94 }
95 
MKLDNNRequantizeForward(const nnvm::NodeAttrs & attrs,const OpContext & ctx,const std::vector<NDArray> & inputs,const std::vector<OpReqType> & req,const std::vector<NDArray> & outputs)96 static void MKLDNNRequantizeForward(const nnvm::NodeAttrs& attrs,
97                                     const OpContext& ctx,
98                                     const std::vector<NDArray>& inputs,
99                                     const std::vector<OpReqType>& req,
100                                     const std::vector<NDArray>& outputs) {
101   using namespace mshadow;
102   using namespace mxnet_op;
103   using red::limits::MaxValue;
104   using red::limits::MinValue;
105   typedef int32_t SrcDType;
106   typedef int8_t  DstDType;
107   const RequantizeParam& param = nnvm::get<RequantizeParam>(attrs.parsed);
108   float real_range;
109   // Model is calibrated
110   if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
111     real_range =
112           MaxAbs(param.min_calib_range.value(), param.max_calib_range.value());
113   // Model is not calibrated
114   } else {
115     NDArray in_buffer = inputs[0].Reorder2Default();
116     auto in_ptr = in_buffer.data().dptr<SrcDType>();
117     auto nthreads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
118     SrcDType data_min = MaxValue<SrcDType>();
119     SrcDType data_max = MinValue<SrcDType>();
120     std::vector<SrcDType> data_maxs(nthreads, data_max);
121     std::vector<SrcDType> data_mins(nthreads, data_min);
122 #pragma omp parallel for num_threads(nthreads)
123     for (index_t i = 0; i < static_cast<index_t>(in_buffer.shape().Size()); i++) {
124       int tid = omp_get_thread_num();
125       if (in_ptr[i] > data_maxs[tid]) data_maxs[tid] = in_ptr[i];
126       if (in_ptr[i] < data_mins[tid]) data_mins[tid] = in_ptr[i];
127     }
128     for (index_t i = 0; i < nthreads; i++) {
129       if (data_maxs[i] > data_max) data_max = data_maxs[i];
130       if (data_mins[i] < data_min) data_min = data_mins[i];
131     }
132     float src_range = MinAbs(MinValue<SrcDType>(), MaxValue<SrcDType>());
133     SrcDType data_range = MaxAbs(data_min, data_max);
134     float data_scale = MaxAbs(*inputs[1].data().dptr<float>(), *inputs[2].data().dptr<float>());
135     real_range = data_range * data_scale / src_range;
136   }
137   auto out_type = GetQuantizeOutputType(param);
138   if (out_type == mshadow::kUint8) {
139     MKLDNNRequantizeForwardKer<uint8_t>(attrs, ctx, inputs, req, outputs, real_range);
140   } else if (out_type == mshadow::kInt8) {
141     MKLDNNRequantizeForwardKer<int8_t>(attrs, ctx, inputs, req, outputs, real_range);
142   } else {
143     LOG(FATAL) << "mkldnn requantize op only supports int8 and uint8 as output type";
144   }
145 }
146 
147 }  // namespace op
148 }  // namespace mxnet
149 
150 #endif  // MXNET_USE_MKLDNN == 1
151 #endif  // MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
152