1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 /*!
20 * \file bilinear_resize.cc
21 * \brief bilinear resize operator
22 * \author Hang Zhang
23 */
24 #include "bilinear_resize-inl.h"
25 #include "../elemwise_op_common.h"
26
27 namespace mxnet {
28 namespace op {
29
30 using namespace mshadow;
31
32 template<typename xpu, typename DType, typename AccReal>
SpatialUpSamplingBilinearUpdateOutput(mshadow::Stream<cpu> * s,const std::vector<TBlob> & input,const std::vector<TBlob> & output,bool align_corners)33 void SpatialUpSamplingBilinearUpdateOutput(mshadow::Stream<cpu> *s,
34 const std::vector<TBlob> &input,
35 const std::vector<TBlob> &output,
36 bool align_corners) {
37 Tensor<xpu, 4, DType> itensor = input[0].get<xpu, 4, DType>(s);
38 Tensor<xpu, 4, DType> otensor = output[0].get<xpu, 4, DType>(s);
39 int nbatch = otensor.size(0);
40 int channels = otensor.size(1);
41 int outputHeight = otensor.size(2);
42 int outputWidth = otensor.size(3);
43 int inputHeight = itensor.size(2);
44 int inputWidth = itensor.size(3);
45
46 const auto nthreads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
47
48 DType *idata = itensor.dptr_;
49 DType *odata = otensor.dptr_;
50 channels = nbatch * channels;
51 const int input_elems_per_channel = inputWidth * inputHeight;
52 const int output_elems_per_channel = outputWidth * outputHeight;
53
54 // special case: just copy
55 if (inputHeight == outputHeight && inputWidth == outputWidth) {
56 #pragma omp parallel for num_threads(nthreads)
57 for (int index = 0; index < output_elems_per_channel; index++) {
58 const int h2 = index / outputWidth;
59 const int h1 = h2;
60 const int w2 = index % outputWidth;
61 const int w1 = w2;
62 const DType* pos1 = &idata[h1 * inputWidth + w1];
63 DType* pos2 = &odata[index];
64 for (int c = 0; c < channels; ++c) {
65 *pos2 = *pos1;
66 pos1 += input_elems_per_channel;
67 pos2 += output_elems_per_channel;
68 }
69 }
70 return;
71 }
72 const float rheight = area_pixel_compute_scale<float>(
73 inputHeight, outputHeight, align_corners);
74 const float rwidth = area_pixel_compute_scale<float>(
75 inputWidth, outputWidth, align_corners);
76
77 #pragma omp parallel for num_threads(nthreads)
78 for (int index = 0; index < output_elems_per_channel; index++) {
79 const int h2 = index / outputWidth;
80 const int w2 = index % outputWidth;
81
82 const float h1r = area_pixel_compute_source_index<float>(
83 rheight, h2, align_corners, false);
84 const int h1 = h1r;
85 const int h1p = (h1 < inputHeight - 1) ? 1 : 0;
86 const DType h1lambda = h1r - h1;
87 const DType h0lambda = (DType)1. - h1lambda;
88
89 const float w1r = area_pixel_compute_source_index<float>(
90 rwidth, w2, align_corners, false);
91 const int w1 = w1r;
92 const int w1p = (w1 < inputWidth - 1) ? 1 : 0;
93 const DType w1lambda = w1r - w1;
94 const DType w0lambda = (DType)1. - w1lambda;
95 const DType* pos1 = &idata[h1 * inputWidth + w1];
96 DType* pos2 = &odata[index];
97
98 for (int c = 0; c < channels; ++c) {
99 *pos2 = h0lambda * (w0lambda * (*pos1) + w1lambda * *(pos1 + w1p))
100 + h1lambda * (w0lambda * *(pos1 + h1p * inputWidth)
101 + w1lambda * *(pos1 + h1p * inputWidth + w1p));
102 pos1 += input_elems_per_channel;
103 pos2 += output_elems_per_channel;
104 }
105 }
106 }
107
108 template<typename xpu, typename DType, typename AccReal>
SpatialUpSamplingBilinearUpdateGradInput(mshadow::Stream<cpu> * s,const std::vector<TBlob> & input,const std::vector<TBlob> & output,bool modeLike,bool align_corners)109 void SpatialUpSamplingBilinearUpdateGradInput(mshadow::Stream<cpu> *s,
110 const std::vector<TBlob> &input,
111 const std::vector<TBlob> &output,
112 bool modeLike,
113 bool align_corners) {
114 Tensor<xpu, 4, DType> gradOutput = input[0].get<xpu, 4, DType>(s);
115 Tensor<xpu, 4, DType> gradInput = output[0].get<xpu, 4, DType>(s);
116
117 int nbatch = gradInput.size(0);
118 int channels = gradInput.size(1);
119 int outputHeight = gradOutput.size(2);
120 int outputWidth = gradOutput.size(3);
121 int inputHeight = gradInput.size(2);
122 int inputWidth = gradInput.size(3);
123
124 const auto nthreads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
125
126 DType *dataInput = gradInput.dptr_;
127 DType *dataOutput = gradOutput.dptr_;
128 channels = nbatch * channels;
129 const int input_elems_per_channel = inputWidth * inputHeight;
130 const int output_elems_per_channel = outputWidth * outputHeight;
131
132 // special case: same-size matching grids
133 if (inputHeight == outputHeight && inputWidth == outputWidth) {
134 #pragma omp parallel for num_threads(nthreads)
135 for (int index = 0; index < output_elems_per_channel; index++) {
136 const int h2 = index / outputWidth;
137 const int h1 = h2;
138 const int w2 = index % outputWidth;
139 const int w1 = w2;
140 DType* pos1 = &dataInput[h1 * inputWidth + w1];
141 const DType* pos2 = &dataOutput[index];
142 for (int c = 0; c < channels; ++c) {
143 *pos1 += *pos2;
144 pos1 += input_elems_per_channel;
145 pos2 += output_elems_per_channel;
146 }
147 }
148 return;
149 }
150 const float rheight = area_pixel_compute_scale<float>(
151 inputHeight, outputHeight, align_corners);
152 const float rwidth = area_pixel_compute_scale<float>(
153 inputWidth, outputWidth, align_corners);
154 #pragma omp parallel for num_threads(nthreads)
155 for (int index = 0; index < output_elems_per_channel; index++) {
156 const int h2 = index / outputWidth;
157 const int w2 = index % outputWidth;
158
159 const float h1r = area_pixel_compute_source_index<float>(
160 rheight, h2, align_corners, false);
161 const int h1 = h1r;
162 const int h1p = (h1 < inputHeight - 1) ? 1 : 0;
163 const DType h1lambda = h1r - h1;
164 const DType h0lambda = (DType)1. - h1lambda;
165
166 const float w1r = area_pixel_compute_source_index<float>(
167 rwidth, w2, align_corners, false);
168 const int w1 = w1r;
169 const int w1p = (w1 < inputWidth - 1) ? 1 : 0;
170 const DType w1lambda = w1r - w1;
171 const DType w0lambda = (DType)1. - w1lambda;
172
173 DType* posInput = &dataInput[h1 * inputWidth + w1];
174 const DType* posOutput = &dataOutput[index];
175 for (int c = 0; c < channels; ++c) {
176 #pragma omp critical
177 {
178 *posInput += h0lambda * w0lambda * (*posOutput);
179 *(posInput + w1p) += h0lambda * w1lambda * (*posOutput);
180 *(posInput + h1p * inputWidth) += h1lambda * w0lambda * (*posOutput);
181 *(posInput + h1p * inputWidth + w1p) += h1lambda * w1lambda * (*posOutput);
182 }
183 posInput += input_elems_per_channel;
184 posOutput += output_elems_per_channel;
185 }
186 }
187
188 if (modeLike) {
189 Tensor<xpu, 4, DType> gradInputLike = output[1].get<xpu, 4, DType>(s);
190 int inputHeightLike = gradInputLike.size(2);
191 int inputWidthLike = gradInputLike.size(3);
192 DType *dataInputLike = gradInputLike.dptr_;
193 int channelsLike = nbatch * gradInputLike.size(1);
194
195 const int inputLike_elems_per_channel = inputHeightLike * inputWidthLike;
196 #pragma omp parallel for num_threads(nthreads)
197 for (int index = 0; index < inputLike_elems_per_channel; index++) {
198 DType *posInput = &dataInputLike[index];
199 for (int c = 0; c < channelsLike; ++c) {
200 *posInput = 0;
201 posInput += inputLike_elems_per_channel;
202 }
203 }
204 }
205 }
206
207 DMLC_REGISTER_PARAMETER(BilinearSampleParam);
208
209 NNVM_REGISTER_OP(_contrib_BilinearResize2D)
210 .describe(R"code(
211 Perform 2D resizing (upsampling or downsampling) for 4D input using bilinear interpolation.
212
213 Expected input is a 4 dimensional NDArray (NCHW) and the output
214 with the shape of (N x C x height x width).
215 The key idea of bilinear interpolation is to perform linear interpolation
216 first in one direction, and then again in the other direction. See the wikipedia of
217 `Bilinear interpolation <https://en.wikipedia.org/wiki/Bilinear_interpolation>`_
218 for more details.
219 )code" ADD_FILELINE)
220 .set_attr_parser(ParamParser<BilinearSampleParam>)
221 .set_num_inputs(BilinearSampleOpNumInputs)
222 .set_num_outputs(1)
223 .set_attr<nnvm::FListInputNames>("FListInputNames", BilinearSampleOpInputNames)
224 .set_attr<mxnet::FInferShape>("FInferShape", BilinearSampleOpInferShape)
225 .set_attr<FCompute>("FCompute<cpu>", BilinearSampleOpForward<cpu>)
226 .set_attr<nnvm::FGradient>("FGradient",
227 ElemwiseGradUseNone{"_backward_contrib_BilinearResize2D"})
228 .add_argument("data", "NDArray-or-Symbol", "Input data")
229 .add_argument("like", "NDArray-or-Symbol", "Resize data to it's shape")
230 .add_arguments(BilinearSampleParam::__FIELDS__());
231
// Backward operator registration: consumes the single output gradient and
// produces the data gradient (and, presumably in like-mode, a gradient for
// the "like" input — see BilinearSampleOpNumBackwardOutputs).
NNVM_REGISTER_OP(_backward_contrib_BilinearResize2D)
.set_attr_parser(ParamParser<BilinearSampleParam>)
.set_num_inputs(1)
.set_num_outputs(BilinearSampleOpNumBackwardOutputs)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FCompute>("FCompute<cpu>", BilinearSampleOpBackward<cpu>);
238
239
240 } // namespace op
241 } // namespace mxnet
242