/*!
 ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
 *
 * COPYRIGHT
 *
 * All contributions by the University of California:
 * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
 * All rights reserved.
 *
 * All other contributions:
 * Copyright (c) 2014-2017, the respective contributors
 * All rights reserved.
 *
 * Caffe uses a shared copyright model: each contributor holds copyright over
 * their contributions to Caffe. The project versioning records all such
 * contribution and copyright details. If a contributor wants to further mark
 * their specific copyright on a particular contribution, they should indicate
 * their copyright solely in the commit message of the change when it is
 * committed.
 *
 * LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * CONTRIBUTION AGREEMENT
 *
 * By contributing to the BVLC/caffe repository through pull-request, comment,
 * or otherwise, the contributor releases their content to the
 * license and copyright terms herein.
 *
 ***************** END Caffe Copyright Notice and Disclaimer ********************
 *
 * \file pool.h
 * \brief Function definitions for pooling 1/2/3-D images.
 * We adopted the 2-D image pixel looping from Caffe and extended it to the 1-D and 3-D cases.
 * \ref https://github.com/BVLC/caffe/blob/master/src/caffe/layers/pooling_layer.cpp
 * \author Jun Wu
 */

#ifndef MXNET_OPERATOR_NN_POOL_H_
#define MXNET_OPERATOR_NN_POOL_H_

#include <mxnet/base.h>
#include <mxnet/operator.h>
#include <vector>
#include <algorithm>
#include "./pool_utils.h"
#include "../mxnet_op.h"
#include "../mshadow_op.h"

namespace mxnet {
namespace op {

namespace pool_enum {
enum PoolingOpInputs {kData};
enum PoolingOpOutputs {kOut, kMask};
enum PoolingOpType {kMaxPooling, kAvgPooling, kSumPooling, kLpPooling};
enum PoolingOpPadConventionType {kValid, kFull, kSame};
}  // namespace pool_enum
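
// A note on the pad conventions above (an illustrative sketch; the
// authoritative computation lives in the pooling operator's shape inference,
// not in this header). With input width x, kernel k, pad p and stride s, the
// usual output widths are:
//   kValid: floor((x + 2*p - k) / s) + 1
//   kFull:  ceil((x + 2*p - k) / s) + 1
//   kSame:  ceil(x / s)
// For example, x = 7, k = 2, p = 0, s = 2 gives floor(5/2) + 1 = 3 pooled
// elements under kValid but ceil(5/2) + 1 = 4 under kFull.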

/*!
 * \brief max pooling cpu function for 1-D images in 'ncw' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType>
inline void pool_max_1d_ncw_cpu(const DType *in_data, const mxnet::TShape &ishape,
                                const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                const mxnet::TShape &pad, const mxnet::TShape &stride,
                                DType *out_data) {
  using mshadow::red::limits::MinValue;
  const int width = ishape[2];
  const int pooled_width = oshape[2];
  const int kernel_w = kernel[0];
  const int pad_w = pad[0];
  const int stride_w = stride[0];
  const index_t in_data_offset = ishape[2];
  const index_t out_data_offset = oshape[2];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int wstart = pw * stride_w - pad_w;
        int wend = std::min(wstart + kernel_w, width);
        wstart = std::max(wstart, 0);
        DType max_val = MinValue<DType>();
        for (int w = wstart; w < wend; ++w) {
          if (in_data[w] > max_val) {
            max_val = in_data[w];
          }
        }
        out_data[pw] = max_val;
      }
      in_data += in_data_offset;
      out_data += out_data_offset;
    }
  }
}
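
// Worked example (illustrative, not part of the operator): for a 1x1x6 input
// [1, 3, 2, 5, 4, 0] with kernel_w = 2, stride_w = 2 and pad_w = 0, the
// windows are [1,3], [2,5], [4,0], so pool_max_1d_ncw_cpu writes [3, 5, 4].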

/*!
 * \brief max pooling cpu function for 1-D images in 'nwc' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType>
inline void pool_max_1d_nwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
                                const mxnet::TShape& oshape, const mxnet::TShape& kernel,
                                const mxnet::TShape& pad, const mxnet::TShape& stride,
                                DType* out_data) {
  using mshadow::red::limits::MinValue;
  const int width = ishape[1];
  const int pooled_width = oshape[1];
  const int kernel_w = kernel[0];
  const int pad_w = pad[0];
  const int stride_w = stride[0];
  const int features = oshape[2];
  const index_t in_data_offset = ishape[1] * features;
  const index_t out_data_offset = oshape[1] * features;
  std::vector<DType> max_vals(features);
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int pw = 0; pw < pooled_width; ++pw) {
      int wstart = pw * stride_w - pad_w;
      int wend = std::min(wstart + kernel_w, width);
      wstart = std::max(wstart, 0);
      std::fill(max_vals.begin(), max_vals.end(), MinValue<DType>());
      for (int w = wstart; w < wend; ++w) {
        for (index_t c = 0; c < features; ++c) {
          if (in_data[w * features + c] > max_vals[c]) {
            max_vals[c] = in_data[w * features + c];
          }
        }
      }
      for (index_t c = 0; c < features; ++c)
        out_data[pw * features + c] = max_vals[c];
    }
    in_data += in_data_offset;
    out_data += out_data_offset;
  }
}
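
// In the channel-last ('nwc') kernels, element (n, w, c) of an NxWxC tensor
// lives at flat offset n*W*C + w*C + c, so all channels of one spatial
// position are contiguous; the per-channel maxima are therefore accumulated
// in the max_vals buffer while each window is scanned once.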

/*!
 * \brief max pooling cpu function for 2-D images in 'nchw' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType>
inline void pool_max_2d_nchw_cpu(const DType *in_data, const mxnet::TShape &ishape,
                                 const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                 const mxnet::TShape &pad, const mxnet::TShape &stride,
                                 DType *out_data) {
  using mshadow::red::limits::MinValue;
  const int height = ishape[2], width = ishape[3];
  const int pooled_height = oshape[2], pooled_width = oshape[3];
  const int kernel_h = kernel[0], kernel_w = kernel[1];
  const int pad_h = pad[0], pad_w = pad[1];
  const int stride_h = stride[0], stride_w = stride[1];
  const index_t in_data_offset = ishape[2] * ishape[3];
  const index_t out_data_offset = oshape[2] * oshape[3];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int hstart = ph * stride_h - pad_h;
          int wstart = pw * stride_w - pad_w;
          int hend = std::min(hstart + kernel_h, height);
          int wend = std::min(wstart + kernel_w, width);
          hstart = std::max(hstart, 0);
          wstart = std::max(wstart, 0);
          const int pool_index = ph * pooled_width + pw;
          DType max_val = MinValue<DType>();
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              const int in_index = h * width + w;
              if (in_data[in_index] > max_val) {
                max_val = in_data[in_index];
              }
            }
          }
          out_data[pool_index] = max_val;
        }
      }
      in_data += in_data_offset;
      out_data += out_data_offset;
    }
  }
}

/*!
 * \brief max pooling cpu function for 2-D images in 'nhwc' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType>
inline void pool_max_2d_nhwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
                                 const mxnet::TShape& oshape, const mxnet::TShape& kernel,
                                 const mxnet::TShape& pad, const mxnet::TShape& stride,
                                 DType* out_data) {
  using mshadow::red::limits::MinValue;
  const int height = ishape[1], width = ishape[2];
  const int pooled_height = oshape[1], pooled_width = oshape[2];
  const int kernel_h = kernel[0], kernel_w = kernel[1];
  const int pad_h = pad[0], pad_w = pad[1];
  const int stride_h = stride[0], stride_w = stride[1];
  const int features = oshape[3];
  const index_t in_data_offset = ishape[1] * ishape[2] * features;
  const index_t out_data_offset = oshape[1] * oshape[2] * features;
  std::vector<DType> max_vals(features);
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = ph * stride_h - pad_h;
        int wstart = pw * stride_w - pad_w;
        int hend = std::min(hstart + kernel_h, height);
        int wend = std::min(wstart + kernel_w, width);
        hstart = std::max(hstart, 0);
        wstart = std::max(wstart, 0);
        const int pool_index = ph * pooled_width + pw;
        std::fill(max_vals.begin(), max_vals.end(), MinValue<DType>());
        for (int h = hstart; h < hend; ++h) {
          for (int w = wstart; w < wend; ++w) {
            const int in_index = h * width + w;
            for (index_t c = 0; c < features; ++c) {
              if (in_data[in_index * features + c] > max_vals[c]) {
                max_vals[c] = in_data[in_index * features + c];
              }
            }
          }
        }
        for (index_t c = 0; c < features; ++c)
          out_data[pool_index * features + c] = max_vals[c];
      }
    }
    in_data += in_data_offset;
    out_data += out_data_offset;
  }
}

/*!
 * \brief max pooling cpu function for 3-D images in 'ncdhw' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType>
inline void pool_max_3d_ncdhw_cpu(const DType *in_data, const mxnet::TShape &ishape,
                                  const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                  const mxnet::TShape &pad, const mxnet::TShape &stride,
                                  DType *out_data) {
  using mshadow::red::limits::MinValue;
  const int depth = ishape[2], height = ishape[3], width = ishape[4];
  const int pooled_depth = oshape[2], pooled_height = oshape[3], pooled_width = oshape[4];
  const int kernel_d = kernel[0], kernel_h = kernel[1], kernel_w = kernel[2];
  const int pad_d = pad[0], pad_h = pad[1], pad_w = pad[2];
  const int stride_d = stride[0], stride_h = stride[1], stride_w = stride[2];
  const index_t in_data_offset = ishape[2] * ishape[3] * ishape[4];
  const index_t out_data_offset = oshape[2] * oshape[3] * oshape[4];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int pd = 0; pd < pooled_depth; ++pd) {
        for (int ph = 0; ph < pooled_height; ++ph) {
          for (int pw = 0; pw < pooled_width; ++pw) {
            int dstart = pd * stride_d - pad_d;
            int hstart = ph * stride_h - pad_h;
            int wstart = pw * stride_w - pad_w;
            int dend = std::min(dstart + kernel_d, depth);
            int hend = std::min(hstart + kernel_h, height);
            int wend = std::min(wstart + kernel_w, width);
            dstart = std::max(dstart, 0);
            hstart = std::max(hstart, 0);
            wstart = std::max(wstart, 0);
            const int pool_index = (pd * pooled_height + ph) * pooled_width + pw;
            DType max_val = MinValue<DType>();
            for (int d = dstart; d < dend; ++d) {
              for (int h = hstart; h < hend; ++h) {
                for (int w = wstart; w < wend; ++w) {
                  const int in_index = (d * height + h) * width + w;
                  if (in_data[in_index] > max_val) {
                    max_val = in_data[in_index];
                  }
                }
              }
            }
            out_data[pool_index] = max_val;
          }
        }
      }
      in_data += in_data_offset;
      out_data += out_data_offset;
    }
  }
}

/*!
 * \brief max pooling cpu function for 3-D images in 'ndhwc' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType>
inline void pool_max_3d_ndhwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
                                  const mxnet::TShape& oshape, const mxnet::TShape& kernel,
                                  const mxnet::TShape& pad, const mxnet::TShape& stride,
                                  DType* out_data) {
  using mshadow::red::limits::MinValue;
  const int depth = ishape[1], height = ishape[2], width = ishape[3];
  const int pooled_depth = oshape[1], pooled_height = oshape[2], pooled_width = oshape[3];
  const int kernel_d = kernel[0], kernel_h = kernel[1], kernel_w = kernel[2];
  const int pad_d = pad[0], pad_h = pad[1], pad_w = pad[2];
  const int stride_d = stride[0], stride_h = stride[1], stride_w = stride[2];
  const int features = oshape[4];
  const index_t in_data_offset = ishape[1] * ishape[2] * ishape[3] * features;
  const index_t out_data_offset = oshape[1] * oshape[2] * oshape[3] * features;
  std::vector<DType> max_vals(features);
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int pd = 0; pd < pooled_depth; ++pd) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int dstart = pd * stride_d - pad_d;
          int hstart = ph * stride_h - pad_h;
          int wstart = pw * stride_w - pad_w;
          int dend = std::min(dstart + kernel_d, depth);
          int hend = std::min(hstart + kernel_h, height);
          int wend = std::min(wstart + kernel_w, width);
          dstart = std::max(dstart, 0);
          hstart = std::max(hstart, 0);
          wstart = std::max(wstart, 0);
          const int pool_index = (pd * pooled_height + ph) * pooled_width + pw;
          std::fill(max_vals.begin(), max_vals.end(), MinValue<DType>());
          for (int d = dstart; d < dend; ++d) {
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                const int in_index = (d * height + h) * width + w;
                for (index_t c = 0; c < features; ++c) {
                  if (in_data[in_index * features + c] > max_vals[c]) {
                    max_vals[c] = in_data[in_index * features + c];
                  }
                }
              }
            }
          }
          for (index_t c = 0; c < features; ++c)
            out_data[pool_index * features + c] = max_vals[c];
        }
      }
    }
    in_data += in_data_offset;
    out_data += out_data_offset;
  }
}

/*!
 * \brief avg/sum pooling cpu function for 1-D images in 'ncw' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType, int p = 1>
inline void pool_sum_1d_ncw_cpu(const DType *in_data, const mxnet::TShape &ishape,
                                const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                const mxnet::TShape &pad, const mxnet::TShape &stride,
                                DType *out_data,
                                const bool get_avg = false, const bool count_include_pad = true) {
  using AccType = typename PoolingTypes<DType>::AccType;
  const int width = ishape[2];
  const int pooled_width = oshape[2];
  const int kernel_w = kernel[0];
  const int pad_w = pad[0];
  const int stride_w = stride[0];
  const index_t in_data_offset = ishape[2];
  const index_t out_data_offset = oshape[2];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int wstart = pw * stride_w - pad_w;
        int wend = std::min(wstart + kernel_w, width + pad_w);
        int pool_size = (get_avg ? (wend - wstart) : 1);
        wstart = std::max(wstart, 0);
        wend = std::min(wend, width);
        if (get_avg && !count_include_pad) {
          pool_size = (wend - wstart);
        }
        AccType sum = 0;
        for (int w = wstart; w < wend; ++w) {
          sum += a_pow_p<AccType, p>::Map(in_data[w]) / pool_size;
        }
        out_data[pw] = a_root_p<AccType, p>::Map(sum);
      }
      in_data += in_data_offset;
      out_data += out_data_offset;
    }
  }
}
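
// How one kernel covers avg, sum and Lp pooling (an illustrative summary;
// the exact semantics of a_pow_p/a_root_p live in pool_utils.h): with
// get_avg = false and p = 1, pool_size stays 1 and the window is simply
// summed; with get_avg = true each term is divided by pool_size, giving the
// mean; with p > 1 the kernel computes (sum_i x_i^p)^(1/p). For the window
// [2, 5] this yields 7 (sum), 3.5 (avg), and sqrt(29) for p = 2.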

/*!
 * \brief avg/sum pooling cpu function for 1-D images in 'nwc' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType, int p = 1>
inline void pool_sum_1d_nwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
                                const mxnet::TShape& oshape, const mxnet::TShape& kernel,
                                const mxnet::TShape& pad, const mxnet::TShape& stride,
                                DType* out_data,
                                const bool get_avg = false, const bool count_include_pad = true) {
  using AccType = typename PoolingTypes<DType>::AccType;
  const int width = ishape[1];
  const int pooled_width = oshape[1];
  const int kernel_w = kernel[0];
  const int pad_w = pad[0];
  const int stride_w = stride[0];
  const int features = oshape[2];
  const index_t in_data_offset = ishape[1] * features;
  const index_t out_data_offset = oshape[1] * features;
  std::vector<AccType> sums(features);
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int pw = 0; pw < pooled_width; ++pw) {
      int wstart = pw * stride_w - pad_w;
      int wend = std::min(wstart + kernel_w, width + pad_w);
      int pool_size = (get_avg ? (wend - wstart) : 1);
      wstart = std::max(wstart, 0);
      wend = std::min(wend, width);
      if (get_avg && !count_include_pad) {
        pool_size = (wend - wstart);
      }
      std::fill(sums.begin(), sums.end(), 0);
      for (int w = wstart; w < wend; ++w) {
        for (index_t c = 0; c < features; ++c) {
          sums[c] += a_pow_p<AccType, p>::Map(in_data[w * features + c]) / pool_size;
        }
      }
      for (index_t c = 0; c < features; ++c)
        out_data[pw * features + c] = a_root_p<AccType, p>::Map(sums[c]);
    }
    in_data += in_data_offset;
    out_data += out_data_offset;
  }
}

/*!
 * \brief avg/sum pooling cpu function for 2-D images in 'nchw' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType, int p = 1>
inline void pool_sum_2d_nchw_cpu(const DType *in_data, const mxnet::TShape &ishape,
                                 const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                 const mxnet::TShape &pad, const mxnet::TShape &stride,
                                 DType *out_data,
                                 const bool get_avg = false, const bool count_include_pad = true) {
  using AccType = typename PoolingTypes<DType>::AccType;
  const int height = ishape[2], width = ishape[3];
  const int pooled_height = oshape[2], pooled_width = oshape[3];
  const int kernel_h = kernel[0], kernel_w = kernel[1];
  const int pad_h = pad[0], pad_w = pad[1];
  const int stride_h = stride[0], stride_w = stride[1];
  const index_t in_data_offset = ishape[2] * ishape[3];
  const index_t out_data_offset = oshape[2] * oshape[3];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int hstart = ph * stride_h - pad_h;
          int wstart = pw * stride_w - pad_w;
          int hend = std::min(hstart + kernel_h, height + pad_h);
          int wend = std::min(wstart + kernel_w, width + pad_w);
          int pool_size = (get_avg ? (hend - hstart) * (wend - wstart) : 1);
          hstart = std::max(hstart, 0);
          wstart = std::max(wstart, 0);
          hend = std::min(hend, height);
          wend = std::min(wend, width);
          if (get_avg && !count_include_pad) {
            pool_size = (hend - hstart) * (wend - wstart);
          }
          AccType sum = 0;
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              sum += a_pow_p<AccType, p>::Map(in_data[h * width + w]) / pool_size;
            }
          }
          out_data[ph * pooled_width + pw] = a_root_p<AccType, p>::Map(sum);
        }
      }
      in_data += in_data_offset;
      out_data += out_data_offset;
    }
  }
}

/*!
 * \brief avg/sum pooling cpu function for 2-D images in 'nhwc' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType, int p = 1>
inline void pool_sum_2d_nhwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
                                 const mxnet::TShape& oshape, const mxnet::TShape& kernel,
                                 const mxnet::TShape& pad, const mxnet::TShape& stride,
                                 DType* out_data,
                                 const bool get_avg = false, const bool count_include_pad = true) {
  using AccType = typename PoolingTypes<DType>::AccType;
  const int height = ishape[1], width = ishape[2];
  const int pooled_height = oshape[1], pooled_width = oshape[2];
  const int kernel_h = kernel[0], kernel_w = kernel[1];
  const int pad_h = pad[0], pad_w = pad[1];
  const int stride_h = stride[0], stride_w = stride[1];
  const int features = oshape[3];
  const index_t in_data_offset = ishape[1] * ishape[2] * features;
  const index_t out_data_offset = oshape[1] * oshape[2] * features;
  std::vector<AccType> sums(features);
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = ph * stride_h - pad_h;
        int wstart = pw * stride_w - pad_w;
        int hend = std::min(hstart + kernel_h, height + pad_h);
        int wend = std::min(wstart + kernel_w, width + pad_w);
        int pool_size = (get_avg ? (hend - hstart) * (wend - wstart) : 1);
        hstart = std::max(hstart, 0);
        wstart = std::max(wstart, 0);
        hend = std::min(hend, height);
        wend = std::min(wend, width);
        if (get_avg && !count_include_pad) {
          pool_size = (hend - hstart) * (wend - wstart);
        }
        const int pool_index = ph * pooled_width + pw;
        std::fill(sums.begin(), sums.end(), 0);
        for (int h = hstart; h < hend; ++h) {
          for (int w = wstart; w < wend; ++w) {
            const int in_index = h * width + w;
            for (index_t c = 0; c < features; ++c) {
              sums[c] += a_pow_p<AccType, p>::Map(in_data[in_index * features + c]) / pool_size;
            }
          }
        }
        for (index_t c = 0; c < features; ++c)
          out_data[pool_index * features + c] = a_root_p<AccType, p>::Map(sums[c]);
      }
    }
    in_data += in_data_offset;
    out_data += out_data_offset;
  }
}

/*!
 * \brief avg/sum pooling cpu function for 3-D images in 'ncdhw' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType, int p = 1>
inline void pool_sum_3d_ncdhw_cpu(const DType *in_data, const mxnet::TShape &ishape,
                                  const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                  const mxnet::TShape &pad, const mxnet::TShape &stride,
                                  DType *out_data,
                                  const bool get_avg = false, const bool count_include_pad = true) {
  using AccType = typename PoolingTypes<DType>::AccType;
  const int depth = ishape[2], height = ishape[3], width = ishape[4];
  const int pooled_depth = oshape[2], pooled_height = oshape[3], pooled_width = oshape[4];
  const int kernel_d = kernel[0], kernel_h = kernel[1], kernel_w = kernel[2];
  const int pad_d = pad[0], pad_h = pad[1], pad_w = pad[2];
  const int stride_d = stride[0], stride_h = stride[1], stride_w = stride[2];
  const index_t in_data_offset = ishape[2] * ishape[3] * ishape[4];
  const index_t out_data_offset = oshape[2] * oshape[3] * oshape[4];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int pd = 0; pd < pooled_depth; ++pd) {
        for (int ph = 0; ph < pooled_height; ++ph) {
          for (int pw = 0; pw < pooled_width; ++pw) {
            int dstart = pd * stride_d - pad_d;
            int hstart = ph * stride_h - pad_h;
            int wstart = pw * stride_w - pad_w;
            int dend = std::min(dstart + kernel_d, depth + pad_d);
            int hend = std::min(hstart + kernel_h, height + pad_h);
            int wend = std::min(wstart + kernel_w, width + pad_w);
            int pool_size = (get_avg ? (dend - dstart) * (hend - hstart) * (wend - wstart) : 1);
            dstart = std::max(dstart, 0);
            hstart = std::max(hstart, 0);
            wstart = std::max(wstart, 0);
            dend = std::min(dend, depth);
            hend = std::min(hend, height);
            wend = std::min(wend, width);
            if (get_avg && !count_include_pad) {
              pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
            }
            AccType sum = 0;
            for (int d = dstart; d < dend; ++d) {
              for (int h = hstart; h < hend; ++h) {
                for (int w = wstart; w < wend; ++w) {
                  sum += a_pow_p<AccType, p>::Map(in_data[(d * height + h) * width + w]) / pool_size;
                }
              }
            }
            out_data[(pd * pooled_height + ph) * pooled_width + pw] = (pool_size == 0) ?
                AccType(nanf("")) : a_root_p<AccType, p>::Map(sum);
          }
        }
      }
      in_data += in_data_offset;
      out_data += out_data_offset;
    }
  }
}
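
// Note that only the 3-D avg/sum kernels guard against pool_size == 0 (which
// can arise when a window falls entirely inside the padded region) by
// emitting NaN; the 1-D and 2-D kernels above assume every window overlaps
// the input.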

/*!
 * \brief avg/sum pooling cpu function for 3-D images in 'ndhwc' layout.
 * Do not call this kernel directly. Use the interface pool().
 */
template<typename DType, int p = 1>
inline void pool_sum_3d_ndhwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
                                  const mxnet::TShape& oshape, const mxnet::TShape& kernel,
                                  const mxnet::TShape& pad, const mxnet::TShape& stride,
                                  DType* out_data,
                                  const bool get_avg = false, const bool count_include_pad = true) {
  using AccType = typename PoolingTypes<DType>::AccType;
  const int depth = ishape[1], height = ishape[2], width = ishape[3];
  const int pooled_depth = oshape[1], pooled_height = oshape[2], pooled_width = oshape[3];
  const int kernel_d = kernel[0], kernel_h = kernel[1], kernel_w = kernel[2];
  const int pad_d = pad[0], pad_h = pad[1], pad_w = pad[2];
  const int stride_d = stride[0], stride_h = stride[1], stride_w = stride[2];
  const int features = oshape[4];
  const index_t in_data_offset = ishape[1] * ishape[2] * ishape[3] * features;
  const index_t out_data_offset = oshape[1] * oshape[2] * oshape[3] * features;
  std::vector<AccType> sums(features);
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int pd = 0; pd < pooled_depth; ++pd) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int dstart = pd * stride_d - pad_d;
          int hstart = ph * stride_h - pad_h;
          int wstart = pw * stride_w - pad_w;
          int dend = std::min(dstart + kernel_d, depth + pad_d);
          int hend = std::min(hstart + kernel_h, height + pad_h);
          int wend = std::min(wstart + kernel_w, width + pad_w);
          int pool_size = (get_avg ? (dend - dstart) * (hend - hstart) * (wend - wstart) : 1);
          dstart = std::max(dstart, 0);
          hstart = std::max(hstart, 0);
          wstart = std::max(wstart, 0);
          dend = std::min(dend, depth);
          hend = std::min(hend, height);
          wend = std::min(wend, width);
          if (get_avg && !count_include_pad) {
            pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
          }
          const int pool_index = (pd * pooled_height + ph) * pooled_width + pw;
          std::fill(sums.begin(), sums.end(), 0);
          for (int d = dstart; d < dend; ++d) {
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                const int in_index = (d * height + h) * width + w;
                for (index_t c = 0; c < features; ++c) {
                  sums[c] += a_pow_p<AccType, p>::Map(in_data[in_index * features + c]) / pool_size;
                }
              }
            }
          }
          for (index_t c = 0; c < features; ++c)
            out_data[pool_index * features + c] = (pool_size == 0) ?
                AccType(nanf("")) : a_root_p<AccType, p>::Map(sums[c]);
        }
      }
    }
    in_data += in_data_offset;
    out_data += out_data_offset;
  }
}

/*!
 * \brief max unpooling cpu function for 1-D images in 'ncw' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType>
inline void unpool_max_1d_ncw_cpu(const DType *out_grad, const DType *in_data,
                                  const DType *out_data, const mxnet::TShape &ishape,
                                  const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                  const mxnet::TShape &pad, const mxnet::TShape &stride,
                                  DType *in_grad) {
  const int width = ishape[2];
  const int pooled_width = oshape[2];
  const int kernel_w = kernel[0];
  const int pad_w = pad[0];
  const int stride_w = stride[0];
  const index_t in_offset = ishape[2];
  const index_t out_offset = oshape[2];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int wstart = pw * stride_w - pad_w;
        int wend = std::min(wstart + kernel_w, width);
        wstart = std::max(wstart, 0);
        int max_idx = -1;
        for (int w = wstart; w < wend; ++w) {
          if (in_data[w] == out_data[pw]) {
            max_idx = w;
            break;
          }
        }
        // In the case where pad > 0 and kernel = 1, for example,
        // max_idx can still be -1 on reaching this step.
        if (max_idx >= 0) {
          in_grad[max_idx] += out_grad[pw];
        }
      }
      in_data += in_offset;
      in_grad += in_offset;
      out_data += out_offset;
      out_grad += out_offset;
    }
  }
}
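
// The unpool_max kernels recover the argmax by re-scanning each window for
// the first element equal to the pooled output, so with ties (e.g. a window
// [3, 3] whose max is 3) the whole gradient is routed to the first matching
// position only; out_grad is accumulated into in_grad, which the caller is
// expected to have zero-initialized.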

/*!
 * \brief max unpooling cpu function for 1-D images in 'nwc' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType>
inline void unpool_max_1d_nwc_cpu(const DType* out_grad, const DType* in_data,
                                  const DType* out_data, const mxnet::TShape& ishape,
                                  const mxnet::TShape& oshape, const mxnet::TShape& kernel,
                                  const mxnet::TShape& pad, const mxnet::TShape& stride,
                                  DType* in_grad) {
  const int width = ishape[1];
  const int pooled_width = oshape[1];
  const int kernel_w = kernel[0];
  const int pad_w = pad[0];
  const int stride_w = stride[0];
  const int features = oshape[2];
  const index_t in_offset = ishape[1] * features;
  const index_t out_offset = oshape[1] * features;
  std::vector<int> max_idxs(features);
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int pw = 0; pw < pooled_width; ++pw) {
      int wstart = pw * stride_w - pad_w;
      int wend = std::min(wstart + kernel_w, width);
      wstart = std::max(wstart, 0);
      std::fill(max_idxs.begin(), max_idxs.end(), -1);
      for (index_t c = 0; c < features; ++c) {
        for (int w = wstart; w < wend; ++w) {
          if (in_data[w * features + c] == out_data[pw * features + c]) {
            max_idxs[c] = w;
            break;
          }
        }
      }
      // In the case where pad > 0 and kernel = 1, for example,
      // max_idx can still be -1 on reaching this step.
      for (index_t c = 0; c < features; ++c) {
        if (max_idxs[c] >= 0) {
          in_grad[max_idxs[c] * features + c] += out_grad[pw * features + c];
        }
      }
    }
    in_data += in_offset;
    in_grad += in_offset;
    out_data += out_offset;
    out_grad += out_offset;
  }
}

/*!
 * \brief max unpooling cpu function for 2-D images in 'nchw' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType>
inline void unpool_max_2d_nchw_cpu(const DType *out_grad, const DType *in_data,
                                   const DType *out_data, const mxnet::TShape &ishape,
                                   const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                   const mxnet::TShape &pad, const mxnet::TShape &stride,
                                   DType *in_grad) {
  const int height = ishape[2], width = ishape[3];
  const int pooled_height = oshape[2], pooled_width = oshape[3];
  const int kernel_h = kernel[0], kernel_w = kernel[1];
  const int pad_h = pad[0], pad_w = pad[1];
  const int stride_h = stride[0], stride_w = stride[1];
  const index_t in_offset = ishape[2] * ishape[3];
  const index_t out_offset = oshape[2] * oshape[3];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int hstart = ph * stride_h - pad_h;
          int wstart = pw * stride_w - pad_w;
          int hend = std::min(hstart + kernel_h, height);
          int wend = std::min(wstart + kernel_w, width);
          hstart = std::max(hstart, 0);
          wstart = std::max(wstart, 0);
          const int pool_index = ph * pooled_width + pw;
          int max_idx = -1;
          bool found = false;
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              const int idx = h * width + w;
              if (in_data[idx] == out_data[pool_index]) {
                max_idx = idx;
                found = true;
                break;
              }
            }
            if (found) break;
          }
          // In the case where pad > 0 and kernel = 1, for example,
          // max_idx can still be -1 on reaching this step.
          if (max_idx >= 0) {
            in_grad[max_idx] += out_grad[pool_index];
          }
        }
      }
      in_data += in_offset;
      in_grad += in_offset;
      out_data += out_offset;
      out_grad += out_offset;
    }
  }
}

/*!
 * \brief max unpooling cpu function for 2-D images in 'nhwc' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType>
inline void unpool_max_2d_nhwc_cpu(const DType* out_grad, const DType* in_data,
                                   const DType* out_data, const mxnet::TShape& ishape,
                                   const mxnet::TShape& oshape, const mxnet::TShape& kernel,
                                   const mxnet::TShape& pad, const mxnet::TShape& stride,
                                   DType* in_grad) {
  const int height = ishape[1], width = ishape[2];
  const int pooled_height = oshape[1], pooled_width = oshape[2];
  const int kernel_h = kernel[0], kernel_w = kernel[1];
  const int pad_h = pad[0], pad_w = pad[1];
  const int stride_h = stride[0], stride_w = stride[1];
  const int features = oshape[3];
  const index_t in_offset = ishape[1] * ishape[2] * features;
  const index_t out_offset = oshape[1] * oshape[2] * features;
  std::vector<int> max_idxs(features);
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = ph * stride_h - pad_h;
        int wstart = pw * stride_w - pad_w;
        int hend = std::min(hstart + kernel_h, height);
        int wend = std::min(wstart + kernel_w, width);
        hstart = std::max(hstart, 0);
        wstart = std::max(wstart, 0);
        const int pool_index = ph * pooled_width + pw;
        std::fill(max_idxs.begin(), max_idxs.end(), -1);
        for (index_t c = 0; c < features; ++c) {
          bool found = false;
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              const int idx = h * width + w;
              if (in_data[idx * features + c] == out_data[pool_index * features + c]) {
                max_idxs[c] = idx;
                found = true;
                break;
              }
            }
            if (found) break;
          }
        }
        // In the case where pad > 0 and kernel = 1, for example,
        // max_idx can still be -1 on reaching this step.
        for (index_t c = 0; c < features; ++c) {
          if (max_idxs[c] >= 0) {
            in_grad[max_idxs[c] * features + c] += out_grad[pool_index * features + c];
          }
        }
      }
    }
    in_data += in_offset;
    in_grad += in_offset;
    out_data += out_offset;
    out_grad += out_offset;
  }
}

/*!
 * \brief max unpooling cpu function for 3-D images in 'ncdhw' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType>
inline void unpool_max_3d_ncdhw_cpu(const DType *out_grad, const DType *in_data,
                                    const DType *out_data, const mxnet::TShape &ishape,
                                    const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                    const mxnet::TShape &pad, const mxnet::TShape &stride,
                                    DType *in_grad) {
  const int depth = ishape[2], height = ishape[3], width = ishape[4];
  const int pooled_depth = oshape[2], pooled_height = oshape[3], pooled_width = oshape[4];
  const int kernel_d = kernel[0], kernel_h = kernel[1], kernel_w = kernel[2];
  const int pad_d = pad[0], pad_h = pad[1], pad_w = pad[2];
  const int stride_d = stride[0], stride_h = stride[1], stride_w = stride[2];
  const index_t in_offset = ishape[2] * ishape[3] * ishape[4];
  const index_t out_offset = oshape[2] * oshape[3] * oshape[4];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int pd = 0; pd < pooled_depth; ++pd) {
        for (int ph = 0; ph < pooled_height; ++ph) {
          for (int pw = 0; pw < pooled_width; ++pw) {
            int dstart = pd * stride_d - pad_d;
            int hstart = ph * stride_h - pad_h;
            int wstart = pw * stride_w - pad_w;
            int dend = std::min(dstart + kernel_d, depth);
            int hend = std::min(hstart + kernel_h, height);
            int wend = std::min(wstart + kernel_w, width);
            dstart = std::max(dstart, 0);
            hstart = std::max(hstart, 0);
            wstart = std::max(wstart, 0);
            const int pool_index = (pd * pooled_height + ph) * pooled_width + pw;
            int max_idx = -1;
            bool found = false;
            for (int d = dstart; d < dend; ++d) {
              for (int h = hstart; h < hend; ++h) {
                for (int w = wstart; w < wend; ++w) {
                  const int idx = (d * height + h) * width + w;
                  if (in_data[idx] == out_data[pool_index]) {
                    max_idx = idx;
                    found = true;
                    break;
                  }
                }
                if (found) break;
              }
              if (found) break;
            }
            // In the case where pad > 0 and kernel = 1, for example,
            // max_idx can still be -1 on reaching this step.
            if (max_idx >= 0) {
              in_grad[max_idx] += out_grad[pool_index];
            }
          }
        }
      }
      in_data += in_offset;
      in_grad += in_offset;
      out_data += out_offset;
      out_grad += out_offset;
    }
  }
}

/*!
 * \brief max unpooling cpu function for 3-D images in 'ndhwc' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType>
inline void unpool_max_3d_ndhwc_cpu(const DType* out_grad, const DType* in_data,
                                    const DType* out_data, const mxnet::TShape& ishape,
                                    const mxnet::TShape& oshape, const mxnet::TShape& kernel,
                                    const mxnet::TShape& pad, const mxnet::TShape& stride,
                                    DType* in_grad) {
  const int depth = ishape[1], height = ishape[2], width = ishape[3];
  const int pooled_depth = oshape[1], pooled_height = oshape[2], pooled_width = oshape[3];
  const int kernel_d = kernel[0], kernel_h = kernel[1], kernel_w = kernel[2];
  const int pad_d = pad[0], pad_h = pad[1], pad_w = pad[2];
  const int stride_d = stride[0], stride_h = stride[1], stride_w = stride[2];
  const int features = oshape[4];
  const index_t in_offset = ishape[1] * ishape[2] * ishape[3] * features;
  const index_t out_offset = oshape[1] * oshape[2] * oshape[3] * features;
  std::vector<int> max_idxs(features);
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int pd = 0; pd < pooled_depth; ++pd) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int dstart = pd * stride_d - pad_d;
          int hstart = ph * stride_h - pad_h;
          int wstart = pw * stride_w - pad_w;
          int dend = std::min(dstart + kernel_d, depth);
          int hend = std::min(hstart + kernel_h, height);
          int wend = std::min(wstart + kernel_w, width);
          dstart = std::max(dstart, 0);
          hstart = std::max(hstart, 0);
          wstart = std::max(wstart, 0);
          const int pool_index = (pd * pooled_height + ph) * pooled_width + pw;
          std::fill(max_idxs.begin(), max_idxs.end(), -1);
          for (index_t c = 0; c < features; ++c) {
            bool found = false;
            for (int d = dstart; d < dend; ++d) {
              for (int h = hstart; h < hend; ++h) {
                for (int w = wstart; w < wend; ++w) {
                  const int idx = (d * height + h) * width + w;
                  if (in_data[idx * features + c] == out_data[pool_index * features + c]) {
                    max_idxs[c] = idx;
                    found = true;
                    break;
                  }
                }
                if (found) break;
              }
              if (found) break;
            }
          }
          // In the case where pad > 0 and kernel = 1, for example,
          // max_idx can still be -1 on reaching this step.
          for (index_t c = 0; c < features; ++c) {
            if (max_idxs[c] >= 0) {
              in_grad[max_idxs[c] * features + c] += out_grad[pool_index * features + c];
            }
          }
        }
      }
    }
    in_data += in_offset;
    in_grad += in_offset;
    out_data += out_offset;
    out_grad += out_offset;
  }
}

/*!
 * \brief avg/sum unpooling cpu function for 1-D images in 'ncw' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType, int p = 1>
inline void unpool_sum_1d_ncw_cpu(const DType *out_grad, const DType *in_data,
                                  const DType *out_data,
                                  const mxnet::TShape &ishape, const mxnet::TShape &oshape,
                                  const mxnet::TShape &kernel, const mxnet::TShape &pad,
                                  const mxnet::TShape &stride, DType *in_grad,
                                  const bool is_avg = false, const bool count_include_pad = true) {
  const int width = ishape[2];
  const int pooled_width = oshape[2];
  const int kernel_w = kernel[0];
  const int pad_w = pad[0];
  const int stride_w = stride[0];
  const index_t in_grad_offset = ishape[2];
  const index_t out_grad_offset = oshape[2];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int wstart = pw * stride_w - pad_w;
        int wend = std::min(wstart + kernel_w, width + pad_w);
        int pool_size = (is_avg ? (wend - wstart) : 1);
        wstart = std::max(wstart, 0);
        wend = std::min(wend, width);
        if (is_avg && !count_include_pad) {
          pool_size = (wend - wstart);
        }
        for (int w = wstart; w < wend; ++w) {
          in_grad[w] += lp_grad<DType, p>::Map(out_grad[pw], in_data[w], out_data[pw]) / pool_size;
        }
      }
      in_grad += in_grad_offset;
      in_data += in_grad_offset;
      out_grad += out_grad_offset;
      out_data += out_grad_offset;
    }
  }
}
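
// A sketch of what the unpool_sum kernels compute (assuming lp_grad in
// pool_utils.h implements the usual Lp derivative): for p = 1 the window
// gradient is out_grad / pool_size spread uniformly, i.e. plain avg/sum
// backprop; for p > 1 each input x in a window with pooled output y receives
// out_grad * (x / y)^(p-1), since d/dx (sum_i x_i^p)^(1/p) = (x / y)^(p-1).
// For example, avg unpooling with kernel_w = 2 adds half of out_grad[pw] to
// each of the two inputs of window pw.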

/*!
 * \brief avg/sum unpooling cpu function for 1-D images in 'nwc' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType, int p = 1>
inline void unpool_sum_1d_nwc_cpu(const DType* out_grad, const DType* in_data,
                                  const DType *out_data, const mxnet::TShape &ishape,
                                  const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                  const mxnet::TShape &pad, const mxnet::TShape &stride,
                                  DType *in_grad, const bool is_avg = false,
                                  const bool count_include_pad = true) {
  const int width = ishape[1];
  const int pooled_width = oshape[1];
  const int kernel_w = kernel[0];
  const int pad_w = pad[0];
  const int stride_w = stride[0];
  const int features = oshape[2];
  const index_t in_grad_offset = ishape[1] * features;
  const index_t out_grad_offset = oshape[1] * features;
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int pw = 0; pw < pooled_width; ++pw) {
      int wstart = pw * stride_w - pad_w;
      int wend = std::min(wstart + kernel_w, width + pad_w);
      int pool_size = (is_avg ? (wend - wstart) : 1);
      wstart = std::max(wstart, 0);
      wend = std::min(wend, width);
      if (is_avg && !count_include_pad) {
        pool_size = (wend - wstart);
      }
      for (int w = wstart; w < wend; ++w) {
        for (index_t c = 0; c < features; ++c) {
          in_grad[w * features + c] +=
              lp_grad<DType, p>::Map(out_grad[pw * features + c],
                                     in_data[w * features + c],
                                     out_data[pw * features + c]) / pool_size;
        }
      }
    }
    in_grad += in_grad_offset;
    in_data += in_grad_offset;
    out_grad += out_grad_offset;
    out_data += out_grad_offset;
  }
}

/*!
 * \brief avg/sum unpooling cpu function for 2-D images in 'nchw' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType, int p = 1>
inline void unpool_sum_2d_nchw_cpu(const DType *out_grad, const DType *in_data,
                                   const DType *out_data, const mxnet::TShape &ishape,
                                   const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                   const mxnet::TShape &pad, const mxnet::TShape &stride,
                                   DType *in_grad, const bool is_avg = false,
                                   const bool count_include_pad = true) {
  const int height = ishape[2], width = ishape[3];
  const int pooled_height = oshape[2], pooled_width = oshape[3];
  const int kernel_h = kernel[0], kernel_w = kernel[1];
  const int pad_h = pad[0], pad_w = pad[1];
  const int stride_h = stride[0], stride_w = stride[1];
  const index_t in_grad_offset = ishape[2] * ishape[3];
  const index_t out_grad_offset = oshape[2] * oshape[3];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int hstart = ph * stride_h - pad_h;
          int wstart = pw * stride_w - pad_w;
          int hend = std::min(hstart + kernel_h, height + pad_h);
          int wend = std::min(wstart + kernel_w, width + pad_w);
          int pool_size = (is_avg ? (hend - hstart) * (wend - wstart) : 1);
          hstart = std::max(hstart, 0);
          wstart = std::max(wstart, 0);
          hend = std::min(hend, height);
          wend = std::min(wend, width);
          if (is_avg && !count_include_pad) {
            pool_size = (hend - hstart) * (wend - wstart);
          }
          const int pool_index = ph * pooled_width + pw;
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              in_grad[h * width + w] +=
                  lp_grad<DType, p>::Map(out_grad[pool_index],
                                         in_data[h * width + w],
                                         out_data[pool_index]) / pool_size;
            }
          }
        }
      }
      in_grad += in_grad_offset;
      in_data += in_grad_offset;
      out_grad += out_grad_offset;
      out_data += out_grad_offset;
    }
  }
}

/*!
 * \brief avg/sum unpooling cpu function for 2-D images in 'nhwc' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType, int p = 1>
inline void unpool_sum_2d_nhwc_cpu(const DType* out_grad, const DType* in_data,
                                   const DType *out_data, const mxnet::TShape &ishape,
                                   const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                   const mxnet::TShape &pad, const mxnet::TShape &stride,
                                   DType *in_grad, const bool is_avg = false,
                                   const bool count_include_pad = true) {
  const int height = ishape[1], width = ishape[2];
  const int pooled_height = oshape[1], pooled_width = oshape[2];
  const int kernel_h = kernel[0], kernel_w = kernel[1];
  const int pad_h = pad[0], pad_w = pad[1];
  const int features = oshape[3];
  const int stride_h = stride[0], stride_w = stride[1];
  const index_t in_grad_offset = ishape[1] * ishape[2] * features;
  const index_t out_grad_offset = oshape[1] * oshape[2] * features;
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = ph * stride_h - pad_h;
        int wstart = pw * stride_w - pad_w;
        int hend = std::min(hstart + kernel_h, height + pad_h);
        int wend = std::min(wstart + kernel_w, width + pad_w);
        int pool_size = (is_avg ? (hend - hstart) * (wend - wstart) : 1);
        hstart = std::max(hstart, 0);
        wstart = std::max(wstart, 0);
        hend = std::min(hend, height);
        wend = std::min(wend, width);
        if (is_avg && !count_include_pad) {
          pool_size = (hend - hstart) * (wend - wstart);
        }
        const int pool_index = ph * pooled_width + pw;
        for (int h = hstart; h < hend; ++h) {
          for (int w = wstart; w < wend; ++w) {
            const int in_index = h * width + w;
            for (index_t c = 0; c < features; ++c) {
              in_grad[in_index * features + c] +=
                  lp_grad<DType, p>::Map(out_grad[pool_index * features + c],
                                         in_data[in_index * features + c],
                                         out_data[pool_index * features + c]) / pool_size;
            }
          }
        }
      }
    }
    in_grad += in_grad_offset;
    in_data += in_grad_offset;
    out_grad += out_grad_offset;
    out_data += out_grad_offset;
  }
}

/*!
 * \brief avg/sum unpooling cpu function for 3-D images in 'ncdhw' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType, int p = 1>
inline void unpool_sum_3d_ncdhw_cpu(const DType *out_grad, const DType *in_data,
                                    const DType *out_data, const mxnet::TShape &ishape,
                                    const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                    const mxnet::TShape &pad, const mxnet::TShape &stride,
                                    DType *in_grad, const bool is_avg = false,
                                    const bool count_include_pad = true) {
  const int depth = ishape[2], height = ishape[3], width = ishape[4];
  const int pooled_depth = oshape[2], pooled_height = oshape[3], pooled_width = oshape[4];
  const int kernel_d = kernel[0], kernel_h = kernel[1], kernel_w = kernel[2];
  const int pad_d = pad[0], pad_h = pad[1], pad_w = pad[2];
  const int stride_d = stride[0], stride_h = stride[1], stride_w = stride[2];
  const index_t in_grad_offset = ishape[2] * ishape[3] * ishape[4];
  const index_t out_grad_offset = oshape[2] * oshape[3] * oshape[4];
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (index_t c = 0; c < oshape[1]; ++c) {
      for (int pd = 0; pd < pooled_depth; ++pd) {
        for (int ph = 0; ph < pooled_height; ++ph) {
          for (int pw = 0; pw < pooled_width; ++pw) {
            int dstart = pd * stride_d - pad_d;
            int hstart = ph * stride_h - pad_h;
            int wstart = pw * stride_w - pad_w;
            int dend = std::min(dstart + kernel_d, depth + pad_d);
            int hend = std::min(hstart + kernel_h, height + pad_h);
            int wend = std::min(wstart + kernel_w, width + pad_w);
            int pool_size = (is_avg ? (dend - dstart) * (hend - hstart) * (wend - wstart) : 1);
            dstart = std::max(dstart, 0);
            hstart = std::max(hstart, 0);
            wstart = std::max(wstart, 0);
            dend = std::min(dend, depth);
            hend = std::min(hend, height);
            wend = std::min(wend, width);
            if (is_avg && !count_include_pad) {
              pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
            }
            const int pool_index = (pd * pooled_height + ph) * pooled_width + pw;
            for (int d = dstart; d < dend; ++d) {
              for (int h = hstart; h < hend; ++h) {
                for (int w = wstart; w < wend; ++w) {
                  in_grad[(d * height + h) * width + w] +=
                      lp_grad<DType, p>::Map(out_grad[pool_index],
                                             in_data[(d * height + h) * width + w],
                                             out_data[pool_index]) / pool_size;
                }
              }
            }
          }
        }
      }
      in_grad += in_grad_offset;
      in_data += in_grad_offset;
      out_grad += out_grad_offset;
      out_data += out_grad_offset;
    }
  }
}

/*!
 * \brief avg/sum unpooling cpu function for 3-D images in 'ndhwc' layout.
 * Do not call this kernel directly. Use the interface unpool().
 */
template<typename DType, int p = 1>
inline void unpool_sum_3d_ndhwc_cpu(const DType* out_grad, const DType* in_data,
                                    const DType *out_data, const mxnet::TShape &ishape,
                                    const mxnet::TShape &oshape, const mxnet::TShape &kernel,
                                    const mxnet::TShape &pad, const mxnet::TShape &stride,
                                    DType *in_grad, const bool is_avg = false,
                                    const bool count_include_pad = true) {
  const int depth = ishape[1], height = ishape[2], width = ishape[3];
  const int pooled_depth = oshape[1], pooled_height = oshape[2], pooled_width = oshape[3];
  const int kernel_d = kernel[0], kernel_h = kernel[1], kernel_w = kernel[2];
  const int pad_d = pad[0], pad_h = pad[1], pad_w = pad[2];
  const int stride_d = stride[0], stride_h = stride[1], stride_w = stride[2];
  const int features = oshape[4];
  const index_t in_grad_offset = ishape[1] * ishape[2] * ishape[3] * features;
  const index_t out_grad_offset = oshape[1] * oshape[2] * oshape[3] * features;
  for (index_t n = 0; n < oshape[0]; ++n) {
    for (int pd = 0; pd < pooled_depth; ++pd) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int dstart = pd * stride_d - pad_d;
          int hstart = ph * stride_h - pad_h;
          int wstart = pw * stride_w - pad_w;
          int dend = std::min(dstart + kernel_d, depth + pad_d);
          int hend = std::min(hstart + kernel_h, height + pad_h);
          int wend = std::min(wstart + kernel_w, width + pad_w);
          int pool_size = (is_avg ? (dend - dstart) * (hend - hstart) * (wend - wstart) : 1);
          dstart = std::max(dstart, 0);
          hstart = std::max(hstart, 0);
          wstart = std::max(wstart, 0);
          dend = std::min(dend, depth);
          hend = std::min(hend, height);
          wend = std::min(wend, width);
          if (is_avg && !count_include_pad) {
            pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
          }
          const int pool_index = (pd * pooled_height + ph) * pooled_width + pw;
          for (int d = dstart; d < dend; ++d) {
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                const int in_index = (d * height + h) * width + w;
                for (index_t c = 0; c < features; ++c) {
                  in_grad[in_index * features + c] +=
                      lp_grad<DType, p>::Map(out_grad[pool_index * features + c],
                                             in_data[in_index * features + c],
                                             out_data[pool_index * features + c]) / pool_size;
                }
              }
            }
          }
        }
      }
    }
    in_grad += in_grad_offset;
    in_data += in_grad_offset;
    out_grad += out_grad_offset;
    out_data += out_grad_offset;
  }
}

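// A minimal usage sketch of the pool() interface below (illustrative only;
// the shapes, buffers, and shape-construction calls are assumptions of this
// example, not requirements baked into the header):
//
//   // 1x1x6 input, kernel 2, stride 2, no padding -> 1x1x3 output.
//   mxnet::TShape ishape(mshadow::Shape3(1, 1, 6));
//   mxnet::TShape oshape(mshadow::Shape3(1, 1, 3));
//   mxnet::TShape kernel(mshadow::Shape1(2));
//   mxnet::TShape pad(mshadow::Shape1(0));
//   mxnet::TShape stride(mshadow::Shape1(2));
//   std::vector<float> in = {1, 3, 2, 5, 4, 0}, out(3, 0.0f);
//   pool<float, 1>(nullptr /* cpu stream unused */, in.data(), ishape, oshape,
//                  kernel, pad, stride, pool_enum::kMaxPooling, kWriteTo,
//                  out.data(), true /* count_include_pad */, mshadow::kNCW);
//   // out is now {3, 5, 4}.
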
1329 /*!
1330 * \brief This function serves as an interface for 1/2/3-D pooling operations.
1331 * \param s context stream defining the device in use is cpu
1332 * \param in_data pointer of the input tensor data in the format of NCW, NCHW, or NCDHW
1333 * \param ishape input tensor shape
1334 * \param oshape output tensor shape
1335 * \param kernel kernel shape
1336 * \param pad pad shape
1337 * \param stride stride shape
1338 * \param pool_type supported pooling type: max, avg, sum
1339 * \param req_type operator request type, only support kWriteTo for now
1340 * \param out_data pointer of the output tensor data in the format of NCW, NCHW, or NCDHW
1341 * \param p_value value of p for Lp pooling
1342 */
1343 template<typename DType, int p>
pool(mshadow::Stream<cpu> * s,const DType * in_data,const mxnet::TShape & ishape,const mxnet::TShape & oshape,const mxnet::TShape & kernel,const mxnet::TShape & pad,const mxnet::TShape & stride,const int pool_type,OpReqType req_type,DType * out_data,const bool count_include_pad,int layout)1344 inline void pool(mshadow::Stream<cpu>* s, const DType* in_data, const mxnet::TShape& ishape,
1345 const mxnet::TShape& oshape, const mxnet::TShape& kernel, const mxnet::TShape& pad,
1346 const mxnet::TShape& stride, const int pool_type, OpReqType req_type,
1347 DType* out_data, const bool count_include_pad, int layout) {
1348 CHECK_EQ(req_type, kWriteTo) << "Only support req=kWriteTo in pooling operations";
  if (kernel.ndim() == 1) {
    if (layout == mshadow::kNWC) {
      if (pool_enum::kMaxPooling == pool_type) {
        pool_max_1d_nwc_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kAvgPooling == pool_type) {
        pool_sum_1d_nwc_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data,
                            true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        pool_sum_1d_nwc_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kLpPooling == pool_type) {
        pool_sum_1d_nwc_cpu<DType, p>(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else if (layout == mshadow::kNCW) {
      if (pool_enum::kMaxPooling == pool_type) {
        pool_max_1d_ncw_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kAvgPooling == pool_type) {
        pool_sum_1d_ncw_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data,
                            true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        pool_sum_1d_ncw_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kLpPooling == pool_type) {
        pool_sum_1d_ncw_cpu<DType, p>(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else {
      LOG(FATAL) << "Unsupported layout, expecting kNCW or kNWC, saw: " << layout;
    }
  } else if (kernel.ndim() == 2) {
    if (layout == mshadow::kNHWC) {
      if (pool_enum::kMaxPooling == pool_type) {
        pool_max_2d_nhwc_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kAvgPooling == pool_type) {
        pool_sum_2d_nhwc_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data,
                             true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        pool_sum_2d_nhwc_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kLpPooling == pool_type) {
        pool_sum_2d_nhwc_cpu<DType, p>(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else if (layout == mshadow::kNCHW) {
      if (pool_enum::kMaxPooling == pool_type) {
        pool_max_2d_nchw_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kAvgPooling == pool_type) {
        pool_sum_2d_nchw_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data,
                             true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        pool_sum_2d_nchw_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kLpPooling == pool_type) {
        pool_sum_2d_nchw_cpu<DType, p>(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else {
      LOG(FATAL) << "Unsupported layout, expecting kNCHW or kNHWC, saw: " << layout;
    }
  } else if (kernel.ndim() == 3) {
    if (layout == mshadow::kNDHWC) {
      if (pool_enum::kMaxPooling == pool_type) {
        pool_max_3d_ndhwc_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kAvgPooling == pool_type) {
        pool_sum_3d_ndhwc_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data,
                              true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        pool_sum_3d_ndhwc_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kLpPooling == pool_type) {
        pool_sum_3d_ndhwc_cpu<DType, p>(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else if (layout == mshadow::kNCDHW) {
      if (pool_enum::kMaxPooling == pool_type) {
        pool_max_3d_ncdhw_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kAvgPooling == pool_type) {
        pool_sum_3d_ncdhw_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data,
                              true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        pool_sum_3d_ncdhw_cpu(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else if (pool_enum::kLpPooling == pool_type) {
        pool_sum_3d_ncdhw_cpu<DType, p>(in_data, ishape, oshape, kernel, pad, stride, out_data);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else {
      LOG(FATAL) << "Unsupported layout, expecting kNCDHW or kNDHWC, saw: " << layout;
    }
  } else {
    LOG(FATAL) << "Unsupported " << kernel.ndim() << "-D pooling";
  }
}

/*!
 * \brief This function serves as an interface for 1/2/3-D unpooling operations.
 * \param s context stream defining that the device in use is cpu
 * \param out_grad pointer of the gradient of operator's output tensor
 * \param in_data pointer of the input tensor in the format of NCW, NCHW, or NCDHW,
 *        or the corresponding channel-last layout (NWC, NHWC, NDHWC)
 * \param out_data pointer of the output tensor in the same layout as in_data
 * \param ishape input tensor shape
 * \param oshape output tensor shape
 * \param kernel kernel shape
 * \param pad pad shape
 * \param stride stride shape
 * \param pool_type supported pooling type: max, avg, sum, lp
 * \param req_type operator request type: kNullOp, kWriteTo, kWriteInplace, kAddTo
 * \param in_grad pointer of the gradient of the operator's input tensor
 * \param count_include_pad for avg pooling, whether padded values count toward the divisor
 * \param layout I/O tensor layout: kNCW/kNWC, kNCHW/kNHWC, or kNCDHW/kNDHWC
 * \tparam p value of p for Lp pooling
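 *
 * A minimal usage sketch mirroring the forward example above (illustrative
 * buffers and shapes; in_data/out_data must hold the forward pass's input and
 * output so the max locations can be recomputed; passing a null stream to the
 * cpu path is an assumption of this sketch, not a documented contract):
 * \code
 * std::vector<float> in(16), out(4), ograd(4, 1.0f), igrad(16);
 * mxnet::TShape ishape({1, 1, 4, 4}), oshape({1, 1, 2, 2});
 * mxnet::TShape kernel({2, 2}), pad({0, 0}), stride({2, 2});
 * pool<float, 2>(nullptr, in.data(), ishape, oshape, kernel, pad, stride,
 *                pool_enum::kMaxPooling, kWriteTo, out.data(), true, mshadow::kNCHW);
 * unpool<float, 2>(nullptr, ograd.data(), in.data(), out.data(), ishape, oshape,
 *                  kernel, pad, stride, pool_enum::kMaxPooling, kWriteTo,
 *                  igrad.data(), true, mshadow::kNCHW);
 * \endcode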
 */
template<typename DType, int p>
inline void unpool(mshadow::Stream<cpu>* s, const DType* out_grad, const DType* in_data,
                   const DType* out_data, const mxnet::TShape& ishape,
                   const mxnet::TShape& oshape, const mxnet::TShape& kernel,
                   const mxnet::TShape& pad, const mxnet::TShape& stride,
                   const int pool_type, OpReqType req_type, DType* in_grad,
                   const bool count_include_pad, int layout) {
  if (mxnet::kNullOp == req_type) return;
  if (mxnet::kAddTo != req_type) {
    mxnet_op::Kernel<mxnet_op::set_zero, cpu>::Launch(s, ishape.Size(), in_grad);
  }
  if (kernel.ndim() == 1) {
    if (layout == mshadow::kNWC) {
      if (pool_enum::kMaxPooling == pool_type) {
        unpool_max_1d_nwc_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                              in_grad);
      } else if (pool_enum::kAvgPooling == pool_type) {
        unpool_sum_1d_nwc_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                              in_grad, true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        unpool_sum_1d_nwc_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                              in_grad);
      } else if (pool_enum::kLpPooling == pool_type) {
        unpool_sum_1d_nwc_cpu<DType, p>(out_grad, in_data, out_data, ishape, oshape, kernel, pad,
                                        stride, in_grad);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else if (layout == mshadow::kNCW) {
      if (pool_enum::kMaxPooling == pool_type) {
        unpool_max_1d_ncw_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                              in_grad);
      } else if (pool_enum::kAvgPooling == pool_type) {
        unpool_sum_1d_ncw_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                              in_grad, true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        unpool_sum_1d_ncw_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                              in_grad);
      } else if (pool_enum::kLpPooling == pool_type) {
        unpool_sum_1d_ncw_cpu<DType, p>(out_grad, in_data, out_data, ishape, oshape, kernel, pad,
                                        stride, in_grad);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else {
      LOG(FATAL) << "Unsupported layout, expecting kNCW or kNWC, saw: " << layout;
    }
  } else if (kernel.ndim() == 2) {
    if (layout == mshadow::kNHWC) {
      if (pool_enum::kMaxPooling == pool_type) {
        unpool_max_2d_nhwc_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                               in_grad);
      } else if (pool_enum::kAvgPooling == pool_type) {
        unpool_sum_2d_nhwc_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                               in_grad, true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        unpool_sum_2d_nhwc_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                               in_grad);
      } else if (pool_enum::kLpPooling == pool_type) {
        unpool_sum_2d_nhwc_cpu<DType, p>(out_grad, in_data, out_data, ishape, oshape, kernel, pad,
                                         stride, in_grad);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else if (layout == mshadow::kNCHW) {
      if (pool_enum::kMaxPooling == pool_type) {
        unpool_max_2d_nchw_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                               in_grad);
      } else if (pool_enum::kAvgPooling == pool_type) {
        unpool_sum_2d_nchw_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                               in_grad, true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        unpool_sum_2d_nchw_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                               in_grad);
      } else if (pool_enum::kLpPooling == pool_type) {
        unpool_sum_2d_nchw_cpu<DType, p>(out_grad, in_data, out_data, ishape, oshape, kernel, pad,
                                         stride, in_grad);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else {
      LOG(FATAL) << "Unsupported layout, expecting kNCHW or kNHWC, saw: " << layout;
    }
  } else if (kernel.ndim() == 3) {
    if (layout == mshadow::kNDHWC) {
      if (pool_enum::kMaxPooling == pool_type) {
        unpool_max_3d_ndhwc_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                                in_grad);
      } else if (pool_enum::kAvgPooling == pool_type) {
        unpool_sum_3d_ndhwc_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                                in_grad, true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        unpool_sum_3d_ndhwc_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                                in_grad);
      } else if (pool_enum::kLpPooling == pool_type) {
        unpool_sum_3d_ndhwc_cpu<DType, p>(out_grad, in_data, out_data, ishape, oshape, kernel,
                                          pad, stride, in_grad);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else if (layout == mshadow::kNCDHW) {
      if (pool_enum::kMaxPooling == pool_type) {
        unpool_max_3d_ncdhw_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                                in_grad);
      } else if (pool_enum::kAvgPooling == pool_type) {
        unpool_sum_3d_ncdhw_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                                in_grad, true, count_include_pad);
      } else if (pool_enum::kSumPooling == pool_type) {
        unpool_sum_3d_ncdhw_cpu(out_grad, in_data, out_data, ishape, oshape, kernel, pad, stride,
                                in_grad);
      } else if (pool_enum::kLpPooling == pool_type) {
        unpool_sum_3d_ncdhw_cpu<DType, p>(out_grad, in_data, out_data, ishape, oshape, kernel,
                                          pad, stride, in_grad);
      } else {
        LOG(FATAL) << "Unknown pooling type " << pool_type;
      }
    } else {
      LOG(FATAL) << "Unsupported layout, expecting kNCDHW or kNDHWC, saw: " << layout;
    }
  } else {
    LOG(FATAL) << "Unsupported " << kernel.ndim() << "-D unpooling";
  }
}

}  // namespace op
}  // namespace mxnet
#ifdef __CUDACC__
#include "./pool.cuh"
#endif

#endif  // MXNET_OPERATOR_NN_POOL_H_