1 //
2 // OpenCLRunningUtils.hpp
3 // MNN
4 //
5 // Created by MNN on 2019/01/31.
6 // Copyright © 2018, Alibaba Group Holding Limited
7 //
8
9 #ifndef OpenCLRunningUtils_hpp
10 #define OpenCLRunningUtils_hpp
11
12 #include <string>
13 #include <vector>
14 #include <algorithm>
15 #include <climits>
16
17 #include "core/Macro.h"
18 #include "core/TensorUtils.hpp"
19 #include "backend/opencl/core/runtime/OpenCLRuntime.hpp"
20 #include "backend/opencl/core/runtime/OpenCLWrapper.hpp"
21
22 namespace MNN {
23 namespace OpenCL {
24
tensorShapeFormat(const Tensor * input)25 inline std::vector<int> tensorShapeFormat(const Tensor *input) {
26
27 int iN = (0 != input->buffer().dim[0].extent) ? input->buffer().dim[0].extent : 1;
28 int iC = (0 != input->buffer().dim[1].extent) ? input->buffer().dim[1].extent : 1;
29 int iH = (0 != input->buffer().dim[2].extent) ? input->buffer().dim[2].extent : 1;
30 int iW = (0 != input->buffer().dim[3].extent) ? input->buffer().dim[3].extent : 1;
31
32 if(input->buffer().dimensions > 4)//more than 4 dimensions put to N dimension
33 {
34 for(int i = 4; i < input->buffer().dimensions; i++)
35 {
36 iW *= input->buffer().dim[i].extent;
37 }
38 }
39
40 if (TensorUtils::getDescribe(input)->dimensionFormat == MNN::MNN_DATA_FORMAT_NHWC)
41 {
42 iN = (0 < input->buffer().dim[0].extent) ? input->buffer().dim[0].extent : 1;
43 iH = (0 < input->buffer().dim[1].extent) ? input->buffer().dim[1].extent : 1;
44 iW = (0 < input->buffer().dim[2].extent) ? input->buffer().dim[2].extent : 1;
45 iC = (0 < input->buffer().dim[3].extent) ? input->buffer().dim[3].extent : 1;
46
47 if(input->buffer().dimensions > 4)//more than 4 dimensions put to N dimension
48 {
49 for(int i = 4; i < input->buffer().dimensions; i++)
50 {
51 iC *= input->buffer().dim[i].extent;
52 }
53 }
54 }
55
56 if (input->buffer().dimensions == 2) {
57 iN = input->buffer().dim[0].extent;
58 iH = 1;
59 iW = 1;
60 iC = input->buffer().dim[1].extent;
61 }
62 if (input->buffer().dimensions == 1) {
63 iN = 1;
64 iH = 1;
65 iW = 1;
66 iC = input->buffer().dim[0].extent;
67 }
68
69 #ifdef LOG_VERBOSE
70 MNN_PRINT("tensorShapeFormat : [%d, %d, %d, %d] \n", iN, iH, iW, iC);
71 #endif
72 std::vector<int> shape_vec{iN, iH, iW, iC};
73
74 return shape_vec;
75 }
76
// Layout tag passed as the `type` argument of getImageShape().
// The numeric values are pinned explicitly.
// NOTE(review): callers may depend on these values, and the exact layout each
// tag stands for is defined by code outside this header — verify before
// renumbering or reinterpreting a tag.
enum OpenCLBufferFormat {
    CONV2D_FILTER        = 0,
    NHWC_BUFFER          = 1,
    ARGUMENT             = 2,
    DW_CONV2D_FILTER     = 3,
    NCHW_BUFFER          = 4,
    NHWC4_BUFFER         = 5,
    CONV2D1x1_OPT_FILTER = 6,
};
86
// Reorders a dense 4-D array from [I][O][H][W] to [O][I][H][W].
//
// For every (i, o, h, w) the element at src[((i * O + o) * H + h) * W + w] is
// copied to dst[((o * I + i) * H + h) * W + w]. Nothing is written when any of
// the four extents is zero.
//
// @param src source array holding I * O * H * W elements.
// @param dst destination array with room for O * I * H * W elements.
// @param O,I,H,W extents of the four axes.
template <typename T, typename Dim>
inline void IOHW2OIHW(const T *src, T *dst, Dim O, Dim I, Dim H, Dim W) {
    // `auto` keeps the (possibly promoted) arithmetic type of H * W.
    const auto planeSize = H * W;
    for (Dim in = 0; in < I; ++in) {
        for (Dim out = 0; out < O; ++out) {
            // Start of the H x W plane for this (in, out) pair in each layout.
            const auto srcBase = (in * O + out) * planeSize;
            const auto dstBase = (out * I + in) * planeSize;
            for (Dim row = 0; row < H; ++row) {
                for (Dim col = 0; col < W; ++col) {
                    const auto offset     = row * W + col;
                    dst[dstBase + offset] = src[srcBase + offset];
                }
            }
        }
    }
}
openCLBuffer(const Tensor * tensor)99 inline cl::Buffer &openCLBuffer(const Tensor *tensor) {
100 return (*(cl::Buffer *)(tensor->deviceId()));
101 }
openCLImage(const Tensor * tensor)102 inline cl::Image &openCLImage(const Tensor *tensor) {
103 return (*(cl::Image *)(tensor->deviceId()));
104 }
105
106 void getImageShape(const std::vector<int> &shape, /* NHWC */
107 const OpenCLBufferFormat type, std::vector<size_t> *imageShape);
108
109 std::vector<uint32_t> turnLocalSize(cl::Kernel *kernel, std::vector<uint32_t> &gws, OpenCLRuntime *runtime);
110
111 void run3DKernelDefault(const ::cl::Kernel &kernel, const std::vector<uint32_t> &gws, const std::vector<uint32_t> &lws,
112 OpenCLRuntime *runtime, cl::Event* eventPtr = nullptr);
113
114 void runKernel2D(const ::cl::Kernel &kernel, const std::vector<uint32_t> &gws, const std::vector<uint32_t> &lws,
115 OpenCLRuntime *runtime, cl::Event* eventPtr = nullptr);
116
117 void runTurnKernelLWS2D(const ::cl::Kernel &kernel, const std::vector<uint32_t> &gws, const std::vector<uint32_t> &lws,
118 OpenCLRuntime *runtime);
119
120 std::pair<std::vector<uint32_t>, uint32_t> localWS3DDefault(const std::vector<uint32_t> &gws, const uint32_t maxWorkGroupSize,
121 OpenCLRuntime *runtime, const std::string &kernelName, const cl::Kernel &mKernel);
122
123 std::pair<std::vector<uint32_t>, uint32_t> localWS2DDefault(const std::vector<uint32_t> &gws, const uint32_t maxWorkGroupSize,
124 OpenCLRuntime *runtime, const std::string &kernelName, const cl::Kernel &mKernel);
125
126 void copyBufferToImage(OpenCLRuntime *runtime, const cl::Buffer &buffer, const cl::Image &image, int w, int h);
127
128 } // namespace OpenCL
129 } // namespace MNN
130 #endif /* OpenCLRunningUtils_hpp */
131