1 //
2 //  OpenCLRunningUtils.hpp
3 //  MNN
4 //
5 //  Created by MNN on 2019/01/31.
6 //  Copyright © 2018, Alibaba Group Holding Limited
7 //
8 
9 #ifndef OpenCLRunningUtils_hpp
10 #define OpenCLRunningUtils_hpp
11 
12 #include <string>
13 #include <vector>
14 #include <algorithm>
15 #include <climits>
16 
17 #include "core/Macro.h"
18 #include "core/TensorUtils.hpp"
19 #include "backend/opencl/core/runtime/OpenCLRuntime.hpp"
20 #include "backend/opencl/core/runtime/OpenCLWrapper.hpp"
21 
22 namespace MNN {
23 namespace OpenCL {
24 
tensorShapeFormat(const Tensor * input)25 inline std::vector<int> tensorShapeFormat(const Tensor *input) {
26 
27     int iN = (0 != input->buffer().dim[0].extent) ? input->buffer().dim[0].extent : 1;
28     int iC = (0 != input->buffer().dim[1].extent) ? input->buffer().dim[1].extent : 1;
29     int iH = (0 != input->buffer().dim[2].extent) ? input->buffer().dim[2].extent : 1;
30     int iW = (0 != input->buffer().dim[3].extent) ? input->buffer().dim[3].extent : 1;
31 
32     if(input->buffer().dimensions > 4)//more than 4 dimensions put to N dimension
33     {
34         for(int i = 4; i < input->buffer().dimensions; i++)
35         {
36             iW *= input->buffer().dim[i].extent;
37         }
38     }
39 
40     if (TensorUtils::getDescribe(input)->dimensionFormat == MNN::MNN_DATA_FORMAT_NHWC)
41     {
42         iN = (0 < input->buffer().dim[0].extent) ? input->buffer().dim[0].extent : 1;
43         iH = (0 < input->buffer().dim[1].extent) ? input->buffer().dim[1].extent : 1;
44         iW = (0 < input->buffer().dim[2].extent) ? input->buffer().dim[2].extent : 1;
45         iC = (0 < input->buffer().dim[3].extent) ? input->buffer().dim[3].extent : 1;
46 
47         if(input->buffer().dimensions > 4)//more than 4 dimensions put to N dimension
48         {
49             for(int i = 4; i < input->buffer().dimensions; i++)
50             {
51                 iC *= input->buffer().dim[i].extent;
52             }
53         }
54     }
55 
56     if (input->buffer().dimensions == 2) {
57         iN = input->buffer().dim[0].extent;
58         iH = 1;
59         iW = 1;
60         iC = input->buffer().dim[1].extent;
61     }
62     if (input->buffer().dimensions == 1) {
63         iN = 1;
64         iH = 1;
65         iW = 1;
66         iC = input->buffer().dim[0].extent;
67     }
68 
69 #ifdef LOG_VERBOSE
70     MNN_PRINT("tensorShapeFormat : [%d, %d, %d, %d] \n", iN, iH, iW, iC);
71 #endif
72     std::vector<int> shape_vec{iN, iH, iW, iC};
73 
74     return shape_vec;
75 }
76 
/**
 * Format tags for OpenCL buffers/images; getImageShape() takes one of these as
 * its `type` argument.
 *
 * Every enumerator has an explicitly assigned value.
 * NOTE(review): the per-format memory layouts are not visible in this header;
 * confirm against the getImageShape() definition before renumbering or reordering.
 */
enum OpenCLBufferFormat {
    CONV2D_FILTER        = 0,
    NHWC_BUFFER          = 1,
    ARGUMENT             = 2,
    DW_CONV2D_FILTER     = 3,
    NCHW_BUFFER          = 4,
    NHWC4_BUFFER         = 5,
    CONV2D1x1_OPT_FILTER = 6,
};
86 
/**
 * @brief Copies a 4-D array from IOHW order into OIHW order.
 *
 * For every (i, o, h, w) the element src[((i * O + o) * H + h) * W + w] is assigned to
 * dst[((o * I + i) * H + h) * W + w]. Elements are assigned one by one, iterating
 * i (outermost), o, h, w (innermost).
 *
 * @tparam T   element type; only copy-assignment is required.
 * @tparam Dim integer type used for the extents and for index arithmetic.
 * @param src source array holding I * O * H * W elements in IOHW order.
 * @param dst destination array with room for O * I * H * W elements.
 * @param O   extent of the O axis.
 * @param I   extent of the I axis.
 * @param H   extent of the H axis.
 * @param W   extent of the W axis.
 */
template <typename T, typename Dim>
inline void IOHW2OIHW(const T *src, T *dst, Dim O, Dim I, Dim H, Dim W) {
    for (Dim in = 0; in < I; in++) {
        for (Dim out = 0; out < O; out++) {
            // Offsets of the H x W plane for this (in, out) pair in each layout.
            const Dim srcPlane = in * O * H * W + out * H * W;
            const Dim dstPlane = out * I * H * W + in * H * W;
            for (Dim row = 0; row < H; row++) {
                const Dim rowOffset = row * W;
                for (Dim col = 0; col < W; col++) {
                    dst[dstPlane + rowOffset + col] = src[srcPlane + rowOffset + col];
                }
            }
        }
    }
}
openCLBuffer(const Tensor * tensor)99 inline cl::Buffer &openCLBuffer(const Tensor *tensor) {
100     return (*(cl::Buffer *)(tensor->deviceId()));
101 }
openCLImage(const Tensor * tensor)102 inline cl::Image &openCLImage(const Tensor *tensor) {
103     return (*(cl::Image *)(tensor->deviceId()));
104 }
105 
/**
 * Declaration only; the definition is not part of this header.
 *
 * @param shape      tensor shape, in NHWC order.
 * @param type       buffer format tag (see OpenCLBufferFormat).
 * @param imageShape non-const output pointer; presumably receives the image shape derived
 *                   from `shape` and `type` (NOTE(review): confirm size and element order
 *                   against the definition).
 */
void getImageShape(const std::vector<int> &shape, /* NHWC */
                   const OpenCLBufferFormat type, std::vector<size_t> *imageShape);
108 
/**
 * Declaration only; the definition is not part of this header.
 *
 * @param kernel  kernel the result is computed for.
 * @param gws     work-size vector; taken by non-const reference, so the callee may modify it.
 * @param runtime OpenCL runtime handle.
 * @return a vector of uint32_t values; presumably a local work size chosen for `kernel`
 *         (NOTE(review): confirm against the definition).
 */
std::vector<uint32_t> turnLocalSize(cl::Kernel *kernel, std::vector<uint32_t> &gws, OpenCLRuntime *runtime);
110 
/**
 * Declaration only; the definition is not part of this header.
 * Presumably enqueues `kernel` over a 3-D range (NOTE(review): confirm against the definition).
 *
 * @param kernel   kernel to run.
 * @param gws      work-size vector (presumably the global work size).
 * @param lws      work-size vector (presumably the local work size).
 * @param runtime  OpenCL runtime handle.
 * @param eventPtr optional event pointer; defaults to nullptr.
 */
void run3DKernelDefault(const ::cl::Kernel &kernel, const std::vector<uint32_t> &gws, const std::vector<uint32_t> &lws,
                        OpenCLRuntime *runtime, cl::Event* eventPtr = nullptr);
113 
/**
 * Declaration only; the definition is not part of this header.
 * Presumably enqueues `kernel` over a 2-D range (NOTE(review): confirm against the definition).
 *
 * @param kernel   kernel to run.
 * @param gws      work-size vector (presumably the global work size).
 * @param lws      work-size vector (presumably the local work size).
 * @param runtime  OpenCL runtime handle.
 * @param eventPtr optional event pointer; defaults to nullptr.
 */
void runKernel2D(const ::cl::Kernel &kernel, const std::vector<uint32_t> &gws, const std::vector<uint32_t> &lws,
                 OpenCLRuntime *runtime, cl::Event* eventPtr = nullptr);
116 
/**
 * Declaration only; the definition is not part of this header.
 * Takes the same kernel / work-size / runtime arguments as runKernel2D, without the
 * event parameter. NOTE(review): how this differs from runKernel2D is not visible
 * here — confirm against the definition.
 *
 * @param kernel  kernel to run.
 * @param gws     work-size vector (presumably the global work size).
 * @param lws     work-size vector (presumably the local work size).
 * @param runtime OpenCL runtime handle.
 */
void runTurnKernelLWS2D(const ::cl::Kernel &kernel, const std::vector<uint32_t> &gws, const std::vector<uint32_t> &lws,
                        OpenCLRuntime *runtime);
119 
/**
 * Declaration only; the definition is not part of this header.
 *
 * @param gws              work-size vector (presumably the 3-D global work size).
 * @param maxWorkGroupSize upper bound supplied by the caller.
 * @param runtime          OpenCL runtime handle.
 * @param kernelName       name identifying the kernel.
 * @param mKernel          kernel object.
 * @return a pair of (uint32_t vector, uint32_t); presumably the chosen local work size
 *         plus an associated scalar (NOTE(review): the meaning of the second element is
 *         not visible here — confirm against the definition).
 */
std::pair<std::vector<uint32_t>, uint32_t> localWS3DDefault(const std::vector<uint32_t> &gws, const uint32_t maxWorkGroupSize,
                                       OpenCLRuntime *runtime, const std::string &kernelName, const cl::Kernel &mKernel);
122 
/**
 * Declaration only; the definition is not part of this header.
 *
 * @param gws              work-size vector (presumably the 2-D global work size).
 * @param maxWorkGroupSize upper bound supplied by the caller.
 * @param runtime          OpenCL runtime handle.
 * @param kernelName       name identifying the kernel.
 * @param mKernel          kernel object.
 * @return a pair of (uint32_t vector, uint32_t); presumably the chosen local work size
 *         plus an associated scalar (NOTE(review): the meaning of the second element is
 *         not visible here — confirm against the definition).
 */
std::pair<std::vector<uint32_t>, uint32_t> localWS2DDefault(const std::vector<uint32_t> &gws, const uint32_t maxWorkGroupSize,
                                       OpenCLRuntime *runtime, const std::string &kernelName, const cl::Kernel &mKernel);
125 
/**
 * Declaration only; the definition is not part of this header.
 * Presumably copies the contents of `buffer` into `image` (NOTE(review): confirm the
 * expected buffer layout and image format against the definition).
 *
 * @param runtime OpenCL runtime handle.
 * @param buffer  source buffer.
 * @param image   destination image.
 * @param w       width argument (units not visible here).
 * @param h       height argument (units not visible here).
 */
void copyBufferToImage(OpenCLRuntime *runtime, const cl::Buffer &buffer, const cl::Image &image, int w, int h);
127 
128 } // namespace OpenCL
129 } // namespace MNN
130 #endif  /* OpenCLRunningUtils_hpp */
131