1 //
2 // GLConvolutionDepthwise.cpp
3 // MNN
4 //
5 // Created by MNN on 2019/01/31.
6 // Copyright © 2018, Alibaba Group Holding Limited
7 //
8
9 #include "backend/opengl/GLConvolutionDepthwise.hpp"
10 #include <MNN/AutoTime.hpp>
11
12 #include <sstream>
13 #include "AllShader.hpp"
14 #include "backend/opengl/GLBackend.hpp"
15 #include "core/Macro.h"
16 namespace MNN {
17 namespace OpenGL {
18
// Compute work-group size used by the depthwise convolution program. The same values are
// injected into the shader as XLOCAL / YLOCAL / ZLOCAL defines and used to derive the
// dispatch group counts, so they must stay in sync with both.
static constexpr int gXLocal = 8;
static constexpr int gYLocal = 8;
static constexpr int gZLocal = 1;
22
~GLConvolutionDepthwise()23 GLConvolutionDepthwise::~GLConvolutionDepthwise() {
24 }
25
GLConvolutionDepthwise(const std::vector<Tensor * > & inputs,const Op * convOp,Backend * bn)26 GLConvolutionDepthwise::GLConvolutionDepthwise(const std::vector<Tensor *> &inputs, const Op *convOp, Backend *bn) : GPUConvolution(convOp, bn) {
27 auto extra = (GLBackend *)bn;
28
29 mBiasBuffer.reset(new GLSSBOBuffer(sizeof(float) * ALIGN_UP4(mCommon->outputCount())));
30 int fw = mCommon->kernelX();
31 int fh = mCommon->kernelY();
32 int unit = 4;
33 int srcDepthQuad = UP_DIV(mInputDepth, unit);
34
35 auto kernelBuffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(sizeof(float) * fw * fh * srcDepthQuad * 4));
36 auto weight = kernelBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
37 if(weight != nullptr){
38 ::memset(weight, 0, fw * fh * srcDepthQuad * 4 * sizeof(float));
39 ::memcpy(weight, convOp->main_as_Convolution2D()->weight()->data(),
40 convOp->main_as_Convolution2D()->weight()->size() * sizeof(float));
41 }
42
43 kernelBuffer->unmap();
44
45 auto bias = mBiasBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
46 if(bias != nullptr){
47 ::memset(bias, 0, ALIGN_UP4(mCommon->outputCount()) * sizeof(float));
48 ::memcpy(bias, convOp->main_as_Convolution2D()->bias()->data(),
49 convOp->main_as_Convolution2D()->bias()->size() * sizeof(float));
50 }
51 mBiasBuffer->unmap();
52
53 std::vector<std::string> prefix;
54 if (mCommon->relu()) {
55 prefix.push_back("#define RELU");
56 }
57 if (mCommon->relu6()) {
58 prefix.push_back("#define RELU6");
59 }
60
61 {
62 std::ostringstream os;
63 os << "#define XLOCAL " << gXLocal;
64 prefix.push_back(os.str());
65 }
66 {
67 std::ostringstream os;
68 os << "#define YLOCAL " << gYLocal;
69 prefix.push_back(os.str());
70 }
71 {
72 std::ostringstream os;
73 os << "#define ZLOCAL " << gZLocal;
74 prefix.push_back(os.str());
75 }
76
77 mProgram = extra->getProgram("convolution_depthwise", glsl_convlutionDepthwise_glsl, prefix);
78 mKernelTexture = std::shared_ptr<GLTexture>(new GLTexture(srcDepthQuad, fw, fh, ((GLBackend *)backend())->getTextrueFormat(), GL_TEXTURE_3D, false));
79
80 auto transform = extra->getProgram("transform_kernel_image_depthwise", glsl_kernel2ImageDepthwise_glsl);
81 transform->useProgram();
82 glBindImageTexture(0, mKernelTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
83 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, kernelBuffer->getId());
84 OPENGL_CHECK_ERROR;
85 glUniform1i(3, fw);
86 glUniform1i(4, fh);
87 OPENGL_CHECK_ERROR;
88
89 ((GLBackend *)backend())->compute(srcDepthQuad, fw, fh);
90 OPENGL_CHECK_ERROR;
91
92 }
93
onResize(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs)94 ErrorCode GLConvolutionDepthwise::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
95 GPUConvolution::onResize(inputs, outputs);
96 int kx = mCommon->kernelX();
97 int ky = mCommon->kernelY();
98 int sx = mCommon->strideX();
99 int sy = mCommon->strideY();
100 int dx = mCommon->dilateX();
101 int dy = mCommon->dilateY();
102 mSetUniform = [=]() {
103 glUniform2i(4, mPadX, mPadY);
104 glUniform2i(5, kx, ky);
105 glUniform2i(6, sx, sy);
106 glUniform2i(7, dx, dy);
107 };
108 return NO_ERROR;
109 }
110
onExecute(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs)111 ErrorCode GLConvolutionDepthwise::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
112 {
113 auto convLayer = mCommon;
114
115 auto input = inputs[0];
116 auto output = outputs[0];
117 auto inputTexture = input->deviceId();
118 auto outputTexture = output->deviceId();
119 int dst_depth_quad = UP_DIV(output->channel(), 4);
120
121 mProgram->useProgram();
122 glBindImageTexture(0, outputTexture, 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
123 OPENGL_CHECK_ERROR;
124 {
125 int texId = 0;
126 glActiveTexture(GL_TEXTURE0 + texId);
127 glUniform1i(1, texId);
128 glBindTexture(GL_TEXTURE_3D, inputTexture);
129 OPENGL_CHECK_ERROR;
130 }
131 {
132 int texId = 1;
133 glActiveTexture(GL_TEXTURE0 + texId);
134 OPENGL_CHECK_ERROR;
135 glUniform1i(2, texId);
136
137 OPENGL_CHECK_ERROR;
138 glBindTexture(GL_TEXTURE_3D, mKernelTexture->id());
139 OPENGL_CHECK_ERROR;
140 }
141 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, mBiasBuffer->getId());
142
143 OPENGL_CHECK_ERROR;
144 mSetUniform();
145
146 glUniform3i(10, output->width(), output->height(), UP_DIV(output->channel(), 4));
147 glUniform3i(11, input->width(), input->height(), UP_DIV(input->channel(), 4));
148
149 OPENGL_CHECK_ERROR;
150
151 ((GLBackend *)backend())->compute(UP_DIV(output->width(), (gXLocal)), UP_DIV(output->height(), gYLocal),
152 UP_DIV(dst_depth_quad, gZLocal));
153 OPENGL_CHECK_ERROR;
154
155 }
156
157 return NO_ERROR;
158 }
159 GLCreatorRegister<TypedCreator<GLConvolutionDepthwise>> __depthwise_conv_op(OpType_ConvolutionDepthwise);
160 } // namespace OpenGL
161 } // namespace MNN
162