1 //
2 //  GLConvolutionDepthwise.cpp
3 //  MNN
4 //
5 //  Created by MNN on 2019/01/31.
6 //  Copyright © 2018, Alibaba Group Holding Limited
7 //
8 
#include "backend/opengl/GLConvolutionDepthwise.hpp"
#include <MNN/AutoTime.hpp>

#include <cstring>
#include <sstream>
#include <string>
#include "AllShader.hpp"
#include "backend/opengl/GLBackend.hpp"
#include "core/Macro.h"
16 namespace MNN {
17 namespace OpenGL {
18 
// Compute-shader work-group dimensions. They are injected into the depthwise
// convolution shader as the XLOCAL / YLOCAL / ZLOCAL defines and are also used
// to derive the dispatch counts in onExecute.
namespace {
constexpr int gXLocal = 8;
constexpr int gYLocal = 8;
constexpr int gZLocal = 1;
} // namespace
22 
~GLConvolutionDepthwise()23 GLConvolutionDepthwise::~GLConvolutionDepthwise() {
24 }
25 
GLConvolutionDepthwise(const std::vector<Tensor * > & inputs,const Op * convOp,Backend * bn)26 GLConvolutionDepthwise::GLConvolutionDepthwise(const std::vector<Tensor *> &inputs, const Op *convOp, Backend *bn) : GPUConvolution(convOp, bn) {
27     auto extra = (GLBackend *)bn;
28 
29     mBiasBuffer.reset(new GLSSBOBuffer(sizeof(float) * ALIGN_UP4(mCommon->outputCount())));
30     int fw           = mCommon->kernelX();
31     int fh           = mCommon->kernelY();
32     int unit         = 4;
33     int srcDepthQuad = UP_DIV(mInputDepth, unit);
34 
35     auto kernelBuffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(sizeof(float) * fw * fh * srcDepthQuad * 4));
36     auto weight       = kernelBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
37     if(weight != nullptr){
38         ::memset(weight, 0, fw * fh * srcDepthQuad * 4 * sizeof(float));
39         ::memcpy(weight, convOp->main_as_Convolution2D()->weight()->data(),
40                  convOp->main_as_Convolution2D()->weight()->size() * sizeof(float));
41     }
42 
43     kernelBuffer->unmap();
44 
45     auto bias = mBiasBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
46     if(bias != nullptr){
47         ::memset(bias, 0, ALIGN_UP4(mCommon->outputCount()) * sizeof(float));
48         ::memcpy(bias, convOp->main_as_Convolution2D()->bias()->data(),
49                  convOp->main_as_Convolution2D()->bias()->size() * sizeof(float));
50     }
51     mBiasBuffer->unmap();
52 
53     std::vector<std::string> prefix;
54     if (mCommon->relu()) {
55         prefix.push_back("#define RELU");
56     }
57     if (mCommon->relu6()) {
58         prefix.push_back("#define RELU6");
59     }
60 
61     {
62         std::ostringstream os;
63         os << "#define XLOCAL " << gXLocal;
64         prefix.push_back(os.str());
65     }
66     {
67         std::ostringstream os;
68         os << "#define YLOCAL " << gYLocal;
69         prefix.push_back(os.str());
70     }
71     {
72         std::ostringstream os;
73         os << "#define ZLOCAL " << gZLocal;
74         prefix.push_back(os.str());
75     }
76 
77     mProgram       = extra->getProgram("convolution_depthwise", glsl_convlutionDepthwise_glsl, prefix);
78     mKernelTexture = std::shared_ptr<GLTexture>(new GLTexture(srcDepthQuad, fw, fh, ((GLBackend *)backend())->getTextrueFormat(), GL_TEXTURE_3D, false));
79 
80     auto transform = extra->getProgram("transform_kernel_image_depthwise", glsl_kernel2ImageDepthwise_glsl);
81     transform->useProgram();
82     glBindImageTexture(0, mKernelTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
83     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, kernelBuffer->getId());
84     OPENGL_CHECK_ERROR;
85     glUniform1i(3, fw);
86     glUniform1i(4, fh);
87     OPENGL_CHECK_ERROR;
88 
89     ((GLBackend *)backend())->compute(srcDepthQuad, fw, fh);
90     OPENGL_CHECK_ERROR;
91 
92 }
93 
onResize(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs)94 ErrorCode GLConvolutionDepthwise::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
95     GPUConvolution::onResize(inputs, outputs);
96     int kx      = mCommon->kernelX();
97     int ky      = mCommon->kernelY();
98     int sx      = mCommon->strideX();
99     int sy      = mCommon->strideY();
100     int dx      = mCommon->dilateX();
101     int dy      = mCommon->dilateY();
102     mSetUniform = [=]() {
103         glUniform2i(4, mPadX, mPadY);
104         glUniform2i(5, kx, ky);
105         glUniform2i(6, sx, sy);
106         glUniform2i(7, dx, dy);
107     };
108     return NO_ERROR;
109 }
110 
onExecute(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs)111 ErrorCode GLConvolutionDepthwise::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
112     {
113         auto convLayer = mCommon;
114 
115         auto input         = inputs[0];
116         auto output        = outputs[0];
117         auto inputTexture  = input->deviceId();
118         auto outputTexture = output->deviceId();
119         int dst_depth_quad = UP_DIV(output->channel(), 4);
120 
121         mProgram->useProgram();
122         glBindImageTexture(0, outputTexture, 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
123         OPENGL_CHECK_ERROR;
124         {
125             int texId = 0;
126             glActiveTexture(GL_TEXTURE0 + texId);
127             glUniform1i(1, texId);
128             glBindTexture(GL_TEXTURE_3D, inputTexture);
129             OPENGL_CHECK_ERROR;
130         }
131         {
132             int texId = 1;
133             glActiveTexture(GL_TEXTURE0 + texId);
134             OPENGL_CHECK_ERROR;
135             glUniform1i(2, texId);
136 
137             OPENGL_CHECK_ERROR;
138             glBindTexture(GL_TEXTURE_3D, mKernelTexture->id());
139             OPENGL_CHECK_ERROR;
140         }
141         glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, mBiasBuffer->getId());
142 
143         OPENGL_CHECK_ERROR;
144         mSetUniform();
145 
146         glUniform3i(10, output->width(), output->height(), UP_DIV(output->channel(), 4));
147         glUniform3i(11, input->width(), input->height(), UP_DIV(input->channel(), 4));
148 
149         OPENGL_CHECK_ERROR;
150 
151         ((GLBackend *)backend())->compute(UP_DIV(output->width(), (gXLocal)), UP_DIV(output->height(), gYLocal),
152                           UP_DIV(dst_depth_quad, gZLocal));
153         OPENGL_CHECK_ERROR;
154 
155     }
156 
157     return NO_ERROR;
158 }
// File-scope object constructed with OpType_ConvolutionDepthwise. Presumably its
// constructor registers a creator for GLConvolutionDepthwise with the OpenGL
// backend during static initialization -- confirm against GLCreatorRegister.
GLCreatorRegister<TypedCreator<GLConvolutionDepthwise>> __depthwise_conv_op(OpType_ConvolutionDepthwise);
160 } // namespace OpenGL
161 } // namespace MNN
162