1 //
2 // GLConvolution.cpp
3 // MNN
4 //
5 // Created by MNN on 2019/01/31.
6 // Copyright © 2018, Alibaba Group Holding Limited
7 //
8
9 #include "backend/opengl/GLConvolution.hpp"
10 #include <MNN/AutoTime.hpp>
11
12 #include <sstream>
13 #include "AllShader.hpp"
14 #include "core/Macro.h"
15 #include "backend/opengl/GLConvolutionIm2col.hpp"
16 namespace MNN {
17 namespace OpenGL {
18
19 #define UNIT 4
20
GPUConvolution(const Op * convOp,Backend * b)21 GPUConvolution::GPUConvolution(const Op *convOp, Backend *b) : MNN::Execution(b) {
22 mCommon = convOp->main_as_Convolution2D()->common();
23 auto convReal = convOp->main_as_Convolution2D();
24 auto outputCount = mCommon->outputCount();
25 mInputDepth = 0;
26
27 if (convReal->weight() != NULL) {
28 auto weightSize = convReal->weight()->size();
29 mInputDepth = weightSize * mCommon->group() / mCommon->kernelX() / mCommon->kernelY() / outputCount;
30 }
31 }
~GPUConvolution()32 GPUConvolution::~GPUConvolution() {
33 }
34
onResize(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs)35 ErrorCode GPUConvolution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
36 auto input = inputs[0];
37 auto output = outputs[0];
38 if (mCommon->padMode() == PadMode_SAME) {
39 int kernelWidthSize = (mCommon->kernelX() - 1) * mCommon->dilateX() + 1;
40 int kernelHeightSize = (mCommon->kernelY() - 1) * mCommon->dilateY() + 1;
41 int pad_needed_width = (output->width() - 1) * mCommon->strideX() + kernelWidthSize - input->width();
42 int pad_needed_height = (output->height() - 1) * mCommon->strideY() + kernelHeightSize - input->height();
43
44 mPadX = (pad_needed_width > 0 ? pad_needed_width : 0) / 2;
45 mPadY = (pad_needed_height > 0 ? pad_needed_height : 0) / 2;
46 return NO_ERROR;
47 }
48 mPadX = mCommon->padX();
49 mPadY = mCommon->padY();
50
51 return NO_ERROR;
52 }
53
~GLConvolution()54 GLConvolution::~GLConvolution() {
55 }
56
GLConvolution(const std::vector<Tensor * > & inputs,const Op * convOp,Backend * bn)57 GLConvolution::GLConvolution(const std::vector<Tensor *> &inputs, const Op *convOp, Backend *bn) : GPUConvolution(convOp, bn) {
58 auto totalWeightSize =
59 ALIGN_UP4(mCommon->outputCount()) * ALIGN_UP4(mInputDepth) * (mCommon->kernelY() * mCommon->kernelX());
60 auto extra = (GLBackend *)bn;
61
62 mBiasBuffer.reset(new GLSSBOBuffer(sizeof(float) * ALIGN_UP4(mCommon->outputCount())));
63 float* bias = (float*)(mBiasBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
64 if(bias != nullptr){
65 ::memset(bias, 0, ALIGN_UP4(mCommon->outputCount()) * sizeof(float));
66 ::memcpy(bias, convOp->main_as_Convolution2D()->bias()->data(),
67 convOp->main_as_Convolution2D()->bias()->size() * sizeof(float));
68 }
69 mBiasBuffer->unmap();
70
71 auto mKernelBuffer = std::shared_ptr<GLSSBOBuffer>(new GLSSBOBuffer(sizeof(float) * totalWeightSize));
72 int fw = mCommon->kernelX();
73 int fh = mCommon->kernelY();
74 int unit = 4;
75 int unit2 = unit * unit;
76 int alignedWeightSize = UP_DIV(mInputDepth, unit) * fw * fh * unit2;
77 int oc_4 = UP_DIV(mCommon->outputCount(), unit);
78
79 float *dest = (float *)mKernelBuffer->map(GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
80 if(dest != nullptr){
81 ::memset(dest, 0, alignedWeightSize * sizeof(float));
82 const float *source = convOp->main_as_Convolution2D()->weight()->data();
83 int cur = 0;
84
85 //weight : oc ic h w -> oc/4, ic/4 ky kx ic4 oc4
86 for (int b = 0; b < mCommon->outputCount(); ++b) {
87 int b_4 = b / unit;
88 float *dst_b = dest + b_4 * alignedWeightSize;
89 int mx = b % unit;
90 for (int d = 0; d < mInputDepth; ++d) {
91 int my = d % unit;
92 int d_4 = d / unit;
93 float *dst_d = dst_b + d_4 * fw * fh * unit2;
94 for (int y = 0; y < fh; ++y) {
95 float *dst_y = dst_d + y * fw * unit2;
96 for (int x = 0; x < fw; ++x) {
97 float *dst_x = dst_y + x * unit2;
98 dst_x[unit * my + mx] = source[cur++];
99 }
100 }
101 }
102 }
103 }
104
105 mKernelBuffer->unmap();
106
107 int ic_4 = UP_DIV(mInputDepth, unit);
108 //weight image : ky kx, oc/4, ic/4*ic4 oc4
109 mKernelTexture =
110 std::shared_ptr<GLTexture>(new GLTexture(ic_4 * unit, oc_4, fw * fh, ((GLBackend *)backend())->getTextrueFormat() , GL_TEXTURE_3D, false));
111
112 auto transform = extra->getProgram("transform_kernel_image_adreno", glsl_kernel2image_adreno_glsl);
113 transform->useProgram();
114 glBindImageTexture(0, mKernelTexture->id(), 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
115 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, mKernelBuffer->getId());
116 OPENGL_CHECK_ERROR;
117 glUniform1i(3, fw * fh);
118 glUniform1i(4, ic_4);
119 OPENGL_CHECK_ERROR;
120
121 ((GLBackend *)backend())->compute(ic_4, oc_4, fw * fh);
122 OPENGL_CHECK_ERROR;
123 }
124
onResize(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs)125 ErrorCode GLConvolution::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
126 GPUConvolution::onResize(inputs, outputs);
127 auto extra = (GLBackend *)backend();
128 std::vector<std::string> prefix;
129 if (mCommon->relu()) {
130 prefix.push_back("#define RELU");
131 }
132 if (mCommon->relu6()) {
133 prefix.push_back("#define RELU6");
134 }
135
136 auto dstDepthQuad = UP_DIV(outputs[0]->channel(), 4);
137
138 setLocalSize(prefix, mLocalSize, 1, 1, dstDepthQuad);
139
140 if (1 == mCommon->kernelY() && 1 == mCommon->kernelX() && 1 == mCommon->strideY() && 1 == mCommon->strideX() &&
141 0 == mCommon->padX() && 0 == mCommon->padY()) {
142 mIs1x1 = true;
143 }
144
145 if (mIs1x1) {
146 mProgram = extra->getProgram("convolution1x1", glsl_convolution1x1_glsl, prefix);
147 } else {
148 mKx = mCommon->kernelX();
149 mKy = mCommon->kernelY();
150 mSx = mCommon->strideX();
151 mSy = mCommon->strideY();
152 mDx = mCommon->dilateX();
153 mDy = mCommon->dilateY();
154 mProgram = extra->getProgram("convolution", glsl_convolution_glsl, prefix);
155 }
156
157 return NO_ERROR;
158 }
159
160
onExecute(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs)161 ErrorCode GLConvolution::onExecute(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
162 {
163 auto convLayer = mCommon;
164
165 auto input = inputs[0];
166 auto output = outputs[0];
167 auto inputTexture = input->deviceId();
168 auto outputTexture = output->deviceId();
169 int oc_4 = UP_DIV(output->channel(), 4);
170
171 mProgram->useProgram();
172 glBindImageTexture(0, outputTexture, 0, GL_TRUE, 0, GL_WRITE_ONLY, ((GLBackend *)backend())->getTextrueFormat());
173 {
174 int texId = 0;
175 glActiveTexture(GL_TEXTURE0 + texId);
176 glUniform1i(1, texId);
177 glBindTexture(GL_TEXTURE_3D, inputTexture);
178 OPENGL_CHECK_ERROR;
179 }
180 {
181 int texId = 1;
182 glActiveTexture(GL_TEXTURE0 + texId);
183 OPENGL_CHECK_ERROR;
184 glUniform1i(2, texId);
185 OPENGL_CHECK_ERROR;
186 glBindTexture(GL_TEXTURE_3D, mKernelTexture->id());
187 OPENGL_CHECK_ERROR;
188 }
189 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, mBiasBuffer->getId());
190
191 if(!mIs1x1){
192 glUniform2i(4, mPadX, mPadY);
193 glUniform2i(5, mKx, mKy);
194 glUniform2i(6, mSx, mSy);
195 glUniform2i(7, mDx, mDy);
196 }
197 OPENGL_CHECK_ERROR;
198 glUniform3i(10, output->width(), output->height(), UP_DIV(output->channel(), 4));
199 glUniform3i(11, input->width(), input->height(), UP_DIV(input->channel(), 4));
200
201 glUniform1i(8, UNIT);
202 OPENGL_CHECK_ERROR;
203
204 ((GLBackend *)backend())->compute(UP_DIV(output->width(), UNIT*mLocalSize[0]), UP_DIV(output->height(), mLocalSize[1]),
205 UP_DIV(oc_4, mLocalSize[2]));
206
207 OPENGL_CHECK_ERROR;
208 }
209
210 return NO_ERROR;
211 }
212
213
214 class ConvolutionCreator : public GLBackend::Creator {
215 public:
216 virtual ~ConvolutionCreator() = default;
onCreate(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs,const MNN::Op * op,Backend * backend) const217 virtual Execution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
218 const MNN::Op *op, Backend *backend) const override {
219 auto common = op->main_as_Convolution2D()->common();
220
221 //TODO: bugfix
222 if(common->padX() == 1 || common->strideX() != 1){
223 return new GLConvolution(inputs, op, backend);
224 }
225 if(((GLBackend *)backend)->gpuType() == GLBackend::ADRENO){
226 if(((GLBackend *)backend)->glVersion() >= 269){
227 return new GLConvolution(inputs, op, backend);
228 }else{
229 return new GLConvolutionIm2col(inputs, op, backend);
230 }
231 }else{
232 return new GLConvolutionIm2col(inputs, op, backend);
233 }
234 }
235 };
236
237 GLCreatorRegister<ConvolutionCreator> __gl_conv_op(OpType_Convolution);
238 } // namespace OpenGL
239 } // namespace MNN
240