1 //
2 //  VulkanConvolution.cpp
3 //  MNN
4 //
5 //  Created by MNN on 2019/01/31.
6 //  Copyright © 2018, Alibaba Group Holding Limited
7 //
8 
9 #include "VulkanConvolution.hpp"
10 #include "core/Macro.h"
11 #include "VulkanConvolutionImpl.hpp"
12 #include "core/ConvolutionCommon.hpp"
13 namespace MNN {
14 int VulkanConvolutionCommon::gImage2ColLocal = 256;
getPostTreatMacro(const Convolution2DCommon * common)15 std::string VulkanConvolutionCommon::getPostTreatMacro(const Convolution2DCommon* common) {
16     if (common->relu()) {
17         return "RELU_";
18     } else if (common->relu6()) {
19         return "RELU6_";
20     }
21     return "";
22 }
23 
_createBufferForConvDepthwise(VulkanBackend * extra,const Convolution2DCommon * mCommon,const float * weightSource,size_t weightSize)24 static std::shared_ptr<VulkanBuffer> _createBufferForConvDepthwise(VulkanBackend* extra,
25                                                                    const Convolution2DCommon* mCommon,
26                                                                    const float* weightSource, size_t weightSize) {
27     auto outputCount     = mCommon->outputCount();
28     auto totalWeightSize = ALIGN_UP4(mCommon->outputCount()) * (mCommon->kernelY() * mCommon->kernelX());
29     auto kernelBuffer    = std::make_shared<VulkanBuffer>(extra->getMemoryPool(), false, sizeof(float) * totalWeightSize, nullptr,
30                                                           VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
31     auto layer        = mCommon;
32 
33     auto weight     = (float*)kernelBuffer->map();
34     int kw          = layer->kernelX();
35     int kh          = layer->kernelY();
36     int planeStride = kw * kh * 4;
37 
38     int cur = 0;
39     for (int c = 0; c < outputCount; ++c) {
40         int plane  = c / 4;
41         int offset = c % 4;
42         for (int y = 0; y < kh; ++y) {
43             for (int x = 0; x < kw; ++x) {
44                 float* dst = weight + offset + (x + y * kw) * 4 + planeStride * plane;
45                 *dst       = weightSource[cur++];
46             }
47         }
48     }
49     kernelBuffer->unmap();
50     return kernelBuffer;
51 }
52 
writeParameter(ConvolutionParameter * convCons,const Convolution2DCommon * common,const Tensor * input,const Tensor * output)53 void VulkanConvolutionCommon::writeParameter(ConvolutionParameter* convCons, const Convolution2DCommon* common,
54                                              const Tensor* input, const Tensor* output) {
55     int icDiv4 = UP_DIV(input->channel(), 4);
56     int ocDiv4 = UP_DIV(output->channel(), 4);
57     auto pad = ConvolutionCommon::convolutionPad(input, output, common);
58     int padX   = pad.first;
59     int padY   = pad.second;
60     {
61         convCons->dilate[0]     = common->dilateX();
62         convCons->dilate[1]     = common->dilateY();
63         convCons->stride[0]     = common->strideX();
64         convCons->stride[1]     = common->strideY();
65         convCons->pad[0]        = padX;
66         convCons->pad[1]        = padY;
67         convCons->kernelSize[0] = common->kernelX();
68         convCons->kernelSize[1] = common->kernelY();
69 
70         convCons->inputSize[0] = input->width();
71         convCons->inputSize[1] = input->height();
72         convCons->inputSize[2] = icDiv4;
73         convCons->inputSize[3] = input->batch();
74 
75         convCons->outputSize[0] = output->width();
76         convCons->outputSize[1] = output->height();
77         convCons->outputSize[2] = ocDiv4;
78         convCons->outputSize[3] = output->batch();
79         convCons->offset[0]     = 0;
80         convCons->offset[1]     = 0;
81         convCons->offset[2]     = output->height();
82     }
83 }
84 
VulkanConvolutionCommon(const Op * convOp,Backend * bn)85 VulkanConvolutionCommon::VulkanConvolutionCommon(const Op* convOp, Backend* bn) : VulkanBasicExecution(bn) {
86     auto extra    = static_cast<VulkanBackend*>(bn);
87     mCommon       = convOp->main_as_Convolution2D()->common();
88     mConvCons = std::make_shared<VulkanBuffer>(extra->getMemoryPool(), false, sizeof(ConvolutionParameter), nullptr,
89                                                VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
90 }
91 
~VulkanConvolutionCommon()92 VulkanConvolutionCommon::~VulkanConvolutionCommon() {
93 }
94 
onEncode(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs,const VulkanCommandPool::Buffer * cmdBuffer)95 ErrorCode VulkanConvolutionCommon::onEncode(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
96                                             const VulkanCommandPool::Buffer* cmdBuffer) {
97     auto input  = inputs[0];
98     auto output = outputs[0];
99     {
100         auto convCons = (ConvolutionParameter*)mConvCons->map();
101         writeParameter(convCons, mCommon, input, output);
102         mConvCons->unmap();
103     }
104 
105     auto code = this->onEncodeConvolution(mCommon, inputs, outputs, cmdBuffer, mConvCons.get());
106     if (NO_ERROR != code) {
107         return code;
108     }
109     return NO_ERROR;
110 }
_init(const float * weightData,size_t weightSize,const Op * convOp,Backend * bn)111 bool VulkanConvolutionDepthwise::_init(const float* weightData, size_t weightSize, const Op* convOp, Backend* bn) {
112     auto extra      = static_cast<VulkanBackend*>(bn);
113     auto common    = convOp->main_as_Convolution2D()->common();
114     mSampler        = extra->getCommonSampler();
115     // Create Pipeline
116     std::vector<VkDescriptorType> convTypes{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
117                                             VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
118                                             VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
119                                             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER};
120     MNN_ASSERT(OpType_ConvolutionDepthwise == convOp->type());
121     auto macro = getPostTreatMacro(common);
122     if (extra->gpuType() == VulkanRuntime::ADRENO) {
123         mConvPipeline = extra->getPipeline("glsl_convolutionDepthwise_" + macro + "comp", convTypes);
124         mLocalX       = 16;
125         mLocalY       = 16;
126     } else {
127         mConvPipeline = extra->getPipeline("glsl_convolutionDepthwiseMali_" + macro + "comp", convTypes);
128         mLocalX       = 8;
129         mLocalY       = 8;
130     }
131     auto c4 = UP_DIV(common->outputCount(), 4);
132     mKernel = std::make_shared<VulkanImage>(extra->getMemoryPool(), false, common->kernelX() * common->kernelY(), c4);
133     if (nullptr != weightData){
134         auto tempBuffer = _createBufferForConvDepthwise(extra, common, weightData, weightSize);
135         extra->copyBufferToImage(tempBuffer.get(), mKernel.get());
136     }
137     auto convReal = convOp->main_as_Convolution2D();
138     mBias.reset(new VulkanImage(extra->getMemoryPool(), false, {c4, 1}));
139     auto biasBuffer = std::make_shared<VulkanBuffer>(extra->getMemoryPool(), false,
140                                                      sizeof(float) * ALIGN_UP4(common->outputCount()));
141 
142     auto bias = biasBuffer->map();
143     ::memset(bias, 0, ALIGN_UP4(common->outputCount()) * sizeof(float));
144     if (nullptr != convReal->bias()) {
145         // Create Buffer
146         ::memcpy(bias, convReal->bias()->data(), common->outputCount() * sizeof(float));
147     }
148     biasBuffer->unmap();
149     extra->copyBufferToImage(biasBuffer.get(), mBias.get());
150     return true;
151 }
152 
153 
VulkanConvolutionDepthwise(const float * weightData,size_t weightSize,const Op * convOp,Backend * bn)154 VulkanConvolutionDepthwise::VulkanConvolutionDepthwise(const float* weightData, size_t weightSize, const Op* convOp, Backend* bn)
155     : VulkanConvolutionCommon(convOp, bn) {
156     _init(weightData, weightSize, convOp, bn);
157 }
158 
~VulkanConvolutionDepthwise()159 VulkanConvolutionDepthwise::~VulkanConvolutionDepthwise() {
160 }
161 
onEncodeConvolution(const Convolution2DCommon * common,const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs,const VulkanCommandPool::Buffer * cmdBuffer,const VulkanBuffer * convCons)162 ErrorCode VulkanConvolutionDepthwise::onEncodeConvolution(const Convolution2DCommon* common,
163                                                           const std::vector<Tensor*>& inputs,
164                                                           const std::vector<Tensor*>& outputs,
165                                                           const VulkanCommandPool::Buffer* cmdBuffer,
166                                                           const VulkanBuffer* convCons) {
167     auto input  = inputs[0];
168     auto output = outputs[0];
169     /*Set Const Parameters*/
170     int ocDiv4 = UP_DIV(output->channel(), 4);
171     int ow     = output->width();
172     int oh     = output->height();
173     auto extra = static_cast<VulkanBackend*>(backend());
174     mExtraSets.clear();
175     mExtraBuffers.clear();
176     if (inputs.size() >= 2) {
177         auto weight = reinterpret_cast<VulkanTensor*>(inputs[1]->deviceId())->image();
178         auto pipeline = extra->getPipeline("glsl_dwweightcopy_comp", {
179             VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
180             VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
181             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
182         });
183         std::shared_ptr<VulkanPipeline::DescriptorSet> des(pipeline->createSet());
184         des->writeImage(weight->view(), extra->getCommonSampler()->get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 1);
185         des->writeImage(mKernel->view(), extra->getCommonSampler()->get(), VK_IMAGE_LAYOUT_GENERAL, 0);
186         int dim[4] = {
187             weight->width(),
188             weight->height(),
189             inputs[1]->height(),
190             weight->depth() * weight->height() * weight->width()
191         };
192         std::shared_ptr<VulkanBuffer> uniforms(new VulkanBuffer(extra->getMemoryPool(), false, sizeof(dim), &dim, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
193         des->writeBuffer(uniforms->buffer(), 2, uniforms->size());
194         pipeline->bind(cmdBuffer->get(), des->get());
195         vkCmdDispatch(cmdBuffer->get(), UP_DIV(dim[3], 256), 1, 1);
196         mExtraBuffers.emplace_back(uniforms);
197         mExtraSets.emplace_back(des);
198         cmdBuffer->barrierImage(mKernel->get(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
199     }
200     const VulkanImage* bias;
201     if (inputs.size() >= 3) {
202         bias = reinterpret_cast<VulkanTensor*>(inputs[2]->deviceId())->image();
203     } else {
204         bias = mBias.get();
205     }
206     if (nullptr == bias) {
207         mBias.reset(new VulkanImage(extra->getMemoryPool(), false, {1, 1}));
208         // Create Buffer
209         auto biasBuffer = std::make_shared<VulkanBuffer>(extra->getMemoryPool(), false,
210                                                          sizeof(float) * 4);
211         auto biasPtr = biasBuffer->map();
212         ::memset(biasPtr, 0, 4 * sizeof(float));
213         biasBuffer->unmap();
214         extra->copyBufferToImage(biasBuffer.get(), mBias.get());
215         bias = mBias.get();
216     }
217     /*Write Command Buffer*/
218     mConvSet.reset(mConvPipeline->createSet());
219     mConvSet->writeImage(((VulkanTensor*)output->deviceId())->image()->view(), mSampler->get(), VK_IMAGE_LAYOUT_GENERAL, 0);
220     mConvSet->writeImage(((VulkanTensor*)input->deviceId())->image()->view(), mSampler->get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
221                          1);
222     mConvSet->writeImage(mKernel->view(), mSampler->get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 2);
223     mConvSet->writeImage(bias->view(), mSampler->get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 3);
224     mConvSet->writeBuffer(convCons->buffer(), 4, convCons->size());
225     mConvPipeline->bind(cmdBuffer->get(), mConvSet->get());
226     vkCmdDispatch(cmdBuffer->get(), UP_DIV(ow, mLocalX), UP_DIV(oh, mLocalY), ocDiv4 * input->batch());
227     return NO_ERROR;
228 }
229 
230 class VulkanConvolutionCreator : public VulkanBackend::Creator {
231 public:
onCreate(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs,const MNN::Op * op,Backend * backend) const232     virtual VulkanBasicExecution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs, const MNN::Op* op,
233                                 Backend* backend) const override {
234         auto extra          = static_cast<VulkanBackend *>(backend);
235         auto convReal       = op->main_as_Convolution2D();
236         auto common         = convReal->common();
237         auto outputCount    = common->outputCount();
238         const int fh        = common->kernelY();
239         const int fw        = common->kernelX();
240         int srcCount        = 0;
241         const float* source = nullptr;
242         const float* biasPtr = nullptr;
243         int weightSize = 0;
244         std::shared_ptr<ConvolutionCommon::Int8Common> quanWeight;
245         if (nullptr != op->main_as_Convolution2D()->quanParameter()) {
246             auto quan = op->main_as_Convolution2D()->quanParameter();
247             if (1 == quan->type() || 2 == quan->type()) {
248                 if (quan->has_scaleInt()) {
249                     // Don't support IDST-int8 because of error
250                     return nullptr;
251                 }
252             }
253             quanWeight = ConvolutionCommon::load(op->main_as_Convolution2D()->quanParameter(), true);
254             srcCount = quanWeight->weightFloat.size() / (outputCount * fh * fw);
255             source   = quanWeight->weightFloat.get();
256             weightSize = quanWeight->weightFloat.size();
257         } else {
258             if (nullptr != convReal->weight()) {
259                 srcCount = convReal->weight()->size() / (outputCount * fh * fw);
260                 source   = convReal->weight()->data();
261                 weightSize = convReal->weight()->size();
262             } else {
263                 srcCount = convReal->common()->inputCount();
264             }
265         }
266         if (nullptr != convReal->bias()) {
267             biasPtr = convReal->bias()->data();
268         }
269         if (op->type() == OpType_Convolution) {
270             if (inputs.size() > 1) {
271                 return nullptr;
272             }
273             auto convCommonParam = op->main_as_Convolution2D()->common();
274             const int group      = convCommonParam->group();
275             if (1 == group) {
276                 return VulkanConvolutionImpl::create(extra, common, inputs, outputs[0], source,
277                                                      biasPtr, srcCount, outputCount);
278 
279             } else {
280                 return nullptr;
281             }
282         }
283         return new VulkanConvolutionDepthwise(source, weightSize, op, backend);
284     }
285 };
286 
__anon074957ed0102() 287 static bool gResistor = []() {
288     VulkanBackend::addCreator(OpType_Convolution, new VulkanConvolutionCreator);
289     VulkanBackend::addCreator(OpType_ConvolutionDepthwise, new VulkanConvolutionCreator);
290     return true;
291 }();
292 
293 } // namespace MNN
294