//
//  VulkanConvolutionImpl.cpp
//  MNN
//
//  Created by MNN on 2019/01/31.
//  Copyright © 2018, Alibaba Group Holding Limited
//
8
9 #include "VulkanConvolutionImpl.hpp"
10 #include "core/Macro.h"
11 #include "VulkanConvolution.hpp"
12 #include "VulkanConvolutionWinograd.hpp"
13 #include "VulkanMatMul.hpp"
14 //#define MNN_OPEN_TIME_TRACE
15 #include <MNN/AutoTime.hpp>
16 namespace MNN {
17 //#define VULKAN_IM2COL_GEMM_UNIT 512
writeParameters(VulkanMatMul::Reorder::nchwBuffer & parameters,int co,int ci,int kh,int kw)18 static void writeParameters(VulkanMatMul::Reorder::nchwBuffer& parameters, int co, int ci, int kh, int kw) {
19 parameters.size[0] = co;
20 parameters.size[1] = ci;
21 parameters.size[2] = kh;
22 parameters.size[3] = kw;
23 parameters.stride[0] = ci * kh * kw;
24 parameters.stride[1] = kh * kw;
25 parameters.stride[2] = kw;
26 parameters.stride[3] = 1;
27 }
28 class VulkanConvolutionIm2Col : public VulkanBasicExecution {
29 public:
30
VulkanConvolutionIm2Col(VulkanBackend * backend,const Convolution2DCommon * convOption,const float * weightPtr,const float * biasPtr,int ci,int co)31 VulkanConvolutionIm2Col(VulkanBackend* backend, const Convolution2DCommon* convOption, const float* weightPtr,
32 const float* biasPtr, int ci, int co) : VulkanBasicExecution(backend), mConvCommonOption(convOption) {
33 auto kw = convOption->kernelX();
34 auto kh = convOption->kernelY();
35 if (nullptr != weightPtr) {
36 // Static weight
37 VulkanMatMul::Reorder reorder(backend, true);
38 VulkanMatMul::Reorder::nchwBuffer parameters;
39 writeParameters(parameters, co, ci, kh, kw);
40 mKernel = VulkanMatrixMultier4x4::createKernel(backend, nullptr, ALIGN_UP4(ci) * kh * kw, co, 1);
41 auto weightSize = ci * co * kh * kw;
42 std::shared_ptr<VulkanBuffer> tempBuffer(new VulkanBuffer(backend->getMemoryPool(), false, weightSize*sizeof(float), nullptr, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
43 auto tempWeightBuffer = tempBuffer->map();
44 ::memcpy(tempWeightBuffer, weightPtr, weightSize * sizeof(float));
45 tempBuffer->unmap();
46 std::shared_ptr<VulkanBuffer> tempBuffer2(new VulkanBuffer(backend->getMemoryPool(), false, reorder.computeMiddleBufferSize(co, kh, kw, ci) *sizeof(float), nullptr, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
47 std::shared_ptr<VulkanCommandPool::Buffer> cmdBuffer(backend->getPool().allocBuffer());
48 cmdBuffer->begin(0);
49 reorder.encode(tempBuffer->buffer(), tempBuffer->size(), tempBuffer2->buffer()
50 , tempBuffer2->size(), mKernel.get(), cmdBuffer.get(), parameters);
51 cmdBuffer->end();
52 backend->getPool().submitAndWait(cmdBuffer->get());
53 }
54 mMultiCreator = [ci, kh, kw, co, backend, this]() {
55 auto multi = std::make_shared<VulkanMatrixMultier4x4>(backend, nullptr, ALIGN_UP4(ci) * kh * kw, co, 1, mKernel);
56 return multi;
57 };
58 std::vector<VkDescriptorType> im2Coltypes{
59 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER};
60 if (kw == 1 && kh == 1 && convOption->padX() == 0 && convOption->padY() == 0) {
61 mIm2Col =
62 backend->getPipeline("glsl_im2col1x1_comp", /* glsl_im2col1x1_comp, glsl_im2col1x1_comp_len,*/ im2Coltypes);
63 } else {
64 mIm2Col = backend->getPipeline("glsl_im2col_comp", /*glsl_im2col_comp, glsl_im2col_comp_len,*/ im2Coltypes);
65 }
66 std::vector<VkDescriptorType> Col2imTypes{
67 VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
68 VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER};
69 auto macro = VulkanConvolutionCommon::getPostTreatMacro(convOption);
70 mCol2Im = backend->getPipeline("glsl_col2Im_" + macro + "comp", Col2imTypes);
71
72 mSampler = backend->getCommonSampler();
73 if (nullptr != biasPtr) {
74 // Static bias
75 mBias = std::make_shared<VulkanImage>(backend->getMemoryPool(), false, UP_DIV(co, 4), 1);
76 auto tempBias = std::make_shared<VulkanBuffer>(backend->getMemoryPool(), false, sizeof(float) * ALIGN_UP4(co));
77 auto bias = tempBias->map();
78 ::memset(bias, 0, sizeof(float) * ALIGN_UP4(co));
79 ::memcpy(bias, biasPtr, sizeof(float) * co);
80 tempBias->unmap();
81 backend->copyBufferToImage(tempBias.get(), mBias.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
82 }
83 }
~VulkanConvolutionIm2Col()84 ~VulkanConvolutionIm2Col() {
85 // Do nothing
86 }
onEncode(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs,const VulkanCommandPool::Buffer * cmdBuffer)87 virtual ErrorCode onEncode(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
88 const VulkanCommandPool::Buffer* cmdBuffer) override {
89 auto src = inputs[0];
90 auto dst = outputs[0];
91 const int icDiv4 = UP_DIV(src->channel(), 4);
92 const int ocDiv4 = UP_DIV(dst->channel(), 4);
93 auto vkBn = (VulkanBackend*)backend();
94 int limit = vkBn->proty().limits.maxImageDimension2D * 4;
95 #ifdef VULKAN_IM2COL_GEMM_UNIT
96 limit = VULKAN_IM2COL_GEMM_UNIT;
97 #endif
98 if (limit < dst->width()) {
99 MNN_ERROR("Don't support width too large feature: %d x %d, limit = %d\n", dst->width(), dst->height(), limit);
100 return NOT_SUPPORT;
101 }
102 int batchLoopNumber = 1;
103 int heightLoopNumber = 1;
104 int unitHeight = dst->height();
105 int unitBatch = dst->batch();
106 auto area = dst->width() * dst->height();
107 if (limit < area) {
108 batchLoopNumber = dst->batch();
109 unitBatch = 1;
110 unitHeight = limit / dst->width();
111 heightLoopNumber = UP_DIV(dst->height(), unitHeight);
112 } else if (limit < area * dst->batch()) {
113 unitBatch = limit / area;
114 batchLoopNumber = UP_DIV(dst->batch(), unitBatch);
115 }
116 int loopNumber = batchLoopNumber * heightLoopNumber;
117 mConvParams.resize(loopNumber);
118 mMultilers.resize(loopNumber);
119 mIm2ColSet.resize(loopNumber);
120 mCol2ImSet.resize(loopNumber);
121
122 for (int i=0; i<batchLoopNumber; ++i) {
123 int batchOffset = i * unitBatch;
124 int currentBatch = dst->batch() - batchOffset;
125 if (currentBatch > unitBatch) {
126 currentBatch = unitBatch;
127 }
128 for (int j=0; j<heightLoopNumber; ++j) {
129 int heightOffset = j * unitHeight;
130 int currentHeight = dst->height() - heightOffset;
131 if (currentHeight > unitHeight) {
132 currentHeight = unitHeight;
133 }
134 auto index = i * heightLoopNumber + j;
135 auto totalNumberInput = currentBatch * icDiv4 * dst->width() * currentHeight;
136 auto totalNumberOutput = currentBatch * ocDiv4 * dst->width() * currentHeight;
137 mConvParams[index] = std::make_shared<VulkanBuffer>(vkBn->getMemoryPool(), false,
138 sizeof(VulkanConvolutionCommon::ConvolutionParameter), nullptr,
139 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
140 {
141 auto convCons = reinterpret_cast<VulkanConvolutionCommon::ConvolutionParameter*>(mConvParams[index]->map());
142 VulkanConvolutionCommon::writeParameter(convCons, mConvCommonOption, src, dst);
143 convCons->offset[0] = batchOffset;
144 convCons->offset[1] = heightOffset;
145 convCons->outputSize[3] = currentBatch;
146 convCons->outputSize[1] = currentHeight;
147 mConvParams[index]->unmap();
148 }
149 mIm2ColSet[index].reset(mIm2Col->createSet());
150 mCol2ImSet[index].reset(mCol2Im->createSet());
151 mMultilers[index] = mMultiCreator();
152 mMultilers[index]->prepare(cmdBuffer, dst->width() * currentHeight * currentBatch);
153 auto mMultiler = mMultilers[index].get();
154 if (true) {
155 auto colImage = mMultiler->source();
156 cmdBuffer->barrierImageIfNeeded(colImage, VK_IMAGE_LAYOUT_GENERAL);
157 mIm2ColSet[index]->writeImage(colImage->view(), mSampler->get(), VK_IMAGE_LAYOUT_GENERAL, 0);
158 mIm2ColSet[index]->writeImage((reinterpret_cast<VulkanTensor*>(src->deviceId()))->image()->view(), mSampler->get(),
159 VK_IMAGE_LAYOUT_GENERAL, 1);
160 mIm2ColSet[index]->writeBuffer(mConvParams[index]->buffer(), 2, mConvParams[index]->size());
161 mIm2Col->bind(cmdBuffer->get(), mIm2ColSet[index]->get());
162 vkCmdDispatch(cmdBuffer->get(), UP_DIV(totalNumberInput, VulkanConvolutionCommon::gImage2ColLocal),
163 1, 1);
164 cmdBuffer->barrierImageIfNeeded(colImage, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
165 }
166 mMultilers[index]->compute(cmdBuffer);
167 if (true) {
168 auto dstImage = mMultiler->dest();
169 mCol2ImSet[index]->writeImage(dstImage->view(), mSampler->get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 0);
170 mCol2ImSet[index]->writeImage((reinterpret_cast<VulkanTensor*>(dst->deviceId()))->image()->view(), mSampler->get(),
171 VK_IMAGE_LAYOUT_GENERAL, 1);
172
173 mCol2ImSet[index]->writeImage(mBias->view(), mSampler->get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 2);
174 mCol2ImSet[index]->writeBuffer(mConvParams[index]->buffer(), 3, mConvParams[index]->size());
175 mCol2Im->bind(cmdBuffer->get(), mCol2ImSet[index]->get());
176 cmdBuffer->barrierImageIfNeeded(dstImage, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
177 // cmdBuffer->barrierImage(dstImage->get(), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
178 vkCmdDispatch(cmdBuffer->get(), UP_DIV(totalNumberOutput, VulkanConvolutionCommon::gImage2ColLocal),
179 1, 1);
180 }
181 }
182 }
183 return NO_ERROR;
184 }
185 private:
186 const VulkanPipeline* mIm2Col;
187 const VulkanPipeline* mCol2Im;
188 const VulkanSampler* mSampler;
189
190 std::shared_ptr<VulkanImage> mBias;
191 std::shared_ptr<VulkanImage> mKernel;
192 const Convolution2DCommon* mConvCommonOption;
193 std::vector<std::shared_ptr<VulkanPipeline::DescriptorSet>> mCol2ImSet;
194 std::vector<std::shared_ptr<VulkanPipeline::DescriptorSet>> mIm2ColSet;
195 std::vector<std::shared_ptr<VulkanBuffer>> mConvParams;
196 std::vector<std::shared_ptr<VulkanMatrixMultier4x4>> mMultilers;
197 std::function<std::shared_ptr<VulkanMatrixMultier4x4>()> mMultiCreator;
198 };
199
create(VulkanBackend * backend,const Convolution2DCommon * convOption,const std::vector<Tensor * > & inputs,const Tensor * output,const float * weightPtr,const float * biasPtr,int ci,int co)200 VulkanBasicExecution* VulkanConvolutionImpl::create(VulkanBackend* backend, const Convolution2DCommon* convOption,
201 const std::vector<Tensor*>& inputs, const Tensor* output,
202 const float* weightPtr, const float* biasPtr, int ci, int co) {
203 AUTOTIME;
204 if (inputs.size() > 1) {
205 return new VulkanConvolutionIm2Col(backend, convOption, weightPtr, biasPtr, ci, co);
206 }
207 auto imageLimit = backend->proty().limits.maxImageDimension2D;
208 if (VulkanConvolutionWinograd::support(convOption)) {
209 if (output->width() >= 4 && output->height() >= 4 && output->batch() == 1) {
210 return new VulkanConvolutionWinograd(backend, convOption, weightPtr, biasPtr, ci, co);
211 }
212 }
213 if (ALIGN_UP4(ci) * convOption->kernelX() * convOption->kernelY() > imageLimit) {
214 return nullptr;
215 }
216 return new VulkanConvolutionIm2Col(backend, convOption, weightPtr, biasPtr, ci, co);
217 }
218
219 } // namespace MNN
220