1 //
2 //  VulkanRelu.cpp
3 //  MNN
4 //
5 //  Created by MNN on 2019/01/31.
6 //  Copyright © 2018, Alibaba Group Holding Limited
7 //
8 
9 #include "VulkanRelu.hpp"
10 #include "core/Macro.h"
11 #include "core/TensorUtils.hpp"
12 namespace MNN {
13 
// CPU-side mirror of the uniform block consumed by the relu / relu6 /
// preluWithChannel compute shaders. Field order and types must stay in sync
// with the GLSL uniform block layout.
struct GpuReluParam {
    ivec4 imgSize; // x: image width, y: image height, z: image depth, w: unused (0)
    vec4 slope;    // relu/prelu: scalar slope broadcast to all lanes; relu6: x = min, y = max
};
18 
19 //--------------------------relu--------------------------//
VulkanRelu(Backend * bn,const Op * op)20 VulkanRelu::VulkanRelu(Backend *bn, const Op* op) : VulkanBasicExecution(bn) {
21     auto vulkanBn = static_cast<VulkanBackend *>(bn);
22     mGpuReluParam.reset(new VulkanBuffer(vulkanBn->getMemoryPool(), false, sizeof(GpuReluParam), nullptr,
23                                          VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
24     if (op->type() == OpType_ReLU6) {
25         float minv = 0.0f;
26         float maxv = 6.0f;
27         if (nullptr != op->main_as_Relu6()) {
28             minv = op->main_as_Relu6()->minValue();
29             maxv = op->main_as_Relu6()->maxValue();
30         }
31         mSlope[0] = minv;
32         mSlope[1] = maxv;
33         mReluPipeline = vulkanBn->getPipeline("glsl_relu6_comp", {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER});
34     } else {
35         if (op->type() == OpType_ReLU) {
36             mSlope[0] = op->main_as_Relu()->slope();
37             mSlope[1] = op->main_as_Relu()->slope();
38             mSlope[2] = op->main_as_Relu()->slope();
39             mSlope[3] = op->main_as_Relu()->slope();
40         } else {
41             // PRELU
42             auto slope = op->main_as_PRelu()->slope()->data()[0];
43             mSlope[0] = slope;
44             mSlope[1] = slope;
45             mSlope[2] = slope;
46             mSlope[3] = slope;
47         }
48 
49         mReluPipeline = vulkanBn->getPipeline("glsl_relu_comp", {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER});
50     }
51 }
52 
~VulkanRelu()53 VulkanRelu::~VulkanRelu() {
54 }
55 
onEncode(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs,const VulkanCommandPool::Buffer * cmdBuffer)56 ErrorCode VulkanRelu::onEncode(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
57                                const VulkanCommandPool::Buffer *cmdBuffer) {
58     auto input  = inputs[0];
59     auto output = outputs[0];
60 
61     auto vkBn = (VulkanBackend *)backend();
62 
63     auto inputTensor = reinterpret_cast<VulkanTensor*>(input->deviceId());
64     auto outputTensor = reinterpret_cast<VulkanTensor*>(output->deviceId());
65     auto reluParam = reinterpret_cast<GpuReluParam *>(mGpuReluParam->map());
66     ::memset(reluParam, 0, sizeof(GpuReluParam));
67     reluParam->imgSize[0] = inputTensor->image()->width();
68     reluParam->imgSize[1] = inputTensor->image()->height();
69     reluParam->imgSize[2] = inputTensor->image()->depth();
70     reluParam->imgSize[3] = 0;
71     for (int i=0; i<4; ++i) {
72         reluParam->slope[i]      = mSlope[i];
73     }
74     mGpuReluParam->unmap();
75     mDescriptorSet.reset(mReluPipeline->createSet());
76     mDescriptorSet->writeImage(outputTensor->image()->view(), vkBn->getCommonSampler()->get(),
77                                VK_IMAGE_LAYOUT_GENERAL, 0);
78     mDescriptorSet->writeImage(inputTensor->image()->view(), vkBn->getCommonSampler()->get(),
79                                VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 1);
80     mDescriptorSet->writeBuffer(mGpuReluParam->buffer(), 2, mGpuReluParam->size());
81     mReluPipeline->bind(cmdBuffer->get(), mDescriptorSet->get());
82     vkCmdDispatch(cmdBuffer->get(), UP_DIV(inputTensor->image()->width(), 16), UP_DIV(inputTensor->image()->height(), 16), 1);
83     return NO_ERROR;
84 }
85 //--------------------------Prelu--------------------------//
VulkanPrelu(Backend * bn,const Op * op)86 VulkanPrelu::VulkanPrelu(Backend *bn, const Op *op) : VulkanBasicExecution(bn) {
87     std::vector<VkDescriptorType> types{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
88                                         VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER};
89     auto vulkanBn    = static_cast<VulkanBackend *>(bn);
90     mPreluPipeline   = vulkanBn->getPipeline("glsl_preluWithChannel_comp",
91                                            /*glsl_preluWithChannel_comp, glsl_preluWithChannel_comp_len,*/ types);
92     const auto prelu = op->main_as_PRelu();
93     mGpuPreluParam.reset(new VulkanBuffer(vulkanBn->getMemoryPool(), false, sizeof(GpuReluParam), nullptr,
94                                           VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
95     int count = ALIGN_UP4(prelu->slope()->size());
96 
97     mSlope.reset(new VulkanImage(vulkanBn->getMemoryPool(), false, std::vector<int>{count / 4, 1}));
98     {
99         std::shared_ptr<VulkanBuffer> slopeBuffer(new VulkanBuffer(
100             vulkanBn->getMemoryPool(), false, sizeof(float) * count, nullptr, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
101         auto slope = slopeBuffer->map();
102         ::memset(slope, 0, count * sizeof(float));
103         ::memcpy(slope, prelu->slope()->data(), prelu->slope()->size() * sizeof(float));
104         slopeBuffer->unmap();
105         vulkanBn->copyBufferToImage(slopeBuffer.get(), mSlope.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
106     }
107 }
108 
~VulkanPrelu()109 VulkanPrelu::~VulkanPrelu() {
110 }
111 
onEncode(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs,const VulkanCommandPool::Buffer * cmdBuffer)112 ErrorCode VulkanPrelu::onEncode(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
113                                 const VulkanCommandPool::Buffer *cmdBuffer) {
114     auto input  = inputs[0];
115     auto output = outputs[0];
116 
117     auto preluParam = reinterpret_cast<GpuReluParam *>(mGpuPreluParam->map());
118     ::memset(preluParam, 0, sizeof(GpuReluParam));
119     auto vkBn = static_cast<VulkanBackend *>(backend());
120 
121     const int channelDiv4  = UP_DIV(input->channel(), 4);
122     preluParam->imgSize[0] = input->width();
123     preluParam->imgSize[1] = input->height();
124     preluParam->imgSize[2] = channelDiv4;
125     preluParam->imgSize[3] = 0;
126     mGpuPreluParam->flush(true, 0, sizeof(GpuReluParam));
127     mGpuPreluParam->unmap();
128 
129     auto vkBackend = (VulkanBackend*)backend();
130     auto vkOutput  = (VulkanTensor*)output->deviceId();
131     auto vkInput   = (VulkanTensor*)input->deviceId();
132     cmdBuffer->barrierImageIfNeeded(vkOutput->image(), VK_IMAGE_LAYOUT_GENERAL);
133     cmdBuffer->barrierImageIfNeeded(vkInput->image(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
134     cmdBuffer->barrierImageIfNeeded(mSlope.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
135 
136     mDescriptorSet.reset(mPreluPipeline->createSet());
137     mDescriptorSet->writeImage(((VulkanTensor*)output->deviceId())->image()->view(), vkBn->getCommonSampler()->get(),
138                                VK_IMAGE_LAYOUT_GENERAL, 0);
139     mDescriptorSet->writeImage(((VulkanTensor*)input->deviceId())->image()->view(), vkBn->getCommonSampler()->get(),
140                                VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 1);
141     mDescriptorSet->writeImage((mSlope->view()), vkBn->getCommonSampler()->get(),
142                                VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 2);
143     mDescriptorSet->writeBuffer(mGpuPreluParam->buffer(), 3, mGpuPreluParam->size());
144 
145     mPreluPipeline->bind(cmdBuffer->get(), mDescriptorSet->get());
146 
147     vkCmdDispatch(cmdBuffer->get(), UP_DIV(input->width(), 16), UP_DIV(input->height(), 16), channelDiv4 * input->batch());
148     return NO_ERROR;
149 }
150 
151 class VulkanReluCreator : public VulkanBackend::Creator {
152 public:
onCreate(const std::vector<Tensor * > & inputs,const std::vector<Tensor * > & outputs,const MNN::Op * op,Backend * bn) const153     virtual VulkanBasicExecution *onCreate(const std::vector<Tensor *> &inputs, const std::vector<Tensor*>& outputs, const MNN::Op *op, Backend *bn) const override {
154         auto type  = op->type();
155         if (OpType_ReLU6 == type) {
156             return new VulkanRelu(bn, op);
157         }
158         if (OpType_ReLU == type) {
159             return new VulkanRelu(bn, op);
160         } else if (1 == op->main_as_PRelu()->slopeCount()) {
161             return new VulkanRelu(bn, op);
162         } else {
163             return new VulkanPrelu(bn, op);
164         }
165         return nullptr;
166     }
167 };
168 
__anon764c0b250102() 169 static bool gr = []() {
170     VulkanBackend::addCreator(OpType_ReLU, new VulkanReluCreator);
171     VulkanBackend::addCreator(OpType_PReLU, new VulkanReluCreator);
172     VulkanBackend::addCreator(OpType_ReLU6, new VulkanReluCreator);
173     return true;
174 }();
175 
176 } // namespace MNN
177