// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

#include "../../precomp.hpp"
#include "common.hpp"
#include "internal.hpp"
#include "../include/op_conv.hpp"

namespace cv { namespace dnn { namespace vkcom {

#ifdef HAVE_VULKAN

#define DEFAULT_LOCAL_SZ 256
#define MAX_COMPUTE_GFLOPS 10
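// Note: the Vulkan spec only guarantees a maxComputeWorkGroupCount limit of
// 65535 per dimension, so the dispatch sizes below are capped conservatively.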
// TODO: query group count from vulkan device
#define MAX_GROUP_COUNT_X 65535
#define MAX_GROUP_COUNT_Y 65535
#define MAX_GROUP_COUNT_Z 65535

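// ShaderConstant mirrors the specialization constants (constant IDs 0..19)
// consumed by the conv48 shader; see the SET_SPEC_CONST_ENTRY mappings in
// OpConv::forward().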
struct ShaderConstant {
    int lsz_x;
    int lsz_y;
    int lsz_z;
    int in_h;
    int in_w;
    int out_w;
    int stride_h;
    int stride_w;
    int pad_h;
    int pad_w;
    int filter_h;
    int filter_w;
    int channels;
    int batch;
    int m;
    int k;
    int n;
    int tail_m;
    int dilation_h;
    int dilation_w;
};

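// ShaderParam carries the per-dispatch parameters handed to the basic and
// depth-wise shaders through recordCommandBuffer(); the conv48 shader bakes
// its parameters in as specialization constants instead.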
struct ShaderParam {
    int in_h;
    int in_w;
    int out_h;
    int out_w;
    int stride_h;
    int stride_w;
    int pad_h;
    int pad_w;
    int filter_h;
    int filter_w;
    int dilation_h;
    int dilation_w;
    int channels;
    int batch;
    int has_bias;
    int M;
    int K;
    int N;
    int basic_shader_batch_idx;
    int basic_shader_partition_idx;
    int basic_shader_partition_size;
};

OpConv::OpConv(const int out_channel, const bool has_bias,
               const int* filter_size, const int* pad,
               const int* stride, const int* dilation,
               const int activation, const int group,
               const int padding_mode)
{
    init(out_channel, has_bias, filter_size, pad,
         stride, dilation, activation, group, padding_mode);
    type_ = "Conv";
}

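// Recompute the output spatial size (and, where the padding mode requires it,
// the paddings) from the current input shape, then reshape the output tensor.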
void OpConv::reshapeOutTensor(Tensor& in, Tensor& out)
{
    Shape in_shape = in.getShape();
    batch_ = in_shape[kShapeIdxBatch];
    in_height_ = in_shape[kShapeIdxHeight];
    in_width_ = in_shape[kShapeIdxWidth];
    computeConvOutputShapeAndPadding(padding_mode_, padding_top_, padding_left_,
                                     in_height_, in_width_,
                                     filter_height_, filter_width_,
                                     dilation_height_, dilation_width_,
                                     stride_height_, stride_width_,
                                     out_height_, out_width_);
    Shape shape = {batch_, out_channel_, out_height_, out_width_};
    out.reshape(NULL, shape);
}

bool OpConv::init(const int out_channel, const bool has_bias,
                  const int* filter_size, const int* pad,
                  const int* stride, const int* dilation,
                  const int activation, const int group,
                  const int padding_mode)
{
    out_channel_ = out_channel;
    filter_height_ = filter_size[0];
    filter_width_ = filter_size[1];
    padding_top_ = pad[0];
    padding_left_ = pad[1];
    stride_height_ = stride[0];
    stride_width_ = stride[1];
    dilation_height_ = dilation[0];
    dilation_width_ = dilation[1];
    padding_mode_ = (PaddingMode)padding_mode;
    has_bias_ = has_bias ? 1 : 0;
    activation_ = activation;
    group_ = group;

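    // Four descriptor bindings: input, bias, filter weights and output
    // (bound in forward()).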
    #define BUFFER_NUM 4
    OpBase::initVulkanThing(BUFFER_NUM);
    return true;
}

bool OpConv::forward(std::vector<Tensor>& ins,
                     std::vector<Tensor>& blobs,
                     std::vector<Tensor>& outs)
{
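    // A one-element placeholder tensor keeps the bias descriptor binding
    // valid even when the layer has no bias; the real bias blob replaces it
    // below when has_bias_ is set.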
    std::vector<int> shape = {1};
    Tensor bias(0, shape);

    if (has_bias_)
    {
        assert(blobs.size() == 2);
        bias = blobs[1];
    }

    return forward(ins[0], blobs[0], bias, outs[0]);
}

bool OpConv::forward(Tensor& in, Tensor& filter_weights, Tensor& bias, Tensor& out)
{
    Shape in_shape = in.getShape();
    Shape out_shape = out.getShape();
    batch_ = in_shape[kShapeIdxBatch];
    in_height_ = in_shape[kShapeIdxHeight];
    in_width_ = in_shape[kShapeIdxWidth];
    in_channel_ = in_shape[kShapeIdxChannel];
    out_height_ = out_shape[kShapeIdxHeight];
    out_width_ = out_shape[kShapeIdxWidth];
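    // View the convolution as a GEMM: M output pixels per channel, K unrolled
    // filter elements (filter_h * filter_w * channels), N output channels.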
    int M = out_height_ * out_width_;
    int K = filter_height_ * filter_width_ * in_channel_;
    int N = out_channel_;

    if (pipeline_ == VK_NULL_HANDLE)
    {
        config_.local_size_x = DEFAULT_LOCAL_SZ;
        config_.local_size_y = 1;
        config_.local_size_z = 1;
        config_.block_height = 1;
        config_.block_width  = 1;
        config_.block_depth  = 1;
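        // When M, K and N are suitably aligned, use the blocked conv48 shader,
        // which computes the output in 4 (M) x 8 (N) tiles.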
        if ((N % 8 == 0) && (K % 4 == 0) && (M % 4 == 0))
        {
            assert(group_ == 1); // TODO: support group > 1
            config_.shader_type  = kConvShaderType48;
            config_.local_size_x = 1;
            config_.local_size_y = DEFAULT_LOCAL_SZ;
            config_.local_size_z = 1;
            config_.block_height = 4;
            config_.block_width  = 8;
            createShaderModule(conv48_spv, sizeof(conv48_spv));
            // specialization constants
            VkSpecializationInfo spec_info;
            ShaderConstant shader_constant;
#define SPECIALIZATION_CONST_NUM 20
            VkSpecializationMapEntry entry[SPECIALIZATION_CONST_NUM];
#define SET_SPEC_CONST_ENTRY(n_, id_, offset_, size_) \
            entry[n_].constantID = id_; \
            entry[n_].offset = offset_; \
            entry[n_].size = size_;

            shader_constant.lsz_x = config_.local_size_x;
            shader_constant.lsz_y = config_.local_size_y;
            shader_constant.lsz_z = config_.local_size_z;
            shader_constant.in_h  = in_height_;
            shader_constant.in_w  = in_width_;
            shader_constant.out_w = out_width_;
            shader_constant.stride_h = stride_height_;
            shader_constant.stride_w = stride_width_;
            shader_constant.pad_h = padding_top_;
            shader_constant.pad_w = padding_left_;
            shader_constant.filter_h = filter_height_;
            shader_constant.filter_w = filter_width_;
            shader_constant.channels = in_channel_;
            shader_constant.batch = batch_;
            shader_constant.m = M;
            shader_constant.k = K;
            shader_constant.n = N;
            shader_constant.tail_m = M % 4;
            shader_constant.dilation_h = dilation_height_;
            shader_constant.dilation_w = dilation_width_;

            SET_SPEC_CONST_ENTRY(0, 0, offsetof(ShaderConstant,lsz_x), sizeof(int));
            SET_SPEC_CONST_ENTRY(1, 1, offsetof(ShaderConstant,lsz_y), sizeof(int));
            SET_SPEC_CONST_ENTRY(2, 2, offsetof(ShaderConstant,lsz_z), sizeof(int));
            SET_SPEC_CONST_ENTRY(3, 3, offsetof(ShaderConstant,in_h), sizeof(int));
            SET_SPEC_CONST_ENTRY(4, 4, offsetof(ShaderConstant,in_w), sizeof(int));
            SET_SPEC_CONST_ENTRY(5, 5, offsetof(ShaderConstant,out_w), sizeof(int));
            SET_SPEC_CONST_ENTRY(6, 6, offsetof(ShaderConstant,stride_h), sizeof(int));
            SET_SPEC_CONST_ENTRY(7, 7, offsetof(ShaderConstant,stride_w), sizeof(int));
            SET_SPEC_CONST_ENTRY(8, 8, offsetof(ShaderConstant,pad_h), sizeof(int));
            SET_SPEC_CONST_ENTRY(9, 9, offsetof(ShaderConstant,pad_w), sizeof(int));
            SET_SPEC_CONST_ENTRY(10, 10, offsetof(ShaderConstant,filter_h), sizeof(int));
            SET_SPEC_CONST_ENTRY(11, 11, offsetof(ShaderConstant,filter_w), sizeof(int));
            SET_SPEC_CONST_ENTRY(12, 12, offsetof(ShaderConstant,channels), sizeof(int));
            SET_SPEC_CONST_ENTRY(13, 13, offsetof(ShaderConstant,batch), sizeof(int));
            SET_SPEC_CONST_ENTRY(14, 14, offsetof(ShaderConstant,m), sizeof(int));
            SET_SPEC_CONST_ENTRY(15, 15, offsetof(ShaderConstant,k), sizeof(int));
            SET_SPEC_CONST_ENTRY(16, 16, offsetof(ShaderConstant,n), sizeof(int));
            SET_SPEC_CONST_ENTRY(17, 17, offsetof(ShaderConstant,tail_m), sizeof(int));
            SET_SPEC_CONST_ENTRY(18, 18, offsetof(ShaderConstant,dilation_h), sizeof(int));
            SET_SPEC_CONST_ENTRY(19, 19, offsetof(ShaderConstant,dilation_w), sizeof(int));

            spec_info.mapEntryCount = SPECIALIZATION_CONST_NUM;
            spec_info.pMapEntries = entry;
            spec_info.dataSize = sizeof(shader_constant);
            spec_info.pData = &shader_constant;
            createPipeline(sizeof(ShaderParam), &spec_info);
        }
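        // One filter per input channel: use the depth-wise shader.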
        else if (out_channel_ == in_channel_ && in_channel_ == group_)
        {
            config_.shader_type  = kConvShaderTypeDepthWise;
            createShaderModule(dw_conv_spv, sizeof(dw_conv_spv));
            createPipeline(sizeof(ShaderParam));
        }
        else
        {
            assert(group_ == 1); // TODO: support group > 1
            config_.shader_type  = kConvShaderTypeBasic;
            createShaderModule(conv_spv, sizeof(conv_spv));
            createPipeline(sizeof(ShaderParam));
        }

        computeGroupCount();
    }

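    // Descriptor bindings 0..3: input, bias, filter weights, output.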
    bindTensor(device_, in, 0, descriptor_set_);
    bindTensor(device_, bias, 1, descriptor_set_);
    bindTensor(device_, filter_weights, 2, descriptor_set_);
    bindTensor(device_, out, 3, descriptor_set_);

    ShaderParam param = {in_height_, in_width_,
                         out_height_, out_width_,
                         stride_height_, stride_width_,
                         padding_top_, padding_left_,
                         filter_height_, filter_width_,
                         dilation_height_, dilation_width_,
                         in_channel_, batch_, has_bias_,
                         M, K, N, 0, 0, 0};

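    // The basic shader covers group_y_ output channels per dispatch, so the
    // output channels are split into partitions and one dispatch is recorded
    // per (batch, partition) pair. The depth-wise shader needs only one
    // dispatch per batch image.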
    if (config_.shader_type == kConvShaderTypeBasic || config_.shader_type == kConvShaderTypeDepthWise)
    {
        int partition_num = 1;
        if (config_.shader_type == kConvShaderTypeBasic)
        {
            param.basic_shader_partition_size = group_y_;
            partition_num = (int)ceil(1.0 * out_channel_ / group_y_);
        }

        for (int b = 0; b < batch_; b++)
        {
            param.basic_shader_batch_idx = b;
            for (int n = 0; n < partition_num; n++)
            {
                param.basic_shader_partition_idx = n;
                recordCommandBuffer((void *)&param, sizeof(ShaderParam));
                runCommandBuffer();
            }
        }
    }
    else
    {
        recordCommandBuffer();
        runCommandBuffer();
    }

    return true;
}

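// Derive the workgroup dispatch dimensions (group_x_, group_y_, group_z_) for
// the selected shader type.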
bool OpConv::computeGroupCount()
{
    if (config_.shader_type == kConvShaderTypeDepthWise)
    {
        group_x_ = alignSize(out_width_, config_.local_size_x) / config_.local_size_x;
        group_y_ = alignSize(out_height_, config_.local_size_y) / config_.local_size_y;
        group_z_ = alignSize(in_channel_, config_.local_size_z) / config_.local_size_z;
        return true;
    }

    int M = out_height_ * out_width_;
    int N = out_channel_;

    if (config_.shader_type == kConvShaderTypeBasic)
    {
        group_x_ = alignSize(out_height_ * out_width_, config_.local_size_x) / config_.local_size_x;
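        // Heuristic: choose group_y_ (the number of output channels covered
        // per dispatch) so that a single dispatch performs roughly
        // MAX_COMPUTE_GFLOPS worth of work, presumably to keep individual
        // dispatches from running too long.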
        float GFLOPS = (2.0 * filter_height_ * filter_width_ * in_channel_ + 1) *
                       (out_channel_ * out_height_ * out_width_) / 1000 / 1000 / 1000;
        CV_Assert(config_.local_size_y == 1);
        group_y_ = std::min(MAX_GROUP_COUNT_Y, (int)floor(MAX_COMPUTE_GFLOPS / (GFLOPS / out_channel_)));
        group_z_ = 1;
    }
    else if (config_.shader_type == kConvShaderType48)
    {
        assert(config_.block_width == 8 &&
               config_.block_height == 4 &&
               config_.block_depth == 1 &&
               config_.local_size_z == 1);
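        // One workgroup per 8-wide block of output channels (x), per
        // local_size_y block of 4-row M tiles (y), and per batch image (z).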
        group_x_ = N / config_.block_width;
        group_y_ = alignSize(alignSize(M, 4) / 4, config_.local_size_y) / config_.local_size_y;
        group_z_ = batch_;
    }
    else
        CV_Assert(0);

    CV_Assert(group_x_ <= MAX_GROUP_COUNT_X);
    CV_Assert(group_y_ <= MAX_GROUP_COUNT_Y);
    CV_Assert(group_z_ <= MAX_GROUP_COUNT_Z);

    return true;
}

#endif // HAVE_VULKAN

}}} // namespace cv::dnn::vkcom