1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4 //
5 // Copyright (C) 2018, Intel Corporation, all rights reserved.
6 // Third party copyrights are property of their respective owners.
7
#include "../../precomp.hpp"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>

#include "common.hpp"
#include "internal.hpp"
#include "../include/op_conv.hpp"
12
13 namespace cv { namespace dnn { namespace vkcom {
14
15 #ifdef HAVE_VULKAN
16
17 #define DEFAULT_LOCAL_SZ 256
18 #define MAX_COMPUTE_GFLOPS 10
19 // TODO: query group count from vulkan device
20 #define MAX_GROUP_COUNT_X 65535
21 #define MAX_GROUP_COUNT_Y 65535
22 #define MAX_GROUP_COUNT_Z 65535
23
// Specialization constants for the blocked "conv48" shader.
//
// The field order is load-bearing: OpConv::forward() maps constantID i to
// offsetof(ShaderConstant, <i-th field>) via SET_SPEC_CONST_ENTRY, and the
// IDs must line up with the constant_id declarations in conv48_spv.
// Do NOT reorder, insert, or remove fields without updating that mapping.
struct ShaderConstant {
    int lsz_x;       // workgroup local size x
    int lsz_y;       // workgroup local size y
    int lsz_z;       // workgroup local size z
    int in_h;        // input height
    int in_w;        // input width
    int out_w;       // output width
    int stride_h;    // vertical stride
    int stride_w;    // horizontal stride
    int pad_h;       // top padding
    int pad_w;       // left padding
    int filter_h;    // kernel height
    int filter_w;    // kernel width
    int channels;    // input channel count
    int batch;       // batch size
    int m;           // GEMM M = out_h * out_w
    int k;           // GEMM K = filter_h * filter_w * channels
    int n;           // GEMM N = output channel count
    int tail_m;      // M % 4 — remainder rows for the 4-high output block
    int dilation_h;  // vertical dilation
    int dilation_w;  // horizontal dilation
};
46
// Per-dispatch parameters uploaded to every conv shader variant —
// OpConv::forward() records sizeof(ShaderParam) bytes per command buffer.
// The layout must match the shader-side parameter block (presumably a
// uniform/push-constant block in conv_spv/dw_conv_spv/conv48_spv — confirm
// against the shader sources before changing anything here).
struct ShaderParam {
    int in_h;         // input height
    int in_w;         // input width
    int out_h;        // output height
    int out_w;        // output width
    int stride_h;     // vertical stride
    int stride_w;     // horizontal stride
    int pad_h;        // top padding
    int pad_w;        // left padding
    int filter_h;     // kernel height
    int filter_w;     // kernel width
    int dilation_h;   // vertical dilation
    int dilation_w;   // horizontal dilation
    int channels;     // input channel count
    int batch;        // batch size
    int has_bias;     // 1 if a bias buffer is bound, else 0
    int M;            // GEMM M = out_h * out_w
    int K;            // GEMM K = filter_h * filter_w * channels
    int N;            // GEMM N = output channel count
    int basic_shader_batch_idx;       // batch index for the basic/depthwise multi-dispatch loop
    int basic_shader_partition_idx;   // output-channel partition index (basic shader only)
    int basic_shader_partition_size;  // channels per partition (basic shader only)
};
70
// Constructs a convolution op: stores all geometry/activation parameters
// and sets up the Vulkan descriptor scaffolding via init(), then tags the
// op type for the framework.
OpConv::OpConv(const int out_channel, const bool has_bias,
               const int* filter_size, const int* pad,
               const int* stride, const int* dilation,
               const int activation, const int group,
               const int padding_mode)
{
    init(out_channel, has_bias, filter_size, pad,
         stride, dilation, activation, group, padding_mode);
    type_ = "Conv";
}
81
reshapeOutTensor(Tensor & in,Tensor & out)82 void OpConv::reshapeOutTensor(Tensor& in, Tensor& out)
83 {
84 Shape in_shape = in.getShape();
85 batch_ = in_shape[kShapeIdxBatch];
86 in_height_ = in_shape[kShapeIdxHeight];
87 in_width_ = in_shape[kShapeIdxWidth];
88 computeConvOutputShapeAndPadding(padding_mode_, padding_top_, padding_left_,
89 in_height_, in_width_,
90 filter_height_, filter_width_,
91 dilation_height_, dilation_width_,
92 stride_height_, stride_width_,
93 out_height_, out_width_);
94 Shape shape = {batch_, out_channel_, out_height_, out_width_};
95 out.reshape(NULL, shape);
96 }
97
init(const int out_channel,const bool has_bias,const int * filter_size,const int * pad,const int * stride,const int * dilation,const int activation,const int group,const int padding_mode)98 bool OpConv::init(const int out_channel, const bool has_bias,
99 const int* filter_size, const int* pad,
100 const int* stride, const int* dilation,
101 const int activation, const int group,
102 const int padding_mode)
103 {
104 out_channel_ = out_channel;
105 filter_height_ = filter_size[0];
106 filter_width_ = filter_size[1];
107 padding_top_ = pad[0];
108 padding_left_ = pad[1];
109 stride_height_ = stride[0];
110 stride_width_ = stride[1];
111 dilation_height_ = dilation[0];
112 dilation_width_ = dilation[1];
113 padding_mode_ = (PaddingMode)padding_mode;
114 has_bias_ = has_bias ? 1 : 0;
115 activation_ = activation;
116 group_ = group;
117
118 #define BUFFER_NUM 4
119 OpBase::initVulkanThing(BUFFER_NUM);
120 return true;
121 }
122
forward(std::vector<Tensor> & ins,std::vector<Tensor> & blobs,std::vector<Tensor> & outs)123 bool OpConv::forward(std::vector<Tensor>& ins,
124 std::vector<Tensor>& blobs,
125 std::vector<Tensor>& outs)
126 {
127 std::vector<int> shape = {1};
128 Tensor bias(0, shape);
129
130 if (has_bias_)
131 {
132 assert(blobs.size() == 2);
133 bias = blobs[1];
134 }
135
136 return forward(ins[0], blobs[0], bias, outs[0]);
137 }
138
// Executes the convolution on the GPU: lazily builds the compute pipeline,
// binds in/bias/filter/out as storage buffers (bindings 0..3), then records
// and runs one or more dispatches. Returns true on success.
bool OpConv::forward(Tensor& in, Tensor& filter_weights, Tensor& bias, Tensor& out)
{
    // Refresh cached geometry from the actual tensors. M/K/N are the
    // implicit-GEMM dimensions of the convolution:
    //   M = out_h * out_w, K = filter_h * filter_w * C_in, N = C_out.
    Shape in_shape = in.getShape();
    Shape out_shape = out.getShape();
    batch_ = in_shape[kShapeIdxBatch];
    in_height_ = in_shape[kShapeIdxHeight];
    in_width_ = in_shape[kShapeIdxWidth];
    in_channel_= in_shape[kShapeIdxChannel];
    out_height_ = out_shape[kShapeIdxHeight];
    out_width_ = out_shape[kShapeIdxWidth];
    int M = out_height_ * out_width_;
    int K = filter_height_ * filter_width_ * in_channel_;
    int N = out_channel_;

    // Lazy pipeline creation: pick one of three shader variants based on the
    // problem shape. NOTE(review): the pipeline is built only once, so the
    // variant choice (and the conv48 specialization constants) are frozen on
    // the first call — assumes the input shape stays fixed across calls;
    // confirm with callers.
    if (pipeline_ == VK_NULL_HANDLE)
    {
        config_.local_size_x = DEFAULT_LOCAL_SZ;
        config_.local_size_y = 1;
        config_.local_size_z = 1;
        config_.block_height = 1;
        config_.block_width = 1;
        config_.block_depth = 1;
        if ((N % 8 == 0) && (K % 4 == 0) && (M % 4) == 0)
        {
            // Blocked GEMM-style shader: each invocation computes a
            // 4 (rows of M) x 8 (columns of N) output tile.
            assert(group_ == 1); // TODO: support group > 1
            config_.shader_type = kConvShaderType48;
            config_.local_size_x = 1;
            config_.local_size_y = DEFAULT_LOCAL_SZ;
            config_.local_size_z = 1;
            config_.block_height = 4;
            config_.block_width = 8;
            createShaderModule(conv48_spv, sizeof(conv48_spv));
            // All geometry is baked into the pipeline as Vulkan
            // specialization constants: entry i maps constantID i to the
            // i-th field of ShaderConstant (order matters — see the struct).
            VkSpecializationInfo spec_info;
            ShaderConstant shader_constant;
#define SPECIALIZATION_CONST_NUM 20
            VkSpecializationMapEntry entry[SPECIALIZATION_CONST_NUM];
#define SET_SPEC_CONST_ENTRY(n_, id_, offset_, size_) \
            entry[n_].constantID = id_; \
            entry[n_].offset = offset_; \
            entry[n_].size = size_;

            shader_constant.lsz_x = config_.local_size_x;
            shader_constant.lsz_y = config_.local_size_y;
            shader_constant.lsz_z = config_.local_size_z;
            shader_constant.in_h = in_height_;
            shader_constant.in_w = in_width_;
            shader_constant.out_w = out_width_;
            shader_constant.stride_h = stride_height_;
            shader_constant.stride_w = stride_width_;
            shader_constant.pad_h = padding_top_;
            shader_constant.pad_w = padding_left_;
            shader_constant.filter_h = filter_height_;
            shader_constant.filter_w = filter_width_;
            shader_constant.channels = in_channel_;
            shader_constant.batch = batch_;
            shader_constant.m = M;
            shader_constant.k = K;
            shader_constant.n = N;
            shader_constant.tail_m = M % 4;  // leftover rows of the 4-high tile
            shader_constant.dilation_h = dilation_height_;
            shader_constant.dilation_w = dilation_width_;

            SET_SPEC_CONST_ENTRY(0, 0, offsetof(ShaderConstant,lsz_x), sizeof(int));
            SET_SPEC_CONST_ENTRY(1, 1, offsetof(ShaderConstant,lsz_y), sizeof(int));
            SET_SPEC_CONST_ENTRY(2, 2, offsetof(ShaderConstant,lsz_z), sizeof(int));
            SET_SPEC_CONST_ENTRY(3, 3, offsetof(ShaderConstant,in_h), sizeof(int));
            SET_SPEC_CONST_ENTRY(4, 4, offsetof(ShaderConstant,in_w), sizeof(int));
            SET_SPEC_CONST_ENTRY(5, 5, offsetof(ShaderConstant,out_w), sizeof(int));
            SET_SPEC_CONST_ENTRY(6, 6, offsetof(ShaderConstant,stride_h), sizeof(int));
            SET_SPEC_CONST_ENTRY(7, 7, offsetof(ShaderConstant,stride_w), sizeof(int));
            SET_SPEC_CONST_ENTRY(8, 8, offsetof(ShaderConstant,pad_h), sizeof(int));
            SET_SPEC_CONST_ENTRY(9, 9, offsetof(ShaderConstant,pad_w), sizeof(int));
            SET_SPEC_CONST_ENTRY(10, 10, offsetof(ShaderConstant,filter_h), sizeof(int));
            SET_SPEC_CONST_ENTRY(11, 11, offsetof(ShaderConstant,filter_w), sizeof(int));
            SET_SPEC_CONST_ENTRY(12, 12, offsetof(ShaderConstant,channels), sizeof(int));
            SET_SPEC_CONST_ENTRY(13, 13, offsetof(ShaderConstant,batch), sizeof(int));
            SET_SPEC_CONST_ENTRY(14, 14, offsetof(ShaderConstant,m), sizeof(int));
            SET_SPEC_CONST_ENTRY(15, 15, offsetof(ShaderConstant,k), sizeof(int));
            SET_SPEC_CONST_ENTRY(16, 16, offsetof(ShaderConstant,n), sizeof(int));
            SET_SPEC_CONST_ENTRY(17, 17, offsetof(ShaderConstant,tail_m), sizeof(int));
            SET_SPEC_CONST_ENTRY(18, 18, offsetof(ShaderConstant,dilation_h), sizeof(int));
            SET_SPEC_CONST_ENTRY(19, 19, offsetof(ShaderConstant,dilation_w), sizeof(int));

            spec_info.mapEntryCount = SPECIALIZATION_CONST_NUM;
            spec_info.pMapEntries = entry;
            spec_info.dataSize = sizeof(shader_constant);
            spec_info.pData = &shader_constant;
            createPipeline(sizeof(ShaderParam), &spec_info);
        }
        else if (out_channel_ == in_channel_ && in_channel_ == group_)
        {
            // Depth-wise convolution: one filter per channel.
            config_.shader_type = kConvShaderTypeDepthWise;
            createShaderModule(dw_conv_spv, sizeof(dw_conv_spv));
            createPipeline(sizeof(ShaderParam));
        }
        else
        {
            // Generic fallback shader.
            assert(group_ == 1); // TODO: support group > 1
            config_.shader_type = kConvShaderTypeBasic;
            createShaderModule(conv_spv, sizeof(conv_spv));
            createPipeline(sizeof(ShaderParam));
        }

        computeGroupCount();
    }

    bindTensor(device_, in, 0, descriptor_set_);
    bindTensor(device_, bias, 1, descriptor_set_);
    bindTensor(device_, filter_weights, 2, descriptor_set_);
    bindTensor(device_, out, 3, descriptor_set_);

    // Per-dispatch parameter block; the last three fields (batch index,
    // partition index, partition size) are filled per iteration below.
    ShaderParam param = {in_height_, in_width_,
                         out_height_, out_width_,
                         stride_height_, stride_width_,
                         padding_top_, padding_left_,
                         filter_height_, filter_width_,
                         dilation_height_, dilation_width_,
                         in_channel_, batch_, has_bias_,
                         M, K, N, 0, 0, 0};

    if (config_.shader_type == kConvShaderTypeBasic || config_.shader_type == kConvShaderTypeDepthWise)
    {
        // Basic/depth-wise shaders dispatch once per batch element, and the
        // basic shader additionally splits the output channels into
        // partitions of group_y_ channels to bound per-dispatch work
        // (see computeGroupCount()).
        int partition_num = 1;
        if (config_.shader_type == kConvShaderTypeBasic)
        {
            param.basic_shader_partition_size = group_y_;
            partition_num = (int)ceil(1.0 * out_channel_ / group_y_);
        }

        for (int b = 0; b < batch_; b++)
        {
            param.basic_shader_batch_idx = b;
            for (int n = 0; n < partition_num; n++)
            {
                param.basic_shader_partition_idx = n;
                recordCommandBuffer((void *)&param, sizeof(ShaderParam));
                runCommandBuffer();
            }
        }
    }
    else
    {
        // conv48: all geometry is in specialization constants; a single
        // dispatch covers the whole batch.
        recordCommandBuffer();
        runCommandBuffer();
    }

    return true;
}
288
computeGroupCount()289 bool OpConv::computeGroupCount()
290 {
291 if (config_.shader_type == kConvShaderTypeDepthWise)
292 {
293 group_x_ = alignSize(out_width_, config_.local_size_x) / config_.local_size_x;
294 group_y_ = alignSize(out_height_, config_.local_size_y) / config_.local_size_y;
295 group_z_ = alignSize(in_channel_, config_.local_size_z) / config_.local_size_z;
296 return true;
297 }
298
299 int M = out_height_ * out_width_;
300 int N = out_channel_;
301
302 if (config_.shader_type == kConvShaderTypeBasic)
303 {
304
305 group_x_ = alignSize(out_height_ * out_width_, config_.local_size_x) / config_.local_size_x;
306 float GFLOPS = (2.0 * filter_height_ * filter_width_ * in_channel_ + 1) *
307 (out_channel_ * out_height_ * out_width_) / 1000 / 1000 / 1000;
308 CV_Assert(config_.local_size_y == 1);
309 group_y_ = std::min(MAX_GROUP_COUNT_Y, (int)floor(MAX_COMPUTE_GFLOPS / (GFLOPS / out_channel_)));
310 group_z_ = 1;
311 }
312 else if (config_.shader_type == kConvShaderType48)
313 {
314 assert(config_.block_width == 8 &&
315 config_.block_height == 4 &&
316 config_.block_depth == 1 &&
317 config_.local_size_z == 1);
318 group_x_ = N / config_.block_width;
319 group_y_ = alignSize(alignSize(M, 4) / 4, config_.local_size_y) / config_.local_size_y;
320 group_z_ = batch_;
321 }
322 else
323 CV_Assert(0);
324
325 CV_Assert(group_x_ <= MAX_GROUP_COUNT_X);
326 CV_Assert(group_y_ <= MAX_GROUP_COUNT_Y);
327 CV_Assert(group_z_ <= MAX_GROUP_COUNT_Z);
328
329 return true;
330 }
331
332 #endif // HAVE_VULKAN
333
334 }}} // namespace cv::dnn::vkcom
335