1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #include "innerproduct_vulkan.h"
16
17 #include "layer_shader_type.h"
18 #include "layer_type.h"
19
20 namespace ncnn {
21
// Constructor: advertise vulkan support and zero-initialize all lazily-created
// resources (the helper flatten layer and every compute pipeline).
InnerProduct_vulkan::InnerProduct_vulkan()
{
    support_vulkan = true;
    support_image_storage = true;

    // helper layer that flattens the input blob before the fc kernel; created in create_pipeline()
    flatten = 0;

    // one pipeline per in_elempack/out_elempack combination; only the one
    // matching the packing decision is actually created in create_pipeline()
    pipeline_innerproduct = 0;
    pipeline_innerproduct_pack4 = 0;
    pipeline_innerproduct_pack1to4 = 0;
    pipeline_innerproduct_pack4to1 = 0;
    pipeline_innerproduct_pack8 = 0;
    pipeline_innerproduct_pack1to8 = 0;
    pipeline_innerproduct_pack4to8 = 0;
    pipeline_innerproduct_pack8to1 = 0;

    // pipeline for the multi-row (gemm) input case
    pipeline_innerproduct_gemm = 0;
}
41
// Build the compute pipelines for this layer.
// Three cases are handled:
//   1. the bottom shape hint is 2-d with h > 1  -> gemm pipeline only (early return)
//   2. shape hint available (1-d after flatten) -> one fc pipeline matching the packing
//   3. no shape hint (shape.dims == 0)          -> fc pipeline + gemm pipeline with zeroed hints
// opt is copied so use_image_storage can be disabled locally when a blob/weight
// shape is not supported by image storage on this device.
int InnerProduct_vulkan::create_pipeline(const Option& _opt)
{
    Option opt = _opt;
    // shape hints may be empty when the model is loaded without shape information
    const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
    const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];

    // weight_data is inch x outch, so infer the input channel count
    const int num_input = weight_data_size / num_output;

    // choose the widest packing the channel counts allow (8 needs shader pack8 support)
    int in_elempack = opt.use_shader_pack8 && num_input % 8 == 0 ? 8 : num_input % 4 == 0 ? 4 : 1;
    int out_elempack = opt.use_shader_pack8 && num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;

    if (shape.dims == 2 && shape.w == num_input && shape.h > 1)
    {
        // gemm: multiple rows of num_input features are multiplied in one dispatch
        int elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;

        // element size in bytes for the chosen storage mode
        size_t elemsize;
        if (opt.use_fp16_storage)
        {
            elemsize = elempack * 2u;
        }
        else if (opt.use_fp16_packed)
        {
            elemsize = elempack == 1 ? 4u : elempack * 2u;
        }
        else
        {
            elemsize = elempack * 4u;
        }

        Mat shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
        Mat out_shape_packed = Mat(out_shape.w, out_shape.h / elempack, (void*)0, elemsize, elempack);

        // check blob shape
        if (!vkdev->shape_support_image_storage(shape) || !vkdev->shape_support_image_storage(out_shape))
        {
            support_image_storage = false;
            opt.use_image_storage = false;
        }

        // check blob shape
        if (!vkdev->shape_support_image_storage(shape_packed) || !vkdev->shape_support_image_storage(out_shape_packed))
        {
            support_image_storage = false;
            opt.use_image_storage = false;
        }

        // 4 activation constants + 10 shape hint constants, matching the shader layout
        std::vector<vk_specialization_type> specializations(4 + 10);
        specializations[0].i = bias_term;
        specializations[1].i = activation_type;
        specializations[2].f = activation_params.w >= 1 ? activation_params[0] : 0.f;
        specializations[3].f = activation_params.w == 2 ? activation_params[1] : 0.f;
        // NOTE(review): the gemm path passes the UNPACKED shape hints here,
        // whereas the fc path below passes packed ones — presumably matching
        // what the innerproduct_gemm shaders expect; confirm against the shaders
        specializations[4 + 0].i = shape.dims;
        specializations[4 + 1].i = shape.w;
        specializations[4 + 2].i = shape.h;
        specializations[4 + 3].i = shape.c;
        specializations[4 + 4].i = shape.cstep;
        specializations[4 + 5].i = out_shape.dims;
        specializations[4 + 6].i = out_shape.w;
        specializations[4 + 7].i = out_shape.h;
        specializations[4 + 8].i = out_shape.c;
        specializations[4 + 9].i = out_shape.cstep;

        // workgroup size hint: up to 16 threads across outputs, 4 across rows
        Mat local_size_xyz(std::min(16, num_output / out_elempack), 4, 1, (void*)0);
        if (out_shape.dims != 0)
        {
            local_size_xyz.w = std::min(16, out_shape.w / out_elempack);
            local_size_xyz.h = std::min(4, out_shape.h);
            local_size_xyz.c = 1;
        }

        {
            pipeline_innerproduct_gemm = new Pipeline(vkdev);
            pipeline_innerproduct_gemm->set_optimal_local_size_xyz(local_size_xyz);

            // exactly one of the following matches the packing decision above

            // pack1
            if (in_elempack == 1 && out_elempack == 1)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm, opt, specializations);
            }

            // pack4
            if (in_elempack == 4 && out_elempack == 4)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp4, opt, specializations);
            }

            // pack1to4
            if (in_elempack == 1 && out_elempack == 4)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp1to4, opt, specializations);
            }

            // pack4to1
            if (in_elempack == 4 && out_elempack == 1)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp4to1, opt, specializations);
            }

            // pack8
            if (in_elempack == 8 && out_elempack == 8)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp8, opt, specializations);
            }

            // pack1to8
            if (in_elempack == 1 && out_elempack == 8)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp1to8, opt, specializations);
            }

            // pack4to8
            if (in_elempack == 4 && out_elempack == 8)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp4to8, opt, specializations);
            }

            // pack8to4
            if (in_elempack == 8 && out_elempack == 4)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp8to4, opt, specializations);
            }

            // pack8to1
            if (in_elempack == 8 && out_elempack == 1)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp8to1, opt, specializations);
            }
        }

        // gemm path needs no flatten layer and no fc pipelines
        return 0;
    }

    // fc path: the input is flattened to 1-d before the kernel runs
    Mat shape_flatten;
    if (shape.dims != 0)
    {
        shape_flatten = Mat(shape.w * shape.h * shape.c, (void*)0);
    }

    // per-element byte sizes for input and output under the chosen storage mode
    size_t elemsize;
    size_t out_elemsize;
    if (opt.use_fp16_storage)
    {
        elemsize = in_elempack * 2u;
        out_elemsize = out_elempack * 2u;
    }
    else if (opt.use_fp16_packed)
    {
        elemsize = in_elempack == 1 ? 4u : in_elempack * 2u;
        out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
    }
    else
    {
        elemsize = in_elempack * 4u;
        out_elemsize = out_elempack * 4u;
    }

    Mat shape_flatten_packed;
    if (shape_flatten.dims == 1) shape_flatten_packed = Mat(shape_flatten.w / in_elempack, (void*)0, elemsize, in_elempack);

    Mat out_shape_packed;
    if (out_shape.dims == 1) out_shape_packed = Mat(out_shape.w / out_elempack, (void*)0, out_elemsize, out_elempack);

    // check blob shape
    if (!vkdev->shape_support_image_storage(shape_flatten_packed) || !vkdev->shape_support_image_storage(out_shape_packed))
    {
        support_image_storage = false;
        opt.use_image_storage = false;
    }

    // check weight shape
    Mat weight_data_packed(num_input / in_elempack, num_output / out_elempack, (void*)0, (size_t)4 * in_elempack * out_elempack, in_elempack * out_elempack);
    if (!vkdev->shape_support_image_storage(weight_data_packed))
    {
        support_image_storage = false;
        opt.use_image_storage = false;
    }

    if (shape.dims == 0)
    {
        // no shape hint: the gemm path may also run at inference time, which
        // uses the fully unpacked weight shape — check that one too
        Mat weight_data_packed(num_input, num_output, (void*)0, (size_t)4u, 1);
        if (!vkdev->shape_support_image_storage(weight_data_packed))
        {
            support_image_storage = false;
            opt.use_image_storage = false;
        }
    }

    {
        // build the helper flatten layer used by forward() on the fc path
        flatten = ncnn::create_layer(ncnn::LayerType::Flatten);
        flatten->vkdev = vkdev;

        flatten->bottom_shapes.resize(1);
        flatten->bottom_shapes[0] = shape;
        flatten->top_shapes.resize(1);
        flatten->top_shapes[0] = shape_flatten;

        ncnn::ParamDict pd;

        flatten->load_param(pd);

        flatten->create_pipeline(opt);
    }

    // 4 activation constants + 10 shape hint constants (packed shapes here)
    std::vector<vk_specialization_type> specializations(4 + 10);
    specializations[0].i = bias_term;
    specializations[1].i = activation_type;
    specializations[2].f = activation_params.w >= 1 ? activation_params[0] : 0.f;
    specializations[3].f = activation_params.w == 2 ? activation_params[1] : 0.f;
    specializations[4 + 0].i = shape_flatten_packed.dims;
    specializations[4 + 1].i = shape_flatten_packed.w;
    specializations[4 + 2].i = shape_flatten_packed.h;
    specializations[4 + 3].i = shape_flatten_packed.c;
    specializations[4 + 4].i = shape_flatten_packed.cstep;
    specializations[4 + 5].i = out_shape_packed.dims;
    specializations[4 + 6].i = out_shape_packed.w;
    specializations[4 + 7].i = out_shape_packed.h;
    specializations[4 + 8].i = out_shape_packed.c;
    specializations[4 + 9].i = out_shape_packed.cstep;

    // fc output is 1-d, so only the x dimension of the workgroup matters
    Mat local_size_xyz(std::min(64, num_output / out_elempack), 1, 1, (void*)0);
    if (out_shape_packed.dims != 0)
    {
        local_size_xyz.w = std::min(64, out_shape_packed.w);
        local_size_xyz.h = 1;
        local_size_xyz.c = 1;
    }

    // create the single fc pipeline matching the in/out packing combination

    // pack1
    if (in_elempack == 1 && out_elempack == 1)
    {
        pipeline_innerproduct = new Pipeline(vkdev);
        pipeline_innerproduct->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_innerproduct->create(LayerShaderType::innerproduct, opt, specializations);
    }

    // pack4
    if (in_elempack == 4 && out_elempack == 4)
    {
        pipeline_innerproduct_pack4 = new Pipeline(vkdev);
        pipeline_innerproduct_pack4->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_innerproduct_pack4->create(LayerShaderType::innerproduct_pack4, opt, specializations);
    }

    // pack1to4
    if (in_elempack == 1 && out_elempack == 4)
    {
        pipeline_innerproduct_pack1to4 = new Pipeline(vkdev);
        pipeline_innerproduct_pack1to4->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_innerproduct_pack1to4->create(LayerShaderType::innerproduct_pack1to4, opt, specializations);
    }

    // pack4to1
    if (in_elempack == 4 && out_elempack == 1)
    {
        pipeline_innerproduct_pack4to1 = new Pipeline(vkdev);
        pipeline_innerproduct_pack4to1->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_innerproduct_pack4to1->create(LayerShaderType::innerproduct_pack4to1, opt, specializations);
    }

    // pack8
    if (in_elempack == 8 && out_elempack == 8)
    {
        pipeline_innerproduct_pack8 = new Pipeline(vkdev);
        pipeline_innerproduct_pack8->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_innerproduct_pack8->create(LayerShaderType::innerproduct_pack8, opt, specializations);
    }

    // pack1to8
    if (in_elempack == 1 && out_elempack == 8)
    {
        pipeline_innerproduct_pack1to8 = new Pipeline(vkdev);
        pipeline_innerproduct_pack1to8->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_innerproduct_pack1to8->create(LayerShaderType::innerproduct_pack1to8, opt, specializations);
    }

    // pack4to8
    if (in_elempack == 4 && out_elempack == 8)
    {
        pipeline_innerproduct_pack4to8 = new Pipeline(vkdev);
        pipeline_innerproduct_pack4to8->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_innerproduct_pack4to8->create(LayerShaderType::innerproduct_pack4to8, opt, specializations);
    }

    // pack8to4
    if (in_elempack == 8 && out_elempack == 4)
    {
        pipeline_innerproduct_pack8to4 = new Pipeline(vkdev);
        pipeline_innerproduct_pack8to4->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_innerproduct_pack8to4->create(LayerShaderType::innerproduct_pack8to4, opt, specializations);
    }

    // pack8to1
    if (in_elempack == 8 && out_elempack == 1)
    {
        pipeline_innerproduct_pack8to1 = new Pipeline(vkdev);
        pipeline_innerproduct_pack8to1->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_innerproduct_pack8to1->create(LayerShaderType::innerproduct_pack8to1, opt, specializations);
    }

    // gemm for no shape hint: forward() may still take the gemm path at
    // runtime, so build that pipeline too with all shape hints zeroed
    if (shape.dims == 0)
    {
        std::vector<vk_specialization_type> specializations(4 + 10);
        specializations[0].i = bias_term;
        specializations[1].i = activation_type;
        specializations[2].f = activation_params.w >= 1 ? activation_params[0] : 0.f;
        specializations[3].f = activation_params.w == 2 ? activation_params[1] : 0.f;
        specializations[4 + 0].i = 0;
        specializations[4 + 1].i = 0;
        specializations[4 + 2].i = 0;
        specializations[4 + 3].i = 0;
        specializations[4 + 4].i = 0;
        specializations[4 + 5].i = 0;
        specializations[4 + 6].i = 0;
        specializations[4 + 7].i = 0;
        specializations[4 + 8].i = 0;
        specializations[4 + 9].i = 0;

        Mat local_size_xyz(std::min(16, num_output / out_elempack), 4, 1, (void*)0);

        {
            pipeline_innerproduct_gemm = new Pipeline(vkdev);
            pipeline_innerproduct_gemm->set_optimal_local_size_xyz(local_size_xyz);

            // pack1
            if (in_elempack == 1 && out_elempack == 1)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm, opt, specializations);
            }

            // pack4
            if (in_elempack == 4 && out_elempack == 4)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp4, opt, specializations);
            }

            // pack1to4
            if (in_elempack == 1 && out_elempack == 4)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp1to4, opt, specializations);
            }

            // pack4to1
            if (in_elempack == 4 && out_elempack == 1)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp4to1, opt, specializations);
            }

            // pack8
            if (in_elempack == 8 && out_elempack == 8)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp8, opt, specializations);
            }

            // pack1to8
            if (in_elempack == 1 && out_elempack == 8)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp1to8, opt, specializations);
            }

            // pack4to8
            if (in_elempack == 4 && out_elempack == 8)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp4to8, opt, specializations);
            }

            // pack8to4
            if (in_elempack == 8 && out_elempack == 4)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp8to4, opt, specializations);
            }

            // pack8to1
            if (in_elempack == 8 && out_elempack == 1)
            {
                pipeline_innerproduct_gemm->create(LayerShaderType::innerproduct_gemm_wp8to1, opt, specializations);
            }
        }

        return 0;
    }

    return 0;
}
428
destroy_pipeline(const Option & opt)429 int InnerProduct_vulkan::destroy_pipeline(const Option& opt)
430 {
431 if (flatten)
432 {
433 flatten->destroy_pipeline(opt);
434 delete flatten;
435 flatten = 0;
436 }
437
438 delete pipeline_innerproduct;
439 pipeline_innerproduct = 0;
440
441 delete pipeline_innerproduct_pack4;
442 pipeline_innerproduct_pack4 = 0;
443
444 delete pipeline_innerproduct_pack1to4;
445 pipeline_innerproduct_pack1to4 = 0;
446
447 delete pipeline_innerproduct_pack4to1;
448 pipeline_innerproduct_pack4to1 = 0;
449
450 delete pipeline_innerproduct_pack8;
451 pipeline_innerproduct_pack8 = 0;
452
453 delete pipeline_innerproduct_pack1to8;
454 pipeline_innerproduct_pack1to8 = 0;
455
456 delete pipeline_innerproduct_pack4to8;
457 pipeline_innerproduct_pack4to8 = 0;
458
459 delete pipeline_innerproduct_pack8to4;
460 pipeline_innerproduct_pack8to4 = 0;
461
462 delete pipeline_innerproduct_pack8to1;
463 pipeline_innerproduct_pack8to1 = 0;
464
465 delete pipeline_innerproduct_gemm;
466 pipeline_innerproduct_gemm = 0;
467
468 return 0;
469 }
470
upload_model(VkTransfer & cmd,const Option & opt)471 int InnerProduct_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
472 {
473 const int num_input = weight_data_size / num_output;
474
475 int in_elempack = opt.use_shader_pack8 && num_input % 8 == 0 ? 8 : num_input % 4 == 0 ? 4 : 1;
476 int out_elempack = opt.use_shader_pack8 && num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;
477
478 // src = inch-outch
479 // dst = pa-pb-inch/pa-outch/pb
480 Mat weight_data_packed;
481 {
482 Mat weight_data_r2 = weight_data.reshape(num_input, num_output);
483
484 weight_data_packed.create(num_input / in_elempack, num_output / out_elempack, (size_t)4 * in_elempack * out_elempack, in_elempack * out_elempack);
485
486 for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack)
487 {
488 float* g00 = weight_data_packed.row(q / out_elempack);
489
490 for (int p = 0; p + (in_elempack - 1) < num_input; p += in_elempack)
491 {
492 for (int i = 0; i < out_elempack; i++)
493 {
494 const float* k0 = weight_data_r2.row(q + i);
495 k0 += p;
496
497 for (int j = 0; j < in_elempack; j++)
498 {
499 g00[0] = k0[j];
500
501 g00++;
502 }
503 }
504 }
505 }
506 }
507
508 if (support_image_storage && opt.use_image_storage)
509 {
510 cmd.record_upload(weight_data_packed, weight_data_gpu_image, opt);
511 }
512 else
513 {
514 cmd.record_upload(weight_data_packed, weight_data_gpu, opt);
515 }
516
517 if (bias_term)
518 {
519 Mat bias_data_packed;
520 convert_packing(bias_data, bias_data_packed, out_elempack);
521
522 if (support_image_storage && opt.use_image_storage)
523 {
524 cmd.record_upload(bias_data_packed, bias_data_gpu_image, opt);
525 }
526 else
527 {
528 cmd.record_upload(bias_data_packed, bias_data_gpu, opt);
529 }
530 }
531
532 return 0;
533 }
534
// Buffer-storage forward pass.
// Takes the gemm path for 2-d input with more than one row; otherwise
// flattens the input and dispatches the fc pipeline matching the packing.
int InnerProduct_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const
{
    const int num_input = weight_data_size / num_output;

    // must mirror the packing decision made in create_pipeline()
    int in_elempack = opt.use_shader_pack8 && num_input % 8 == 0 ? 8 : num_input % 4 == 0 ? 4 : 1;
    int out_elempack = opt.use_shader_pack8 && num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;

    if (bottom_blob.dims == 2 && bottom_blob.w == num_input && bottom_blob.h * bottom_blob.elempack > 1)
    {
        // gemm: one matrix multiply over all input rows
        int h = bottom_blob.h;
        size_t elemsize = bottom_blob.elemsize;
        int elempack = bottom_blob.elempack;

        // unpacking: the gemm shader consumes pack1 data, so convert if needed
        VkMat bottom_blob_unpacked = bottom_blob;
        if (elempack > 1)
        {
            Option opt_pack1 = opt;
            // the unpacked copy is temporary — allocate it from the workspace pool
            opt_pack1.blob_vkallocator = opt.workspace_vkallocator;

            vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, 1, cmd, opt_pack1);
        }

        top_blob.create(num_output, h, elemsize, elempack, opt.blob_vkallocator);
        if (top_blob.empty())
            return -100;

        // the shader also writes pack1 output; repacked into top_blob afterwards
        VkMat top_blob_unpacked = top_blob;
        if (elempack > 1)
        {
            top_blob_unpacked.create(num_output, h * elempack, bottom_blob_unpacked.elemsize, 1, opt.workspace_vkallocator);
            if (top_blob_unpacked.empty())
                return -100;
        }

        // binding order must match the shader: input, output, weight, bias
        std::vector<VkMat> bindings(4);
        bindings[0] = bottom_blob_unpacked;
        bindings[1] = top_blob_unpacked;
        bindings[2] = weight_data_gpu;
        bindings[3] = bias_data_gpu;

        // push constants: dims/w/h/c/cstep for input then output
        std::vector<vk_constant_type> constants(10);
        constants[0].i = bottom_blob_unpacked.dims;
        constants[1].i = bottom_blob_unpacked.w;
        constants[2].i = bottom_blob_unpacked.h;
        constants[3].i = bottom_blob_unpacked.c;
        constants[4].i = bottom_blob_unpacked.cstep;
        constants[5].i = top_blob_unpacked.dims;
        constants[6].i = top_blob_unpacked.w;
        constants[7].i = top_blob_unpacked.h;
        constants[8].i = top_blob_unpacked.c;
        constants[9].i = top_blob_unpacked.cstep;

        const Pipeline* pipeline = pipeline_innerproduct_gemm;

        // each invocation covers out_elempack output elements of one row
        VkMat dispatcher;
        dispatcher.w = top_blob_unpacked.w / out_elempack;
        dispatcher.h = top_blob_unpacked.h;
        dispatcher.c = 1;

        cmd.record_pipeline(pipeline, bindings, constants, dispatcher);

        // packing: restore the caller-visible packing of top_blob
        if (elempack > 1)
        {
            vkdev->convert_packing(top_blob_unpacked, top_blob, elempack, cmd, opt);
        }

        return 0;
    }

    // flatten the input to 1-d before the fc kernel
    VkMat bottom_blob_flattened = bottom_blob;
    {
        Option opt_flatten = opt;
        // the flattened copy is temporary — allocate it from the workspace pool
        opt_flatten.blob_vkallocator = opt.workspace_vkallocator;

        flatten->forward(bottom_blob, bottom_blob_flattened, cmd, opt_flatten);
    }

    // derive the output element size from the input's, adjusted for packing
    size_t elemsize = bottom_blob_flattened.elemsize;
    size_t out_elemsize = elemsize / in_elempack * out_elempack;

    if (opt.use_fp16_packed && !opt.use_fp16_storage)
    {
        // fp16p stores packed elements as fp16 but pack1 as fp32
        if (out_elempack == 8) out_elemsize = 8 * 2u;
        if (out_elempack == 4) out_elemsize = 4 * 2u;
        if (out_elempack == 1) out_elemsize = 4u;
    }

    top_blob.create(num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
    if (top_blob.empty())
        return -100;

    // binding order must match the shader: input, output, weight, bias
    std::vector<VkMat> bindings(4);
    bindings[0] = bottom_blob_flattened;
    bindings[1] = top_blob;
    bindings[2] = weight_data_gpu;
    bindings[3] = bias_data_gpu;

    // push constants: dims/w/h/c/cstep for input then output
    std::vector<vk_constant_type> constants(10);
    constants[0].i = bottom_blob_flattened.dims;
    constants[1].i = bottom_blob_flattened.w;
    constants[2].i = bottom_blob_flattened.h;
    constants[3].i = bottom_blob_flattened.c;
    constants[4].i = bottom_blob_flattened.cstep;
    constants[5].i = top_blob.dims;
    constants[6].i = top_blob.w;
    constants[7].i = top_blob.h;
    constants[8].i = top_blob.c;
    constants[9].i = top_blob.cstep;

    // pick the pipeline created for this packing combination in create_pipeline()
    const Pipeline* pipeline = 0;
    if (in_elempack == 1 && out_elempack == 1)
    {
        pipeline = pipeline_innerproduct;
    }
    else if (in_elempack == 4 && out_elempack == 4)
    {
        pipeline = pipeline_innerproduct_pack4;
    }
    else if (in_elempack == 1 && out_elempack == 4)
    {
        pipeline = pipeline_innerproduct_pack1to4;
    }
    else if (in_elempack == 4 && out_elempack == 1)
    {
        pipeline = pipeline_innerproduct_pack4to1;
    }
    else if (in_elempack == 8 && out_elempack == 8)
    {
        pipeline = pipeline_innerproduct_pack8;
    }
    else if (in_elempack == 1 && out_elempack == 8)
    {
        pipeline = pipeline_innerproduct_pack1to8;
    }
    else if (in_elempack == 4 && out_elempack == 8)
    {
        pipeline = pipeline_innerproduct_pack4to8;
    }
    else if (in_elempack == 8 && out_elempack == 4)
    {
        pipeline = pipeline_innerproduct_pack8to4;
    }
    else if (in_elempack == 8 && out_elempack == 1)
    {
        pipeline = pipeline_innerproduct_pack8to1;
    }

    cmd.record_pipeline(pipeline, bindings, constants, top_blob);

    return 0;
}
690
// Image-storage forward pass. Mirrors the buffer-storage overload above,
// but binds the image variants of the weights/bias and passes 0 for the
// cstep push constants (images have no linear step).
int InnerProduct_vulkan::forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const
{
    const int num_input = weight_data_size / num_output;

    // must mirror the packing decision made in create_pipeline()
    int in_elempack = opt.use_shader_pack8 && num_input % 8 == 0 ? 8 : num_input % 4 == 0 ? 4 : 1;
    int out_elempack = opt.use_shader_pack8 && num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;

    if (bottom_blob.dims == 2 && bottom_blob.w == num_input && bottom_blob.h * bottom_blob.elempack > 1)
    {
        // gemm: one matrix multiply over all input rows
        int h = bottom_blob.h;
        size_t elemsize = bottom_blob.elemsize;
        int elempack = bottom_blob.elempack;

        // unpacking: the gemm shader consumes pack1 data, so convert if needed
        VkImageMat bottom_blob_unpacked = bottom_blob;
        if (elempack > 1)
        {
            Option opt_pack1 = opt;
            // the unpacked copy is temporary — allocate it from the workspace pool
            opt_pack1.blob_vkallocator = opt.workspace_vkallocator;

            vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, 1, cmd, opt_pack1);
        }

        top_blob.create(num_output, h, elemsize, elempack, opt.blob_vkallocator);
        if (top_blob.empty())
            return -100;

        // the shader also writes pack1 output; repacked into top_blob afterwards
        VkImageMat top_blob_unpacked = top_blob;
        if (elempack > 1)
        {
            top_blob_unpacked.create(num_output, h * elempack, bottom_blob_unpacked.elemsize, 1, opt.workspace_vkallocator);
            if (top_blob_unpacked.empty())
                return -100;
        }

        // binding order must match the shader: input, output, weight, bias
        std::vector<VkImageMat> bindings(4);
        bindings[0] = bottom_blob_unpacked;
        bindings[1] = top_blob_unpacked;
        bindings[2] = weight_data_gpu_image;
        bindings[3] = bias_data_gpu_image;

        // push constants: dims/w/h/c/cstep for input then output (cstep is
        // meaningless for image storage, so 0 is passed)
        std::vector<vk_constant_type> constants(10);
        constants[0].i = bottom_blob_unpacked.dims;
        constants[1].i = bottom_blob_unpacked.w;
        constants[2].i = bottom_blob_unpacked.h;
        constants[3].i = bottom_blob_unpacked.c;
        constants[4].i = 0; //bottom_blob_unpacked.cstep;
        constants[5].i = top_blob_unpacked.dims;
        constants[6].i = top_blob_unpacked.w;
        constants[7].i = top_blob_unpacked.h;
        constants[8].i = top_blob_unpacked.c;
        constants[9].i = 0; //top_blob_unpacked.cstep;

        const Pipeline* pipeline = pipeline_innerproduct_gemm;

        // each invocation covers out_elempack output elements of one row
        VkImageMat dispatcher;
        dispatcher.w = top_blob_unpacked.w / out_elempack;
        dispatcher.h = top_blob_unpacked.h;
        dispatcher.c = 1;

        cmd.record_pipeline(pipeline, bindings, constants, dispatcher);

        // packing: restore the caller-visible packing of top_blob
        if (elempack > 1)
        {
            vkdev->convert_packing(top_blob_unpacked, top_blob, elempack, cmd, opt);
        }

        return 0;
    }

    // flatten the input to 1-d before the fc kernel
    VkImageMat bottom_blob_flattened = bottom_blob;
    {
        Option opt_flatten = opt;
        // the flattened copy is temporary — allocate it from the workspace pool
        opt_flatten.blob_vkallocator = opt.workspace_vkallocator;

        flatten->forward(bottom_blob, bottom_blob_flattened, cmd, opt_flatten);
    }

    // derive the output element size from the input's, adjusted for packing
    size_t elemsize = bottom_blob_flattened.elemsize;
    size_t out_elemsize = elemsize / in_elempack * out_elempack;

    if (opt.use_fp16_packed && !opt.use_fp16_storage)
    {
        // fp16p stores packed elements as fp16 but pack1 as fp32
        if (out_elempack == 8) out_elemsize = 8 * 2u;
        if (out_elempack == 4) out_elemsize = 4 * 2u;
        if (out_elempack == 1) out_elemsize = 4u;
    }

    top_blob.create(num_output / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
    if (top_blob.empty())
        return -100;

    // binding order must match the shader: input, output, weight, bias
    std::vector<VkImageMat> bindings(4);
    bindings[0] = bottom_blob_flattened;
    bindings[1] = top_blob;
    bindings[2] = weight_data_gpu_image;
    bindings[3] = bias_data_gpu_image;

    // push constants: dims/w/h/c/cstep for input then output (cstep unused)
    std::vector<vk_constant_type> constants(10);
    constants[0].i = bottom_blob_flattened.dims;
    constants[1].i = bottom_blob_flattened.w;
    constants[2].i = bottom_blob_flattened.h;
    constants[3].i = bottom_blob_flattened.c;
    constants[4].i = 0; //bottom_blob_flattened.cstep;
    constants[5].i = top_blob.dims;
    constants[6].i = top_blob.w;
    constants[7].i = top_blob.h;
    constants[8].i = top_blob.c;
    constants[9].i = 0; //top_blob.cstep;

    // pick the pipeline created for this packing combination in create_pipeline()
    const Pipeline* pipeline = 0;
    if (in_elempack == 1 && out_elempack == 1)
    {
        pipeline = pipeline_innerproduct;
    }
    else if (in_elempack == 4 && out_elempack == 4)
    {
        pipeline = pipeline_innerproduct_pack4;
    }
    else if (in_elempack == 1 && out_elempack == 4)
    {
        pipeline = pipeline_innerproduct_pack1to4;
    }
    else if (in_elempack == 4 && out_elempack == 1)
    {
        pipeline = pipeline_innerproduct_pack4to1;
    }
    else if (in_elempack == 8 && out_elempack == 8)
    {
        pipeline = pipeline_innerproduct_pack8;
    }
    else if (in_elempack == 1 && out_elempack == 8)
    {
        pipeline = pipeline_innerproduct_pack1to8;
    }
    else if (in_elempack == 4 && out_elempack == 8)
    {
        pipeline = pipeline_innerproduct_pack4to8;
    }
    else if (in_elempack == 8 && out_elempack == 4)
    {
        pipeline = pipeline_innerproduct_pack8to4;
    }
    else if (in_elempack == 8 && out_elempack == 1)
    {
        pipeline = pipeline_innerproduct_pack8to1;
    }

    cmd.record_pipeline(pipeline, bindings, constants, top_blob);

    return 0;
}
846
847 } // namespace ncnn
848