1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #include "crop_vulkan.h"
16
17 #include "layer_shader_type.h"
18 #include "layer_type.h"
19
20 namespace ncnn {
21
Crop_vulkan()22 Crop_vulkan::Crop_vulkan()
23 {
24 support_vulkan = true;
25 support_image_storage = true;
26
27 pipeline_crop = 0;
28 pipeline_crop_pack4 = 0;
29 pipeline_crop_pack1to4 = 0;
30 pipeline_crop_pack4to1 = 0;
31 pipeline_crop_pack8 = 0;
32 pipeline_crop_pack1to8 = 0;
33 pipeline_crop_pack4to8 = 0;
34 pipeline_crop_pack8to4 = 0;
35 pipeline_crop_pack8to1 = 0;
36 }
37
create_pipeline(const Option & opt)38 int Crop_vulkan::create_pipeline(const Option& opt)
39 {
40 const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
41 const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
42
43 int elempack = 1;
44 if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
45 if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
46 if (shape.dims == 3 || shape.dims == 4) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
47
48 int out_elempack = 1;
49 if (out_shape.dims == 1) out_elempack = opt.use_shader_pack8 && out_shape.w % 8 == 0 ? 8 : out_shape.w % 4 == 0 ? 4 : 1;
50 if (out_shape.dims == 2) out_elempack = opt.use_shader_pack8 && out_shape.h % 8 == 0 ? 8 : out_shape.h % 4 == 0 ? 4 : 1;
51 if (out_shape.dims == 3 || out_shape.dims == 4) out_elempack = opt.use_shader_pack8 && out_shape.c % 8 == 0 ? 8 : out_shape.c % 4 == 0 ? 4 : 1;
52
53 int offset_elempack = 1;
54 bool numpy_style_slice = !starts.empty() && !ends.empty();
55 if (numpy_style_slice)
56 {
57 offset_elempack = elempack;
58
59 const int* starts_ptr = starts;
60 const int* axes_ptr = axes;
61
62 int _axes[4] = {0, 1, 2, 3};
63 int num_axis = axes.w;
64 if (num_axis == 0)
65 {
66 num_axis = shape.dims;
67 }
68 else
69 {
70 for (int i = 0; i < num_axis; i++)
71 {
72 int axis = axes_ptr[i];
73 if (axis < 0)
74 axis = shape.dims + axis;
75 _axes[i] = axis;
76 }
77 }
78
79 for (int i = 0; i < num_axis; i++)
80 {
81 int start = starts_ptr[i];
82 int axis = _axes[i];
83
84 if (shape.dims == 1 && axis == 0)
85 {
86 int _woffset = start >= 0 ? start : shape.w + start;
87 offset_elempack = opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
88 }
89 if (shape.dims == 2 && axis == 0)
90 {
91 int _hoffset = start >= 0 ? start : shape.h + start;
92 offset_elempack = opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
93 }
94 if ((shape.dims == 3 || shape.dims == 4) && axis == 0)
95 {
96 int _coffset = start >= 0 ? start : shape.c + start;
97 offset_elempack = opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
98 }
99 }
100 }
101 else
102 {
103 if (shape.dims == 1)
104 {
105 if (woffset == 0)
106 offset_elempack = elempack;
107 else
108 offset_elempack = opt.use_shader_pack8 && woffset % 8 == 0 ? 8 : woffset % 4 == 0 ? 4 : 1;
109 }
110 else if (shape.dims == 2)
111 {
112 if (hoffset == 0)
113 offset_elempack = elempack;
114 else
115 offset_elempack = opt.use_shader_pack8 && hoffset % 8 == 0 ? 8 : hoffset % 4 == 0 ? 4 : 1;
116 }
117 else // if (shape.dims == 3 || shape.dims == 4)
118 {
119 if (coffset == 0)
120 offset_elempack = elempack;
121 else
122 offset_elempack = opt.use_shader_pack8 && coffset % 8 == 0 ? 8 : coffset % 4 == 0 ? 4 : 1;
123 }
124 }
125
126 offset_elempack = std::min(offset_elempack, elempack);
127
128 size_t elemsize;
129 size_t out_elemsize;
130 if (opt.use_fp16_storage)
131 {
132 elemsize = elempack * 2u;
133 out_elemsize = out_elempack * 2u;
134 }
135 else if (opt.use_fp16_packed)
136 {
137 elemsize = elempack == 1 ? 4u : elempack * 2u;
138 out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
139 }
140 else
141 {
142 elemsize = elempack * 4u;
143 out_elemsize = out_elempack * 4u;
144 }
145
146 Mat shape_packed;
147 if (shape.dims == 1) shape_packed = Mat(shape.w / elempack, (void*)0, elemsize, elempack);
148 if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
149 if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
150 if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);
151
152 Mat out_shape_packed;
153 if (out_shape.dims == 1) out_shape_packed = Mat(out_shape.w / out_elempack, (void*)0, out_elemsize, out_elempack);
154 if (out_shape.dims == 2) out_shape_packed = Mat(out_shape.w, out_shape.h / out_elempack, (void*)0, out_elemsize, out_elempack);
155 if (out_shape.dims == 3) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
156 if (out_shape.dims == 4) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.d, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
157
158 Mat shape_unpacked = shape_packed;
159 if (one_blob_only && shape.dims != 0 && elempack == out_elempack && elempack > offset_elempack)
160 {
161 size_t offset_elemsize;
162 if (opt.use_fp16_storage)
163 {
164 offset_elemsize = offset_elempack * 2u;
165 }
166 else if (opt.use_fp16_packed)
167 {
168 offset_elemsize = offset_elempack == 1 ? 4u : offset_elempack * 2u;
169 }
170 else
171 {
172 offset_elemsize = offset_elempack * 4u;
173 }
174
175 if (shape.dims == 1) shape_unpacked = Mat(shape.w / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
176 if (shape.dims == 2) shape_unpacked = Mat(shape.w, shape.h / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
177 if (shape.dims == 3) shape_unpacked = Mat(shape.w, shape.h, shape.c / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
178 if (shape.dims == 4) shape_unpacked = Mat(shape.w, shape.h, shape.d, shape.c / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
179 }
180
181 std::vector<vk_specialization_type> specializations(1 + 12);
182 specializations[0].i = vkdev->info.bug_implicit_fp16_arithmetic();
183 specializations[1 + 0].i = shape_unpacked.dims;
184 specializations[1 + 1].i = shape_unpacked.w;
185 specializations[1 + 2].i = shape_unpacked.h;
186 specializations[1 + 3].i = shape_unpacked.d;
187 specializations[1 + 4].i = shape_unpacked.c;
188 specializations[1 + 5].i = shape_unpacked.cstep;
189 specializations[1 + 6].i = out_shape_packed.dims;
190 specializations[1 + 7].i = out_shape_packed.w;
191 specializations[1 + 8].i = out_shape_packed.h;
192 specializations[1 + 9].i = out_shape_packed.d;
193 specializations[1 + 10].i = out_shape_packed.c;
194 specializations[1 + 11].i = out_shape_packed.cstep;
195
196 Mat local_size_xyz;
197 if (out_shape_packed.dims == 1)
198 {
199 local_size_xyz.w = std::min(64, out_shape_packed.w);
200 local_size_xyz.h = 1;
201 local_size_xyz.c = 1;
202 }
203 if (out_shape_packed.dims == 2)
204 {
205 local_size_xyz.w = std::min(8, out_shape_packed.w);
206 local_size_xyz.h = std::min(8, out_shape_packed.h);
207 local_size_xyz.c = 1;
208 }
209 if (out_shape_packed.dims == 3)
210 {
211 local_size_xyz.w = std::min(4, out_shape_packed.w);
212 local_size_xyz.h = std::min(4, out_shape_packed.h);
213 local_size_xyz.c = std::min(4, out_shape_packed.c);
214 }
215 if (out_shape_packed.dims == 4)
216 {
217 local_size_xyz.w = std::min(4, out_shape_packed.w);
218 local_size_xyz.h = std::min(4, out_shape_packed.h * out_shape_packed.d);
219 local_size_xyz.c = std::min(4, out_shape_packed.c);
220 }
221
222 // pack1
223 if (out_shape.dims == 0 || out_elempack == 1)
224 {
225 pipeline_crop = new Pipeline(vkdev);
226 pipeline_crop->set_optimal_local_size_xyz(local_size_xyz);
227 pipeline_crop->create(LayerShaderType::crop, opt, specializations);
228 }
229
230 // pack4
231 if (out_shape.dims == 0 || out_elempack == 4)
232 {
233 pipeline_crop_pack4 = new Pipeline(vkdev);
234 pipeline_crop_pack4->set_optimal_local_size_xyz(local_size_xyz);
235 pipeline_crop_pack4->create(LayerShaderType::crop_pack4, opt, specializations);
236 }
237
238 // pack1to4
239 if (out_shape.dims == 0 || out_elempack == 4)
240 {
241 pipeline_crop_pack1to4 = new Pipeline(vkdev);
242 pipeline_crop_pack1to4->set_optimal_local_size_xyz(local_size_xyz);
243 pipeline_crop_pack1to4->create(LayerShaderType::crop_pack1to4, opt, specializations);
244 }
245
246 // pack4to1
247 if (out_shape.dims == 0 || out_elempack == 1)
248 {
249 pipeline_crop_pack4to1 = new Pipeline(vkdev);
250 pipeline_crop_pack4to1->set_optimal_local_size_xyz(local_size_xyz);
251 pipeline_crop_pack4to1->create(LayerShaderType::crop_pack4to1, opt, specializations);
252 }
253
254 // pack8
255 if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 8))
256 {
257 pipeline_crop_pack8 = new Pipeline(vkdev);
258 pipeline_crop_pack8->set_optimal_local_size_xyz(local_size_xyz);
259 pipeline_crop_pack8->create(LayerShaderType::crop_pack8, opt, specializations);
260 }
261
262 // pack1to8
263 if ((opt.use_shader_pack8 && out_shape.dims == 0) || out_elempack == 8)
264 {
265 pipeline_crop_pack1to8 = new Pipeline(vkdev);
266 pipeline_crop_pack1to8->set_optimal_local_size_xyz(local_size_xyz);
267 pipeline_crop_pack1to8->create(LayerShaderType::crop_pack1to8, opt, specializations);
268 }
269
270 // pack4to8
271 if ((opt.use_shader_pack8 && out_shape.dims == 0) || out_elempack == 8)
272 {
273 pipeline_crop_pack4to8 = new Pipeline(vkdev);
274 pipeline_crop_pack4to8->set_optimal_local_size_xyz(local_size_xyz);
275 pipeline_crop_pack4to8->create(LayerShaderType::crop_pack4to8, opt, specializations);
276 }
277
278 // pack8to4
279 if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 4))
280 {
281 pipeline_crop_pack8to4 = new Pipeline(vkdev);
282 pipeline_crop_pack8to4->set_optimal_local_size_xyz(local_size_xyz);
283 pipeline_crop_pack8to4->create(LayerShaderType::crop_pack8to4, opt, specializations);
284 }
285
286 // pack8to1
287 if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 1))
288 {
289 pipeline_crop_pack8to1 = new Pipeline(vkdev);
290 pipeline_crop_pack8to1->set_optimal_local_size_xyz(local_size_xyz);
291 pipeline_crop_pack8to1->create(LayerShaderType::crop_pack8to1, opt, specializations);
292 }
293
294 return 0;
295 }
296
destroy_pipeline(const Option &)297 int Crop_vulkan::destroy_pipeline(const Option& /*opt*/)
298 {
299 delete pipeline_crop;
300 pipeline_crop = 0;
301
302 delete pipeline_crop_pack4;
303 pipeline_crop_pack4 = 0;
304
305 delete pipeline_crop_pack1to4;
306 pipeline_crop_pack1to4 = 0;
307
308 delete pipeline_crop_pack4to1;
309 pipeline_crop_pack4to1 = 0;
310
311 delete pipeline_crop_pack8;
312 pipeline_crop_pack8 = 0;
313
314 delete pipeline_crop_pack1to8;
315 pipeline_crop_pack1to8 = 0;
316
317 delete pipeline_crop_pack4to8;
318 pipeline_crop_pack4to8 = 0;
319
320 delete pipeline_crop_pack8to4;
321 pipeline_crop_pack8to4 = 0;
322
323 delete pipeline_crop_pack8to1;
324 pipeline_crop_pack8to1 = 0;
325
326 return 0;
327 }
328
forward(const VkMat & bottom_blob,VkMat & top_blob,VkCompute & cmd,const Option & opt) const329 int Crop_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const
330 {
331 int dims = bottom_blob.dims;
332 size_t elemsize = bottom_blob.elemsize;
333 int elempack = bottom_blob.elempack;
334
335 int _woffset, _hoffset, _doffset, _coffset;
336 int _outw, _outh, _outd, _outc;
337 resolve_crop_roi(bottom_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
338
339 int offset_elempack;
340 int out_elempack;
341
342 if (dims == 1)
343 {
344 if (_woffset == 0 && _outw == bottom_blob.w * elempack)
345 {
346 top_blob = bottom_blob;
347 return 0;
348 }
349
350 offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
351 out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
352 }
353 else if (dims == 2)
354 {
355 if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
356 {
357 top_blob = bottom_blob;
358 return 0;
359 }
360
361 offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
362 out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
363 }
364 else if (dims == 3)
365 {
366 if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
367 {
368 top_blob = bottom_blob;
369 return 0;
370 }
371
372 offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
373 out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
374 }
375 else // if (dims == 4)
376 {
377 if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
378 {
379 top_blob = bottom_blob;
380 return 0;
381 }
382
383 offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
384 out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
385 }
386
387 offset_elempack = std::min(offset_elempack, elempack);
388
389 size_t out_elemsize = elemsize / elempack * out_elempack;
390
391 if (opt.use_fp16_packed && !opt.use_fp16_storage)
392 {
393 if (out_elempack == 8) out_elemsize = 8 * 2u;
394 if (out_elempack == 4) out_elemsize = 4 * 2u;
395 if (out_elempack == 1) out_elemsize = 4u;
396 }
397
398 // unpacking
399 VkMat bottom_blob_unpacked = bottom_blob;
400 if (elempack == out_elempack && elempack > offset_elempack)
401 {
402 Option opt_pack1 = opt;
403 opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
404
405 vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
406 }
407
408 if (dims == 1)
409 {
410 top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
411 }
412 else if (dims == 2)
413 {
414 top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
415 }
416 else if (dims == 3)
417 {
418 top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
419 }
420 else // if (dims == 4)
421 {
422 top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
423 }
424 if (top_blob.empty())
425 return -100;
426
427 std::vector<VkMat> bindings(2);
428 bindings[0] = bottom_blob_unpacked;
429 bindings[1] = top_blob;
430
431 std::vector<vk_constant_type> constants(16);
432 constants[0].i = bottom_blob_unpacked.dims;
433 constants[1].i = bottom_blob_unpacked.w;
434 constants[2].i = bottom_blob_unpacked.h;
435 constants[3].i = bottom_blob_unpacked.d;
436 constants[4].i = bottom_blob_unpacked.c;
437 constants[5].i = bottom_blob_unpacked.cstep;
438 constants[6].i = top_blob.dims;
439 constants[7].i = top_blob.w;
440 constants[8].i = top_blob.h;
441 constants[9].i = top_blob.d;
442 constants[10].i = top_blob.c;
443 constants[11].i = top_blob.cstep;
444 constants[12].i = _woffset;
445 constants[13].i = _hoffset;
446 constants[14].i = _doffset;
447 constants[15].i = _coffset;
448
449 const Pipeline* pipeline = 0;
450 if (elempack == 1 && out_elempack == 1)
451 {
452 pipeline = pipeline_crop;
453 }
454 else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
455 {
456 pipeline = pipeline_crop_pack4;
457 }
458 else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
459 {
460 pipeline = pipeline_crop_pack1to4;
461 }
462 else if (elempack == 1 && out_elempack == 4)
463 {
464 pipeline = pipeline_crop_pack1to4;
465 }
466 else if (elempack == 4 && out_elempack == 1)
467 {
468 pipeline = pipeline_crop_pack4to1;
469 }
470 else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
471 {
472 pipeline = pipeline_crop_pack8;
473 }
474 else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
475 {
476 pipeline = pipeline_crop_pack4to8;
477 }
478 else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
479 {
480 pipeline = pipeline_crop_pack1to8;
481 }
482 else if (elempack == 1 && out_elempack == 8)
483 {
484 pipeline = pipeline_crop_pack1to8;
485 }
486 else if (elempack == 4 && out_elempack == 8)
487 {
488 pipeline = pipeline_crop_pack4to8;
489 }
490 else if (elempack == 8 && out_elempack == 4)
491 {
492 pipeline = pipeline_crop_pack8to4;
493 }
494 else if (elempack == 8 && out_elempack == 1)
495 {
496 pipeline = pipeline_crop_pack8to1;
497 }
498
499 cmd.record_pipeline(pipeline, bindings, constants, top_blob);
500
501 return 0;
502 }
503
forward(const std::vector<VkMat> & bottom_blobs,std::vector<VkMat> & top_blobs,VkCompute & cmd,const Option & opt) const504 int Crop_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
505 {
506 const VkMat& bottom_blob = bottom_blobs[0];
507 const VkMat& reference_blob = bottom_blobs[1];
508 VkMat& top_blob = top_blobs[0];
509
510 int dims = bottom_blob.dims;
511 size_t elemsize = bottom_blob.elemsize;
512 int elempack = bottom_blob.elempack;
513
514 int _woffset, _hoffset, _doffset, _coffset;
515 int _outw, _outh, _outd, _outc;
516 if (woffset == -233)
517 {
518 resolve_crop_roi(bottom_blob.shape(), (const int*)reference_blob.mapped(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
519 }
520 else
521 {
522 resolve_crop_roi(bottom_blob.shape(), reference_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
523 }
524
525 int offset_elempack;
526 int out_elempack;
527
528 if (dims == 1)
529 {
530 if (_woffset == 0 && _outw == bottom_blob.w * elempack)
531 {
532 top_blob = bottom_blob;
533 return 0;
534 }
535
536 offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
537 out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
538 }
539 else if (dims == 2)
540 {
541 if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
542 {
543 top_blob = bottom_blob;
544 return 0;
545 }
546
547 offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
548 out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
549 }
550 else if (dims == 3)
551 {
552 if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
553 {
554 top_blob = bottom_blob;
555 return 0;
556 }
557
558 offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
559 out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
560 }
561 else // if (dims == 4)
562 {
563 if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
564 {
565 top_blob = bottom_blob;
566 return 0;
567 }
568
569 offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
570 out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
571 }
572
573 offset_elempack = std::min(offset_elempack, elempack);
574
575 size_t out_elemsize = elemsize / elempack * out_elempack;
576
577 if (opt.use_fp16_packed && !opt.use_fp16_storage)
578 {
579 if (out_elempack == 8) out_elemsize = 8 * 2u;
580 if (out_elempack == 4) out_elemsize = 4 * 2u;
581 if (out_elempack == 1) out_elemsize = 4u;
582 }
583
584 // unpacking
585 VkMat bottom_blob_unpacked = bottom_blob;
586 if (elempack == out_elempack && elempack > offset_elempack)
587 {
588 Option opt_pack1 = opt;
589 opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
590
591 vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
592 }
593
594 if (dims == 1)
595 {
596 top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
597 }
598 else if (dims == 2)
599 {
600 top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
601 }
602 else if (dims == 3)
603 {
604 top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
605 }
606 else // if (dims == 4)
607 {
608 top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
609 }
610 if (top_blob.empty())
611 return -100;
612
613 std::vector<VkMat> bindings(2);
614 bindings[0] = bottom_blob_unpacked;
615 bindings[1] = top_blob;
616
617 std::vector<vk_constant_type> constants(16);
618 constants[0].i = bottom_blob_unpacked.dims;
619 constants[1].i = bottom_blob_unpacked.w;
620 constants[2].i = bottom_blob_unpacked.h;
621 constants[3].i = bottom_blob_unpacked.d;
622 constants[4].i = bottom_blob_unpacked.c;
623 constants[5].i = bottom_blob_unpacked.cstep;
624 constants[6].i = top_blob.dims;
625 constants[7].i = top_blob.w;
626 constants[8].i = top_blob.h;
627 constants[9].i = top_blob.d;
628 constants[10].i = top_blob.c;
629 constants[11].i = top_blob.cstep;
630 constants[12].i = _woffset;
631 constants[13].i = _hoffset;
632 constants[14].i = _doffset;
633 constants[15].i = _coffset;
634
635 const Pipeline* pipeline = 0;
636 if (elempack == 1 && out_elempack == 1)
637 {
638 pipeline = pipeline_crop;
639 }
640 else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
641 {
642 pipeline = pipeline_crop_pack4;
643 }
644 else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
645 {
646 pipeline = pipeline_crop_pack1to4;
647 }
648 else if (elempack == 1 && out_elempack == 4)
649 {
650 pipeline = pipeline_crop_pack1to4;
651 }
652 else if (elempack == 4 && out_elempack == 1)
653 {
654 pipeline = pipeline_crop_pack4to1;
655 }
656 else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
657 {
658 pipeline = pipeline_crop_pack8;
659 }
660 else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
661 {
662 pipeline = pipeline_crop_pack4to8;
663 }
664 else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
665 {
666 pipeline = pipeline_crop_pack1to8;
667 }
668 else if (elempack == 1 && out_elempack == 8)
669 {
670 pipeline = pipeline_crop_pack1to8;
671 }
672 else if (elempack == 4 && out_elempack == 8)
673 {
674 pipeline = pipeline_crop_pack4to8;
675 }
676 else if (elempack == 8 && out_elempack == 4)
677 {
678 pipeline = pipeline_crop_pack8to4;
679 }
680 else if (elempack == 8 && out_elempack == 1)
681 {
682 pipeline = pipeline_crop_pack8to1;
683 }
684
685 cmd.record_pipeline(pipeline, bindings, constants, top_blob);
686
687 return 0;
688 }
689
forward(const VkImageMat & bottom_blob,VkImageMat & top_blob,VkCompute & cmd,const Option & opt) const690 int Crop_vulkan::forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const
691 {
692 int dims = bottom_blob.dims;
693 size_t elemsize = bottom_blob.elemsize;
694 int elempack = bottom_blob.elempack;
695
696 int _woffset, _hoffset, _doffset, _coffset;
697 int _outw, _outh, _outd, _outc;
698 resolve_crop_roi(bottom_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
699
700 int offset_elempack;
701 int out_elempack;
702
703 if (dims == 1)
704 {
705 if (_woffset == 0 && _outw == bottom_blob.w * elempack)
706 {
707 top_blob = bottom_blob;
708 return 0;
709 }
710
711 offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
712 out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
713 }
714 else if (dims == 2)
715 {
716 if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
717 {
718 top_blob = bottom_blob;
719 return 0;
720 }
721
722 offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
723 out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
724 }
725 else if (dims == 3)
726 {
727 if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
728 {
729 top_blob = bottom_blob;
730 return 0;
731 }
732
733 offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
734 out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
735 }
736 else // if (dims == 4)
737 {
738 if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
739 {
740 top_blob = bottom_blob;
741 return 0;
742 }
743
744 offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
745 out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
746 }
747
748 offset_elempack = std::min(offset_elempack, elempack);
749
750 size_t out_elemsize = elemsize / elempack * out_elempack;
751
752 if (opt.use_fp16_packed && !opt.use_fp16_storage)
753 {
754 if (out_elempack == 8) out_elemsize = 8 * 2u;
755 if (out_elempack == 4) out_elemsize = 4 * 2u;
756 if (out_elempack == 1) out_elemsize = 4u;
757 }
758
759 // unpacking
760 VkImageMat bottom_blob_unpacked = bottom_blob;
761 if (elempack == out_elempack && elempack > offset_elempack)
762 {
763 Option opt_pack1 = opt;
764 opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
765
766 vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
767 }
768
769 if (dims == 1)
770 {
771 top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
772 }
773 else if (dims == 2)
774 {
775 top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
776 }
777 else if (dims == 3)
778 {
779 top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
780 }
781 else // if (dims == 4)
782 {
783 top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
784 }
785 if (top_blob.empty())
786 return -100;
787
788 std::vector<VkImageMat> bindings(2);
789 bindings[0] = bottom_blob_unpacked;
790 bindings[1] = top_blob;
791
792 std::vector<vk_constant_type> constants(16);
793 constants[0].i = bottom_blob_unpacked.dims;
794 constants[1].i = bottom_blob_unpacked.w;
795 constants[2].i = bottom_blob_unpacked.h;
796 constants[3].i = bottom_blob_unpacked.d;
797 constants[4].i = bottom_blob_unpacked.c;
798 constants[5].i = 0; //bottom_blob_unpacked.cstep;
799 constants[6].i = top_blob.dims;
800 constants[7].i = top_blob.w;
801 constants[8].i = top_blob.h;
802 constants[9].i = top_blob.d;
803 constants[10].i = top_blob.c;
804 constants[11].i = 0; //top_blob.cstep;
805 constants[12].i = _woffset;
806 constants[13].i = _hoffset;
807 constants[14].i = _doffset;
808 constants[15].i = _coffset;
809
810 const Pipeline* pipeline = 0;
811 if (elempack == 1 && out_elempack == 1)
812 {
813 pipeline = pipeline_crop;
814 }
815 else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
816 {
817 pipeline = pipeline_crop_pack4;
818 }
819 else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
820 {
821 pipeline = pipeline_crop_pack1to4;
822 }
823 else if (elempack == 1 && out_elempack == 4)
824 {
825 pipeline = pipeline_crop_pack1to4;
826 }
827 else if (elempack == 4 && out_elempack == 1)
828 {
829 pipeline = pipeline_crop_pack4to1;
830 }
831 else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
832 {
833 pipeline = pipeline_crop_pack8;
834 }
835 else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
836 {
837 pipeline = pipeline_crop_pack4to8;
838 }
839 else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
840 {
841 pipeline = pipeline_crop_pack1to8;
842 }
843 else if (elempack == 1 && out_elempack == 8)
844 {
845 pipeline = pipeline_crop_pack1to8;
846 }
847 else if (elempack == 4 && out_elempack == 8)
848 {
849 pipeline = pipeline_crop_pack4to8;
850 }
851 else if (elempack == 8 && out_elempack == 4)
852 {
853 pipeline = pipeline_crop_pack8to4;
854 }
855 else if (elempack == 8 && out_elempack == 1)
856 {
857 pipeline = pipeline_crop_pack8to1;
858 }
859
860 cmd.record_pipeline(pipeline, bindings, constants, top_blob);
861
862 return 0;
863 }
864
forward(const std::vector<VkImageMat> & bottom_blobs,std::vector<VkImageMat> & top_blobs,VkCompute & cmd,const Option & opt) const865 int Crop_vulkan::forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const
866 {
867 const VkImageMat& bottom_blob = bottom_blobs[0];
868 const VkImageMat& reference_blob = bottom_blobs[1];
869 VkImageMat& top_blob = top_blobs[0];
870
871 int dims = bottom_blob.dims;
872 size_t elemsize = bottom_blob.elemsize;
873 int elempack = bottom_blob.elempack;
874
875 int _woffset, _hoffset, _doffset, _coffset;
876 int _outw, _outh, _outd, _outc;
877 if (woffset == -233)
878 {
879 resolve_crop_roi(bottom_blob.shape(), (const int*)reference_blob.mapped(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
880 }
881 else
882 {
883 resolve_crop_roi(bottom_blob.shape(), reference_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
884 }
885
886 int offset_elempack;
887 int out_elempack;
888
889 if (dims == 1)
890 {
891 if (_woffset == 0 && _outw == bottom_blob.w * elempack)
892 {
893 top_blob = bottom_blob;
894 return 0;
895 }
896
897 offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
898 out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
899 }
900 else if (dims == 2)
901 {
902 if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
903 {
904 top_blob = bottom_blob;
905 return 0;
906 }
907
908 offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
909 out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
910 }
911 else if (dims == 3)
912 {
913 if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
914 {
915 top_blob = bottom_blob;
916 return 0;
917 }
918
919 offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
920 out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
921 }
922 else // if (dims == 4)
923 {
924 if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
925 {
926 top_blob = bottom_blob;
927 return 0;
928 }
929
930 offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
931 out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
932 }
933
934 offset_elempack = std::min(offset_elempack, elempack);
935
936 size_t out_elemsize = elemsize / elempack * out_elempack;
937
938 if (opt.use_fp16_packed && !opt.use_fp16_storage)
939 {
940 if (out_elempack == 8) out_elemsize = 8 * 2u;
941 if (out_elempack == 4) out_elemsize = 4 * 2u;
942 if (out_elempack == 1) out_elemsize = 4u;
943 }
944
945 // unpacking
946 VkImageMat bottom_blob_unpacked = bottom_blob;
947 if (elempack == out_elempack && elempack > offset_elempack)
948 {
949 Option opt_pack1 = opt;
950 opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
951
952 vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
953 }
954
955 if (dims == 1)
956 {
957 top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
958 }
959 else if (dims == 2)
960 {
961 top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
962 }
963 else if (dims == 3)
964 {
965 top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
966 }
967 else // if (dims == 4)
968 {
969 top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
970 }
971 if (top_blob.empty())
972 return -100;
973
974 std::vector<VkImageMat> bindings(2);
975 bindings[0] = bottom_blob_unpacked;
976 bindings[1] = top_blob;
977
978 std::vector<vk_constant_type> constants(16);
979 constants[0].i = bottom_blob_unpacked.dims;
980 constants[1].i = bottom_blob_unpacked.w;
981 constants[2].i = bottom_blob_unpacked.h;
982 constants[3].i = bottom_blob_unpacked.d;
983 constants[4].i = bottom_blob_unpacked.c;
984 constants[5].i = 0; //bottom_blob_unpacked.cstep;
985 constants[6].i = top_blob.dims;
986 constants[7].i = top_blob.w;
987 constants[8].i = top_blob.h;
988 constants[9].i = top_blob.d;
989 constants[10].i = top_blob.c;
990 constants[11].i = 0; //top_blob.cstep;
991 constants[12].i = _woffset;
992 constants[13].i = _hoffset;
993 constants[14].i = _doffset;
994 constants[15].i = _coffset;
995
996 const Pipeline* pipeline = 0;
997 if (elempack == 1 && out_elempack == 1)
998 {
999 pipeline = pipeline_crop;
1000 }
1001 else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
1002 {
1003 pipeline = pipeline_crop_pack4;
1004 }
1005 else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
1006 {
1007 pipeline = pipeline_crop_pack1to4;
1008 }
1009 else if (elempack == 1 && out_elempack == 4)
1010 {
1011 pipeline = pipeline_crop_pack1to4;
1012 }
1013 else if (elempack == 4 && out_elempack == 1)
1014 {
1015 pipeline = pipeline_crop_pack4to1;
1016 }
1017 else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
1018 {
1019 pipeline = pipeline_crop_pack8;
1020 }
1021 else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
1022 {
1023 pipeline = pipeline_crop_pack4to8;
1024 }
1025 else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
1026 {
1027 pipeline = pipeline_crop_pack1to8;
1028 }
1029 else if (elempack == 1 && out_elempack == 8)
1030 {
1031 pipeline = pipeline_crop_pack1to8;
1032 }
1033 else if (elempack == 4 && out_elempack == 8)
1034 {
1035 pipeline = pipeline_crop_pack4to8;
1036 }
1037 else if (elempack == 8 && out_elempack == 4)
1038 {
1039 pipeline = pipeline_crop_pack8to4;
1040 }
1041 else if (elempack == 8 && out_elempack == 1)
1042 {
1043 pipeline = pipeline_crop_pack8to1;
1044 }
1045
1046 cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1047
1048 return 0;
1049 }
1050
1051 } // namespace ncnn
1052