1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #include "crop_vulkan.h"
16 
17 #include "layer_shader_type.h"
18 #include "layer_type.h"
19 
20 namespace ncnn {
21 
Crop_vulkan()22 Crop_vulkan::Crop_vulkan()
23 {
24     support_vulkan = true;
25     support_image_storage = true;
26 
27     pipeline_crop = 0;
28     pipeline_crop_pack4 = 0;
29     pipeline_crop_pack1to4 = 0;
30     pipeline_crop_pack4to1 = 0;
31     pipeline_crop_pack8 = 0;
32     pipeline_crop_pack1to8 = 0;
33     pipeline_crop_pack4to8 = 0;
34     pipeline_crop_pack8to4 = 0;
35     pipeline_crop_pack8to1 = 0;
36 }
37 
create_pipeline(const Option & opt)38 int Crop_vulkan::create_pipeline(const Option& opt)
39 {
40     const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
41     const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
42 
43     int elempack = 1;
44     if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
45     if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
46     if (shape.dims == 3 || shape.dims == 4) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
47 
48     int out_elempack = 1;
49     if (out_shape.dims == 1) out_elempack = opt.use_shader_pack8 && out_shape.w % 8 == 0 ? 8 : out_shape.w % 4 == 0 ? 4 : 1;
50     if (out_shape.dims == 2) out_elempack = opt.use_shader_pack8 && out_shape.h % 8 == 0 ? 8 : out_shape.h % 4 == 0 ? 4 : 1;
51     if (out_shape.dims == 3 || out_shape.dims == 4) out_elempack = opt.use_shader_pack8 && out_shape.c % 8 == 0 ? 8 : out_shape.c % 4 == 0 ? 4 : 1;
52 
53     int offset_elempack = 1;
54     bool numpy_style_slice = !starts.empty() && !ends.empty();
55     if (numpy_style_slice)
56     {
57         offset_elempack = elempack;
58 
59         const int* starts_ptr = starts;
60         const int* axes_ptr = axes;
61 
62         int _axes[4] = {0, 1, 2, 3};
63         int num_axis = axes.w;
64         if (num_axis == 0)
65         {
66             num_axis = shape.dims;
67         }
68         else
69         {
70             for (int i = 0; i < num_axis; i++)
71             {
72                 int axis = axes_ptr[i];
73                 if (axis < 0)
74                     axis = shape.dims + axis;
75                 _axes[i] = axis;
76             }
77         }
78 
79         for (int i = 0; i < num_axis; i++)
80         {
81             int start = starts_ptr[i];
82             int axis = _axes[i];
83 
84             if (shape.dims == 1 && axis == 0)
85             {
86                 int _woffset = start >= 0 ? start : shape.w + start;
87                 offset_elempack = opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
88             }
89             if (shape.dims == 2 && axis == 0)
90             {
91                 int _hoffset = start >= 0 ? start : shape.h + start;
92                 offset_elempack = opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
93             }
94             if ((shape.dims == 3 || shape.dims == 4) && axis == 0)
95             {
96                 int _coffset = start >= 0 ? start : shape.c + start;
97                 offset_elempack = opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
98             }
99         }
100     }
101     else
102     {
103         if (shape.dims == 1)
104         {
105             if (woffset == 0)
106                 offset_elempack = elempack;
107             else
108                 offset_elempack = opt.use_shader_pack8 && woffset % 8 == 0 ? 8 : woffset % 4 == 0 ? 4 : 1;
109         }
110         else if (shape.dims == 2)
111         {
112             if (hoffset == 0)
113                 offset_elempack = elempack;
114             else
115                 offset_elempack = opt.use_shader_pack8 && hoffset % 8 == 0 ? 8 : hoffset % 4 == 0 ? 4 : 1;
116         }
117         else // if (shape.dims == 3 || shape.dims == 4)
118         {
119             if (coffset == 0)
120                 offset_elempack = elempack;
121             else
122                 offset_elempack = opt.use_shader_pack8 && coffset % 8 == 0 ? 8 : coffset % 4 == 0 ? 4 : 1;
123         }
124     }
125 
126     offset_elempack = std::min(offset_elempack, elempack);
127 
128     size_t elemsize;
129     size_t out_elemsize;
130     if (opt.use_fp16_storage)
131     {
132         elemsize = elempack * 2u;
133         out_elemsize = out_elempack * 2u;
134     }
135     else if (opt.use_fp16_packed)
136     {
137         elemsize = elempack == 1 ? 4u : elempack * 2u;
138         out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
139     }
140     else
141     {
142         elemsize = elempack * 4u;
143         out_elemsize = out_elempack * 4u;
144     }
145 
146     Mat shape_packed;
147     if (shape.dims == 1) shape_packed = Mat(shape.w / elempack, (void*)0, elemsize, elempack);
148     if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
149     if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
150     if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);
151 
152     Mat out_shape_packed;
153     if (out_shape.dims == 1) out_shape_packed = Mat(out_shape.w / out_elempack, (void*)0, out_elemsize, out_elempack);
154     if (out_shape.dims == 2) out_shape_packed = Mat(out_shape.w, out_shape.h / out_elempack, (void*)0, out_elemsize, out_elempack);
155     if (out_shape.dims == 3) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
156     if (out_shape.dims == 4) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.d, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
157 
158     Mat shape_unpacked = shape_packed;
159     if (one_blob_only && shape.dims != 0 && elempack == out_elempack && elempack > offset_elempack)
160     {
161         size_t offset_elemsize;
162         if (opt.use_fp16_storage)
163         {
164             offset_elemsize = offset_elempack * 2u;
165         }
166         else if (opt.use_fp16_packed)
167         {
168             offset_elemsize = offset_elempack == 1 ? 4u : offset_elempack * 2u;
169         }
170         else
171         {
172             offset_elemsize = offset_elempack * 4u;
173         }
174 
175         if (shape.dims == 1) shape_unpacked = Mat(shape.w / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
176         if (shape.dims == 2) shape_unpacked = Mat(shape.w, shape.h / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
177         if (shape.dims == 3) shape_unpacked = Mat(shape.w, shape.h, shape.c / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
178         if (shape.dims == 4) shape_unpacked = Mat(shape.w, shape.h, shape.d, shape.c / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
179     }
180 
181     std::vector<vk_specialization_type> specializations(1 + 12);
182     specializations[0].i = vkdev->info.bug_implicit_fp16_arithmetic();
183     specializations[1 + 0].i = shape_unpacked.dims;
184     specializations[1 + 1].i = shape_unpacked.w;
185     specializations[1 + 2].i = shape_unpacked.h;
186     specializations[1 + 3].i = shape_unpacked.d;
187     specializations[1 + 4].i = shape_unpacked.c;
188     specializations[1 + 5].i = shape_unpacked.cstep;
189     specializations[1 + 6].i = out_shape_packed.dims;
190     specializations[1 + 7].i = out_shape_packed.w;
191     specializations[1 + 8].i = out_shape_packed.h;
192     specializations[1 + 9].i = out_shape_packed.d;
193     specializations[1 + 10].i = out_shape_packed.c;
194     specializations[1 + 11].i = out_shape_packed.cstep;
195 
196     Mat local_size_xyz;
197     if (out_shape_packed.dims == 1)
198     {
199         local_size_xyz.w = std::min(64, out_shape_packed.w);
200         local_size_xyz.h = 1;
201         local_size_xyz.c = 1;
202     }
203     if (out_shape_packed.dims == 2)
204     {
205         local_size_xyz.w = std::min(8, out_shape_packed.w);
206         local_size_xyz.h = std::min(8, out_shape_packed.h);
207         local_size_xyz.c = 1;
208     }
209     if (out_shape_packed.dims == 3)
210     {
211         local_size_xyz.w = std::min(4, out_shape_packed.w);
212         local_size_xyz.h = std::min(4, out_shape_packed.h);
213         local_size_xyz.c = std::min(4, out_shape_packed.c);
214     }
215     if (out_shape_packed.dims == 4)
216     {
217         local_size_xyz.w = std::min(4, out_shape_packed.w);
218         local_size_xyz.h = std::min(4, out_shape_packed.h * out_shape_packed.d);
219         local_size_xyz.c = std::min(4, out_shape_packed.c);
220     }
221 
222     // pack1
223     if (out_shape.dims == 0 || out_elempack == 1)
224     {
225         pipeline_crop = new Pipeline(vkdev);
226         pipeline_crop->set_optimal_local_size_xyz(local_size_xyz);
227         pipeline_crop->create(LayerShaderType::crop, opt, specializations);
228     }
229 
230     // pack4
231     if (out_shape.dims == 0 || out_elempack == 4)
232     {
233         pipeline_crop_pack4 = new Pipeline(vkdev);
234         pipeline_crop_pack4->set_optimal_local_size_xyz(local_size_xyz);
235         pipeline_crop_pack4->create(LayerShaderType::crop_pack4, opt, specializations);
236     }
237 
238     // pack1to4
239     if (out_shape.dims == 0 || out_elempack == 4)
240     {
241         pipeline_crop_pack1to4 = new Pipeline(vkdev);
242         pipeline_crop_pack1to4->set_optimal_local_size_xyz(local_size_xyz);
243         pipeline_crop_pack1to4->create(LayerShaderType::crop_pack1to4, opt, specializations);
244     }
245 
246     // pack4to1
247     if (out_shape.dims == 0 || out_elempack == 1)
248     {
249         pipeline_crop_pack4to1 = new Pipeline(vkdev);
250         pipeline_crop_pack4to1->set_optimal_local_size_xyz(local_size_xyz);
251         pipeline_crop_pack4to1->create(LayerShaderType::crop_pack4to1, opt, specializations);
252     }
253 
254     // pack8
255     if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 8))
256     {
257         pipeline_crop_pack8 = new Pipeline(vkdev);
258         pipeline_crop_pack8->set_optimal_local_size_xyz(local_size_xyz);
259         pipeline_crop_pack8->create(LayerShaderType::crop_pack8, opt, specializations);
260     }
261 
262     // pack1to8
263     if ((opt.use_shader_pack8 && out_shape.dims == 0) || out_elempack == 8)
264     {
265         pipeline_crop_pack1to8 = new Pipeline(vkdev);
266         pipeline_crop_pack1to8->set_optimal_local_size_xyz(local_size_xyz);
267         pipeline_crop_pack1to8->create(LayerShaderType::crop_pack1to8, opt, specializations);
268     }
269 
270     // pack4to8
271     if ((opt.use_shader_pack8 && out_shape.dims == 0) || out_elempack == 8)
272     {
273         pipeline_crop_pack4to8 = new Pipeline(vkdev);
274         pipeline_crop_pack4to8->set_optimal_local_size_xyz(local_size_xyz);
275         pipeline_crop_pack4to8->create(LayerShaderType::crop_pack4to8, opt, specializations);
276     }
277 
278     // pack8to4
279     if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 4))
280     {
281         pipeline_crop_pack8to4 = new Pipeline(vkdev);
282         pipeline_crop_pack8to4->set_optimal_local_size_xyz(local_size_xyz);
283         pipeline_crop_pack8to4->create(LayerShaderType::crop_pack8to4, opt, specializations);
284     }
285 
286     // pack8to1
287     if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 1))
288     {
289         pipeline_crop_pack8to1 = new Pipeline(vkdev);
290         pipeline_crop_pack8to1->set_optimal_local_size_xyz(local_size_xyz);
291         pipeline_crop_pack8to1->create(LayerShaderType::crop_pack8to1, opt, specializations);
292     }
293 
294     return 0;
295 }
296 
destroy_pipeline(const Option &)297 int Crop_vulkan::destroy_pipeline(const Option& /*opt*/)
298 {
299     delete pipeline_crop;
300     pipeline_crop = 0;
301 
302     delete pipeline_crop_pack4;
303     pipeline_crop_pack4 = 0;
304 
305     delete pipeline_crop_pack1to4;
306     pipeline_crop_pack1to4 = 0;
307 
308     delete pipeline_crop_pack4to1;
309     pipeline_crop_pack4to1 = 0;
310 
311     delete pipeline_crop_pack8;
312     pipeline_crop_pack8 = 0;
313 
314     delete pipeline_crop_pack1to8;
315     pipeline_crop_pack1to8 = 0;
316 
317     delete pipeline_crop_pack4to8;
318     pipeline_crop_pack4to8 = 0;
319 
320     delete pipeline_crop_pack8to4;
321     pipeline_crop_pack8to4 = 0;
322 
323     delete pipeline_crop_pack8to1;
324     pipeline_crop_pack8to1 = 0;
325 
326     return 0;
327 }
328 
forward(const VkMat & bottom_blob,VkMat & top_blob,VkCompute & cmd,const Option & opt) const329 int Crop_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const
330 {
331     int dims = bottom_blob.dims;
332     size_t elemsize = bottom_blob.elemsize;
333     int elempack = bottom_blob.elempack;
334 
335     int _woffset, _hoffset, _doffset, _coffset;
336     int _outw, _outh, _outd, _outc;
337     resolve_crop_roi(bottom_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
338 
339     int offset_elempack;
340     int out_elempack;
341 
342     if (dims == 1)
343     {
344         if (_woffset == 0 && _outw == bottom_blob.w * elempack)
345         {
346             top_blob = bottom_blob;
347             return 0;
348         }
349 
350         offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
351         out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
352     }
353     else if (dims == 2)
354     {
355         if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
356         {
357             top_blob = bottom_blob;
358             return 0;
359         }
360 
361         offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
362         out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
363     }
364     else if (dims == 3)
365     {
366         if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
367         {
368             top_blob = bottom_blob;
369             return 0;
370         }
371 
372         offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
373         out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
374     }
375     else // if (dims == 4)
376     {
377         if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
378         {
379             top_blob = bottom_blob;
380             return 0;
381         }
382 
383         offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
384         out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
385     }
386 
387     offset_elempack = std::min(offset_elempack, elempack);
388 
389     size_t out_elemsize = elemsize / elempack * out_elempack;
390 
391     if (opt.use_fp16_packed && !opt.use_fp16_storage)
392     {
393         if (out_elempack == 8) out_elemsize = 8 * 2u;
394         if (out_elempack == 4) out_elemsize = 4 * 2u;
395         if (out_elempack == 1) out_elemsize = 4u;
396     }
397 
398     // unpacking
399     VkMat bottom_blob_unpacked = bottom_blob;
400     if (elempack == out_elempack && elempack > offset_elempack)
401     {
402         Option opt_pack1 = opt;
403         opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
404 
405         vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
406     }
407 
408     if (dims == 1)
409     {
410         top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
411     }
412     else if (dims == 2)
413     {
414         top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
415     }
416     else if (dims == 3)
417     {
418         top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
419     }
420     else // if (dims == 4)
421     {
422         top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
423     }
424     if (top_blob.empty())
425         return -100;
426 
427     std::vector<VkMat> bindings(2);
428     bindings[0] = bottom_blob_unpacked;
429     bindings[1] = top_blob;
430 
431     std::vector<vk_constant_type> constants(16);
432     constants[0].i = bottom_blob_unpacked.dims;
433     constants[1].i = bottom_blob_unpacked.w;
434     constants[2].i = bottom_blob_unpacked.h;
435     constants[3].i = bottom_blob_unpacked.d;
436     constants[4].i = bottom_blob_unpacked.c;
437     constants[5].i = bottom_blob_unpacked.cstep;
438     constants[6].i = top_blob.dims;
439     constants[7].i = top_blob.w;
440     constants[8].i = top_blob.h;
441     constants[9].i = top_blob.d;
442     constants[10].i = top_blob.c;
443     constants[11].i = top_blob.cstep;
444     constants[12].i = _woffset;
445     constants[13].i = _hoffset;
446     constants[14].i = _doffset;
447     constants[15].i = _coffset;
448 
449     const Pipeline* pipeline = 0;
450     if (elempack == 1 && out_elempack == 1)
451     {
452         pipeline = pipeline_crop;
453     }
454     else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
455     {
456         pipeline = pipeline_crop_pack4;
457     }
458     else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
459     {
460         pipeline = pipeline_crop_pack1to4;
461     }
462     else if (elempack == 1 && out_elempack == 4)
463     {
464         pipeline = pipeline_crop_pack1to4;
465     }
466     else if (elempack == 4 && out_elempack == 1)
467     {
468         pipeline = pipeline_crop_pack4to1;
469     }
470     else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
471     {
472         pipeline = pipeline_crop_pack8;
473     }
474     else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
475     {
476         pipeline = pipeline_crop_pack4to8;
477     }
478     else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
479     {
480         pipeline = pipeline_crop_pack1to8;
481     }
482     else if (elempack == 1 && out_elempack == 8)
483     {
484         pipeline = pipeline_crop_pack1to8;
485     }
486     else if (elempack == 4 && out_elempack == 8)
487     {
488         pipeline = pipeline_crop_pack4to8;
489     }
490     else if (elempack == 8 && out_elempack == 4)
491     {
492         pipeline = pipeline_crop_pack8to4;
493     }
494     else if (elempack == 8 && out_elempack == 1)
495     {
496         pipeline = pipeline_crop_pack8to1;
497     }
498 
499     cmd.record_pipeline(pipeline, bindings, constants, top_blob);
500 
501     return 0;
502 }
503 
forward(const std::vector<VkMat> & bottom_blobs,std::vector<VkMat> & top_blobs,VkCompute & cmd,const Option & opt) const504 int Crop_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
505 {
506     const VkMat& bottom_blob = bottom_blobs[0];
507     const VkMat& reference_blob = bottom_blobs[1];
508     VkMat& top_blob = top_blobs[0];
509 
510     int dims = bottom_blob.dims;
511     size_t elemsize = bottom_blob.elemsize;
512     int elempack = bottom_blob.elempack;
513 
514     int _woffset, _hoffset, _doffset, _coffset;
515     int _outw, _outh, _outd, _outc;
516     if (woffset == -233)
517     {
518         resolve_crop_roi(bottom_blob.shape(), (const int*)reference_blob.mapped(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
519     }
520     else
521     {
522         resolve_crop_roi(bottom_blob.shape(), reference_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
523     }
524 
525     int offset_elempack;
526     int out_elempack;
527 
528     if (dims == 1)
529     {
530         if (_woffset == 0 && _outw == bottom_blob.w * elempack)
531         {
532             top_blob = bottom_blob;
533             return 0;
534         }
535 
536         offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
537         out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
538     }
539     else if (dims == 2)
540     {
541         if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
542         {
543             top_blob = bottom_blob;
544             return 0;
545         }
546 
547         offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
548         out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
549     }
550     else if (dims == 3)
551     {
552         if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
553         {
554             top_blob = bottom_blob;
555             return 0;
556         }
557 
558         offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
559         out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
560     }
561     else // if (dims == 4)
562     {
563         if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
564         {
565             top_blob = bottom_blob;
566             return 0;
567         }
568 
569         offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
570         out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
571     }
572 
573     offset_elempack = std::min(offset_elempack, elempack);
574 
575     size_t out_elemsize = elemsize / elempack * out_elempack;
576 
577     if (opt.use_fp16_packed && !opt.use_fp16_storage)
578     {
579         if (out_elempack == 8) out_elemsize = 8 * 2u;
580         if (out_elempack == 4) out_elemsize = 4 * 2u;
581         if (out_elempack == 1) out_elemsize = 4u;
582     }
583 
584     // unpacking
585     VkMat bottom_blob_unpacked = bottom_blob;
586     if (elempack == out_elempack && elempack > offset_elempack)
587     {
588         Option opt_pack1 = opt;
589         opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
590 
591         vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
592     }
593 
594     if (dims == 1)
595     {
596         top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
597     }
598     else if (dims == 2)
599     {
600         top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
601     }
602     else if (dims == 3)
603     {
604         top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
605     }
606     else // if (dims == 4)
607     {
608         top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
609     }
610     if (top_blob.empty())
611         return -100;
612 
613     std::vector<VkMat> bindings(2);
614     bindings[0] = bottom_blob_unpacked;
615     bindings[1] = top_blob;
616 
617     std::vector<vk_constant_type> constants(16);
618     constants[0].i = bottom_blob_unpacked.dims;
619     constants[1].i = bottom_blob_unpacked.w;
620     constants[2].i = bottom_blob_unpacked.h;
621     constants[3].i = bottom_blob_unpacked.d;
622     constants[4].i = bottom_blob_unpacked.c;
623     constants[5].i = bottom_blob_unpacked.cstep;
624     constants[6].i = top_blob.dims;
625     constants[7].i = top_blob.w;
626     constants[8].i = top_blob.h;
627     constants[9].i = top_blob.d;
628     constants[10].i = top_blob.c;
629     constants[11].i = top_blob.cstep;
630     constants[12].i = _woffset;
631     constants[13].i = _hoffset;
632     constants[14].i = _doffset;
633     constants[15].i = _coffset;
634 
635     const Pipeline* pipeline = 0;
636     if (elempack == 1 && out_elempack == 1)
637     {
638         pipeline = pipeline_crop;
639     }
640     else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
641     {
642         pipeline = pipeline_crop_pack4;
643     }
644     else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
645     {
646         pipeline = pipeline_crop_pack1to4;
647     }
648     else if (elempack == 1 && out_elempack == 4)
649     {
650         pipeline = pipeline_crop_pack1to4;
651     }
652     else if (elempack == 4 && out_elempack == 1)
653     {
654         pipeline = pipeline_crop_pack4to1;
655     }
656     else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
657     {
658         pipeline = pipeline_crop_pack8;
659     }
660     else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
661     {
662         pipeline = pipeline_crop_pack4to8;
663     }
664     else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
665     {
666         pipeline = pipeline_crop_pack1to8;
667     }
668     else if (elempack == 1 && out_elempack == 8)
669     {
670         pipeline = pipeline_crop_pack1to8;
671     }
672     else if (elempack == 4 && out_elempack == 8)
673     {
674         pipeline = pipeline_crop_pack4to8;
675     }
676     else if (elempack == 8 && out_elempack == 4)
677     {
678         pipeline = pipeline_crop_pack8to4;
679     }
680     else if (elempack == 8 && out_elempack == 1)
681     {
682         pipeline = pipeline_crop_pack8to1;
683     }
684 
685     cmd.record_pipeline(pipeline, bindings, constants, top_blob);
686 
687     return 0;
688 }
689 
forward(const VkImageMat & bottom_blob,VkImageMat & top_blob,VkCompute & cmd,const Option & opt) const690 int Crop_vulkan::forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const
691 {
692     int dims = bottom_blob.dims;
693     size_t elemsize = bottom_blob.elemsize;
694     int elempack = bottom_blob.elempack;
695 
696     int _woffset, _hoffset, _doffset, _coffset;
697     int _outw, _outh, _outd, _outc;
698     resolve_crop_roi(bottom_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
699 
700     int offset_elempack;
701     int out_elempack;
702 
703     if (dims == 1)
704     {
705         if (_woffset == 0 && _outw == bottom_blob.w * elempack)
706         {
707             top_blob = bottom_blob;
708             return 0;
709         }
710 
711         offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
712         out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
713     }
714     else if (dims == 2)
715     {
716         if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
717         {
718             top_blob = bottom_blob;
719             return 0;
720         }
721 
722         offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
723         out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
724     }
725     else if (dims == 3)
726     {
727         if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
728         {
729             top_blob = bottom_blob;
730             return 0;
731         }
732 
733         offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
734         out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
735     }
736     else // if (dims == 4)
737     {
738         if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
739         {
740             top_blob = bottom_blob;
741             return 0;
742         }
743 
744         offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
745         out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
746     }
747 
748     offset_elempack = std::min(offset_elempack, elempack);
749 
750     size_t out_elemsize = elemsize / elempack * out_elempack;
751 
752     if (opt.use_fp16_packed && !opt.use_fp16_storage)
753     {
754         if (out_elempack == 8) out_elemsize = 8 * 2u;
755         if (out_elempack == 4) out_elemsize = 4 * 2u;
756         if (out_elempack == 1) out_elemsize = 4u;
757     }
758 
759     // unpacking
760     VkImageMat bottom_blob_unpacked = bottom_blob;
761     if (elempack == out_elempack && elempack > offset_elempack)
762     {
763         Option opt_pack1 = opt;
764         opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
765 
766         vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
767     }
768 
769     if (dims == 1)
770     {
771         top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
772     }
773     else if (dims == 2)
774     {
775         top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
776     }
777     else if (dims == 3)
778     {
779         top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
780     }
781     else // if (dims == 4)
782     {
783         top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
784     }
785     if (top_blob.empty())
786         return -100;
787 
788     std::vector<VkImageMat> bindings(2);
789     bindings[0] = bottom_blob_unpacked;
790     bindings[1] = top_blob;
791 
792     std::vector<vk_constant_type> constants(16);
793     constants[0].i = bottom_blob_unpacked.dims;
794     constants[1].i = bottom_blob_unpacked.w;
795     constants[2].i = bottom_blob_unpacked.h;
796     constants[3].i = bottom_blob_unpacked.d;
797     constants[4].i = bottom_blob_unpacked.c;
798     constants[5].i = 0; //bottom_blob_unpacked.cstep;
799     constants[6].i = top_blob.dims;
800     constants[7].i = top_blob.w;
801     constants[8].i = top_blob.h;
802     constants[9].i = top_blob.d;
803     constants[10].i = top_blob.c;
804     constants[11].i = 0; //top_blob.cstep;
805     constants[12].i = _woffset;
806     constants[13].i = _hoffset;
807     constants[14].i = _doffset;
808     constants[15].i = _coffset;
809 
810     const Pipeline* pipeline = 0;
811     if (elempack == 1 && out_elempack == 1)
812     {
813         pipeline = pipeline_crop;
814     }
815     else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
816     {
817         pipeline = pipeline_crop_pack4;
818     }
819     else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
820     {
821         pipeline = pipeline_crop_pack1to4;
822     }
823     else if (elempack == 1 && out_elempack == 4)
824     {
825         pipeline = pipeline_crop_pack1to4;
826     }
827     else if (elempack == 4 && out_elempack == 1)
828     {
829         pipeline = pipeline_crop_pack4to1;
830     }
831     else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
832     {
833         pipeline = pipeline_crop_pack8;
834     }
835     else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
836     {
837         pipeline = pipeline_crop_pack4to8;
838     }
839     else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
840     {
841         pipeline = pipeline_crop_pack1to8;
842     }
843     else if (elempack == 1 && out_elempack == 8)
844     {
845         pipeline = pipeline_crop_pack1to8;
846     }
847     else if (elempack == 4 && out_elempack == 8)
848     {
849         pipeline = pipeline_crop_pack4to8;
850     }
851     else if (elempack == 8 && out_elempack == 4)
852     {
853         pipeline = pipeline_crop_pack8to4;
854     }
855     else if (elempack == 8 && out_elempack == 1)
856     {
857         pipeline = pipeline_crop_pack8to1;
858     }
859 
860     cmd.record_pipeline(pipeline, bindings, constants, top_blob);
861 
862     return 0;
863 }
864 
forward(const std::vector<VkImageMat> & bottom_blobs,std::vector<VkImageMat> & top_blobs,VkCompute & cmd,const Option & opt) const865 int Crop_vulkan::forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const
866 {
867     const VkImageMat& bottom_blob = bottom_blobs[0];
868     const VkImageMat& reference_blob = bottom_blobs[1];
869     VkImageMat& top_blob = top_blobs[0];
870 
871     int dims = bottom_blob.dims;
872     size_t elemsize = bottom_blob.elemsize;
873     int elempack = bottom_blob.elempack;
874 
875     int _woffset, _hoffset, _doffset, _coffset;
876     int _outw, _outh, _outd, _outc;
877     if (woffset == -233)
878     {
879         resolve_crop_roi(bottom_blob.shape(), (const int*)reference_blob.mapped(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
880     }
881     else
882     {
883         resolve_crop_roi(bottom_blob.shape(), reference_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
884     }
885 
886     int offset_elempack;
887     int out_elempack;
888 
889     if (dims == 1)
890     {
891         if (_woffset == 0 && _outw == bottom_blob.w * elempack)
892         {
893             top_blob = bottom_blob;
894             return 0;
895         }
896 
897         offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
898         out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
899     }
900     else if (dims == 2)
901     {
902         if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
903         {
904             top_blob = bottom_blob;
905             return 0;
906         }
907 
908         offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
909         out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
910     }
911     else if (dims == 3)
912     {
913         if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
914         {
915             top_blob = bottom_blob;
916             return 0;
917         }
918 
919         offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
920         out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
921     }
922     else // if (dims == 4)
923     {
924         if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
925         {
926             top_blob = bottom_blob;
927             return 0;
928         }
929 
930         offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
931         out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
932     }
933 
934     offset_elempack = std::min(offset_elempack, elempack);
935 
936     size_t out_elemsize = elemsize / elempack * out_elempack;
937 
938     if (opt.use_fp16_packed && !opt.use_fp16_storage)
939     {
940         if (out_elempack == 8) out_elemsize = 8 * 2u;
941         if (out_elempack == 4) out_elemsize = 4 * 2u;
942         if (out_elempack == 1) out_elemsize = 4u;
943     }
944 
945     // unpacking
946     VkImageMat bottom_blob_unpacked = bottom_blob;
947     if (elempack == out_elempack && elempack > offset_elempack)
948     {
949         Option opt_pack1 = opt;
950         opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
951 
952         vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
953     }
954 
955     if (dims == 1)
956     {
957         top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
958     }
959     else if (dims == 2)
960     {
961         top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
962     }
963     else if (dims == 3)
964     {
965         top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
966     }
967     else // if (dims == 4)
968     {
969         top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
970     }
971     if (top_blob.empty())
972         return -100;
973 
974     std::vector<VkImageMat> bindings(2);
975     bindings[0] = bottom_blob_unpacked;
976     bindings[1] = top_blob;
977 
978     std::vector<vk_constant_type> constants(16);
979     constants[0].i = bottom_blob_unpacked.dims;
980     constants[1].i = bottom_blob_unpacked.w;
981     constants[2].i = bottom_blob_unpacked.h;
982     constants[3].i = bottom_blob_unpacked.d;
983     constants[4].i = bottom_blob_unpacked.c;
984     constants[5].i = 0; //bottom_blob_unpacked.cstep;
985     constants[6].i = top_blob.dims;
986     constants[7].i = top_blob.w;
987     constants[8].i = top_blob.h;
988     constants[9].i = top_blob.d;
989     constants[10].i = top_blob.c;
990     constants[11].i = 0; //top_blob.cstep;
991     constants[12].i = _woffset;
992     constants[13].i = _hoffset;
993     constants[14].i = _doffset;
994     constants[15].i = _coffset;
995 
996     const Pipeline* pipeline = 0;
997     if (elempack == 1 && out_elempack == 1)
998     {
999         pipeline = pipeline_crop;
1000     }
1001     else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
1002     {
1003         pipeline = pipeline_crop_pack4;
1004     }
1005     else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
1006     {
1007         pipeline = pipeline_crop_pack1to4;
1008     }
1009     else if (elempack == 1 && out_elempack == 4)
1010     {
1011         pipeline = pipeline_crop_pack1to4;
1012     }
1013     else if (elempack == 4 && out_elempack == 1)
1014     {
1015         pipeline = pipeline_crop_pack4to1;
1016     }
1017     else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
1018     {
1019         pipeline = pipeline_crop_pack8;
1020     }
1021     else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
1022     {
1023         pipeline = pipeline_crop_pack4to8;
1024     }
1025     else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
1026     {
1027         pipeline = pipeline_crop_pack1to8;
1028     }
1029     else if (elempack == 1 && out_elempack == 8)
1030     {
1031         pipeline = pipeline_crop_pack1to8;
1032     }
1033     else if (elempack == 4 && out_elempack == 8)
1034     {
1035         pipeline = pipeline_crop_pack4to8;
1036     }
1037     else if (elempack == 8 && out_elempack == 4)
1038     {
1039         pipeline = pipeline_crop_pack8to4;
1040     }
1041     else if (elempack == 8 && out_elempack == 1)
1042     {
1043         pipeline = pipeline_crop_pack8to1;
1044     }
1045 
1046     cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1047 
1048     return 0;
1049 }
1050 
1051 } // namespace ncnn
1052