// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "layer/packing.h"
#include "testutil.h"

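// Each CPU test builds a Packing layer, converts the input to in_elempack, then runs
// the layer twice: once through the explicitly qualified ncnn::Packing::forward (the
// naive base-class reference) and once through the virtual forward of the layer
// returned by create_layer (typically a platform-optimized override). The two outputs
// must match within a small tolerance.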
static int test_packing_cpu_fp32(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = false;
    opt.use_int8_inference = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_packing_layout = false;

    ncnn::Layer* op = ncnn::create_layer("Packing");

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat ap;
    ncnn::convert_packing(a, ap, in_elempack);

    ncnn::Mat b;
    ((ncnn::Packing*)op)->ncnn::Packing::forward(ap, b, opt);

    ncnn::Mat c;
    op->forward(ap, c, opt);

    op->destroy_pipeline(opt);

    delete op;

    if (CompareMat(b, c, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_cpu_fp32 failed a.dims=%d a=(%d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}

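// fp16 variant of the CPU test: the input is cast to fp16 storage before packing,
// the optimized output is cast back to fp32, and the result is compared against the
// naive base-class forward. If the created layer does not report support_fp16_storage
// the test is skipped.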
static int test_packing_cpu_fp16(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = false;
    opt.use_int8_inference = false;
    opt.use_fp16_storage = true;
    opt.use_fp16_arithmetic = true;
    opt.use_packing_layout = false;

    ncnn::Layer* op = ncnn::create_layer("Packing");

    if (!op->support_fp16_storage)
    {
        delete op;
        return 0;
    }

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat a16;
    ncnn::cast_float32_to_float16(a, a16);

    ncnn::Mat ap;
    ncnn::convert_packing(a16, ap, in_elempack);

    ncnn::Mat b;
    ((ncnn::Packing*)op)->ncnn::Packing::forward(ap, b, opt);

    ncnn::Mat c;
    op->forward(ap, c, opt);

    op->destroy_pipeline(opt);

    delete op;

    ncnn::Mat c32;
    ncnn::cast_float16_to_float32(c, c32);

    if (CompareMat(b, c32, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_cpu_fp16 failed a.dims=%d a=(%d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}

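// Run one input through both CPU precisions for a given in/out elempack pair.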
static int test_packing_cpu(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    return 0
           || test_packing_cpu_fp32(a, in_elempack, out_elempack)
           || test_packing_cpu_fp16(a, in_elempack, out_elempack);
}

#if NCNN_VULKAN
#include "layer/vulkan/packing_vulkan.h"

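// GPU variant using VkMat (buffer) storage on both sides: the storage_type_from and
// storage_type_to params (ids 4 and 5) are both 0 here. The naive CPU forward provides
// the reference, and the layer's Vulkan forward runs via a recorded VkCompute command.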
static int test_packing_gpu_buffer(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);
    pd.set(2, 1); // cast_type_from
    pd.set(3, 1); // cast_type_to
    pd.set(4, 0); // storage_type_from
    pd.set(5, 0); // storage_type_to

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_shader_pack8 = true;
    opt.use_image_storage = false;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Layer* op = ncnn::create_layer("Packing");

    op->vkdev = vkdev;

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat ap;
    ncnn::convert_packing(a, ap, in_elempack);

    ncnn::Mat b;
    ((ncnn::Packing*)op)->ncnn::Packing::forward(ap, b, opt);

    ncnn::Mat d;

    // forward
    ncnn::VkCompute cmd(vkdev);

    // upload
    ncnn::VkMat a_gpu;
    cmd.record_clone(ap, a_gpu, opt);

    ncnn::VkMat d_gpu;
    op->forward(a_gpu, d_gpu, cmd, opt);

    // download
    cmd.record_clone(d_gpu, d, opt);

    cmd.submit_and_wait();

    op->destroy_pipeline(opt);

    delete op;

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_gpu_buffer failed a.dims=%d a=(%d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}

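// Same GPU check with VkImageMat (image) storage on both input and output
// (storage_type_from = storage_type_to = 1) and image storage enabled in the options.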
static int test_packing_gpu_image(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);
    pd.set(2, 1); // cast_type_from
    pd.set(3, 1); // cast_type_to
    pd.set(4, 1); // storage_type_from
    pd.set(5, 1); // storage_type_to

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_shader_pack8 = true;
    opt.use_image_storage = true;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Layer* op = ncnn::create_layer("Packing");

    op->vkdev = vkdev;

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat ap;
    ncnn::convert_packing(a, ap, in_elempack);

    ncnn::Mat b;
    ((ncnn::Packing*)op)->ncnn::Packing::forward(ap, b, opt);

    ncnn::Mat d;

    // forward
    ncnn::VkCompute cmd(vkdev);

    // upload
    ncnn::VkImageMat a_gpu;
    cmd.record_clone(ap, a_gpu, opt);

    ncnn::VkImageMat d_gpu;
    op->forward(a_gpu, d_gpu, cmd, opt);

    // download
    cmd.record_clone(d_gpu, d, opt);

    cmd.submit_and_wait();

    op->destroy_pipeline(opt);

    delete op;

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_gpu_image failed a.dims=%d a=(%d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}

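// Mixed-storage GPU check: the input is uploaded as a VkMat buffer and the layer
// writes a VkImageMat (storage_type_from = 0, storage_type_to = 1), exercising the
// buffer-to-image packing path. Packing_vulkan is instantiated directly here.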
static int test_packing_gpu_buffer2image(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);
    pd.set(2, 1); // cast_type_from
    pd.set(3, 1); // cast_type_to
    pd.set(4, 0); // storage_type_from
    pd.set(5, 1); // storage_type_to

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_shader_pack8 = true;
    opt.use_image_storage = true;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Packing_vulkan* op = new ncnn::Packing_vulkan;

    op->vkdev = vkdev;

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat ap;
    ncnn::convert_packing(a, ap, in_elempack);

    ncnn::Mat b;
    ((ncnn::Packing*)op)->ncnn::Packing::forward(ap, b, opt);

    ncnn::Mat d;

    // forward
    ncnn::VkCompute cmd(vkdev);

    // upload
    ncnn::VkMat a_gpu;
    cmd.record_clone(ap, a_gpu, opt);

    ncnn::VkImageMat d_gpu;
    op->forward(a_gpu, d_gpu, cmd, opt);

    // download
    cmd.record_clone(d_gpu, d, opt);

    cmd.submit_and_wait();

    op->destroy_pipeline(opt);

    delete op;

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_gpu_buffer2image failed a.dims=%d a=(%d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}

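// Opposite direction: VkImageMat input, VkMat buffer output
// (storage_type_from = 1, storage_type_to = 0).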
static int test_packing_gpu_image2buffer(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);
    pd.set(2, 1); // cast_type_from
    pd.set(3, 1); // cast_type_to
    pd.set(4, 1); // storage_type_from
    pd.set(5, 0); // storage_type_to

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_shader_pack8 = true;
    opt.use_image_storage = true;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Packing_vulkan* op = new ncnn::Packing_vulkan;

    op->vkdev = vkdev;

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat ap;
    ncnn::convert_packing(a, ap, in_elempack);

    ncnn::Mat b;
    ((ncnn::Packing*)op)->ncnn::Packing::forward(ap, b, opt);

    ncnn::Mat d;

    // forward
    ncnn::VkCompute cmd(vkdev);

    // upload
    ncnn::VkImageMat a_gpu;
    cmd.record_clone(ap, a_gpu, opt);

    ncnn::VkMat d_gpu;
    op->forward(a_gpu, d_gpu, cmd, opt);

    // download
    cmd.record_clone(d_gpu, d, opt);

    cmd.submit_and_wait();

    op->destroy_pipeline(opt);

    delete op;

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_gpu_image2buffer failed a.dims=%d a=(%d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}
#endif

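// test_packing_0/1/2 cover 3D, 2D and 1D mats respectively, checking elempack
// conversions among 1, 4 and 8 on the CPU and, when Vulkan is enabled, on buffer,
// image and mixed buffer/image storage.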
static int test_packing_0()
{
    ncnn::Mat a = RandomMat(9, 10, 16);
    ncnn::Mat b = RandomMat(9, 10, 3);

    return 0
           || test_packing_cpu(a, 1, 1)
           || test_packing_cpu(a, 4, 4)
           || test_packing_cpu(a, 8, 8)
           || test_packing_cpu(a, 1, 4)
           || test_packing_cpu(a, 4, 1)
           || test_packing_cpu(a, 1, 8)
           || test_packing_cpu(a, 8, 1)
           || test_packing_cpu(a, 4, 8)
           || test_packing_cpu(a, 8, 4)
           || test_packing_cpu(b, 1, 1)
           || test_packing_cpu(b, 4, 4)
           || test_packing_cpu(b, 8, 8)
           || test_packing_cpu(b, 1, 4)
           || test_packing_cpu(b, 4, 1)
           || test_packing_cpu(b, 1, 8)
           || test_packing_cpu(b, 8, 1)
           || test_packing_cpu(b, 4, 8)
           || test_packing_cpu(b, 8, 4)
#if NCNN_VULKAN
           || test_packing_gpu_buffer(a, 1, 1)
           || test_packing_gpu_buffer(a, 4, 4)
           || test_packing_gpu_buffer(a, 8, 8)
           || test_packing_gpu_buffer(a, 1, 4)
           || test_packing_gpu_buffer(a, 4, 1)
           || test_packing_gpu_buffer(a, 1, 8)
           || test_packing_gpu_buffer(a, 8, 1)
           || test_packing_gpu_buffer(a, 4, 8)
           || test_packing_gpu_buffer(a, 8, 4)
           || test_packing_gpu_image(a, 1, 1)
           || test_packing_gpu_image(a, 4, 4)
           || test_packing_gpu_image(a, 8, 8)
           || test_packing_gpu_image(a, 1, 4)
           || test_packing_gpu_image(a, 4, 1)
           || test_packing_gpu_image(a, 1, 8)
           || test_packing_gpu_image(a, 8, 1)
           || test_packing_gpu_image(a, 4, 8)
           || test_packing_gpu_image(a, 8, 4)
           || test_packing_gpu_buffer2image(a, 1, 1)
           || test_packing_gpu_buffer2image(a, 4, 4)
           || test_packing_gpu_buffer2image(a, 8, 8)
           || test_packing_gpu_buffer2image(a, 1, 4)
           || test_packing_gpu_buffer2image(a, 4, 1)
           || test_packing_gpu_buffer2image(a, 1, 8)
           || test_packing_gpu_buffer2image(a, 8, 1)
           || test_packing_gpu_buffer2image(a, 4, 8)
           || test_packing_gpu_buffer2image(a, 8, 4)
           || test_packing_gpu_image2buffer(a, 1, 1)
           || test_packing_gpu_image2buffer(a, 4, 4)
           || test_packing_gpu_image2buffer(a, 8, 8)
           || test_packing_gpu_image2buffer(a, 1, 4)
           || test_packing_gpu_image2buffer(a, 4, 1)
           || test_packing_gpu_image2buffer(a, 1, 8)
           || test_packing_gpu_image2buffer(a, 8, 1)
           || test_packing_gpu_image2buffer(a, 4, 8)
           || test_packing_gpu_image2buffer(a, 8, 4)
#endif // NCNN_VULKAN
           ;
}

static int test_packing_1()
{
    ncnn::Mat a = RandomMat(19, 16);

    return 0
           || test_packing_cpu(a, 1, 1)
           || test_packing_cpu(a, 4, 4)
           || test_packing_cpu(a, 8, 8)
           || test_packing_cpu(a, 1, 4)
           || test_packing_cpu(a, 4, 1)
           || test_packing_cpu(a, 1, 8)
           || test_packing_cpu(a, 8, 1)
           || test_packing_cpu(a, 4, 8)
           || test_packing_cpu(a, 8, 4)
#if NCNN_VULKAN
           || test_packing_gpu_buffer(a, 1, 1)
           || test_packing_gpu_buffer(a, 4, 4)
           || test_packing_gpu_buffer(a, 8, 8)
           || test_packing_gpu_buffer(a, 1, 4)
           || test_packing_gpu_buffer(a, 4, 1)
           || test_packing_gpu_buffer(a, 1, 8)
           || test_packing_gpu_buffer(a, 8, 1)
           || test_packing_gpu_buffer(a, 4, 8)
           || test_packing_gpu_buffer(a, 8, 4)
           || test_packing_gpu_image(a, 1, 1)
           || test_packing_gpu_image(a, 4, 4)
           || test_packing_gpu_image(a, 8, 8)
           || test_packing_gpu_image(a, 1, 4)
           || test_packing_gpu_image(a, 4, 1)
           || test_packing_gpu_image(a, 1, 8)
           || test_packing_gpu_image(a, 8, 1)
           || test_packing_gpu_image(a, 4, 8)
           || test_packing_gpu_image(a, 8, 4)
           || test_packing_gpu_buffer2image(a, 1, 1)
           || test_packing_gpu_buffer2image(a, 4, 4)
           || test_packing_gpu_buffer2image(a, 8, 8)
           || test_packing_gpu_buffer2image(a, 1, 4)
           || test_packing_gpu_buffer2image(a, 4, 1)
           || test_packing_gpu_buffer2image(a, 1, 8)
           || test_packing_gpu_buffer2image(a, 8, 1)
           || test_packing_gpu_buffer2image(a, 4, 8)
           || test_packing_gpu_buffer2image(a, 8, 4)
           || test_packing_gpu_image2buffer(a, 1, 1)
           || test_packing_gpu_image2buffer(a, 4, 4)
           || test_packing_gpu_image2buffer(a, 8, 8)
           || test_packing_gpu_image2buffer(a, 1, 4)
           || test_packing_gpu_image2buffer(a, 4, 1)
           || test_packing_gpu_image2buffer(a, 1, 8)
           || test_packing_gpu_image2buffer(a, 8, 1)
           || test_packing_gpu_image2buffer(a, 4, 8)
           || test_packing_gpu_image2buffer(a, 8, 4)
#endif // NCNN_VULKAN
           ;
}

static int test_packing_2()
{
    ncnn::Mat a = RandomMat(80);

    return 0
           || test_packing_cpu(a, 1, 1)
           || test_packing_cpu(a, 4, 4)
           || test_packing_cpu(a, 8, 8)
           || test_packing_cpu(a, 1, 4)
           || test_packing_cpu(a, 4, 1)
           || test_packing_cpu(a, 1, 8)
           || test_packing_cpu(a, 8, 1)
           || test_packing_cpu(a, 4, 8)
           || test_packing_cpu(a, 8, 4)
#if NCNN_VULKAN
           || test_packing_gpu_buffer(a, 1, 1)
           || test_packing_gpu_buffer(a, 4, 4)
           || test_packing_gpu_buffer(a, 8, 8)
           || test_packing_gpu_buffer(a, 1, 4)
           || test_packing_gpu_buffer(a, 4, 1)
           || test_packing_gpu_buffer(a, 1, 8)
           || test_packing_gpu_buffer(a, 8, 1)
           || test_packing_gpu_buffer(a, 4, 8)
           || test_packing_gpu_buffer(a, 8, 4)
           || test_packing_gpu_image(a, 1, 1)
           || test_packing_gpu_image(a, 4, 4)
           || test_packing_gpu_image(a, 8, 8)
           || test_packing_gpu_image(a, 1, 4)
           || test_packing_gpu_image(a, 4, 1)
           || test_packing_gpu_image(a, 1, 8)
           || test_packing_gpu_image(a, 8, 1)
           || test_packing_gpu_image(a, 4, 8)
           || test_packing_gpu_image(a, 8, 4)
           || test_packing_gpu_buffer2image(a, 1, 1)
           || test_packing_gpu_buffer2image(a, 4, 4)
           || test_packing_gpu_buffer2image(a, 8, 8)
           || test_packing_gpu_buffer2image(a, 1, 4)
           || test_packing_gpu_buffer2image(a, 4, 1)
           || test_packing_gpu_buffer2image(a, 1, 8)
           || test_packing_gpu_buffer2image(a, 8, 1)
           || test_packing_gpu_buffer2image(a, 4, 8)
           || test_packing_gpu_buffer2image(a, 8, 4)
           || test_packing_gpu_image2buffer(a, 1, 1)
           || test_packing_gpu_image2buffer(a, 4, 4)
           || test_packing_gpu_image2buffer(a, 8, 8)
           || test_packing_gpu_image2buffer(a, 1, 4)
           || test_packing_gpu_image2buffer(a, 4, 1)
           || test_packing_gpu_image2buffer(a, 1, 8)
           || test_packing_gpu_image2buffer(a, 8, 1)
           || test_packing_gpu_image2buffer(a, 4, 8)
           || test_packing_gpu_image2buffer(a, 8, 4)
#endif // NCNN_VULKAN
           ;
}

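// The fixed seed keeps RandomMat deterministic so failures are reproducible.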
int main()
{
    SRAND(7767517);

    return 0
           || test_packing_0()
           || test_packing_1()
           || test_packing_2();
}