// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "layer/cast.h"
#include "testutil.h"

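// Cast layer params: 0 = type_from, 1 = type_to
// element types: 0 = auto, 1 = float32, 2 = float16, 3 = int8, 4 = bfloat16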
static int test_cast_cpu(const ncnn::Mat& a, int type_from, int type_to)
{
    ncnn::ParamDict pd;
    pd.set(0, type_from);
    pd.set(1, type_to);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = false;
    opt.use_int8_inference = false;
    opt.use_packing_layout = false;

    ncnn::Layer* op = ncnn::create_layer("Cast");

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat a_fp16;
    if (type_from == 2)
    {
        ncnn::cast_float32_to_float16(a, a_fp16, opt);
    }
    else if (type_from == 4)
    {
        ncnn::cast_float32_to_bfloat16(a, a_fp16, opt);
    }
    else
    {
        a_fp16 = a;
    }

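    // the fully qualified ncnn::Cast::forward call pins the naive base
    // implementation as the reference; op->forward below dispatches
    // virtually to the arch-optimized override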
    ncnn::Mat b;
    ((ncnn::Cast*)op)->ncnn::Cast::forward(a_fp16, b, opt);

    ncnn::Mat c;
    op->forward(a_fp16, c, opt);

    op->destroy_pipeline(opt);

    delete op;

    if (CompareMat(b, c, 0.001) != 0)
    {
        fprintf(stderr, "test_cast_cpu failed a.dims=%d a=(%d %d %d %d) type_from=%d type_to=%d\n", a.dims, a.w, a.h, a.d, a.c, type_from, type_to);
        return -1;
    }

    return 0;
}

static int test_cast_cpu_packed(const ncnn::Mat& a, int type_from, int type_to)
{
    ncnn::ParamDict pd;
    pd.set(0, type_from);
    pd.set(1, type_to);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = false;
    opt.use_packing_layout = false;

    ncnn::Layer* op = ncnn::create_layer("Cast");

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat a_fp16;
    if (type_from == 2)
    {
        ncnn::cast_float32_to_float16(a, a_fp16, opt);
    }
    else if (type_from == 4)
    {
        ncnn::cast_float32_to_bfloat16(a, a_fp16, opt);
    }
    else
    {
        a_fp16 = a;
    }

    ncnn::Mat b;
    ((ncnn::Cast*)op)->ncnn::Cast::forward(a_fp16, b, opt);

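    // repack the input to elempack=4 so op->forward sees the packed layout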
    ncnn::Mat a4;
    ncnn::convert_packing(a, a4, 4, opt);

    ncnn::Mat a4_fp16;
    if (type_from == 2)
    {
        ncnn::cast_float32_to_float16(a4, a4_fp16, opt);
    }
    else if (type_from == 4)
    {
        ncnn::cast_float32_to_bfloat16(a4, a4_fp16, opt);
    }
    else
    {
        a4_fp16 = a4;
    }

    ncnn::Mat c;
    op->forward(a4_fp16, c, opt);

    op->destroy_pipeline(opt);

    delete op;

    if (CompareMat(b, c, 0.001) != 0)
    {
        fprintf(stderr, "test_cast_cpu_packed failed a.dims=%d a=(%d %d %d %d) type_from=%d type_to=%d\n", a.dims, a.w, a.h, a.d, a.c, type_from, type_to);
        return -1;
    }

    return 0;
}

#if NCNN_VULKAN
static int test_cast_gpu_fp16p(const ncnn::Mat& a, int type_from, int type_to)
{
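    // bfloat16 casts are not exercised on the vulkan path, skip them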
    if (type_to == 4 || type_from == 4)
        return 0;
    ncnn::ParamDict pd;
    pd.set(0, type_from);
    pd.set(1, type_to);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = true;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_image_storage = false;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

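    // downgrade the fp16 options when the device does not support them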
    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Layer* op = ncnn::create_layer("Cast");

    op->vkdev = vkdev;

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat a_fp16;
    if (type_from == 2)
    {
        ncnn::cast_float32_to_float16(a, a_fp16, opt);
    }
    else
    {
        a_fp16 = a;
    }

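    // reference result from the naive cpu implementation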
    ncnn::Mat b;
    ((ncnn::Cast*)op)->ncnn::Cast::forward(a_fp16, b, opt);

    ncnn::Mat d;

    // pack
    ncnn::Mat a4;
    ncnn::convert_packing(a, a4, 4, opt);

    ncnn::Mat a4_fp16;
    if (type_from == 2 && a4.elempack == 4)
    {
        ncnn::cast_float32_to_float16(a4, a4_fp16, opt);
    }
    else
    {
        a4_fp16 = a4;
    }

    // forward
    ncnn::VkCompute cmd(vkdev);

    // upload
    ncnn::VkMat a4_gpu;
    cmd.record_clone(a4_fp16, a4_gpu, opt);

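    // take whichever forward path the layer reports it supports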
    ncnn::VkMat d4_gpu;
    if (op->support_inplace)
    {
        op->forward_inplace(a4_gpu, cmd, opt);

        d4_gpu = a4_gpu;
    }
    else
    {
        op->forward(a4_gpu, d4_gpu, cmd, opt);
    }

    // download
    cmd.record_clone(d4_gpu, d, opt);

    cmd.submit_and_wait();

    op->destroy_pipeline(opt);

    delete op;

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_cast_gpu_fp16p failed a.dims=%d a=(%d %d %d %d) type_from=%d type_to=%d\n", a.dims, a.w, a.h, a.d, a.c, type_from, type_to);
        return -1;
    }

    return 0;
}

static int test_cast_gpu_fp16p_pack8(const ncnn::Mat& a, int type_from, int type_to)
{
    if (type_to == 4 || type_from == 4)
        return 0;
    ncnn::ParamDict pd;
    pd.set(0, type_from);
    pd.set(1, type_to);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = true;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_shader_pack8 = true;
    opt.use_image_storage = false;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Layer* op = ncnn::create_layer("Cast");

    op->vkdev = vkdev;

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat a_fp16;
    if (type_from == 2)
    {
        ncnn::cast_float32_to_float16(a, a_fp16, opt);
    }
    else
    {
        a_fp16 = a;
    }

    ncnn::Mat b;
    ((ncnn::Cast*)op)->ncnn::Cast::forward(a_fp16, b, opt);

    ncnn::Mat d;

    // pack
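    // try elempack=8 first, fall back to elempack=4 when the channel
    // count is not divisible by 8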
    ncnn::Mat a4;
    ncnn::convert_packing(a, a4, 8, opt);
    if (a4.elempack != 8)
        ncnn::convert_packing(a, a4, 4, opt);

    ncnn::Mat a4_fp16;
    if (type_from == 2 && (a4.elempack == 4 || a4.elempack == 8))
    {
        ncnn::cast_float32_to_float16(a4, a4_fp16, opt);
    }
    else
    {
        a4_fp16 = a4;
    }

    // forward
    ncnn::VkCompute cmd(vkdev);

    // upload
    ncnn::VkMat a4_gpu;
    cmd.record_clone(a4_fp16, a4_gpu, opt);

    ncnn::VkMat d4_gpu;
    if (op->support_inplace)
    {
        op->forward_inplace(a4_gpu, cmd, opt);

        d4_gpu = a4_gpu;
    }
    else
    {
        op->forward(a4_gpu, d4_gpu, cmd, opt);
    }

    // download
    cmd.record_clone(d4_gpu, d, opt);

    cmd.submit_and_wait();

    op->destroy_pipeline(opt);

    delete op;

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_cast_gpu_fp16p_pack8 failed a.dims=%d a=(%d %d %d %d) type_from=%d type_to=%d\n", a.dims, a.w, a.h, a.d, a.c, type_from, type_to);
        return -1;
    }

    return 0;
}

static int test_cast_gpu_image_fp16p(const ncnn::Mat& a, int type_from, int type_to)
{
    if (type_to == 4 || type_from == 4)
        return 0;
    ncnn::ParamDict pd;
    pd.set(0, type_from);
    pd.set(1, type_to);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = true;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_image_storage = true;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Layer* op = ncnn::create_layer("Cast");

    op->vkdev = vkdev;

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat a_fp16;
    if (type_from == 2)
    {
        ncnn::cast_float32_to_float16(a, a_fp16, opt);
    }
    else
    {
        a_fp16 = a;
    }

    ncnn::Mat b;
    ((ncnn::Cast*)op)->ncnn::Cast::forward(a_fp16, b, opt);

    ncnn::Mat d;

    // pack
    ncnn::Mat a4;
    ncnn::convert_packing(a, a4, 4, opt);

    ncnn::Mat a4_fp16;
    if (type_from == 2 && a4.elempack == 4)
    {
        ncnn::cast_float32_to_float16(a4, a4_fp16, opt);
    }
    else
    {
        a4_fp16 = a4;
    }

    // forward
    ncnn::VkCompute cmd(vkdev);

    // upload
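    // VkImageMat routes the blob through image storage instead of buffers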
    ncnn::VkImageMat a4_gpu;
    cmd.record_clone(a4_fp16, a4_gpu, opt);

    ncnn::VkImageMat d4_gpu;
    if (op->support_inplace)
    {
        op->forward_inplace(a4_gpu, cmd, opt);

        d4_gpu = a4_gpu;
    }
    else
    {
        op->forward(a4_gpu, d4_gpu, cmd, opt);
    }

    // download
    cmd.record_clone(d4_gpu, d, opt);

    cmd.submit_and_wait();

    op->destroy_pipeline(opt);

    delete op;

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_cast_gpu_image_fp16p failed a.dims=%d a=(%d %d %d %d) type_from=%d type_to=%d\n", a.dims, a.w, a.h, a.d, a.c, type_from, type_to);
        return -1;
    }

    return 0;
}

static int test_cast_gpu_image_fp16p_pack8(const ncnn::Mat& a, int type_from, int type_to)
{
    if (type_to == 4 || type_from == 4)
        return 0;
    ncnn::ParamDict pd;
    pd.set(0, type_from);
    pd.set(1, type_to);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = true;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_shader_pack8 = true;
    opt.use_image_storage = true;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Layer* op = ncnn::create_layer("Cast");

    op->vkdev = vkdev;

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat a_fp16;
    if (type_from == 2)
    {
        ncnn::cast_float32_to_float16(a, a_fp16, opt);
    }
    else
    {
        a_fp16 = a;
    }

    ncnn::Mat b;
    ((ncnn::Cast*)op)->ncnn::Cast::forward(a_fp16, b, opt);

    ncnn::Mat d;

    // pack
    ncnn::Mat a4;
    ncnn::convert_packing(a, a4, 8, opt);
    if (a4.elempack != 8)
        ncnn::convert_packing(a, a4, 4, opt);

    ncnn::Mat a4_fp16;
    if (type_from == 2 && (a4.elempack == 4 || a4.elempack == 8))
    {
        ncnn::cast_float32_to_float16(a4, a4_fp16, opt);
    }
    else
    {
        a4_fp16 = a4;
    }

    // forward
    ncnn::VkCompute cmd(vkdev);

    // upload
    ncnn::VkImageMat a4_gpu;
    cmd.record_clone(a4_fp16, a4_gpu, opt);

    ncnn::VkImageMat d4_gpu;
    if (op->support_inplace)
    {
        op->forward_inplace(a4_gpu, cmd, opt);

        d4_gpu = a4_gpu;
    }
    else
    {
        op->forward(a4_gpu, d4_gpu, cmd, opt);
    }

    // download
    cmd.record_clone(d4_gpu, d, opt);

    cmd.submit_and_wait();

    op->destroy_pipeline(opt);

    delete op;

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_cast_gpu_image_fp16p_pack8 failed a.dims=%d a=(%d %d %d %d) type_from=%d type_to=%d\n", a.dims, a.w, a.h, a.d, a.c, type_from, type_to);
        return -1;
    }

    return 0;
}
#endif // NCNN_VULKAN

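// run one input through every cpu and gpu variant; the short-circuiting
// || returns the first non-zero (failing) result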
static int test_cast(const ncnn::Mat& a, int type_from, int type_to)
{
    return 0
           || test_cast_cpu(a, type_from, type_to)
           || test_cast_cpu_packed(a, type_from, type_to)
#if NCNN_VULKAN
           || test_cast_gpu_fp16p(a, type_from, type_to)
           || test_cast_gpu_fp16p_pack8(a, type_from, type_to)
           || test_cast_gpu_image_fp16p(a, type_from, type_to)
           || test_cast_gpu_image_fp16p_pack8(a, type_from, type_to)
#endif // NCNN_VULKAN
           ;
}

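// test_cast_0..3 cover 4D, 3D, 2D and 1D inputs for the
// fp32<->fp16 (1<->2) and fp32<->bf16 (1<->4) conversions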
static int test_cast_0()
{
    return 0
           || test_cast(RandomMat(5, 6, 7, 16), 1, 2)
           || test_cast(RandomMat(3, 4, 5, 13), 1, 2)
           || test_cast(RandomMat(5, 6, 7, 16), 2, 1)
           || test_cast(RandomMat(3, 4, 5, 13), 2, 1)
           || test_cast(RandomMat(5, 6, 7, 16), 1, 4)
           || test_cast(RandomMat(3, 4, 5, 13), 1, 4)
           || test_cast(RandomMat(5, 6, 7, 16), 4, 1)
           || test_cast(RandomMat(3, 4, 5, 13), 4, 1);
}

static int test_cast_1()
{
    return 0
           || test_cast(RandomMat(5, 7, 16), 1, 2)
           || test_cast(RandomMat(3, 5, 13), 1, 2)
           || test_cast(RandomMat(5, 7, 16), 2, 1)
           || test_cast(RandomMat(3, 5, 13), 2, 1)
           || test_cast(RandomMat(5, 7, 16), 1, 4)
           || test_cast(RandomMat(3, 5, 13), 1, 4)
           || test_cast(RandomMat(5, 7, 16), 4, 1)
           || test_cast(RandomMat(3, 5, 13), 4, 1);
}

static int test_cast_2()
{
    return 0
           || test_cast(RandomMat(6, 16), 1, 2)
           || test_cast(RandomMat(7, 15), 1, 2)
           || test_cast(RandomMat(6, 16), 2, 1)
           || test_cast(RandomMat(7, 15), 2, 1)
           || test_cast(RandomMat(6, 16), 1, 4)
           || test_cast(RandomMat(7, 15), 1, 4)
           || test_cast(RandomMat(6, 16), 4, 1)
           || test_cast(RandomMat(7, 15), 4, 1);
}

static int test_cast_3()
{
    return 0
           || test_cast(RandomMat(128), 1, 2)
           || test_cast(RandomMat(127), 1, 2)
           || test_cast(RandomMat(128), 2, 1)
           || test_cast(RandomMat(127), 2, 1)
           || test_cast(RandomMat(128), 1, 4)
           || test_cast(RandomMat(127), 1, 4)
           || test_cast(RandomMat(128), 4, 1)
           || test_cast(RandomMat(127), 4, 1);
}

int main()
{
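    // fixed seed keeps RandomMat deterministic across runs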
    SRAND(7767517);

    return 0
           || test_cast_0()
           || test_cast_1()
           || test_cast_2()
           || test_cast_3();
}