/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_sgemm_int8.h | 23 kernel_tm.create(m * k, (size_t)1u); in conv_im2col_sgemm_transform_kernel_int8_neon() 25 int8_t* sa = kernel_tm; in conv_im2col_sgemm_transform_kernel_int8_neon() 82 const int8_t* pa = kernel_tm; in conv_im2col_sgemm_int8_neon() 111 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_transform_kernel_int8_neon() 152 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_transform_kernel_int8_neon() 327 const signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_neon() 532 const signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_neon() 805 signed char* ktmp = kernel_tm.channel(p / 8); in conv_im2col_sgemm_transform_kernel_int8_neon() 845 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_transform_kernel_int8_neon() 872 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_transform_kernel_int8_neon() [all …]
|
H A D | convolution_sgemm.h | 24 kernel_tm.create(4 * kernel_size, inch, outch / 4 + outch % 4); in conv_im2col_sgemm_transform_kernel_neon() 47 float* ktmp = kernel_tm.channel(p / 8); in conv_im2col_sgemm_transform_kernel_neon() 85 float* ktmp = kernel_tm.channel(p / 8 + (p % 8) / 4); in conv_im2col_sgemm_transform_kernel_neon() 87 float* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_transform_kernel_neon() 112 float* ktmp = kernel_tm.channel(p / 8 + (p % 8) / 4 + p % 4); in conv_im2col_sgemm_transform_kernel_neon() 114 float* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_transform_kernel_neon() 280 const float* va = kernel_tm.channel(i / 8); in conv_im2col_sgemm_neon() 600 const float* va = kernel_tm.channel(i / 8); in conv_im2col_sgemm_neon() 773 const float* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_neon() 1100 const float* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_neon() [all …]
|
H A D | convolution_3x3_pack8to4_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack4.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 80 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 81 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 82 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 83 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 84 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 85 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 86 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 184 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon() [all …]
|
H A D | convolution_3x3_pack4_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(q + 7); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack8_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(q + 7); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to4_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack4.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 80 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 81 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 82 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 83 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 84 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 85 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 86 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 184 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() 80 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() 81 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() 82 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() 83 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() 84 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() 85 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() 86 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() 180 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to4_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack4.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 80 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 81 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 82 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 83 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 84 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 85 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 86 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 184 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to4_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack4.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 80 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 81 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 82 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 83 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 84 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 85 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 86 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 184 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_3x3_pack8to4_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() 82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack4.h | 18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_neon() local 19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 80 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 81 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 82 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 83 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 84 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 85 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 86 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_neon() 184 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | convolution_sgemm_int8.h | 161 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_sse() 203 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_sse() 250 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse() 315 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse() 597 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_dequant_sse() 639 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_dequant_sse() 696 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse() 761 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse() 1046 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_requant_sse() 1149 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_requant_sse() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | convolution_sgemm_int8.h | 161 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_sse() 203 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_sse() 250 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse() 315 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse() 597 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_dequant_sse() 639 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_dequant_sse() 696 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse() 761 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse() 1046 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_requant_sse() 1149 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_requant_sse() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | convolution_sgemm_int8.h | 161 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_sse() 203 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_sse() 250 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse() 315 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse() 597 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_dequant_sse() 639 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_dequant_sse() 696 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse() 761 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse() 1046 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_requant_sse() 1149 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_requant_sse() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | convolution_sgemm_int8.h | 161 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_sse() 203 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_sse() 250 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse() 315 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse() 597 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_dequant_sse() 639 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_dequant_sse() 696 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse() 761 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse() 1046 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_requant_sse() 1149 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_requant_sse() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | convolution_sgemm_int8.h | 169 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_sse() 211 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_sse() 258 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse() 323 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse() 605 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_dequant_sse() 647 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_dequant_sse() 704 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse() 769 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse() 1054 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_requant_sse() 1157 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_requant_sse() [all …]
|