Home
last modified time | relevance | path

Searched refs:kernel_tm (Results 1 – 25 of 142) sorted by relevance

123456

/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_sgemm_int8.h23 kernel_tm.create(m * k, (size_t)1u); in conv_im2col_sgemm_transform_kernel_int8_neon()
25 int8_t* sa = kernel_tm; in conv_im2col_sgemm_transform_kernel_int8_neon()
82 const int8_t* pa = kernel_tm; in conv_im2col_sgemm_int8_neon()
111 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_transform_kernel_int8_neon()
152 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_transform_kernel_int8_neon()
327 const signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_neon()
532 const signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_neon()
805 signed char* ktmp = kernel_tm.channel(p / 8); in conv_im2col_sgemm_transform_kernel_int8_neon()
845 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_transform_kernel_int8_neon()
872 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_transform_kernel_int8_neon()
[all …]
H A Dconvolution_sgemm.h24 kernel_tm.create(4 * kernel_size, inch, outch / 4 + outch % 4); in conv_im2col_sgemm_transform_kernel_neon()
47 float* ktmp = kernel_tm.channel(p / 8); in conv_im2col_sgemm_transform_kernel_neon()
85 float* ktmp = kernel_tm.channel(p / 8 + (p % 8) / 4); in conv_im2col_sgemm_transform_kernel_neon()
87 float* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_transform_kernel_neon()
112 float* ktmp = kernel_tm.channel(p / 8 + (p % 8) / 4 + p % 4); in conv_im2col_sgemm_transform_kernel_neon()
114 float* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_transform_kernel_neon()
280 const float* va = kernel_tm.channel(i / 8); in conv_im2col_sgemm_neon()
600 const float* va = kernel_tm.channel(i / 8); in conv_im2col_sgemm_neon()
773 const float* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_neon()
1100 const float* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
80 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
81 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
82 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
83 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
84 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
85 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
86 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
184 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(q + 7); in conv3x3s1_winograd64_transform_kernel_pack4_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(q + 7); in conv3x3s1_winograd64_transform_kernel_pack8_fp16sa_neon()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
80 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
81 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
82 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
83 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
84 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
85 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
86 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
184 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4to1_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon()
80 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon()
81 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon()
82 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon()
83 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon()
84 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon()
85 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon()
86 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon()
180 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack4to1_neon()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
80 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
81 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
82 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
83 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
84 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
85 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
86 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
184 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
80 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
81 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
82 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
83 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
84 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
85 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
86 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
184 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to4_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
75 const Mat k0 = kernel_tm.channel(p); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
76 const Mat k1 = kernel_tm.channel(p + 1); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
77 const Mat k2 = kernel_tm.channel(p + 2); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
78 const Mat k3 = kernel_tm.channel(p + 3); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
79 const Mat k4 = kernel_tm.channel(p + 4); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
80 const Mat k5 = kernel_tm.channel(p + 5); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
81 const Mat k6 = kernel_tm.channel(p + 6); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
82 const Mat k7 = kernel_tm.channel(p + 7); in conv3x3s1_winograd64_transform_kernel_pack8to1_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4.h18 Mat kernel_tm; in conv3x3s1_winograd64_transform_kernel_pack4_neon() local
19 kernel_tm.create(8 * 8, inch, outch); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
80 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
81 const Mat k1 = kernel_tm.channel(q + 1); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
82 const Mat k2 = kernel_tm.channel(q + 2); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
83 const Mat k3 = kernel_tm.channel(q + 3); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
84 const Mat k4 = kernel_tm.channel(q + 4); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
85 const Mat k5 = kernel_tm.channel(q + 5); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
86 const Mat k6 = kernel_tm.channel(q + 6); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
184 const Mat k0 = kernel_tm.channel(q); in conv3x3s1_winograd64_transform_kernel_pack4_neon()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/
H A Dconvolution_sgemm_int8.h161 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_sse()
203 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_sse()
250 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse()
315 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse()
597 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_dequant_sse()
639 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_dequant_sse()
696 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse()
761 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse()
1046 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_requant_sse()
1149 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_requant_sse()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/
H A Dconvolution_sgemm_int8.h161 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_sse()
203 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_sse()
250 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse()
315 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse()
597 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_dequant_sse()
639 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_dequant_sse()
696 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse()
761 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse()
1046 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_requant_sse()
1149 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_requant_sse()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/
H A Dconvolution_sgemm_int8.h161 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_sse()
203 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_sse()
250 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse()
315 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse()
597 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_dequant_sse()
639 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_dequant_sse()
696 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse()
761 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse()
1046 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_requant_sse()
1149 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_requant_sse()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/
H A Dconvolution_sgemm_int8.h161 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_sse()
203 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_sse()
250 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse()
315 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse()
597 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_dequant_sse()
639 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_dequant_sse()
696 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse()
761 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse()
1046 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_requant_sse()
1149 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_requant_sse()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/
H A Dconvolution_sgemm_int8.h169 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_sse()
211 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_sse()
258 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse()
323 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_sse()
605 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_dequant_sse()
647 signed char* ktmp = kernel_tm.channel(p / 4 + p % 4); in conv_im2col_sgemm_int8_dequant_sse()
704 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse()
769 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_dequant_sse()
1054 signed char* ktmp = kernel_tm.channel(p / 4); in conv_im2col_sgemm_int8_requant_sse()
1157 signed char* va = kernel_tm.channel(i / 4); in conv_im2col_sgemm_int8_requant_sse()
[all …]

123456