Home
last modified time | relevance | path

Searched refs:nn_size (Results 1 – 25 of 144) sorted by relevance

123456

/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_1x1_pack8_fp16s.h91 int nn_size; in conv1x1s1_sgemm_pack8_fp16sa_neon() local
94 nn_size = size / 12; in conv1x1s1_sgemm_pack8_fp16sa_neon()
95 remain_size_start = nn_size * 12; in conv1x1s1_sgemm_pack8_fp16sa_neon()
98 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack8_fp16sa_neon()
143 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack8_fp16sa_neon()
146 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack8_fp16sa_neon()
185 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack8_fp16sa_neon()
189 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack8_fp16sa_neon()
214 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack8_fp16sa_neon()
218 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack8_fp16sa_neon()
[all …]
H A Dconvolution_1x1_pack4.h160 int nn_size; in conv1x1s1_sgemm_pack4_neon() local
164 nn_size = size / 12; in conv1x1s1_sgemm_pack4_neon()
165 remain_size_start = nn_size * 12; in conv1x1s1_sgemm_pack4_neon()
168 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_neon()
210 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4_neon()
213 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_neon()
276 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4_neon()
280 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_neon()
320 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack4_neon()
324 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_neon()
[all …]
H A Dconvolution_1x1_pack4_bf16s.h177 int nn_size; in conv1x1s1_sgemm_pack4_bf16s_neon() local
181 nn_size = size / 12; in conv1x1s1_sgemm_pack4_bf16s_neon()
182 remain_size_start = nn_size * 12; in conv1x1s1_sgemm_pack4_bf16s_neon()
185 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_bf16s_neon()
220 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4_bf16s_neon()
223 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_bf16s_neon()
273 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4_bf16s_neon()
277 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_bf16s_neon()
317 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack4_bf16s_neon()
321 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_bf16s_neon()
[all …]
H A Dconvolution_1x1_pack4to1.h177 int nn_size = 0; in conv1x1s1_sgemm_pack4to1_neon() local
181 nn_size = size / 12; in conv1x1s1_sgemm_pack4to1_neon()
184 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
226 remain_size_start += nn_size * 12; in conv1x1s1_sgemm_pack4to1_neon()
227 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4to1_neon()
229 nn_size = size >> 3; in conv1x1s1_sgemm_pack4to1_neon()
233 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
303 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4to1_neon()
304 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4to1_neon()
307 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_sgemm_pack8_fp16s.h40 int nn_size = size / 12; in im2col_sgemm_pack8_fp16sa_neon() local
44 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon()
92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon()
95 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon()
136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon()
139 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon()
169 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
[all …]
H A Dconvolution_sgemm_pack4.h52 int nn_size = size / 12; in im2col_sgemm_pack4_neon() local
56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon()
99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon()
101 int nn_size = size >> 3; in im2col_sgemm_pack4_neon()
106 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon()
175 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon()
221 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
[all …]
H A Dconvolution_sgemm_pack8to4_int8.h56 int nn_size = size >> 4; in im2col_sgemm_pack8to4_int8_neon() local
60 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon()
99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8to4_int8_neon()
102 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon()
134 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8to4_int8_neon()
141 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon()
191 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
[all …]
H A Dconvolution_sgemm_pack4_bf16s.h52 int nn_size = size / 12; in im2col_sgemm_pack4_bf16s_neon() local
56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon()
92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon()
94 int nn_size = size >> 3; in im2col_sgemm_pack4_bf16s_neon()
99 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon()
155 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon()
201 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
[all …]
H A Dconvolution_1x1_pack4to1.h177 int nn_size = 0; in conv1x1s1_sgemm_pack4to1_neon() local
181 nn_size = size / 12; in conv1x1s1_sgemm_pack4to1_neon()
184 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
226 remain_size_start += nn_size * 12; in conv1x1s1_sgemm_pack4to1_neon()
227 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4to1_neon()
229 nn_size = size >> 3; in conv1x1s1_sgemm_pack4to1_neon()
233 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
303 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4to1_neon()
304 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4to1_neon()
307 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_sgemm_pack8_fp16s.h40 int nn_size = size / 12; in im2col_sgemm_pack8_fp16sa_neon() local
44 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon()
92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon()
95 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon()
136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon()
139 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon()
169 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
[all …]
H A Dconvolution_sgemm_pack4.h52 int nn_size = size / 12; in im2col_sgemm_pack4_neon() local
56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon()
99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon()
101 int nn_size = size >> 3; in im2col_sgemm_pack4_neon()
106 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon()
175 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon()
221 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
[all …]
H A Dconvolution_sgemm_pack8to4_int8.h56 int nn_size = size >> 4; in im2col_sgemm_pack8to4_int8_neon() local
60 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon()
99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8to4_int8_neon()
102 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon()
134 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8to4_int8_neon()
141 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon()
191 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
[all …]
H A Dconvolution_sgemm_pack4_bf16s.h52 int nn_size = size / 12; in im2col_sgemm_pack4_bf16s_neon() local
56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon()
92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon()
94 int nn_size = size >> 3; in im2col_sgemm_pack4_bf16s_neon()
99 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon()
155 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon()
201 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
[all …]
H A Dconvolution_1x1_pack4to1.h177 int nn_size = 0; in conv1x1s1_sgemm_pack4to1_neon() local
181 nn_size = size / 12; in conv1x1s1_sgemm_pack4to1_neon()
184 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
226 remain_size_start += nn_size * 12; in conv1x1s1_sgemm_pack4to1_neon()
227 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4to1_neon()
229 nn_size = size >> 3; in conv1x1s1_sgemm_pack4to1_neon()
233 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
303 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4to1_neon()
304 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4to1_neon()
307 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_sgemm_pack8_fp16s.h40 int nn_size = size / 12; in im2col_sgemm_pack8_fp16sa_neon() local
44 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon()
92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon()
95 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon()
136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon()
139 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon()
169 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
[all …]
H A Dconvolution_sgemm_pack4.h52 int nn_size = size / 12; in im2col_sgemm_pack4_neon() local
56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon()
99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon()
101 int nn_size = size >> 3; in im2col_sgemm_pack4_neon()
106 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon()
175 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon()
221 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
[all …]
H A Dconvolution_sgemm_pack8to4_int8.h56 int nn_size = size >> 4; in im2col_sgemm_pack8to4_int8_neon() local
60 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon()
99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8to4_int8_neon()
102 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon()
134 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8to4_int8_neon()
141 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon()
191 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
[all …]
H A Dconvolution_sgemm_pack4_bf16s.h52 int nn_size = size / 12; in im2col_sgemm_pack4_bf16s_neon() local
56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon()
92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon()
94 int nn_size = size >> 3; in im2col_sgemm_pack4_bf16s_neon()
99 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon()
155 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon()
201 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
[all …]
H A Dconvolution_1x1_pack4to1.h177 int nn_size = 0; in conv1x1s1_sgemm_pack4to1_neon() local
181 nn_size = size / 12; in conv1x1s1_sgemm_pack4to1_neon()
184 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
226 remain_size_start += nn_size * 12; in conv1x1s1_sgemm_pack4to1_neon()
227 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4to1_neon()
229 nn_size = size >> 3; in conv1x1s1_sgemm_pack4to1_neon()
233 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
303 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4to1_neon()
304 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4to1_neon()
307 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_sgemm_pack8_fp16s.h40 int nn_size = size / 12; in im2col_sgemm_pack8_fp16sa_neon() local
44 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon()
92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon()
95 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon()
136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon()
139 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon()
169 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon()
[all …]
H A Dconvolution_sgemm_pack4.h52 int nn_size = size / 12; in im2col_sgemm_pack4_neon() local
56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon()
99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon()
101 int nn_size = size >> 3; in im2col_sgemm_pack4_neon()
106 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon()
175 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon()
221 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon()
[all …]
H A Dconvolution_sgemm_pack8to4_int8.h56 int nn_size = size >> 4; in im2col_sgemm_pack8to4_int8_neon() local
60 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon()
99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8to4_int8_neon()
102 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon()
134 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8to4_int8_neon()
141 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon()
191 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon()
[all …]
H A Dconvolution_sgemm_pack4_bf16s.h52 int nn_size = size / 12; in im2col_sgemm_pack4_bf16s_neon() local
56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon()
92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon()
94 int nn_size = size >> 3; in im2col_sgemm_pack4_bf16s_neon()
99 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon()
155 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon()
201 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon()
[all …]
H A Dconvolution_1x1_pack4to1.h177 int nn_size = 0; in conv1x1s1_sgemm_pack4to1_neon() local
181 nn_size = size / 12; in conv1x1s1_sgemm_pack4to1_neon()
184 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
226 remain_size_start += nn_size * 12; in conv1x1s1_sgemm_pack4to1_neon()
227 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4to1_neon()
229 nn_size = size >> 3; in conv1x1s1_sgemm_pack4to1_neon()
233 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
303 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4to1_neon()
304 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4to1_neon()
307 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/
H A Dconvolution_sgemm_packn.h42 int nn_size = size >> 3; in im2col_sgemm_packn_rvv() local
45 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_packn_rvv()
90 remain_size_start += nn_size << 3; in im2col_sgemm_packn_rvv()
91 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_packn_rvv()
94 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_packn_rvv()
131 remain_size_start += nn_size << 2; in im2col_sgemm_packn_rvv()
132 nn_size = (size - remain_size_start) >> 1; in im2col_sgemm_packn_rvv()
135 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_packn_rvv()
168 remain_size_start += nn_size << 1; in im2col_sgemm_packn_rvv()

123456