/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_1x1_pack8_fp16s.h | 91 int nn_size; in conv1x1s1_sgemm_pack8_fp16sa_neon() local 94 nn_size = size / 12; in conv1x1s1_sgemm_pack8_fp16sa_neon() 95 remain_size_start = nn_size * 12; in conv1x1s1_sgemm_pack8_fp16sa_neon() 98 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack8_fp16sa_neon() 143 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack8_fp16sa_neon() 146 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack8_fp16sa_neon() 185 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack8_fp16sa_neon() 189 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack8_fp16sa_neon() 214 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack8_fp16sa_neon() 218 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack8_fp16sa_neon() [all …]
|
H A D | convolution_1x1_pack4.h | 160 int nn_size; in conv1x1s1_sgemm_pack4_neon() local 164 nn_size = size / 12; in conv1x1s1_sgemm_pack4_neon() 165 remain_size_start = nn_size * 12; in conv1x1s1_sgemm_pack4_neon() 168 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_neon() 210 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4_neon() 213 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_neon() 276 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4_neon() 280 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_neon() 320 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack4_neon() 324 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_neon() [all …]
|
H A D | convolution_1x1_pack4_bf16s.h | 177 int nn_size; in conv1x1s1_sgemm_pack4_bf16s_neon() local 181 nn_size = size / 12; in conv1x1s1_sgemm_pack4_bf16s_neon() 182 remain_size_start = nn_size * 12; in conv1x1s1_sgemm_pack4_bf16s_neon() 185 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_bf16s_neon() 220 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4_bf16s_neon() 223 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_bf16s_neon() 273 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4_bf16s_neon() 277 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_bf16s_neon() 317 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack4_bf16s_neon() 321 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4_bf16s_neon() [all …]
|
H A D | convolution_1x1_pack4to1.h | 177 int nn_size = 0; in conv1x1s1_sgemm_pack4to1_neon() local 181 nn_size = size / 12; in conv1x1s1_sgemm_pack4to1_neon() 184 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() 226 remain_size_start += nn_size * 12; in conv1x1s1_sgemm_pack4to1_neon() 227 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4to1_neon() 229 nn_size = size >> 3; in conv1x1s1_sgemm_pack4to1_neon() 233 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() 303 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4to1_neon() 304 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4to1_neon() 307 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_sgemm_pack8_fp16s.h | 40 int nn_size = size / 12; in im2col_sgemm_pack8_fp16sa_neon() local 44 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon() 95 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon() 136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon() 139 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon() 169 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 52 int nn_size = size / 12; in im2col_sgemm_pack4_neon() local 56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon() 101 int nn_size = size >> 3; in im2col_sgemm_pack4_neon() 106 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon() 175 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon() 221 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack8to4_int8.h | 56 int nn_size = size >> 4; in im2col_sgemm_pack8to4_int8_neon() local 60 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8to4_int8_neon() 102 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon() 134 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8to4_int8_neon() 141 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon() 191 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 52 int nn_size = size / 12; in im2col_sgemm_pack4_bf16s_neon() local 56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon() 94 int nn_size = size >> 3; in im2col_sgemm_pack4_bf16s_neon() 99 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon() 155 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon() 201 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() [all …]
|
H A D | convolution_1x1_pack4to1.h | 177 int nn_size = 0; in conv1x1s1_sgemm_pack4to1_neon() local 181 nn_size = size / 12; in conv1x1s1_sgemm_pack4to1_neon() 184 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() 226 remain_size_start += nn_size * 12; in conv1x1s1_sgemm_pack4to1_neon() 227 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4to1_neon() 229 nn_size = size >> 3; in conv1x1s1_sgemm_pack4to1_neon() 233 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() 303 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4to1_neon() 304 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4to1_neon() 307 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_sgemm_pack8_fp16s.h | 40 int nn_size = size / 12; in im2col_sgemm_pack8_fp16sa_neon() local 44 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon() 95 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon() 136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon() 139 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon() 169 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 52 int nn_size = size / 12; in im2col_sgemm_pack4_neon() local 56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon() 101 int nn_size = size >> 3; in im2col_sgemm_pack4_neon() 106 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon() 175 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon() 221 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack8to4_int8.h | 56 int nn_size = size >> 4; in im2col_sgemm_pack8to4_int8_neon() local 60 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8to4_int8_neon() 102 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon() 134 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8to4_int8_neon() 141 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon() 191 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 52 int nn_size = size / 12; in im2col_sgemm_pack4_bf16s_neon() local 56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon() 94 int nn_size = size >> 3; in im2col_sgemm_pack4_bf16s_neon() 99 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon() 155 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon() 201 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() [all …]
|
H A D | convolution_1x1_pack4to1.h | 177 int nn_size = 0; in conv1x1s1_sgemm_pack4to1_neon() local 181 nn_size = size / 12; in conv1x1s1_sgemm_pack4to1_neon() 184 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() 226 remain_size_start += nn_size * 12; in conv1x1s1_sgemm_pack4to1_neon() 227 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4to1_neon() 229 nn_size = size >> 3; in conv1x1s1_sgemm_pack4to1_neon() 233 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() 303 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4to1_neon() 304 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4to1_neon() 307 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_sgemm_pack8_fp16s.h | 40 int nn_size = size / 12; in im2col_sgemm_pack8_fp16sa_neon() local 44 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon() 95 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon() 136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon() 139 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon() 169 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 52 int nn_size = size / 12; in im2col_sgemm_pack4_neon() local 56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon() 101 int nn_size = size >> 3; in im2col_sgemm_pack4_neon() 106 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon() 175 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon() 221 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack8to4_int8.h | 56 int nn_size = size >> 4; in im2col_sgemm_pack8to4_int8_neon() local 60 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8to4_int8_neon() 102 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon() 134 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8to4_int8_neon() 141 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon() 191 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 52 int nn_size = size / 12; in im2col_sgemm_pack4_bf16s_neon() local 56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon() 94 int nn_size = size >> 3; in im2col_sgemm_pack4_bf16s_neon() 99 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon() 155 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon() 201 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() [all …]
|
H A D | convolution_1x1_pack4to1.h | 177 int nn_size = 0; in conv1x1s1_sgemm_pack4to1_neon() local 181 nn_size = size / 12; in conv1x1s1_sgemm_pack4to1_neon() 184 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() 226 remain_size_start += nn_size * 12; in conv1x1s1_sgemm_pack4to1_neon() 227 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4to1_neon() 229 nn_size = size >> 3; in conv1x1s1_sgemm_pack4to1_neon() 233 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() 303 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4to1_neon() 304 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4to1_neon() 307 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_sgemm_pack8_fp16s.h | 40 int nn_size = size / 12; in im2col_sgemm_pack8_fp16sa_neon() local 44 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon() 95 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon() 136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon() 139 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() 165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon() 169 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8_fp16sa_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 52 int nn_size = size / 12; in im2col_sgemm_pack4_neon() local 56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon() 101 int nn_size = size >> 3; in im2col_sgemm_pack4_neon() 106 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon() 175 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() 217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon() 221 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack8to4_int8.h | 56 int nn_size = size >> 4; in im2col_sgemm_pack8to4_int8_neon() local 60 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8to4_int8_neon() 102 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon() 134 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8to4_int8_neon() 141 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() 183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon() 191 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack8to4_int8_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 52 int nn_size = size / 12; in im2col_sgemm_pack4_bf16s_neon() local 56 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon() 94 int nn_size = size >> 3; in im2col_sgemm_pack4_bf16s_neon() 99 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon() 155 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() 197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon() 201 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_pack4_bf16s_neon() [all …]
|
H A D | convolution_1x1_pack4to1.h | 177 int nn_size = 0; in conv1x1s1_sgemm_pack4to1_neon() local 181 nn_size = size / 12; in conv1x1s1_sgemm_pack4to1_neon() 184 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() 226 remain_size_start += nn_size * 12; in conv1x1s1_sgemm_pack4to1_neon() 227 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4to1_neon() 229 nn_size = size >> 3; in conv1x1s1_sgemm_pack4to1_neon() 233 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() 303 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4to1_neon() 304 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4to1_neon() 307 for (int ii = 0; ii < nn_size; ii++) in conv1x1s1_sgemm_pack4to1_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/ |
H A D | convolution_sgemm_packn.h | 42 int nn_size = size >> 3; in im2col_sgemm_packn_rvv() local 45 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_packn_rvv() 90 remain_size_start += nn_size << 3; in im2col_sgemm_packn_rvv() 91 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_packn_rvv() 94 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_packn_rvv() 131 remain_size_start += nn_size << 2; in im2col_sgemm_packn_rvv() 132 nn_size = (size - remain_size_start) >> 1; in im2col_sgemm_packn_rvv() 135 for (int ii = 0; ii < nn_size; ii++) in im2col_sgemm_packn_rvv() 168 remain_size_start += nn_size << 1; in im2col_sgemm_packn_rvv()
|