/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_sgemm_pack8_fp16s.h | 41 int remain_size_start = 0; in im2col_sgemm_pack8_fp16sa_neon() local 46 int i = remain_size_start + ii * 12; in im2col_sgemm_pack8_fp16sa_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon() 97 int i = remain_size_start + ii * 8; in im2col_sgemm_pack8_fp16sa_neon() 135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon() 136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon() 141 int i = remain_size_start + ii * 4; in im2col_sgemm_pack8_fp16sa_neon() 165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon() 171 int i = remain_size_start + ii * 2; in im2col_sgemm_pack8_fp16sa_neon() [all …]
|
H A D | convolution_sgemm_pack8to4_int8.h | 57 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() local 62 int i = remain_size_start + ii * 16; in im2col_sgemm_pack8to4_int8_neon() 98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon() 104 int i = remain_size_start + ii * 8; in im2col_sgemm_pack8to4_int8_neon() 133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon() 136 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() 143 int i = remain_size_start + ii * 4; in im2col_sgemm_pack8to4_int8_neon() 183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon() 186 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() 193 int i = remain_size_start + ii * 2; in im2col_sgemm_pack8to4_int8_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 53 int remain_size_start = 0; in im2col_sgemm_pack4_neon() local 58 int i = remain_size_start + ii * 12; in im2col_sgemm_pack4_neon() 98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon() 102 int remain_size_start = 0; in im2col_sgemm_pack4_neon() 108 int i = remain_size_start + ii * 8; in im2col_sgemm_pack4_neon() 171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon() 177 int i = remain_size_start + ii * 4; in im2col_sgemm_pack4_neon() 217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon() 223 int i = remain_size_start + ii * 2; in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 53 int remain_size_start = 0; in im2col_sgemm_pack4_bf16s_neon() local 58 int i = remain_size_start + ii * 12; in im2col_sgemm_pack4_bf16s_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon() 95 int remain_size_start = 0; in im2col_sgemm_pack4_bf16s_neon() 101 int i = remain_size_start + ii * 8; in im2col_sgemm_pack4_bf16s_neon() 151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon() 157 int i = remain_size_start + ii * 4; in im2col_sgemm_pack4_bf16s_neon() 197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon() 203 int i = remain_size_start + ii * 2; in im2col_sgemm_pack4_bf16s_neon() [all …]
|
H A D | convolution_1x1_pack8to1_fp16s.h | 106 int remain_size_start = 0; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() local 108 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 113 int i = remain_size_start + ii * 8; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 150 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 151 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 156 int i = remain_size_start + ii * 4; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 180 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 183 for (int i = remain_size_start; i < size; i++) in conv1x1s1_sgemm_pack8to1_fp16sa_neon()
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_sgemm_pack8_fp16s.h | 41 int remain_size_start = 0; in im2col_sgemm_pack8_fp16sa_neon() local 46 int i = remain_size_start + ii * 12; in im2col_sgemm_pack8_fp16sa_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon() 97 int i = remain_size_start + ii * 8; in im2col_sgemm_pack8_fp16sa_neon() 135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon() 136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon() 141 int i = remain_size_start + ii * 4; in im2col_sgemm_pack8_fp16sa_neon() 165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon() 171 int i = remain_size_start + ii * 2; in im2col_sgemm_pack8_fp16sa_neon() [all …]
|
H A D | convolution_sgemm_pack8to4_int8.h | 57 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() local 62 int i = remain_size_start + ii * 16; in im2col_sgemm_pack8to4_int8_neon() 98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon() 104 int i = remain_size_start + ii * 8; in im2col_sgemm_pack8to4_int8_neon() 133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon() 136 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() 143 int i = remain_size_start + ii * 4; in im2col_sgemm_pack8to4_int8_neon() 183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon() 186 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() 193 int i = remain_size_start + ii * 2; in im2col_sgemm_pack8to4_int8_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 53 int remain_size_start = 0; in im2col_sgemm_pack4_neon() local 58 int i = remain_size_start + ii * 12; in im2col_sgemm_pack4_neon() 98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon() 102 int remain_size_start = 0; in im2col_sgemm_pack4_neon() 108 int i = remain_size_start + ii * 8; in im2col_sgemm_pack4_neon() 171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon() 177 int i = remain_size_start + ii * 4; in im2col_sgemm_pack4_neon() 217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon() 223 int i = remain_size_start + ii * 2; in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 53 int remain_size_start = 0; in im2col_sgemm_pack4_bf16s_neon() local 58 int i = remain_size_start + ii * 12; in im2col_sgemm_pack4_bf16s_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon() 95 int remain_size_start = 0; in im2col_sgemm_pack4_bf16s_neon() 101 int i = remain_size_start + ii * 8; in im2col_sgemm_pack4_bf16s_neon() 151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon() 157 int i = remain_size_start + ii * 4; in im2col_sgemm_pack4_bf16s_neon() 197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon() 203 int i = remain_size_start + ii * 2; in im2col_sgemm_pack4_bf16s_neon() [all …]
|
H A D | convolution_1x1_pack8to1_fp16s.h | 106 int remain_size_start = 0; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() local 108 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 113 int i = remain_size_start + ii * 8; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 150 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 151 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 156 int i = remain_size_start + ii * 4; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 180 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 183 for (int i = remain_size_start; i < size; i++) in conv1x1s1_sgemm_pack8to1_fp16sa_neon()
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_sgemm_pack8_fp16s.h | 41 int remain_size_start = 0; in im2col_sgemm_pack8_fp16sa_neon() local 46 int i = remain_size_start + ii * 12; in im2col_sgemm_pack8_fp16sa_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon() 97 int i = remain_size_start + ii * 8; in im2col_sgemm_pack8_fp16sa_neon() 135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon() 136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon() 141 int i = remain_size_start + ii * 4; in im2col_sgemm_pack8_fp16sa_neon() 165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon() 171 int i = remain_size_start + ii * 2; in im2col_sgemm_pack8_fp16sa_neon() [all …]
|
H A D | convolution_sgemm_pack8to4_int8.h | 57 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() local 62 int i = remain_size_start + ii * 16; in im2col_sgemm_pack8to4_int8_neon() 98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon() 104 int i = remain_size_start + ii * 8; in im2col_sgemm_pack8to4_int8_neon() 133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon() 136 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() 143 int i = remain_size_start + ii * 4; in im2col_sgemm_pack8to4_int8_neon() 183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon() 186 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() 193 int i = remain_size_start + ii * 2; in im2col_sgemm_pack8to4_int8_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 53 int remain_size_start = 0; in im2col_sgemm_pack4_neon() local 58 int i = remain_size_start + ii * 12; in im2col_sgemm_pack4_neon() 98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon() 102 int remain_size_start = 0; in im2col_sgemm_pack4_neon() 108 int i = remain_size_start + ii * 8; in im2col_sgemm_pack4_neon() 171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon() 177 int i = remain_size_start + ii * 4; in im2col_sgemm_pack4_neon() 217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon() 223 int i = remain_size_start + ii * 2; in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 53 int remain_size_start = 0; in im2col_sgemm_pack4_bf16s_neon() local 58 int i = remain_size_start + ii * 12; in im2col_sgemm_pack4_bf16s_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon() 95 int remain_size_start = 0; in im2col_sgemm_pack4_bf16s_neon() 101 int i = remain_size_start + ii * 8; in im2col_sgemm_pack4_bf16s_neon() 151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon() 157 int i = remain_size_start + ii * 4; in im2col_sgemm_pack4_bf16s_neon() 197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon() 203 int i = remain_size_start + ii * 2; in im2col_sgemm_pack4_bf16s_neon() [all …]
|
H A D | convolution_1x1_pack8to1_fp16s.h | 106 int remain_size_start = 0; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() local 108 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 113 int i = remain_size_start + ii * 8; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 150 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 151 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 156 int i = remain_size_start + ii * 4; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 180 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 183 for (int i = remain_size_start; i < size; i++) in conv1x1s1_sgemm_pack8to1_fp16sa_neon()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_sgemm_pack8_fp16s.h | 41 int remain_size_start = 0; in im2col_sgemm_pack8_fp16sa_neon() local 46 int i = remain_size_start + ii * 12; in im2col_sgemm_pack8_fp16sa_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack8_fp16sa_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack8_fp16sa_neon() 97 int i = remain_size_start + ii * 8; in im2col_sgemm_pack8_fp16sa_neon() 135 remain_size_start += nn_size << 3; in im2col_sgemm_pack8_fp16sa_neon() 136 nn_size = (size - remain_size_start) >> 2; in im2col_sgemm_pack8_fp16sa_neon() 141 int i = remain_size_start + ii * 4; in im2col_sgemm_pack8_fp16sa_neon() 165 remain_size_start += nn_size << 2; in im2col_sgemm_pack8_fp16sa_neon() 171 int i = remain_size_start + ii * 2; in im2col_sgemm_pack8_fp16sa_neon() [all …]
|
H A D | convolution_sgemm_pack8to4_int8.h | 57 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() local 62 int i = remain_size_start + ii * 16; in im2col_sgemm_pack8to4_int8_neon() 98 remain_size_start += nn_size << 4; in im2col_sgemm_pack8to4_int8_neon() 104 int i = remain_size_start + ii * 8; in im2col_sgemm_pack8to4_int8_neon() 133 remain_size_start += nn_size << 3; in im2col_sgemm_pack8to4_int8_neon() 136 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() 143 int i = remain_size_start + ii * 4; in im2col_sgemm_pack8to4_int8_neon() 183 remain_size_start += nn_size << 2; in im2col_sgemm_pack8to4_int8_neon() 186 int remain_size_start = 0; in im2col_sgemm_pack8to4_int8_neon() 193 int i = remain_size_start + ii * 2; in im2col_sgemm_pack8to4_int8_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 53 int remain_size_start = 0; in im2col_sgemm_pack4_neon() local 58 int i = remain_size_start + ii * 12; in im2col_sgemm_pack4_neon() 98 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_neon() 99 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_neon() 102 int remain_size_start = 0; in im2col_sgemm_pack4_neon() 108 int i = remain_size_start + ii * 8; in im2col_sgemm_pack4_neon() 171 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_neon() 177 int i = remain_size_start + ii * 4; in im2col_sgemm_pack4_neon() 217 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_neon() 223 int i = remain_size_start + ii * 2; in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 53 int remain_size_start = 0; in im2col_sgemm_pack4_bf16s_neon() local 58 int i = remain_size_start + ii * 12; in im2col_sgemm_pack4_bf16s_neon() 91 remain_size_start += nn_size * 12; in im2col_sgemm_pack4_bf16s_neon() 92 nn_size = (size - remain_size_start) >> 3; in im2col_sgemm_pack4_bf16s_neon() 95 int remain_size_start = 0; in im2col_sgemm_pack4_bf16s_neon() 101 int i = remain_size_start + ii * 8; in im2col_sgemm_pack4_bf16s_neon() 151 remain_size_start += nn_size << 3; in im2col_sgemm_pack4_bf16s_neon() 157 int i = remain_size_start + ii * 4; in im2col_sgemm_pack4_bf16s_neon() 197 remain_size_start += nn_size << 2; in im2col_sgemm_pack4_bf16s_neon() 203 int i = remain_size_start + ii * 2; in im2col_sgemm_pack4_bf16s_neon() [all …]
|
H A D | convolution_1x1_pack8to1_fp16s.h | 106 int remain_size_start = 0; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() local 108 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 113 int i = remain_size_start + ii * 8; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 150 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 151 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 156 int i = remain_size_start + ii * 4; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 180 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 183 for (int i = remain_size_start; i < size; i++) in conv1x1s1_sgemm_pack8to1_fp16sa_neon()
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_1x1_pack8_fp16s.h | 92 int remain_size_start; in conv1x1s1_sgemm_pack8_fp16sa_neon() local 95 remain_size_start = nn_size * 12; in conv1x1s1_sgemm_pack8_fp16sa_neon() 143 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack8_fp16sa_neon() 148 int i = remain_size_start + ii * 8; in conv1x1s1_sgemm_pack8_fp16sa_neon() 185 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack8_fp16sa_neon() 186 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack8_fp16sa_neon() 191 int i = remain_size_start + ii * 4; in conv1x1s1_sgemm_pack8_fp16sa_neon() 214 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack8_fp16sa_neon() 215 nn_size = (size - remain_size_start) >> 1; in conv1x1s1_sgemm_pack8_fp16sa_neon() 220 int i = remain_size_start + ii * 2; in conv1x1s1_sgemm_pack8_fp16sa_neon() [all …]
|
H A D | convolution_1x1_pack4.h | 161 int remain_size_start; in conv1x1s1_sgemm_pack4_neon() local 165 remain_size_start = nn_size * 12; in conv1x1s1_sgemm_pack4_neon() 208 remain_size_start = 0; in conv1x1s1_sgemm_pack4_neon() 210 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4_neon() 215 int i = remain_size_start + ii * 8; in conv1x1s1_sgemm_pack4_neon() 276 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4_neon() 277 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4_neon() 282 int i = remain_size_start + ii * 4; in conv1x1s1_sgemm_pack4_neon() 320 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack4_neon() 326 int i = remain_size_start + ii * 2; in conv1x1s1_sgemm_pack4_neon() [all …]
|
H A D | convolution_1x1_pack4_bf16s.h | 178 int remain_size_start; in conv1x1s1_sgemm_pack4_bf16s_neon() local 182 remain_size_start = nn_size * 12; in conv1x1s1_sgemm_pack4_bf16s_neon() 218 remain_size_start = 0; in conv1x1s1_sgemm_pack4_bf16s_neon() 220 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack4_bf16s_neon() 225 int i = remain_size_start + ii * 8; in conv1x1s1_sgemm_pack4_bf16s_neon() 273 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack4_bf16s_neon() 274 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4_bf16s_neon() 279 int i = remain_size_start + ii * 4; in conv1x1s1_sgemm_pack4_bf16s_neon() 317 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack4_bf16s_neon() 323 int i = remain_size_start + ii * 2; in conv1x1s1_sgemm_pack4_bf16s_neon() [all …]
|
H A D | convolution_1x1_pack8to1_fp16s.h | 106 int remain_size_start = 0; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() local 108 nn_size = (size - remain_size_start) >> 3; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 113 int i = remain_size_start + ii * 8; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 150 remain_size_start += nn_size << 3; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 151 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 156 int i = remain_size_start + ii * 4; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 180 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack8to1_fp16sa_neon() 183 for (int i = remain_size_start; i < size; i++) in conv1x1s1_sgemm_pack8to1_fp16sa_neon()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | convolution_1x1_pack4.h | 81 int remain_size_start; in conv1x1s1_sgemm_pack4_sse() local 83 remain_size_start = 0; in conv1x1s1_sgemm_pack4_sse() 84 nn_size = (size - remain_size_start) >> 2; in conv1x1s1_sgemm_pack4_sse() 89 int i = remain_size_start + ii * 4; in conv1x1s1_sgemm_pack4_sse() 112 remain_size_start += nn_size << 2; in conv1x1s1_sgemm_pack4_sse() 113 nn_size = (size - remain_size_start) >> 1; in conv1x1s1_sgemm_pack4_sse() 118 int i = remain_size_start + ii * 2; in conv1x1s1_sgemm_pack4_sse() 137 remain_size_start += nn_size << 1; in conv1x1s1_sgemm_pack4_sse() 140 for (int i = remain_size_start; i < size; i++) in conv1x1s1_sgemm_pack4_sse()
|