/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_1x1_int8.h | 430 int sum0_3 = 0; in conv1x1s1_sgemm_int8_neon() local 456 sum0_3 += tmpptr[6] * kptr[0]; in conv1x1s1_sgemm_int8_neon() 457 sum0_3 += tmpptr[7] * kptr[1]; in conv1x1s1_sgemm_int8_neon() 519 outptr0[3] = sum0_3; in conv1x1s1_sgemm_int8_neon() 1089 int sum0_3 = 0; in conv1x1s1_sgemm_int8_requant_neon() local 1758 int sum0_3 = 0; in conv1x1s1_sgemm_int8_neon() local 1836 outptr0[3] = sum0_3; in conv1x1s1_sgemm_int8_neon() 1972 int sum0_3 = 0; in conv1x1s1_sgemm_int8_neon() local 2018 outptr0[3] = sum0_3; in conv1x1s1_sgemm_int8_neon() 2793 int sum0_3 = 0; in conv1x1s1_sgemm_int8_requant_neon() local [all …]
|
H A D | convolution_1x1_bf16s.h | 1152 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() local 1190 sum0_3 += bfloat16_to_float32(tmpptr[3]) * bfloat16_to_float32(kptr[0]); in conv1x1s1_sgemm_bf16s_neon() 1230 outptr0[3] = float32_to_bfloat16(sum0_3); in conv1x1s1_sgemm_bf16s_neon() 1505 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() local 1527 sum0_3 += bfloat16_to_float32(tmpptr[3]) * bfloat16_to_float32(kptr[0]); in conv1x1s1_sgemm_bf16s_neon() 1551 outptr0[3] = float32_to_bfloat16(sum0_3); in conv1x1s1_sgemm_bf16s_neon()
|
H A D | convolution_1x1.h | 1006 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_neon() local 1044 sum0_3 += tmpptr[3] * kptr[0]; in conv1x1s1_sgemm_neon() 1084 outptr0[3] = sum0_3; in conv1x1s1_sgemm_neon() 1325 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_neon() local 1347 sum0_3 += tmpptr[3] * kptr[0]; in conv1x1s1_sgemm_neon() 1371 outptr0[3] = sum0_3; in conv1x1s1_sgemm_neon()
|
H A D | convolution_3x3.h | 6302 float sum0_3 = 0.f; in conv3x3s1_winograd64_neon5() local 6340 sum0_3 += bb2p0[3] * ktm0[0]; in conv3x3s1_winograd64_neon5() 6380 output0_tm[3] = sum0_3; in conv3x3s1_winograd64_neon5() 6610 float sum0_3 = 0.f; in conv3x3s1_winograd64_neon5() local 6632 sum0_3 += bb2p0[3] * ktm0[0]; in conv3x3s1_winograd64_neon5() 6656 output0_tm[3] = sum0_3; in conv3x3s1_winograd64_neon5()
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_1x1_bf16s.h | 1152 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() local 1190 sum0_3 += bfloat16_to_float32(tmpptr[3]) * bfloat16_to_float32(kptr[0]); in conv1x1s1_sgemm_bf16s_neon() 1230 outptr0[3] = float32_to_bfloat16(sum0_3); in conv1x1s1_sgemm_bf16s_neon() 1505 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() local 1527 sum0_3 += bfloat16_to_float32(tmpptr[3]) * bfloat16_to_float32(kptr[0]); in conv1x1s1_sgemm_bf16s_neon() 1551 outptr0[3] = float32_to_bfloat16(sum0_3); in conv1x1s1_sgemm_bf16s_neon()
|
H A D | convolution_3x3.h | 6302 float sum0_3 = 0.f; in conv3x3s1_winograd64_neon5() local 6340 sum0_3 += bb2p0[3] * ktm0[0]; in conv3x3s1_winograd64_neon5() 6380 output0_tm[3] = sum0_3; in conv3x3s1_winograd64_neon5() 6610 float sum0_3 = 0.f; in conv3x3s1_winograd64_neon5() local 6632 sum0_3 += bb2p0[3] * ktm0[0]; in conv3x3s1_winograd64_neon5() 6656 output0_tm[3] = sum0_3; in conv3x3s1_winograd64_neon5()
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_1x1_bf16s.h | 1152 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() local 1190 sum0_3 += bfloat16_to_float32(tmpptr[3]) * bfloat16_to_float32(kptr[0]); in conv1x1s1_sgemm_bf16s_neon() 1230 outptr0[3] = float32_to_bfloat16(sum0_3); in conv1x1s1_sgemm_bf16s_neon() 1505 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() local 1527 sum0_3 += bfloat16_to_float32(tmpptr[3]) * bfloat16_to_float32(kptr[0]); in conv1x1s1_sgemm_bf16s_neon() 1551 outptr0[3] = float32_to_bfloat16(sum0_3); in conv1x1s1_sgemm_bf16s_neon()
|
H A D | convolution_3x3.h | 6302 float sum0_3 = 0.f; in conv3x3s1_winograd64_neon5() local 6340 sum0_3 += bb2p0[3] * ktm0[0]; in conv3x3s1_winograd64_neon5() 6380 output0_tm[3] = sum0_3; in conv3x3s1_winograd64_neon5() 6610 float sum0_3 = 0.f; in conv3x3s1_winograd64_neon5() local 6632 sum0_3 += bb2p0[3] * ktm0[0]; in conv3x3s1_winograd64_neon5() 6656 output0_tm[3] = sum0_3; in conv3x3s1_winograd64_neon5()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_1x1_bf16s.h | 1152 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() local 1190 sum0_3 += bfloat16_to_float32(tmpptr[3]) * bfloat16_to_float32(kptr[0]); in conv1x1s1_sgemm_bf16s_neon() 1230 outptr0[3] = float32_to_bfloat16(sum0_3); in conv1x1s1_sgemm_bf16s_neon() 1505 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() local 1527 sum0_3 += bfloat16_to_float32(tmpptr[3]) * bfloat16_to_float32(kptr[0]); in conv1x1s1_sgemm_bf16s_neon() 1551 outptr0[3] = float32_to_bfloat16(sum0_3); in conv1x1s1_sgemm_bf16s_neon()
|
H A D | convolution_3x3.h | 6302 float sum0_3 = 0.f; in conv3x3s1_winograd64_neon5() local 6340 sum0_3 += bb2p0[3] * ktm0[0]; in conv3x3s1_winograd64_neon5() 6380 output0_tm[3] = sum0_3; in conv3x3s1_winograd64_neon5() 6610 float sum0_3 = 0.f; in conv3x3s1_winograd64_neon5() local 6632 sum0_3 += bb2p0[3] * ktm0[0]; in conv3x3s1_winograd64_neon5() 6656 output0_tm[3] = sum0_3; in conv3x3s1_winograd64_neon5()
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_1x1_bf16s.h | 1152 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() local 1190 sum0_3 += bfloat16_to_float32(tmpptr[3]) * bfloat16_to_float32(kptr[0]); in conv1x1s1_sgemm_bf16s_neon() 1230 outptr0[3] = float32_to_bfloat16(sum0_3); in conv1x1s1_sgemm_bf16s_neon() 1505 float sum0_3 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() local 1527 sum0_3 += bfloat16_to_float32(tmpptr[3]) * bfloat16_to_float32(kptr[0]); in conv1x1s1_sgemm_bf16s_neon() 1551 outptr0[3] = float32_to_bfloat16(sum0_3); in conv1x1s1_sgemm_bf16s_neon()
|
H A D | convolution_3x3.h | 6302 float sum0_3 = 0.f; in conv3x3s1_winograd64_neon5() local 6340 sum0_3 += bb2p0[3] * ktm0[0]; in conv3x3s1_winograd64_neon5() 6380 output0_tm[3] = sum0_3; in conv3x3s1_winograd64_neon5() 6610 float sum0_3 = 0.f; in conv3x3s1_winograd64_neon5() local 6632 sum0_3 += bb2p0[3] * ktm0[0]; in conv3x3s1_winograd64_neon5() 6656 output0_tm[3] = sum0_3; in conv3x3s1_winograd64_neon5()
|
/dports/math/ntl/ntl-11.5.1/src/ |
H A D | mat_lzz_p.cpp | 2282 unsigned long sum0_3 = ll_get_lo(sum_3); in muladd1_by_32_full() local 2289 x[j+3] = sp_ll_red_31_normalized(0, sum1_3, sum0_3, p, ll_red_struct); in muladd1_by_32_full() 2295 x[j+3] = sp_ll_red_31(0, sum1_3, sum0_3, p, ll_red_struct); in muladd1_by_32_full() 2361 unsigned long sum0_3 = ll_get_lo(sum_3); in muladd1_by_32_full_width() local 2368 x[j+3] = sp_ll_red_31_normalized(0, sum1_3, sum0_3, p, ll_red_struct); in muladd1_by_32_full_width() 2374 x[j+3] = sp_ll_red_31(0, sum1_3, sum0_3, p, ll_red_struct); in muladd1_by_32_full_width()
|