/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | innerproduct_x86.cpp | 270 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 651 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local 948 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 1052 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local 1172 __m256 _w4 = _mm256_loadu_ps(w4); in forward() local 1511 __m256 _w4 = loadfp16(w4); in forward_fp16() local 1576 __m256 _w4 = loadfp16(fp16_weights[4]); in forward_fp16() local
|
H A D | convolution_1x1_pack8_fp16.h | 387 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 635 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 807 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 922 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 975 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local
|
H A D | convolution_1x1_pack8.h | 387 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 635 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 807 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 922 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 975 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_pack8_fp16s.h | 80 float16x8_t _w4 = vld1q_f16(kptr + 32); in convolution_pack8_fp16sa_neon() local
|
H A D | convolution_pack8to4_fp16s.h | 80 float16x4_t _w4 = vld1_f16(kptr + 16); in convolution_pack8to4_fp16sa_neon() local
|
H A D | convolution_3x3_pack8to4_int8.h | 1004 int16x8_t _w4 = vld1q_s16(k0 + 32); in conv3x3s1_winograd42_pack8to4_int8_neon() local 1176 int16x8_t _w4 = vld1q_s16(k0 + 32); in conv3x3s1_winograd42_pack8to4_int8_neon() local 1389 int16x8_t _w4 = vld1q_s16(k0 + 32); in conv3x3s1_winograd42_pack8to4_int8_neon() local 1467 int16x8_t _w4 = vld1q_s16(k0 + 32); in conv3x3s1_winograd42_pack8to4_int8_neon() local
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_pack8to4_fp16s.h | 80 float16x4_t _w4 = vld1_f16(kptr + 16); in convolution_pack8to4_fp16sa_neon() local
|
H A D | convolution_pack8_fp16s.h | 80 float16x8_t _w4 = vld1q_f16(kptr + 32); in convolution_pack8_fp16sa_neon() local
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_pack8_fp16s.h | 80 float16x8_t _w4 = vld1q_f16(kptr + 32); in convolution_pack8_fp16sa_neon() local
|
H A D | convolution_pack8to4_fp16s.h | 80 float16x4_t _w4 = vld1_f16(kptr + 16); in convolution_pack8to4_fp16sa_neon() local
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_pack8to4_fp16s.h | 80 float16x4_t _w4 = vld1_f16(kptr + 16); in convolution_pack8to4_fp16sa_neon() local
|
H A D | convolution_pack8_fp16s.h | 80 float16x8_t _w4 = vld1q_f16(kptr + 32); in convolution_pack8_fp16sa_neon() local
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | innerproduct_x86.cpp | 277 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 658 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local 932 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 1036 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local 1156 __m256 _w4 = _mm256_loadu_ps(w4); in forward() local 1470 __m256 _w4 = loadfp16(w4); in forward_fp16() local 1535 __m256 _w4 = loadfp16(fp16_weights[4]); in forward_fp16() local
|
H A D | convolution_1x1_pack8.h | 395 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 600 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 772 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 887 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 940 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local
|
H A D | convolution_1x1_pack8_fp16.h | 387 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 635 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 807 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 922 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 975 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | innerproduct_x86.cpp | 277 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 658 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local 932 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 1036 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local 1156 __m256 _w4 = _mm256_loadu_ps(w4); in forward() local 1470 __m256 _w4 = loadfp16(w4); in forward_fp16() local 1535 __m256 _w4 = loadfp16(fp16_weights[4]); in forward_fp16() local
|
H A D | convolution_1x1_pack8.h | 395 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 600 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 772 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 887 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 940 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local
|
H A D | convolution_1x1_pack8_fp16.h | 387 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 635 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 807 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 922 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 975 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | innerproduct_x86.cpp | 277 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 658 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local 932 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 1036 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local 1156 __m256 _w4 = _mm256_loadu_ps(w4); in forward() local 1470 __m256 _w4 = loadfp16(w4); in forward_fp16() local 1535 __m256 _w4 = loadfp16(fp16_weights[4]); in forward_fp16() local
|
H A D | convolution_1x1_pack8_fp16.h | 387 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 635 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 807 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 922 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 975 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local
|
H A D | convolution_1x1_pack8.h | 395 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 600 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 772 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 887 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 940 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | innerproduct_x86.cpp | 277 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 658 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local 932 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 1036 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local 1156 __m256 _w4 = _mm256_loadu_ps(w4); in forward() local 1470 __m256 _w4 = loadfp16(w4); in forward_fp16() local 1535 __m256 _w4 = loadfp16(fp16_weights[4]); in forward_fp16() local
|
H A D | convolution1d_x86.cpp | 173 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in forward() local 379 __m128 _w4 = _mm_loadu_ps(kptr + 16); in forward() local
|
H A D | convolution_1x1_pack8_fp16.h | 387 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 635 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 807 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 922 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local 975 __m256 _w4 = loadfp16(kptr + 32); in conv1x1s1_sgemm_fp16_pack8_avx() local
|
H A D | convolution_1x1_pack8.h | 395 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 600 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 772 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 887 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local 940 __m256 _w4 = _mm256_loadu_ps(kptr + 32); in conv1x1s1_sgemm_pack8_avx() local
|