/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | convolution_1x1_pack4.h | 191 __m128 _val30 = _mm_load1_ps(tmpptr + 12); in conv1x1s1_sgemm_pack4_sse() local 214 _sum3 = _mm_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack4_sse() 231 _sum3 = _mm_add_ps(_mm_mul_ps(_w0, _val30), _sum3); in conv1x1s1_sgemm_pack4_sse()
|
H A D | convolution_1x1_pack8.h | 435 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 440 _mm256_fmadd_ps4(_sum3, _w0, _w1, _w2, _w3, _val30, _val31, _val32, _val33); in conv1x1s1_sgemm_pack8_avx() 647 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 664 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx() 819 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 836 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx()
|
H A D | convolution_1x1_pack8_fp16.h | 434 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 451 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx() 682 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 699 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx() 854 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 871 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx()
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | convolution_1x1_pack4.h | 191 __m128 _val30 = _mm_load1_ps(tmpptr + 12); in conv1x1s1_sgemm_pack4_sse() local 214 _sum3 = _mm_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack4_sse() 231 _sum3 = _mm_add_ps(_mm_mul_ps(_w0, _val30), _sum3); in conv1x1s1_sgemm_pack4_sse()
|
H A D | convolution_1x1_pack8.h | 435 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 440 _mm256_fmadd_ps4(_sum3, _w0, _w1, _w2, _w3, _val30, _val31, _val32, _val33); in conv1x1s1_sgemm_pack8_avx() 647 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 664 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx() 819 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 836 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx()
|
H A D | convolution_1x1_pack8_fp16.h | 434 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 451 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx() 682 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 699 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx() 854 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 871 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx()
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | convolution_1x1_pack4.h | 191 __m128 _val30 = _mm_load1_ps(tmpptr + 12); in conv1x1s1_sgemm_pack4_sse() local 214 _sum3 = _mm_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack4_sse() 231 _sum3 = _mm_add_ps(_mm_mul_ps(_w0, _val30), _sum3); in conv1x1s1_sgemm_pack4_sse()
|
H A D | convolution_1x1_pack8.h | 435 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 440 _mm256_fmadd_ps4(_sum3, _w0, _w1, _w2, _w3, _val30, _val31, _val32, _val33); in conv1x1s1_sgemm_pack8_avx() 647 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 664 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx() 819 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 836 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx()
|
H A D | convolution_1x1_pack8_fp16.h | 434 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 451 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx() 682 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 699 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx() 854 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 871 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx()
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | convolution_1x1_pack4.h | 191 __m128 _val30 = _mm_load1_ps(tmpptr + 12); in conv1x1s1_sgemm_pack4_sse() local 214 _sum3 = _mm_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack4_sse() 231 _sum3 = _mm_add_ps(_mm_mul_ps(_w0, _val30), _sum3); in conv1x1s1_sgemm_pack4_sse()
|
H A D | convolution_1x1_pack8.h | 434 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 451 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx() 682 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 699 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx() 854 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 871 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx()
|
H A D | convolution_1x1_pack8_fp16.h | 434 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 451 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx() 682 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 699 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx() 854 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 871 _sum3 = _mm256_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | convolution_1x1_pack4.h | 191 __m128 _val30 = _mm_load1_ps(tmpptr + 12); in conv1x1s1_sgemm_pack4_sse() local 213 _sum3 = _mm_comp_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack4_sse()
|
H A D | convolution_1x1_pack8.h | 435 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 440 _mm256_comp_fmadd_ps4(_sum3, _w0, _w1, _w2, _w3, _val30, _val31, _val32, _val33); in conv1x1s1_sgemm_pack8_avx() 647 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 664 _sum3 = _mm256_comp_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx() 819 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_pack8_avx() local 836 _sum3 = _mm256_comp_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_pack8_avx()
|
H A D | convolution_1x1_pack8_fp16.h | 434 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 451 _sum3 = _mm256_comp_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx() 682 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 699 _sum3 = _mm256_comp_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx() 854 __m256 _val30 = _mm256_broadcast_ss(tmpptr + 24); in conv1x1s1_sgemm_fp16_pack8_avx() local 871 _sum3 = _mm256_comp_fmadd_ps(_w0, _val30, _sum3); in conv1x1s1_sgemm_fp16_pack8_avx()
|