/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | convolution_3x3_pack1to4.h | 91 __m128 _r23 = _mm_set1_ps(*(r2 + 2)); in conv3x3s1_pack1to4_sse() local 226 __m128 _r23 = _mm_set1_ps(*(r2 + 2)); in conv3x3s1_pack1to4_sse() local 301 __m128 _r23 = _mm_set1_ps(*(r2 + 2)); in conv3x3s1_pack1to4_sse() local 390 __m128 _r23 = _mm_set1_ps(*(r2 + 2)); in conv3x3s1_pack1to4_sse() local 469 __m128 _r23 = _mm_set1_ps(*(r2 + 2)); in conv3x3s1_pack1to4_sse() local 516 __m128 _r23 = _mm_set1_ps(*(r2 + 2)); in conv3x3s1_pack1to4_sse() local 625 __m128 _r23 = _mm_set1_ps(*(r2 + 2)); in conv3x3s2_pack1to4_sse() local 768 __m128 _r23 = _mm_set1_ps(*(r2 + 2)); in conv3x3s2_pack1to4_sse() local 845 __m128 _r23 = _mm_set1_ps(*(r2 + 2)); in conv3x3s2_pack1to4_sse() local 933 __m128 _r23 = _mm_set1_ps(*(r2 + 2)); in conv3x3s2_pack1to4_sse() local [all …]
|
H A D | convolution_3x3_pack1to8.h | 91 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 226 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 301 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 390 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 469 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 516 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 625 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 898 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 1041 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 1118 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local [all …]
|
H A D | convolutiondepthwise_3x3_pack4.h | 83 __m128 _r23 = _mm_loadu_ps(r2 + 12); in convdw3x3s1_pack4_sse() local 225 __m128 _r23 = _mm_loadu_ps(r2 + 12); in convdw3x3s1_pack4_sse() local 304 __m128 _r23 = _mm_loadu_ps(r2 + 12); in convdw3x3s1_pack4_sse() local 435 __m128 _r23 = _mm_loadu_ps(r2 + 12); in convdw3x3s2_pack4_sse() local 523 __m128 _r23 = _mm_loadu_ps(r2 + 12); in convdw3x3s2_pack4_sse() local
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 83 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 225 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 304 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 435 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_fp16_pack8_avx() local 523 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_fp16_pack8_avx() local
|
H A D | convolutiondepthwise_3x3_pack8.h | 83 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 225 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 304 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 435 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_pack8_avx() local 523 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_pack8_avx() local
|
H A D | pooling_3x3_pack8.h | 62 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in pooling3x3s2_max_pack8_avx() local 140 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in pooling3x3s2_max_pack8_avx() local
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | convolution_3x3_pack1to8.h | 91 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 226 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 301 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 390 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 469 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 516 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 625 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 898 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 1041 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 1118 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 83 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 225 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 304 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 435 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_fp16_pack8_avx() local 523 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_fp16_pack8_avx() local
|
H A D | convolutiondepthwise_3x3_pack8.h | 83 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 225 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 304 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 435 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_pack8_avx() local 523 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_pack8_avx() local
|
H A D | pooling_3x3_pack8.h | 62 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in pooling3x3s2_max_pack8_avx() local 140 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in pooling3x3s2_max_pack8_avx() local
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | convolution_3x3_pack1to8.h | 91 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 226 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 301 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 390 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 469 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 516 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 625 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 898 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 1041 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 1118 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local [all …]
|
H A D | convolutiondepthwise_3x3_pack8.h | 83 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 225 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 304 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 435 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_pack8_avx() local 523 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_pack8_avx() local
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 83 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 225 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 304 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 435 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_fp16_pack8_avx() local 523 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_fp16_pack8_avx() local
|
H A D | pooling_3x3_pack8.h | 62 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in pooling3x3s2_max_pack8_avx() local 140 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in pooling3x3s2_max_pack8_avx() local
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | convolution_3x3_pack1to8.h | 91 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 226 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 301 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 390 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 469 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 516 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 625 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 898 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 1041 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 1118 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local [all …]
|
H A D | convolutiondepthwise_3x3_pack8.h | 83 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 225 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 304 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 435 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_pack8_avx() local 523 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_pack8_avx() local
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 83 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 225 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 304 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 435 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_fp16_pack8_avx() local 523 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_fp16_pack8_avx() local
|
H A D | pooling_3x3_pack8.h | 62 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in pooling3x3s2_max_pack8_avx() local 140 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in pooling3x3s2_max_pack8_avx() local
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | convolution_3x3_pack1to8.h | 91 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 226 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 301 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 390 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 469 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 516 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s1_pack1to8_avx() local 625 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 898 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 1041 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local 1118 __m256 _r23 = _mm256_broadcast_ss(r2 + 2); in conv3x3s2_pack1to8_avx() local [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 83 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 225 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 304 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_fp16_pack8_avx() local 435 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_fp16_pack8_avx() local 523 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_fp16_pack8_avx() local
|
H A D | convolutiondepthwise_3x3_pack8.h | 83 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 225 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 304 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s1_pack8_avx() local 435 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_pack8_avx() local 523 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in convdw3x3s2_pack8_avx() local
|
H A D | pooling_3x3_pack8.h | 62 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in pooling3x3s2_max_pack8_avx() local 140 __m256 _r23 = _mm256_loadu_ps(r2 + 24); in pooling3x3s2_max_pack8_avx() local
|
/dports/misc/ncnn/ncnn-20211208/src/layer/mips/ |
H A D | convolution_3x3_pack1to4.h | 157 v4f32 _r23 = (v4f32)__msa_splati_w(_r2, 3); in conv3x3s1_pack1to4_msa() local 264 v4f32 _r23 = (v4f32)__msa_splati_w(_r2, 3); in conv3x3s1_pack1to4_msa() local 327 v4f32 _r23 = (v4f32)__msa_splati_w(_r2, 3); in conv3x3s1_pack1to4_msa() local 557 v4f32 _r23 = (v4f32)__msa_splati_w(_r2, 3); in conv3x3s2_pack1to4_msa() local 677 v4f32 _r23 = (v4f32)__msa_splati_w(_r2, 3); in conv3x3s2_pack1to4_msa() local 745 v4f32 _r23 = (v4f32)__msa_splati_w(_r2, 3); in conv3x3s2_pack1to4_msa() local
|
H A D | convolutiondepthwise_3x3_pack4.h | 104 v4f32 _r23 = (v4f32)__msa_ld_w(r2 + 4 * 3, 0); in convdw3x3s1_pack4_msa() local 251 v4f32 _r23 = (v4f32)__msa_ld_w(r2 + 4 * 3, 0); in convdw3x3s1_pack4_msa() local 399 v4f32 _r23 = (v4f32)__msa_ld_w(r2 + 4 * 3, 0); in convdw3x3s2_pack4_msa() local
|
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/ |
H A D | convolutiondepthwise_3x3_packn_fp16s.h | 102 vfloat16m1_t _r23 = vle16_v_f16m1(r2 + packn * 3, vl); in convdw3x3s1_packn_fp16sa_rvv() local 240 vfloat16m1_t _r23 = vle16_v_f16m1(r2 + packn * 3, vl); in convdw3x3s1_packn_fp16sa_rvv() local 383 vfloat16m1_t _r23 = vle16_v_f16m1(r2 + packn * 3, vl); in convdw3x3s2_packn_fp16sa_rvv() local
|