/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | lstm_x86.cpp | 113 __m256 _sumF_1 = _mm256_setzero_ps(); in lstm_fp16() local 126 _sumF_1 = _mm256_fmadd_ps(loadfp16(weight_xc_F_1), xi, _sumF_1); in lstm_fp16() 150 _sumF_1 = _mm256_fmadd_ps(loadfp16(weight_hc_F_1), h_cont, _sumF_1); in lstm_fp16() 196 _sumF_1 = _mm256_fmadd_ps(loadfp16(fp16_weights[5]), xi, _sumF_1); in lstm_fp16() 233 _sumF_1 = _mm256_fmadd_ps(loadfp16(fp16_weights[5]), h_cont, _sumF_1); in lstm_fp16() 238 …reu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_1, _sumG_1… in lstm_fp16() 514 __m256 _sumF_1 = _mm256_setzero_ps(); in lstm() local 527 _sumF_1 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_xc_F_1), xi, _sumF_1); in lstm() 551 _sumF_1 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_hc_F_1), h_cont, _sumF_1); in lstm() 565 …reu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_1, _sumG_1… in lstm()
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | lstm_x86.cpp | 113 __m256 _sumF_1 = _mm256_setzero_ps(); in lstm_fp16() local 126 _sumF_1 = _mm256_fmadd_ps(loadfp16(weight_xc_F_1), xi, _sumF_1); in lstm_fp16() 150 _sumF_1 = _mm256_fmadd_ps(loadfp16(weight_hc_F_1), h_cont, _sumF_1); in lstm_fp16() 196 _sumF_1 = _mm256_fmadd_ps(loadfp16(fp16_weights[5]), xi, _sumF_1); in lstm_fp16() 233 _sumF_1 = _mm256_fmadd_ps(loadfp16(fp16_weights[5]), h_cont, _sumF_1); in lstm_fp16() 238 …reu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_1, _sumG_1… in lstm_fp16() 514 __m256 _sumF_1 = _mm256_setzero_ps(); in lstm() local 527 _sumF_1 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_xc_F_1), xi, _sumF_1); in lstm() 551 _sumF_1 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_hc_F_1), h_cont, _sumF_1); in lstm() 565 …reu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_1, _sumG_1… in lstm()
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | lstm_x86.cpp | 113 __m256 _sumF_1 = _mm256_setzero_ps(); in lstm_fp16() local 126 _sumF_1 = _mm256_fmadd_ps(loadfp16(weight_xc_F_1), xi, _sumF_1); in lstm_fp16() 150 _sumF_1 = _mm256_fmadd_ps(loadfp16(weight_hc_F_1), h_cont, _sumF_1); in lstm_fp16() 196 _sumF_1 = _mm256_fmadd_ps(loadfp16(fp16_weights[5]), xi, _sumF_1); in lstm_fp16() 233 _sumF_1 = _mm256_fmadd_ps(loadfp16(fp16_weights[5]), h_cont, _sumF_1); in lstm_fp16() 238 …reu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_1, _sumG_1… in lstm_fp16() 514 __m256 _sumF_1 = _mm256_setzero_ps(); in lstm() local 527 _sumF_1 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_xc_F_1), xi, _sumF_1); in lstm() 551 _sumF_1 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_hc_F_1), h_cont, _sumF_1); in lstm() 565 …reu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_1, _sumG_1… in lstm()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | lstm_x86.cpp | 114 __m256 _sumF_1 = _mm256_setzero_ps(); in lstm_fp16() local 127 _sumF_1 = _mm256_comp_fmadd_ps(loadfp16(weight_xc_F_1), xi, _sumF_1); in lstm_fp16() 151 _sumF_1 = _mm256_comp_fmadd_ps(loadfp16(weight_hc_F_1), h_cont, _sumF_1); in lstm_fp16() 197 _sumF_1 = _mm256_comp_fmadd_ps(loadfp16(fp16_weights[5]), xi, _sumF_1); in lstm_fp16() 234 _sumF_1 = _mm256_comp_fmadd_ps(loadfp16(fp16_weights[5]), h_cont, _sumF_1); in lstm_fp16() 239 …reu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_1, _sumG_1… in lstm_fp16() 515 __m256 _sumF_1 = _mm256_setzero_ps(); in lstm() local 528 _sumF_1 = _mm256_comp_fmadd_ps(_mm256_loadu_ps(weight_xc_F_1), xi, _sumF_1); in lstm() 552 _sumF_1 = _mm256_comp_fmadd_ps(_mm256_loadu_ps(weight_hc_F_1), h_cont, _sumF_1); in lstm() 566 …reu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_1, _sumG_1… in lstm()
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | lstm_x86.cpp | 115 __m256 _sumF_1 = _mm256_setzero_ps(); in lstm_fp16() local 128 _sumF_1 = _mm256_fmadd_ps(loadfp16(weight_xc_F_1), xi, _sumF_1); in lstm_fp16() 152 _sumF_1 = _mm256_fmadd_ps(loadfp16(weight_hc_F_1), h_cont, _sumF_1); in lstm_fp16() 198 _sumF_1 = _mm256_fmadd_ps(loadfp16(fp16_weights[5]), xi, _sumF_1); in lstm_fp16() 235 _sumF_1 = _mm256_fmadd_ps(loadfp16(fp16_weights[5]), h_cont, _sumF_1); in lstm_fp16() 240 …reu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_1, _sumG_1… in lstm_fp16() 516 __m256 _sumF_1 = _mm256_setzero_ps(); in lstm() local 529 _sumF_1 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_xc_F_1), xi, _sumF_1); in lstm() 553 _sumF_1 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_hc_F_1), h_cont, _sumF_1); in lstm() 567 …reu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_1, _sumG_1… in lstm()
|