/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | lstm_x86.cpp | 108 __m256 _sumI_0 = _mm256_setzero_ps(); in lstm_fp16() local 121 _sumI_0 = _mm256_fmadd_ps(loadfp16(weight_xc_I_0), xi, _sumI_0); in lstm_fp16() 145 _sumI_0 = _mm256_fmadd_ps(loadfp16(weight_hc_I_0), h_cont, _sumI_0); in lstm_fp16() 191 _sumI_0 = _mm256_fmadd_ps(loadfp16(fp16_weights[0]), xi, _sumI_0); in lstm_fp16() 228 _sumI_0 = _mm256_fmadd_ps(loadfp16(fp16_weights[0]), h_cont, _sumI_0); in lstm_fp16() 238 …_mm256_storeu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_… in lstm_fp16() 509 __m256 _sumI_0 = _mm256_setzero_ps(); in lstm() local 522 _sumI_0 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_xc_I_0), xi, _sumI_0); in lstm() 546 _sumI_0 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_hc_I_0), h_cont, _sumI_0); in lstm() 565 …_mm256_storeu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_… in lstm()
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | lstm_x86.cpp | 108 __m256 _sumI_0 = _mm256_setzero_ps(); in lstm_fp16() local 121 _sumI_0 = _mm256_fmadd_ps(loadfp16(weight_xc_I_0), xi, _sumI_0); in lstm_fp16() 145 _sumI_0 = _mm256_fmadd_ps(loadfp16(weight_hc_I_0), h_cont, _sumI_0); in lstm_fp16() 191 _sumI_0 = _mm256_fmadd_ps(loadfp16(fp16_weights[0]), xi, _sumI_0); in lstm_fp16() 228 _sumI_0 = _mm256_fmadd_ps(loadfp16(fp16_weights[0]), h_cont, _sumI_0); in lstm_fp16() 238 …_mm256_storeu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_… in lstm_fp16() 509 __m256 _sumI_0 = _mm256_setzero_ps(); in lstm() local 522 _sumI_0 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_xc_I_0), xi, _sumI_0); in lstm() 546 _sumI_0 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_hc_I_0), h_cont, _sumI_0); in lstm() 565 …_mm256_storeu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_… in lstm()
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | lstm_x86.cpp | 108 __m256 _sumI_0 = _mm256_setzero_ps(); in lstm_fp16() local 121 _sumI_0 = _mm256_fmadd_ps(loadfp16(weight_xc_I_0), xi, _sumI_0); in lstm_fp16() 145 _sumI_0 = _mm256_fmadd_ps(loadfp16(weight_hc_I_0), h_cont, _sumI_0); in lstm_fp16() 191 _sumI_0 = _mm256_fmadd_ps(loadfp16(fp16_weights[0]), xi, _sumI_0); in lstm_fp16() 228 _sumI_0 = _mm256_fmadd_ps(loadfp16(fp16_weights[0]), h_cont, _sumI_0); in lstm_fp16() 238 …_mm256_storeu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_… in lstm_fp16() 509 __m256 _sumI_0 = _mm256_setzero_ps(); in lstm() local 522 _sumI_0 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_xc_I_0), xi, _sumI_0); in lstm() 546 _sumI_0 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_hc_I_0), h_cont, _sumI_0); in lstm() 565 …_mm256_storeu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_… in lstm()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | lstm_x86.cpp | 109 __m256 _sumI_0 = _mm256_setzero_ps(); in lstm_fp16() local 122 _sumI_0 = _mm256_comp_fmadd_ps(loadfp16(weight_xc_I_0), xi, _sumI_0); in lstm_fp16() 146 _sumI_0 = _mm256_comp_fmadd_ps(loadfp16(weight_hc_I_0), h_cont, _sumI_0); in lstm_fp16() 192 _sumI_0 = _mm256_comp_fmadd_ps(loadfp16(fp16_weights[0]), xi, _sumI_0); in lstm_fp16() 229 _sumI_0 = _mm256_comp_fmadd_ps(loadfp16(fp16_weights[0]), h_cont, _sumI_0); in lstm_fp16() 239 …_mm256_storeu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_… in lstm_fp16() 510 __m256 _sumI_0 = _mm256_setzero_ps(); in lstm() local 523 _sumI_0 = _mm256_comp_fmadd_ps(_mm256_loadu_ps(weight_xc_I_0), xi, _sumI_0); in lstm() 547 _sumI_0 = _mm256_comp_fmadd_ps(_mm256_loadu_ps(weight_hc_I_0), h_cont, _sumI_0); in lstm() 566 …_mm256_storeu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_… in lstm()
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | lstm_x86.cpp | 110 __m256 _sumI_0 = _mm256_setzero_ps(); in lstm_fp16() local 123 _sumI_0 = _mm256_fmadd_ps(loadfp16(weight_xc_I_0), xi, _sumI_0); in lstm_fp16() 147 _sumI_0 = _mm256_fmadd_ps(loadfp16(weight_hc_I_0), h_cont, _sumI_0); in lstm_fp16() 193 _sumI_0 = _mm256_fmadd_ps(loadfp16(fp16_weights[0]), xi, _sumI_0); in lstm_fp16() 230 _sumI_0 = _mm256_fmadd_ps(loadfp16(fp16_weights[0]), h_cont, _sumI_0); in lstm_fp16() 240 …_mm256_storeu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_… in lstm_fp16() 511 __m256 _sumI_0 = _mm256_setzero_ps(); in lstm() local 524 _sumI_0 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_xc_I_0), xi, _sumI_0); in lstm() 548 _sumI_0 = _mm256_fmadd_ps(_mm256_loadu_ps(weight_hc_I_0), h_cont, _sumI_0); in lstm() 567 …_mm256_storeu_ps(sums, HorizontalSums(_sumI_0, _sumF_0, _sumO_0, _sumG_0, _sumI_1, _sumF_1, _sumO_… in lstm()
|