Lines Matching refs:_sum

250                     __m256 _sum = _mm256_set1_ps(0.f);  in forward()  local
254 _sum = _mm256_loadu_ps((const float*)bias_data + p * 8); in forward()
270 _sum = _mm256_fmadd_ps(_val0, _w0, _sum); in forward()
272 _sum = _mm256_fmadd_ps(_val1, _w1, _sum); in forward()
274 _sum = _mm256_fmadd_ps(_val2, _w2, _sum); in forward()
276 _sum = _mm256_fmadd_ps(_val3, _w3, _sum); in forward()
278 _sum = _mm256_fmadd_ps(_val4, _w4, _sum); in forward()
280 _sum = _mm256_fmadd_ps(_val5, _w5, _sum); in forward()
282 _sum = _mm256_fmadd_ps(_val6, _w6, _sum); in forward()
284 _sum = _mm256_fmadd_ps(_val7, _w7, _sum); in forward()
297 _sum = _mm256_fmadd_ps(_val0, _w0, _sum); in forward()
299 _sum = _mm256_fmadd_ps(_val1, _w1, _sum); in forward()
301 _sum = _mm256_fmadd_ps(_val2, _w2, _sum); in forward()
303 _sum = _mm256_fmadd_ps(_val3, _w3, _sum); in forward()
312 _sum = _mm256_fmadd_ps(_val, _w, _sum); in forward()
318 _sum = activation_avx(_sum, activation_type, activation_params); in forward()
320 _mm256_storeu_ps(outptr, _sum); in forward()
630 __m128 _sum = _mm_set1_ps(0.f); in forward() local
634 _sum = _mm_loadu_ps((const float*)bias_data + p * 4); in forward()
651 _sum = _mm_fmadd_ps(_val0, _w0, _sum); in forward()
653 _sum = _mm_fmadd_ps(_val1, _w1, _sum); in forward()
655 _sum = _mm_fmadd_ps(_val2, _w2, _sum); in forward()
657 _sum = _mm_fmadd_ps(_val3, _w3, _sum); in forward()
659 _sum = _mm_fmadd_ps(_val4, _w4, _sum); in forward()
661 _sum = _mm_fmadd_ps(_val5, _w5, _sum); in forward()
663 _sum = _mm_fmadd_ps(_val6, _w6, _sum); in forward()
665 _sum = _mm_fmadd_ps(_val7, _w7, _sum); in forward()
679 _sum = _mm_add_ps(_mm_mul_ps(_val0, _w0), _sum); in forward()
681 _sum = _mm_add_ps(_mm_mul_ps(_val1, _w1), _sum); in forward()
683 _sum = _mm_add_ps(_mm_mul_ps(_val2, _w2), _sum); in forward()
685 _sum = _mm_add_ps(_mm_mul_ps(_val3, _w3), _sum); in forward()
694 _sum = _mm_add_ps(_mm_mul_ps(_val, _k), _sum); in forward()
700 _sum = activation_sse(_sum, activation_type, activation_params); in forward()
702 _mm_storeu_ps(outptr, _sum); in forward()
804 __m256 _sum = _mm256_set1_ps(0.f); in forward() local
809 _sum = _mm256_fmadd_ps(_m, _w, _sum); in forward()
833 sum += _mm256_reduce_add_ps(_sum); in forward()
1333 __m256 _sum = _mm256_set1_ps(0.f); in forward() local
1339 _sum = _mm256_fmadd_ps(_m, _w, _sum); in forward()
1366 sum += _mm256_reduce_add_ps(_sum); in forward()
1657 __m256 _sum = _mm256_set1_ps(0.f); in forward_fp16() local
1668 _sum = _mm256_fmadd_ps(_m, _w, _sum); in forward_fp16()
1689 _sum = _mm256_fmadd_ps(_m, _w, _sum); in forward_fp16()
1692 sum += _mm256_reduce_add_ps(_sum); in forward_fp16()