/dports/math/libflame/libflame-5.2.0/examples/hevd_test/ |
H A D | zdotc.c | 29 v2df_t rho1v; in zdotc_() local 40 rho1v.v = _mm_setzero_pd(); in zdotc_() 54 rho1v.v = rho1v.v + _mm_addsub_pd( bcac.v, adbd.v ); in zdotc_() 60 rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) ); in zdotc_() 62 rho1v.d[1] = -rho1v.d[1]; in zdotc_() 65 rho.real = rho1v.d[0]; in zdotc_() 66 rho.imag = rho1v.d[1]; in zdotc_()
|
/dports/math/libflame/libflame-5.2.0/examples/appg_test/ |
H A D | zdotc.c | 29 v2df_t rho1v; in zdotc_() local 40 rho1v.v = _mm_setzero_pd(); in zdotc_() 54 rho1v.v = rho1v.v + _mm_addsub_pd( bcac.v, adbd.v ); in zdotc_() 60 rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) ); in zdotc_() 62 rho1v.d[1] = -rho1v.d[1]; in zdotc_() 65 rho.real = rho1v.d[0]; in zdotc_() 66 rho.imag = rho1v.d[1]; in zdotc_()
|
/dports/math/blis/blis-0.8.1/kernels/zen/1f/ |
H A D | bli_dotxf_zen_int_8.c | 162 v8sf_t rho1v; rho1v.v = _mm256_setzero_ps(); in bli_sdotxf_zen_int_8() local 213 rho1 += rho1v.f[0] + rho1v.f[1] + rho1v.f[2] + rho1v.f[3] + in bli_sdotxf_zen_int_8() 214 rho1v.f[4] + rho1v.f[5] + rho1v.f[6] + rho1v.f[7]; in bli_sdotxf_zen_int_8() 248 rho1 = rho1v.f[0] + rho1v.f[4]; in bli_sdotxf_zen_int_8() 276 v8sf_t rho1v; rho1v.v = _mm256_setzero_ps(); in bli_sdotxf_zen_int_8() local 552 v4df_t rho1v; rho1v.v = _mm256_setzero_pd(); in bli_ddotxf_zen_int_8() local 602 rho1 += rho1v.d[0] + rho1v.d[1] + rho1v.d[2] + rho1v.d[3]; in bli_ddotxf_zen_int_8() 613 rho1v.v = _mm256_hadd_pd( rho1v.v, rho1v.v ); in bli_ddotxf_zen_int_8() 623 rho1 = rho1v.d[0] + rho1v.d[2]; in bli_ddotxf_zen_int_8() 651 v4df_t rho1v; rho1v.v = _mm256_setzero_pd(); in bli_ddotxf_zen_int_8() local [all …]
|
/dports/math/libflame/libflame-5.2.0/src/base/flamec/blis/fused/ |
H A D | bl1_dotsv2.c | 97 rho1v.v = _mm_setzero_pd(); in bl1_ddotsv2() 110 rho1v.v += x1v.v * z1v.v; in bl1_ddotsv2() 113 rho1v.v += x2v.v * z2v.v; in bl1_ddotsv2() 121 rho1 += rho1v.d[0] + rho1v.d[1]; in bl1_ddotsv2() 262 v2df_t r1v, rho1v; in bl1_zdotsv2() local 272 rho1v.v = _mm_setzero_pd(); in bl1_zdotsv2() 288 rho1v.v = rho1v.v + _mm_addsub_pd( bcac.v, adbd.v ); in bl1_zdotsv2() 301 rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) ); in bl1_zdotsv2() 304 rho1v.d[1] = -rho1v.d[1]; in bl1_zdotsv2() 320 rho1v.v = rho1v.v + _mm_addsub_pd( cada.v, dbcb.v ); in bl1_zdotsv2() [all …]
|
H A D | bl1_dotaxpy.c | 49 v2df_t k1v, rho1v; in bl1_ddotaxpy() local 92 rho1v.v = _mm_setzero_pd(); in bl1_ddotaxpy() 106 rho1v.v += a1v.v * x1v.v; in bl1_ddotaxpy() 111 rho1v.v += a2v.v * x2v.v; in bl1_ddotaxpy() 141 rho_c += rho1v.d[0] + rho1v.d[1]; in bl1_ddotaxpy() 272 v2df_t rho1v; in bl1_zdotaxpy() local 290 rho1v.v = _mm_setzero_pd(); in bl1_zdotaxpy() 304 rho1v.v = rho1v.v + _mm_addsub_pd( adac.v, bcbd.v ); in bl1_zdotaxpy() 323 rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) ); in bl1_zdotaxpy() 327 _mm_store_pd( ( double* )rho, rho1v.v ); in bl1_zdotaxpy()
|
H A D | bl1_dotaxmyv2.c | 56 v2df_t rho1v; in bl1_ddotaxmyv2() local 104 rho1v.v = _mm_setzero_pd(); in bl1_ddotaxmyv2() 113 rho1v.v += x1v.v * u1v.v; in bl1_ddotaxmyv2() 126 rho_c += rho1v.d[0] + rho1v.d[1]; in bl1_ddotaxmyv2() 294 v2df_t rho1v; in bl1_zdotaxmyv2() local 314 rho1v.v = _mm_setzero_pd(); in bl1_zdotaxmyv2() 353 rho1v.v = rho1v.v + _mm_addsub_pd( bcac.v, adbd.v ); in bl1_zdotaxmyv2() 361 rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) ); in bl1_zdotaxmyv2() 363 rho1v.d[1] = -rho1v.d[1]; in bl1_zdotaxmyv2() 365 _mm_store_pd( ( double* )rho, rho1v.v ); in bl1_zdotaxmyv2()
|
H A D | bl1_dotsv3.c | 110 rho1v.v = _mm_setzero_pd(); in bl1_ddotsv3() 121 rho1v.v += x1v.v * z1v.v; in bl1_ddotsv3() 130 rho1v.v += x2v.v * z2v.v; in bl1_ddotsv3() 140 rho1 += rho1v.d[0] + rho1v.d[1]; in bl1_ddotsv3() 307 v2df_t r1v, rho1v; in bl1_zdotsv3() local 320 rho1v.v = _mm_setzero_pd(); in bl1_zdotsv3() 337 rho1v.v = rho1v.v + _mm_addsub_pd( bcac.v, adbd.v ); in bl1_zdotsv3() 357 rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) ); in bl1_zdotsv3() 361 rho1v.d[1] = -rho1v.d[1]; in bl1_zdotsv3() 378 rho1v.v = rho1v.v + _mm_addsub_pd( cada.v, dbcb.v ); in bl1_zdotsv3() [all …]
|
H A D | bl1_dotv2axpyv2b.c | 59 v2df_t k1v, rho1v; in bl1_ddotv2axpyv2b() local 113 rho1v.v = _mm_setzero_pd(); in bl1_ddotv2axpyv2b() 126 rho1v.v += a11v.v * x1v.v; in bl1_ddotv2axpyv2b() 139 rho1v.v += a21v.v * x2v.v; in bl1_ddotv2axpyv2b() 179 rho1_c += rho1v.d[0] + rho1v.d[1]; in bl1_ddotv2axpyv2b() 350 v2df_t rho1v; in bl1_zdotv2axpyv2b() local 369 rho1v.v = _mm_setzero_pd(); in bl1_zdotv2axpyv2b() 395 rho1v.v = rho1v.v + _mm_addsub_pd( adac.v, bcbd.v ); in bl1_zdotv2axpyv2b() 436 rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) ); in bl1_zdotv2axpyv2b() 441 _mm_store_pd( ( double* )rho1, rho1v.v ); in bl1_zdotv2axpyv2b()
|
H A D | bl1_axpyv2bdotaxpy.c | 364 v2df_t rho1v; in bl1_zaxpyv2bdotaxpy() local 399 rho1v.v = _mm_setzero_pd(); in bl1_zaxpyv2bdotaxpy() 434 rho1v.v = rho1v.v + _mm_addsub_pd( adac.v, bcbd.v ); in bl1_zaxpyv2bdotaxpy() 457 rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) ); in bl1_zaxpyv2bdotaxpy() 461 _mm_store_pd( ( double* )rho, rho1v.v ); in bl1_zaxpyv2bdotaxpy()
|
/dports/math/blis/blis-0.8.1/kernels/zen/1/ |
H A D | bli_dotxv_zen_int.c | 81 v8sf_t rho0v, rho1v, rho2v, rho3v; in bli_sdotxv_zen_int() local 121 rho1v.v = _mm256_setzero_ps(); in bli_sdotxv_zen_int() 143 rho1v.v = _mm256_fmadd_ps( x1v.v, y1v.v, rho1v.v ); in bli_sdotxv_zen_int() 152 rho0v.v += rho1v.v; in bli_sdotxv_zen_int() 209 v4df_t rho0v, rho1v, rho2v, rho3v; in bli_ddotxv_zen_int() local 249 rho1v.v = _mm256_setzero_pd(); in bli_ddotxv_zen_int() 271 rho1v.v = _mm256_fmadd_pd( x1v.v, y1v.v, rho1v.v ); in bli_ddotxv_zen_int() 280 rho0v.v += rho1v.v; in bli_ddotxv_zen_int()
|
H A D | bli_dotv_zen_int.c | 79 v8sf_t rho0v, rho1v, rho2v, rho3v; in bli_sdotv_zen_int() local 115 rho1v.v = _mm256_setzero_ps(); in bli_sdotv_zen_int() 137 rho1v.v = _mm256_fmadd_ps( x1v.v, y1v.v, rho1v.v ); in bli_sdotv_zen_int() 146 rho0v.v += rho1v.v; in bli_sdotv_zen_int() 201 v4df_t rho0v, rho1v, rho2v, rho3v; in bli_ddotv_zen_int() local 237 rho1v.v = _mm256_setzero_pd(); in bli_ddotv_zen_int() 259 rho1v.v = _mm256_fmadd_pd( x1v.v, y1v.v, rho1v.v ); in bli_ddotv_zen_int() 268 rho0v.v += rho1v.v; in bli_ddotv_zen_int()
|
/dports/math/blis/blis-0.8.1/kernels/penryn/1f/ |
H A D | bli_dotxf_penryn_int.c | 82 v2df_t rho0v, rho1v, rho2v, rho3v; in bli_ddotxf_penryn_int() local 190 rho1v.v = _mm_setzero_pd(); in bli_ddotxf_penryn_int() 203 rho1v.v += x1v.v * y0v.v; in bli_ddotxf_penryn_int() 214 rho1v.v += x1v.v * y0v.v; in bli_ddotxf_penryn_int() 225 rho1v.v += x1v.v * y0v.v; in bli_ddotxf_penryn_int() 236 rho1v.v += x1v.v * y0v.v; in bli_ddotxf_penryn_int() 249 rho1 += rho1v.d[0] + rho1v.d[1]; in bli_ddotxf_penryn_int() 287 rho1v.d[0] = rho0; in bli_ddotxf_penryn_int() 288 rho1v.d[1] = rho1; in bli_ddotxf_penryn_int() 301 rho0v.v += alphav.v * rho1v.v; in bli_ddotxf_penryn_int()
|
H A D | bli_dotxaxpyf_penryn_int.c | 90 v2df_t rho0v, rho1v, rho2v, rho3v; in bli_ddotxaxpyf_penryn_int() local 228 rho1v.v = _mm_setzero_pd(); in bli_ddotxaxpyf_penryn_int() 250 rho1v.v += a01v.v * w1v.v; in bli_ddotxaxpyf_penryn_int() 277 rho1v.v += a11v.v * w2v.v; in bli_ddotxaxpyf_penryn_int() 305 rho1 += rho1v.d[0] + rho1v.d[1]; in bli_ddotxaxpyf_penryn_int() 344 rho1v.d[0] = rho2; in bli_ddotxaxpyf_penryn_int() 345 rho1v.d[1] = rho3; in bli_ddotxaxpyf_penryn_int() 354 psi1v.v = betav.v * psi1v.v + alphav.v * rho1v.v; in bli_ddotxaxpyf_penryn_int()
|
/dports/math/blis/blis-0.8.1/kernels/penryn/1/ |
H A D | bli_dotv_penryn_int.c | 71 v2df_t rho1v; in bli_ddotv_penryn_int() local 141 rho1v.v = _mm_setzero_pd(); in bli_ddotv_penryn_int() 148 rho1v.v += x1v.v * y1v.v; in bli_ddotv_penryn_int() 156 rho1 += rho1v.d[0] + rho1v.d[1]; in bli_ddotv_penryn_int()
|