1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "blis1.h"
12
/*
   Effective computation:

     rho = conj(x)^T * z;

   i.e. the dot product of x and z in which the elements of x are
   conjugated. The result rho is returned by value.
*/
21
/*
   zdotc_

   Conjugated dot product of two double-precision complex vectors:

     rho = sum over i in [0, *n) of conj( x[i * (*inc_x)] ) * z[i * (*inc_z)]

   Parameters (all passed by pointer):
     n      - number of element pairs to accumulate. If *n <= 0 the loop
              body never runs and the result is (0.0, -0.0).
     x      - first vector; its elements are the ones conjugated.
     inc_x  - stride of x, counted in dcomplex elements.
     z      - second vector.
     inc_z  - stride of z, counted in dcomplex elements.

   Returns the accumulated sum by value.

   NOTE: both vectors are walked starting from the pointers passed in,
   whatever the sign of the increments; no start-offset adjustment is
   made for negative increments.
*/
dcomplex zdotc_( int*      n,
                 dcomplex* x, int* inc_x,
                 dcomplex* z, int* inc_z )
{
	dcomplex* restrict x1   = x;
	dcomplex* restrict z1   = z;
	int                n1   = *n;
	int                incx = *inc_x;
	int                incz = *inc_z;
	int                i;
	v2df_t             rho1v;
	v2df_t             z11v, z12v;
	v2df_t             x1v, x1rv;
	v2df_t             bcac, adbd;
	dcomplex           rho;

	rho1v.v = _mm_setzero_pd();

	/* With x = a + bi and z = c + di, each iteration adds
	   ( bc - ad, ac + bd ) to the accumulator. */
	for ( i = 0; i < n1; ++i )
	{
		/* Broadcast the real and imaginary parts of z: (c, c), (d, d). */
		z11v.v = _mm_loaddup_pd( ( double* )&(z1->real) );
		z12v.v = _mm_loaddup_pd( ( double* )&(z1->imag) );

		/* Unaligned load: nothing in this interface guarantees that x
		   is 16-byte aligned, which _mm_load_pd would require. */
		x1v.v  = _mm_loadu_pd( ( double* )x1 );

		/* Swap the halves of x: (a, b) -> (b, a). */
		x1rv.v = _mm_shuffle_pd( x1v.v, x1v.v, _MM_SHUFFLE2 (0,1) );

		bcac.v = x1rv.v * z11v.v;
		adbd.v = x1v.v  * z12v.v;

		/* addsub subtracts in the low lane and adds in the high lane. */
		rho1v.v = rho1v.v + _mm_addsub_pd( bcac.v, adbd.v );

		x1 += incx;
		z1 += incz;
	}

	/* The accumulator holds ( sum(bc - ad), sum(ac + bd) ). Swap the
	   lanes so the real part comes first, then negate the imaginary
	   lane to obtain sum(ad - bc), the imaginary part of conj(x) * z. */
	rho1v.v    = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) );
	rho1v.d[1] = -rho1v.d[1];

	rho.real = rho1v.d[0];
	rho.imag = rho1v.d[1];

	return rho;
}
70
71