1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "blis1.h"
12 
13 /*
14    Effective computation:
15 
16      rho_xz = beta * rho_xz + x * z;
17      rho_yz = beta * rho_yz + y * z;
18 
19    where x and y are optionally conjugated.
20 */
21 
zdotc_(int * n,dcomplex * x,int * inc_x,dcomplex * z,int * inc_z)22 dcomplex zdotc_( int*      n,
23                  dcomplex* x, int* inc_x,
24                  dcomplex* z, int* inc_z )
25 {
26 	dcomplex* restrict x1;
27 	dcomplex* restrict z1;
28 	int                i;
29 	v2df_t rho1v;
30 	v2df_t z11v, z12v;
31 	v2df_t x1v, x1rv;
32 	dcomplex rho;
33 	int    n1 = *n;
34 	int    incx = *inc_x;
35 	int    incz = *inc_z;
36 
37 	x1 = x;
38 	z1 = z;
39 
40 	rho1v.v = _mm_setzero_pd();
41 
42 	{
43 		v2df_t bcac, adbd;
44 
45 		for ( i = 0; i < n1; ++i )
46 		{
47 			z11v.v = _mm_loaddup_pd( ( double* )&(z1->real) );
48 			z12v.v = _mm_loaddup_pd( ( double* )&(z1->imag) );
49 
50 			x1v.v  = _mm_load_pd( ( double* )x1 );
51 			x1rv.v = _mm_shuffle_pd( x1v.v, x1v.v, _MM_SHUFFLE2 (0,1) );
52 			bcac.v = x1rv.v * z11v.v;
53 			adbd.v = x1v.v  * z12v.v;
54 			rho1v.v = rho1v.v + _mm_addsub_pd( bcac.v, adbd.v );
55 
56 			x1 += incx;
57 			z1 += incz;
58 		}
59 
60 		rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) );
61 
62 		rho1v.d[1] = -rho1v.d[1];
63 	}
64 
65 	rho.real = rho1v.d[0];
66 	rho.imag = rho1v.d[1];
67 
68 	return rho;
69 }
70 
71