1 #include <blas_extended.h>
2 #include <blas_extended_private.h>
3 #include <blas_fpu.h>
BLAS_zhemv2_z_d(enum blas_order_type order,enum blas_uplo_type uplo,int n,const void * alpha,const void * a,int lda,const double * x_head,const double * x_tail,int incx,const void * beta,const double * y,int incy)4 void BLAS_zhemv2_z_d(enum blas_order_type order, enum blas_uplo_type uplo,
5 		     int n, const void *alpha, const void *a, int lda,
6 		     const double *x_head, const double *x_tail, int incx,
7 		     const void *beta, const double *y, int incy)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * This routines computes the matrix product:
14  *
15  *     y  <-  alpha * A * (x_head + x_tail) + beta * y
16  *
17  * where A is a complex Hermitian matrix.
18  *
19  * Arguments
20  * =========
21  *
22  * order   (input) enum blas_order_type
23  *         Storage format of input symmetric matrix A.
24  *
25  * uplo    (input) enum blas_uplo_type
26  *         Determines which half of matrix A (upper or lower triangle)
27  *         is accessed.
28  *
29  * n       (input) int
30  *         Dimension of A and size of vectors x, y.
31  *
32  * alpha   (input) const void*
33  *
34  * a       (input) void*
35  *         Matrix A.
36  *
37  * lda     (input) int
38  *         Leading dimension of matrix A.
39  *
40  * x_head  (input) double*
41  *         Vector x_head
42  *
43  * x_tail  (input) double*
44  *         Vector x_tail
45  *
46  * incx    (input) int
47  *         Stride for vector x.
48  *
49  * beta    (input) const void*
50  *
51  * y       (input) double*
52  *         Vector y.
53  *
54  * incy    (input) int
55  *         Stride for vector y.
56  *
57  */
58 {
59   /* Routine name */
60   const char routine_name[] = "BLAS_zhemv2_z_d";
61 
62   int i, j;
63   int xi, yi, xi0, yi0;
64   int aij, ai;
65   int incai;
66   int incaij, incaij2;
67 
68   const double *a_i = (double *) a;
69   const double *x_head_i = x_head;
70   const double *x_tail_i = x_tail;
71   double *y_i = (double *) y;
72   double *alpha_i = (double *) alpha;
73   double *beta_i = (double *) beta;
74   double a_elem[2];
75   double x_elem;
76   double y_elem[2];
77   double diag_elem;
78   double prod1[2];
79   double prod2[2];
80   double sum1[2];
81   double sum2[2];
82   double tmp1[2];
83   double tmp2[2];
84   double tmp3[2];
85 
86 
87 
88   /* Test for no-op */
89   if (n <= 0) {
90     return;
91   }
92   if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
93       && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
94     return;
95   }
96 
97   /* Check for error conditions. */
98   if (n < 0) {
99     BLAS_error(routine_name, -3, n, NULL);
100   }
101   if (lda < n) {
102     BLAS_error(routine_name, -6, n, NULL);
103   }
104   if (incx == 0) {
105     BLAS_error(routine_name, -9, incx, NULL);
106   }
107   if (incy == 0) {
108     BLAS_error(routine_name, -12, incy, NULL);
109   }
110 
111   if ((order == blas_colmajor && uplo == blas_upper) ||
112       (order == blas_rowmajor && uplo == blas_lower)) {
113     incai = lda;
114     incaij = 1;
115     incaij2 = lda;
116   } else {
117     incai = 1;
118     incaij = lda;
119     incaij2 = 1;
120   }
121 
122 
123   incy *= 2;
124   incai *= 2;
125   incaij *= 2;
126   incaij2 *= 2;
127   xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
128   yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
129 
130 
131 
132   /* The most general form,   y <--- alpha * A * (x_head + x_tail) + beta * y   */
133   if (uplo == blas_lower) {
134     for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
135       sum1[0] = sum1[1] = 0.0;
136       sum2[0] = sum2[1] = 0.0;
137       for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
138 	a_elem[0] = a_i[aij];
139 	a_elem[1] = a_i[aij + 1];
140 	x_elem = x_head_i[xi];
141 	{
142 	  prod1[0] = a_elem[0] * x_elem;
143 	  prod1[1] = a_elem[1] * x_elem;
144 	}
145 	sum1[0] = sum1[0] + prod1[0];
146 	sum1[1] = sum1[1] + prod1[1];
147 	x_elem = x_tail_i[xi];
148 	{
149 	  prod2[0] = a_elem[0] * x_elem;
150 	  prod2[1] = a_elem[1] * x_elem;
151 	}
152 	sum2[0] = sum2[0] + prod2[0];
153 	sum2[1] = sum2[1] + prod2[1];
154       }
155 
156       diag_elem = a_i[aij];
157       x_elem = x_head_i[xi];
158       prod1[0] = x_elem * diag_elem;
159       prod1[1] = 0.0;
160       sum1[0] = sum1[0] + prod1[0];
161       sum1[1] = sum1[1] + prod1[1];
162       x_elem = x_tail_i[xi];
163       prod2[0] = x_elem * diag_elem;
164       prod2[1] = 0.0;
165       sum2[0] = sum2[0] + prod2[0];
166       sum2[1] = sum2[1] + prod2[1];
167       j++;
168       aij += incaij2;
169       xi += incx;
170 
171       for (; j < n; j++, aij += incaij2, xi += incx) {
172 	a_elem[0] = a_i[aij];
173 	a_elem[1] = a_i[aij + 1];
174 	a_elem[1] = -a_elem[1];
175 	x_elem = x_head_i[xi];
176 	{
177 	  prod1[0] = a_elem[0] * x_elem;
178 	  prod1[1] = a_elem[1] * x_elem;
179 	}
180 	sum1[0] = sum1[0] + prod1[0];
181 	sum1[1] = sum1[1] + prod1[1];
182 	x_elem = x_tail_i[xi];
183 	{
184 	  prod2[0] = a_elem[0] * x_elem;
185 	  prod2[1] = a_elem[1] * x_elem;
186 	}
187 	sum2[0] = sum2[0] + prod2[0];
188 	sum2[1] = sum2[1] + prod2[1];
189       }
190       sum1[0] = sum1[0] + sum2[0];
191       sum1[1] = sum1[1] + sum2[1];
192       {
193 	tmp1[0] =
194 	  (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
195 	tmp1[1] =
196 	  (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
197       }
198       y_elem[0] = y_i[yi];
199       y_elem[1] = y_i[yi + 1];
200       {
201 	tmp2[0] =
202 	  (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
203 	tmp2[1] =
204 	  (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
205       }
206       tmp3[0] = tmp1[0] + tmp2[0];
207       tmp3[1] = tmp1[1] + tmp2[1];
208       y_i[yi] = tmp3[0];
209       y_i[yi + 1] = tmp3[1];
210     }
211   } else {
212     /* uplo == blas_upper */
213     for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
214       sum1[0] = sum1[1] = 0.0;
215       sum2[0] = sum2[1] = 0.0;
216 
217       for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
218 	a_elem[0] = a_i[aij];
219 	a_elem[1] = a_i[aij + 1];
220 	a_elem[1] = -a_elem[1];
221 	x_elem = x_head_i[xi];
222 	{
223 	  prod1[0] = a_elem[0] * x_elem;
224 	  prod1[1] = a_elem[1] * x_elem;
225 	}
226 	sum1[0] = sum1[0] + prod1[0];
227 	sum1[1] = sum1[1] + prod1[1];
228 	x_elem = x_tail_i[xi];
229 	{
230 	  prod2[0] = a_elem[0] * x_elem;
231 	  prod2[1] = a_elem[1] * x_elem;
232 	}
233 	sum2[0] = sum2[0] + prod2[0];
234 	sum2[1] = sum2[1] + prod2[1];
235       }
236 
237       diag_elem = a_i[aij];
238       x_elem = x_head_i[xi];
239       prod1[0] = x_elem * diag_elem;
240       prod1[1] = 0.0;
241       sum1[0] = sum1[0] + prod1[0];
242       sum1[1] = sum1[1] + prod1[1];
243       x_elem = x_tail_i[xi];
244       prod2[0] = x_elem * diag_elem;
245       prod2[1] = 0.0;
246       sum2[0] = sum2[0] + prod2[0];
247       sum2[1] = sum2[1] + prod2[1];
248       j++;
249       aij += incaij2;
250       xi += incx;
251 
252       for (; j < n; j++, aij += incaij2, xi += incx) {
253 	a_elem[0] = a_i[aij];
254 	a_elem[1] = a_i[aij + 1];
255 	x_elem = x_head_i[xi];
256 	{
257 	  prod1[0] = a_elem[0] * x_elem;
258 	  prod1[1] = a_elem[1] * x_elem;
259 	}
260 	sum1[0] = sum1[0] + prod1[0];
261 	sum1[1] = sum1[1] + prod1[1];
262 	x_elem = x_tail_i[xi];
263 	{
264 	  prod2[0] = a_elem[0] * x_elem;
265 	  prod2[1] = a_elem[1] * x_elem;
266 	}
267 	sum2[0] = sum2[0] + prod2[0];
268 	sum2[1] = sum2[1] + prod2[1];
269       }
270       sum1[0] = sum1[0] + sum2[0];
271       sum1[1] = sum1[1] + sum2[1];
272       {
273 	tmp1[0] =
274 	  (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
275 	tmp1[1] =
276 	  (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
277       }
278       y_elem[0] = y_i[yi];
279       y_elem[1] = y_i[yi + 1];
280       {
281 	tmp2[0] =
282 	  (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
283 	tmp2[1] =
284 	  (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
285       }
286       tmp3[0] = tmp1[0] + tmp2[0];
287       tmp3[1] = tmp1[1] + tmp2[1];
288       y_i[yi] = tmp3[0];
289       y_i[yi + 1] = tmp3[1];
290     }
291   }
292 
293 
294 
295 }				/* end BLAS_zhemv2_z_d */
296