1 #include <blas_extended.h>
2 #include <blas_extended_private.h>
3 #include <blas_fpu.h>
BLAS_zhemv2_z_d(enum blas_order_type order,enum blas_uplo_type uplo,int n,const void * alpha,const void * a,int lda,const double * x_head,const double * x_tail,int incx,const void * beta,const double * y,int incy)4 void BLAS_zhemv2_z_d(enum blas_order_type order, enum blas_uplo_type uplo,
5 int n, const void *alpha, const void *a, int lda,
6 const double *x_head, const double *x_tail, int incx,
7 const void *beta, const double *y, int incy)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * This routines computes the matrix product:
14 *
15 * y <- alpha * A * (x_head + x_tail) + beta * y
16 *
17 * where A is a complex Hermitian matrix.
18 *
19 * Arguments
20 * =========
21 *
22 * order (input) enum blas_order_type
23 * Storage format of input symmetric matrix A.
24 *
25 * uplo (input) enum blas_uplo_type
26 * Determines which half of matrix A (upper or lower triangle)
27 * is accessed.
28 *
29 * n (input) int
30 * Dimension of A and size of vectors x, y.
31 *
32 * alpha (input) const void*
33 *
34 * a (input) void*
35 * Matrix A.
36 *
37 * lda (input) int
38 * Leading dimension of matrix A.
39 *
40 * x_head (input) double*
41 * Vector x_head
42 *
43 * x_tail (input) double*
44 * Vector x_tail
45 *
46 * incx (input) int
47 * Stride for vector x.
48 *
49 * beta (input) const void*
50 *
51 * y (input) double*
52 * Vector y.
53 *
54 * incy (input) int
55 * Stride for vector y.
56 *
57 */
58 {
59 /* Routine name */
60 const char routine_name[] = "BLAS_zhemv2_z_d";
61
62 int i, j;
63 int xi, yi, xi0, yi0;
64 int aij, ai;
65 int incai;
66 int incaij, incaij2;
67
68 const double *a_i = (double *) a;
69 const double *x_head_i = x_head;
70 const double *x_tail_i = x_tail;
71 double *y_i = (double *) y;
72 double *alpha_i = (double *) alpha;
73 double *beta_i = (double *) beta;
74 double a_elem[2];
75 double x_elem;
76 double y_elem[2];
77 double diag_elem;
78 double prod1[2];
79 double prod2[2];
80 double sum1[2];
81 double sum2[2];
82 double tmp1[2];
83 double tmp2[2];
84 double tmp3[2];
85
86
87
88 /* Test for no-op */
89 if (n <= 0) {
90 return;
91 }
92 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
93 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
94 return;
95 }
96
97 /* Check for error conditions. */
98 if (n < 0) {
99 BLAS_error(routine_name, -3, n, NULL);
100 }
101 if (lda < n) {
102 BLAS_error(routine_name, -6, n, NULL);
103 }
104 if (incx == 0) {
105 BLAS_error(routine_name, -9, incx, NULL);
106 }
107 if (incy == 0) {
108 BLAS_error(routine_name, -12, incy, NULL);
109 }
110
111 if ((order == blas_colmajor && uplo == blas_upper) ||
112 (order == blas_rowmajor && uplo == blas_lower)) {
113 incai = lda;
114 incaij = 1;
115 incaij2 = lda;
116 } else {
117 incai = 1;
118 incaij = lda;
119 incaij2 = 1;
120 }
121
122
123 incy *= 2;
124 incai *= 2;
125 incaij *= 2;
126 incaij2 *= 2;
127 xi0 = (incx > 0) ? 0 : ((-n + 1) * incx);
128 yi0 = (incy > 0) ? 0 : ((-n + 1) * incy);
129
130
131
132 /* The most general form, y <--- alpha * A * (x_head + x_tail) + beta * y */
133 if (uplo == blas_lower) {
134 for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
135 sum1[0] = sum1[1] = 0.0;
136 sum2[0] = sum2[1] = 0.0;
137 for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
138 a_elem[0] = a_i[aij];
139 a_elem[1] = a_i[aij + 1];
140 x_elem = x_head_i[xi];
141 {
142 prod1[0] = a_elem[0] * x_elem;
143 prod1[1] = a_elem[1] * x_elem;
144 }
145 sum1[0] = sum1[0] + prod1[0];
146 sum1[1] = sum1[1] + prod1[1];
147 x_elem = x_tail_i[xi];
148 {
149 prod2[0] = a_elem[0] * x_elem;
150 prod2[1] = a_elem[1] * x_elem;
151 }
152 sum2[0] = sum2[0] + prod2[0];
153 sum2[1] = sum2[1] + prod2[1];
154 }
155
156 diag_elem = a_i[aij];
157 x_elem = x_head_i[xi];
158 prod1[0] = x_elem * diag_elem;
159 prod1[1] = 0.0;
160 sum1[0] = sum1[0] + prod1[0];
161 sum1[1] = sum1[1] + prod1[1];
162 x_elem = x_tail_i[xi];
163 prod2[0] = x_elem * diag_elem;
164 prod2[1] = 0.0;
165 sum2[0] = sum2[0] + prod2[0];
166 sum2[1] = sum2[1] + prod2[1];
167 j++;
168 aij += incaij2;
169 xi += incx;
170
171 for (; j < n; j++, aij += incaij2, xi += incx) {
172 a_elem[0] = a_i[aij];
173 a_elem[1] = a_i[aij + 1];
174 a_elem[1] = -a_elem[1];
175 x_elem = x_head_i[xi];
176 {
177 prod1[0] = a_elem[0] * x_elem;
178 prod1[1] = a_elem[1] * x_elem;
179 }
180 sum1[0] = sum1[0] + prod1[0];
181 sum1[1] = sum1[1] + prod1[1];
182 x_elem = x_tail_i[xi];
183 {
184 prod2[0] = a_elem[0] * x_elem;
185 prod2[1] = a_elem[1] * x_elem;
186 }
187 sum2[0] = sum2[0] + prod2[0];
188 sum2[1] = sum2[1] + prod2[1];
189 }
190 sum1[0] = sum1[0] + sum2[0];
191 sum1[1] = sum1[1] + sum2[1];
192 {
193 tmp1[0] =
194 (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
195 tmp1[1] =
196 (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
197 }
198 y_elem[0] = y_i[yi];
199 y_elem[1] = y_i[yi + 1];
200 {
201 tmp2[0] =
202 (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
203 tmp2[1] =
204 (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
205 }
206 tmp3[0] = tmp1[0] + tmp2[0];
207 tmp3[1] = tmp1[1] + tmp2[1];
208 y_i[yi] = tmp3[0];
209 y_i[yi + 1] = tmp3[1];
210 }
211 } else {
212 /* uplo == blas_upper */
213 for (i = 0, yi = yi0, ai = 0; i < n; i++, yi += incy, ai += incai) {
214 sum1[0] = sum1[1] = 0.0;
215 sum2[0] = sum2[1] = 0.0;
216
217 for (j = 0, aij = ai, xi = xi0; j < i; j++, aij += incaij, xi += incx) {
218 a_elem[0] = a_i[aij];
219 a_elem[1] = a_i[aij + 1];
220 a_elem[1] = -a_elem[1];
221 x_elem = x_head_i[xi];
222 {
223 prod1[0] = a_elem[0] * x_elem;
224 prod1[1] = a_elem[1] * x_elem;
225 }
226 sum1[0] = sum1[0] + prod1[0];
227 sum1[1] = sum1[1] + prod1[1];
228 x_elem = x_tail_i[xi];
229 {
230 prod2[0] = a_elem[0] * x_elem;
231 prod2[1] = a_elem[1] * x_elem;
232 }
233 sum2[0] = sum2[0] + prod2[0];
234 sum2[1] = sum2[1] + prod2[1];
235 }
236
237 diag_elem = a_i[aij];
238 x_elem = x_head_i[xi];
239 prod1[0] = x_elem * diag_elem;
240 prod1[1] = 0.0;
241 sum1[0] = sum1[0] + prod1[0];
242 sum1[1] = sum1[1] + prod1[1];
243 x_elem = x_tail_i[xi];
244 prod2[0] = x_elem * diag_elem;
245 prod2[1] = 0.0;
246 sum2[0] = sum2[0] + prod2[0];
247 sum2[1] = sum2[1] + prod2[1];
248 j++;
249 aij += incaij2;
250 xi += incx;
251
252 for (; j < n; j++, aij += incaij2, xi += incx) {
253 a_elem[0] = a_i[aij];
254 a_elem[1] = a_i[aij + 1];
255 x_elem = x_head_i[xi];
256 {
257 prod1[0] = a_elem[0] * x_elem;
258 prod1[1] = a_elem[1] * x_elem;
259 }
260 sum1[0] = sum1[0] + prod1[0];
261 sum1[1] = sum1[1] + prod1[1];
262 x_elem = x_tail_i[xi];
263 {
264 prod2[0] = a_elem[0] * x_elem;
265 prod2[1] = a_elem[1] * x_elem;
266 }
267 sum2[0] = sum2[0] + prod2[0];
268 sum2[1] = sum2[1] + prod2[1];
269 }
270 sum1[0] = sum1[0] + sum2[0];
271 sum1[1] = sum1[1] + sum2[1];
272 {
273 tmp1[0] =
274 (double) sum1[0] * alpha_i[0] - (double) sum1[1] * alpha_i[1];
275 tmp1[1] =
276 (double) sum1[0] * alpha_i[1] + (double) sum1[1] * alpha_i[0];
277 }
278 y_elem[0] = y_i[yi];
279 y_elem[1] = y_i[yi + 1];
280 {
281 tmp2[0] =
282 (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
283 tmp2[1] =
284 (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
285 }
286 tmp3[0] = tmp1[0] + tmp2[0];
287 tmp3[1] = tmp1[1] + tmp2[1];
288 y_i[yi] = tmp3[0];
289 y_i[yi + 1] = tmp3[1];
290 }
291 }
292
293
294
295 } /* end BLAS_zhemv2_z_d */
296