1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_zhbmv_z_d(enum blas_order_type order,enum blas_uplo_type uplo,int n,int k,const void * alpha,const void * a,int lda,const double * x,int incx,const void * beta,void * y,int incy)3 void BLAS_zhbmv_z_d(enum blas_order_type order,
4 enum blas_uplo_type uplo, int n, int k,
5 const void *alpha, const void *a, int lda,
6 const double *x, int incx, const void *beta,
7 void *y, int incy)
8
9 /*
10 * Purpose
11 * =======
12 *
13 * This routines computes the matrix product:
14 *
15 * y <- alpha * A * x + beta * y
16 *
17 * where A is a hermitian band matrix.
18 *
19 * Arguments
20 * =========
21 *
22 * order (input) enum blas_order_type
23 * Storage format of input hermitian matrix A.
24 *
25 * uplo (input) enum blas_uplo_type
26 * Determines which half of matrix A (upper or lower triangle)
27 * is accessed.
28 *
29 * n (input) int
30 * Dimension of A and size of vectors x, y.
31 *
32 * k (input) int
33 * Number of subdiagonals ( = number of superdiagonals)
34 *
35 * alpha (input) const void*
36 *
37 * a (input) void*
38 * Matrix A.
39 *
40 * lda (input) int
41 * Leading dimension of matrix A.
42 *
43 * x (input) double*
44 * Vector x.
45 *
46 * incx (input) int
47 * Stride for vector x.
48 *
49 * beta (input) const void*
50 *
51 * y (input/output) void*
52 * Vector y.
53 *
54 * incy (input) int
55 * Stride for vector y.
56 *
57 *
58 * Notes on storing a hermitian band matrix:
59 *
60 * Integers in the below arrays represent values of
61 * type complex double.
62 *
63 * if we have a hermitian matrix:
64 *
65 * 1d 2 3 0 0
66 * 2# 4d 5 6 0
67 * 3# 5# 7d 8 9
68 * 0 6# 8# 10d 11
69 * 0 0 9# 11# 12d
70 *
71 * This matrix has n == 5, and k == 2. It can be stored in the
72 * following ways:
73 *
74 * Notes for the examples:
75 * Each column below represents a contigous vector.
76 * Columns are strided by lda.
77 * An asterisk (*) represents a position in the
78 * matrix that is not used.
79 * A pound sign (#) represents the conjugated form is stored
80 * A d following an integer indicates that the imaginary
81 * part of the number is assumed to be zero.
82 * Note that the minimum lda (size of column) is 3 (k+1).
83 * lda may be arbitrarily large; an lda > 3 would mean
84 * there would be unused data at the bottom of the below
85 * columns.
86 *
87 * blas_colmajor and blas_upper:
88 * * * 3 6 9
89 * * 2 5 8 11
90 * 1d 4d 7d 10d 12d
91 *
92 *
93 * blas_colmajor and blas_lower
94 * 1d 4d 7d 10d 12d
95 * 2# 5# 8# 11# *
96 * 3# 6# 9# * *
97 *
98 *
99 * blas_rowmajor and blas_upper
100 * Columns here also represent contigous arrays.
101 * 1d 4d 7d 10d 12d
102 * 2 5 8 11 *
103 * 3 6 9 * *
104 *
105 *
106 * blas_rowmajor and blas_lower
107 * Columns here also represent contigous arrays.
108 * * * 3# 6# 9#
109 * * 2# 5# 8# 11#
110 * 1d 4d 7d 10d 12d
111 *
112 */
113 {
114 /* Routine name */
115 static const char routine_name[] = "BLAS_zhbmv_z_d";
116
117 /* Integer Index Variables */
118 int i, j;
119 int xi, yi;
120 int aij, astarti, x_starti, y_starti;
121 int incaij, incaij2;
122 int n_i;
123 int maxj_first, maxj_second;
124
125 /* Input Matrices */
126 const double *a_i = (double *) a;
127 const double *x_i = x;
128
129 /* Output Vector */
130 double *y_i = (double *) y;
131
132 /* Input Scalars */
133 double *alpha_i = (double *) alpha;
134 double *beta_i = (double *) beta;
135
136 /* Temporary Floating-Point Variables */
137 double a_elem[2];
138 double x_elem;
139 double y_elem[2];
140 double prod[2];
141 double sum[2];
142 double tmp1[2];
143 double tmp2[2];
144
145
146
147 /* Test for no-op */
148 if (n <= 0) {
149 return;
150 }
151 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
152 && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
153 return;
154 }
155
156 /* Check for error conditions. */
157 if (order != blas_colmajor && order != blas_rowmajor) {
158 BLAS_error(routine_name, -1, order, 0);
159 }
160 if (uplo != blas_upper && uplo != blas_lower) {
161 BLAS_error(routine_name, -2, uplo, 0);
162 }
163 if (n < 0) {
164 BLAS_error(routine_name, -3, n, 0);
165 }
166 if (k < 0 || k > n) {
167 BLAS_error(routine_name, -4, k, 0);
168 }
169 if ((lda < k + 1) || (lda < 1)) {
170 BLAS_error(routine_name, -7, lda, 0);
171 }
172 if (incx == 0) {
173 BLAS_error(routine_name, -9, incx, 0);
174 }
175 if (incy == 0) {
176 BLAS_error(routine_name, -12, incy, 0);
177 }
178
179 /* Set Index Parameters */
180 n_i = n;
181
182 if (((uplo == blas_upper) && (order == blas_colmajor)) ||
183 ((uplo == blas_lower) && (order == blas_rowmajor))) {
184 incaij = 1; /* increment in first loop */
185 incaij2 = lda - 1; /* increment in second loop */
186 astarti = k; /* does not start on zero element */
187 } else {
188 incaij = lda - 1;
189 incaij2 = 1;
190 astarti = 0; /* start on first element of array */
191 }
192 /* Adjustment to increments (if any) */
193 incy *= 2;
194 astarti *= 2;
195 incaij *= 2;
196 incaij2 *= 2;
197 if (incx < 0) {
198 x_starti = (-n_i + 1) * incx;
199 } else {
200 x_starti = 0;
201 }
202 if (incy < 0) {
203 y_starti = (-n_i + 1) * incy;
204 } else {
205 y_starti = 0;
206 }
207
208
209
210 /* alpha = 0. In this case, just return beta * y */
211 if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
212 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
213 y_elem[0] = y_i[yi];
214 y_elem[1] = y_i[yi + 1];
215 {
216 tmp1[0] =
217 (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
218 tmp1[1] =
219 (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
220 }
221 y_i[yi] = tmp1[0];
222 y_i[yi + 1] = tmp1[1];
223 }
224 } else {
225 /* determine the loop interation counts */
226 /* maj_first is number of elements done in first loop
227 (this will increase by one over each column up to a limit) */
228 maxj_first = 0;
229
230 /* maxj_second is number of elements done in
231 second loop the first time */
232 maxj_second = MIN(k + 1, n_i);
233
234 /* determine whether we conjugate in first loop or second loop */
235 if (uplo == blas_lower) {
236 /* conjugate second loop */
237
238 /* Case alpha == 1. */
239 if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
240
241 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
242 /* Case alpha = 1, beta = 0. We compute y <--- A * x */
243 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
244 sum[0] = sum[1] = 0.0;
245 for (j = 0, aij = astarti, xi = x_starti;
246 j < maxj_first; j++, aij += incaij, xi += incx) {
247 a_elem[0] = a_i[aij];
248 a_elem[1] = a_i[aij + 1];
249
250 x_elem = x_i[xi];
251 {
252 prod[0] = a_elem[0] * x_elem;
253 prod[1] = a_elem[1] * x_elem;
254 }
255 sum[0] = sum[0] + prod[0];
256 sum[1] = sum[1] + prod[1];
257 }
258 a_elem[0] = a_i[aij];
259 x_elem = x_i[xi];
260 prod[0] = x_elem * a_elem[0];
261 prod[1] = 0.0;
262 sum[0] = sum[0] + prod[0];
263 sum[1] = sum[1] + prod[1];
264 aij += incaij2;
265 xi += incx;
266 for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
267 a_elem[0] = a_i[aij];
268 a_elem[1] = a_i[aij + 1];
269 a_elem[1] = -a_elem[1];
270 x_elem = x_i[xi];
271 {
272 prod[0] = a_elem[0] * x_elem;
273 prod[1] = a_elem[1] * x_elem;
274 }
275 sum[0] = sum[0] + prod[0];
276 sum[1] = sum[1] + prod[1];
277 }
278 y_i[yi] = sum[0];
279 y_i[yi + 1] = sum[1];
280 if (i + 1 >= (n_i - k)) {
281 maxj_second--;
282 }
283 if (i >= k) {
284 astarti += (incaij + incaij2);
285 x_starti += incx;
286 } else {
287 maxj_first++;
288 astarti += incaij2;
289 }
290 }
291 } else {
292 /* Case alpha = 1, but beta != 0.
293 We compute y <--- A * x + beta * y */
294 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
295 sum[0] = sum[1] = 0.0;
296
297 for (j = 0, aij = astarti, xi = x_starti;
298 j < maxj_first; j++, aij += incaij, xi += incx) {
299 a_elem[0] = a_i[aij];
300 a_elem[1] = a_i[aij + 1];
301
302 x_elem = x_i[xi];
303 {
304 prod[0] = a_elem[0] * x_elem;
305 prod[1] = a_elem[1] * x_elem;
306 }
307 sum[0] = sum[0] + prod[0];
308 sum[1] = sum[1] + prod[1];
309 }
310 a_elem[0] = a_i[aij];
311 x_elem = x_i[xi];
312 prod[0] = x_elem * a_elem[0];
313 prod[1] = 0.0;
314 sum[0] = sum[0] + prod[0];
315 sum[1] = sum[1] + prod[1];
316 aij += incaij2;
317 xi += incx;
318 for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
319 a_elem[0] = a_i[aij];
320 a_elem[1] = a_i[aij + 1];
321 a_elem[1] = -a_elem[1];
322 x_elem = x_i[xi];
323 {
324 prod[0] = a_elem[0] * x_elem;
325 prod[1] = a_elem[1] * x_elem;
326 }
327 sum[0] = sum[0] + prod[0];
328 sum[1] = sum[1] + prod[1];
329 }
330 y_elem[0] = y_i[yi];
331 y_elem[1] = y_i[yi + 1];
332 {
333 tmp2[0] =
334 (double) y_elem[0] * beta_i[0] -
335 (double) y_elem[1] * beta_i[1];
336 tmp2[1] =
337 (double) y_elem[0] * beta_i[1] +
338 (double) y_elem[1] * beta_i[0];
339 }
340 tmp1[0] = sum[0];
341 tmp1[1] = sum[1];
342 tmp1[0] = tmp2[0] + tmp1[0];
343 tmp1[1] = tmp2[1] + tmp1[1];
344 y_i[yi] = tmp1[0];
345 y_i[yi + 1] = tmp1[1];
346 if (i + 1 >= (n_i - k)) {
347 maxj_second--;
348 }
349 if (i >= k) {
350 astarti += (incaij + incaij2);
351 x_starti += incx;
352 } else {
353 maxj_first++;
354 astarti += incaij2;
355 }
356 }
357 }
358 } else {
359 /* The most general form, y <--- alpha * A * x + beta * y */
360 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
361 sum[0] = sum[1] = 0.0;
362
363 for (j = 0, aij = astarti, xi = x_starti;
364 j < maxj_first; j++, aij += incaij, xi += incx) {
365 a_elem[0] = a_i[aij];
366 a_elem[1] = a_i[aij + 1];
367
368 x_elem = x_i[xi];
369 {
370 prod[0] = a_elem[0] * x_elem;
371 prod[1] = a_elem[1] * x_elem;
372 }
373 sum[0] = sum[0] + prod[0];
374 sum[1] = sum[1] + prod[1];
375 }
376 a_elem[0] = a_i[aij];
377 x_elem = x_i[xi];
378 prod[0] = x_elem * a_elem[0];
379 prod[1] = 0.0;
380 sum[0] = sum[0] + prod[0];
381 sum[1] = sum[1] + prod[1];
382 aij += incaij2;
383 xi += incx;
384 for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
385 a_elem[0] = a_i[aij];
386 a_elem[1] = a_i[aij + 1];
387 a_elem[1] = -a_elem[1];
388 x_elem = x_i[xi];
389 {
390 prod[0] = a_elem[0] * x_elem;
391 prod[1] = a_elem[1] * x_elem;
392 }
393 sum[0] = sum[0] + prod[0];
394 sum[1] = sum[1] + prod[1];
395 }
396 y_elem[0] = y_i[yi];
397 y_elem[1] = y_i[yi + 1];
398 {
399 tmp2[0] =
400 (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
401 tmp2[1] =
402 (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
403 }
404 {
405 tmp1[0] =
406 (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
407 tmp1[1] =
408 (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
409 }
410 tmp1[0] = tmp2[0] + tmp1[0];
411 tmp1[1] = tmp2[1] + tmp1[1];
412 y_i[yi] = tmp1[0];
413 y_i[yi + 1] = tmp1[1];
414 if (i + 1 >= (n_i - k)) {
415 maxj_second--;
416 }
417 if (i >= k) {
418 astarti += (incaij + incaij2);
419 x_starti += incx;
420 } else {
421 maxj_first++;
422 astarti += incaij2;
423 }
424 }
425 }
426 } else {
427 /* conjugate first loop */
428
429 /* Case alpha == 1. */
430 if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
431
432 if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
433 /* Case alpha = 1, beta = 0. We compute y <--- A * x */
434 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
435 sum[0] = sum[1] = 0.0;
436 for (j = 0, aij = astarti, xi = x_starti;
437 j < maxj_first; j++, aij += incaij, xi += incx) {
438 a_elem[0] = a_i[aij];
439 a_elem[1] = a_i[aij + 1];
440 a_elem[1] = -a_elem[1];
441 x_elem = x_i[xi];
442 {
443 prod[0] = a_elem[0] * x_elem;
444 prod[1] = a_elem[1] * x_elem;
445 }
446 sum[0] = sum[0] + prod[0];
447 sum[1] = sum[1] + prod[1];
448 }
449 a_elem[0] = a_i[aij];
450 x_elem = x_i[xi];
451 prod[0] = x_elem * a_elem[0];
452 prod[1] = 0.0;
453 sum[0] = sum[0] + prod[0];
454 sum[1] = sum[1] + prod[1];
455 aij += incaij2;
456 xi += incx;
457 for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
458 a_elem[0] = a_i[aij];
459 a_elem[1] = a_i[aij + 1];
460
461 x_elem = x_i[xi];
462 {
463 prod[0] = a_elem[0] * x_elem;
464 prod[1] = a_elem[1] * x_elem;
465 }
466 sum[0] = sum[0] + prod[0];
467 sum[1] = sum[1] + prod[1];
468 }
469 y_i[yi] = sum[0];
470 y_i[yi + 1] = sum[1];
471 if (i + 1 >= (n_i - k)) {
472 maxj_second--;
473 }
474 if (i >= k) {
475 astarti += (incaij + incaij2);
476 x_starti += incx;
477 } else {
478 maxj_first++;
479 astarti += incaij2;
480 }
481 }
482 } else {
483 /* Case alpha = 1, but beta != 0.
484 We compute y <--- A * x + beta * y */
485 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
486 sum[0] = sum[1] = 0.0;
487
488 for (j = 0, aij = astarti, xi = x_starti;
489 j < maxj_first; j++, aij += incaij, xi += incx) {
490 a_elem[0] = a_i[aij];
491 a_elem[1] = a_i[aij + 1];
492 a_elem[1] = -a_elem[1];
493 x_elem = x_i[xi];
494 {
495 prod[0] = a_elem[0] * x_elem;
496 prod[1] = a_elem[1] * x_elem;
497 }
498 sum[0] = sum[0] + prod[0];
499 sum[1] = sum[1] + prod[1];
500 }
501 a_elem[0] = a_i[aij];
502 x_elem = x_i[xi];
503 prod[0] = x_elem * a_elem[0];
504 prod[1] = 0.0;
505 sum[0] = sum[0] + prod[0];
506 sum[1] = sum[1] + prod[1];
507 aij += incaij2;
508 xi += incx;
509 for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
510 a_elem[0] = a_i[aij];
511 a_elem[1] = a_i[aij + 1];
512
513 x_elem = x_i[xi];
514 {
515 prod[0] = a_elem[0] * x_elem;
516 prod[1] = a_elem[1] * x_elem;
517 }
518 sum[0] = sum[0] + prod[0];
519 sum[1] = sum[1] + prod[1];
520 }
521 y_elem[0] = y_i[yi];
522 y_elem[1] = y_i[yi + 1];
523 {
524 tmp2[0] =
525 (double) y_elem[0] * beta_i[0] -
526 (double) y_elem[1] * beta_i[1];
527 tmp2[1] =
528 (double) y_elem[0] * beta_i[1] +
529 (double) y_elem[1] * beta_i[0];
530 }
531 tmp1[0] = sum[0];
532 tmp1[1] = sum[1];
533 tmp1[0] = tmp2[0] + tmp1[0];
534 tmp1[1] = tmp2[1] + tmp1[1];
535 y_i[yi] = tmp1[0];
536 y_i[yi + 1] = tmp1[1];
537 if (i + 1 >= (n_i - k)) {
538 maxj_second--;
539 }
540 if (i >= k) {
541 astarti += (incaij + incaij2);
542 x_starti += incx;
543 } else {
544 maxj_first++;
545 astarti += incaij2;
546 }
547 }
548 }
549 } else {
550 /* The most general form, y <--- alpha * A * x + beta * y */
551 for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
552 sum[0] = sum[1] = 0.0;
553
554 for (j = 0, aij = astarti, xi = x_starti;
555 j < maxj_first; j++, aij += incaij, xi += incx) {
556 a_elem[0] = a_i[aij];
557 a_elem[1] = a_i[aij + 1];
558 a_elem[1] = -a_elem[1];
559 x_elem = x_i[xi];
560 {
561 prod[0] = a_elem[0] * x_elem;
562 prod[1] = a_elem[1] * x_elem;
563 }
564 sum[0] = sum[0] + prod[0];
565 sum[1] = sum[1] + prod[1];
566 }
567 a_elem[0] = a_i[aij];
568 x_elem = x_i[xi];
569 prod[0] = x_elem * a_elem[0];
570 prod[1] = 0.0;
571 sum[0] = sum[0] + prod[0];
572 sum[1] = sum[1] + prod[1];
573 aij += incaij2;
574 xi += incx;
575 for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
576 a_elem[0] = a_i[aij];
577 a_elem[1] = a_i[aij + 1];
578
579 x_elem = x_i[xi];
580 {
581 prod[0] = a_elem[0] * x_elem;
582 prod[1] = a_elem[1] * x_elem;
583 }
584 sum[0] = sum[0] + prod[0];
585 sum[1] = sum[1] + prod[1];
586 }
587 y_elem[0] = y_i[yi];
588 y_elem[1] = y_i[yi + 1];
589 {
590 tmp2[0] =
591 (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
592 tmp2[1] =
593 (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
594 }
595 {
596 tmp1[0] =
597 (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
598 tmp1[1] =
599 (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
600 }
601 tmp1[0] = tmp2[0] + tmp1[0];
602 tmp1[1] = tmp2[1] + tmp1[1];
603 y_i[yi] = tmp1[0];
604 y_i[yi + 1] = tmp1[1];
605 if (i + 1 >= (n_i - k)) {
606 maxj_second--;
607 }
608 if (i >= k) {
609 astarti += (incaij + incaij2);
610 x_starti += incx;
611 } else {
612 maxj_first++;
613 astarti += incaij2;
614 }
615 }
616 }
617 }
618 }
619
620
621 } /* end BLAS_zhbmv_z_d */
622