1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
BLAS_zhbmv_z_d(enum blas_order_type order,enum blas_uplo_type uplo,int n,int k,const void * alpha,const void * a,int lda,const double * x,int incx,const void * beta,void * y,int incy)3 void BLAS_zhbmv_z_d(enum blas_order_type order,
4 		    enum blas_uplo_type uplo, int n, int k,
5 		    const void *alpha, const void *a, int lda,
6 		    const double *x, int incx, const void *beta,
7 		    void *y, int incy)
8 
9 /*
10  * Purpose
11  * =======
12  *
13  * This routines computes the matrix product:
14  *
15  *     y  <-  alpha * A * x  +  beta * y
16  *
17  * where A is a hermitian band matrix.
18  *
19  * Arguments
20  * =========
21  *
22  * order   (input) enum blas_order_type
23  *         Storage format of input hermitian matrix A.
24  *
25  * uplo    (input) enum blas_uplo_type
26  *         Determines which half of matrix A (upper or lower triangle)
27  *         is accessed.
28  *
29  * n       (input) int
30  *         Dimension of A and size of vectors x, y.
31  *
32  * k       (input) int
33  *         Number of subdiagonals ( = number of superdiagonals)
34  *
35  * alpha   (input) const void*
36  *
37  * a       (input) void*
38  *         Matrix A.
39  *
40  * lda     (input) int
41  *         Leading dimension of matrix A.
42  *
43  * x       (input) double*
44  *         Vector x.
45  *
46  * incx    (input) int
47  *         Stride for vector x.
48  *
49  * beta    (input) const void*
50  *
51  * y       (input/output) void*
52  *         Vector y.
53  *
54  * incy    (input) int
55  *         Stride for vector y.
56  *
57  *
58  *  Notes on storing a hermitian band matrix:
59  *
60  *      Integers in the below arrays represent values of
61  *              type complex double.
62  *
63  *    if we have a hermitian matrix:
64  *
65  *      1d  2   3   0   0
66  *      2#  4d  5   6   0
67  *      3#  5#  7d  8   9
68  *      0   6#  8#  10d 11
69  *      0   0   9#  11# 12d
70  *
71  *     This matrix has n == 5, and k == 2. It can be stored in the
72  *      following ways:
73  *
74  *      Notes for the examples:
75  *      Each column below represents a contigous vector.
76  *      Columns are strided by lda.
77  *      An asterisk (*) represents a position in the
78  *       matrix that is not used.
79  *      A pound sign (#) represents the conjugated form is stored
80  *      A d following an integer indicates that the imaginary
81  *       part of the number is assumed to be zero.
82  *      Note that the minimum lda (size of column) is 3 (k+1).
83  *       lda may be arbitrarily large; an lda > 3 would mean
84  *       there would be unused data at the bottom of the below
85  *       columns.
86  *
87  *    blas_colmajor and blas_upper:
88  *      *   *   3   6   9
89  *      *   2   5   8   11
90  *      1d  4d  7d  10d 12d
91  *
92  *
93  *    blas_colmajor and blas_lower
94  *      1d   4d   7d   10d  12d
95  *      2#   5#   8#   11#  *
96  *      3#   6#   9#   *    *
97  *
98  *
99  *    blas_rowmajor and blas_upper
100  *      Columns here also represent contigous arrays.
101  *      1d  4d  7d  10d  12d
102  *      2   5   8   11   *
103  *      3   6   9   *    *
104  *
105  *
106  *    blas_rowmajor and blas_lower
107  *      Columns here also represent contigous arrays.
108  *      *   *   3#  6#   9#
109  *      *   2#  5#  8#   11#
110  *      1d  4d  7d  10d  12d
111  *
112  */
113 {
114   /* Routine name */
115   static const char routine_name[] = "BLAS_zhbmv_z_d";
116 
117   /* Integer Index Variables */
118   int i, j;
119   int xi, yi;
120   int aij, astarti, x_starti, y_starti;
121   int incaij, incaij2;
122   int n_i;
123   int maxj_first, maxj_second;
124 
125   /* Input Matrices */
126   const double *a_i = (double *) a;
127   const double *x_i = x;
128 
129   /* Output Vector */
130   double *y_i = (double *) y;
131 
132   /* Input Scalars */
133   double *alpha_i = (double *) alpha;
134   double *beta_i = (double *) beta;
135 
136   /* Temporary Floating-Point Variables */
137   double a_elem[2];
138   double x_elem;
139   double y_elem[2];
140   double prod[2];
141   double sum[2];
142   double tmp1[2];
143   double tmp2[2];
144 
145 
146 
147   /* Test for no-op */
148   if (n <= 0) {
149     return;
150   }
151   if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0
152       && (beta_i[0] == 1.0 && beta_i[1] == 0.0)) {
153     return;
154   }
155 
156   /* Check for error conditions. */
157   if (order != blas_colmajor && order != blas_rowmajor) {
158     BLAS_error(routine_name, -1, order, 0);
159   }
160   if (uplo != blas_upper && uplo != blas_lower) {
161     BLAS_error(routine_name, -2, uplo, 0);
162   }
163   if (n < 0) {
164     BLAS_error(routine_name, -3, n, 0);
165   }
166   if (k < 0 || k > n) {
167     BLAS_error(routine_name, -4, k, 0);
168   }
169   if ((lda < k + 1) || (lda < 1)) {
170     BLAS_error(routine_name, -7, lda, 0);
171   }
172   if (incx == 0) {
173     BLAS_error(routine_name, -9, incx, 0);
174   }
175   if (incy == 0) {
176     BLAS_error(routine_name, -12, incy, 0);
177   }
178 
179   /* Set Index Parameters */
180   n_i = n;
181 
182   if (((uplo == blas_upper) && (order == blas_colmajor)) ||
183       ((uplo == blas_lower) && (order == blas_rowmajor))) {
184     incaij = 1;			/* increment in first loop */
185     incaij2 = lda - 1;		/* increment in second loop */
186     astarti = k;		/* does not start on zero element */
187   } else {
188     incaij = lda - 1;
189     incaij2 = 1;
190     astarti = 0;		/* start on first element of array */
191   }
192   /* Adjustment to increments (if any) */
193   incy *= 2;
194   astarti *= 2;
195   incaij *= 2;
196   incaij2 *= 2;
197   if (incx < 0) {
198     x_starti = (-n_i + 1) * incx;
199   } else {
200     x_starti = 0;
201   }
202   if (incy < 0) {
203     y_starti = (-n_i + 1) * incy;
204   } else {
205     y_starti = 0;
206   }
207 
208 
209 
210   /* alpha = 0.  In this case, just return beta * y */
211   if (alpha_i[0] == 0.0 && alpha_i[1] == 0.0) {
212     for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
213       y_elem[0] = y_i[yi];
214       y_elem[1] = y_i[yi + 1];
215       {
216 	tmp1[0] =
217 	  (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
218 	tmp1[1] =
219 	  (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
220       }
221       y_i[yi] = tmp1[0];
222       y_i[yi + 1] = tmp1[1];
223     }
224   } else {
225     /*  determine the loop interation counts */
226     /* maj_first is number of elements done in first loop
227        (this will increase by one over each column up to a limit) */
228     maxj_first = 0;
229 
230     /* maxj_second is number of elements done in
231        second loop the first time */
232     maxj_second = MIN(k + 1, n_i);
233 
234     /*  determine whether we conjugate in first loop or second loop */
235     if (uplo == blas_lower) {
236       /*  conjugate second loop */
237 
238       /* Case alpha == 1. */
239       if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
240 
241 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
242 	  /* Case alpha = 1, beta = 0.  We compute  y <--- A * x */
243 	  for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
244 	    sum[0] = sum[1] = 0.0;
245 	    for (j = 0, aij = astarti, xi = x_starti;
246 		 j < maxj_first; j++, aij += incaij, xi += incx) {
247 	      a_elem[0] = a_i[aij];
248 	      a_elem[1] = a_i[aij + 1];
249 
250 	      x_elem = x_i[xi];
251 	      {
252 		prod[0] = a_elem[0] * x_elem;
253 		prod[1] = a_elem[1] * x_elem;
254 	      }
255 	      sum[0] = sum[0] + prod[0];
256 	      sum[1] = sum[1] + prod[1];
257 	    }
258 	    a_elem[0] = a_i[aij];
259 	    x_elem = x_i[xi];
260 	    prod[0] = x_elem * a_elem[0];
261 	    prod[1] = 0.0;
262 	    sum[0] = sum[0] + prod[0];
263 	    sum[1] = sum[1] + prod[1];
264 	    aij += incaij2;
265 	    xi += incx;
266 	    for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
267 	      a_elem[0] = a_i[aij];
268 	      a_elem[1] = a_i[aij + 1];
269 	      a_elem[1] = -a_elem[1];
270 	      x_elem = x_i[xi];
271 	      {
272 		prod[0] = a_elem[0] * x_elem;
273 		prod[1] = a_elem[1] * x_elem;
274 	      }
275 	      sum[0] = sum[0] + prod[0];
276 	      sum[1] = sum[1] + prod[1];
277 	    }
278 	    y_i[yi] = sum[0];
279 	    y_i[yi + 1] = sum[1];
280 	    if (i + 1 >= (n_i - k)) {
281 	      maxj_second--;
282 	    }
283 	    if (i >= k) {
284 	      astarti += (incaij + incaij2);
285 	      x_starti += incx;
286 	    } else {
287 	      maxj_first++;
288 	      astarti += incaij2;
289 	    }
290 	  }
291 	} else {
292 	  /* Case alpha = 1, but beta != 0.
293 	     We compute  y  <--- A * x + beta * y */
294 	  for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
295 	    sum[0] = sum[1] = 0.0;
296 
297 	    for (j = 0, aij = astarti, xi = x_starti;
298 		 j < maxj_first; j++, aij += incaij, xi += incx) {
299 	      a_elem[0] = a_i[aij];
300 	      a_elem[1] = a_i[aij + 1];
301 
302 	      x_elem = x_i[xi];
303 	      {
304 		prod[0] = a_elem[0] * x_elem;
305 		prod[1] = a_elem[1] * x_elem;
306 	      }
307 	      sum[0] = sum[0] + prod[0];
308 	      sum[1] = sum[1] + prod[1];
309 	    }
310 	    a_elem[0] = a_i[aij];
311 	    x_elem = x_i[xi];
312 	    prod[0] = x_elem * a_elem[0];
313 	    prod[1] = 0.0;
314 	    sum[0] = sum[0] + prod[0];
315 	    sum[1] = sum[1] + prod[1];
316 	    aij += incaij2;
317 	    xi += incx;
318 	    for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
319 	      a_elem[0] = a_i[aij];
320 	      a_elem[1] = a_i[aij + 1];
321 	      a_elem[1] = -a_elem[1];
322 	      x_elem = x_i[xi];
323 	      {
324 		prod[0] = a_elem[0] * x_elem;
325 		prod[1] = a_elem[1] * x_elem;
326 	      }
327 	      sum[0] = sum[0] + prod[0];
328 	      sum[1] = sum[1] + prod[1];
329 	    }
330 	    y_elem[0] = y_i[yi];
331 	    y_elem[1] = y_i[yi + 1];
332 	    {
333 	      tmp2[0] =
334 		(double) y_elem[0] * beta_i[0] -
335 		(double) y_elem[1] * beta_i[1];
336 	      tmp2[1] =
337 		(double) y_elem[0] * beta_i[1] +
338 		(double) y_elem[1] * beta_i[0];
339 	    }
340 	    tmp1[0] = sum[0];
341 	    tmp1[1] = sum[1];
342 	    tmp1[0] = tmp2[0] + tmp1[0];
343 	    tmp1[1] = tmp2[1] + tmp1[1];
344 	    y_i[yi] = tmp1[0];
345 	    y_i[yi + 1] = tmp1[1];
346 	    if (i + 1 >= (n_i - k)) {
347 	      maxj_second--;
348 	    }
349 	    if (i >= k) {
350 	      astarti += (incaij + incaij2);
351 	      x_starti += incx;
352 	    } else {
353 	      maxj_first++;
354 	      astarti += incaij2;
355 	    }
356 	  }
357 	}
358       } else {
359 	/* The most general form,   y <--- alpha * A * x + beta * y */
360 	for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
361 	  sum[0] = sum[1] = 0.0;
362 
363 	  for (j = 0, aij = astarti, xi = x_starti;
364 	       j < maxj_first; j++, aij += incaij, xi += incx) {
365 	    a_elem[0] = a_i[aij];
366 	    a_elem[1] = a_i[aij + 1];
367 
368 	    x_elem = x_i[xi];
369 	    {
370 	      prod[0] = a_elem[0] * x_elem;
371 	      prod[1] = a_elem[1] * x_elem;
372 	    }
373 	    sum[0] = sum[0] + prod[0];
374 	    sum[1] = sum[1] + prod[1];
375 	  }
376 	  a_elem[0] = a_i[aij];
377 	  x_elem = x_i[xi];
378 	  prod[0] = x_elem * a_elem[0];
379 	  prod[1] = 0.0;
380 	  sum[0] = sum[0] + prod[0];
381 	  sum[1] = sum[1] + prod[1];
382 	  aij += incaij2;
383 	  xi += incx;
384 	  for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
385 	    a_elem[0] = a_i[aij];
386 	    a_elem[1] = a_i[aij + 1];
387 	    a_elem[1] = -a_elem[1];
388 	    x_elem = x_i[xi];
389 	    {
390 	      prod[0] = a_elem[0] * x_elem;
391 	      prod[1] = a_elem[1] * x_elem;
392 	    }
393 	    sum[0] = sum[0] + prod[0];
394 	    sum[1] = sum[1] + prod[1];
395 	  }
396 	  y_elem[0] = y_i[yi];
397 	  y_elem[1] = y_i[yi + 1];
398 	  {
399 	    tmp2[0] =
400 	      (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
401 	    tmp2[1] =
402 	      (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
403 	  }
404 	  {
405 	    tmp1[0] =
406 	      (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
407 	    tmp1[1] =
408 	      (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
409 	  }
410 	  tmp1[0] = tmp2[0] + tmp1[0];
411 	  tmp1[1] = tmp2[1] + tmp1[1];
412 	  y_i[yi] = tmp1[0];
413 	  y_i[yi + 1] = tmp1[1];
414 	  if (i + 1 >= (n_i - k)) {
415 	    maxj_second--;
416 	  }
417 	  if (i >= k) {
418 	    astarti += (incaij + incaij2);
419 	    x_starti += incx;
420 	  } else {
421 	    maxj_first++;
422 	    astarti += incaij2;
423 	  }
424 	}
425       }
426     } else {
427       /*  conjugate first loop */
428 
429       /* Case alpha == 1. */
430       if ((alpha_i[0] == 1.0 && alpha_i[1] == 0.0)) {
431 
432 	if (beta_i[0] == 0.0 && beta_i[1] == 0.0) {
433 	  /* Case alpha = 1, beta = 0.  We compute  y <--- A * x */
434 	  for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
435 	    sum[0] = sum[1] = 0.0;
436 	    for (j = 0, aij = astarti, xi = x_starti;
437 		 j < maxj_first; j++, aij += incaij, xi += incx) {
438 	      a_elem[0] = a_i[aij];
439 	      a_elem[1] = a_i[aij + 1];
440 	      a_elem[1] = -a_elem[1];
441 	      x_elem = x_i[xi];
442 	      {
443 		prod[0] = a_elem[0] * x_elem;
444 		prod[1] = a_elem[1] * x_elem;
445 	      }
446 	      sum[0] = sum[0] + prod[0];
447 	      sum[1] = sum[1] + prod[1];
448 	    }
449 	    a_elem[0] = a_i[aij];
450 	    x_elem = x_i[xi];
451 	    prod[0] = x_elem * a_elem[0];
452 	    prod[1] = 0.0;
453 	    sum[0] = sum[0] + prod[0];
454 	    sum[1] = sum[1] + prod[1];
455 	    aij += incaij2;
456 	    xi += incx;
457 	    for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
458 	      a_elem[0] = a_i[aij];
459 	      a_elem[1] = a_i[aij + 1];
460 
461 	      x_elem = x_i[xi];
462 	      {
463 		prod[0] = a_elem[0] * x_elem;
464 		prod[1] = a_elem[1] * x_elem;
465 	      }
466 	      sum[0] = sum[0] + prod[0];
467 	      sum[1] = sum[1] + prod[1];
468 	    }
469 	    y_i[yi] = sum[0];
470 	    y_i[yi + 1] = sum[1];
471 	    if (i + 1 >= (n_i - k)) {
472 	      maxj_second--;
473 	    }
474 	    if (i >= k) {
475 	      astarti += (incaij + incaij2);
476 	      x_starti += incx;
477 	    } else {
478 	      maxj_first++;
479 	      astarti += incaij2;
480 	    }
481 	  }
482 	} else {
483 	  /* Case alpha = 1, but beta != 0.
484 	     We compute  y  <--- A * x + beta * y */
485 	  for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
486 	    sum[0] = sum[1] = 0.0;
487 
488 	    for (j = 0, aij = astarti, xi = x_starti;
489 		 j < maxj_first; j++, aij += incaij, xi += incx) {
490 	      a_elem[0] = a_i[aij];
491 	      a_elem[1] = a_i[aij + 1];
492 	      a_elem[1] = -a_elem[1];
493 	      x_elem = x_i[xi];
494 	      {
495 		prod[0] = a_elem[0] * x_elem;
496 		prod[1] = a_elem[1] * x_elem;
497 	      }
498 	      sum[0] = sum[0] + prod[0];
499 	      sum[1] = sum[1] + prod[1];
500 	    }
501 	    a_elem[0] = a_i[aij];
502 	    x_elem = x_i[xi];
503 	    prod[0] = x_elem * a_elem[0];
504 	    prod[1] = 0.0;
505 	    sum[0] = sum[0] + prod[0];
506 	    sum[1] = sum[1] + prod[1];
507 	    aij += incaij2;
508 	    xi += incx;
509 	    for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
510 	      a_elem[0] = a_i[aij];
511 	      a_elem[1] = a_i[aij + 1];
512 
513 	      x_elem = x_i[xi];
514 	      {
515 		prod[0] = a_elem[0] * x_elem;
516 		prod[1] = a_elem[1] * x_elem;
517 	      }
518 	      sum[0] = sum[0] + prod[0];
519 	      sum[1] = sum[1] + prod[1];
520 	    }
521 	    y_elem[0] = y_i[yi];
522 	    y_elem[1] = y_i[yi + 1];
523 	    {
524 	      tmp2[0] =
525 		(double) y_elem[0] * beta_i[0] -
526 		(double) y_elem[1] * beta_i[1];
527 	      tmp2[1] =
528 		(double) y_elem[0] * beta_i[1] +
529 		(double) y_elem[1] * beta_i[0];
530 	    }
531 	    tmp1[0] = sum[0];
532 	    tmp1[1] = sum[1];
533 	    tmp1[0] = tmp2[0] + tmp1[0];
534 	    tmp1[1] = tmp2[1] + tmp1[1];
535 	    y_i[yi] = tmp1[0];
536 	    y_i[yi + 1] = tmp1[1];
537 	    if (i + 1 >= (n_i - k)) {
538 	      maxj_second--;
539 	    }
540 	    if (i >= k) {
541 	      astarti += (incaij + incaij2);
542 	      x_starti += incx;
543 	    } else {
544 	      maxj_first++;
545 	      astarti += incaij2;
546 	    }
547 	  }
548 	}
549       } else {
550 	/* The most general form,   y <--- alpha * A * x + beta * y */
551 	for (i = 0, yi = y_starti; i < n_i; i++, yi += incy) {
552 	  sum[0] = sum[1] = 0.0;
553 
554 	  for (j = 0, aij = astarti, xi = x_starti;
555 	       j < maxj_first; j++, aij += incaij, xi += incx) {
556 	    a_elem[0] = a_i[aij];
557 	    a_elem[1] = a_i[aij + 1];
558 	    a_elem[1] = -a_elem[1];
559 	    x_elem = x_i[xi];
560 	    {
561 	      prod[0] = a_elem[0] * x_elem;
562 	      prod[1] = a_elem[1] * x_elem;
563 	    }
564 	    sum[0] = sum[0] + prod[0];
565 	    sum[1] = sum[1] + prod[1];
566 	  }
567 	  a_elem[0] = a_i[aij];
568 	  x_elem = x_i[xi];
569 	  prod[0] = x_elem * a_elem[0];
570 	  prod[1] = 0.0;
571 	  sum[0] = sum[0] + prod[0];
572 	  sum[1] = sum[1] + prod[1];
573 	  aij += incaij2;
574 	  xi += incx;
575 	  for (j = 1; j < maxj_second; j++, aij += incaij2, xi += incx) {
576 	    a_elem[0] = a_i[aij];
577 	    a_elem[1] = a_i[aij + 1];
578 
579 	    x_elem = x_i[xi];
580 	    {
581 	      prod[0] = a_elem[0] * x_elem;
582 	      prod[1] = a_elem[1] * x_elem;
583 	    }
584 	    sum[0] = sum[0] + prod[0];
585 	    sum[1] = sum[1] + prod[1];
586 	  }
587 	  y_elem[0] = y_i[yi];
588 	  y_elem[1] = y_i[yi + 1];
589 	  {
590 	    tmp2[0] =
591 	      (double) y_elem[0] * beta_i[0] - (double) y_elem[1] * beta_i[1];
592 	    tmp2[1] =
593 	      (double) y_elem[0] * beta_i[1] + (double) y_elem[1] * beta_i[0];
594 	  }
595 	  {
596 	    tmp1[0] =
597 	      (double) sum[0] * alpha_i[0] - (double) sum[1] * alpha_i[1];
598 	    tmp1[1] =
599 	      (double) sum[0] * alpha_i[1] + (double) sum[1] * alpha_i[0];
600 	  }
601 	  tmp1[0] = tmp2[0] + tmp1[0];
602 	  tmp1[1] = tmp2[1] + tmp1[1];
603 	  y_i[yi] = tmp1[0];
604 	  y_i[yi + 1] = tmp1[1];
605 	  if (i + 1 >= (n_i - k)) {
606 	    maxj_second--;
607 	  }
608 	  if (i >= k) {
609 	    astarti += (incaij + incaij2);
610 	    x_starti += incx;
611 	  } else {
612 	    maxj_first++;
613 	    astarti += incaij2;
614 	  }
615 	}
616       }
617     }
618   }
619 
620 
621 }				/* end BLAS_zhbmv_z_d */
622