1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
3 #include "blas_extended_test.h"
4
5
BLAS_sgbmv2_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,float * alpha,int alpha_flag,float * AB,int lda,float * x_head,float * x_tail,float * beta,int beta_flag,float * y,int * seed,double * r_true_l,double * r_true_t)6 void BLAS_sgbmv2_testgen(int norm, enum blas_order_type order,
7 enum blas_trans_type trans, int m, int n, int kl,
8 int ku, float *alpha, int alpha_flag, float *AB,
9 int lda, float *x_head, float *x_tail, float *beta,
10 int beta_flag, float *y, int *seed, double *r_true_l,
11 double *r_true_t)
12
13 /*
14 * Purpose
15 * =======
16 *
17 * Generates alpha, AB, x, beta, and y, where AB is a banded
18 * matrix; and computes r_true.
19 *
20 * Arguments
21 * =========
22 *
23 * norm (input) blas_norm_type
24 *
25 * order (input) blas_order_type
26 * Order of AB; row or column major
27 *
28 * trans (input) blas_trans_type
29 * Whether AB is no trans, trans, or conj trans
30 *
31 * m (input) int
32 * The number of rows
33 *
34 * n (input) int
35 * The number of columns
36 *
37 * kl (input) int
38 * The number of subdiagonals
39 *
40 * ku (input) int
41 * The number of superdiagonals
42 *
43 * alpha (input/output) float*
44 * If alpha_flag = 1, alpha is input.
45 * If alpha_flag = 0, alpha is output.
46 *
47 * alpha_flag (input) int
48 * = 0 : alpha is free, and is output.
49 * = 1 : alpha is fixed on input.
50 *
51 * AB (output) float*
52 * Matrix A in the banded storage.
53 *
54 *
55 * lda (input) int
56 * The first dimension of AB
57 *
58 * x_head (input/output) float*
59 * x_tail (input/output) float*
60 *
61 * beta (input/output) float*
62 * If beta_flag = 1, beta is input.
63 * If beta_flag = 0, beta is output.
64 *
65 * beta_flag (input) int
66 * = 0 : beta is free, and is output.
67 * = 1 : beta is fixed on input.
68 *
69 * y (input/output) float*
70 *
71 * seed (input/output) int
72 *
73 * r_true_l (output) double*
74 * The leading part of the truth in double-double.
75 *
76 * r_true_t (output) double*
77 * The trailing part of the truth in double-double.
78 *
79 */
80 {
81 float *x_head_i = x_head;
82 float *x_tail_i = x_tail;
83 float *y_i = y;
84 int n_fix2;
85 int n_mix;
86 int ysize;
87 int i;
88
89 float *a_vec;
90 int m_i, n_i;
91 int max_mn;
92 int incy, incAB, incx;
93 float y_elem;
94
95 max_mn = MAX(m, n);
96 incx = incy = incAB = 1;
97
98
99
100
101 if (trans == blas_no_trans) {
102 m_i = m;
103 n_i = n;
104 } else {
105 m_i = n;
106 n_i = m;
107 }
108
109 a_vec = (float *) blas_malloc(max_mn * sizeof(float));
110 if (max_mn > 0 && a_vec == NULL) {
111 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
112 }
113
114 /* calling dot_testgen n time. in each iteration, one row of AB, and one
115 element of y are produced. the vector x is produced at the first
116 iteration only */
117 for (i = 0; i < m_i; i++) {
118 /* copy AB to a_vec */
119 sgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
120 &n_fix2, &n_mix, &ysize);
121
122 if (i == 1) {
123 /* from now on, fix alpha and beta */
124 alpha_flag = 1;
125 beta_flag = 1;
126 }
127
128 BLAS_sdot2_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
129 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
130 seed, &y_elem, &r_true_l[i * incy],
131 &r_true_t[i * incy]);
132 y_i[i * incy] = y_elem;
133
134
135 /* copy a_vec to AB */
136 sgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
137 }
138
139 /* Zero out trailing part of x */
140 for (i = ysize; i < n_i; i++) {
141 x_head_i[i * incx] = 0.0;
142 x_tail_i[i * incx] = 0.0;
143 }
144
145 blas_free(a_vec);
146 }
BLAS_dgbmv2_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,double * alpha,int alpha_flag,double * AB,int lda,double * x_head,double * x_tail,double * beta,int beta_flag,double * y,int * seed,double * r_true_l,double * r_true_t)147 void BLAS_dgbmv2_testgen(int norm, enum blas_order_type order,
148 enum blas_trans_type trans, int m, int n, int kl,
149 int ku, double *alpha, int alpha_flag, double *AB,
150 int lda, double *x_head, double *x_tail,
151 double *beta, int beta_flag, double *y, int *seed,
152 double *r_true_l, double *r_true_t)
153
154 /*
155 * Purpose
156 * =======
157 *
158 * Generates alpha, AB, x, beta, and y, where AB is a banded
159 * matrix; and computes r_true.
160 *
161 * Arguments
162 * =========
163 *
164 * norm (input) blas_norm_type
165 *
166 * order (input) blas_order_type
167 * Order of AB; row or column major
168 *
169 * trans (input) blas_trans_type
170 * Whether AB is no trans, trans, or conj trans
171 *
172 * m (input) int
173 * The number of rows
174 *
175 * n (input) int
176 * The number of columns
177 *
178 * kl (input) int
179 * The number of subdiagonals
180 *
181 * ku (input) int
182 * The number of superdiagonals
183 *
184 * alpha (input/output) double*
185 * If alpha_flag = 1, alpha is input.
186 * If alpha_flag = 0, alpha is output.
187 *
188 * alpha_flag (input) int
189 * = 0 : alpha is free, and is output.
190 * = 1 : alpha is fixed on input.
191 *
192 * AB (output) double*
193 * Matrix A in the banded storage.
194 *
195 *
196 * lda (input) int
197 * The first dimension of AB
198 *
199 * x_head (input/output) double*
200 * x_tail (input/output) double*
201 *
202 * beta (input/output) double*
203 * If beta_flag = 1, beta is input.
204 * If beta_flag = 0, beta is output.
205 *
206 * beta_flag (input) int
207 * = 0 : beta is free, and is output.
208 * = 1 : beta is fixed on input.
209 *
210 * y (input/output) double*
211 *
212 * seed (input/output) int
213 *
214 * r_true_l (output) double*
215 * The leading part of the truth in double-double.
216 *
217 * r_true_t (output) double*
218 * The trailing part of the truth in double-double.
219 *
220 */
221 {
222 double *x_head_i = x_head;
223 double *x_tail_i = x_tail;
224 double *y_i = y;
225 int n_fix2;
226 int n_mix;
227 int ysize;
228 int i;
229
230 double *a_vec;
231 int m_i, n_i;
232 int max_mn;
233 int incy, incAB, incx;
234 double y_elem;
235
236 max_mn = MAX(m, n);
237 incx = incy = incAB = 1;
238
239
240
241
242 if (trans == blas_no_trans) {
243 m_i = m;
244 n_i = n;
245 } else {
246 m_i = n;
247 n_i = m;
248 }
249
250 a_vec = (double *) blas_malloc(max_mn * sizeof(double));
251 if (max_mn > 0 && a_vec == NULL) {
252 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
253 }
254
255 /* calling dot_testgen n time. in each iteration, one row of AB, and one
256 element of y are produced. the vector x is produced at the first
257 iteration only */
258 for (i = 0; i < m_i; i++) {
259 /* copy AB to a_vec */
260 dgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
261 &n_fix2, &n_mix, &ysize);
262
263 if (i == 1) {
264 /* from now on, fix alpha and beta */
265 alpha_flag = 1;
266 beta_flag = 1;
267 }
268
269 BLAS_ddot2_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
270 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
271 seed, &y_elem, &r_true_l[i * incy],
272 &r_true_t[i * incy]);
273 y_i[i * incy] = y_elem;
274
275
276 /* copy a_vec to AB */
277 dgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
278 }
279
280 /* Zero out trailing part of x */
281 for (i = ysize; i < n_i; i++) {
282 x_head_i[i * incx] = 0.0;
283 x_tail_i[i * incx] = 0.0;
284 }
285
286 blas_free(a_vec);
287 }
BLAS_cgbmv2_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,void * AB,int lda,void * x_head,void * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)288 void BLAS_cgbmv2_testgen(int norm, enum blas_order_type order,
289 enum blas_trans_type trans, int m, int n, int kl,
290 int ku, void *alpha, int alpha_flag, void *AB,
291 int lda, void *x_head, void *x_tail, void *beta,
292 int beta_flag, void *y, int *seed, double *r_true_l,
293 double *r_true_t)
294
295 /*
296 * Purpose
297 * =======
298 *
299 * Generates alpha, AB, x, beta, and y, where AB is a banded
300 * matrix; and computes r_true.
301 *
302 * Arguments
303 * =========
304 *
305 * norm (input) blas_norm_type
306 *
307 * order (input) blas_order_type
308 * Order of AB; row or column major
309 *
310 * trans (input) blas_trans_type
311 * Whether AB is no trans, trans, or conj trans
312 *
313 * m (input) int
314 * The number of rows
315 *
316 * n (input) int
317 * The number of columns
318 *
319 * kl (input) int
320 * The number of subdiagonals
321 *
322 * ku (input) int
323 * The number of superdiagonals
324 *
325 * alpha (input/output) void*
326 * If alpha_flag = 1, alpha is input.
327 * If alpha_flag = 0, alpha is output.
328 *
329 * alpha_flag (input) int
330 * = 0 : alpha is free, and is output.
331 * = 1 : alpha is fixed on input.
332 *
333 * AB (output) void*
334 * Matrix A in the banded storage.
335 *
336 *
337 * lda (input) int
338 * The first dimension of AB
339 *
340 * x_head (input/output) void*
341 * x_tail (input/output) void*
342 *
343 * beta (input/output) void*
344 * If beta_flag = 1, beta is input.
345 * If beta_flag = 0, beta is output.
346 *
347 * beta_flag (input) int
348 * = 0 : beta is free, and is output.
349 * = 1 : beta is fixed on input.
350 *
351 * y (input/output) void*
352 *
353 * seed (input/output) int
354 *
355 * r_true_l (output) double*
356 * The leading part of the truth in double-double.
357 *
358 * r_true_t (output) double*
359 * The trailing part of the truth in double-double.
360 *
361 */
362 {
363 float *x_head_i = (float *) x_head;
364 float *x_tail_i = (float *) x_tail;
365 float *y_i = (float *) y;
366 int n_fix2;
367 int n_mix;
368 int ysize;
369 int i;
370 int j;
371 float *a_vec;
372 int m_i, n_i;
373 int max_mn;
374 int incy, incAB, incx;
375 float y_elem[2];
376
377 max_mn = MAX(m, n);
378 incx = incy = incAB = 1;
379 incy *= 2;
380 incAB *= 2;
381 incx *= 2;
382
383 if (trans == blas_no_trans) {
384 m_i = m;
385 n_i = n;
386 } else {
387 m_i = n;
388 n_i = m;
389 }
390
391 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
392 if (max_mn > 0 && a_vec == NULL) {
393 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
394 }
395
396 /* calling dot_testgen n time. in each iteration, one row of AB, and one
397 element of y are produced. the vector x is produced at the first
398 iteration only */
399 for (i = 0; i < m_i; i++) {
400 /* copy AB to a_vec */
401 cgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
402 &n_fix2, &n_mix, &ysize);
403
404 if (i == 1) {
405 /* from now on, fix alpha and beta */
406 alpha_flag = 1;
407 beta_flag = 1;
408 }
409
410 BLAS_cdot2_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
411 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
412 seed, y_elem, &r_true_l[i * incy],
413 &r_true_t[i * incy]);
414 y_i[i * incy] = y_elem[0];
415 y_i[i * incy + 1] = y_elem[1];
416
417 if (trans == blas_conj_trans) {
418 for (j = 0; j < n_i * incAB; j += 2) {
419 a_vec[j + 1] = -a_vec[j + 1];
420 }
421 }
422 /* copy a_vec to AB */
423 cgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
424 }
425
426 /* Zero out trailing part of x */
427 for (i = ysize; i < n_i; i++) {
428 x_head_i[i * incx] = 0.0;
429 x_head_i[i * incx + 1] = 0.0;
430 x_tail_i[i * incx] = 0.0;
431 x_tail_i[i * incx + 1] = 0.0;
432 }
433
434 blas_free(a_vec);
435 }
BLAS_zgbmv2_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,void * AB,int lda,void * x_head,void * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)436 void BLAS_zgbmv2_testgen(int norm, enum blas_order_type order,
437 enum blas_trans_type trans, int m, int n, int kl,
438 int ku, void *alpha, int alpha_flag, void *AB,
439 int lda, void *x_head, void *x_tail, void *beta,
440 int beta_flag, void *y, int *seed, double *r_true_l,
441 double *r_true_t)
442
443 /*
444 * Purpose
445 * =======
446 *
447 * Generates alpha, AB, x, beta, and y, where AB is a banded
448 * matrix; and computes r_true.
449 *
450 * Arguments
451 * =========
452 *
453 * norm (input) blas_norm_type
454 *
455 * order (input) blas_order_type
456 * Order of AB; row or column major
457 *
458 * trans (input) blas_trans_type
459 * Whether AB is no trans, trans, or conj trans
460 *
461 * m (input) int
462 * The number of rows
463 *
464 * n (input) int
465 * The number of columns
466 *
467 * kl (input) int
468 * The number of subdiagonals
469 *
470 * ku (input) int
471 * The number of superdiagonals
472 *
473 * alpha (input/output) void*
474 * If alpha_flag = 1, alpha is input.
475 * If alpha_flag = 0, alpha is output.
476 *
477 * alpha_flag (input) int
478 * = 0 : alpha is free, and is output.
479 * = 1 : alpha is fixed on input.
480 *
481 * AB (output) void*
482 * Matrix A in the banded storage.
483 *
484 *
485 * lda (input) int
486 * The first dimension of AB
487 *
488 * x_head (input/output) void*
489 * x_tail (input/output) void*
490 *
491 * beta (input/output) void*
492 * If beta_flag = 1, beta is input.
493 * If beta_flag = 0, beta is output.
494 *
495 * beta_flag (input) int
496 * = 0 : beta is free, and is output.
497 * = 1 : beta is fixed on input.
498 *
499 * y (input/output) void*
500 *
501 * seed (input/output) int
502 *
503 * r_true_l (output) double*
504 * The leading part of the truth in double-double.
505 *
506 * r_true_t (output) double*
507 * The trailing part of the truth in double-double.
508 *
509 */
510 {
511 double *x_head_i = (double *) x_head;
512 double *x_tail_i = (double *) x_tail;
513 double *y_i = (double *) y;
514 int n_fix2;
515 int n_mix;
516 int ysize;
517 int i;
518 int j;
519 double *a_vec;
520 int m_i, n_i;
521 int max_mn;
522 int incy, incAB, incx;
523 double y_elem[2];
524
525 max_mn = MAX(m, n);
526 incx = incy = incAB = 1;
527 incy *= 2;
528 incAB *= 2;
529 incx *= 2;
530
531 if (trans == blas_no_trans) {
532 m_i = m;
533 n_i = n;
534 } else {
535 m_i = n;
536 n_i = m;
537 }
538
539 a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
540 if (max_mn > 0 && a_vec == NULL) {
541 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
542 }
543
544 /* calling dot_testgen n time. in each iteration, one row of AB, and one
545 element of y are produced. the vector x is produced at the first
546 iteration only */
547 for (i = 0; i < m_i; i++) {
548 /* copy AB to a_vec */
549 zgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
550 &n_fix2, &n_mix, &ysize);
551
552 if (i == 1) {
553 /* from now on, fix alpha and beta */
554 alpha_flag = 1;
555 beta_flag = 1;
556 }
557
558 BLAS_zdot2_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
559 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
560 seed, y_elem, &r_true_l[i * incy],
561 &r_true_t[i * incy]);
562 y_i[i * incy] = y_elem[0];
563 y_i[i * incy + 1] = y_elem[1];
564
565 if (trans == blas_conj_trans) {
566 for (j = 0; j < n_i * incAB; j += 2) {
567 a_vec[j + 1] = -a_vec[j + 1];
568 }
569 }
570 /* copy a_vec to AB */
571 zgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
572 }
573
574 /* Zero out trailing part of x */
575 for (i = ysize; i < n_i; i++) {
576 x_head_i[i * incx] = 0.0;
577 x_head_i[i * incx + 1] = 0.0;
578 x_tail_i[i * incx] = 0.0;
579 x_tail_i[i * incx + 1] = 0.0;
580 }
581
582 blas_free(a_vec);
583 }
BLAS_cgbmv2_s_s_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,float * AB,int lda,float * x_head,float * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)584 void BLAS_cgbmv2_s_s_testgen(int norm, enum blas_order_type order,
585 enum blas_trans_type trans, int m, int n, int kl,
586 int ku, void *alpha, int alpha_flag, float *AB,
587 int lda, float *x_head, float *x_tail,
588 void *beta, int beta_flag, void *y, int *seed,
589 double *r_true_l, double *r_true_t)
590
591 /*
592 * Purpose
593 * =======
594 *
595 * Generates alpha, AB, x, beta, and y, where AB is a banded
596 * matrix; and computes r_true.
597 *
598 * Arguments
599 * =========
600 *
601 * norm (input) blas_norm_type
602 *
603 * order (input) blas_order_type
604 * Order of AB; row or column major
605 *
606 * trans (input) blas_trans_type
607 * Whether AB is no trans, trans, or conj trans
608 *
609 * m (input) int
610 * The number of rows
611 *
612 * n (input) int
613 * The number of columns
614 *
615 * kl (input) int
616 * The number of subdiagonals
617 *
618 * ku (input) int
619 * The number of superdiagonals
620 *
621 * alpha (input/output) void*
622 * If alpha_flag = 1, alpha is input.
623 * If alpha_flag = 0, alpha is output.
624 *
625 * alpha_flag (input) int
626 * = 0 : alpha is free, and is output.
627 * = 1 : alpha is fixed on input.
628 *
629 * AB (output) float*
630 * Matrix A in the banded storage.
631 *
632 *
633 * lda (input) int
634 * The first dimension of AB
635 *
636 * x_head (input/output) float*
637 * x_tail (input/output) float*
638 *
639 * beta (input/output) void*
640 * If beta_flag = 1, beta is input.
641 * If beta_flag = 0, beta is output.
642 *
643 * beta_flag (input) int
644 * = 0 : beta is free, and is output.
645 * = 1 : beta is fixed on input.
646 *
647 * y (input/output) void*
648 *
649 * seed (input/output) int
650 *
651 * r_true_l (output) double*
652 * The leading part of the truth in double-double.
653 *
654 * r_true_t (output) double*
655 * The trailing part of the truth in double-double.
656 *
657 */
658 {
659 float *x_head_i = x_head;
660 float *x_tail_i = x_tail;
661 float *y_i = (float *) y;
662 int n_fix2;
663 int n_mix;
664 int ysize;
665 int i;
666
667 float *a_vec;
668 int m_i, n_i;
669 int max_mn;
670 int incy, incAB, incx;
671 float y_elem[2];
672
673 max_mn = MAX(m, n);
674 incx = incy = incAB = 1;
675 incy *= 2;
676
677
678
679 if (trans == blas_no_trans) {
680 m_i = m;
681 n_i = n;
682 } else {
683 m_i = n;
684 n_i = m;
685 }
686
687 a_vec = (float *) blas_malloc(max_mn * sizeof(float));
688 if (max_mn > 0 && a_vec == NULL) {
689 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
690 }
691
692 /* calling dot_testgen n time. in each iteration, one row of AB, and one
693 element of y are produced. the vector x is produced at the first
694 iteration only */
695 for (i = 0; i < m_i; i++) {
696 /* copy AB to a_vec */
697 sgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
698 &n_fix2, &n_mix, &ysize);
699
700 if (i == 1) {
701 /* from now on, fix alpha and beta */
702 alpha_flag = 1;
703 beta_flag = 1;
704 }
705
706 BLAS_cdot2_s_s_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
707 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
708 seed, y_elem, &r_true_l[i * incy],
709 &r_true_t[i * incy]);
710 y_i[i * incy] = y_elem[0];
711 y_i[i * incy + 1] = y_elem[1];
712
713
714 /* copy a_vec to AB */
715 sgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
716 }
717
718 /* Zero out trailing part of x */
719 for (i = ysize; i < n_i; i++) {
720 x_head_i[i * incx] = 0.0;
721 x_tail_i[i * incx] = 0.0;
722 }
723
724 blas_free(a_vec);
725 }
BLAS_cgbmv2_s_c_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,float * AB,int lda,void * x_head,void * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)726 void BLAS_cgbmv2_s_c_testgen(int norm, enum blas_order_type order,
727 enum blas_trans_type trans, int m, int n, int kl,
728 int ku, void *alpha, int alpha_flag, float *AB,
729 int lda, void *x_head, void *x_tail, void *beta,
730 int beta_flag, void *y, int *seed,
731 double *r_true_l, double *r_true_t)
732
733 /*
734 * Purpose
735 * =======
736 *
737 * Generates alpha, AB, x, beta, and y, where AB is a banded
738 * matrix; and computes r_true.
739 *
740 * Arguments
741 * =========
742 *
743 * norm (input) blas_norm_type
744 *
745 * order (input) blas_order_type
746 * Order of AB; row or column major
747 *
748 * trans (input) blas_trans_type
749 * Whether AB is no trans, trans, or conj trans
750 *
751 * m (input) int
752 * The number of rows
753 *
754 * n (input) int
755 * The number of columns
756 *
757 * kl (input) int
758 * The number of subdiagonals
759 *
760 * ku (input) int
761 * The number of superdiagonals
762 *
763 * alpha (input/output) void*
764 * If alpha_flag = 1, alpha is input.
765 * If alpha_flag = 0, alpha is output.
766 *
767 * alpha_flag (input) int
768 * = 0 : alpha is free, and is output.
769 * = 1 : alpha is fixed on input.
770 *
771 * AB (output) float*
772 * Matrix A in the banded storage.
773 *
774 *
775 * lda (input) int
776 * The first dimension of AB
777 *
778 * x_head (input/output) void*
779 * x_tail (input/output) void*
780 *
781 * beta (input/output) void*
782 * If beta_flag = 1, beta is input.
783 * If beta_flag = 0, beta is output.
784 *
785 * beta_flag (input) int
786 * = 0 : beta is free, and is output.
787 * = 1 : beta is fixed on input.
788 *
789 * y (input/output) void*
790 *
791 * seed (input/output) int
792 *
793 * r_true_l (output) double*
794 * The leading part of the truth in double-double.
795 *
796 * r_true_t (output) double*
797 * The trailing part of the truth in double-double.
798 *
799 */
800 {
801 float *x_head_i = (float *) x_head;
802 float *x_tail_i = (float *) x_tail;
803 float *y_i = (float *) y;
804 int n_fix2;
805 int n_mix;
806 int ysize;
807 int i;
808
809 float *a_vec;
810 int m_i, n_i;
811 int max_mn;
812 int incy, incAB, incx;
813 float y_elem[2];
814
815 max_mn = MAX(m, n);
816 incx = incy = incAB = 1;
817 incy *= 2;
818
819 incx *= 2;
820
821 if (trans == blas_no_trans) {
822 m_i = m;
823 n_i = n;
824 } else {
825 m_i = n;
826 n_i = m;
827 }
828
829 a_vec = (float *) blas_malloc(max_mn * sizeof(float));
830 if (max_mn > 0 && a_vec == NULL) {
831 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
832 }
833
834 /* calling dot_testgen n time. in each iteration, one row of AB, and one
835 element of y are produced. the vector x is produced at the first
836 iteration only */
837 for (i = 0; i < m_i; i++) {
838 /* copy AB to a_vec */
839 sgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
840 &n_fix2, &n_mix, &ysize);
841
842 if (i == 1) {
843 /* from now on, fix alpha and beta */
844 alpha_flag = 1;
845 beta_flag = 1;
846 }
847
848 BLAS_cdot2_c_s_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
849 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
850 seed, y_elem, &r_true_l[i * incy],
851 &r_true_t[i * incy]);
852 y_i[i * incy] = y_elem[0];
853 y_i[i * incy + 1] = y_elem[1];
854
855
856 /* copy a_vec to AB */
857 sgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
858 }
859
860 /* Zero out trailing part of x */
861 for (i = ysize; i < n_i; i++) {
862 x_head_i[i * incx] = 0.0;
863 x_head_i[i * incx + 1] = 0.0;
864 x_tail_i[i * incx] = 0.0;
865 x_tail_i[i * incx + 1] = 0.0;
866 }
867
868 blas_free(a_vec);
869 }
BLAS_cgbmv2_c_s_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,void * AB,int lda,float * x_head,float * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)870 void BLAS_cgbmv2_c_s_testgen(int norm, enum blas_order_type order,
871 enum blas_trans_type trans, int m, int n, int kl,
872 int ku, void *alpha, int alpha_flag, void *AB,
873 int lda, float *x_head, float *x_tail,
874 void *beta, int beta_flag, void *y, int *seed,
875 double *r_true_l, double *r_true_t)
876
877 /*
878 * Purpose
879 * =======
880 *
881 * Generates alpha, AB, x, beta, and y, where AB is a banded
882 * matrix; and computes r_true.
883 *
884 * Arguments
885 * =========
886 *
887 * norm (input) blas_norm_type
888 *
889 * order (input) blas_order_type
890 * Order of AB; row or column major
891 *
892 * trans (input) blas_trans_type
893 * Whether AB is no trans, trans, or conj trans
894 *
895 * m (input) int
896 * The number of rows
897 *
898 * n (input) int
899 * The number of columns
900 *
901 * kl (input) int
902 * The number of subdiagonals
903 *
904 * ku (input) int
905 * The number of superdiagonals
906 *
907 * alpha (input/output) void*
908 * If alpha_flag = 1, alpha is input.
909 * If alpha_flag = 0, alpha is output.
910 *
911 * alpha_flag (input) int
912 * = 0 : alpha is free, and is output.
913 * = 1 : alpha is fixed on input.
914 *
915 * AB (output) void*
916 * Matrix A in the banded storage.
917 *
918 *
919 * lda (input) int
920 * The first dimension of AB
921 *
922 * x_head (input/output) float*
923 * x_tail (input/output) float*
924 *
925 * beta (input/output) void*
926 * If beta_flag = 1, beta is input.
927 * If beta_flag = 0, beta is output.
928 *
929 * beta_flag (input) int
930 * = 0 : beta is free, and is output.
931 * = 1 : beta is fixed on input.
932 *
933 * y (input/output) void*
934 *
935 * seed (input/output) int
936 *
937 * r_true_l (output) double*
938 * The leading part of the truth in double-double.
939 *
940 * r_true_t (output) double*
941 * The trailing part of the truth in double-double.
942 *
943 */
944 {
945 float *x_head_i = x_head;
946 float *x_tail_i = x_tail;
947 float *y_i = (float *) y;
948 int n_fix2;
949 int n_mix;
950 int ysize;
951 int i;
952 int j;
953 float *a_vec;
954 int m_i, n_i;
955 int max_mn;
956 int incy, incAB, incx;
957 float y_elem[2];
958
959 max_mn = MAX(m, n);
960 incx = incy = incAB = 1;
961 incy *= 2;
962 incAB *= 2;
963
964
965 if (trans == blas_no_trans) {
966 m_i = m;
967 n_i = n;
968 } else {
969 m_i = n;
970 n_i = m;
971 }
972
973 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
974 if (max_mn > 0 && a_vec == NULL) {
975 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
976 }
977
978 /* calling dot_testgen n time. in each iteration, one row of AB, and one
979 element of y are produced. the vector x is produced at the first
980 iteration only */
981 for (i = 0; i < m_i; i++) {
982 /* copy AB to a_vec */
983 cgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
984 &n_fix2, &n_mix, &ysize);
985
986 if (i == 1) {
987 /* from now on, fix alpha and beta */
988 alpha_flag = 1;
989 beta_flag = 1;
990 }
991
992 BLAS_cdot2_s_c_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
993 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
994 seed, y_elem, &r_true_l[i * incy],
995 &r_true_t[i * incy]);
996 y_i[i * incy] = y_elem[0];
997 y_i[i * incy + 1] = y_elem[1];
998
999 if (trans == blas_conj_trans) {
1000 for (j = 0; j < n_i * incAB; j += 2) {
1001 a_vec[j + 1] = -a_vec[j + 1];
1002 }
1003 }
1004 /* copy a_vec to AB */
1005 cgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
1006 }
1007
1008 /* Zero out trailing part of x */
1009 for (i = ysize; i < n_i; i++) {
1010 x_head_i[i * incx] = 0.0;
1011 x_tail_i[i * incx] = 0.0;
1012 }
1013
1014 blas_free(a_vec);
1015 }
BLAS_zgbmv2_d_d_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,double * AB,int lda,double * x_head,double * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)1016 void BLAS_zgbmv2_d_d_testgen(int norm, enum blas_order_type order,
1017 enum blas_trans_type trans, int m, int n, int kl,
1018 int ku, void *alpha, int alpha_flag, double *AB,
1019 int lda, double *x_head, double *x_tail,
1020 void *beta, int beta_flag, void *y, int *seed,
1021 double *r_true_l, double *r_true_t)
1022
1023 /*
1024 * Purpose
1025 * =======
1026 *
1027 * Generates alpha, AB, x, beta, and y, where AB is a banded
1028 * matrix; and computes r_true.
1029 *
1030 * Arguments
1031 * =========
1032 *
1033 * norm (input) blas_norm_type
1034 *
1035 * order (input) blas_order_type
1036 * Order of AB; row or column major
1037 *
1038 * trans (input) blas_trans_type
1039 * Whether AB is no trans, trans, or conj trans
1040 *
1041 * m (input) int
1042 * The number of rows
1043 *
1044 * n (input) int
1045 * The number of columns
1046 *
1047 * kl (input) int
1048 * The number of subdiagonals
1049 *
1050 * ku (input) int
1051 * The number of superdiagonals
1052 *
1053 * alpha (input/output) void*
1054 * If alpha_flag = 1, alpha is input.
1055 * If alpha_flag = 0, alpha is output.
1056 *
1057 * alpha_flag (input) int
1058 * = 0 : alpha is free, and is output.
1059 * = 1 : alpha is fixed on input.
1060 *
1061 * AB (output) double*
1062 * Matrix A in the banded storage.
1063 *
1064 *
1065 * lda (input) int
1066 * The first dimension of AB
1067 *
1068 * x_head (input/output) double*
1069 * x_tail (input/output) double*
1070 *
1071 * beta (input/output) void*
1072 * If beta_flag = 1, beta is input.
1073 * If beta_flag = 0, beta is output.
1074 *
1075 * beta_flag (input) int
1076 * = 0 : beta is free, and is output.
1077 * = 1 : beta is fixed on input.
1078 *
1079 * y (input/output) void*
1080 *
1081 * seed (input/output) int
1082 *
1083 * r_true_l (output) double*
1084 * The leading part of the truth in double-double.
1085 *
1086 * r_true_t (output) double*
1087 * The trailing part of the truth in double-double.
1088 *
1089 */
1090 {
1091 double *x_head_i = x_head;
1092 double *x_tail_i = x_tail;
1093 double *y_i = (double *) y;
1094 int n_fix2;
1095 int n_mix;
1096 int ysize;
1097 int i;
1098
1099 double *a_vec;
1100 int m_i, n_i;
1101 int max_mn;
1102 int incy, incAB, incx;
1103 double y_elem[2];
1104
1105 max_mn = MAX(m, n);
1106 incx = incy = incAB = 1;
1107 incy *= 2;
1108
1109
1110
1111 if (trans == blas_no_trans) {
1112 m_i = m;
1113 n_i = n;
1114 } else {
1115 m_i = n;
1116 n_i = m;
1117 }
1118
1119 a_vec = (double *) blas_malloc(max_mn * sizeof(double));
1120 if (max_mn > 0 && a_vec == NULL) {
1121 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1122 }
1123
1124 /* calling dot_testgen n time. in each iteration, one row of AB, and one
1125 element of y are produced. the vector x is produced at the first
1126 iteration only */
1127 for (i = 0; i < m_i; i++) {
1128 /* copy AB to a_vec */
1129 dgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
1130 &n_fix2, &n_mix, &ysize);
1131
1132 if (i == 1) {
1133 /* from now on, fix alpha and beta */
1134 alpha_flag = 1;
1135 beta_flag = 1;
1136 }
1137
1138 BLAS_zdot2_d_d_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
1139 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
1140 seed, y_elem, &r_true_l[i * incy],
1141 &r_true_t[i * incy]);
1142 y_i[i * incy] = y_elem[0];
1143 y_i[i * incy + 1] = y_elem[1];
1144
1145
1146 /* copy a_vec to AB */
1147 dgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
1148 }
1149
1150 /* Zero out trailing part of x */
1151 for (i = ysize; i < n_i; i++) {
1152 x_head_i[i * incx] = 0.0;
1153 x_tail_i[i * incx] = 0.0;
1154 }
1155
1156 blas_free(a_vec);
1157 }
BLAS_zgbmv2_d_z_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,double * AB,int lda,void * x_head,void * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)1158 void BLAS_zgbmv2_d_z_testgen(int norm, enum blas_order_type order,
1159 enum blas_trans_type trans, int m, int n, int kl,
1160 int ku, void *alpha, int alpha_flag, double *AB,
1161 int lda, void *x_head, void *x_tail, void *beta,
1162 int beta_flag, void *y, int *seed,
1163 double *r_true_l, double *r_true_t)
1164
1165 /*
1166 * Purpose
1167 * =======
1168 *
1169 * Generates alpha, AB, x, beta, and y, where AB is a banded
1170 * matrix; and computes r_true.
1171 *
1172 * Arguments
1173 * =========
1174 *
1175 * norm (input) blas_norm_type
1176 *
1177 * order (input) blas_order_type
1178 * Order of AB; row or column major
1179 *
1180 * trans (input) blas_trans_type
1181 * Whether AB is no trans, trans, or conj trans
1182 *
1183 * m (input) int
1184 * The number of rows
1185 *
1186 * n (input) int
1187 * The number of columns
1188 *
1189 * kl (input) int
1190 * The number of subdiagonals
1191 *
1192 * ku (input) int
1193 * The number of superdiagonals
1194 *
1195 * alpha (input/output) void*
1196 * If alpha_flag = 1, alpha is input.
1197 * If alpha_flag = 0, alpha is output.
1198 *
1199 * alpha_flag (input) int
1200 * = 0 : alpha is free, and is output.
1201 * = 1 : alpha is fixed on input.
1202 *
1203 * AB (output) double*
1204 * Matrix A in the banded storage.
1205 *
1206 *
1207 * lda (input) int
1208 * The first dimension of AB
1209 *
1210 * x_head (input/output) void*
1211 * x_tail (input/output) void*
1212 *
1213 * beta (input/output) void*
1214 * If beta_flag = 1, beta is input.
1215 * If beta_flag = 0, beta is output.
1216 *
1217 * beta_flag (input) int
1218 * = 0 : beta is free, and is output.
1219 * = 1 : beta is fixed on input.
1220 *
1221 * y (input/output) void*
1222 *
1223 * seed (input/output) int
1224 *
1225 * r_true_l (output) double*
1226 * The leading part of the truth in double-double.
1227 *
1228 * r_true_t (output) double*
1229 * The trailing part of the truth in double-double.
1230 *
1231 */
1232 {
1233 double *x_head_i = (double *) x_head;
1234 double *x_tail_i = (double *) x_tail;
1235 double *y_i = (double *) y;
1236 int n_fix2;
1237 int n_mix;
1238 int ysize;
1239 int i;
1240
1241 double *a_vec;
1242 int m_i, n_i;
1243 int max_mn;
1244 int incy, incAB, incx;
1245 double y_elem[2];
1246
1247 max_mn = MAX(m, n);
1248 incx = incy = incAB = 1;
1249 incy *= 2;
1250
1251 incx *= 2;
1252
1253 if (trans == blas_no_trans) {
1254 m_i = m;
1255 n_i = n;
1256 } else {
1257 m_i = n;
1258 n_i = m;
1259 }
1260
1261 a_vec = (double *) blas_malloc(max_mn * sizeof(double));
1262 if (max_mn > 0 && a_vec == NULL) {
1263 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1264 }
1265
1266 /* calling dot_testgen n time. in each iteration, one row of AB, and one
1267 element of y are produced. the vector x is produced at the first
1268 iteration only */
1269 for (i = 0; i < m_i; i++) {
1270 /* copy AB to a_vec */
1271 dgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
1272 &n_fix2, &n_mix, &ysize);
1273
1274 if (i == 1) {
1275 /* from now on, fix alpha and beta */
1276 alpha_flag = 1;
1277 beta_flag = 1;
1278 }
1279
1280 BLAS_zdot2_z_d_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
1281 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
1282 seed, y_elem, &r_true_l[i * incy],
1283 &r_true_t[i * incy]);
1284 y_i[i * incy] = y_elem[0];
1285 y_i[i * incy + 1] = y_elem[1];
1286
1287
1288 /* copy a_vec to AB */
1289 dgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
1290 }
1291
1292 /* Zero out trailing part of x */
1293 for (i = ysize; i < n_i; i++) {
1294 x_head_i[i * incx] = 0.0;
1295 x_head_i[i * incx + 1] = 0.0;
1296 x_tail_i[i * incx] = 0.0;
1297 x_tail_i[i * incx + 1] = 0.0;
1298 }
1299
1300 blas_free(a_vec);
1301 }
BLAS_zgbmv2_z_d_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,void * AB,int lda,double * x_head,double * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)1302 void BLAS_zgbmv2_z_d_testgen(int norm, enum blas_order_type order,
1303 enum blas_trans_type trans, int m, int n, int kl,
1304 int ku, void *alpha, int alpha_flag, void *AB,
1305 int lda, double *x_head, double *x_tail,
1306 void *beta, int beta_flag, void *y, int *seed,
1307 double *r_true_l, double *r_true_t)
1308
1309 /*
1310 * Purpose
1311 * =======
1312 *
1313 * Generates alpha, AB, x, beta, and y, where AB is a banded
1314 * matrix; and computes r_true.
1315 *
1316 * Arguments
1317 * =========
1318 *
1319 * norm (input) blas_norm_type
1320 *
1321 * order (input) blas_order_type
1322 * Order of AB; row or column major
1323 *
1324 * trans (input) blas_trans_type
1325 * Whether AB is no trans, trans, or conj trans
1326 *
1327 * m (input) int
1328 * The number of rows
1329 *
1330 * n (input) int
1331 * The number of columns
1332 *
1333 * kl (input) int
1334 * The number of subdiagonals
1335 *
1336 * ku (input) int
1337 * The number of superdiagonals
1338 *
1339 * alpha (input/output) void*
1340 * If alpha_flag = 1, alpha is input.
1341 * If alpha_flag = 0, alpha is output.
1342 *
1343 * alpha_flag (input) int
1344 * = 0 : alpha is free, and is output.
1345 * = 1 : alpha is fixed on input.
1346 *
1347 * AB (output) void*
1348 * Matrix A in the banded storage.
1349 *
1350 *
1351 * lda (input) int
1352 * The first dimension of AB
1353 *
1354 * x_head (input/output) double*
1355 * x_tail (input/output) double*
1356 *
1357 * beta (input/output) void*
1358 * If beta_flag = 1, beta is input.
1359 * If beta_flag = 0, beta is output.
1360 *
1361 * beta_flag (input) int
1362 * = 0 : beta is free, and is output.
1363 * = 1 : beta is fixed on input.
1364 *
1365 * y (input/output) void*
1366 *
1367 * seed (input/output) int
1368 *
1369 * r_true_l (output) double*
1370 * The leading part of the truth in double-double.
1371 *
1372 * r_true_t (output) double*
1373 * The trailing part of the truth in double-double.
1374 *
1375 */
1376 {
1377 double *x_head_i = x_head;
1378 double *x_tail_i = x_tail;
1379 double *y_i = (double *) y;
1380 int n_fix2;
1381 int n_mix;
1382 int ysize;
1383 int i;
1384 int j;
1385 double *a_vec;
1386 int m_i, n_i;
1387 int max_mn;
1388 int incy, incAB, incx;
1389 double y_elem[2];
1390
1391 max_mn = MAX(m, n);
1392 incx = incy = incAB = 1;
1393 incy *= 2;
1394 incAB *= 2;
1395
1396
1397 if (trans == blas_no_trans) {
1398 m_i = m;
1399 n_i = n;
1400 } else {
1401 m_i = n;
1402 n_i = m;
1403 }
1404
1405 a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
1406 if (max_mn > 0 && a_vec == NULL) {
1407 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1408 }
1409
1410 /* calling dot_testgen n time. in each iteration, one row of AB, and one
1411 element of y are produced. the vector x is produced at the first
1412 iteration only */
1413 for (i = 0; i < m_i; i++) {
1414 /* copy AB to a_vec */
1415 zgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
1416 &n_fix2, &n_mix, &ysize);
1417
1418 if (i == 1) {
1419 /* from now on, fix alpha and beta */
1420 alpha_flag = 1;
1421 beta_flag = 1;
1422 }
1423
1424 BLAS_zdot2_d_z_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
1425 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
1426 seed, y_elem, &r_true_l[i * incy],
1427 &r_true_t[i * incy]);
1428 y_i[i * incy] = y_elem[0];
1429 y_i[i * incy + 1] = y_elem[1];
1430
1431 if (trans == blas_conj_trans) {
1432 for (j = 0; j < n_i * incAB; j += 2) {
1433 a_vec[j + 1] = -a_vec[j + 1];
1434 }
1435 }
1436 /* copy a_vec to AB */
1437 zgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
1438 }
1439
1440 /* Zero out trailing part of x */
1441 for (i = ysize; i < n_i; i++) {
1442 x_head_i[i * incx] = 0.0;
1443 x_tail_i[i * incx] = 0.0;
1444 }
1445
1446 blas_free(a_vec);
1447 }
BLAS_dgbmv2_s_s_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,double * alpha,int alpha_flag,float * AB,int lda,float * x_head,float * x_tail,double * beta,int beta_flag,double * y,int * seed,double * r_true_l,double * r_true_t)1448 void BLAS_dgbmv2_s_s_testgen(int norm, enum blas_order_type order,
1449 enum blas_trans_type trans, int m, int n, int kl,
1450 int ku, double *alpha, int alpha_flag, float *AB,
1451 int lda, float *x_head, float *x_tail,
1452 double *beta, int beta_flag, double *y,
1453 int *seed, double *r_true_l, double *r_true_t)
1454
1455 /*
1456 * Purpose
1457 * =======
1458 *
1459 * Generates alpha, AB, x, beta, and y, where AB is a banded
1460 * matrix; and computes r_true.
1461 *
1462 * Arguments
1463 * =========
1464 *
1465 * norm (input) blas_norm_type
1466 *
1467 * order (input) blas_order_type
1468 * Order of AB; row or column major
1469 *
1470 * trans (input) blas_trans_type
1471 * Whether AB is no trans, trans, or conj trans
1472 *
1473 * m (input) int
1474 * The number of rows
1475 *
1476 * n (input) int
1477 * The number of columns
1478 *
1479 * kl (input) int
1480 * The number of subdiagonals
1481 *
1482 * ku (input) int
1483 * The number of superdiagonals
1484 *
1485 * alpha (input/output) double*
1486 * If alpha_flag = 1, alpha is input.
1487 * If alpha_flag = 0, alpha is output.
1488 *
1489 * alpha_flag (input) int
1490 * = 0 : alpha is free, and is output.
1491 * = 1 : alpha is fixed on input.
1492 *
1493 * AB (output) float*
1494 * Matrix A in the banded storage.
1495 *
1496 *
1497 * lda (input) int
1498 * The first dimension of AB
1499 *
1500 * x_head (input/output) float*
1501 * x_tail (input/output) float*
1502 *
1503 * beta (input/output) double*
1504 * If beta_flag = 1, beta is input.
1505 * If beta_flag = 0, beta is output.
1506 *
1507 * beta_flag (input) int
1508 * = 0 : beta is free, and is output.
1509 * = 1 : beta is fixed on input.
1510 *
1511 * y (input/output) double*
1512 *
1513 * seed (input/output) int
1514 *
1515 * r_true_l (output) double*
1516 * The leading part of the truth in double-double.
1517 *
1518 * r_true_t (output) double*
1519 * The trailing part of the truth in double-double.
1520 *
1521 */
1522 {
1523 float *x_head_i = x_head;
1524 float *x_tail_i = x_tail;
1525 double *y_i = y;
1526 int n_fix2;
1527 int n_mix;
1528 int ysize;
1529 int i;
1530
1531 float *a_vec;
1532 int m_i, n_i;
1533 int max_mn;
1534 int incy, incAB, incx;
1535 double y_elem;
1536
1537 max_mn = MAX(m, n);
1538 incx = incy = incAB = 1;
1539
1540
1541
1542
1543 if (trans == blas_no_trans) {
1544 m_i = m;
1545 n_i = n;
1546 } else {
1547 m_i = n;
1548 n_i = m;
1549 }
1550
1551 a_vec = (float *) blas_malloc(max_mn * sizeof(float));
1552 if (max_mn > 0 && a_vec == NULL) {
1553 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1554 }
1555
1556 /* calling dot_testgen n time. in each iteration, one row of AB, and one
1557 element of y are produced. the vector x is produced at the first
1558 iteration only */
1559 for (i = 0; i < m_i; i++) {
1560 /* copy AB to a_vec */
1561 sgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
1562 &n_fix2, &n_mix, &ysize);
1563
1564 if (i == 1) {
1565 /* from now on, fix alpha and beta */
1566 alpha_flag = 1;
1567 beta_flag = 1;
1568 }
1569
1570 BLAS_ddot2_s_s_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
1571 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
1572 seed, &y_elem, &r_true_l[i * incy],
1573 &r_true_t[i * incy]);
1574 y_i[i * incy] = y_elem;
1575
1576
1577 /* copy a_vec to AB */
1578 sgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
1579 }
1580
1581 /* Zero out trailing part of x */
1582 for (i = ysize; i < n_i; i++) {
1583 x_head_i[i * incx] = 0.0;
1584 x_tail_i[i * incx] = 0.0;
1585 }
1586
1587 blas_free(a_vec);
1588 }
BLAS_dgbmv2_s_d_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,double * alpha,int alpha_flag,float * AB,int lda,double * x_head,double * x_tail,double * beta,int beta_flag,double * y,int * seed,double * r_true_l,double * r_true_t)1589 void BLAS_dgbmv2_s_d_testgen(int norm, enum blas_order_type order,
1590 enum blas_trans_type trans, int m, int n, int kl,
1591 int ku, double *alpha, int alpha_flag, float *AB,
1592 int lda, double *x_head, double *x_tail,
1593 double *beta, int beta_flag, double *y,
1594 int *seed, double *r_true_l, double *r_true_t)
1595
1596 /*
1597 * Purpose
1598 * =======
1599 *
1600 * Generates alpha, AB, x, beta, and y, where AB is a banded
1601 * matrix; and computes r_true.
1602 *
1603 * Arguments
1604 * =========
1605 *
1606 * norm (input) blas_norm_type
1607 *
1608 * order (input) blas_order_type
1609 * Order of AB; row or column major
1610 *
1611 * trans (input) blas_trans_type
1612 * Whether AB is no trans, trans, or conj trans
1613 *
1614 * m (input) int
1615 * The number of rows
1616 *
1617 * n (input) int
1618 * The number of columns
1619 *
1620 * kl (input) int
1621 * The number of subdiagonals
1622 *
1623 * ku (input) int
1624 * The number of superdiagonals
1625 *
1626 * alpha (input/output) double*
1627 * If alpha_flag = 1, alpha is input.
1628 * If alpha_flag = 0, alpha is output.
1629 *
1630 * alpha_flag (input) int
1631 * = 0 : alpha is free, and is output.
1632 * = 1 : alpha is fixed on input.
1633 *
1634 * AB (output) float*
1635 * Matrix A in the banded storage.
1636 *
1637 *
1638 * lda (input) int
1639 * The first dimension of AB
1640 *
1641 * x_head (input/output) double*
1642 * x_tail (input/output) double*
1643 *
1644 * beta (input/output) double*
1645 * If beta_flag = 1, beta is input.
1646 * If beta_flag = 0, beta is output.
1647 *
1648 * beta_flag (input) int
1649 * = 0 : beta is free, and is output.
1650 * = 1 : beta is fixed on input.
1651 *
1652 * y (input/output) double*
1653 *
1654 * seed (input/output) int
1655 *
1656 * r_true_l (output) double*
1657 * The leading part of the truth in double-double.
1658 *
1659 * r_true_t (output) double*
1660 * The trailing part of the truth in double-double.
1661 *
1662 */
1663 {
1664 double *x_head_i = x_head;
1665 double *x_tail_i = x_tail;
1666 double *y_i = y;
1667 int n_fix2;
1668 int n_mix;
1669 int ysize;
1670 int i;
1671
1672 float *a_vec;
1673 int m_i, n_i;
1674 int max_mn;
1675 int incy, incAB, incx;
1676 double y_elem;
1677
1678 max_mn = MAX(m, n);
1679 incx = incy = incAB = 1;
1680
1681
1682
1683
1684 if (trans == blas_no_trans) {
1685 m_i = m;
1686 n_i = n;
1687 } else {
1688 m_i = n;
1689 n_i = m;
1690 }
1691
1692 a_vec = (float *) blas_malloc(max_mn * sizeof(float));
1693 if (max_mn > 0 && a_vec == NULL) {
1694 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1695 }
1696
1697 /* calling dot_testgen n time. in each iteration, one row of AB, and one
1698 element of y are produced. the vector x is produced at the first
1699 iteration only */
1700 for (i = 0; i < m_i; i++) {
1701 /* copy AB to a_vec */
1702 sgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
1703 &n_fix2, &n_mix, &ysize);
1704
1705 if (i == 1) {
1706 /* from now on, fix alpha and beta */
1707 alpha_flag = 1;
1708 beta_flag = 1;
1709 }
1710
1711 BLAS_ddot2_d_s_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
1712 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
1713 seed, &y_elem, &r_true_l[i * incy],
1714 &r_true_t[i * incy]);
1715 y_i[i * incy] = y_elem;
1716
1717
1718 /* copy a_vec to AB */
1719 sgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
1720 }
1721
1722 /* Zero out trailing part of x */
1723 for (i = ysize; i < n_i; i++) {
1724 x_head_i[i * incx] = 0.0;
1725 x_tail_i[i * incx] = 0.0;
1726 }
1727
1728 blas_free(a_vec);
1729 }
BLAS_dgbmv2_d_s_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,double * alpha,int alpha_flag,double * AB,int lda,float * x_head,float * x_tail,double * beta,int beta_flag,double * y,int * seed,double * r_true_l,double * r_true_t)1730 void BLAS_dgbmv2_d_s_testgen(int norm, enum blas_order_type order,
1731 enum blas_trans_type trans, int m, int n, int kl,
1732 int ku, double *alpha, int alpha_flag,
1733 double *AB, int lda, float *x_head,
1734 float *x_tail, double *beta, int beta_flag,
1735 double *y, int *seed, double *r_true_l,
1736 double *r_true_t)
1737
1738 /*
1739 * Purpose
1740 * =======
1741 *
1742 * Generates alpha, AB, x, beta, and y, where AB is a banded
1743 * matrix; and computes r_true.
1744 *
1745 * Arguments
1746 * =========
1747 *
1748 * norm (input) blas_norm_type
1749 *
1750 * order (input) blas_order_type
1751 * Order of AB; row or column major
1752 *
1753 * trans (input) blas_trans_type
1754 * Whether AB is no trans, trans, or conj trans
1755 *
1756 * m (input) int
1757 * The number of rows
1758 *
1759 * n (input) int
1760 * The number of columns
1761 *
1762 * kl (input) int
1763 * The number of subdiagonals
1764 *
1765 * ku (input) int
1766 * The number of superdiagonals
1767 *
1768 * alpha (input/output) double*
1769 * If alpha_flag = 1, alpha is input.
1770 * If alpha_flag = 0, alpha is output.
1771 *
1772 * alpha_flag (input) int
1773 * = 0 : alpha is free, and is output.
1774 * = 1 : alpha is fixed on input.
1775 *
1776 * AB (output) double*
1777 * Matrix A in the banded storage.
1778 *
1779 *
1780 * lda (input) int
1781 * The first dimension of AB
1782 *
1783 * x_head (input/output) float*
1784 * x_tail (input/output) float*
1785 *
1786 * beta (input/output) double*
1787 * If beta_flag = 1, beta is input.
1788 * If beta_flag = 0, beta is output.
1789 *
1790 * beta_flag (input) int
1791 * = 0 : beta is free, and is output.
1792 * = 1 : beta is fixed on input.
1793 *
1794 * y (input/output) double*
1795 *
1796 * seed (input/output) int
1797 *
1798 * r_true_l (output) double*
1799 * The leading part of the truth in double-double.
1800 *
1801 * r_true_t (output) double*
1802 * The trailing part of the truth in double-double.
1803 *
1804 */
1805 {
1806 float *x_head_i = x_head;
1807 float *x_tail_i = x_tail;
1808 double *y_i = y;
1809 int n_fix2;
1810 int n_mix;
1811 int ysize;
1812 int i;
1813
1814 double *a_vec;
1815 int m_i, n_i;
1816 int max_mn;
1817 int incy, incAB, incx;
1818 double y_elem;
1819
1820 max_mn = MAX(m, n);
1821 incx = incy = incAB = 1;
1822
1823
1824
1825
1826 if (trans == blas_no_trans) {
1827 m_i = m;
1828 n_i = n;
1829 } else {
1830 m_i = n;
1831 n_i = m;
1832 }
1833
1834 a_vec = (double *) blas_malloc(max_mn * sizeof(double));
1835 if (max_mn > 0 && a_vec == NULL) {
1836 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1837 }
1838
1839 /* calling dot_testgen n time. in each iteration, one row of AB, and one
1840 element of y are produced. the vector x is produced at the first
1841 iteration only */
1842 for (i = 0; i < m_i; i++) {
1843 /* copy AB to a_vec */
1844 dgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
1845 &n_fix2, &n_mix, &ysize);
1846
1847 if (i == 1) {
1848 /* from now on, fix alpha and beta */
1849 alpha_flag = 1;
1850 beta_flag = 1;
1851 }
1852
1853 BLAS_ddot2_s_d_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
1854 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
1855 seed, &y_elem, &r_true_l[i * incy],
1856 &r_true_t[i * incy]);
1857 y_i[i * incy] = y_elem;
1858
1859
1860 /* copy a_vec to AB */
1861 dgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
1862 }
1863
1864 /* Zero out trailing part of x */
1865 for (i = ysize; i < n_i; i++) {
1866 x_head_i[i * incx] = 0.0;
1867 x_tail_i[i * incx] = 0.0;
1868 }
1869
1870 blas_free(a_vec);
1871 }
BLAS_zgbmv2_c_c_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,void * AB,int lda,void * x_head,void * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)1872 void BLAS_zgbmv2_c_c_testgen(int norm, enum blas_order_type order,
1873 enum blas_trans_type trans, int m, int n, int kl,
1874 int ku, void *alpha, int alpha_flag, void *AB,
1875 int lda, void *x_head, void *x_tail, void *beta,
1876 int beta_flag, void *y, int *seed,
1877 double *r_true_l, double *r_true_t)
1878
1879 /*
1880 * Purpose
1881 * =======
1882 *
1883 * Generates alpha, AB, x, beta, and y, where AB is a banded
1884 * matrix; and computes r_true.
1885 *
1886 * Arguments
1887 * =========
1888 *
1889 * norm (input) blas_norm_type
1890 *
1891 * order (input) blas_order_type
1892 * Order of AB; row or column major
1893 *
1894 * trans (input) blas_trans_type
1895 * Whether AB is no trans, trans, or conj trans
1896 *
1897 * m (input) int
1898 * The number of rows
1899 *
1900 * n (input) int
1901 * The number of columns
1902 *
1903 * kl (input) int
1904 * The number of subdiagonals
1905 *
1906 * ku (input) int
1907 * The number of superdiagonals
1908 *
1909 * alpha (input/output) void*
1910 * If alpha_flag = 1, alpha is input.
1911 * If alpha_flag = 0, alpha is output.
1912 *
1913 * alpha_flag (input) int
1914 * = 0 : alpha is free, and is output.
1915 * = 1 : alpha is fixed on input.
1916 *
1917 * AB (output) void*
1918 * Matrix A in the banded storage.
1919 *
1920 *
1921 * lda (input) int
1922 * The first dimension of AB
1923 *
1924 * x_head (input/output) void*
1925 * x_tail (input/output) void*
1926 *
1927 * beta (input/output) void*
1928 * If beta_flag = 1, beta is input.
1929 * If beta_flag = 0, beta is output.
1930 *
1931 * beta_flag (input) int
1932 * = 0 : beta is free, and is output.
1933 * = 1 : beta is fixed on input.
1934 *
1935 * y (input/output) void*
1936 *
1937 * seed (input/output) int
1938 *
1939 * r_true_l (output) double*
1940 * The leading part of the truth in double-double.
1941 *
1942 * r_true_t (output) double*
1943 * The trailing part of the truth in double-double.
1944 *
1945 */
1946 {
1947 float *x_head_i = (float *) x_head;
1948 float *x_tail_i = (float *) x_tail;
1949 double *y_i = (double *) y;
1950 int n_fix2;
1951 int n_mix;
1952 int ysize;
1953 int i;
1954 int j;
1955 float *a_vec;
1956 int m_i, n_i;
1957 int max_mn;
1958 int incy, incAB, incx;
1959 double y_elem[2];
1960
1961 max_mn = MAX(m, n);
1962 incx = incy = incAB = 1;
1963 incy *= 2;
1964 incAB *= 2;
1965 incx *= 2;
1966
1967 if (trans == blas_no_trans) {
1968 m_i = m;
1969 n_i = n;
1970 } else {
1971 m_i = n;
1972 n_i = m;
1973 }
1974
1975 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
1976 if (max_mn > 0 && a_vec == NULL) {
1977 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1978 }
1979
1980 /* calling dot_testgen n time. in each iteration, one row of AB, and one
1981 element of y are produced. the vector x is produced at the first
1982 iteration only */
1983 for (i = 0; i < m_i; i++) {
1984 /* copy AB to a_vec */
1985 cgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
1986 &n_fix2, &n_mix, &ysize);
1987
1988 if (i == 1) {
1989 /* from now on, fix alpha and beta */
1990 alpha_flag = 1;
1991 beta_flag = 1;
1992 }
1993
1994 BLAS_zdot2_c_c_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
1995 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
1996 seed, y_elem, &r_true_l[i * incy],
1997 &r_true_t[i * incy]);
1998 y_i[i * incy] = y_elem[0];
1999 y_i[i * incy + 1] = y_elem[1];
2000
2001 if (trans == blas_conj_trans) {
2002 for (j = 0; j < n_i * incAB; j += 2) {
2003 a_vec[j + 1] = -a_vec[j + 1];
2004 }
2005 }
2006 /* copy a_vec to AB */
2007 cgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
2008 }
2009
2010 /* Zero out trailing part of x */
2011 for (i = ysize; i < n_i; i++) {
2012 x_head_i[i * incx] = 0.0;
2013 x_head_i[i * incx + 1] = 0.0;
2014 x_tail_i[i * incx] = 0.0;
2015 x_tail_i[i * incx + 1] = 0.0;
2016 }
2017
2018 blas_free(a_vec);
2019 }
BLAS_zgbmv2_c_z_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,void * AB,int lda,void * x_head,void * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)2020 void BLAS_zgbmv2_c_z_testgen(int norm, enum blas_order_type order,
2021 enum blas_trans_type trans, int m, int n, int kl,
2022 int ku, void *alpha, int alpha_flag, void *AB,
2023 int lda, void *x_head, void *x_tail, void *beta,
2024 int beta_flag, void *y, int *seed,
2025 double *r_true_l, double *r_true_t)
2026
2027 /*
2028 * Purpose
2029 * =======
2030 *
2031 * Generates alpha, AB, x, beta, and y, where AB is a banded
2032 * matrix; and computes r_true.
2033 *
2034 * Arguments
2035 * =========
2036 *
2037 * norm (input) blas_norm_type
2038 *
2039 * order (input) blas_order_type
2040 * Order of AB; row or column major
2041 *
2042 * trans (input) blas_trans_type
2043 * Whether AB is no trans, trans, or conj trans
2044 *
2045 * m (input) int
2046 * The number of rows
2047 *
2048 * n (input) int
2049 * The number of columns
2050 *
2051 * kl (input) int
2052 * The number of subdiagonals
2053 *
2054 * ku (input) int
2055 * The number of superdiagonals
2056 *
2057 * alpha (input/output) void*
2058 * If alpha_flag = 1, alpha is input.
2059 * If alpha_flag = 0, alpha is output.
2060 *
2061 * alpha_flag (input) int
2062 * = 0 : alpha is free, and is output.
2063 * = 1 : alpha is fixed on input.
2064 *
2065 * AB (output) void*
2066 * Matrix A in the banded storage.
2067 *
2068 *
2069 * lda (input) int
2070 * The first dimension of AB
2071 *
2072 * x_head (input/output) void*
2073 * x_tail (input/output) void*
2074 *
2075 * beta (input/output) void*
2076 * If beta_flag = 1, beta is input.
2077 * If beta_flag = 0, beta is output.
2078 *
2079 * beta_flag (input) int
2080 * = 0 : beta is free, and is output.
2081 * = 1 : beta is fixed on input.
2082 *
2083 * y (input/output) void*
2084 *
2085 * seed (input/output) int
2086 *
2087 * r_true_l (output) double*
2088 * The leading part of the truth in double-double.
2089 *
2090 * r_true_t (output) double*
2091 * The trailing part of the truth in double-double.
2092 *
2093 */
2094 {
2095 double *x_head_i = (double *) x_head;
2096 double *x_tail_i = (double *) x_tail;
2097 double *y_i = (double *) y;
2098 int n_fix2;
2099 int n_mix;
2100 int ysize;
2101 int i;
2102 int j;
2103 float *a_vec;
2104 int m_i, n_i;
2105 int max_mn;
2106 int incy, incAB, incx;
2107 double y_elem[2];
2108
2109 max_mn = MAX(m, n);
2110 incx = incy = incAB = 1;
2111 incy *= 2;
2112 incAB *= 2;
2113 incx *= 2;
2114
2115 if (trans == blas_no_trans) {
2116 m_i = m;
2117 n_i = n;
2118 } else {
2119 m_i = n;
2120 n_i = m;
2121 }
2122
2123 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2124 if (max_mn > 0 && a_vec == NULL) {
2125 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2126 }
2127
2128 /* calling dot_testgen n time. in each iteration, one row of AB, and one
2129 element of y are produced. the vector x is produced at the first
2130 iteration only */
2131 for (i = 0; i < m_i; i++) {
2132 /* copy AB to a_vec */
2133 cgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
2134 &n_fix2, &n_mix, &ysize);
2135
2136 if (i == 1) {
2137 /* from now on, fix alpha and beta */
2138 alpha_flag = 1;
2139 beta_flag = 1;
2140 }
2141
2142 BLAS_zdot2_z_c_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
2143 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
2144 seed, y_elem, &r_true_l[i * incy],
2145 &r_true_t[i * incy]);
2146 y_i[i * incy] = y_elem[0];
2147 y_i[i * incy + 1] = y_elem[1];
2148
2149 if (trans == blas_conj_trans) {
2150 for (j = 0; j < n_i * incAB; j += 2) {
2151 a_vec[j + 1] = -a_vec[j + 1];
2152 }
2153 }
2154 /* copy a_vec to AB */
2155 cgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
2156 }
2157
2158 /* Zero out trailing part of x */
2159 for (i = ysize; i < n_i; i++) {
2160 x_head_i[i * incx] = 0.0;
2161 x_head_i[i * incx + 1] = 0.0;
2162 x_tail_i[i * incx] = 0.0;
2163 x_tail_i[i * incx + 1] = 0.0;
2164 }
2165
2166 blas_free(a_vec);
2167 }
BLAS_zgbmv2_z_c_testgen(int norm,enum blas_order_type order,enum blas_trans_type trans,int m,int n,int kl,int ku,void * alpha,int alpha_flag,void * AB,int lda,void * x_head,void * x_tail,void * beta,int beta_flag,void * y,int * seed,double * r_true_l,double * r_true_t)2168 void BLAS_zgbmv2_z_c_testgen(int norm, enum blas_order_type order,
2169 enum blas_trans_type trans, int m, int n, int kl,
2170 int ku, void *alpha, int alpha_flag, void *AB,
2171 int lda, void *x_head, void *x_tail, void *beta,
2172 int beta_flag, void *y, int *seed,
2173 double *r_true_l, double *r_true_t)
2174
2175 /*
2176 * Purpose
2177 * =======
2178 *
2179 * Generates alpha, AB, x, beta, and y, where AB is a banded
2180 * matrix; and computes r_true.
2181 *
2182 * Arguments
2183 * =========
2184 *
2185 * norm (input) blas_norm_type
2186 *
2187 * order (input) blas_order_type
2188 * Order of AB; row or column major
2189 *
2190 * trans (input) blas_trans_type
2191 * Whether AB is no trans, trans, or conj trans
2192 *
2193 * m (input) int
2194 * The number of rows
2195 *
2196 * n (input) int
2197 * The number of columns
2198 *
2199 * kl (input) int
2200 * The number of subdiagonals
2201 *
2202 * ku (input) int
2203 * The number of superdiagonals
2204 *
2205 * alpha (input/output) void*
2206 * If alpha_flag = 1, alpha is input.
2207 * If alpha_flag = 0, alpha is output.
2208 *
2209 * alpha_flag (input) int
2210 * = 0 : alpha is free, and is output.
2211 * = 1 : alpha is fixed on input.
2212 *
2213 * AB (output) void*
2214 * Matrix A in the banded storage.
2215 *
2216 *
2217 * lda (input) int
2218 * The first dimension of AB
2219 *
2220 * x_head (input/output) void*
2221 * x_tail (input/output) void*
2222 *
2223 * beta (input/output) void*
2224 * If beta_flag = 1, beta is input.
2225 * If beta_flag = 0, beta is output.
2226 *
2227 * beta_flag (input) int
2228 * = 0 : beta is free, and is output.
2229 * = 1 : beta is fixed on input.
2230 *
2231 * y (input/output) void*
2232 *
2233 * seed (input/output) int
2234 *
2235 * r_true_l (output) double*
2236 * The leading part of the truth in double-double.
2237 *
2238 * r_true_t (output) double*
2239 * The trailing part of the truth in double-double.
2240 *
2241 */
2242 {
2243 float *x_head_i = (float *) x_head;
2244 float *x_tail_i = (float *) x_tail;
2245 double *y_i = (double *) y;
2246 int n_fix2;
2247 int n_mix;
2248 int ysize;
2249 int i;
2250 int j;
2251 double *a_vec;
2252 int m_i, n_i;
2253 int max_mn;
2254 int incy, incAB, incx;
2255 double y_elem[2];
2256
2257 max_mn = MAX(m, n);
2258 incx = incy = incAB = 1;
2259 incy *= 2;
2260 incAB *= 2;
2261 incx *= 2;
2262
2263 if (trans == blas_no_trans) {
2264 m_i = m;
2265 n_i = n;
2266 } else {
2267 m_i = n;
2268 n_i = m;
2269 }
2270
2271 a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2272 if (max_mn > 0 && a_vec == NULL) {
2273 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2274 }
2275
2276 /* calling dot_testgen n time. in each iteration, one row of AB, and one
2277 element of y are produced. the vector x is produced at the first
2278 iteration only */
2279 for (i = 0; i < m_i; i++) {
2280 /* copy AB to a_vec */
2281 zgbmv_prepare(order, trans, m, n, kl, ku, AB, lda, a_vec, i,
2282 &n_fix2, &n_mix, &ysize);
2283
2284 if (i == 1) {
2285 /* from now on, fix alpha and beta */
2286 alpha_flag = 1;
2287 beta_flag = 1;
2288 }
2289
2290 BLAS_zdot2_c_z_testgen(ysize, n_fix2, n_mix, norm, blas_no_conj, alpha,
2291 alpha_flag, beta, beta_flag, x_head, x_tail, a_vec,
2292 seed, y_elem, &r_true_l[i * incy],
2293 &r_true_t[i * incy]);
2294 y_i[i * incy] = y_elem[0];
2295 y_i[i * incy + 1] = y_elem[1];
2296
2297 if (trans == blas_conj_trans) {
2298 for (j = 0; j < n_i * incAB; j += 2) {
2299 a_vec[j + 1] = -a_vec[j + 1];
2300 }
2301 }
2302 /* copy a_vec to AB */
2303 zgbmv_commit(order, trans, m, n, kl, ku, AB, lda, a_vec, i);
2304 }
2305
2306 /* Zero out trailing part of x */
2307 for (i = ysize; i < n_i; i++) {
2308 x_head_i[i * incx] = 0.0;
2309 x_head_i[i * incx + 1] = 0.0;
2310 x_tail_i[i * incx] = 0.0;
2311 x_tail_i[i * incx + 1] = 0.0;
2312 }
2313
2314 blas_free(a_vec);
2315 }
2316