1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
3 #include "blas_extended_test.h"
4
BLAS_stbsv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,float * alpha,int alpha_flag,float * T,int ldt,float * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)5 void BLAS_stbsv_testgen(int norm, enum blas_order_type order,
6 enum blas_uplo_type uplo, enum blas_trans_type trans,
7 enum blas_diag_type diag, int n, int k, int randomize,
8 float *alpha, int alpha_flag, float *T, int ldt,
9 float *x, int *seed, double *head_r_true,
10 double *tail_r_true, int row,
11 enum blas_prec_type prec)
12
13 /*
14 * Purpose
15 * =======
16 *
17 * Generates alpha, x and T, where T is a triangular Banded matrix;
18 * and computes r_true.
19 *
20 * Arguments
21 * =========
22 *
23 * norm (input) blas_norm_type
24 *
25 * order (input) blas_order_type
26 * Order of T; row or column major
27 *
28 * uplo (input) blas_uplo_type
29 * Whether T is upper or lower
30 *
31 * trans (input) blas_trans_type
32 * No trans, trans, conj trans
33 *
34 * diag (input) blas_diag_type
35 * non unit, unit
36 *
37 * n (input) int
38 * Dimension of T and the length of vector x
39 *
40 * k (input) int
41 * Number of super(sub)diagonals of T
42 *
43 * alpha (input/output) float*
44 * If alpha_flag = 1, alpha is input.
45 * If alpha_flag = 0, alpha is output.
46 *
47 * alpha_flag (input) int
48 * = 0 : alpha is free, and is output.
49 * = 1 : alpha is fixed on input.
50 *
51 * T (output) float*
52 *
53 * x (input/output) float*
54 *
55 * seed (input/output) int
56 *
57 * head_r_true (output) double*
58 * The leading part of the truth in double-double.
59 *
60 * tail_r_true (output) double*
61 * The trailing part of the truth in double-double.
62 *
63 * row (input) int
64 * The true row being generated
65 *
66 * prec (input) blas_prec_type
67 * single, double, or extra precision
68 *
69 */
70 {
71 int start;
72 int length;
73 int i, j;
74 int tempi, inc_tempi;
75 float alpha_i;
76 float minus_one;
77 float Tii;
78 float *temp;
79 float *xtemp2;
80
81 temp = (float *) blas_malloc(n * 2 * sizeof(float));
82 if (n * 2 > 0 && temp == NULL) {
83 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
84 }
85
86 /* always allocate, not always needed */
87 xtemp2 = (float *) blas_malloc(n * 2 * sizeof(float));
88 if (n * 2 > 0 && xtemp2 == NULL) {
89 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
90 }
91
92 minus_one = -1.0;
93
94 /* if alpha_flag=0, gives a random value to alpha */
95 if (alpha_flag == 0) {
96 alpha_i = xrand(seed);
97 *alpha = alpha_i;
98 alpha_flag = 1;
99 }
100
101 for (i = 0; i < 4 * n * ldt; i++) {
102 T[i] = 0.0;
103 }
104
105 inc_tempi = 1;
106
107
108 for (i = 0; i < n; i++) {
109
110 if (i != row) {
111 if (diag == blas_non_unit_diag) {
112 Tii = xrand(seed);
113 } else {
114 Tii = 1.0;
115 }
116 for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
117 if (j != i) {
118 temp[tempi] = 0.0;
119 } else {
120 temp[tempi] = Tii;
121 }
122 }
123 stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, i);
124
125
126 x[i] = xrand(seed);
127
128 switch (prec) {
129 case blas_prec_single:
130 {
131 float multemp;
132 float divtemp;
133
134 multemp = x[i] * *alpha;
135 divtemp = multemp / Tii;
136 head_r_true[i] = divtemp;
137 tail_r_true[i] = 0.0;
138 xtemp2[i] = divtemp;
139 break;
140 }
141 case blas_prec_indigenous:
142 case blas_prec_double:
143 {
144 double multemp;
145 double divtemp;
146
147 multemp = (double) x[i] * *alpha;
148 divtemp = (double) multemp / Tii;
149 head_r_true[i] = divtemp;
150 tail_r_true[i] = 0.0;
151 break;
152 }
153 case blas_prec_extra:
154 {
155 double head_multemp, tail_multemp;
156 double head_divtemp, tail_divtemp;
157
158 head_multemp = (double) x[i] * *alpha;
159 tail_multemp = 0.0;
160 {
161 double dt = (double) Tii;
162 {
163 /* Compute double-double = double-double / double,
164 using a Newton iteration scheme. */
165 double b1, b2, con, e, t1, t2, t11, t21, t12, t22;
166
167 /* Compute a DP approximation to the quotient. */
168 t1 = head_multemp / dt;
169
170 /* Split t1 and b into two parts with at most 26 bits each,
171 using the Dekker-Veltkamp method. */
172 con = t1 * split;
173 t11 = con - (con - t1);
174 t21 = t1 - t11;
175 con = dt * split;
176 b1 = con - (con - dt);
177 b2 = dt - b1;
178
179 /* Compute t1 * b using Dekker method. */
180 t12 = t1 * dt;
181 t22 = (((t11 * b1 - t12) + t11 * b2) + t21 * b1) + t21 * b2;
182
183 /* Compute dda - (t12, t22) using Knuth trick. */
184 t11 = head_multemp - t12;
185 e = t11 - head_multemp;
186 t21 =
187 ((-t12 - e) + (head_multemp - (t11 - e))) + tail_multemp -
188 t22;
189
190 /* Compute high-order word of (t11, t21) and divide by b. */
191 t2 = (t11 + t21) / dt;
192
193 /* The result is t1 + t2, after normalization. */
194 head_divtemp = t1 + t2;
195 tail_divtemp = t2 - (head_divtemp - t1);
196 }
197 }
198 head_r_true[i] = head_divtemp;
199 tail_r_true[i] = tail_divtemp;
200 break;
201 }
202 } /* case */
203 } /* if */
204 } /* for */
205
206 for (j = 0; j < n; j++) {
207 temp[j] = 0.0;
208 }
209
210 /* now set the important row - */
211 Tii = 1.0;
212 for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
213 if (j != row) {
214 temp[tempi] = 0.0;
215 } else {
216 temp[tempi] = Tii;
217 }
218 }
219 /* this is extra, it will really be committed later */
220 stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
221
222
223 if ((uplo == blas_lower && (trans == blas_no_trans || trans == blas_conj))
224 || (uplo == blas_upper
225 && (trans == blas_trans || trans == blas_conj_trans))) {
226 length = MIN(row, k);
227 start = MAX(0, row - k);
228 } else {
229 length = MIN(n - row - 1, k);
230 start = row + 1;
231 }
232
233 if (length != 0) {
234
235 switch (prec) {
236 case blas_prec_single:
237 BLAS_sdot_testgen(length, 0, length, norm,
238 blas_no_conj, &minus_one, 1, alpha, 1,
239 &xtemp2[start], &temp[start], seed, &x[row],
240 &head_r_true[row], &tail_r_true[row]);
241 break;
242 case blas_prec_indigenous:
243 case blas_prec_double:
244 case blas_prec_extra:
245 BLAS_sdot_x_testgen(length, 0, length, norm,
246 blas_no_conj, &minus_one, 1, alpha, 1,
247 &head_r_true[start], &tail_r_true[start],
248 &temp[start],
249 seed, &x[row], &head_r_true[row],
250 &tail_r_true[row]);
251 break;
252 }
253 stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
254 } else {
255 /* probably low k- must use tricks to test */
256 /* tricks are not yet implemented */
257 x[row] = xrand(seed);
258
259 switch (prec) {
260 case blas_prec_single:
261 {
262 float multemp;
263
264 multemp = x[row] * *alpha;
265 head_r_true[row] = multemp;
266 tail_r_true[row] = 0.0;
267 break;
268 }
269 case blas_prec_indigenous:
270 case blas_prec_double:
271 {
272 double multemp;
273
274 multemp = (double) x[row] * *alpha;
275 head_r_true[row] = multemp;
276 tail_r_true[row] = 0.0;
277 break;
278 }
279 case blas_prec_extra:
280 {
281 double head_multemp, tail_multemp;
282
283 head_multemp = (double) x[row] * *alpha;
284 tail_multemp = 0.0;
285 head_r_true[row] = head_multemp;
286 tail_r_true[row] = tail_multemp;
287 break;
288 }
289 }
290 }
291
292 blas_free(temp);
293 blas_free(xtemp2);
294 }
295
BLAS_dtbsv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,double * alpha,int alpha_flag,double * T,int ldt,double * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)296 void BLAS_dtbsv_testgen(int norm, enum blas_order_type order,
297 enum blas_uplo_type uplo, enum blas_trans_type trans,
298 enum blas_diag_type diag, int n, int k, int randomize,
299 double *alpha, int alpha_flag, double *T, int ldt,
300 double *x, int *seed, double *head_r_true,
301 double *tail_r_true, int row,
302 enum blas_prec_type prec)
303
304 /*
305 * Purpose
306 * =======
307 *
308 * Generates alpha, x and T, where T is a triangular Banded matrix;
309 * and computes r_true.
310 *
311 * Arguments
312 * =========
313 *
314 * norm (input) blas_norm_type
315 *
316 * order (input) blas_order_type
317 * Order of T; row or column major
318 *
319 * uplo (input) blas_uplo_type
320 * Whether T is upper or lower
321 *
322 * trans (input) blas_trans_type
323 * No trans, trans, conj trans
324 *
325 * diag (input) blas_diag_type
326 * non unit, unit
327 *
328 * n (input) int
329 * Dimension of T and the length of vector x
330 *
331 * k (input) int
332 * Number of super(sub)diagonals of T
333 *
334 * alpha (input/output) double*
335 * If alpha_flag = 1, alpha is input.
336 * If alpha_flag = 0, alpha is output.
337 *
338 * alpha_flag (input) int
339 * = 0 : alpha is free, and is output.
340 * = 1 : alpha is fixed on input.
341 *
342 * T (output) double*
343 *
344 * x (input/output) double*
345 *
346 * seed (input/output) int
347 *
348 * head_r_true (output) double*
349 * The leading part of the truth in double-double.
350 *
351 * tail_r_true (output) double*
352 * The trailing part of the truth in double-double.
353 *
354 * row (input) int
355 * The true row being generated
356 *
357 * prec (input) blas_prec_type
358 * single, double, or extra precision
359 *
360 */
361 {
362 int start;
363 int length;
364 int i, j;
365 int tempi, inc_tempi;
366 float alpha_i;
367 double minus_one;
368 double Tii;
369 double *temp;
370 double *xtemp2;
371
372 temp = (double *) blas_malloc(n * 2 * sizeof(double));
373 if (n * 2 > 0 && temp == NULL) {
374 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
375 }
376
377 /* always allocate, not always needed */
378 xtemp2 = (double *) blas_malloc(n * 2 * sizeof(double));
379 if (n * 2 > 0 && xtemp2 == NULL) {
380 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
381 }
382
383 minus_one = -1.0;
384
385 /* if alpha_flag=0, gives a random value to alpha */
386 if (alpha_flag == 0) {
387 alpha_i = xrand(seed);
388 *alpha = alpha_i;
389 alpha_flag = 1;
390 }
391
392 for (i = 0; i < 4 * n * ldt; i++) {
393 T[i] = 0.0;
394 }
395
396 inc_tempi = 1;
397
398
399 for (i = 0; i < n; i++) {
400
401 if (i != row) {
402 if (diag == blas_non_unit_diag) {
403 Tii = xrand(seed);
404 } else {
405 Tii = 1.0;
406 }
407 for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
408 if (j != i) {
409 temp[tempi] = 0.0;
410 } else {
411 temp[tempi] = Tii;
412 }
413 }
414 dtbsv_commit(order, uplo, trans, n, k, T, ldt, temp, i);
415
416
417 x[i] = xrand(seed);
418
419 switch (prec) {
420 case blas_prec_single:
421 {
422 double multemp;
423 double divtemp;
424
425 multemp = x[i] * *alpha;
426 divtemp = multemp / Tii;
427 head_r_true[i] = divtemp;
428 tail_r_true[i] = 0.0;
429 xtemp2[i] = divtemp;
430 break;
431 }
432 case blas_prec_indigenous:
433 case blas_prec_double:
434 {
435 double multemp;
436 double divtemp;
437
438 multemp = x[i] * *alpha;
439 divtemp = multemp / Tii;
440 head_r_true[i] = divtemp;
441 tail_r_true[i] = 0.0;
442 xtemp2[i] = divtemp;
443 break;
444 }
445 case blas_prec_extra:
446 {
447 double head_multemp, tail_multemp;
448 double head_divtemp, tail_divtemp;
449
450 {
451 /* Compute double_double = double * double. */
452 double a1, a2, b1, b2, con;
453
454 con = x[i] * split;
455 a1 = con - x[i];
456 a1 = con - a1;
457 a2 = x[i] - a1;
458 con = *alpha * split;
459 b1 = con - *alpha;
460 b1 = con - b1;
461 b2 = *alpha - b1;
462
463 head_multemp = x[i] * *alpha;
464 tail_multemp =
465 (((a1 * b1 - head_multemp) + a1 * b2) + a2 * b1) + a2 * b2;
466 }
467 {
468 /* Compute double-double = double-double / double,
469 using a Newton iteration scheme. */
470 double b1, b2, con, e, t1, t2, t11, t21, t12, t22;
471
472 /* Compute a DP approximation to the quotient. */
473 t1 = head_multemp / Tii;
474
475 /* Split t1 and b into two parts with at most 26 bits each,
476 using the Dekker-Veltkamp method. */
477 con = t1 * split;
478 t11 = con - (con - t1);
479 t21 = t1 - t11;
480 con = Tii * split;
481 b1 = con - (con - Tii);
482 b2 = Tii - b1;
483
484 /* Compute t1 * b using Dekker method. */
485 t12 = t1 * Tii;
486 t22 = (((t11 * b1 - t12) + t11 * b2) + t21 * b1) + t21 * b2;
487
488 /* Compute dda - (t12, t22) using Knuth trick. */
489 t11 = head_multemp - t12;
490 e = t11 - head_multemp;
491 t21 =
492 ((-t12 - e) + (head_multemp - (t11 - e))) + tail_multemp - t22;
493
494 /* Compute high-order word of (t11, t21) and divide by b. */
495 t2 = (t11 + t21) / Tii;
496
497 /* The result is t1 + t2, after normalization. */
498 head_divtemp = t1 + t2;
499 tail_divtemp = t2 - (head_divtemp - t1);
500 }
501 head_r_true[i] = head_divtemp;
502 tail_r_true[i] = tail_divtemp;
503 break;
504 }
505 } /* case */
506 } /* if */
507 } /* for */
508
509 for (j = 0; j < n; j++) {
510 temp[j] = 0.0;
511 }
512
513 /* now set the important row - */
514 Tii = 1.0;
515 for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
516 if (j != row) {
517 temp[tempi] = 0.0;
518 } else {
519 temp[tempi] = Tii;
520 }
521 }
522 /* this is extra, it will really be committed later */
523 dtbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
524
525
526 if ((uplo == blas_lower && (trans == blas_no_trans || trans == blas_conj))
527 || (uplo == blas_upper
528 && (trans == blas_trans || trans == blas_conj_trans))) {
529 length = MIN(row, k);
530 start = MAX(0, row - k);
531 } else {
532 length = MIN(n - row - 1, k);
533 start = row + 1;
534 }
535
536 if (length != 0) {
537
538 switch (prec) {
539 case blas_prec_single:
540 BLAS_ddot_testgen(length, 0, length, norm,
541 blas_no_conj, &minus_one, 1, alpha, 1,
542 &xtemp2[start], &temp[start], seed, &x[row],
543 &head_r_true[row], &tail_r_true[row]);
544 break;
545 case blas_prec_indigenous:
546 case blas_prec_double:
547 case blas_prec_extra:
548 BLAS_ddot_x_testgen(length, 0, length, norm,
549 blas_no_conj, &minus_one, 1, alpha, 1,
550 &head_r_true[start], &tail_r_true[start],
551 &temp[start],
552 seed, &x[row], &head_r_true[row],
553 &tail_r_true[row]);
554 break;
555 }
556 dtbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
557 } else {
558 /* probably low k- must use tricks to test */
559 /* tricks are not yet implemented */
560 x[row] = xrand(seed);
561
562 switch (prec) {
563 case blas_prec_single:
564 {
565 double multemp;
566
567 multemp = x[row] * *alpha;
568 head_r_true[row] = multemp;
569 tail_r_true[row] = 0.0;
570 break;
571 }
572 case blas_prec_indigenous:
573 case blas_prec_double:
574 {
575 double multemp;
576
577 multemp = x[row] * *alpha;
578 head_r_true[row] = multemp;
579 tail_r_true[row] = 0.0;
580 break;
581 }
582 case blas_prec_extra:
583 {
584 double head_multemp, tail_multemp;
585
586 {
587 /* Compute double_double = double * double. */
588 double a1, a2, b1, b2, con;
589
590 con = x[row] * split;
591 a1 = con - x[row];
592 a1 = con - a1;
593 a2 = x[row] - a1;
594 con = *alpha * split;
595 b1 = con - *alpha;
596 b1 = con - b1;
597 b2 = *alpha - b1;
598
599 head_multemp = x[row] * *alpha;
600 tail_multemp =
601 (((a1 * b1 - head_multemp) + a1 * b2) + a2 * b1) + a2 * b2;
602 }
603 head_r_true[row] = head_multemp;
604 tail_r_true[row] = tail_multemp;
605 break;
606 }
607 }
608 }
609
610 blas_free(temp);
611 blas_free(xtemp2);
612 }
613
BLAS_dtbsv_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,double * alpha,int alpha_flag,float * T,int ldt,double * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)614 void BLAS_dtbsv_s_testgen(int norm, enum blas_order_type order,
615 enum blas_uplo_type uplo,
616 enum blas_trans_type trans,
617 enum blas_diag_type diag, int n, int k,
618 int randomize, double *alpha, int alpha_flag,
619 float *T, int ldt, double *x, int *seed,
620 double *head_r_true, double *tail_r_true, int row,
621 enum blas_prec_type prec)
622
623 /*
624 * Purpose
625 * =======
626 *
627 * Generates alpha, x and T, where T is a triangular Banded matrix;
628 * and computes r_true.
629 *
630 * Arguments
631 * =========
632 *
633 * norm (input) blas_norm_type
634 *
635 * order (input) blas_order_type
636 * Order of T; row or column major
637 *
638 * uplo (input) blas_uplo_type
639 * Whether T is upper or lower
640 *
641 * trans (input) blas_trans_type
642 * No trans, trans, conj trans
643 *
644 * diag (input) blas_diag_type
645 * non unit, unit
646 *
647 * n (input) int
648 * Dimension of T and the length of vector x
649 *
650 * k (input) int
651 * Number of super(sub)diagonals of T
652 *
653 * alpha (input/output) double*
654 * If alpha_flag = 1, alpha is input.
655 * If alpha_flag = 0, alpha is output.
656 *
657 * alpha_flag (input) int
658 * = 0 : alpha is free, and is output.
659 * = 1 : alpha is fixed on input.
660 *
661 * T (output) float*
662 *
663 * x (input/output) double*
664 *
665 * seed (input/output) int
666 *
667 * head_r_true (output) double*
668 * The leading part of the truth in double-double.
669 *
670 * tail_r_true (output) double*
671 * The trailing part of the truth in double-double.
672 *
673 * row (input) int
674 * The true row being generated
675 *
676 * prec (input) blas_prec_type
677 * single, double, or extra precision
678 *
679 */
680 {
681 int start;
682 int length;
683 int i, j;
684 int tempi, inc_tempi;
685 float alpha_i;
686 double minus_one;
687 float Tii;
688 float *temp;
689 double *xtemp2;
690
691 temp = (float *) blas_malloc(n * 2 * sizeof(float));
692 if (n * 2 > 0 && temp == NULL) {
693 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
694 }
695
696 /* always allocate, not always needed */
697 xtemp2 = (double *) blas_malloc(n * 2 * sizeof(double));
698 if (n * 2 > 0 && xtemp2 == NULL) {
699 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
700 }
701
702 minus_one = -1.0;
703
704 /* if alpha_flag=0, gives a random value to alpha */
705 if (alpha_flag == 0) {
706 alpha_i = xrand(seed);
707 *alpha = alpha_i;
708 alpha_flag = 1;
709 }
710
711 for (i = 0; i < 4 * n * ldt; i++) {
712 T[i] = 0.0;
713 }
714
715 inc_tempi = 1;
716
717
718 for (i = 0; i < n; i++) {
719
720 if (i != row) {
721 if (diag == blas_non_unit_diag) {
722 Tii = xrand(seed);
723 } else {
724 Tii = 1.0;
725 }
726 for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
727 if (j != i) {
728 temp[tempi] = 0.0;
729 } else {
730 temp[tempi] = Tii;
731 }
732 }
733 stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, i);
734
735
736 x[i] = xrand(seed);
737
738 switch (prec) {
739 case blas_prec_single:
740 {
741 double multemp;
742 double divtemp;
743
744 multemp = x[i] * *alpha;
745 divtemp = multemp / Tii;
746 head_r_true[i] = divtemp;
747 tail_r_true[i] = 0.0;
748 xtemp2[i] = divtemp;
749 break;
750 }
751 case blas_prec_indigenous:
752 case blas_prec_double:
753 {
754 double multemp;
755 double divtemp;
756
757 multemp = x[i] * *alpha;
758 divtemp = multemp / Tii;
759 head_r_true[i] = divtemp;
760 tail_r_true[i] = 0.0;
761 xtemp2[i] = divtemp;
762 break;
763 }
764 case blas_prec_extra:
765 {
766 double head_multemp, tail_multemp;
767 double head_divtemp, tail_divtemp;
768
769 {
770 /* Compute double_double = double * double. */
771 double a1, a2, b1, b2, con;
772
773 con = x[i] * split;
774 a1 = con - x[i];
775 a1 = con - a1;
776 a2 = x[i] - a1;
777 con = *alpha * split;
778 b1 = con - *alpha;
779 b1 = con - b1;
780 b2 = *alpha - b1;
781
782 head_multemp = x[i] * *alpha;
783 tail_multemp =
784 (((a1 * b1 - head_multemp) + a1 * b2) + a2 * b1) + a2 * b2;
785 }
786 {
787 double dt = (double) Tii;
788 {
789 /* Compute double-double = double-double / double,
790 using a Newton iteration scheme. */
791 double b1, b2, con, e, t1, t2, t11, t21, t12, t22;
792
793 /* Compute a DP approximation to the quotient. */
794 t1 = head_multemp / dt;
795
796 /* Split t1 and b into two parts with at most 26 bits each,
797 using the Dekker-Veltkamp method. */
798 con = t1 * split;
799 t11 = con - (con - t1);
800 t21 = t1 - t11;
801 con = dt * split;
802 b1 = con - (con - dt);
803 b2 = dt - b1;
804
805 /* Compute t1 * b using Dekker method. */
806 t12 = t1 * dt;
807 t22 = (((t11 * b1 - t12) + t11 * b2) + t21 * b1) + t21 * b2;
808
809 /* Compute dda - (t12, t22) using Knuth trick. */
810 t11 = head_multemp - t12;
811 e = t11 - head_multemp;
812 t21 =
813 ((-t12 - e) + (head_multemp - (t11 - e))) + tail_multemp -
814 t22;
815
816 /* Compute high-order word of (t11, t21) and divide by b. */
817 t2 = (t11 + t21) / dt;
818
819 /* The result is t1 + t2, after normalization. */
820 head_divtemp = t1 + t2;
821 tail_divtemp = t2 - (head_divtemp - t1);
822 }
823 }
824 head_r_true[i] = head_divtemp;
825 tail_r_true[i] = tail_divtemp;
826 break;
827 }
828 } /* case */
829 } /* if */
830 } /* for */
831
832 for (j = 0; j < n; j++) {
833 temp[j] = 0.0;
834 }
835
836 /* now set the important row - */
837 Tii = 1.0;
838 for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
839 if (j != row) {
840 temp[tempi] = 0.0;
841 } else {
842 temp[tempi] = Tii;
843 }
844 }
845 /* this is extra, it will really be committed later */
846 stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
847
848
849 if ((uplo == blas_lower && (trans == blas_no_trans || trans == blas_conj))
850 || (uplo == blas_upper
851 && (trans == blas_trans || trans == blas_conj_trans))) {
852 length = MIN(row, k);
853 start = MAX(0, row - k);
854 } else {
855 length = MIN(n - row - 1, k);
856 start = row + 1;
857 }
858
859 if (length != 0) {
860
861 switch (prec) {
862 case blas_prec_single:
863 case blas_prec_indigenous:
864 case blas_prec_double: /*BLAS_ddot_s_x_testgen(length, 0, length, norm,
865 blas_no_conj, &minus_one, 1, alpha, 1,
866 &head_r_true[start], &tail_r_true[start],
867 &temp[start],
868 seed, &x[row], &head_r_true[row], &tail_r_true[row]);
869 break; */
870 case blas_prec_extra:
871 BLAS_ddot_s_x_testgen(length, 0, length, norm,
872 blas_no_conj, &minus_one, 1, alpha, 1,
873 &head_r_true[start], &tail_r_true[start],
874 &temp[start],
875 seed, &x[row], &head_r_true[row],
876 &tail_r_true[row]);
877 break;
878 }
879 stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
880 } else {
881 /* probably low k- must use tricks to test */
882 /* tricks are not yet implemented */
883 x[row] = xrand(seed);
884
885 switch (prec) {
886 case blas_prec_single:
887 {
888 double multemp;
889
890 multemp = x[row] * *alpha;
891 head_r_true[row] = multemp;
892 tail_r_true[row] = 0.0;
893 break;
894 }
895 case blas_prec_indigenous:
896 case blas_prec_double:
897 {
898 double multemp;
899
900 multemp = x[row] * *alpha;
901 head_r_true[row] = multemp;
902 tail_r_true[row] = 0.0;
903 break;
904 }
905 case blas_prec_extra:
906 {
907 double head_multemp, tail_multemp;
908
909 {
910 /* Compute double_double = double * double. */
911 double a1, a2, b1, b2, con;
912
913 con = x[row] * split;
914 a1 = con - x[row];
915 a1 = con - a1;
916 a2 = x[row] - a1;
917 con = *alpha * split;
918 b1 = con - *alpha;
919 b1 = con - b1;
920 b2 = *alpha - b1;
921
922 head_multemp = x[row] * *alpha;
923 tail_multemp =
924 (((a1 * b1 - head_multemp) + a1 * b2) + a2 * b1) + a2 * b2;
925 }
926 head_r_true[row] = head_multemp;
927 tail_r_true[row] = tail_multemp;
928 break;
929 }
930 }
931 }
932
933 blas_free(temp);
934 blas_free(xtemp2);
935 }
936
BLAS_ctbsv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,void * alpha,int alpha_flag,void * T,int ldt,void * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)937 void BLAS_ctbsv_testgen(int norm, enum blas_order_type order,
938 enum blas_uplo_type uplo, enum blas_trans_type trans,
939 enum blas_diag_type diag, int n, int k, int randomize,
940 void *alpha, int alpha_flag, void *T, int ldt,
941 void *x, int *seed, double *head_r_true,
942 double *tail_r_true, int row,
943 enum blas_prec_type prec)
944
945 /*
946 * Purpose
947 * =======
948 *
949 * Generates alpha, x and T, where T is a triangular Banded matrix;
950 * and computes r_true.
951 *
952 * Arguments
953 * =========
954 *
955 * norm (input) blas_norm_type
956 *
957 * order (input) blas_order_type
958 * Order of T; row or column major
959 *
960 * uplo (input) blas_uplo_type
961 * Whether T is upper or lower
962 *
963 * trans (input) blas_trans_type
964 * No trans, trans, conj trans
965 *
966 * diag (input) blas_diag_type
967 * non unit, unit
968 *
969 * n (input) int
970 * Dimension of T and the length of vector x
971 *
972 * k (input) int
973 * Number of super(sub)diagonals of T
974 *
975 * alpha (input/output) void*
976 * If alpha_flag = 1, alpha is input.
977 * If alpha_flag = 0, alpha is output.
978 *
979 * alpha_flag (input) int
980 * = 0 : alpha is free, and is output.
981 * = 1 : alpha is fixed on input.
982 *
983 * T (output) void*
984 *
985 * x (input/output) void*
986 *
987 * seed (input/output) int
988 *
989 * head_r_true (output) double*
990 * The leading part of the truth in double-double.
991 *
992 * tail_r_true (output) double*
993 * The trailing part of the truth in double-double.
994 *
995 * row (input) int
996 * The true row being generated
997 *
998 * prec (input) blas_prec_type
999 * single, double, or extra precision
1000 *
1001 */
1002 {
1003 float *x_i = (float *) x;
1004 float *alpha_i = (float *) alpha;
1005 float *T_i = (float *) T;
1006 float alpha_r;
1007 float *T_r;
1008 float *x_r;
1009 float *T_temp_r;
1010 float *T_temp_c;
1011 double *head_r_true_r, *tail_r_true_r;
1012 int i, inc = 2, length, j;
1013
1014 T_r = (float *) blas_malloc(8 * n * ldt * sizeof(float));
1015 if (8 * n * ldt > 0 && T_r == NULL) {
1016 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1017 }
1018 x_r = (float *) blas_malloc(n * sizeof(float));
1019 if (n > 0 && x_r == NULL) {
1020 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1021 }
1022 head_r_true_r = (double *) blas_malloc(n * sizeof(double));
1023 tail_r_true_r = (double *) blas_malloc(n * sizeof(double));
1024 if (n > 0 && (head_r_true_r == NULL || tail_r_true_r == NULL)) {
1025 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1026 }
1027 T_temp_c = (float *) blas_malloc(n * sizeof(float) * 2);
1028 if (n > 0 && T_temp_c == NULL) {
1029 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1030 }
1031 T_temp_r = (float *) blas_malloc(n * sizeof(float));
1032 if (n > 0 && T_temp_r == NULL) {
1033 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1034 }
1035
1036 if (alpha_flag == 1) {
1037 alpha_r = alpha_i[0];
1038 }
1039
1040 if ((uplo == blas_lower && trans == blas_no_trans) ||
1041 (uplo == blas_upper && trans != blas_no_trans)) {
1042 length = row;
1043 } else {
1044 length = n - row - 1;
1045 }
1046
1047 BLAS_stbsv_testgen(norm, order, uplo, trans, diag, n, k, randomize,
1048 &alpha_r,
1049 alpha_flag, T_r, ldt, x_r, seed, head_r_true_r,
1050 tail_r_true_r, row, prec);
1051
1052 /* multiply alpha_r by 1+i */
1053 alpha_i[0] = alpha_r;
1054 alpha_i[1] = alpha_r;
1055
1056 if (diag == blas_non_unit_diag) {
1057 for (i = 0; i < n; i++) {
1058 /* multiply x_r by i */
1059 x_i[i * inc] = 0.0;
1060 x_i[i * inc + 1] = x_r[i];
1061
1062 /* multiply non major rows truth by i */
1063 if (i != row) {
1064 head_r_true[i * inc] = 0.0;
1065 head_r_true[i * inc + 1] = head_r_true_r[i];
1066 tail_r_true[i * inc] = 0.0;
1067 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1068 } else {
1069 /* multiply major rows truth by (-1+i) */
1070 head_r_true[i * inc] = -head_r_true_r[i];
1071 head_r_true[i * inc + 1] = head_r_true_r[i];
1072 tail_r_true[i * inc] = -tail_r_true_r[i];
1073 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1074 }
1075 }
1076
1077 /* copy T real to complex (and multiply by (1+i)) */
1078 for (i = 0; i < n; i++) {
1079 stbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1080 for (j = 0; j < n; j++) {
1081 T_temp_c[j * inc] = T_temp_r[j];
1082 /* conjugation is handled by the commit later - */
1083 T_temp_c[j * inc + 1] = T_temp_r[j];
1084 }
1085 if (i == row) {
1086 /* zero out imaginary part of diagonal on important row. */
1087 T_temp_c[row * inc + 1] = 0.0;
1088 }
1089 ctbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1090 }
1091 } else {
1092 for (i = 0; i < n; i++) {
1093 /* multiply x by i */
1094 x_i[i * inc] = 0.0;
1095 x_i[i * inc + 1] = x_r[i];
1096
1097 /* multiply non-major rows by -1+i */
1098 if (i != row || length == 0) {
1099 head_r_true[i * inc] = -head_r_true_r[i];
1100 head_r_true[i * inc + 1] = head_r_true_r[i];
1101 tail_r_true[i * inc] = -tail_r_true_r[i];
1102 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1103 } else {
1104 /* multiply x by 1+i (overwrite multiply by i earlier) */
1105 x_i[i * inc] = x_r[i];
1106 x_i[i * inc + 1] = x_r[i];
1107
1108 head_r_true[i * inc] = 0.0;
1109 head_r_true[i * inc + 1] = 2 * head_r_true_r[i];
1110 tail_r_true[i * inc] = 0.0;
1111 tail_r_true[i * inc + 1] = 2 * tail_r_true_r[i];
1112 }
1113 }
1114
1115 /* copy T real to complex (and multiply by (1-i)) */
1116 for (i = 0; i < n; i++) {
1117 stbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1118 for (j = 0; j < n; j++) {
1119 T_temp_c[j * inc] = T_temp_r[j];
1120 /* conjugation is handled by the commit later - */
1121 T_temp_c[j * inc + 1] = -T_temp_r[j];
1122 }
1123 /* zero out imaginary part of diagonal on every row. */
1124 T_temp_c[i * inc + 1] = 0.0;
1125 ctbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1126 }
1127 }
1128
1129 blas_free(T_temp_c);
1130 blas_free(T_temp_r);
1131 blas_free(T_r);
1132 blas_free(x_r);
1133 blas_free(head_r_true_r);
1134 blas_free(tail_r_true_r);
1135 }
1136
BLAS_ztbsv_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,void * alpha,int alpha_flag,void * T,int ldt,void * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)1137 void BLAS_ztbsv_c_testgen(int norm, enum blas_order_type order,
1138 enum blas_uplo_type uplo,
1139 enum blas_trans_type trans,
1140 enum blas_diag_type diag, int n, int k,
1141 int randomize, void *alpha, int alpha_flag, void *T,
1142 int ldt, void *x, int *seed, double *head_r_true,
1143 double *tail_r_true, int row,
1144 enum blas_prec_type prec)
1145
1146 /*
1147 * Purpose
1148 * =======
1149 *
1150 * Generates alpha, x and T, where T is a triangular Banded matrix;
1151 * and computes r_true.
1152 *
1153 * Arguments
1154 * =========
1155 *
1156 * norm (input) blas_norm_type
1157 *
1158 * order (input) blas_order_type
1159 * Order of T; row or column major
1160 *
1161 * uplo (input) blas_uplo_type
1162 * Whether T is upper or lower
1163 *
1164 * trans (input) blas_trans_type
1165 * No trans, trans, conj trans
1166 *
1167 * diag (input) blas_diag_type
1168 * non unit, unit
1169 *
1170 * n (input) int
1171 * Dimension of T and the length of vector x
1172 *
1173 * k (input) int
1174 * Number of super(sub)diagonals of T
1175 *
1176 * alpha (input/output) void*
1177 * If alpha_flag = 1, alpha is input.
1178 * If alpha_flag = 0, alpha is output.
1179 *
1180 * alpha_flag (input) int
1181 * = 0 : alpha is free, and is output.
1182 * = 1 : alpha is fixed on input.
1183 *
1184 * T (output) void*
1185 *
1186 * x (input/output) void*
1187 *
1188 * seed (input/output) int
1189 *
1190 * head_r_true (output) double*
1191 * The leading part of the truth in double-double.
1192 *
1193 * tail_r_true (output) double*
1194 * The trailing part of the truth in double-double.
1195 *
1196 * row (input) int
1197 * The true row being generated
1198 *
1199 * prec (input) blas_prec_type
1200 * single, double, or extra precision
1201 *
1202 */
1203 {
1204 double *x_i = (double *) x;
1205 double *alpha_i = (double *) alpha;
1206 float *T_i = (float *) T;
1207 double alpha_r;
1208 float *T_r;
1209 double *x_r;
1210 float *T_temp_r;
1211 float *T_temp_c;
1212 double *head_r_true_r, *tail_r_true_r;
1213 int i, inc = 2, length, j;
1214
1215 T_r = (float *) blas_malloc(8 * n * ldt * sizeof(float));
1216 if (8 * n * ldt > 0 && T_r == NULL) {
1217 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1218 }
1219 x_r = (double *) blas_malloc(n * sizeof(double));
1220 if (n > 0 && x_r == NULL) {
1221 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1222 }
1223 head_r_true_r = (double *) blas_malloc(n * sizeof(double));
1224 tail_r_true_r = (double *) blas_malloc(n * sizeof(double));
1225 if (n > 0 && (head_r_true_r == NULL || tail_r_true_r == NULL)) {
1226 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1227 }
1228 T_temp_c = (float *) blas_malloc(n * sizeof(float) * 2);
1229 if (n > 0 && T_temp_c == NULL) {
1230 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1231 }
1232 T_temp_r = (float *) blas_malloc(n * sizeof(float));
1233 if (n > 0 && T_temp_r == NULL) {
1234 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1235 }
1236
1237 if (alpha_flag == 1) {
1238 alpha_r = alpha_i[0];
1239 }
1240
1241 if ((uplo == blas_lower && trans == blas_no_trans) ||
1242 (uplo == blas_upper && trans != blas_no_trans)) {
1243 length = row;
1244 } else {
1245 length = n - row - 1;
1246 }
1247
1248 BLAS_dtbsv_s_testgen(norm, order, uplo, trans, diag, n, k, randomize,
1249 &alpha_r,
1250 alpha_flag, T_r, ldt, x_r, seed, head_r_true_r,
1251 tail_r_true_r, row, prec);
1252
1253 /* multiply alpha_r by 1+i */
1254 alpha_i[0] = alpha_r;
1255 alpha_i[1] = alpha_r;
1256
1257 if (diag == blas_non_unit_diag) {
1258 for (i = 0; i < n; i++) {
1259 /* multiply x_r by i */
1260 x_i[i * inc] = 0.0;
1261 x_i[i * inc + 1] = x_r[i];
1262
1263 /* multiply non major rows truth by i */
1264 if (i != row) {
1265 head_r_true[i * inc] = 0.0;
1266 head_r_true[i * inc + 1] = head_r_true_r[i];
1267 tail_r_true[i * inc] = 0.0;
1268 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1269 } else {
1270 /* multiply major rows truth by (-1+i) */
1271 head_r_true[i * inc] = -head_r_true_r[i];
1272 head_r_true[i * inc + 1] = head_r_true_r[i];
1273 tail_r_true[i * inc] = -tail_r_true_r[i];
1274 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1275 }
1276 }
1277
1278 /* copy T real to complex (and multiply by (1+i)) */
1279 for (i = 0; i < n; i++) {
1280 stbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1281 for (j = 0; j < n; j++) {
1282 T_temp_c[j * inc] = T_temp_r[j];
1283 /* conjugation is handled by the commit later - */
1284 T_temp_c[j * inc + 1] = T_temp_r[j];
1285 }
1286 if (i == row) {
1287 /* zero out imaginary part of diagonal on important row. */
1288 T_temp_c[row * inc + 1] = 0.0;
1289 }
1290 ctbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1291 }
1292 } else {
1293 for (i = 0; i < n; i++) {
1294 /* multiply x by i */
1295 x_i[i * inc] = 0.0;
1296 x_i[i * inc + 1] = x_r[i];
1297
1298 /* multiply non-major rows by -1+i */
1299 if (i != row || length == 0) {
1300 head_r_true[i * inc] = -head_r_true_r[i];
1301 head_r_true[i * inc + 1] = head_r_true_r[i];
1302 tail_r_true[i * inc] = -tail_r_true_r[i];
1303 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1304 } else {
1305 /* multiply x by 1+i (overwrite multiply by i earlier) */
1306 x_i[i * inc] = x_r[i];
1307 x_i[i * inc + 1] = x_r[i];
1308
1309 head_r_true[i * inc] = 0.0;
1310 head_r_true[i * inc + 1] = 2 * head_r_true_r[i];
1311 tail_r_true[i * inc] = 0.0;
1312 tail_r_true[i * inc + 1] = 2 * tail_r_true_r[i];
1313 }
1314 }
1315
1316 /* copy T real to complex (and multiply by (1-i)) */
1317 for (i = 0; i < n; i++) {
1318 stbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1319 for (j = 0; j < n; j++) {
1320 T_temp_c[j * inc] = T_temp_r[j];
1321 /* conjugation is handled by the commit later - */
1322 T_temp_c[j * inc + 1] = -T_temp_r[j];
1323 }
1324 /* zero out imaginary part of diagonal on every row. */
1325 T_temp_c[i * inc + 1] = 0.0;
1326 ctbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1327 }
1328 }
1329
1330 blas_free(T_temp_c);
1331 blas_free(T_temp_r);
1332 blas_free(T_r);
1333 blas_free(x_r);
1334 blas_free(head_r_true_r);
1335 blas_free(tail_r_true_r);
1336 }
1337
BLAS_ztbsv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,void * alpha,int alpha_flag,void * T,int ldt,void * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)1338 void BLAS_ztbsv_testgen(int norm, enum blas_order_type order,
1339 enum blas_uplo_type uplo, enum blas_trans_type trans,
1340 enum blas_diag_type diag, int n, int k, int randomize,
1341 void *alpha, int alpha_flag, void *T, int ldt,
1342 void *x, int *seed, double *head_r_true,
1343 double *tail_r_true, int row,
1344 enum blas_prec_type prec)
1345
1346 /*
1347 * Purpose
1348 * =======
1349 *
1350 * Generates alpha, x and T, where T is a triangular Banded matrix;
1351 * and computes r_true.
1352 *
1353 * Arguments
1354 * =========
1355 *
1356 * norm (input) blas_norm_type
1357 *
1358 * order (input) blas_order_type
1359 * Order of T; row or column major
1360 *
1361 * uplo (input) blas_uplo_type
1362 * Whether T is upper or lower
1363 *
1364 * trans (input) blas_trans_type
1365 * No trans, trans, conj trans
1366 *
1367 * diag (input) blas_diag_type
1368 * non unit, unit
1369 *
1370 * n (input) int
1371 * Dimension of T and the length of vector x
1372 *
1373 * k (input) int
1374 * Number of super(sub)diagonals of T
1375 *
1376 * alpha (input/output) void*
1377 * If alpha_flag = 1, alpha is input.
1378 * If alpha_flag = 0, alpha is output.
1379 *
1380 * alpha_flag (input) int
1381 * = 0 : alpha is free, and is output.
1382 * = 1 : alpha is fixed on input.
1383 *
1384 * T (output) void*
1385 *
1386 * x (input/output) void*
1387 *
1388 * seed (input/output) int
1389 *
1390 * head_r_true (output) double*
1391 * The leading part of the truth in double-double.
1392 *
1393 * tail_r_true (output) double*
1394 * The trailing part of the truth in double-double.
1395 *
1396 * row (input) int
1397 * The true row being generated
1398 *
1399 * prec (input) blas_prec_type
1400 * single, double, or extra precision
1401 *
1402 */
1403 {
1404 double *x_i = (double *) x;
1405 double *alpha_i = (double *) alpha;
1406 double *T_i = (double *) T;
1407 double alpha_r;
1408 double *T_r;
1409 double *x_r;
1410 double *T_temp_r;
1411 double *T_temp_c;
1412 double *head_r_true_r, *tail_r_true_r;
1413 int i, inc = 2, length, j;
1414
1415 T_r = (double *) blas_malloc(8 * n * ldt * sizeof(double));
1416 if (8 * n * ldt > 0 && T_r == NULL) {
1417 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1418 }
1419 x_r = (double *) blas_malloc(n * sizeof(double));
1420 if (n > 0 && x_r == NULL) {
1421 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1422 }
1423 head_r_true_r = (double *) blas_malloc(n * sizeof(double));
1424 tail_r_true_r = (double *) blas_malloc(n * sizeof(double));
1425 if (n > 0 && (head_r_true_r == NULL || tail_r_true_r == NULL)) {
1426 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1427 }
1428 T_temp_c = (double *) blas_malloc(n * sizeof(double) * 2);
1429 if (n > 0 && T_temp_c == NULL) {
1430 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1431 }
1432 T_temp_r = (double *) blas_malloc(n * sizeof(double));
1433 if (n > 0 && T_temp_r == NULL) {
1434 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1435 }
1436
1437 if (alpha_flag == 1) {
1438 alpha_r = alpha_i[0];
1439 }
1440
1441 if ((uplo == blas_lower && trans == blas_no_trans) ||
1442 (uplo == blas_upper && trans != blas_no_trans)) {
1443 length = row;
1444 } else {
1445 length = n - row - 1;
1446 }
1447
1448 BLAS_dtbsv_testgen(norm, order, uplo, trans, diag, n, k, randomize,
1449 &alpha_r,
1450 alpha_flag, T_r, ldt, x_r, seed, head_r_true_r,
1451 tail_r_true_r, row, prec);
1452
1453 /* multiply alpha_r by 1+i */
1454 alpha_i[0] = alpha_r;
1455 alpha_i[1] = alpha_r;
1456
1457 if (diag == blas_non_unit_diag) {
1458 for (i = 0; i < n; i++) {
1459 /* multiply x_r by i */
1460 x_i[i * inc] = 0.0;
1461 x_i[i * inc + 1] = x_r[i];
1462
1463 /* multiply non major rows truth by i */
1464 if (i != row) {
1465 head_r_true[i * inc] = 0.0;
1466 head_r_true[i * inc + 1] = head_r_true_r[i];
1467 tail_r_true[i * inc] = 0.0;
1468 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1469 } else {
1470 /* multiply major rows truth by (-1+i) */
1471 head_r_true[i * inc] = -head_r_true_r[i];
1472 head_r_true[i * inc + 1] = head_r_true_r[i];
1473 tail_r_true[i * inc] = -tail_r_true_r[i];
1474 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1475 }
1476 }
1477
1478 /* copy T real to complex (and multiply by (1+i)) */
1479 for (i = 0; i < n; i++) {
1480 dtbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1481 for (j = 0; j < n; j++) {
1482 T_temp_c[j * inc] = T_temp_r[j];
1483 /* conjugation is handled by the commit later - */
1484 T_temp_c[j * inc + 1] = T_temp_r[j];
1485 }
1486 if (i == row) {
1487 /* zero out imaginary part of diagonal on important row. */
1488 T_temp_c[row * inc + 1] = 0.0;
1489 }
1490 ztbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1491 }
1492 } else {
1493 for (i = 0; i < n; i++) {
1494 /* multiply x by i */
1495 x_i[i * inc] = 0.0;
1496 x_i[i * inc + 1] = x_r[i];
1497
1498 /* multiply non-major rows by -1+i */
1499 if (i != row || length == 0) {
1500 head_r_true[i * inc] = -head_r_true_r[i];
1501 head_r_true[i * inc + 1] = head_r_true_r[i];
1502 tail_r_true[i * inc] = -tail_r_true_r[i];
1503 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1504 } else {
1505 /* multiply x by 1+i (overwrite multiply by i earlier) */
1506 x_i[i * inc] = x_r[i];
1507 x_i[i * inc + 1] = x_r[i];
1508
1509 head_r_true[i * inc] = 0.0;
1510 head_r_true[i * inc + 1] = 2 * head_r_true_r[i];
1511 tail_r_true[i * inc] = 0.0;
1512 tail_r_true[i * inc + 1] = 2 * tail_r_true_r[i];
1513 }
1514 }
1515
1516 /* copy T real to complex (and multiply by (1-i)) */
1517 for (i = 0; i < n; i++) {
1518 dtbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1519 for (j = 0; j < n; j++) {
1520 T_temp_c[j * inc] = T_temp_r[j];
1521 /* conjugation is handled by the commit later - */
1522 T_temp_c[j * inc + 1] = -T_temp_r[j];
1523 }
1524 /* zero out imaginary part of diagonal on every row. */
1525 T_temp_c[i * inc + 1] = 0.0;
1526 ztbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1527 }
1528 }
1529
1530 blas_free(T_temp_c);
1531 blas_free(T_temp_r);
1532 blas_free(T_r);
1533 blas_free(x_r);
1534 blas_free(head_r_true_r);
1535 blas_free(tail_r_true_r);
1536 }
1537
BLAS_ctbsv_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,void * alpha,int alpha_flag,float * T,int ldt,void * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)1538 void BLAS_ctbsv_s_testgen(int norm, enum blas_order_type order,
1539 enum blas_uplo_type uplo,
1540 enum blas_trans_type trans,
1541 enum blas_diag_type diag, int n, int k,
1542 int randomize, void *alpha, int alpha_flag,
1543 float *T, int ldt, void *x, int *seed,
1544 double *head_r_true, double *tail_r_true, int row,
1545 enum blas_prec_type prec)
1546
1547 /*
1548 * Purpose
1549 * =======
1550 *
1551 * Generates alpha, x and T, where T is a triangular Banded matrix;
1552 * and computes r_true.
1553 *
1554 * Arguments
1555 * =========
1556 *
1557 * norm (input) blas_norm_type
1558 *
1559 * order (input) blas_order_type
1560 * Order of T; row or column major
1561 *
1562 * uplo (input) blas_uplo_type
1563 * Whether T is upper or lower
1564 *
1565 * trans (input) blas_trans_type
1566 * No trans, trans, conj trans
1567 *
1568 * diag (input) blas_diag_type
1569 * non unit, unit
1570 *
1571 * n (input) int
1572 * Dimension of T and the length of vector x
1573 *
1574 * k (input) int
1575 * Number of super(sub)diagonals of T
1576 *
1577 * alpha (input/output) void*
1578 * If alpha_flag = 1, alpha is input.
1579 * If alpha_flag = 0, alpha is output.
1580 *
1581 * alpha_flag (input) int
1582 * = 0 : alpha is free, and is output.
1583 * = 1 : alpha is fixed on input.
1584 *
1585 * T (output) float*
1586 *
1587 * x (input/output) void*
1588 *
1589 * seed (input/output) int
1590 *
1591 * head_r_true (output) double*
1592 * The leading part of the truth in double-double.
1593 *
1594 * tail_r_true (output) double*
1595 * The trailing part of the truth in double-double.
1596 *
1597 * row (input) int
1598 * The true row being generated
1599 *
1600 * prec (input) blas_prec_type
1601 * single, double, or extra precision
1602 *
1603 */
1604 {
1605 float *x_i = (float *) x;
1606 float *alpha_i = (float *) alpha;
1607 float *T_i = T;
1608 float alpha_r;
1609 float *x_r;
1610 double *head_r_true_r, *tail_r_true_r;
1611 int i, inc = 2;
1612
1613 x_r = (float *) blas_malloc(n * sizeof(float));
1614 if (n > 0 && x_r == NULL) {
1615 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1616 }
1617 head_r_true_r = (double *) blas_malloc(n * sizeof(double));
1618 tail_r_true_r = (double *) blas_malloc(n * sizeof(double));
1619 if (n > 0 && (head_r_true_r == NULL || tail_r_true_r == NULL)) {
1620 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1621 }
1622
1623 if (alpha_flag == 1) {
1624 alpha_r = alpha_i[0];
1625 }
1626
1627 BLAS_stbsv_testgen(norm, order, uplo, trans, diag, n, k,
1628 randomize, &alpha_r,
1629 alpha_flag, T_i, ldt, x_r, seed, head_r_true_r,
1630 tail_r_true_r, row, prec);
1631
1632 alpha_i[0] = alpha_r;
1633 alpha_i[1] = alpha_r;
1634
1635 for (i = 0; i < n; i++) {
1636 x_i[i * inc] = 0.0;
1637 x_i[i * inc + 1] = x_r[i];
1638
1639 head_r_true[i * inc] = -head_r_true_r[i];
1640 head_r_true[i * inc + 1] = head_r_true_r[i];
1641 tail_r_true[i * inc] = -tail_r_true_r[i];
1642 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1643 }
1644
1645 blas_free(x_r);
1646 blas_free(head_r_true_r);
1647 blas_free(tail_r_true_r);
1648 }
1649
BLAS_ztbsv_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,void * alpha,int alpha_flag,double * T,int ldt,void * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)1650 void BLAS_ztbsv_d_testgen(int norm, enum blas_order_type order,
1651 enum blas_uplo_type uplo,
1652 enum blas_trans_type trans,
1653 enum blas_diag_type diag, int n, int k,
1654 int randomize, void *alpha, int alpha_flag,
1655 double *T, int ldt, void *x, int *seed,
1656 double *head_r_true, double *tail_r_true, int row,
1657 enum blas_prec_type prec)
1658
1659 /*
1660 * Purpose
1661 * =======
1662 *
1663 * Generates alpha, x and T, where T is a triangular Banded matrix;
1664 * and computes r_true.
1665 *
1666 * Arguments
1667 * =========
1668 *
1669 * norm (input) blas_norm_type
1670 *
1671 * order (input) blas_order_type
1672 * Order of T; row or column major
1673 *
1674 * uplo (input) blas_uplo_type
1675 * Whether T is upper or lower
1676 *
1677 * trans (input) blas_trans_type
1678 * No trans, trans, conj trans
1679 *
1680 * diag (input) blas_diag_type
1681 * non unit, unit
1682 *
1683 * n (input) int
1684 * Dimension of T and the length of vector x
1685 *
1686 * k (input) int
1687 * Number of super(sub)diagonals of T
1688 *
1689 * alpha (input/output) void*
1690 * If alpha_flag = 1, alpha is input.
1691 * If alpha_flag = 0, alpha is output.
1692 *
1693 * alpha_flag (input) int
1694 * = 0 : alpha is free, and is output.
1695 * = 1 : alpha is fixed on input.
1696 *
1697 * T (output) double*
1698 *
1699 * x (input/output) void*
1700 *
1701 * seed (input/output) int
1702 *
1703 * head_r_true (output) double*
1704 * The leading part of the truth in double-double.
1705 *
1706 * tail_r_true (output) double*
1707 * The trailing part of the truth in double-double.
1708 *
1709 * row (input) int
1710 * The true row being generated
1711 *
1712 * prec (input) blas_prec_type
1713 * single, double, or extra precision
1714 *
1715 */
1716 {
1717 double *x_i = (double *) x;
1718 double *alpha_i = (double *) alpha;
1719 double *T_i = T;
1720 double alpha_r;
1721 double *x_r;
1722 double *head_r_true_r, *tail_r_true_r;
1723 int i, inc = 2;
1724
1725 x_r = (double *) blas_malloc(n * sizeof(double));
1726 if (n > 0 && x_r == NULL) {
1727 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1728 }
1729 head_r_true_r = (double *) blas_malloc(n * sizeof(double));
1730 tail_r_true_r = (double *) blas_malloc(n * sizeof(double));
1731 if (n > 0 && (head_r_true_r == NULL || tail_r_true_r == NULL)) {
1732 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1733 }
1734
1735 if (alpha_flag == 1) {
1736 alpha_r = alpha_i[0];
1737 }
1738
1739 BLAS_dtbsv_testgen(norm, order, uplo, trans, diag, n, k,
1740 randomize, &alpha_r,
1741 alpha_flag, T_i, ldt, x_r, seed, head_r_true_r,
1742 tail_r_true_r, row, prec);
1743
1744 alpha_i[0] = alpha_r;
1745 alpha_i[1] = alpha_r;
1746
1747 for (i = 0; i < n; i++) {
1748 x_i[i * inc] = 0.0;
1749 x_i[i * inc + 1] = x_r[i];
1750
1751 head_r_true[i * inc] = -head_r_true_r[i];
1752 head_r_true[i * inc + 1] = head_r_true_r[i];
1753 tail_r_true[i * inc] = -tail_r_true_r[i];
1754 tail_r_true[i * inc + 1] = tail_r_true_r[i];
1755 }
1756
1757 blas_free(x_r);
1758 blas_free(head_r_true_r);
1759 blas_free(tail_r_true_r);
1760 }
1761