1 #include "blas_extended.h"
2 #include "blas_extended_private.h"
3 #include "blas_extended_test.h"
4 
BLAS_stbsv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,float * alpha,int alpha_flag,float * T,int ldt,float * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)5 void BLAS_stbsv_testgen(int norm, enum blas_order_type order,
6 			enum blas_uplo_type uplo, enum blas_trans_type trans,
7 			enum blas_diag_type diag, int n, int k, int randomize,
8 			float *alpha, int alpha_flag, float *T, int ldt,
9 			float *x, int *seed, double *head_r_true,
10 			double *tail_r_true, int row,
11 			enum blas_prec_type prec)
12 
13 /*
14  * Purpose
15  * =======
16  *
17  * Generates alpha, x and T, where T is a triangular Banded matrix;
18  *      and computes r_true.
19  *
20  * Arguments
21  * =========
22  *
23  * norm         (input) blas_norm_type
24  *
25  * order        (input) blas_order_type
26  *              Order of T; row or column major
27  *
28  * uplo         (input) blas_uplo_type
29  *              Whether T is upper or lower
30  *
31  * trans        (input) blas_trans_type
32  *              No trans, trans, conj trans
33  *
34  * diag         (input) blas_diag_type
35  *              non unit, unit
36  *
37  * n            (input) int
38  *              Dimension of T and the length of vector x
39  *
40  * k            (input) int
41  *              Number of super(sub)diagonals of T
42  *
43  * alpha        (input/output) float*
44  *              If alpha_flag = 1, alpha is input.
45  *              If alpha_flag = 0, alpha is output.
46  *
47  * alpha_flag   (input) int
48  *              = 0 : alpha is free, and is output.
49  *              = 1 : alpha is fixed on input.
50  *
51  * T            (output) float*
52  *
53  * x            (input/output) float*
54  *
55  * seed         (input/output) int
56  *
57  * head_r_true     (output) double*
58  *              The leading part of the truth in double-double.
59  *
60  * tail_r_true     (output) double*
61  *              The trailing part of the truth in double-double.
62  *
63  * row          (input) int
64  *              The true row being generated
65  *
66  * prec         (input) blas_prec_type
67  *              single, double, or extra precision
68  *
69  */
70 {
71   int start;
72   int length;
73   int i, j;
74   int tempi, inc_tempi;
75   float alpha_i;
76   float minus_one;
77   float Tii;
78   float *temp;
79   float *xtemp2;
80 
81   temp = (float *) blas_malloc(n * 2 * sizeof(float));
82   if (n * 2 > 0 && temp == NULL) {
83     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
84   }
85 
86   /* always allocate, not always needed */
87   xtemp2 = (float *) blas_malloc(n * 2 * sizeof(float));
88   if (n * 2 > 0 && xtemp2 == NULL) {
89     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
90   }
91 
92   minus_one = -1.0;
93 
94   /* if alpha_flag=0, gives a random value to alpha */
95   if (alpha_flag == 0) {
96     alpha_i = xrand(seed);
97     *alpha = alpha_i;
98     alpha_flag = 1;
99   }
100 
101   for (i = 0; i < 4 * n * ldt; i++) {
102     T[i] = 0.0;
103   }
104 
105   inc_tempi = 1;
106 
107 
108   for (i = 0; i < n; i++) {
109 
110     if (i != row) {
111       if (diag == blas_non_unit_diag) {
112 	Tii = xrand(seed);
113       } else {
114 	Tii = 1.0;
115       }
116       for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
117 	if (j != i) {
118 	  temp[tempi] = 0.0;
119 	} else {
120 	  temp[tempi] = Tii;
121 	}
122       }
123       stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, i);
124 
125 
126       x[i] = xrand(seed);
127 
128       switch (prec) {
129       case blas_prec_single:
130 	{
131 	  float multemp;
132 	  float divtemp;
133 
134 	  multemp = x[i] * *alpha;
135 	  divtemp = multemp / Tii;
136 	  head_r_true[i] = divtemp;
137 	  tail_r_true[i] = 0.0;
138 	  xtemp2[i] = divtemp;
139 	  break;
140 	}
141       case blas_prec_indigenous:
142       case blas_prec_double:
143 	{
144 	  double multemp;
145 	  double divtemp;
146 
147 	  multemp = (double) x[i] * *alpha;
148 	  divtemp = (double) multemp / Tii;
149 	  head_r_true[i] = divtemp;
150 	  tail_r_true[i] = 0.0;
151 	  break;
152 	}
153       case blas_prec_extra:
154 	{
155 	  double head_multemp, tail_multemp;
156 	  double head_divtemp, tail_divtemp;
157 
158 	  head_multemp = (double) x[i] * *alpha;
159 	  tail_multemp = 0.0;
160 	  {
161 	    double dt = (double) Tii;
162 	    {
163 	      /* Compute double-double = double-double / double,
164 	         using a Newton iteration scheme. */
165 	      double b1, b2, con, e, t1, t2, t11, t21, t12, t22;
166 
167 	      /* Compute a DP approximation to the quotient. */
168 	      t1 = head_multemp / dt;
169 
170 	      /* Split t1 and b into two parts with at most 26 bits each,
171 	         using the Dekker-Veltkamp method. */
172 	      con = t1 * split;
173 	      t11 = con - (con - t1);
174 	      t21 = t1 - t11;
175 	      con = dt * split;
176 	      b1 = con - (con - dt);
177 	      b2 = dt - b1;
178 
179 	      /* Compute t1 * b using Dekker method. */
180 	      t12 = t1 * dt;
181 	      t22 = (((t11 * b1 - t12) + t11 * b2) + t21 * b1) + t21 * b2;
182 
183 	      /* Compute dda - (t12, t22) using Knuth trick. */
184 	      t11 = head_multemp - t12;
185 	      e = t11 - head_multemp;
186 	      t21 =
187 		((-t12 - e) + (head_multemp - (t11 - e))) + tail_multemp -
188 		t22;
189 
190 	      /* Compute high-order word of (t11, t21) and divide by b. */
191 	      t2 = (t11 + t21) / dt;
192 
193 	      /* The result is t1 + t2, after normalization. */
194 	      head_divtemp = t1 + t2;
195 	      tail_divtemp = t2 - (head_divtemp - t1);
196 	    }
197 	  }
198 	  head_r_true[i] = head_divtemp;
199 	  tail_r_true[i] = tail_divtemp;
200 	  break;
201 	}
202       }				/* case */
203     }				/* if */
204   }				/* for */
205 
206   for (j = 0; j < n; j++) {
207     temp[j] = 0.0;
208   }
209 
210   /* now set the important row - */
211   Tii = 1.0;
212   for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
213     if (j != row) {
214       temp[tempi] = 0.0;
215     } else {
216       temp[tempi] = Tii;
217     }
218   }
219   /* this is extra, it will really be committed later */
220   stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
221 
222 
223   if ((uplo == blas_lower && (trans == blas_no_trans || trans == blas_conj))
224       || (uplo == blas_upper
225 	  && (trans == blas_trans || trans == blas_conj_trans))) {
226     length = MIN(row, k);
227     start = MAX(0, row - k);
228   } else {
229     length = MIN(n - row - 1, k);
230     start = row + 1;
231   }
232 
233   if (length != 0) {
234 
235     switch (prec) {
236     case blas_prec_single:
237       BLAS_sdot_testgen(length, 0, length, norm,
238 			blas_no_conj, &minus_one, 1, alpha, 1,
239 			&xtemp2[start], &temp[start], seed, &x[row],
240 			&head_r_true[row], &tail_r_true[row]);
241       break;
242     case blas_prec_indigenous:
243     case blas_prec_double:
244     case blas_prec_extra:
245       BLAS_sdot_x_testgen(length, 0, length, norm,
246 			  blas_no_conj, &minus_one, 1, alpha, 1,
247 			  &head_r_true[start], &tail_r_true[start],
248 			  &temp[start],
249 			  seed, &x[row], &head_r_true[row],
250 			  &tail_r_true[row]);
251       break;
252     }
253     stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
254   } else {
255     /* probably low k- must use tricks to test */
256     /* tricks are not yet implemented */
257     x[row] = xrand(seed);
258 
259     switch (prec) {
260     case blas_prec_single:
261       {
262 	float multemp;
263 
264 	multemp = x[row] * *alpha;
265 	head_r_true[row] = multemp;
266 	tail_r_true[row] = 0.0;
267 	break;
268       }
269     case blas_prec_indigenous:
270     case blas_prec_double:
271       {
272 	double multemp;
273 
274 	multemp = (double) x[row] * *alpha;
275 	head_r_true[row] = multemp;
276 	tail_r_true[row] = 0.0;
277 	break;
278       }
279     case blas_prec_extra:
280       {
281 	double head_multemp, tail_multemp;
282 
283 	head_multemp = (double) x[row] * *alpha;
284 	tail_multemp = 0.0;
285 	head_r_true[row] = head_multemp;
286 	tail_r_true[row] = tail_multemp;
287 	break;
288       }
289     }
290   }
291 
292   blas_free(temp);
293   blas_free(xtemp2);
294 }
295 
BLAS_dtbsv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,double * alpha,int alpha_flag,double * T,int ldt,double * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)296 void BLAS_dtbsv_testgen(int norm, enum blas_order_type order,
297 			enum blas_uplo_type uplo, enum blas_trans_type trans,
298 			enum blas_diag_type diag, int n, int k, int randomize,
299 			double *alpha, int alpha_flag, double *T, int ldt,
300 			double *x, int *seed, double *head_r_true,
301 			double *tail_r_true, int row,
302 			enum blas_prec_type prec)
303 
304 /*
305  * Purpose
306  * =======
307  *
308  * Generates alpha, x and T, where T is a triangular Banded matrix;
309  *      and computes r_true.
310  *
311  * Arguments
312  * =========
313  *
314  * norm         (input) blas_norm_type
315  *
316  * order        (input) blas_order_type
317  *              Order of T; row or column major
318  *
319  * uplo         (input) blas_uplo_type
320  *              Whether T is upper or lower
321  *
322  * trans        (input) blas_trans_type
323  *              No trans, trans, conj trans
324  *
325  * diag         (input) blas_diag_type
326  *              non unit, unit
327  *
328  * n            (input) int
329  *              Dimension of T and the length of vector x
330  *
331  * k            (input) int
332  *              Number of super(sub)diagonals of T
333  *
334  * alpha        (input/output) double*
335  *              If alpha_flag = 1, alpha is input.
336  *              If alpha_flag = 0, alpha is output.
337  *
338  * alpha_flag   (input) int
339  *              = 0 : alpha is free, and is output.
340  *              = 1 : alpha is fixed on input.
341  *
342  * T            (output) double*
343  *
344  * x            (input/output) double*
345  *
346  * seed         (input/output) int
347  *
348  * head_r_true     (output) double*
349  *              The leading part of the truth in double-double.
350  *
351  * tail_r_true     (output) double*
352  *              The trailing part of the truth in double-double.
353  *
354  * row          (input) int
355  *              The true row being generated
356  *
357  * prec         (input) blas_prec_type
358  *              single, double, or extra precision
359  *
360  */
361 {
362   int start;
363   int length;
364   int i, j;
365   int tempi, inc_tempi;
366   float alpha_i;
367   double minus_one;
368   double Tii;
369   double *temp;
370   double *xtemp2;
371 
372   temp = (double *) blas_malloc(n * 2 * sizeof(double));
373   if (n * 2 > 0 && temp == NULL) {
374     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
375   }
376 
377   /* always allocate, not always needed */
378   xtemp2 = (double *) blas_malloc(n * 2 * sizeof(double));
379   if (n * 2 > 0 && xtemp2 == NULL) {
380     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
381   }
382 
383   minus_one = -1.0;
384 
385   /* if alpha_flag=0, gives a random value to alpha */
386   if (alpha_flag == 0) {
387     alpha_i = xrand(seed);
388     *alpha = alpha_i;
389     alpha_flag = 1;
390   }
391 
392   for (i = 0; i < 4 * n * ldt; i++) {
393     T[i] = 0.0;
394   }
395 
396   inc_tempi = 1;
397 
398 
399   for (i = 0; i < n; i++) {
400 
401     if (i != row) {
402       if (diag == blas_non_unit_diag) {
403 	Tii = xrand(seed);
404       } else {
405 	Tii = 1.0;
406       }
407       for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
408 	if (j != i) {
409 	  temp[tempi] = 0.0;
410 	} else {
411 	  temp[tempi] = Tii;
412 	}
413       }
414       dtbsv_commit(order, uplo, trans, n, k, T, ldt, temp, i);
415 
416 
417       x[i] = xrand(seed);
418 
419       switch (prec) {
420       case blas_prec_single:
421 	{
422 	  double multemp;
423 	  double divtemp;
424 
425 	  multemp = x[i] * *alpha;
426 	  divtemp = multemp / Tii;
427 	  head_r_true[i] = divtemp;
428 	  tail_r_true[i] = 0.0;
429 	  xtemp2[i] = divtemp;
430 	  break;
431 	}
432       case blas_prec_indigenous:
433       case blas_prec_double:
434 	{
435 	  double multemp;
436 	  double divtemp;
437 
438 	  multemp = x[i] * *alpha;
439 	  divtemp = multemp / Tii;
440 	  head_r_true[i] = divtemp;
441 	  tail_r_true[i] = 0.0;
442 	  xtemp2[i] = divtemp;
443 	  break;
444 	}
445       case blas_prec_extra:
446 	{
447 	  double head_multemp, tail_multemp;
448 	  double head_divtemp, tail_divtemp;
449 
450 	  {
451 	    /* Compute double_double = double * double. */
452 	    double a1, a2, b1, b2, con;
453 
454 	    con = x[i] * split;
455 	    a1 = con - x[i];
456 	    a1 = con - a1;
457 	    a2 = x[i] - a1;
458 	    con = *alpha * split;
459 	    b1 = con - *alpha;
460 	    b1 = con - b1;
461 	    b2 = *alpha - b1;
462 
463 	    head_multemp = x[i] * *alpha;
464 	    tail_multemp =
465 	      (((a1 * b1 - head_multemp) + a1 * b2) + a2 * b1) + a2 * b2;
466 	  }
467 	  {
468 	    /* Compute double-double = double-double / double,
469 	       using a Newton iteration scheme. */
470 	    double b1, b2, con, e, t1, t2, t11, t21, t12, t22;
471 
472 	    /* Compute a DP approximation to the quotient. */
473 	    t1 = head_multemp / Tii;
474 
475 	    /* Split t1 and b into two parts with at most 26 bits each,
476 	       using the Dekker-Veltkamp method. */
477 	    con = t1 * split;
478 	    t11 = con - (con - t1);
479 	    t21 = t1 - t11;
480 	    con = Tii * split;
481 	    b1 = con - (con - Tii);
482 	    b2 = Tii - b1;
483 
484 	    /* Compute t1 * b using Dekker method. */
485 	    t12 = t1 * Tii;
486 	    t22 = (((t11 * b1 - t12) + t11 * b2) + t21 * b1) + t21 * b2;
487 
488 	    /* Compute dda - (t12, t22) using Knuth trick. */
489 	    t11 = head_multemp - t12;
490 	    e = t11 - head_multemp;
491 	    t21 =
492 	      ((-t12 - e) + (head_multemp - (t11 - e))) + tail_multemp - t22;
493 
494 	    /* Compute high-order word of (t11, t21) and divide by b. */
495 	    t2 = (t11 + t21) / Tii;
496 
497 	    /* The result is t1 + t2, after normalization. */
498 	    head_divtemp = t1 + t2;
499 	    tail_divtemp = t2 - (head_divtemp - t1);
500 	  }
501 	  head_r_true[i] = head_divtemp;
502 	  tail_r_true[i] = tail_divtemp;
503 	  break;
504 	}
505       }				/* case */
506     }				/* if */
507   }				/* for */
508 
509   for (j = 0; j < n; j++) {
510     temp[j] = 0.0;
511   }
512 
513   /* now set the important row - */
514   Tii = 1.0;
515   for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
516     if (j != row) {
517       temp[tempi] = 0.0;
518     } else {
519       temp[tempi] = Tii;
520     }
521   }
522   /* this is extra, it will really be committed later */
523   dtbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
524 
525 
526   if ((uplo == blas_lower && (trans == blas_no_trans || trans == blas_conj))
527       || (uplo == blas_upper
528 	  && (trans == blas_trans || trans == blas_conj_trans))) {
529     length = MIN(row, k);
530     start = MAX(0, row - k);
531   } else {
532     length = MIN(n - row - 1, k);
533     start = row + 1;
534   }
535 
536   if (length != 0) {
537 
538     switch (prec) {
539     case blas_prec_single:
540       BLAS_ddot_testgen(length, 0, length, norm,
541 			blas_no_conj, &minus_one, 1, alpha, 1,
542 			&xtemp2[start], &temp[start], seed, &x[row],
543 			&head_r_true[row], &tail_r_true[row]);
544       break;
545     case blas_prec_indigenous:
546     case blas_prec_double:
547     case blas_prec_extra:
548       BLAS_ddot_x_testgen(length, 0, length, norm,
549 			  blas_no_conj, &minus_one, 1, alpha, 1,
550 			  &head_r_true[start], &tail_r_true[start],
551 			  &temp[start],
552 			  seed, &x[row], &head_r_true[row],
553 			  &tail_r_true[row]);
554       break;
555     }
556     dtbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
557   } else {
558     /* probably low k- must use tricks to test */
559     /* tricks are not yet implemented */
560     x[row] = xrand(seed);
561 
562     switch (prec) {
563     case blas_prec_single:
564       {
565 	double multemp;
566 
567 	multemp = x[row] * *alpha;
568 	head_r_true[row] = multemp;
569 	tail_r_true[row] = 0.0;
570 	break;
571       }
572     case blas_prec_indigenous:
573     case blas_prec_double:
574       {
575 	double multemp;
576 
577 	multemp = x[row] * *alpha;
578 	head_r_true[row] = multemp;
579 	tail_r_true[row] = 0.0;
580 	break;
581       }
582     case blas_prec_extra:
583       {
584 	double head_multemp, tail_multemp;
585 
586 	{
587 	  /* Compute double_double = double * double. */
588 	  double a1, a2, b1, b2, con;
589 
590 	  con = x[row] * split;
591 	  a1 = con - x[row];
592 	  a1 = con - a1;
593 	  a2 = x[row] - a1;
594 	  con = *alpha * split;
595 	  b1 = con - *alpha;
596 	  b1 = con - b1;
597 	  b2 = *alpha - b1;
598 
599 	  head_multemp = x[row] * *alpha;
600 	  tail_multemp =
601 	    (((a1 * b1 - head_multemp) + a1 * b2) + a2 * b1) + a2 * b2;
602 	}
603 	head_r_true[row] = head_multemp;
604 	tail_r_true[row] = tail_multemp;
605 	break;
606       }
607     }
608   }
609 
610   blas_free(temp);
611   blas_free(xtemp2);
612 }
613 
BLAS_dtbsv_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,double * alpha,int alpha_flag,float * T,int ldt,double * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)614 void BLAS_dtbsv_s_testgen(int norm, enum blas_order_type order,
615 			  enum blas_uplo_type uplo,
616 			  enum blas_trans_type trans,
617 			  enum blas_diag_type diag, int n, int k,
618 			  int randomize, double *alpha, int alpha_flag,
619 			  float *T, int ldt, double *x, int *seed,
620 			  double *head_r_true, double *tail_r_true, int row,
621 			  enum blas_prec_type prec)
622 
623 /*
624  * Purpose
625  * =======
626  *
627  * Generates alpha, x and T, where T is a triangular Banded matrix;
628  *      and computes r_true.
629  *
630  * Arguments
631  * =========
632  *
633  * norm         (input) blas_norm_type
634  *
635  * order        (input) blas_order_type
636  *              Order of T; row or column major
637  *
638  * uplo         (input) blas_uplo_type
639  *              Whether T is upper or lower
640  *
641  * trans        (input) blas_trans_type
642  *              No trans, trans, conj trans
643  *
644  * diag         (input) blas_diag_type
645  *              non unit, unit
646  *
647  * n            (input) int
648  *              Dimension of T and the length of vector x
649  *
650  * k            (input) int
651  *              Number of super(sub)diagonals of T
652  *
653  * alpha        (input/output) double*
654  *              If alpha_flag = 1, alpha is input.
655  *              If alpha_flag = 0, alpha is output.
656  *
657  * alpha_flag   (input) int
658  *              = 0 : alpha is free, and is output.
659  *              = 1 : alpha is fixed on input.
660  *
661  * T            (output) float*
662  *
663  * x            (input/output) double*
664  *
665  * seed         (input/output) int
666  *
667  * head_r_true     (output) double*
668  *              The leading part of the truth in double-double.
669  *
670  * tail_r_true     (output) double*
671  *              The trailing part of the truth in double-double.
672  *
673  * row          (input) int
674  *              The true row being generated
675  *
676  * prec         (input) blas_prec_type
677  *              single, double, or extra precision
678  *
679  */
680 {
681   int start;
682   int length;
683   int i, j;
684   int tempi, inc_tempi;
685   float alpha_i;
686   double minus_one;
687   float Tii;
688   float *temp;
689   double *xtemp2;
690 
691   temp = (float *) blas_malloc(n * 2 * sizeof(float));
692   if (n * 2 > 0 && temp == NULL) {
693     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
694   }
695 
696   /* always allocate, not always needed */
697   xtemp2 = (double *) blas_malloc(n * 2 * sizeof(double));
698   if (n * 2 > 0 && xtemp2 == NULL) {
699     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
700   }
701 
702   minus_one = -1.0;
703 
704   /* if alpha_flag=0, gives a random value to alpha */
705   if (alpha_flag == 0) {
706     alpha_i = xrand(seed);
707     *alpha = alpha_i;
708     alpha_flag = 1;
709   }
710 
711   for (i = 0; i < 4 * n * ldt; i++) {
712     T[i] = 0.0;
713   }
714 
715   inc_tempi = 1;
716 
717 
718   for (i = 0; i < n; i++) {
719 
720     if (i != row) {
721       if (diag == blas_non_unit_diag) {
722 	Tii = xrand(seed);
723       } else {
724 	Tii = 1.0;
725       }
726       for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
727 	if (j != i) {
728 	  temp[tempi] = 0.0;
729 	} else {
730 	  temp[tempi] = Tii;
731 	}
732       }
733       stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, i);
734 
735 
736       x[i] = xrand(seed);
737 
738       switch (prec) {
739       case blas_prec_single:
740 	{
741 	  double multemp;
742 	  double divtemp;
743 
744 	  multemp = x[i] * *alpha;
745 	  divtemp = multemp / Tii;
746 	  head_r_true[i] = divtemp;
747 	  tail_r_true[i] = 0.0;
748 	  xtemp2[i] = divtemp;
749 	  break;
750 	}
751       case blas_prec_indigenous:
752       case blas_prec_double:
753 	{
754 	  double multemp;
755 	  double divtemp;
756 
757 	  multemp = x[i] * *alpha;
758 	  divtemp = multemp / Tii;
759 	  head_r_true[i] = divtemp;
760 	  tail_r_true[i] = 0.0;
761 	  xtemp2[i] = divtemp;
762 	  break;
763 	}
764       case blas_prec_extra:
765 	{
766 	  double head_multemp, tail_multemp;
767 	  double head_divtemp, tail_divtemp;
768 
769 	  {
770 	    /* Compute double_double = double * double. */
771 	    double a1, a2, b1, b2, con;
772 
773 	    con = x[i] * split;
774 	    a1 = con - x[i];
775 	    a1 = con - a1;
776 	    a2 = x[i] - a1;
777 	    con = *alpha * split;
778 	    b1 = con - *alpha;
779 	    b1 = con - b1;
780 	    b2 = *alpha - b1;
781 
782 	    head_multemp = x[i] * *alpha;
783 	    tail_multemp =
784 	      (((a1 * b1 - head_multemp) + a1 * b2) + a2 * b1) + a2 * b2;
785 	  }
786 	  {
787 	    double dt = (double) Tii;
788 	    {
789 	      /* Compute double-double = double-double / double,
790 	         using a Newton iteration scheme. */
791 	      double b1, b2, con, e, t1, t2, t11, t21, t12, t22;
792 
793 	      /* Compute a DP approximation to the quotient. */
794 	      t1 = head_multemp / dt;
795 
796 	      /* Split t1 and b into two parts with at most 26 bits each,
797 	         using the Dekker-Veltkamp method. */
798 	      con = t1 * split;
799 	      t11 = con - (con - t1);
800 	      t21 = t1 - t11;
801 	      con = dt * split;
802 	      b1 = con - (con - dt);
803 	      b2 = dt - b1;
804 
805 	      /* Compute t1 * b using Dekker method. */
806 	      t12 = t1 * dt;
807 	      t22 = (((t11 * b1 - t12) + t11 * b2) + t21 * b1) + t21 * b2;
808 
809 	      /* Compute dda - (t12, t22) using Knuth trick. */
810 	      t11 = head_multemp - t12;
811 	      e = t11 - head_multemp;
812 	      t21 =
813 		((-t12 - e) + (head_multemp - (t11 - e))) + tail_multemp -
814 		t22;
815 
816 	      /* Compute high-order word of (t11, t21) and divide by b. */
817 	      t2 = (t11 + t21) / dt;
818 
819 	      /* The result is t1 + t2, after normalization. */
820 	      head_divtemp = t1 + t2;
821 	      tail_divtemp = t2 - (head_divtemp - t1);
822 	    }
823 	  }
824 	  head_r_true[i] = head_divtemp;
825 	  tail_r_true[i] = tail_divtemp;
826 	  break;
827 	}
828       }				/* case */
829     }				/* if */
830   }				/* for */
831 
832   for (j = 0; j < n; j++) {
833     temp[j] = 0.0;
834   }
835 
836   /* now set the important row - */
837   Tii = 1.0;
838   for (j = 0, tempi = 0; j < n; j++, tempi += inc_tempi) {
839     if (j != row) {
840       temp[tempi] = 0.0;
841     } else {
842       temp[tempi] = Tii;
843     }
844   }
845   /* this is extra, it will really be committed later */
846   stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
847 
848 
849   if ((uplo == blas_lower && (trans == blas_no_trans || trans == blas_conj))
850       || (uplo == blas_upper
851 	  && (trans == blas_trans || trans == blas_conj_trans))) {
852     length = MIN(row, k);
853     start = MAX(0, row - k);
854   } else {
855     length = MIN(n - row - 1, k);
856     start = row + 1;
857   }
858 
859   if (length != 0) {
860 
861     switch (prec) {
862     case blas_prec_single:
863     case blas_prec_indigenous:
864     case blas_prec_double:	/*BLAS_ddot_s_x_testgen(length, 0, length, norm,
865 				   blas_no_conj, &minus_one, 1, alpha, 1,
866 				   &head_r_true[start], &tail_r_true[start],
867 				   &temp[start],
868 				   seed, &x[row], &head_r_true[row], &tail_r_true[row]);
869 				   break; */
870     case blas_prec_extra:
871       BLAS_ddot_s_x_testgen(length, 0, length, norm,
872 			    blas_no_conj, &minus_one, 1, alpha, 1,
873 			    &head_r_true[start], &tail_r_true[start],
874 			    &temp[start],
875 			    seed, &x[row], &head_r_true[row],
876 			    &tail_r_true[row]);
877       break;
878     }
879     stbsv_commit(order, uplo, trans, n, k, T, ldt, temp, row);
880   } else {
881     /* probably low k- must use tricks to test */
882     /* tricks are not yet implemented */
883     x[row] = xrand(seed);
884 
885     switch (prec) {
886     case blas_prec_single:
887       {
888 	double multemp;
889 
890 	multemp = x[row] * *alpha;
891 	head_r_true[row] = multemp;
892 	tail_r_true[row] = 0.0;
893 	break;
894       }
895     case blas_prec_indigenous:
896     case blas_prec_double:
897       {
898 	double multemp;
899 
900 	multemp = x[row] * *alpha;
901 	head_r_true[row] = multemp;
902 	tail_r_true[row] = 0.0;
903 	break;
904       }
905     case blas_prec_extra:
906       {
907 	double head_multemp, tail_multemp;
908 
909 	{
910 	  /* Compute double_double = double * double. */
911 	  double a1, a2, b1, b2, con;
912 
913 	  con = x[row] * split;
914 	  a1 = con - x[row];
915 	  a1 = con - a1;
916 	  a2 = x[row] - a1;
917 	  con = *alpha * split;
918 	  b1 = con - *alpha;
919 	  b1 = con - b1;
920 	  b2 = *alpha - b1;
921 
922 	  head_multemp = x[row] * *alpha;
923 	  tail_multemp =
924 	    (((a1 * b1 - head_multemp) + a1 * b2) + a2 * b1) + a2 * b2;
925 	}
926 	head_r_true[row] = head_multemp;
927 	tail_r_true[row] = tail_multemp;
928 	break;
929       }
930     }
931   }
932 
933   blas_free(temp);
934   blas_free(xtemp2);
935 }
936 
BLAS_ctbsv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,void * alpha,int alpha_flag,void * T,int ldt,void * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)937 void BLAS_ctbsv_testgen(int norm, enum blas_order_type order,
938 			enum blas_uplo_type uplo, enum blas_trans_type trans,
939 			enum blas_diag_type diag, int n, int k, int randomize,
940 			void *alpha, int alpha_flag, void *T, int ldt,
941 			void *x, int *seed, double *head_r_true,
942 			double *tail_r_true, int row,
943 			enum blas_prec_type prec)
944 
945 /*
946  * Purpose
947  * =======
948  *
949  * Generates alpha, x and T, where T is a triangular Banded matrix;
950  *      and computes r_true.
951  *
952  * Arguments
953  * =========
954  *
955  * norm         (input) blas_norm_type
956  *
957  * order        (input) blas_order_type
958  *              Order of T; row or column major
959  *
960  * uplo         (input) blas_uplo_type
961  *              Whether T is upper or lower
962  *
963  * trans        (input) blas_trans_type
964  *              No trans, trans, conj trans
965  *
966  * diag         (input) blas_diag_type
967  *              non unit, unit
968  *
969  * n            (input) int
970  *              Dimension of T and the length of vector x
971  *
972  * k            (input) int
973  *              Number of super(sub)diagonals of T
974  *
975  * alpha        (input/output) void*
976  *              If alpha_flag = 1, alpha is input.
977  *              If alpha_flag = 0, alpha is output.
978  *
979  * alpha_flag   (input) int
980  *              = 0 : alpha is free, and is output.
981  *              = 1 : alpha is fixed on input.
982  *
983  * T            (output) void*
984  *
985  * x            (input/output) void*
986  *
987  * seed         (input/output) int
988  *
989  * head_r_true     (output) double*
990  *              The leading part of the truth in double-double.
991  *
992  * tail_r_true     (output) double*
993  *              The trailing part of the truth in double-double.
994  *
995  * row          (input) int
996  *              The true row being generated
997  *
998  * prec         (input) blas_prec_type
999  *              single, double, or extra precision
1000  *
1001  */
1002 {
1003   float *x_i = (float *) x;
1004   float *alpha_i = (float *) alpha;
1005   float *T_i = (float *) T;
1006   float alpha_r;
1007   float *T_r;
1008   float *x_r;
1009   float *T_temp_r;
1010   float *T_temp_c;
1011   double *head_r_true_r, *tail_r_true_r;
1012   int i, inc = 2, length, j;
1013 
1014   T_r = (float *) blas_malloc(8 * n * ldt * sizeof(float));
1015   if (8 * n * ldt > 0 && T_r == NULL) {
1016     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1017   }
1018   x_r = (float *) blas_malloc(n * sizeof(float));
1019   if (n > 0 && x_r == NULL) {
1020     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1021   }
1022   head_r_true_r = (double *) blas_malloc(n * sizeof(double));
1023   tail_r_true_r = (double *) blas_malloc(n * sizeof(double));
1024   if (n > 0 && (head_r_true_r == NULL || tail_r_true_r == NULL)) {
1025     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1026   }
1027   T_temp_c = (float *) blas_malloc(n * sizeof(float) * 2);
1028   if (n > 0 && T_temp_c == NULL) {
1029     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1030   }
1031   T_temp_r = (float *) blas_malloc(n * sizeof(float));
1032   if (n > 0 && T_temp_r == NULL) {
1033     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1034   }
1035 
1036   if (alpha_flag == 1) {
1037     alpha_r = alpha_i[0];
1038   }
1039 
1040   if ((uplo == blas_lower && trans == blas_no_trans) ||
1041       (uplo == blas_upper && trans != blas_no_trans)) {
1042     length = row;
1043   } else {
1044     length = n - row - 1;
1045   }
1046 
1047   BLAS_stbsv_testgen(norm, order, uplo, trans, diag, n, k, randomize,
1048 		     &alpha_r,
1049 		     alpha_flag, T_r, ldt, x_r, seed, head_r_true_r,
1050 		     tail_r_true_r, row, prec);
1051 
1052   /* multiply alpha_r by 1+i */
1053   alpha_i[0] = alpha_r;
1054   alpha_i[1] = alpha_r;
1055 
1056   if (diag == blas_non_unit_diag) {
1057     for (i = 0; i < n; i++) {
1058       /* multiply x_r by i */
1059       x_i[i * inc] = 0.0;
1060       x_i[i * inc + 1] = x_r[i];
1061 
1062       /* multiply non major rows truth by i */
1063       if (i != row) {
1064 	head_r_true[i * inc] = 0.0;
1065 	head_r_true[i * inc + 1] = head_r_true_r[i];
1066 	tail_r_true[i * inc] = 0.0;
1067 	tail_r_true[i * inc + 1] = tail_r_true_r[i];
1068       } else {
1069 	/* multiply major rows truth by (-1+i) */
1070 	head_r_true[i * inc] = -head_r_true_r[i];
1071 	head_r_true[i * inc + 1] = head_r_true_r[i];
1072 	tail_r_true[i * inc] = -tail_r_true_r[i];
1073 	tail_r_true[i * inc + 1] = tail_r_true_r[i];
1074       }
1075     }
1076 
1077     /* copy T real to complex (and multiply by (1+i)) */
1078     for (i = 0; i < n; i++) {
1079       stbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1080       for (j = 0; j < n; j++) {
1081 	T_temp_c[j * inc] = T_temp_r[j];
1082 	/* conjugation is handled by the commit later - */
1083 	T_temp_c[j * inc + 1] = T_temp_r[j];
1084       }
1085       if (i == row) {
1086 	/* zero out imaginary part of diagonal on important row. */
1087 	T_temp_c[row * inc + 1] = 0.0;
1088       }
1089       ctbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1090     }
1091   } else {
1092     for (i = 0; i < n; i++) {
1093       /* multiply x by i */
1094       x_i[i * inc] = 0.0;
1095       x_i[i * inc + 1] = x_r[i];
1096 
1097       /* multiply non-major rows by -1+i */
1098       if (i != row || length == 0) {
1099 	head_r_true[i * inc] = -head_r_true_r[i];
1100 	head_r_true[i * inc + 1] = head_r_true_r[i];
1101 	tail_r_true[i * inc] = -tail_r_true_r[i];
1102 	tail_r_true[i * inc + 1] = tail_r_true_r[i];
1103       } else {
1104 	/* multiply x by 1+i (overwrite multiply by i earlier) */
1105 	x_i[i * inc] = x_r[i];
1106 	x_i[i * inc + 1] = x_r[i];
1107 
1108 	head_r_true[i * inc] = 0.0;
1109 	head_r_true[i * inc + 1] = 2 * head_r_true_r[i];
1110 	tail_r_true[i * inc] = 0.0;
1111 	tail_r_true[i * inc + 1] = 2 * tail_r_true_r[i];
1112       }
1113     }
1114 
1115     /* copy T real to complex (and multiply by (1-i)) */
1116     for (i = 0; i < n; i++) {
1117       stbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1118       for (j = 0; j < n; j++) {
1119 	T_temp_c[j * inc] = T_temp_r[j];
1120 	/* conjugation is handled by the commit later - */
1121 	T_temp_c[j * inc + 1] = -T_temp_r[j];
1122       }
1123       /* zero out imaginary part of diagonal on every row. */
1124       T_temp_c[i * inc + 1] = 0.0;
1125       ctbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1126     }
1127   }
1128 
1129   blas_free(T_temp_c);
1130   blas_free(T_temp_r);
1131   blas_free(T_r);
1132   blas_free(x_r);
1133   blas_free(head_r_true_r);
1134   blas_free(tail_r_true_r);
1135 }
1136 
BLAS_ztbsv_c_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,void * alpha,int alpha_flag,void * T,int ldt,void * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)1137 void BLAS_ztbsv_c_testgen(int norm, enum blas_order_type order,
1138 			  enum blas_uplo_type uplo,
1139 			  enum blas_trans_type trans,
1140 			  enum blas_diag_type diag, int n, int k,
1141 			  int randomize, void *alpha, int alpha_flag, void *T,
1142 			  int ldt, void *x, int *seed, double *head_r_true,
1143 			  double *tail_r_true, int row,
1144 			  enum blas_prec_type prec)
1145 
1146 /*
1147  * Purpose
1148  * =======
1149  *
1150  * Generates alpha, x and T, where T is a triangular Banded matrix;
1151  *      and computes r_true.
1152  *
1153  * Arguments
1154  * =========
1155  *
1156  * norm         (input) blas_norm_type
1157  *
1158  * order        (input) blas_order_type
1159  *              Order of T; row or column major
1160  *
1161  * uplo         (input) blas_uplo_type
1162  *              Whether T is upper or lower
1163  *
1164  * trans        (input) blas_trans_type
1165  *              No trans, trans, conj trans
1166  *
1167  * diag         (input) blas_diag_type
1168  *              non unit, unit
1169  *
1170  * n            (input) int
1171  *              Dimension of T and the length of vector x
1172  *
1173  * k            (input) int
1174  *              Number of super(sub)diagonals of T
1175  *
1176  * alpha        (input/output) void*
1177  *              If alpha_flag = 1, alpha is input.
1178  *              If alpha_flag = 0, alpha is output.
1179  *
1180  * alpha_flag   (input) int
1181  *              = 0 : alpha is free, and is output.
1182  *              = 1 : alpha is fixed on input.
1183  *
1184  * T            (output) void*
1185  *
1186  * x            (input/output) void*
1187  *
1188  * seed         (input/output) int
1189  *
1190  * head_r_true     (output) double*
1191  *              The leading part of the truth in double-double.
1192  *
1193  * tail_r_true     (output) double*
1194  *              The trailing part of the truth in double-double.
1195  *
1196  * row          (input) int
1197  *              The true row being generated
1198  *
1199  * prec         (input) blas_prec_type
1200  *              single, double, or extra precision
1201  *
1202  */
1203 {
1204   double *x_i = (double *) x;
1205   double *alpha_i = (double *) alpha;
1206   float *T_i = (float *) T;
1207   double alpha_r;
1208   float *T_r;
1209   double *x_r;
1210   float *T_temp_r;
1211   float *T_temp_c;
1212   double *head_r_true_r, *tail_r_true_r;
1213   int i, inc = 2, length, j;
1214 
1215   T_r = (float *) blas_malloc(8 * n * ldt * sizeof(float));
1216   if (8 * n * ldt > 0 && T_r == NULL) {
1217     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1218   }
1219   x_r = (double *) blas_malloc(n * sizeof(double));
1220   if (n > 0 && x_r == NULL) {
1221     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1222   }
1223   head_r_true_r = (double *) blas_malloc(n * sizeof(double));
1224   tail_r_true_r = (double *) blas_malloc(n * sizeof(double));
1225   if (n > 0 && (head_r_true_r == NULL || tail_r_true_r == NULL)) {
1226     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1227   }
1228   T_temp_c = (float *) blas_malloc(n * sizeof(float) * 2);
1229   if (n > 0 && T_temp_c == NULL) {
1230     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1231   }
1232   T_temp_r = (float *) blas_malloc(n * sizeof(float));
1233   if (n > 0 && T_temp_r == NULL) {
1234     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1235   }
1236 
1237   if (alpha_flag == 1) {
1238     alpha_r = alpha_i[0];
1239   }
1240 
1241   if ((uplo == blas_lower && trans == blas_no_trans) ||
1242       (uplo == blas_upper && trans != blas_no_trans)) {
1243     length = row;
1244   } else {
1245     length = n - row - 1;
1246   }
1247 
1248   BLAS_dtbsv_s_testgen(norm, order, uplo, trans, diag, n, k, randomize,
1249 		       &alpha_r,
1250 		       alpha_flag, T_r, ldt, x_r, seed, head_r_true_r,
1251 		       tail_r_true_r, row, prec);
1252 
1253   /* multiply alpha_r by 1+i */
1254   alpha_i[0] = alpha_r;
1255   alpha_i[1] = alpha_r;
1256 
1257   if (diag == blas_non_unit_diag) {
1258     for (i = 0; i < n; i++) {
1259       /* multiply x_r by i */
1260       x_i[i * inc] = 0.0;
1261       x_i[i * inc + 1] = x_r[i];
1262 
1263       /* multiply non major rows truth by i */
1264       if (i != row) {
1265 	head_r_true[i * inc] = 0.0;
1266 	head_r_true[i * inc + 1] = head_r_true_r[i];
1267 	tail_r_true[i * inc] = 0.0;
1268 	tail_r_true[i * inc + 1] = tail_r_true_r[i];
1269       } else {
1270 	/* multiply major rows truth by (-1+i) */
1271 	head_r_true[i * inc] = -head_r_true_r[i];
1272 	head_r_true[i * inc + 1] = head_r_true_r[i];
1273 	tail_r_true[i * inc] = -tail_r_true_r[i];
1274 	tail_r_true[i * inc + 1] = tail_r_true_r[i];
1275       }
1276     }
1277 
1278     /* copy T real to complex (and multiply by (1+i)) */
1279     for (i = 0; i < n; i++) {
1280       stbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1281       for (j = 0; j < n; j++) {
1282 	T_temp_c[j * inc] = T_temp_r[j];
1283 	/* conjugation is handled by the commit later - */
1284 	T_temp_c[j * inc + 1] = T_temp_r[j];
1285       }
1286       if (i == row) {
1287 	/* zero out imaginary part of diagonal on important row. */
1288 	T_temp_c[row * inc + 1] = 0.0;
1289       }
1290       ctbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1291     }
1292   } else {
1293     for (i = 0; i < n; i++) {
1294       /* multiply x by i */
1295       x_i[i * inc] = 0.0;
1296       x_i[i * inc + 1] = x_r[i];
1297 
1298       /* multiply non-major rows by -1+i */
1299       if (i != row || length == 0) {
1300 	head_r_true[i * inc] = -head_r_true_r[i];
1301 	head_r_true[i * inc + 1] = head_r_true_r[i];
1302 	tail_r_true[i * inc] = -tail_r_true_r[i];
1303 	tail_r_true[i * inc + 1] = tail_r_true_r[i];
1304       } else {
1305 	/* multiply x by 1+i (overwrite multiply by i earlier) */
1306 	x_i[i * inc] = x_r[i];
1307 	x_i[i * inc + 1] = x_r[i];
1308 
1309 	head_r_true[i * inc] = 0.0;
1310 	head_r_true[i * inc + 1] = 2 * head_r_true_r[i];
1311 	tail_r_true[i * inc] = 0.0;
1312 	tail_r_true[i * inc + 1] = 2 * tail_r_true_r[i];
1313       }
1314     }
1315 
1316     /* copy T real to complex (and multiply by (1-i)) */
1317     for (i = 0; i < n; i++) {
1318       stbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1319       for (j = 0; j < n; j++) {
1320 	T_temp_c[j * inc] = T_temp_r[j];
1321 	/* conjugation is handled by the commit later - */
1322 	T_temp_c[j * inc + 1] = -T_temp_r[j];
1323       }
1324       /* zero out imaginary part of diagonal on every row. */
1325       T_temp_c[i * inc + 1] = 0.0;
1326       ctbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1327     }
1328   }
1329 
1330   blas_free(T_temp_c);
1331   blas_free(T_temp_r);
1332   blas_free(T_r);
1333   blas_free(x_r);
1334   blas_free(head_r_true_r);
1335   blas_free(tail_r_true_r);
1336 }
1337 
BLAS_ztbsv_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,void * alpha,int alpha_flag,void * T,int ldt,void * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)1338 void BLAS_ztbsv_testgen(int norm, enum blas_order_type order,
1339 			enum blas_uplo_type uplo, enum blas_trans_type trans,
1340 			enum blas_diag_type diag, int n, int k, int randomize,
1341 			void *alpha, int alpha_flag, void *T, int ldt,
1342 			void *x, int *seed, double *head_r_true,
1343 			double *tail_r_true, int row,
1344 			enum blas_prec_type prec)
1345 
1346 /*
1347  * Purpose
1348  * =======
1349  *
1350  * Generates alpha, x and T, where T is a triangular Banded matrix;
1351  *      and computes r_true.
1352  *
1353  * Arguments
1354  * =========
1355  *
1356  * norm         (input) blas_norm_type
1357  *
1358  * order        (input) blas_order_type
1359  *              Order of T; row or column major
1360  *
1361  * uplo         (input) blas_uplo_type
1362  *              Whether T is upper or lower
1363  *
1364  * trans        (input) blas_trans_type
1365  *              No trans, trans, conj trans
1366  *
1367  * diag         (input) blas_diag_type
1368  *              non unit, unit
1369  *
1370  * n            (input) int
1371  *              Dimension of T and the length of vector x
1372  *
1373  * k            (input) int
1374  *              Number of super(sub)diagonals of T
1375  *
1376  * alpha        (input/output) void*
1377  *              If alpha_flag = 1, alpha is input.
1378  *              If alpha_flag = 0, alpha is output.
1379  *
1380  * alpha_flag   (input) int
1381  *              = 0 : alpha is free, and is output.
1382  *              = 1 : alpha is fixed on input.
1383  *
1384  * T            (output) void*
1385  *
1386  * x            (input/output) void*
1387  *
1388  * seed         (input/output) int
1389  *
1390  * head_r_true     (output) double*
1391  *              The leading part of the truth in double-double.
1392  *
1393  * tail_r_true     (output) double*
1394  *              The trailing part of the truth in double-double.
1395  *
1396  * row          (input) int
1397  *              The true row being generated
1398  *
1399  * prec         (input) blas_prec_type
1400  *              single, double, or extra precision
1401  *
1402  */
1403 {
1404   double *x_i = (double *) x;
1405   double *alpha_i = (double *) alpha;
1406   double *T_i = (double *) T;
1407   double alpha_r;
1408   double *T_r;
1409   double *x_r;
1410   double *T_temp_r;
1411   double *T_temp_c;
1412   double *head_r_true_r, *tail_r_true_r;
1413   int i, inc = 2, length, j;
1414 
1415   T_r = (double *) blas_malloc(8 * n * ldt * sizeof(double));
1416   if (8 * n * ldt > 0 && T_r == NULL) {
1417     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1418   }
1419   x_r = (double *) blas_malloc(n * sizeof(double));
1420   if (n > 0 && x_r == NULL) {
1421     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1422   }
1423   head_r_true_r = (double *) blas_malloc(n * sizeof(double));
1424   tail_r_true_r = (double *) blas_malloc(n * sizeof(double));
1425   if (n > 0 && (head_r_true_r == NULL || tail_r_true_r == NULL)) {
1426     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1427   }
1428   T_temp_c = (double *) blas_malloc(n * sizeof(double) * 2);
1429   if (n > 0 && T_temp_c == NULL) {
1430     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1431   }
1432   T_temp_r = (double *) blas_malloc(n * sizeof(double));
1433   if (n > 0 && T_temp_r == NULL) {
1434     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1435   }
1436 
1437   if (alpha_flag == 1) {
1438     alpha_r = alpha_i[0];
1439   }
1440 
1441   if ((uplo == blas_lower && trans == blas_no_trans) ||
1442       (uplo == blas_upper && trans != blas_no_trans)) {
1443     length = row;
1444   } else {
1445     length = n - row - 1;
1446   }
1447 
1448   BLAS_dtbsv_testgen(norm, order, uplo, trans, diag, n, k, randomize,
1449 		     &alpha_r,
1450 		     alpha_flag, T_r, ldt, x_r, seed, head_r_true_r,
1451 		     tail_r_true_r, row, prec);
1452 
1453   /* multiply alpha_r by 1+i */
1454   alpha_i[0] = alpha_r;
1455   alpha_i[1] = alpha_r;
1456 
1457   if (diag == blas_non_unit_diag) {
1458     for (i = 0; i < n; i++) {
1459       /* multiply x_r by i */
1460       x_i[i * inc] = 0.0;
1461       x_i[i * inc + 1] = x_r[i];
1462 
1463       /* multiply non major rows truth by i */
1464       if (i != row) {
1465 	head_r_true[i * inc] = 0.0;
1466 	head_r_true[i * inc + 1] = head_r_true_r[i];
1467 	tail_r_true[i * inc] = 0.0;
1468 	tail_r_true[i * inc + 1] = tail_r_true_r[i];
1469       } else {
1470 	/* multiply major rows truth by (-1+i) */
1471 	head_r_true[i * inc] = -head_r_true_r[i];
1472 	head_r_true[i * inc + 1] = head_r_true_r[i];
1473 	tail_r_true[i * inc] = -tail_r_true_r[i];
1474 	tail_r_true[i * inc + 1] = tail_r_true_r[i];
1475       }
1476     }
1477 
1478     /* copy T real to complex (and multiply by (1+i)) */
1479     for (i = 0; i < n; i++) {
1480       dtbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1481       for (j = 0; j < n; j++) {
1482 	T_temp_c[j * inc] = T_temp_r[j];
1483 	/* conjugation is handled by the commit later - */
1484 	T_temp_c[j * inc + 1] = T_temp_r[j];
1485       }
1486       if (i == row) {
1487 	/* zero out imaginary part of diagonal on important row. */
1488 	T_temp_c[row * inc + 1] = 0.0;
1489       }
1490       ztbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1491     }
1492   } else {
1493     for (i = 0; i < n; i++) {
1494       /* multiply x by i */
1495       x_i[i * inc] = 0.0;
1496       x_i[i * inc + 1] = x_r[i];
1497 
1498       /* multiply non-major rows by -1+i */
1499       if (i != row || length == 0) {
1500 	head_r_true[i * inc] = -head_r_true_r[i];
1501 	head_r_true[i * inc + 1] = head_r_true_r[i];
1502 	tail_r_true[i * inc] = -tail_r_true_r[i];
1503 	tail_r_true[i * inc + 1] = tail_r_true_r[i];
1504       } else {
1505 	/* multiply x by 1+i (overwrite multiply by i earlier) */
1506 	x_i[i * inc] = x_r[i];
1507 	x_i[i * inc + 1] = x_r[i];
1508 
1509 	head_r_true[i * inc] = 0.0;
1510 	head_r_true[i * inc + 1] = 2 * head_r_true_r[i];
1511 	tail_r_true[i * inc] = 0.0;
1512 	tail_r_true[i * inc + 1] = 2 * tail_r_true_r[i];
1513       }
1514     }
1515 
1516     /* copy T real to complex (and multiply by (1-i)) */
1517     for (i = 0; i < n; i++) {
1518       dtbsv_copy(order, uplo, trans, n, k, T_r, ldt, T_temp_r, i);
1519       for (j = 0; j < n; j++) {
1520 	T_temp_c[j * inc] = T_temp_r[j];
1521 	/* conjugation is handled by the commit later - */
1522 	T_temp_c[j * inc + 1] = -T_temp_r[j];
1523       }
1524       /* zero out imaginary part of diagonal on every row. */
1525       T_temp_c[i * inc + 1] = 0.0;
1526       ztbsv_commit(order, uplo, trans, n, k, T_i, ldt, T_temp_c, i);
1527     }
1528   }
1529 
1530   blas_free(T_temp_c);
1531   blas_free(T_temp_r);
1532   blas_free(T_r);
1533   blas_free(x_r);
1534   blas_free(head_r_true_r);
1535   blas_free(tail_r_true_r);
1536 }
1537 
BLAS_ctbsv_s_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,void * alpha,int alpha_flag,float * T,int ldt,void * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)1538 void BLAS_ctbsv_s_testgen(int norm, enum blas_order_type order,
1539 			  enum blas_uplo_type uplo,
1540 			  enum blas_trans_type trans,
1541 			  enum blas_diag_type diag, int n, int k,
1542 			  int randomize, void *alpha, int alpha_flag,
1543 			  float *T, int ldt, void *x, int *seed,
1544 			  double *head_r_true, double *tail_r_true, int row,
1545 			  enum blas_prec_type prec)
1546 
1547 /*
1548  * Purpose
1549  * =======
1550  *
1551  * Generates alpha, x and T, where T is a triangular Banded matrix;
1552  *      and computes r_true.
1553  *
1554  * Arguments
1555  * =========
1556  *
1557  * norm         (input) blas_norm_type
1558  *
1559  * order        (input) blas_order_type
1560  *              Order of T; row or column major
1561  *
1562  * uplo         (input) blas_uplo_type
1563  *              Whether T is upper or lower
1564  *
1565  * trans        (input) blas_trans_type
1566  *              No trans, trans, conj trans
1567  *
1568  * diag         (input) blas_diag_type
1569  *              non unit, unit
1570  *
1571  * n            (input) int
1572  *              Dimension of T and the length of vector x
1573  *
1574  * k            (input) int
1575  *              Number of super(sub)diagonals of T
1576  *
1577  * alpha        (input/output) void*
1578  *              If alpha_flag = 1, alpha is input.
1579  *              If alpha_flag = 0, alpha is output.
1580  *
1581  * alpha_flag   (input) int
1582  *              = 0 : alpha is free, and is output.
1583  *              = 1 : alpha is fixed on input.
1584  *
1585  * T            (output) float*
1586  *
1587  * x            (input/output) void*
1588  *
1589  * seed         (input/output) int
1590  *
1591  * head_r_true     (output) double*
1592  *              The leading part of the truth in double-double.
1593  *
1594  * tail_r_true     (output) double*
1595  *              The trailing part of the truth in double-double.
1596  *
1597  * row          (input) int
1598  *              The true row being generated
1599  *
1600  * prec         (input) blas_prec_type
1601  *              single, double, or extra precision
1602  *
1603  */
1604 {
1605   float *x_i = (float *) x;
1606   float *alpha_i = (float *) alpha;
1607   float *T_i = T;
1608   float alpha_r;
1609   float *x_r;
1610   double *head_r_true_r, *tail_r_true_r;
1611   int i, inc = 2;
1612 
1613   x_r = (float *) blas_malloc(n * sizeof(float));
1614   if (n > 0 && x_r == NULL) {
1615     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1616   }
1617   head_r_true_r = (double *) blas_malloc(n * sizeof(double));
1618   tail_r_true_r = (double *) blas_malloc(n * sizeof(double));
1619   if (n > 0 && (head_r_true_r == NULL || tail_r_true_r == NULL)) {
1620     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1621   }
1622 
1623   if (alpha_flag == 1) {
1624     alpha_r = alpha_i[0];
1625   }
1626 
1627   BLAS_stbsv_testgen(norm, order, uplo, trans, diag, n, k,
1628 		     randomize, &alpha_r,
1629 		     alpha_flag, T_i, ldt, x_r, seed, head_r_true_r,
1630 		     tail_r_true_r, row, prec);
1631 
1632   alpha_i[0] = alpha_r;
1633   alpha_i[1] = alpha_r;
1634 
1635   for (i = 0; i < n; i++) {
1636     x_i[i * inc] = 0.0;
1637     x_i[i * inc + 1] = x_r[i];
1638 
1639     head_r_true[i * inc] = -head_r_true_r[i];
1640     head_r_true[i * inc + 1] = head_r_true_r[i];
1641     tail_r_true[i * inc] = -tail_r_true_r[i];
1642     tail_r_true[i * inc + 1] = tail_r_true_r[i];
1643   }
1644 
1645   blas_free(x_r);
1646   blas_free(head_r_true_r);
1647   blas_free(tail_r_true_r);
1648 }
1649 
BLAS_ztbsv_d_testgen(int norm,enum blas_order_type order,enum blas_uplo_type uplo,enum blas_trans_type trans,enum blas_diag_type diag,int n,int k,int randomize,void * alpha,int alpha_flag,double * T,int ldt,void * x,int * seed,double * head_r_true,double * tail_r_true,int row,enum blas_prec_type prec)1650 void BLAS_ztbsv_d_testgen(int norm, enum blas_order_type order,
1651 			  enum blas_uplo_type uplo,
1652 			  enum blas_trans_type trans,
1653 			  enum blas_diag_type diag, int n, int k,
1654 			  int randomize, void *alpha, int alpha_flag,
1655 			  double *T, int ldt, void *x, int *seed,
1656 			  double *head_r_true, double *tail_r_true, int row,
1657 			  enum blas_prec_type prec)
1658 
1659 /*
1660  * Purpose
1661  * =======
1662  *
1663  * Generates alpha, x and T, where T is a triangular Banded matrix;
1664  *      and computes r_true.
1665  *
1666  * Arguments
1667  * =========
1668  *
1669  * norm         (input) blas_norm_type
1670  *
1671  * order        (input) blas_order_type
1672  *              Order of T; row or column major
1673  *
1674  * uplo         (input) blas_uplo_type
1675  *              Whether T is upper or lower
1676  *
1677  * trans        (input) blas_trans_type
1678  *              No trans, trans, conj trans
1679  *
1680  * diag         (input) blas_diag_type
1681  *              non unit, unit
1682  *
1683  * n            (input) int
1684  *              Dimension of T and the length of vector x
1685  *
1686  * k            (input) int
1687  *              Number of super(sub)diagonals of T
1688  *
1689  * alpha        (input/output) void*
1690  *              If alpha_flag = 1, alpha is input.
1691  *              If alpha_flag = 0, alpha is output.
1692  *
1693  * alpha_flag   (input) int
1694  *              = 0 : alpha is free, and is output.
1695  *              = 1 : alpha is fixed on input.
1696  *
1697  * T            (output) double*
1698  *
1699  * x            (input/output) void*
1700  *
1701  * seed         (input/output) int
1702  *
1703  * head_r_true     (output) double*
1704  *              The leading part of the truth in double-double.
1705  *
1706  * tail_r_true     (output) double*
1707  *              The trailing part of the truth in double-double.
1708  *
1709  * row          (input) int
1710  *              The true row being generated
1711  *
1712  * prec         (input) blas_prec_type
1713  *              single, double, or extra precision
1714  *
1715  */
1716 {
1717   double *x_i = (double *) x;
1718   double *alpha_i = (double *) alpha;
1719   double *T_i = T;
1720   double alpha_r;
1721   double *x_r;
1722   double *head_r_true_r, *tail_r_true_r;
1723   int i, inc = 2;
1724 
1725   x_r = (double *) blas_malloc(n * sizeof(double));
1726   if (n > 0 && x_r == NULL) {
1727     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1728   }
1729   head_r_true_r = (double *) blas_malloc(n * sizeof(double));
1730   tail_r_true_r = (double *) blas_malloc(n * sizeof(double));
1731   if (n > 0 && (head_r_true_r == NULL || tail_r_true_r == NULL)) {
1732     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1733   }
1734 
1735   if (alpha_flag == 1) {
1736     alpha_r = alpha_i[0];
1737   }
1738 
1739   BLAS_dtbsv_testgen(norm, order, uplo, trans, diag, n, k,
1740 		     randomize, &alpha_r,
1741 		     alpha_flag, T_i, ldt, x_r, seed, head_r_true_r,
1742 		     tail_r_true_r, row, prec);
1743 
1744   alpha_i[0] = alpha_r;
1745   alpha_i[1] = alpha_r;
1746 
1747   for (i = 0; i < n; i++) {
1748     x_i[i * inc] = 0.0;
1749     x_i[i * inc + 1] = x_r[i];
1750 
1751     head_r_true[i * inc] = -head_r_true_r[i];
1752     head_r_true[i * inc + 1] = head_r_true_r[i];
1753     tail_r_true[i * inc] = -tail_r_true_r[i];
1754     tail_r_true[i * inc + 1] = tail_r_true_r[i];
1755   }
1756 
1757   blas_free(x_r);
1758   blas_free(head_r_true_r);
1759   blas_free(tail_r_true_r);
1760 }
1761