1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <math.h>
4 #include "blas_extended.h"
5 #include "blas_extended_private.h"
6 #include "blas_extended_test.h"
7
8
9
10
do_test_dtrsv_s(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)11 double do_test_dtrsv_s(int n,
12 int ntests,
13 int *seed,
14 double thresh,
15 int debug,
16 float test_prob,
17 double *min_ratio, int *num_bad_ratio, int *num_tests)
18
19 /*
20 * Purpose
21 * =======
22 *
23 * Runs a series of tests on TRSV.
24 *
25 * Arguments
26 * =========
27 *
28 * n (input) int
29 * The size of vector being tested
30 *
31 * ntests (input) int
32 * The number of tests to run for each set of attributes.
33 *
34 * seed (input/output) int
35 * The seed for the random number generator used in testgen().
36 *
37 * thresh (input) double
38 * When the ratio returned from test() exceeds the specified
39 * threshold, the current size, r_true, r_comp, and ratio will be
40 * printed. (Since ratio is supposed to be O(1), we can set thresh
41 * to ~10.)
42 *
43 * debug (input) int
44 * If debug=3, print summary
45 * If debug=2, print summary only if the number of bad ratios > 0
46 * If debug=1, print complete info if tests fail
47 * If debug=0, return max ratio
48 *
49 * test_prob (input) float
50 * The specified test will be performed only if the generated
51 * random exceeds this threshold.
52 *
53 * min_ratio (output) double
54 * The minimum ratio
55 *
56 * num_bad_ratio (output) int
57 * The number of tests fail; they are above the threshold.
58 *
59 * num_tests (output) int
60 * The number of tests is being performed.
61 *
62 * Return value
63 * ============
64 *
65 * The maximum ratio if run successfully, otherwise return -1
66 *
67 * Code structure
68 * ==============
69 *
70 * debug loop -- if debug is one, the first loop computes the max ratio
71 * -- and the last(second) loop outputs debugging information,
72 * -- if the test fail and its ratio > 0.5 * max ratio.
73 * -- if debug is zero, the loop is executed once
74 * alpha loop -- varying alpha: 0, 1, or random
75 *
76 * norm loop -- varying norm: near undeflow, near one, or
77 * -- near overflow
78 * order loop -- varying order type: rowmajor or colmajor
79 * uplo loop -- varying uplo type: upper or lower
80 * trans loop -- varying trans type: no trans, trans, and conj trans
81 * diag loop -- varying diag type: non unit, and unit
82 * numtest loop -- how many times the test is perform with
83 * -- above set of attributes
84 * lda loop -- varying lda: n, n+1, and 2n
85 * incx loop -- varying incx: -2, -1, 1, 2
86 */
87 {
88 /* function name */
89 const char fname[] = "BLAS_dtrsv_s";
90
91 /* max number of debug lines to print */
92 const int max_print = 3;
93
94 /* Variables in the "x_val" form are loop vars for corresponding
95 variables */
96 int i; /* iterate through the repeating tests */
97 int j; /* multipurpose counters */
98 int ix; /* use to index x */
99 int lda_val, lda = 0; /* for testing different values for lda */
100 int incx_val, incx; /* for testing different inc values */
101 int incx_gen = 1; /* 1 if real, 2 if complex */
102 int d_count; /* counter for debug */
103 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
104 int p_count; /* counter for the number of debug lines printed */
105 int tot_tests; /* total number of tests to be done */
106 int norm; /* input values of near underflow/one/overflow */
107 double ratio_max; /* the current maximum ratio */
108 double ratio_min; /* the current minimum ratio */
109 double *ratios; /* a temporary variable for calculating ratio */
110 double ratio = 0.0; /* the per-use test ratio from test() */
111 int bad_ratios = 0; /* the number of ratios over the threshold */
112 double eps_int; /* the internal epsilon expected--2^(-24) for float */
113 double un_int; /* the internal underflow threshold */
114 double alpha;
115 double beta;
116 float *T;
117 double *x;
118 double *x_gen;
119 float *temp; /* use for calculating ratio */
120
121 /* x_gen and y_gen are used to store vectors generated by testgen.
122 they eventually are copied back to x and y */
123
124
125 /* the true r calculated by testgen(), in double-double */
126 double *head_r_true, *tail_r_true;
127 int alpha_val;
128 int alpha_flag = 0, beta_flag = 0;
129 int order_val;
130 enum blas_order_type order_type = 0;
131
132 enum blas_prec_type prec = 0;
133 int uplo_val;
134 enum blas_uplo_type uplo_type = 0;
135 int trans_val;
136 enum blas_trans_type trans_type = 0;
137 int diag_val;
138 enum blas_diag_type diag_type = 0;
139 int row;
140 int saved_seed; /* for saving the original seed */
141 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
142 FPU_FIX_DECL;
143
144 /* test for bad arguments */
145 if (n < 0 || ntests < 0)
146 BLAS_error(fname, 0, 0, NULL);
147
148 /* if there is nothing to test, return all zero */
149 if (n == 0 || ntests == 0) {
150 *min_ratio = 0.0;
151 *num_bad_ratio = 0;
152 *num_tests = 0;
153 return 0.0;
154 }
155
156 FPU_FIX_START;
157
158
159
160 /* get space for calculation */
161 x = (double *) blas_malloc(n * 2 * sizeof(double));
162 if (n * 2 > 0 && x == NULL) {
163 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
164 }
165 x_gen = (double *) blas_malloc(n * sizeof(double));
166 if (n > 0 && x_gen == NULL) {
167 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
168 }
169 T = (float *) blas_malloc(4 * n * n * sizeof(float));
170 if (4 * n * n > 0 && T == NULL) {
171 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
172 }
173 temp = (float *) blas_malloc(n * sizeof(float));
174 if (n > 0 && temp == NULL) {
175 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
176 }
177 head_r_true = (double *) blas_malloc(n * sizeof(double));
178 tail_r_true = (double *) blas_malloc(n * sizeof(double));
179 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
180 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
181 }
182 ratios = (double *) blas_malloc(n * sizeof(double));
183 if (n > 0 && ratios == NULL) {
184 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
185 }
186
187 /* initialization */
188 saved_seed = *seed;
189 ratio_min = 1e308;
190 ratio_max = 0.0;
191 tot_tests = 0;
192 p_count = 0;
193 count = 0;
194 find_max_ratio = 0;
195 beta = 0.0;
196 beta_flag = 1;
197 if (debug == 3)
198 find_max_ratio = 1;
199
200 /* The debug iteration:
201 If debug=1, then will execute the iteration twice. First, compute the
202 max ratio. Second, print info if ratio > (50% * ratio_max). */
203 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
204 bad_ratios = 0; /* set to zero */
205
206 if ((debug == 3) && (d_count == find_max_ratio))
207 *seed = saved_seed; /* restore the original seed */
208
209 /* varying alpha */
210 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
211 alpha_flag = 0;
212 switch (alpha_val) {
213 case 0:
214 alpha = 0.0;
215 alpha_flag = 1;
216 break;
217 case 1:
218 alpha = 1.0;
219 alpha_flag = 1;
220 break;
221 }
222
223
224 eps_int = power(2, -BITS_D);
225 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
226 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
227 prec = blas_prec_double;
228
229 /* values near underflow, 1, or overflow */
230 for (norm = -1; norm <= 1; norm++) {
231
232 /* row or col major */
233 for (order_val = 0; order_val < 2; order_val++) {
234 switch (order_val) {
235 case 0:
236 order_type = blas_rowmajor;
237 break;
238 case 1:
239 order_type = blas_colmajor;
240 break;
241 }
242
243 /* upper or lower */
244 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
245 switch (uplo_val) {
246 case 0:
247 uplo_type = blas_upper;
248 break;
249 case 1:
250 uplo_type = blas_lower;
251 break;
252 }
253
254 /* no trans, trans, or conj trans */
255 for (trans_val = 0; trans_val < 3; trans_val++) {
256 switch (trans_val) {
257 case 0:
258 trans_type = blas_no_trans;
259 break;
260 case 1:
261 trans_type = blas_trans;
262 break;
263 case 2:
264 trans_type = blas_conj_trans;
265 break;
266 }
267
268 /* non_unit_diag, unit_diag */
269 for (diag_val = 0; diag_val < 2; diag_val++) {
270 switch (diag_val) {
271 case 0:
272 diag_type = blas_non_unit_diag;
273 break;
274 case 1:
275 diag_type = blas_unit_diag;
276 break;
277 }
278
279 /* number of tests */
280 for (i = 0; i < ntests; i++) {
281
282 for (lda_val = 0; lda_val < 3; lda_val++) {
283 switch (lda_val) {
284 case 0:
285 lda = n;
286 break;
287 case 1:
288 lda = n + 1;
289 break;
290 case 2:
291 lda = 2 * n;
292 break;
293 }
294
295 /* For the sake of speed, we throw out this case at random */
296 if (xrand(seed) >= test_prob)
297 continue;
298
299 for (row = 0; row < n; row++) {
300 BLAS_dtrsv_s_testgen(norm, order_type, uplo_type,
301 trans_type, diag_type, n,
302 &alpha, alpha_flag, T, lda, x_gen,
303 seed, head_r_true, tail_r_true,
304 row, prec);
305
306 count++;
307
308 /* varying incx */
309 for (incx_val = -2; incx_val <= 2; incx_val++) {
310 if (incx_val == 0)
311 continue;
312
313
314 /* setting incx */
315 incx = incx_val;
316
317
318 /* set x starting index */
319 ix = 0;
320 if (incx < 0)
321 ix = -(n - 1) * incx;
322
323 /* copy x_gen to x */
324 for (j = 0; j < n * incx_gen; j += incx_gen) {
325 x[ix] = x_gen[j];
326
327 ix += incx;
328 }
329
330 /* call BLAS_dtrsv_s */
331 FPU_FIX_STOP;
332 BLAS_dtrsv_s(order_type, uplo_type, trans_type,
333 diag_type, n, alpha, T, lda, x,
334 incx_val);
335 FPU_FIX_START;
336
337 ix = 0;
338 if (incx < 0)
339 ix = -(n - 1) * incx;
340
341 /* computing the ratio */
342 for (j = 0; j < n; j++) {
343 if (j == row) {
344 int minus_one = -1.0;
345 /* copy row j of T to temp */
346 str_copy_row(order_type, uplo_type, trans_type, n,
347 T, lda, temp, j);
348
349 test_BLAS_ddot_d_s(n, blas_no_conj,
350 minus_one, alpha,
351 x_gen[j * incx_gen],
352 x[ix],
353 head_r_true[j * incx_gen],
354 tail_r_true[j * incx_gen], x,
355 incx_val, temp, 1, eps_int,
356 un_int, &ratios[j]);
357 } else {
358 double eps_out = power(2, -BITS_D);
359
360 double tmp =
361 fabs((x[ix] - head_r_true[j]) - tail_r_true[j]);
362
363 if (head_r_true[j] == 0.0)
364 ratios[j] = 0.0;
365 else
366 ratios[j] =
367 tmp / (head_r_true[j] * 2.0 * eps_out);
368 }
369
370 /* take the max ratio */
371 if (j == 0) {
372 ratio = ratios[0];
373 } else if (!(ratios[j] <= ratio)) {
374 /* The !<= test causes NaN error to be detected.
375 Note that (NaN > thresh) is always false. */
376 ratio = ratios[j];
377 }
378 ix += incx;
379 }
380
381 /* Increase the number of bad ratio, if the ratio
382 is bigger than the threshold.
383 The !<= below causes NaN error to be detected.
384 Note that (NaN > thresh) is always false. */
385 if (!(ratio <= thresh)) {
386
387 bad_ratios++;
388
389 if ((debug == 3) && /* print only when debug is on */
390 (count != old_count) && /* print if old vector is different
391 from the current one */
392 (d_count == find_max_ratio) &&
393 (p_count <= max_print) &&
394 (ratio > 0.5 * ratio_max)) {
395 p_count++;
396 old_count = count;
397
398 printf
399 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
400 fname, n, ntests, thresh);
401
402 /* Print test info */
403 switch (prec) {
404 case blas_prec_single:
405 printf("single ");
406 break;
407 case blas_prec_double:
408 printf("double ");
409 break;
410 case blas_prec_indigenous:
411 printf("indigenous ");
412 break;
413 case blas_prec_extra:
414 printf("extra ");
415 break;
416 }
417 switch (norm) {
418 case -1:
419 printf("near_underflow ");
420 break;
421 case 0:
422 printf("near_one ");
423 break;
424 case 1:
425 printf("near_overflow ");
426 break;
427 }
428 switch (order_type) {
429 case blas_rowmajor:
430 printf("row_major ");
431 break;
432 case blas_colmajor:
433 printf("col_major ");
434 break;
435 }
436 switch (uplo_type) {
437 case blas_upper:
438 printf("upper ");
439 break;
440 case blas_lower:
441 printf("lower ");
442 break;
443 }
444 switch (trans_type) {
445 case blas_no_trans:
446 printf("no_trans ");
447 break;
448 case blas_trans:
449 printf("trans ");
450 break;
451 case blas_conj_trans:
452 printf("conj_trans ");
453 break;
454 }
455 switch (diag_type) {
456 case blas_non_unit_diag:
457 printf("non_unit_diag ");
458 break;
459 case blas_unit_diag:
460 printf("unit_diag ");
461 break;
462 }
463
464 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
465 incx);
466
467 ix = 0;
468 if (incx < 0)
469 ix = -(n - 1) * incx;
470
471 printf(" T=");
472 for (j = 0; j < n; j++) {
473 /* copy row j of T to temp */
474 str_copy_row(order_type, uplo_type, trans_type,
475 n, T, lda, temp, j);
476
477 if (j > 0)
478 printf(" ");
479 sprint_vector(temp, n, 1, NULL);
480 }
481
482 ix = 0;
483 if (incx < 0)
484 ix = -(n - 1) * incx;
485
486 for (j = 0; j < n; j++) {
487 printf(" ");
488 printf("x[%d] = ", j * incx_gen);
489 printf("%24.16e", x_gen[j * incx_gen]);
490 printf("; ");
491 printf("x_final[%d] = ", ix);
492 printf("%24.16e", x[ix]);
493 printf("\n");
494 ix += incx;
495 }
496
497 printf(" ");
498 printf("alpha = ");
499 printf("%24.16e", alpha);
500 printf("; ");
501 printf("\n");
502 for (j = 0; j < n; j++) {
503 if (j == row)
504 printf(" =>");
505 else
506 printf(" ");
507 printf("[%24.16e, %24.16e]",
508 head_r_true[j * incx_gen],
509 tail_r_true[j * incx_gen]);
510 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
511 }
512 printf(" ratio=%.4e\n", ratio);
513 }
514 if (bad_ratios >= MAX_BAD_TESTS) {
515 printf("\ntoo many failures, exiting....");
516 printf("\nTesting and compilation");
517 printf(" are incomplete\n\n");
518 goto end;
519 }
520 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
521 printf("\nFlagrant ratio %e, exiting...", ratio);
522 printf("\nTesting and compilation");
523 printf(" are incomplete\n\n");
524 goto end;
525 }
526 }
527
528 if (d_count == 0) {
529 if (ratio > ratio_max)
530 ratio_max = ratio;
531
532 if (ratio != 0.0 && ratio < ratio_min)
533 ratio_min = ratio;
534
535 tot_tests++;
536 }
537 } /* incx */
538 } /* row */
539 } /* lda */
540 } /* numtests */
541 } /* diag */
542 } /* trans */
543 } /* uplo */
544 } /* order */
545 } /* norm */
546
547 } /* alpha */
548 } /* debug */
549
550 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
551 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
552 fname, n, ntests, thresh);
553 printf
554 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
555 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
556 ratio_min, ratio_max);
557 }
558
559 end:
560 FPU_FIX_STOP;
561
562 blas_free(x);
563 blas_free(x_gen);
564 blas_free(temp);
565 blas_free(T);
566 blas_free(head_r_true);
567 blas_free(tail_r_true);
568 blas_free(ratios);
569
570 *min_ratio = ratio_min;
571 *num_bad_ratio = bad_ratios;
572 *num_tests = tot_tests;
573 return ratio_max;
574 } /* end of do_test_dtrsv_s */
575
do_test_ztrsv_c(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)576 double do_test_ztrsv_c(int n,
577 int ntests,
578 int *seed,
579 double thresh,
580 int debug,
581 float test_prob,
582 double *min_ratio, int *num_bad_ratio, int *num_tests)
583
584 /*
585 * Purpose
586 * =======
587 *
588 * Runs a series of tests on TRSV.
589 *
590 * Arguments
591 * =========
592 *
593 * n (input) int
594 * The size of vector being tested
595 *
596 * ntests (input) int
597 * The number of tests to run for each set of attributes.
598 *
599 * seed (input/output) int
600 * The seed for the random number generator used in testgen().
601 *
602 * thresh (input) double
603 * When the ratio returned from test() exceeds the specified
604 * threshold, the current size, r_true, r_comp, and ratio will be
605 * printed. (Since ratio is supposed to be O(1), we can set thresh
606 * to ~10.)
607 *
608 * debug (input) int
609 * If debug=3, print summary
610 * If debug=2, print summary only if the number of bad ratios > 0
611 * If debug=1, print complete info if tests fail
612 * If debug=0, return max ratio
613 *
614 * test_prob (input) float
615 * The specified test will be performed only if the generated
616 * random exceeds this threshold.
617 *
618 * min_ratio (output) double
619 * The minimum ratio
620 *
621 * num_bad_ratio (output) int
622 * The number of tests fail; they are above the threshold.
623 *
624 * num_tests (output) int
625 * The number of tests is being performed.
626 *
627 * Return value
628 * ============
629 *
630 * The maximum ratio if run successfully, otherwise return -1
631 *
632 * Code structure
633 * ==============
634 *
635 * debug loop -- if debug is one, the first loop computes the max ratio
636 * -- and the last(second) loop outputs debugging information,
637 * -- if the test fail and its ratio > 0.5 * max ratio.
638 * -- if debug is zero, the loop is executed once
639 * alpha loop -- varying alpha: 0, 1, or random
640 *
641 * norm loop -- varying norm: near undeflow, near one, or
642 * -- near overflow
643 * order loop -- varying order type: rowmajor or colmajor
644 * uplo loop -- varying uplo type: upper or lower
645 * trans loop -- varying trans type: no trans, trans, and conj trans
646 * diag loop -- varying diag type: non unit, and unit
647 * numtest loop -- how many times the test is perform with
648 * -- above set of attributes
649 * lda loop -- varying lda: n, n+1, and 2n
650 * incx loop -- varying incx: -2, -1, 1, 2
651 */
652 {
653 /* function name */
654 const char fname[] = "BLAS_ztrsv_c";
655
656 /* max number of debug lines to print */
657 const int max_print = 3;
658
659 /* Variables in the "x_val" form are loop vars for corresponding
660 variables */
661 int i; /* iterate through the repeating tests */
662 int j; /* multipurpose counters */
663 int ix; /* use to index x */
664 int lda_val, lda = 0; /* for testing different values for lda */
665 int incx_val, incx; /* for testing different inc values */
666 int incx_gen = 1; /* 1 if real, 2 if complex */
667 int d_count; /* counter for debug */
668 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
669 int p_count; /* counter for the number of debug lines printed */
670 int tot_tests; /* total number of tests to be done */
671 int norm; /* input values of near underflow/one/overflow */
672 double ratio_max; /* the current maximum ratio */
673 double ratio_min; /* the current minimum ratio */
674 double *ratios; /* a temporary variable for calculating ratio */
675 double ratio = 0.0; /* the per-use test ratio from test() */
676 int bad_ratios = 0; /* the number of ratios over the threshold */
677 double eps_int; /* the internal epsilon expected--2^(-24) for float */
678 double un_int; /* the internal underflow threshold */
679 double alpha[2];
680 double beta[2];
681 float *T;
682 double *x;
683 double *x_gen;
684 float *temp; /* use for calculating ratio */
685
686 /* x_gen and y_gen are used to store vectors generated by testgen.
687 they eventually are copied back to x and y */
688
689
690 /* the true r calculated by testgen(), in double-double */
691 double *head_r_true, *tail_r_true;
692
693 int alpha_val;
694 int alpha_flag = 0, beta_flag = 0;
695 int order_val;
696 enum blas_order_type order_type = 0;
697
698 enum blas_prec_type prec = 0;
699 int uplo_val;
700 enum blas_uplo_type uplo_type = 0;
701 int trans_val;
702 enum blas_trans_type trans_type = 0;
703 int diag_val;
704 enum blas_diag_type diag_type = 0;
705 int row;
706 int saved_seed; /* for saving the original seed */
707 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
708 FPU_FIX_DECL;
709
710 /* test for bad arguments */
711 if (n < 0 || ntests < 0)
712 BLAS_error(fname, 0, 0, NULL);
713
714 /* if there is nothing to test, return all zero */
715 if (n == 0 || ntests == 0) {
716 *min_ratio = 0.0;
717 *num_bad_ratio = 0;
718 *num_tests = 0;
719 return 0.0;
720 }
721
722 FPU_FIX_START;
723
724 incx_gen *= 2;
725
726 /* get space for calculation */
727 x = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
728 if (n * 2 > 0 && x == NULL) {
729 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
730 }
731 x_gen = (double *) blas_malloc(n * sizeof(double) * 2);
732 if (n > 0 && x_gen == NULL) {
733 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
734 }
735 T = (float *) blas_malloc(4 * n * n * sizeof(float) * 2);
736 if (4 * n * n > 0 && T == NULL) {
737 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
738 }
739 temp = (float *) blas_malloc(n * sizeof(float) * 2);
740 if (n > 0 && temp == NULL) {
741 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
742 }
743 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
744 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
745 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
746 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
747 }
748 ratios = (double *) blas_malloc(n * sizeof(double));
749 if (n > 0 && ratios == NULL) {
750 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
751 }
752
753 /* initialization */
754 saved_seed = *seed;
755 ratio_min = 1e308;
756 ratio_max = 0.0;
757 tot_tests = 0;
758 p_count = 0;
759 count = 0;
760 find_max_ratio = 0;
761 beta[0] = beta[1] = 0.0;
762 beta_flag = 1;
763 if (debug == 3)
764 find_max_ratio = 1;
765
766 /* The debug iteration:
767 If debug=1, then will execute the iteration twice. First, compute the
768 max ratio. Second, print info if ratio > (50% * ratio_max). */
769 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
770 bad_ratios = 0; /* set to zero */
771
772 if ((debug == 3) && (d_count == find_max_ratio))
773 *seed = saved_seed; /* restore the original seed */
774
775 /* varying alpha */
776 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
777 alpha_flag = 0;
778 switch (alpha_val) {
779 case 0:
780 alpha[0] = alpha[1] = 0.0;
781 alpha_flag = 1;
782 break;
783 case 1:
784 alpha[0] = 1.0;
785 alpha[1] = 0.0;
786 alpha_flag = 1;
787 break;
788 }
789
790
791 eps_int = power(2, -BITS_D);
792 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
793 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
794 prec = blas_prec_double;
795
796 /* values near underflow, 1, or overflow */
797 for (norm = -1; norm <= 1; norm++) {
798
799 /* row or col major */
800 for (order_val = 0; order_val < 2; order_val++) {
801 switch (order_val) {
802 case 0:
803 order_type = blas_rowmajor;
804 break;
805 case 1:
806 order_type = blas_colmajor;
807 break;
808 }
809
810 /* upper or lower */
811 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
812 switch (uplo_val) {
813 case 0:
814 uplo_type = blas_upper;
815 break;
816 case 1:
817 uplo_type = blas_lower;
818 break;
819 }
820
821 /* no trans, trans, or conj trans */
822 for (trans_val = 0; trans_val < 3; trans_val++) {
823 switch (trans_val) {
824 case 0:
825 trans_type = blas_no_trans;
826 break;
827 case 1:
828 trans_type = blas_trans;
829 break;
830 case 2:
831 trans_type = blas_conj_trans;
832 break;
833 }
834
835 /* non_unit_diag, unit_diag */
836 for (diag_val = 0; diag_val < 2; diag_val++) {
837 switch (diag_val) {
838 case 0:
839 diag_type = blas_non_unit_diag;
840 break;
841 case 1:
842 diag_type = blas_unit_diag;
843 break;
844 }
845
846 /* number of tests */
847 for (i = 0; i < ntests; i++) {
848
849 for (lda_val = 0; lda_val < 3; lda_val++) {
850 switch (lda_val) {
851 case 0:
852 lda = n;
853 break;
854 case 1:
855 lda = n + 1;
856 break;
857 case 2:
858 lda = 2 * n;
859 break;
860 }
861
862 /* For the sake of speed, we throw out this case at random */
863 if (xrand(seed) >= test_prob)
864 continue;
865
866 for (row = 0; row < n; row++) {
867 BLAS_ztrsv_c_testgen(norm, order_type, uplo_type,
868 trans_type, diag_type, n,
869 &alpha, alpha_flag, T, lda, x_gen,
870 seed, head_r_true, tail_r_true,
871 row, prec);
872
873 count++;
874
875 /* varying incx */
876 for (incx_val = -2; incx_val <= 2; incx_val++) {
877 if (incx_val == 0)
878 continue;
879
880
881 /* setting incx */
882 incx = incx_val;
883 incx *= 2;
884
885 /* set x starting index */
886 ix = 0;
887 if (incx < 0)
888 ix = -(n - 1) * incx;
889
890 /* copy x_gen to x */
891 for (j = 0; j < n * incx_gen; j += incx_gen) {
892 x[ix] = x_gen[j];
893 x[ix + 1] = x_gen[j + 1];
894
895 ix += incx;
896 }
897
898 /* call BLAS_ztrsv_c */
899 FPU_FIX_STOP;
900 BLAS_ztrsv_c(order_type, uplo_type, trans_type,
901 diag_type, n, alpha, T, lda, x,
902 incx_val);
903 FPU_FIX_START;
904
905 ix = 0;
906 if (incx < 0)
907 ix = -(n - 1) * incx;
908
909 /* computing the ratio */
910 for (j = 0; j < n; j++) {
911 if (j == row) {
912 double minus_one[2] = { -1.0, 0.0 };
913 /* copy row j of T to temp */
914 ctr_copy_row(order_type, uplo_type, trans_type, n,
915 T, lda, temp, j);
916
917 test_BLAS_zdot_z_c(n, blas_no_conj,
918 minus_one, alpha,
919 &x_gen[j * incx_gen],
920 &x[ix],
921 &head_r_true[j * incx_gen],
922 &tail_r_true[j * incx_gen], x,
923 incx_val, temp, 1, eps_int,
924 un_int, &ratios[j]);
925 } else {
926 double eps_out = power(2, -BITS_D);
927
928 double tmp =
929 sqrt(((x[ix] - head_r_true[j * incx_gen]) -
930 tail_r_true[j * incx_gen]) * ((x[ix] -
931 head_r_true
932 [j *
933 incx_gen])
934 -
935 tail_r_true
936 [j *
937 incx_gen])
938 +
939 ((x[ix + 1] -
940 head_r_true[j * incx_gen + 1]) -
941 tail_r_true[j * incx_gen +
942 1]) * ((x[ix + 1] -
943 head_r_true[j *
944 incx_gen +
945 1]) -
946 tail_r_true[j *
947 incx_gen +
948 1]));
949
950 if (head_r_true[j * incx_gen] == 0.0
951 && head_r_true[j * incx_gen + 1] == 0.0)
952 ratios[j] = 0.0;
953 else
954 ratios[j] =
955 tmp /
956 (sqrt
957 (head_r_true[j * incx_gen] *
958 head_r_true[j * incx_gen] +
959 head_r_true[j * incx_gen +
960 1] * head_r_true[j * incx_gen +
961 1]) * 8.0 *
962 sqrt(2.0) * eps_out);
963 }
964
965 /* take the max ratio */
966 if (j == 0) {
967 ratio = ratios[0];
968 } else if (!(ratios[j] <= ratio)) {
969 /* The !<= test causes NaN error to be detected.
970 Note that (NaN > thresh) is always false. */
971 ratio = ratios[j];
972 }
973 ix += incx;
974 }
975
976 /* Increase the number of bad ratio, if the ratio
977 is bigger than the threshold.
978 The !<= below causes NaN error to be detected.
979 Note that (NaN > thresh) is always false. */
980 if (!(ratio <= thresh)) {
981
982 bad_ratios++;
983
984 if ((debug == 3) && /* print only when debug is on */
985 (count != old_count) && /* print if old vector is different
986 from the current one */
987 (d_count == find_max_ratio) &&
988 (p_count <= max_print) &&
989 (ratio > 0.5 * ratio_max)) {
990 p_count++;
991 old_count = count;
992
993 printf
994 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
995 fname, n, ntests, thresh);
996
997 /* Print test info */
998 switch (prec) {
999 case blas_prec_single:
1000 printf("single ");
1001 break;
1002 case blas_prec_double:
1003 printf("double ");
1004 break;
1005 case blas_prec_indigenous:
1006 printf("indigenous ");
1007 break;
1008 case blas_prec_extra:
1009 printf("extra ");
1010 break;
1011 }
1012 switch (norm) {
1013 case -1:
1014 printf("near_underflow ");
1015 break;
1016 case 0:
1017 printf("near_one ");
1018 break;
1019 case 1:
1020 printf("near_overflow ");
1021 break;
1022 }
1023 switch (order_type) {
1024 case blas_rowmajor:
1025 printf("row_major ");
1026 break;
1027 case blas_colmajor:
1028 printf("col_major ");
1029 break;
1030 }
1031 switch (uplo_type) {
1032 case blas_upper:
1033 printf("upper ");
1034 break;
1035 case blas_lower:
1036 printf("lower ");
1037 break;
1038 }
1039 switch (trans_type) {
1040 case blas_no_trans:
1041 printf("no_trans ");
1042 break;
1043 case blas_trans:
1044 printf("trans ");
1045 break;
1046 case blas_conj_trans:
1047 printf("conj_trans ");
1048 break;
1049 }
1050 switch (diag_type) {
1051 case blas_non_unit_diag:
1052 printf("non_unit_diag ");
1053 break;
1054 case blas_unit_diag:
1055 printf("unit_diag ");
1056 break;
1057 }
1058
1059 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
1060 incx);
1061
1062 ix = 0;
1063 if (incx < 0)
1064 ix = -(n - 1) * incx;
1065
1066 printf(" T=");
1067 for (j = 0; j < n; j++) {
1068 /* copy row j of T to temp */
1069 ctr_copy_row(order_type, uplo_type, trans_type,
1070 n, T, lda, temp, j);
1071
1072 if (j > 0)
1073 printf(" ");
1074 cprint_vector(temp, n, 1, NULL);
1075 }
1076
1077 ix = 0;
1078 if (incx < 0)
1079 ix = -(n - 1) * incx;
1080
1081 for (j = 0; j < n; j++) {
1082 printf(" ");
1083 printf("x[%d] = ", j * incx_gen);
1084 printf("(%24.16e, %24.16e)",
1085 x_gen[j * incx_gen],
1086 x_gen[j * incx_gen + 1]);
1087 printf("; ");
1088 printf("x_final[%d] = ", ix);
1089 printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
1090 printf("\n");
1091 ix += incx;
1092 }
1093
1094 printf(" ");
1095 printf("alpha = ");
1096 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
1097 printf("; ");
1098 printf("\n");
1099 for (j = 0; j < n; j++) {
1100 if (j == row)
1101 printf(" =>");
1102 else
1103 printf(" ");
1104 printf
1105 ("([%24.16e %24.16e], [%24.16e %24.16e])",
1106 head_r_true[j * incx_gen],
1107 tail_r_true[j * incx_gen],
1108 head_r_true[j * incx_gen + 1],
1109 tail_r_true[j * incx_gen + 1]);
1110 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
1111 }
1112 printf(" ratio=%.4e\n", ratio);
1113 }
1114 if (bad_ratios >= MAX_BAD_TESTS) {
1115 printf("\ntoo many failures, exiting....");
1116 printf("\nTesting and compilation");
1117 printf(" are incomplete\n\n");
1118 goto end;
1119 }
1120 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
1121 printf("\nFlagrant ratio %e, exiting...", ratio);
1122 printf("\nTesting and compilation");
1123 printf(" are incomplete\n\n");
1124 goto end;
1125 }
1126 }
1127
1128 if (d_count == 0) {
1129 if (ratio > ratio_max)
1130 ratio_max = ratio;
1131
1132 if (ratio != 0.0 && ratio < ratio_min)
1133 ratio_min = ratio;
1134
1135 tot_tests++;
1136 }
1137 } /* incx */
1138 } /* row */
1139 } /* lda */
1140 } /* numtests */
1141 } /* diag */
1142 } /* trans */
1143 } /* uplo */
1144 } /* order */
1145 } /* norm */
1146
1147 } /* alpha */
1148 } /* debug */
1149
1150 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
1151 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
1152 fname, n, ntests, thresh);
1153 printf
1154 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
1155 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
1156 ratio_min, ratio_max);
1157 }
1158
1159 end:
1160 FPU_FIX_STOP;
1161
1162 blas_free(x);
1163 blas_free(x_gen);
1164 blas_free(temp);
1165 blas_free(T);
1166 blas_free(head_r_true);
1167 blas_free(tail_r_true);
1168 blas_free(ratios);
1169
1170 *min_ratio = ratio_min;
1171 *num_bad_ratio = bad_ratios;
1172 *num_tests = tot_tests;
1173 return ratio_max;
1174 } /* end of do_test_ztrsv_c */
1175
do_test_ctrsv_s(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)1176 double do_test_ctrsv_s(int n,
1177 int ntests,
1178 int *seed,
1179 double thresh,
1180 int debug,
1181 float test_prob,
1182 double *min_ratio, int *num_bad_ratio, int *num_tests)
1183
1184 /*
1185 * Purpose
1186 * =======
1187 *
1188 * Runs a series of tests on TRSV.
1189 *
1190 * Arguments
1191 * =========
1192 *
1193 * n (input) int
1194 * The size of vector being tested
1195 *
1196 * ntests (input) int
1197 * The number of tests to run for each set of attributes.
1198 *
1199 * seed (input/output) int
1200 * The seed for the random number generator used in testgen().
1201 *
1202 * thresh (input) double
1203 * When the ratio returned from test() exceeds the specified
1204 * threshold, the current size, r_true, r_comp, and ratio will be
1205 * printed. (Since ratio is supposed to be O(1), we can set thresh
1206 * to ~10.)
1207 *
1208 * debug (input) int
1209 * If debug=3, print summary
1210 * If debug=2, print summary only if the number of bad ratios > 0
1211 * If debug=1, print complete info if tests fail
1212 * If debug=0, return max ratio
1213 *
1214 * test_prob (input) float
1215 * The specified test will be performed only if the generated
1216 * random exceeds this threshold.
1217 *
1218 * min_ratio (output) double
1219 * The minimum ratio
1220 *
1221 * num_bad_ratio (output) int
1222 * The number of tests fail; they are above the threshold.
1223 *
1224 * num_tests (output) int
1225 * The number of tests is being performed.
1226 *
1227 * Return value
1228 * ============
1229 *
1230 * The maximum ratio if run successfully, otherwise return -1
1231 *
1232 * Code structure
1233 * ==============
1234 *
1235 * debug loop -- if debug is one, the first loop computes the max ratio
1236 * -- and the last(second) loop outputs debugging information,
1237 * -- if the test fail and its ratio > 0.5 * max ratio.
1238 * -- if debug is zero, the loop is executed once
1239 * alpha loop -- varying alpha: 0, 1, or random
1240 *
1241 * norm loop -- varying norm: near undeflow, near one, or
1242 * -- near overflow
1243 * order loop -- varying order type: rowmajor or colmajor
1244 * uplo loop -- varying uplo type: upper or lower
1245 * trans loop -- varying trans type: no trans, trans, and conj trans
1246 * diag loop -- varying diag type: non unit, and unit
1247 * numtest loop -- how many times the test is perform with
1248 * -- above set of attributes
1249 * lda loop -- varying lda: n, n+1, and 2n
1250 * incx loop -- varying incx: -2, -1, 1, 2
1251 */
1252 {
1253 /* function name */
1254 const char fname[] = "BLAS_ctrsv_s";
1255
1256 /* max number of debug lines to print */
1257 const int max_print = 3;
1258
1259 /* Variables in the "x_val" form are loop vars for corresponding
1260 variables */
1261 int i; /* iterate through the repeating tests */
1262 int j; /* multipurpose counters */
1263 int ix; /* use to index x */
1264 int lda_val, lda = 0; /* for testing different values for lda */
1265 int incx_val, incx; /* for testing different inc values */
1266 int incx_gen = 1; /* 1 if real, 2 if complex */
1267 int d_count; /* counter for debug */
1268 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
1269 int p_count; /* counter for the number of debug lines printed */
1270 int tot_tests; /* total number of tests to be done */
1271 int norm; /* input values of near underflow/one/overflow */
1272 double ratio_max; /* the current maximum ratio */
1273 double ratio_min; /* the current minimum ratio */
1274 double *ratios; /* a temporary variable for calculating ratio */
1275 double ratio = 0.0; /* the per-use test ratio from test() */
1276 int bad_ratios = 0; /* the number of ratios over the threshold */
1277 double eps_int; /* the internal epsilon expected--2^(-24) for float */
1278 double un_int; /* the internal underflow threshold */
1279 float alpha[2];
1280 float beta[2];
1281 float *T;
1282 float *x;
1283 float *x_gen;
1284 float *temp; /* use for calculating ratio */
1285
1286 /* x_gen and y_gen are used to store vectors generated by testgen.
1287 they eventually are copied back to x and y */
1288
1289
1290 /* the true r calculated by testgen(), in double-double */
1291 double *head_r_true, *tail_r_true;
1292
1293 int alpha_val;
1294 int alpha_flag = 0, beta_flag = 0;
1295 int order_val;
1296 enum blas_order_type order_type = 0;
1297
1298 enum blas_prec_type prec = 0;
1299 int uplo_val;
1300 enum blas_uplo_type uplo_type = 0;
1301 int trans_val;
1302 enum blas_trans_type trans_type = 0;
1303 int diag_val;
1304 enum blas_diag_type diag_type = 0;
1305 int row;
1306 int saved_seed; /* for saving the original seed */
1307 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
1308 FPU_FIX_DECL;
1309
1310 /* test for bad arguments */
1311 if (n < 0 || ntests < 0)
1312 BLAS_error(fname, 0, 0, NULL);
1313
1314 /* if there is nothing to test, return all zero */
1315 if (n == 0 || ntests == 0) {
1316 *min_ratio = 0.0;
1317 *num_bad_ratio = 0;
1318 *num_tests = 0;
1319 return 0.0;
1320 }
1321
1322 FPU_FIX_START;
1323
1324 incx_gen *= 2;
1325
1326 /* get space for calculation */
1327 x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
1328 if (n * 2 > 0 && x == NULL) {
1329 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1330 }
1331 x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
1332 if (n > 0 && x_gen == NULL) {
1333 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1334 }
1335 T = (float *) blas_malloc(4 * n * n * sizeof(float));
1336 if (4 * n * n > 0 && T == NULL) {
1337 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1338 }
1339 temp = (float *) blas_malloc(n * sizeof(float));
1340 if (n > 0 && temp == NULL) {
1341 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1342 }
1343 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
1344 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
1345 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1346 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1347 }
1348 ratios = (double *) blas_malloc(n * sizeof(double));
1349 if (n > 0 && ratios == NULL) {
1350 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1351 }
1352
1353 /* initialization */
1354 saved_seed = *seed;
1355 ratio_min = 1e308;
1356 ratio_max = 0.0;
1357 tot_tests = 0;
1358 p_count = 0;
1359 count = 0;
1360 find_max_ratio = 0;
1361 beta[0] = beta[1] = 0.0;
1362 beta_flag = 1;
1363 if (debug == 3)
1364 find_max_ratio = 1;
1365
1366 /* The debug iteration:
1367 If debug=1, then will execute the iteration twice. First, compute the
1368 max ratio. Second, print info if ratio > (50% * ratio_max). */
1369 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
1370 bad_ratios = 0; /* set to zero */
1371
1372 if ((debug == 3) && (d_count == find_max_ratio))
1373 *seed = saved_seed; /* restore the original seed */
1374
1375 /* varying alpha */
1376 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
1377 alpha_flag = 0;
1378 switch (alpha_val) {
1379 case 0:
1380 alpha[0] = alpha[1] = 0.0;
1381 alpha_flag = 1;
1382 break;
1383 case 1:
1384 alpha[0] = 1.0;
1385 alpha[1] = 0.0;
1386 alpha_flag = 1;
1387 break;
1388 }
1389
1390
1391 eps_int = power(2, -BITS_S);
1392 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
1393 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
1394 prec = blas_prec_single;
1395
1396 /* values near underflow, 1, or overflow */
1397 for (norm = -1; norm <= 1; norm++) {
1398
1399 /* row or col major */
1400 for (order_val = 0; order_val < 2; order_val++) {
1401 switch (order_val) {
1402 case 0:
1403 order_type = blas_rowmajor;
1404 break;
1405 case 1:
1406 order_type = blas_colmajor;
1407 break;
1408 }
1409
1410 /* upper or lower */
1411 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
1412 switch (uplo_val) {
1413 case 0:
1414 uplo_type = blas_upper;
1415 break;
1416 case 1:
1417 uplo_type = blas_lower;
1418 break;
1419 }
1420
1421 /* no trans, trans, or conj trans */
1422 for (trans_val = 0; trans_val < 3; trans_val++) {
1423 switch (trans_val) {
1424 case 0:
1425 trans_type = blas_no_trans;
1426 break;
1427 case 1:
1428 trans_type = blas_trans;
1429 break;
1430 case 2:
1431 trans_type = blas_conj_trans;
1432 break;
1433 }
1434
1435 /* non_unit_diag, unit_diag */
1436 for (diag_val = 0; diag_val < 2; diag_val++) {
1437 switch (diag_val) {
1438 case 0:
1439 diag_type = blas_non_unit_diag;
1440 break;
1441 case 1:
1442 diag_type = blas_unit_diag;
1443 break;
1444 }
1445
1446 /* number of tests */
1447 for (i = 0; i < ntests; i++) {
1448
1449 for (lda_val = 0; lda_val < 3; lda_val++) {
1450 switch (lda_val) {
1451 case 0:
1452 lda = n;
1453 break;
1454 case 1:
1455 lda = n + 1;
1456 break;
1457 case 2:
1458 lda = 2 * n;
1459 break;
1460 }
1461
1462 /* For the sake of speed, we throw out this case at random */
1463 if (xrand(seed) >= test_prob)
1464 continue;
1465
1466 for (row = 0; row < n; row++) {
1467 BLAS_ctrsv_s_testgen(norm, order_type, uplo_type,
1468 trans_type, diag_type, n,
1469 &alpha, alpha_flag, T, lda, x_gen,
1470 seed, head_r_true, tail_r_true,
1471 row, prec);
1472
1473 count++;
1474
1475 /* varying incx */
1476 for (incx_val = -2; incx_val <= 2; incx_val++) {
1477 if (incx_val == 0)
1478 continue;
1479
1480
1481 /* setting incx */
1482 incx = incx_val;
1483 incx *= 2;
1484
1485 /* set x starting index */
1486 ix = 0;
1487 if (incx < 0)
1488 ix = -(n - 1) * incx;
1489
1490 /* copy x_gen to x */
1491 for (j = 0; j < n * incx_gen; j += incx_gen) {
1492 x[ix] = x_gen[j];
1493 x[ix + 1] = x_gen[j + 1];
1494
1495 ix += incx;
1496 }
1497
1498 /* call BLAS_ctrsv_s */
1499 FPU_FIX_STOP;
1500 BLAS_ctrsv_s(order_type, uplo_type, trans_type,
1501 diag_type, n, alpha, T, lda, x,
1502 incx_val);
1503 FPU_FIX_START;
1504
1505 ix = 0;
1506 if (incx < 0)
1507 ix = -(n - 1) * incx;
1508
1509 /* computing the ratio */
1510 for (j = 0; j < n; j++) {
1511 if (j == row) {
1512 float minus_one[2] = { -1.0, 0.0 };
1513 /* copy row j of T to temp */
1514 str_copy_row(order_type, uplo_type, trans_type, n,
1515 T, lda, temp, j);
1516
1517 test_BLAS_cdot_c_s(n, blas_no_conj,
1518 minus_one, alpha,
1519 &x_gen[j * incx_gen],
1520 &x[ix],
1521 &head_r_true[j * incx_gen],
1522 &tail_r_true[j * incx_gen], x,
1523 incx_val, temp, 1, eps_int,
1524 un_int, &ratios[j]);
1525 } else {
1526 double eps_out = power(2, -BITS_S);
1527
1528 double tmp =
1529 sqrt(((x[ix] - head_r_true[j * incx_gen]) -
1530 tail_r_true[j * incx_gen]) * ((x[ix] -
1531 head_r_true
1532 [j *
1533 incx_gen])
1534 -
1535 tail_r_true
1536 [j *
1537 incx_gen])
1538 +
1539 ((x[ix + 1] -
1540 head_r_true[j * incx_gen + 1]) -
1541 tail_r_true[j * incx_gen +
1542 1]) * ((x[ix + 1] -
1543 head_r_true[j *
1544 incx_gen +
1545 1]) -
1546 tail_r_true[j *
1547 incx_gen +
1548 1]));
1549 if (head_r_true[j * incx_gen] == 0.0
1550 && head_r_true[j * incx_gen + 1] == 0.0)
1551 ratios[j] = 0.0;
1552 else
1553 ratios[j] =
1554 tmp /
1555 (sqrt
1556 (head_r_true[j * incx_gen] *
1557 head_r_true[j * incx_gen] +
1558 head_r_true[j * incx_gen +
1559 1] * head_r_true[j * incx_gen +
1560 1]) * 2.0 *
1561 sqrt(2.0) * eps_out);
1562 }
1563
1564 /* take the max ratio */
1565 if (j == 0) {
1566 ratio = ratios[0];
1567 } else if (!(ratios[j] <= ratio)) {
1568 /* The !<= test causes NaN error to be detected.
1569 Note that (NaN > thresh) is always false. */
1570 ratio = ratios[j];
1571 }
1572 ix += incx;
1573 }
1574
1575 /* Increase the number of bad ratio, if the ratio
1576 is bigger than the threshold.
1577 The !<= below causes NaN error to be detected.
1578 Note that (NaN > thresh) is always false. */
1579 if (!(ratio <= thresh)) {
1580
1581 bad_ratios++;
1582
1583 if ((debug == 3) && /* print only when debug is on */
1584 (count != old_count) && /* print if old vector is different
1585 from the current one */
1586 (d_count == find_max_ratio) &&
1587 (p_count <= max_print) &&
1588 (ratio > 0.5 * ratio_max)) {
1589 p_count++;
1590 old_count = count;
1591
1592 printf
1593 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
1594 fname, n, ntests, thresh);
1595
1596 /* Print test info */
1597 switch (prec) {
1598 case blas_prec_single:
1599 printf("single ");
1600 break;
1601 case blas_prec_double:
1602 printf("double ");
1603 break;
1604 case blas_prec_indigenous:
1605 printf("indigenous ");
1606 break;
1607 case blas_prec_extra:
1608 printf("extra ");
1609 break;
1610 }
1611 switch (norm) {
1612 case -1:
1613 printf("near_underflow ");
1614 break;
1615 case 0:
1616 printf("near_one ");
1617 break;
1618 case 1:
1619 printf("near_overflow ");
1620 break;
1621 }
1622 switch (order_type) {
1623 case blas_rowmajor:
1624 printf("row_major ");
1625 break;
1626 case blas_colmajor:
1627 printf("col_major ");
1628 break;
1629 }
1630 switch (uplo_type) {
1631 case blas_upper:
1632 printf("upper ");
1633 break;
1634 case blas_lower:
1635 printf("lower ");
1636 break;
1637 }
1638 switch (trans_type) {
1639 case blas_no_trans:
1640 printf("no_trans ");
1641 break;
1642 case blas_trans:
1643 printf("trans ");
1644 break;
1645 case blas_conj_trans:
1646 printf("conj_trans ");
1647 break;
1648 }
1649 switch (diag_type) {
1650 case blas_non_unit_diag:
1651 printf("non_unit_diag ");
1652 break;
1653 case blas_unit_diag:
1654 printf("unit_diag ");
1655 break;
1656 }
1657
1658 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
1659 incx);
1660
1661 ix = 0;
1662 if (incx < 0)
1663 ix = -(n - 1) * incx;
1664
1665 printf(" T=");
1666 for (j = 0; j < n; j++) {
1667 /* copy row j of T to temp */
1668 str_copy_row(order_type, uplo_type, trans_type,
1669 n, T, lda, temp, j);
1670
1671 if (j > 0)
1672 printf(" ");
1673 sprint_vector(temp, n, 1, NULL);
1674 }
1675
1676 ix = 0;
1677 if (incx < 0)
1678 ix = -(n - 1) * incx;
1679
1680 for (j = 0; j < n; j++) {
1681 printf(" ");
1682 printf("x[%d] = ", j * incx_gen);
1683 printf("(%16.8e, %16.8e)", x_gen[j * incx_gen],
1684 x_gen[j * incx_gen + 1]);
1685 printf("; ");
1686 printf("x_final[%d] = ", ix);
1687 printf("(%16.8e, %16.8e)", x[ix], x[ix + 1]);
1688 printf("\n");
1689 ix += incx;
1690 }
1691
1692 printf(" ");
1693 printf("alpha = ");
1694 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
1695 printf("; ");
1696 printf("\n");
1697 for (j = 0; j < n; j++) {
1698 if (j == row)
1699 printf(" =>");
1700 else
1701 printf(" ");
1702 printf
1703 ("([%24.16e %24.16e], [%24.16e %24.16e])",
1704 head_r_true[j * incx_gen],
1705 tail_r_true[j * incx_gen],
1706 head_r_true[j * incx_gen + 1],
1707 tail_r_true[j * incx_gen + 1]);
1708 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
1709 }
1710 printf(" ratio=%.4e\n", ratio);
1711 }
1712 if (bad_ratios >= MAX_BAD_TESTS) {
1713 printf("\ntoo many failures, exiting....");
1714 printf("\nTesting and compilation");
1715 printf(" are incomplete\n\n");
1716 goto end;
1717 }
1718 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
1719 printf("\nFlagrant ratio %e, exiting...", ratio);
1720 printf("\nTesting and compilation");
1721 printf(" are incomplete\n\n");
1722 goto end;
1723 }
1724 }
1725
1726 if (d_count == 0) {
1727 if (ratio > ratio_max)
1728 ratio_max = ratio;
1729
1730 if (ratio != 0.0 && ratio < ratio_min)
1731 ratio_min = ratio;
1732
1733 tot_tests++;
1734 }
1735 } /* incx */
1736 } /* row */
1737 } /* lda */
1738 } /* numtests */
1739 } /* diag */
1740 } /* trans */
1741 } /* uplo */
1742 } /* order */
1743 } /* norm */
1744
1745 } /* alpha */
1746 } /* debug */
1747
1748 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
1749 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
1750 fname, n, ntests, thresh);
1751 printf
1752 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
1753 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
1754 ratio_min, ratio_max);
1755 }
1756
1757 end:
1758 FPU_FIX_STOP;
1759
1760 blas_free(x);
1761 blas_free(x_gen);
1762 blas_free(temp);
1763 blas_free(T);
1764 blas_free(head_r_true);
1765 blas_free(tail_r_true);
1766 blas_free(ratios);
1767
1768 *min_ratio = ratio_min;
1769 *num_bad_ratio = bad_ratios;
1770 *num_tests = tot_tests;
1771 return ratio_max;
1772 } /* end of do_test_ctrsv_s */
1773
do_test_ztrsv_d(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)1774 double do_test_ztrsv_d(int n,
1775 int ntests,
1776 int *seed,
1777 double thresh,
1778 int debug,
1779 float test_prob,
1780 double *min_ratio, int *num_bad_ratio, int *num_tests)
1781
1782 /*
1783 * Purpose
1784 * =======
1785 *
1786 * Runs a series of tests on TRSV.
1787 *
1788 * Arguments
1789 * =========
1790 *
1791 * n (input) int
1792 * The size of vector being tested
1793 *
1794 * ntests (input) int
1795 * The number of tests to run for each set of attributes.
1796 *
1797 * seed (input/output) int
1798 * The seed for the random number generator used in testgen().
1799 *
1800 * thresh (input) double
1801 * When the ratio returned from test() exceeds the specified
1802 * threshold, the current size, r_true, r_comp, and ratio will be
1803 * printed. (Since ratio is supposed to be O(1), we can set thresh
1804 * to ~10.)
1805 *
1806 * debug (input) int
1807 * If debug=3, print summary
1808 * If debug=2, print summary only if the number of bad ratios > 0
1809 * If debug=1, print complete info if tests fail
1810 * If debug=0, return max ratio
1811 *
1812 * test_prob (input) float
1813 * The specified test will be performed only if the generated
1814 * random exceeds this threshold.
1815 *
1816 * min_ratio (output) double
1817 * The minimum ratio
1818 *
1819 * num_bad_ratio (output) int
1820 * The number of tests fail; they are above the threshold.
1821 *
1822 * num_tests (output) int
1823 * The number of tests is being performed.
1824 *
1825 * Return value
1826 * ============
1827 *
1828 * The maximum ratio if run successfully, otherwise return -1
1829 *
1830 * Code structure
1831 * ==============
1832 *
1833 * debug loop -- if debug is one, the first loop computes the max ratio
1834 * -- and the last(second) loop outputs debugging information,
1835 * -- if the test fail and its ratio > 0.5 * max ratio.
1836 * -- if debug is zero, the loop is executed once
1837 * alpha loop -- varying alpha: 0, 1, or random
1838 *
1839 * norm loop -- varying norm: near undeflow, near one, or
1840 * -- near overflow
1841 * order loop -- varying order type: rowmajor or colmajor
1842 * uplo loop -- varying uplo type: upper or lower
1843 * trans loop -- varying trans type: no trans, trans, and conj trans
1844 * diag loop -- varying diag type: non unit, and unit
1845 * numtest loop -- how many times the test is perform with
1846 * -- above set of attributes
1847 * lda loop -- varying lda: n, n+1, and 2n
1848 * incx loop -- varying incx: -2, -1, 1, 2
1849 */
1850 {
1851 /* function name */
1852 const char fname[] = "BLAS_ztrsv_d";
1853
1854 /* max number of debug lines to print */
1855 const int max_print = 3;
1856
1857 /* Variables in the "x_val" form are loop vars for corresponding
1858 variables */
1859 int i; /* iterate through the repeating tests */
1860 int j; /* multipurpose counters */
1861 int ix; /* use to index x */
1862 int lda_val, lda = 0; /* for testing different values for lda */
1863 int incx_val, incx; /* for testing different inc values */
1864 int incx_gen = 1; /* 1 if real, 2 if complex */
1865 int d_count; /* counter for debug */
1866 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
1867 int p_count; /* counter for the number of debug lines printed */
1868 int tot_tests; /* total number of tests to be done */
1869 int norm; /* input values of near underflow/one/overflow */
1870 double ratio_max; /* the current maximum ratio */
1871 double ratio_min; /* the current minimum ratio */
1872 double *ratios; /* a temporary variable for calculating ratio */
1873 double ratio = 0.0; /* the per-use test ratio from test() */
1874 int bad_ratios = 0; /* the number of ratios over the threshold */
1875 double eps_int; /* the internal epsilon expected--2^(-24) for float */
1876 double un_int; /* the internal underflow threshold */
1877 double alpha[2];
1878 double beta[2];
1879 double *T;
1880 double *x;
1881 double *x_gen;
1882 double *temp; /* use for calculating ratio */
1883
1884 /* x_gen and y_gen are used to store vectors generated by testgen.
1885 they eventually are copied back to x and y */
1886
1887
1888 /* the true r calculated by testgen(), in double-double */
1889 double *head_r_true, *tail_r_true;
1890
1891 int alpha_val;
1892 int alpha_flag = 0, beta_flag = 0;
1893 int order_val;
1894 enum blas_order_type order_type = 0;
1895
1896 enum blas_prec_type prec = 0;
1897 int uplo_val;
1898 enum blas_uplo_type uplo_type = 0;
1899 int trans_val;
1900 enum blas_trans_type trans_type = 0;
1901 int diag_val;
1902 enum blas_diag_type diag_type = 0;
1903 int row;
1904 int saved_seed; /* for saving the original seed */
1905 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
1906 FPU_FIX_DECL;
1907
1908 /* test for bad arguments */
1909 if (n < 0 || ntests < 0)
1910 BLAS_error(fname, 0, 0, NULL);
1911
1912 /* if there is nothing to test, return all zero */
1913 if (n == 0 || ntests == 0) {
1914 *min_ratio = 0.0;
1915 *num_bad_ratio = 0;
1916 *num_tests = 0;
1917 return 0.0;
1918 }
1919
1920 FPU_FIX_START;
1921
1922 incx_gen *= 2;
1923
1924 /* get space for calculation */
1925 x = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
1926 if (n * 2 > 0 && x == NULL) {
1927 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1928 }
1929 x_gen = (double *) blas_malloc(n * sizeof(double) * 2);
1930 if (n > 0 && x_gen == NULL) {
1931 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1932 }
1933 T = (double *) blas_malloc(4 * n * n * sizeof(double));
1934 if (4 * n * n > 0 && T == NULL) {
1935 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1936 }
1937 temp = (double *) blas_malloc(n * sizeof(double));
1938 if (n > 0 && temp == NULL) {
1939 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1940 }
1941 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
1942 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
1943 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1944 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1945 }
1946 ratios = (double *) blas_malloc(n * sizeof(double));
1947 if (n > 0 && ratios == NULL) {
1948 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1949 }
1950
1951 /* initialization */
1952 saved_seed = *seed;
1953 ratio_min = 1e308;
1954 ratio_max = 0.0;
1955 tot_tests = 0;
1956 p_count = 0;
1957 count = 0;
1958 find_max_ratio = 0;
1959 beta[0] = beta[1] = 0.0;
1960 beta_flag = 1;
1961 if (debug == 3)
1962 find_max_ratio = 1;
1963
1964 /* The debug iteration:
1965 If debug=1, then will execute the iteration twice. First, compute the
1966 max ratio. Second, print info if ratio > (50% * ratio_max). */
1967 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
1968 bad_ratios = 0; /* set to zero */
1969
1970 if ((debug == 3) && (d_count == find_max_ratio))
1971 *seed = saved_seed; /* restore the original seed */
1972
1973 /* varying alpha */
1974 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
1975 alpha_flag = 0;
1976 switch (alpha_val) {
1977 case 0:
1978 alpha[0] = alpha[1] = 0.0;
1979 alpha_flag = 1;
1980 break;
1981 case 1:
1982 alpha[0] = 1.0;
1983 alpha[1] = 0.0;
1984 alpha_flag = 1;
1985 break;
1986 }
1987
1988
1989 eps_int = power(2, -BITS_D);
1990 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1991 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1992 prec = blas_prec_double;
1993
1994 /* values near underflow, 1, or overflow */
1995 for (norm = -1; norm <= 1; norm++) {
1996
1997 /* row or col major */
1998 for (order_val = 0; order_val < 2; order_val++) {
1999 switch (order_val) {
2000 case 0:
2001 order_type = blas_rowmajor;
2002 break;
2003 case 1:
2004 order_type = blas_colmajor;
2005 break;
2006 }
2007
2008 /* upper or lower */
2009 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
2010 switch (uplo_val) {
2011 case 0:
2012 uplo_type = blas_upper;
2013 break;
2014 case 1:
2015 uplo_type = blas_lower;
2016 break;
2017 }
2018
2019 /* no trans, trans, or conj trans */
2020 for (trans_val = 0; trans_val < 3; trans_val++) {
2021 switch (trans_val) {
2022 case 0:
2023 trans_type = blas_no_trans;
2024 break;
2025 case 1:
2026 trans_type = blas_trans;
2027 break;
2028 case 2:
2029 trans_type = blas_conj_trans;
2030 break;
2031 }
2032
2033 /* non_unit_diag, unit_diag */
2034 for (diag_val = 0; diag_val < 2; diag_val++) {
2035 switch (diag_val) {
2036 case 0:
2037 diag_type = blas_non_unit_diag;
2038 break;
2039 case 1:
2040 diag_type = blas_unit_diag;
2041 break;
2042 }
2043
2044 /* number of tests */
2045 for (i = 0; i < ntests; i++) {
2046
2047 for (lda_val = 0; lda_val < 3; lda_val++) {
2048 switch (lda_val) {
2049 case 0:
2050 lda = n;
2051 break;
2052 case 1:
2053 lda = n + 1;
2054 break;
2055 case 2:
2056 lda = 2 * n;
2057 break;
2058 }
2059
2060 /* For the sake of speed, we throw out this case at random */
2061 if (xrand(seed) >= test_prob)
2062 continue;
2063
2064 for (row = 0; row < n; row++) {
2065 BLAS_ztrsv_d_testgen(norm, order_type, uplo_type,
2066 trans_type, diag_type, n,
2067 &alpha, alpha_flag, T, lda, x_gen,
2068 seed, head_r_true, tail_r_true,
2069 row, prec);
2070
2071 count++;
2072
2073 /* varying incx */
2074 for (incx_val = -2; incx_val <= 2; incx_val++) {
2075 if (incx_val == 0)
2076 continue;
2077
2078
2079 /* setting incx */
2080 incx = incx_val;
2081 incx *= 2;
2082
2083 /* set x starting index */
2084 ix = 0;
2085 if (incx < 0)
2086 ix = -(n - 1) * incx;
2087
2088 /* copy x_gen to x */
2089 for (j = 0; j < n * incx_gen; j += incx_gen) {
2090 x[ix] = x_gen[j];
2091 x[ix + 1] = x_gen[j + 1];
2092
2093 ix += incx;
2094 }
2095
2096 /* call BLAS_ztrsv_d */
2097 FPU_FIX_STOP;
2098 BLAS_ztrsv_d(order_type, uplo_type, trans_type,
2099 diag_type, n, alpha, T, lda, x,
2100 incx_val);
2101 FPU_FIX_START;
2102
2103 ix = 0;
2104 if (incx < 0)
2105 ix = -(n - 1) * incx;
2106
2107 /* computing the ratio */
2108 for (j = 0; j < n; j++) {
2109 if (j == row) {
2110 double minus_one[2] = { -1.0, 0.0 };
2111 /* copy row j of T to temp */
2112 dtr_copy_row(order_type, uplo_type, trans_type, n,
2113 T, lda, temp, j);
2114
2115 test_BLAS_zdot_z_d(n, blas_no_conj,
2116 minus_one, alpha,
2117 &x_gen[j * incx_gen],
2118 &x[ix],
2119 &head_r_true[j * incx_gen],
2120 &tail_r_true[j * incx_gen], x,
2121 incx_val, temp, 1, eps_int,
2122 un_int, &ratios[j]);
2123 } else {
2124 double eps_out = power(2, -BITS_D);
2125
2126 double tmp =
2127 sqrt(((x[ix] - head_r_true[j * incx_gen]) -
2128 tail_r_true[j * incx_gen]) * ((x[ix] -
2129 head_r_true
2130 [j *
2131 incx_gen])
2132 -
2133 tail_r_true
2134 [j *
2135 incx_gen])
2136 +
2137 ((x[ix + 1] -
2138 head_r_true[j * incx_gen + 1]) -
2139 tail_r_true[j * incx_gen +
2140 1]) * ((x[ix + 1] -
2141 head_r_true[j *
2142 incx_gen +
2143 1]) -
2144 tail_r_true[j *
2145 incx_gen +
2146 1]));
2147 if (head_r_true[j * incx_gen] == 0.0
2148 && head_r_true[j * incx_gen + 1] == 0.0)
2149 ratios[j] = 0.0;
2150 else
2151 ratios[j] =
2152 tmp /
2153 (sqrt
2154 (head_r_true[j * incx_gen] *
2155 head_r_true[j * incx_gen] +
2156 head_r_true[j * incx_gen +
2157 1] * head_r_true[j * incx_gen +
2158 1]) * 2.0 *
2159 sqrt(2.0) * eps_out);
2160 }
2161
2162 /* take the max ratio */
2163 if (j == 0) {
2164 ratio = ratios[0];
2165 } else if (!(ratios[j] <= ratio)) {
2166 /* The !<= test causes NaN error to be detected.
2167 Note that (NaN > thresh) is always false. */
2168 ratio = ratios[j];
2169 }
2170 ix += incx;
2171 }
2172
2173 /* Increase the number of bad ratio, if the ratio
2174 is bigger than the threshold.
2175 The !<= below causes NaN error to be detected.
2176 Note that (NaN > thresh) is always false. */
2177 if (!(ratio <= thresh)) {
2178
2179 bad_ratios++;
2180
2181 if ((debug == 3) && /* print only when debug is on */
2182 (count != old_count) && /* print if old vector is different
2183 from the current one */
2184 (d_count == find_max_ratio) &&
2185 (p_count <= max_print) &&
2186 (ratio > 0.5 * ratio_max)) {
2187 p_count++;
2188 old_count = count;
2189
2190 printf
2191 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
2192 fname, n, ntests, thresh);
2193
2194 /* Print test info */
2195 switch (prec) {
2196 case blas_prec_single:
2197 printf("single ");
2198 break;
2199 case blas_prec_double:
2200 printf("double ");
2201 break;
2202 case blas_prec_indigenous:
2203 printf("indigenous ");
2204 break;
2205 case blas_prec_extra:
2206 printf("extra ");
2207 break;
2208 }
2209 switch (norm) {
2210 case -1:
2211 printf("near_underflow ");
2212 break;
2213 case 0:
2214 printf("near_one ");
2215 break;
2216 case 1:
2217 printf("near_overflow ");
2218 break;
2219 }
2220 switch (order_type) {
2221 case blas_rowmajor:
2222 printf("row_major ");
2223 break;
2224 case blas_colmajor:
2225 printf("col_major ");
2226 break;
2227 }
2228 switch (uplo_type) {
2229 case blas_upper:
2230 printf("upper ");
2231 break;
2232 case blas_lower:
2233 printf("lower ");
2234 break;
2235 }
2236 switch (trans_type) {
2237 case blas_no_trans:
2238 printf("no_trans ");
2239 break;
2240 case blas_trans:
2241 printf("trans ");
2242 break;
2243 case blas_conj_trans:
2244 printf("conj_trans ");
2245 break;
2246 }
2247 switch (diag_type) {
2248 case blas_non_unit_diag:
2249 printf("non_unit_diag ");
2250 break;
2251 case blas_unit_diag:
2252 printf("unit_diag ");
2253 break;
2254 }
2255
2256 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
2257 incx);
2258
2259 ix = 0;
2260 if (incx < 0)
2261 ix = -(n - 1) * incx;
2262
2263 printf(" T=");
2264 for (j = 0; j < n; j++) {
2265 /* copy row j of T to temp */
2266 dtr_copy_row(order_type, uplo_type, trans_type,
2267 n, T, lda, temp, j);
2268
2269 if (j > 0)
2270 printf(" ");
2271 dprint_vector(temp, n, 1, NULL);
2272 }
2273
2274 ix = 0;
2275 if (incx < 0)
2276 ix = -(n - 1) * incx;
2277
2278 for (j = 0; j < n; j++) {
2279 printf(" ");
2280 printf("x[%d] = ", j * incx_gen);
2281 printf("(%24.16e, %24.16e)",
2282 x_gen[j * incx_gen],
2283 x_gen[j * incx_gen + 1]);
2284 printf("; ");
2285 printf("x_final[%d] = ", ix);
2286 printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
2287 printf("\n");
2288 ix += incx;
2289 }
2290
2291 printf(" ");
2292 printf("alpha = ");
2293 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
2294 printf("; ");
2295 printf("\n");
2296 for (j = 0; j < n; j++) {
2297 if (j == row)
2298 printf(" =>");
2299 else
2300 printf(" ");
2301 printf
2302 ("([%24.16e %24.16e], [%24.16e %24.16e])",
2303 head_r_true[j * incx_gen],
2304 tail_r_true[j * incx_gen],
2305 head_r_true[j * incx_gen + 1],
2306 tail_r_true[j * incx_gen + 1]);
2307 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
2308 }
2309 printf(" ratio=%.4e\n", ratio);
2310 }
2311 if (bad_ratios >= MAX_BAD_TESTS) {
2312 printf("\ntoo many failures, exiting....");
2313 printf("\nTesting and compilation");
2314 printf(" are incomplete\n\n");
2315 goto end;
2316 }
2317 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
2318 printf("\nFlagrant ratio %e, exiting...", ratio);
2319 printf("\nTesting and compilation");
2320 printf(" are incomplete\n\n");
2321 goto end;
2322 }
2323 }
2324
2325 if (d_count == 0) {
2326 if (ratio > ratio_max)
2327 ratio_max = ratio;
2328
2329 if (ratio != 0.0 && ratio < ratio_min)
2330 ratio_min = ratio;
2331
2332 tot_tests++;
2333 }
2334 } /* incx */
2335 } /* row */
2336 } /* lda */
2337 } /* numtests */
2338 } /* diag */
2339 } /* trans */
2340 } /* uplo */
2341 } /* order */
2342 } /* norm */
2343
2344 } /* alpha */
2345 } /* debug */
2346
2347 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
2348 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
2349 fname, n, ntests, thresh);
2350 printf
2351 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
2352 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
2353 ratio_min, ratio_max);
2354 }
2355
2356 end:
2357 FPU_FIX_STOP;
2358
2359 blas_free(x);
2360 blas_free(x_gen);
2361 blas_free(temp);
2362 blas_free(T);
2363 blas_free(head_r_true);
2364 blas_free(tail_r_true);
2365 blas_free(ratios);
2366
2367 *min_ratio = ratio_min;
2368 *num_bad_ratio = bad_ratios;
2369 *num_tests = tot_tests;
2370 return ratio_max;
2371 } /* end of do_test_ztrsv_d */
2372
do_test_strsv_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)2373 double do_test_strsv_x(int n,
2374 int ntests,
2375 int *seed,
2376 double thresh,
2377 int debug,
2378 float test_prob,
2379 double *min_ratio, int *num_bad_ratio, int *num_tests)
2380
2381 /*
2382 * Purpose
2383 * =======
2384 *
2385 * Runs a series of tests on TRSV.
2386 *
2387 * Arguments
2388 * =========
2389 *
2390 * n (input) int
2391 * The size of vector being tested
2392 *
2393 * ntests (input) int
2394 * The number of tests to run for each set of attributes.
2395 *
2396 * seed (input/output) int
2397 * The seed for the random number generator used in testgen().
2398 *
2399 * thresh (input) double
2400 * When the ratio returned from test() exceeds the specified
2401 * threshold, the current size, r_true, r_comp, and ratio will be
2402 * printed. (Since ratio is supposed to be O(1), we can set thresh
2403 * to ~10.)
2404 *
2405 * debug (input) int
2406 * If debug=3, print summary
2407 * If debug=2, print summary only if the number of bad ratios > 0
2408 * If debug=1, print complete info if tests fail
2409 * If debug=0, return max ratio
2410 *
2411 * test_prob (input) float
2412 * The specified test will be performed only if the generated
2413 * random exceeds this threshold.
2414 *
2415 * min_ratio (output) double
2416 * The minimum ratio
2417 *
2418 * num_bad_ratio (output) int
2419 * The number of tests fail; they are above the threshold.
2420 *
2421 * num_tests (output) int
2422 * The number of tests is being performed.
2423 *
2424 * Return value
2425 * ============
2426 *
2427 * The maximum ratio if run successfully, otherwise return -1
2428 *
2429 * Code structure
2430 * ==============
2431 *
2432 * debug loop -- if debug is one, the first loop computes the max ratio
2433 * -- and the last(second) loop outputs debugging information,
2434 * -- if the test fail and its ratio > 0.5 * max ratio.
2435 * -- if debug is zero, the loop is executed once
2436 * alpha loop -- varying alpha: 0, 1, or random
2437 * prec loop -- varying internal prec: single, double, or extra
2438 * norm loop -- varying norm: near undeflow, near one, or
2439 * -- near overflow
2440 * order loop -- varying order type: rowmajor or colmajor
2441 * uplo loop -- varying uplo type: upper or lower
2442 * trans loop -- varying trans type: no trans, trans, and conj trans
2443 * diag loop -- varying diag type: non unit, and unit
2444 * numtest loop -- how many times the test is perform with
2445 * -- above set of attributes
2446 * lda loop -- varying lda: n, n+1, and 2n
2447 * incx loop -- varying incx: -2, -1, 1, 2
2448 */
2449 {
2450 /* function name */
2451 const char fname[] = "BLAS_strsv_x";
2452
2453 /* max number of debug lines to print */
2454 const int max_print = 3;
2455
2456 /* Variables in the "x_val" form are loop vars for corresponding
2457 variables */
2458 int i; /* iterate through the repeating tests */
2459 int j; /* multipurpose counters */
2460 int ix; /* use to index x */
2461 int lda_val, lda = 0; /* for testing different values for lda */
2462 int incx_val, incx; /* for testing different inc values */
2463 int incx_gen = 1; /* 1 if real, 2 if complex */
2464 int d_count; /* counter for debug */
2465 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
2466 int p_count; /* counter for the number of debug lines printed */
2467 int tot_tests; /* total number of tests to be done */
2468 int norm; /* input values of near underflow/one/overflow */
2469 double ratio_max; /* the current maximum ratio */
2470 double ratio_min; /* the current minimum ratio */
2471 double *ratios; /* a temporary variable for calculating ratio */
2472 double ratio = 0.0; /* the per-use test ratio from test() */
2473 int bad_ratios = 0; /* the number of ratios over the threshold */
2474 double eps_int; /* the internal epsilon expected--2^(-24) for float */
2475 double un_int; /* the internal underflow threshold */
2476 float alpha;
2477 float beta;
2478 float *T;
2479 float *x;
2480 float *x_gen;
2481 float *temp; /* use for calculating ratio */
2482
2483 /* x_gen and y_gen are used to store vectors generated by testgen.
2484 they eventually are copied back to x and y */
2485
2486
2487 /* the true r calculated by testgen(), in double-double */
2488 double *head_r_true, *tail_r_true;
2489 int alpha_val;
2490 int alpha_flag = 0, beta_flag = 0;
2491 int order_val;
2492 enum blas_order_type order_type = 0;
2493 int prec_val;
2494 enum blas_prec_type prec = 0;
2495 int uplo_val;
2496 enum blas_uplo_type uplo_type = 0;
2497 int trans_val;
2498 enum blas_trans_type trans_type = 0;
2499 int diag_val;
2500 enum blas_diag_type diag_type = 0;
2501 int row;
2502 int saved_seed; /* for saving the original seed */
2503 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
2504 FPU_FIX_DECL;
2505
2506 /* test for bad arguments */
2507 if (n < 0 || ntests < 0)
2508 BLAS_error(fname, 0, 0, NULL);
2509
2510 /* if there is nothing to test, return all zero */
2511 if (n == 0 || ntests == 0) {
2512 *min_ratio = 0.0;
2513 *num_bad_ratio = 0;
2514 *num_tests = 0;
2515 return 0.0;
2516 }
2517
2518 FPU_FIX_START;
2519
2520
2521
2522 /* get space for calculation */
2523 x = (float *) blas_malloc(n * 2 * sizeof(float));
2524 if (n * 2 > 0 && x == NULL) {
2525 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2526 }
2527 x_gen = (float *) blas_malloc(n * sizeof(float));
2528 if (n > 0 && x_gen == NULL) {
2529 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2530 }
2531 T = (float *) blas_malloc(4 * n * n * sizeof(float));
2532 if (4 * n * n > 0 && T == NULL) {
2533 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2534 }
2535 temp = (float *) blas_malloc(n * sizeof(float));
2536 if (n > 0 && temp == NULL) {
2537 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2538 }
2539 head_r_true = (double *) blas_malloc(n * sizeof(double));
2540 tail_r_true = (double *) blas_malloc(n * sizeof(double));
2541 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2542 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2543 }
2544 ratios = (double *) blas_malloc(n * sizeof(double));
2545 if (n > 0 && ratios == NULL) {
2546 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2547 }
2548
2549 /* initialization */
2550 saved_seed = *seed;
2551 ratio_min = 1e308;
2552 ratio_max = 0.0;
2553 tot_tests = 0;
2554 p_count = 0;
2555 count = 0;
2556 find_max_ratio = 0;
2557 beta = 0.0;
2558 beta_flag = 1;
2559 if (debug == 3)
2560 find_max_ratio = 1;
2561
2562 /* The debug iteration:
2563 If debug=1, then will execute the iteration twice. First, compute the
2564 max ratio. Second, print info if ratio > (50% * ratio_max). */
2565 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
2566 bad_ratios = 0; /* set to zero */
2567
2568 if ((debug == 3) && (d_count == find_max_ratio))
2569 *seed = saved_seed; /* restore the original seed */
2570
2571 /* varying alpha */
2572 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
2573 alpha_flag = 0;
2574 switch (alpha_val) {
2575 case 0:
2576 alpha = 0.0;
2577 alpha_flag = 1;
2578 break;
2579 case 1:
2580 alpha = 1.0;
2581 alpha_flag = 1;
2582 break;
2583 }
2584
2585
2586 /* varying extra precs */
2587 for (prec_val = 0; prec_val <= 2; prec_val++) {
2588 switch (prec_val) {
2589 case 0:
2590 eps_int = power(2, -BITS_S);
2591 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
2592 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
2593 prec = blas_prec_single;
2594 break;
2595 case 1:
2596 eps_int = power(2, -BITS_D);
2597 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2598 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2599 prec = blas_prec_double;
2600 break;
2601 case 2:
2602 default:
2603 eps_int = power(2, -BITS_E);
2604 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
2605 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
2606 prec = blas_prec_extra;
2607 break;
2608 }
2609
2610 /* values near underflow, 1, or overflow */
2611 for (norm = -1; norm <= 1; norm++) {
2612
2613 /* row or col major */
2614 for (order_val = 0; order_val < 2; order_val++) {
2615 switch (order_val) {
2616 case 0:
2617 order_type = blas_rowmajor;
2618 break;
2619 case 1:
2620 order_type = blas_colmajor;
2621 break;
2622 }
2623
2624 /* upper or lower */
2625 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
2626 switch (uplo_val) {
2627 case 0:
2628 uplo_type = blas_upper;
2629 break;
2630 case 1:
2631 uplo_type = blas_lower;
2632 break;
2633 }
2634
2635 /* no trans, trans, or conj trans */
2636 for (trans_val = 0; trans_val < 3; trans_val++) {
2637 switch (trans_val) {
2638 case 0:
2639 trans_type = blas_no_trans;
2640 break;
2641 case 1:
2642 trans_type = blas_trans;
2643 break;
2644 case 2:
2645 trans_type = blas_conj_trans;
2646 break;
2647 }
2648
2649 /* non_unit_diag, unit_diag */
2650 for (diag_val = 0; diag_val < 2; diag_val++) {
2651 switch (diag_val) {
2652 case 0:
2653 diag_type = blas_non_unit_diag;
2654 break;
2655 case 1:
2656 diag_type = blas_unit_diag;
2657 break;
2658 }
2659
2660 /* number of tests */
2661 for (i = 0; i < ntests; i++) {
2662
2663 for (lda_val = 0; lda_val < 3; lda_val++) {
2664 switch (lda_val) {
2665 case 0:
2666 lda = n;
2667 break;
2668 case 1:
2669 lda = n + 1;
2670 break;
2671 case 2:
2672 lda = 2 * n;
2673 break;
2674 }
2675
2676 /* For the sake of speed, we throw out this case at random */
2677 if (xrand(seed) >= test_prob)
2678 continue;
2679
2680 for (row = 0; row < n; row++) {
2681 BLAS_strsv_testgen(norm, order_type, uplo_type,
2682 trans_type, diag_type, n,
2683 &alpha, alpha_flag, T, lda, x_gen,
2684 seed, head_r_true, tail_r_true,
2685 row, prec);
2686
2687 count++;
2688
2689 /* varying incx */
2690 for (incx_val = -2; incx_val <= 2; incx_val++) {
2691 if (incx_val == 0)
2692 continue;
2693
2694
2695 /* setting incx */
2696 incx = incx_val;
2697
2698
2699 /* set x starting index */
2700 ix = 0;
2701 if (incx < 0)
2702 ix = -(n - 1) * incx;
2703
2704 /* copy x_gen to x */
2705 for (j = 0; j < n * incx_gen; j += incx_gen) {
2706 x[ix] = x_gen[j];
2707
2708 ix += incx;
2709 }
2710
2711 /* call BLAS_strsv_x */
2712 FPU_FIX_STOP;
2713 BLAS_strsv_x(order_type, uplo_type, trans_type,
2714 diag_type, n, alpha, T, lda, x,
2715 incx_val, prec);
2716 FPU_FIX_START;
2717
2718 ix = 0;
2719 if (incx < 0)
2720 ix = -(n - 1) * incx;
2721
2722 /* computing the ratio */
2723 for (j = 0; j < n; j++) {
2724 if (j == row) {
2725 int minus_one = -1.0;
2726 /* copy row j of T to temp */
2727 str_copy_row(order_type, uplo_type, trans_type,
2728 n, T, lda, temp, j);
2729
2730 test_BLAS_sdot(n, blas_no_conj,
2731 minus_one, alpha,
2732 x_gen[j * incx_gen],
2733 x[ix],
2734 head_r_true[j * incx_gen],
2735 tail_r_true[j * incx_gen], x,
2736 incx_val, temp, 1, eps_int,
2737 un_int, &ratios[j]);
2738 } else {
2739 double eps_out = power(2, -BITS_S);
2740
2741 double tmp =
2742 fabs((x[ix] - head_r_true[j]) -
2743 tail_r_true[j]);
2744
2745 if (head_r_true[j] == 0.0)
2746 ratios[j] = 0.0;
2747 else
2748 ratios[j] =
2749 tmp / (head_r_true[j] * 2.0 * eps_out);
2750 }
2751
2752 /* take the max ratio */
2753 if (j == 0) {
2754 ratio = ratios[0];
2755 } else if (!(ratios[j] <= ratio)) {
2756 /* The !<= test causes NaN error to be detected.
2757 Note that (NaN > thresh) is always false. */
2758 ratio = ratios[j];
2759 }
2760 ix += incx;
2761 }
2762
2763 /* Increase the number of bad ratio, if the ratio
2764 is bigger than the threshold.
2765 The !<= below causes NaN error to be detected.
2766 Note that (NaN > thresh) is always false. */
2767 if (!(ratio <= thresh)) {
2768
2769 bad_ratios++;
2770
2771 if ((debug == 3) && /* print only when debug is on */
2772 (count != old_count) && /* print if old vector is different
2773 from the current one */
2774 (d_count == find_max_ratio) &&
2775 (p_count <= max_print) &&
2776 (ratio > 0.5 * ratio_max)) {
2777 p_count++;
2778 old_count = count;
2779
2780 printf
2781 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
2782 fname, n, ntests, thresh);
2783
2784 /* Print test info */
2785 switch (prec) {
2786 case blas_prec_single:
2787 printf("single ");
2788 break;
2789 case blas_prec_double:
2790 printf("double ");
2791 break;
2792 case blas_prec_indigenous:
2793 printf("indigenous ");
2794 break;
2795 case blas_prec_extra:
2796 printf("extra ");
2797 break;
2798 }
2799 switch (norm) {
2800 case -1:
2801 printf("near_underflow ");
2802 break;
2803 case 0:
2804 printf("near_one ");
2805 break;
2806 case 1:
2807 printf("near_overflow ");
2808 break;
2809 }
2810 switch (order_type) {
2811 case blas_rowmajor:
2812 printf("row_major ");
2813 break;
2814 case blas_colmajor:
2815 printf("col_major ");
2816 break;
2817 }
2818 switch (uplo_type) {
2819 case blas_upper:
2820 printf("upper ");
2821 break;
2822 case blas_lower:
2823 printf("lower ");
2824 break;
2825 }
2826 switch (trans_type) {
2827 case blas_no_trans:
2828 printf("no_trans ");
2829 break;
2830 case blas_trans:
2831 printf("trans ");
2832 break;
2833 case blas_conj_trans:
2834 printf("conj_trans ");
2835 break;
2836 }
2837 switch (diag_type) {
2838 case blas_non_unit_diag:
2839 printf("non_unit_diag ");
2840 break;
2841 case blas_unit_diag:
2842 printf("unit_diag ");
2843 break;
2844 }
2845
2846 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
2847 incx);
2848
2849 ix = 0;
2850 if (incx < 0)
2851 ix = -(n - 1) * incx;
2852
2853 printf(" T=");
2854 for (j = 0; j < n; j++) {
2855 /* copy row j of T to temp */
2856 str_copy_row(order_type, uplo_type,
2857 trans_type, n, T, lda, temp, j);
2858
2859 if (j > 0)
2860 printf(" ");
2861 sprint_vector(temp, n, 1, NULL);
2862 }
2863
2864 ix = 0;
2865 if (incx < 0)
2866 ix = -(n - 1) * incx;
2867
2868 for (j = 0; j < n; j++) {
2869 printf(" ");
2870 printf("x[%d] = ", j * incx_gen);
2871 printf("%16.8e", x_gen[j * incx_gen]);
2872 printf("; ");
2873 printf("x_final[%d] = ", ix);
2874 printf("%16.8e", x[ix]);
2875 printf("\n");
2876 ix += incx;
2877 }
2878
2879 printf(" ");
2880 printf("alpha = ");
2881 printf("%16.8e", alpha);
2882 printf("; ");
2883 printf("\n");
2884 for (j = 0; j < n; j++) {
2885 if (j == row)
2886 printf(" =>");
2887 else
2888 printf(" ");
2889 printf("[%24.16e, %24.16e]",
2890 head_r_true[j * incx_gen],
2891 tail_r_true[j * incx_gen]);
2892 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
2893 }
2894 printf(" ratio=%.4e\n", ratio);
2895 }
2896 if (bad_ratios >= MAX_BAD_TESTS) {
2897 printf("\ntoo many failures, exiting....");
2898 printf("\nTesting and compilation");
2899 printf(" are incomplete\n\n");
2900 goto end;
2901 }
2902 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
2903 printf("\nFlagrant ratio %e, exiting...",
2904 ratio);
2905 printf("\nTesting and compilation");
2906 printf(" are incomplete\n\n");
2907 goto end;
2908 }
2909 }
2910
2911 if (d_count == 0) {
2912 if (ratio > ratio_max)
2913 ratio_max = ratio;
2914
2915 if (ratio != 0.0 && ratio < ratio_min)
2916 ratio_min = ratio;
2917
2918 tot_tests++;
2919 }
2920 } /* incx */
2921 } /* row */
2922 } /* lda */
2923 } /* numtests */
2924 } /* diag */
2925 } /* trans */
2926 } /* uplo */
2927 } /* order */
2928 } /* norm */
2929 } /* prec */
2930 } /* alpha */
2931 } /* debug */
2932
2933 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
2934 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
2935 fname, n, ntests, thresh);
2936 printf
2937 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
2938 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
2939 ratio_min, ratio_max);
2940 }
2941
2942 end:
2943 FPU_FIX_STOP;
2944
2945 blas_free(x);
2946 blas_free(x_gen);
2947 blas_free(temp);
2948 blas_free(T);
2949 blas_free(head_r_true);
2950 blas_free(tail_r_true);
2951 blas_free(ratios);
2952
2953 *min_ratio = ratio_min;
2954 *num_bad_ratio = bad_ratios;
2955 *num_tests = tot_tests;
2956 return ratio_max;
2957 } /* end of do_test_strsv_x */
2958
do_test_dtrsv_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)2959 double do_test_dtrsv_x(int n,
2960 int ntests,
2961 int *seed,
2962 double thresh,
2963 int debug,
2964 float test_prob,
2965 double *min_ratio, int *num_bad_ratio, int *num_tests)
2966
2967 /*
2968 * Purpose
2969 * =======
2970 *
2971 * Runs a series of tests on TRSV.
2972 *
2973 * Arguments
2974 * =========
2975 *
2976 * n (input) int
2977 * The size of vector being tested
2978 *
2979 * ntests (input) int
2980 * The number of tests to run for each set of attributes.
2981 *
2982 * seed (input/output) int
2983 * The seed for the random number generator used in testgen().
2984 *
2985 * thresh (input) double
2986 * When the ratio returned from test() exceeds the specified
2987 * threshold, the current size, r_true, r_comp, and ratio will be
2988 * printed. (Since ratio is supposed to be O(1), we can set thresh
2989 * to ~10.)
2990 *
2991 * debug (input) int
2992 * If debug=3, print summary
2993 * If debug=2, print summary only if the number of bad ratios > 0
2994 * If debug=1, print complete info if tests fail
2995 * If debug=0, return max ratio
2996 *
2997 * test_prob (input) float
2998 * The specified test will be performed only if the generated
2999 * random exceeds this threshold.
3000 *
3001 * min_ratio (output) double
3002 * The minimum ratio
3003 *
3004 * num_bad_ratio (output) int
3005 * The number of tests fail; they are above the threshold.
3006 *
3007 * num_tests (output) int
3008 * The number of tests is being performed.
3009 *
3010 * Return value
3011 * ============
3012 *
3013 * The maximum ratio if run successfully, otherwise return -1
3014 *
3015 * Code structure
3016 * ==============
3017 *
3018 * debug loop -- if debug is one, the first loop computes the max ratio
3019 * -- and the last(second) loop outputs debugging information,
3020 * -- if the test fail and its ratio > 0.5 * max ratio.
3021 * -- if debug is zero, the loop is executed once
3022 * alpha loop -- varying alpha: 0, 1, or random
3023 * prec loop -- varying internal prec: single, double, or extra
3024 * norm loop -- varying norm: near undeflow, near one, or
3025 * -- near overflow
3026 * order loop -- varying order type: rowmajor or colmajor
3027 * uplo loop -- varying uplo type: upper or lower
3028 * trans loop -- varying trans type: no trans, trans, and conj trans
3029 * diag loop -- varying diag type: non unit, and unit
3030 * numtest loop -- how many times the test is perform with
3031 * -- above set of attributes
3032 * lda loop -- varying lda: n, n+1, and 2n
3033 * incx loop -- varying incx: -2, -1, 1, 2
3034 */
3035 {
3036 /* function name */
3037 const char fname[] = "BLAS_dtrsv_x";
3038
3039 /* max number of debug lines to print */
3040 const int max_print = 3;
3041
3042 /* Variables in the "x_val" form are loop vars for corresponding
3043 variables */
3044 int i; /* iterate through the repeating tests */
3045 int j; /* multipurpose counters */
3046 int ix; /* use to index x */
3047 int lda_val, lda = 0; /* for testing different values for lda */
3048 int incx_val, incx; /* for testing different inc values */
3049 int incx_gen = 1; /* 1 if real, 2 if complex */
3050 int d_count; /* counter for debug */
3051 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
3052 int p_count; /* counter for the number of debug lines printed */
3053 int tot_tests; /* total number of tests to be done */
3054 int norm; /* input values of near underflow/one/overflow */
3055 double ratio_max; /* the current maximum ratio */
3056 double ratio_min; /* the current minimum ratio */
3057 double *ratios; /* a temporary variable for calculating ratio */
3058 double ratio = 0.0; /* the per-use test ratio from test() */
3059 int bad_ratios = 0; /* the number of ratios over the threshold */
3060 double eps_int; /* the internal epsilon expected--2^(-24) for float */
3061 double un_int; /* the internal underflow threshold */
3062 double alpha;
3063 double beta;
3064 double *T;
3065 double *x;
3066 double *x_gen;
3067 double *temp; /* use for calculating ratio */
3068
3069 /* x_gen and y_gen are used to store vectors generated by testgen.
3070 they eventually are copied back to x and y */
3071
3072
3073 /* the true r calculated by testgen(), in double-double */
3074 double *head_r_true, *tail_r_true;
3075 int alpha_val;
3076 int alpha_flag = 0, beta_flag = 0;
3077 int order_val;
3078 enum blas_order_type order_type = 0;
3079 int prec_val;
3080 enum blas_prec_type prec = 0;
3081 int uplo_val;
3082 enum blas_uplo_type uplo_type = 0;
3083 int trans_val;
3084 enum blas_trans_type trans_type = 0;
3085 int diag_val;
3086 enum blas_diag_type diag_type = 0;
3087 int row;
3088 int saved_seed; /* for saving the original seed */
3089 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
3090 FPU_FIX_DECL;
3091
3092 /* test for bad arguments */
3093 if (n < 0 || ntests < 0)
3094 BLAS_error(fname, 0, 0, NULL);
3095
3096 /* if there is nothing to test, return all zero */
3097 if (n == 0 || ntests == 0) {
3098 *min_ratio = 0.0;
3099 *num_bad_ratio = 0;
3100 *num_tests = 0;
3101 return 0.0;
3102 }
3103
3104 FPU_FIX_START;
3105
3106
3107
3108 /* get space for calculation */
3109 x = (double *) blas_malloc(n * 2 * sizeof(double));
3110 if (n * 2 > 0 && x == NULL) {
3111 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3112 }
3113 x_gen = (double *) blas_malloc(n * sizeof(double));
3114 if (n > 0 && x_gen == NULL) {
3115 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3116 }
3117 T = (double *) blas_malloc(4 * n * n * sizeof(double));
3118 if (4 * n * n > 0 && T == NULL) {
3119 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3120 }
3121 temp = (double *) blas_malloc(n * sizeof(double));
3122 if (n > 0 && temp == NULL) {
3123 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3124 }
3125 head_r_true = (double *) blas_malloc(n * sizeof(double));
3126 tail_r_true = (double *) blas_malloc(n * sizeof(double));
3127 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
3128 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3129 }
3130 ratios = (double *) blas_malloc(n * sizeof(double));
3131 if (n > 0 && ratios == NULL) {
3132 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3133 }
3134
3135 /* initialization */
3136 saved_seed = *seed;
3137 ratio_min = 1e308;
3138 ratio_max = 0.0;
3139 tot_tests = 0;
3140 p_count = 0;
3141 count = 0;
3142 find_max_ratio = 0;
3143 beta = 0.0;
3144 beta_flag = 1;
3145 if (debug == 3)
3146 find_max_ratio = 1;
3147
3148 /* The debug iteration:
3149 If debug=1, then will execute the iteration twice. First, compute the
3150 max ratio. Second, print info if ratio > (50% * ratio_max). */
3151 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
3152 bad_ratios = 0; /* set to zero */
3153
3154 if ((debug == 3) && (d_count == find_max_ratio))
3155 *seed = saved_seed; /* restore the original seed */
3156
3157 /* varying alpha */
3158 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
3159 alpha_flag = 0;
3160 switch (alpha_val) {
3161 case 0:
3162 alpha = 0.0;
3163 alpha_flag = 1;
3164 break;
3165 case 1:
3166 alpha = 1.0;
3167 alpha_flag = 1;
3168 break;
3169 }
3170
3171
3172 /* varying extra precs */
3173 for (prec_val = 0; prec_val <= 2; prec_val++) {
3174 switch (prec_val) {
3175 case 0:
3176 eps_int = power(2, -BITS_D);
3177 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3178 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3179 prec = blas_prec_double;
3180 break;
3181 case 1:
3182 eps_int = power(2, -BITS_D);
3183 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3184 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3185 prec = blas_prec_double;
3186 break;
3187 case 2:
3188 default:
3189 eps_int = power(2, -BITS_E);
3190 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
3191 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
3192 prec = blas_prec_extra;
3193 break;
3194 }
3195
3196 /* values near underflow, 1, or overflow */
3197 for (norm = -1; norm <= 1; norm++) {
3198
3199 /* row or col major */
3200 for (order_val = 0; order_val < 2; order_val++) {
3201 switch (order_val) {
3202 case 0:
3203 order_type = blas_rowmajor;
3204 break;
3205 case 1:
3206 order_type = blas_colmajor;
3207 break;
3208 }
3209
3210 /* upper or lower */
3211 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
3212 switch (uplo_val) {
3213 case 0:
3214 uplo_type = blas_upper;
3215 break;
3216 case 1:
3217 uplo_type = blas_lower;
3218 break;
3219 }
3220
3221 /* no trans, trans, or conj trans */
3222 for (trans_val = 0; trans_val < 3; trans_val++) {
3223 switch (trans_val) {
3224 case 0:
3225 trans_type = blas_no_trans;
3226 break;
3227 case 1:
3228 trans_type = blas_trans;
3229 break;
3230 case 2:
3231 trans_type = blas_conj_trans;
3232 break;
3233 }
3234
3235 /* non_unit_diag, unit_diag */
3236 for (diag_val = 0; diag_val < 2; diag_val++) {
3237 switch (diag_val) {
3238 case 0:
3239 diag_type = blas_non_unit_diag;
3240 break;
3241 case 1:
3242 diag_type = blas_unit_diag;
3243 break;
3244 }
3245
3246 /* number of tests */
3247 for (i = 0; i < ntests; i++) {
3248
3249 for (lda_val = 0; lda_val < 3; lda_val++) {
3250 switch (lda_val) {
3251 case 0:
3252 lda = n;
3253 break;
3254 case 1:
3255 lda = n + 1;
3256 break;
3257 case 2:
3258 lda = 2 * n;
3259 break;
3260 }
3261
3262 /* For the sake of speed, we throw out this case at random */
3263 if (xrand(seed) >= test_prob)
3264 continue;
3265
3266 for (row = 0; row < n; row++) {
3267 BLAS_dtrsv_testgen(norm, order_type, uplo_type,
3268 trans_type, diag_type, n,
3269 &alpha, alpha_flag, T, lda, x_gen,
3270 seed, head_r_true, tail_r_true,
3271 row, prec);
3272
3273 count++;
3274
3275 /* varying incx */
3276 for (incx_val = -2; incx_val <= 2; incx_val++) {
3277 if (incx_val == 0)
3278 continue;
3279
3280
3281 /* setting incx */
3282 incx = incx_val;
3283
3284
3285 /* set x starting index */
3286 ix = 0;
3287 if (incx < 0)
3288 ix = -(n - 1) * incx;
3289
3290 /* copy x_gen to x */
3291 for (j = 0; j < n * incx_gen; j += incx_gen) {
3292 x[ix] = x_gen[j];
3293
3294 ix += incx;
3295 }
3296
3297 /* call BLAS_dtrsv_x */
3298 FPU_FIX_STOP;
3299 BLAS_dtrsv_x(order_type, uplo_type, trans_type,
3300 diag_type, n, alpha, T, lda, x,
3301 incx_val, prec);
3302 FPU_FIX_START;
3303
3304 ix = 0;
3305 if (incx < 0)
3306 ix = -(n - 1) * incx;
3307
3308 /* computing the ratio */
3309 for (j = 0; j < n; j++) {
3310 if (j == row) {
3311 int minus_one = -1.0;
3312 /* copy row j of T to temp */
3313 dtr_copy_row(order_type, uplo_type, trans_type,
3314 n, T, lda, temp, j);
3315
3316 test_BLAS_ddot(n, blas_no_conj,
3317 minus_one, alpha,
3318 x_gen[j * incx_gen],
3319 x[ix],
3320 head_r_true[j * incx_gen],
3321 tail_r_true[j * incx_gen], x,
3322 incx_val, temp, 1, eps_int,
3323 un_int, &ratios[j]);
3324 } else {
3325 double eps_out = power(2, -BITS_D);
3326
3327 double tmp =
3328 fabs((x[ix] - head_r_true[j]) -
3329 tail_r_true[j]);
3330
3331 if (head_r_true[j] == 0.0)
3332 ratios[j] = 0.0;
3333 else
3334 ratios[j] =
3335 tmp / (head_r_true[j] * 2.0 * eps_out);
3336 }
3337
3338 /* take the max ratio */
3339 if (j == 0) {
3340 ratio = ratios[0];
3341 } else if (!(ratios[j] <= ratio)) {
3342 /* The !<= test causes NaN error to be detected.
3343 Note that (NaN > thresh) is always false. */
3344 ratio = ratios[j];
3345 }
3346 ix += incx;
3347 }
3348
3349 /* Increase the number of bad ratio, if the ratio
3350 is bigger than the threshold.
3351 The !<= below causes NaN error to be detected.
3352 Note that (NaN > thresh) is always false. */
3353 if (!(ratio <= thresh)) {
3354
3355 bad_ratios++;
3356
3357 if ((debug == 3) && /* print only when debug is on */
3358 (count != old_count) && /* print if old vector is different
3359 from the current one */
3360 (d_count == find_max_ratio) &&
3361 (p_count <= max_print) &&
3362 (ratio > 0.5 * ratio_max)) {
3363 p_count++;
3364 old_count = count;
3365
3366 printf
3367 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
3368 fname, n, ntests, thresh);
3369
3370 /* Print test info */
3371 switch (prec) {
3372 case blas_prec_single:
3373 printf("single ");
3374 break;
3375 case blas_prec_double:
3376 printf("double ");
3377 break;
3378 case blas_prec_indigenous:
3379 printf("indigenous ");
3380 break;
3381 case blas_prec_extra:
3382 printf("extra ");
3383 break;
3384 }
3385 switch (norm) {
3386 case -1:
3387 printf("near_underflow ");
3388 break;
3389 case 0:
3390 printf("near_one ");
3391 break;
3392 case 1:
3393 printf("near_overflow ");
3394 break;
3395 }
3396 switch (order_type) {
3397 case blas_rowmajor:
3398 printf("row_major ");
3399 break;
3400 case blas_colmajor:
3401 printf("col_major ");
3402 break;
3403 }
3404 switch (uplo_type) {
3405 case blas_upper:
3406 printf("upper ");
3407 break;
3408 case blas_lower:
3409 printf("lower ");
3410 break;
3411 }
3412 switch (trans_type) {
3413 case blas_no_trans:
3414 printf("no_trans ");
3415 break;
3416 case blas_trans:
3417 printf("trans ");
3418 break;
3419 case blas_conj_trans:
3420 printf("conj_trans ");
3421 break;
3422 }
3423 switch (diag_type) {
3424 case blas_non_unit_diag:
3425 printf("non_unit_diag ");
3426 break;
3427 case blas_unit_diag:
3428 printf("unit_diag ");
3429 break;
3430 }
3431
3432 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
3433 incx);
3434
3435 ix = 0;
3436 if (incx < 0)
3437 ix = -(n - 1) * incx;
3438
3439 printf(" T=");
3440 for (j = 0; j < n; j++) {
3441 /* copy row j of T to temp */
3442 dtr_copy_row(order_type, uplo_type,
3443 trans_type, n, T, lda, temp, j);
3444
3445 if (j > 0)
3446 printf(" ");
3447 dprint_vector(temp, n, 1, NULL);
3448 }
3449
3450 ix = 0;
3451 if (incx < 0)
3452 ix = -(n - 1) * incx;
3453
3454 for (j = 0; j < n; j++) {
3455 printf(" ");
3456 printf("x[%d] = ", j * incx_gen);
3457 printf("%24.16e", x_gen[j * incx_gen]);
3458 printf("; ");
3459 printf("x_final[%d] = ", ix);
3460 printf("%24.16e", x[ix]);
3461 printf("\n");
3462 ix += incx;
3463 }
3464
3465 printf(" ");
3466 printf("alpha = ");
3467 printf("%24.16e", alpha);
3468 printf("; ");
3469 printf("\n");
3470 for (j = 0; j < n; j++) {
3471 if (j == row)
3472 printf(" =>");
3473 else
3474 printf(" ");
3475 printf("[%24.16e, %24.16e]",
3476 head_r_true[j * incx_gen],
3477 tail_r_true[j * incx_gen]);
3478 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
3479 }
3480 printf(" ratio=%.4e\n", ratio);
3481 }
3482 if (bad_ratios >= MAX_BAD_TESTS) {
3483 printf("\ntoo many failures, exiting....");
3484 printf("\nTesting and compilation");
3485 printf(" are incomplete\n\n");
3486 goto end;
3487 }
3488 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
3489 printf("\nFlagrant ratio %e, exiting...",
3490 ratio);
3491 printf("\nTesting and compilation");
3492 printf(" are incomplete\n\n");
3493 goto end;
3494 }
3495 }
3496
3497 if (d_count == 0) {
3498 if (ratio > ratio_max)
3499 ratio_max = ratio;
3500
3501 if (ratio != 0.0 && ratio < ratio_min)
3502 ratio_min = ratio;
3503
3504 tot_tests++;
3505 }
3506 } /* incx */
3507 } /* row */
3508 } /* lda */
3509 } /* numtests */
3510 } /* diag */
3511 } /* trans */
3512 } /* uplo */
3513 } /* order */
3514 } /* norm */
3515 } /* prec */
3516 } /* alpha */
3517 } /* debug */
3518
3519 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
3520 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
3521 fname, n, ntests, thresh);
3522 printf
3523 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
3524 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
3525 ratio_min, ratio_max);
3526 }
3527
3528 end:
3529 FPU_FIX_STOP;
3530
3531 blas_free(x);
3532 blas_free(x_gen);
3533 blas_free(temp);
3534 blas_free(T);
3535 blas_free(head_r_true);
3536 blas_free(tail_r_true);
3537 blas_free(ratios);
3538
3539 *min_ratio = ratio_min;
3540 *num_bad_ratio = bad_ratios;
3541 *num_tests = tot_tests;
3542 return ratio_max;
3543 } /* end of do_test_dtrsv_x */
3544
do_test_dtrsv_s_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)3545 double do_test_dtrsv_s_x(int n,
3546 int ntests,
3547 int *seed,
3548 double thresh,
3549 int debug,
3550 float test_prob,
3551 double *min_ratio,
3552 int *num_bad_ratio, int *num_tests)
3553
3554 /*
3555 * Purpose
3556 * =======
3557 *
3558 * Runs a series of tests on TRSV.
3559 *
3560 * Arguments
3561 * =========
3562 *
3563 * n (input) int
3564 * The size of vector being tested
3565 *
3566 * ntests (input) int
3567 * The number of tests to run for each set of attributes.
3568 *
3569 * seed (input/output) int
3570 * The seed for the random number generator used in testgen().
3571 *
3572 * thresh (input) double
3573 * When the ratio returned from test() exceeds the specified
3574 * threshold, the current size, r_true, r_comp, and ratio will be
3575 * printed. (Since ratio is supposed to be O(1), we can set thresh
3576 * to ~10.)
3577 *
3578 * debug (input) int
3579 * If debug=3, print summary
3580 * If debug=2, print summary only if the number of bad ratios > 0
3581 * If debug=1, print complete info if tests fail
3582 * If debug=0, return max ratio
3583 *
3584 * test_prob (input) float
3585 * The specified test will be performed only if the generated
3586 * random exceeds this threshold.
3587 *
3588 * min_ratio (output) double
3589 * The minimum ratio
3590 *
3591 * num_bad_ratio (output) int
3592 * The number of tests fail; they are above the threshold.
3593 *
3594 * num_tests (output) int
3595 * The number of tests is being performed.
3596 *
3597 * Return value
3598 * ============
3599 *
3600 * The maximum ratio if run successfully, otherwise return -1
3601 *
3602 * Code structure
3603 * ==============
3604 *
3605 * debug loop -- if debug is one, the first loop computes the max ratio
3606 * -- and the last(second) loop outputs debugging information,
3607 * -- if the test fail and its ratio > 0.5 * max ratio.
3608 * -- if debug is zero, the loop is executed once
3609 * alpha loop -- varying alpha: 0, 1, or random
3610 * prec loop -- varying internal prec: single, double, or extra
3611 * norm loop -- varying norm: near undeflow, near one, or
3612 * -- near overflow
3613 * order loop -- varying order type: rowmajor or colmajor
3614 * uplo loop -- varying uplo type: upper or lower
3615 * trans loop -- varying trans type: no trans, trans, and conj trans
3616 * diag loop -- varying diag type: non unit, and unit
3617 * numtest loop -- how many times the test is perform with
3618 * -- above set of attributes
3619 * lda loop -- varying lda: n, n+1, and 2n
3620 * incx loop -- varying incx: -2, -1, 1, 2
3621 */
3622 {
3623 /* function name */
3624 const char fname[] = "BLAS_dtrsv_s_x";
3625
3626 /* max number of debug lines to print */
3627 const int max_print = 3;
3628
3629 /* Variables in the "x_val" form are loop vars for corresponding
3630 variables */
3631 int i; /* iterate through the repeating tests */
3632 int j; /* multipurpose counters */
3633 int ix; /* use to index x */
3634 int lda_val, lda = 0; /* for testing different values for lda */
3635 int incx_val, incx; /* for testing different inc values */
3636 int incx_gen = 1; /* 1 if real, 2 if complex */
3637 int d_count; /* counter for debug */
3638 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
3639 int p_count; /* counter for the number of debug lines printed */
3640 int tot_tests; /* total number of tests to be done */
3641 int norm; /* input values of near underflow/one/overflow */
3642 double ratio_max; /* the current maximum ratio */
3643 double ratio_min; /* the current minimum ratio */
3644 double *ratios; /* a temporary variable for calculating ratio */
3645 double ratio = 0.0; /* the per-use test ratio from test() */
3646 int bad_ratios = 0; /* the number of ratios over the threshold */
3647 double eps_int; /* the internal epsilon expected--2^(-24) for float */
3648 double un_int; /* the internal underflow threshold */
3649 double alpha;
3650 double beta;
3651 float *T;
3652 double *x;
3653 double *x_gen;
3654 float *temp; /* use for calculating ratio */
3655
3656 /* x_gen and y_gen are used to store vectors generated by testgen.
3657 they eventually are copied back to x and y */
3658
3659
3660 /* the true r calculated by testgen(), in double-double */
3661 double *head_r_true, *tail_r_true;
3662 int alpha_val;
3663 int alpha_flag = 0, beta_flag = 0;
3664 int order_val;
3665 enum blas_order_type order_type = 0;
3666 int prec_val;
3667 enum blas_prec_type prec = 0;
3668 int uplo_val;
3669 enum blas_uplo_type uplo_type = 0;
3670 int trans_val;
3671 enum blas_trans_type trans_type = 0;
3672 int diag_val;
3673 enum blas_diag_type diag_type = 0;
3674 int row;
3675 int saved_seed; /* for saving the original seed */
3676 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
3677 FPU_FIX_DECL;
3678
3679 /* test for bad arguments */
3680 if (n < 0 || ntests < 0)
3681 BLAS_error(fname, 0, 0, NULL);
3682
3683 /* if there is nothing to test, return all zero */
3684 if (n == 0 || ntests == 0) {
3685 *min_ratio = 0.0;
3686 *num_bad_ratio = 0;
3687 *num_tests = 0;
3688 return 0.0;
3689 }
3690
3691 FPU_FIX_START;
3692
3693
3694
3695 /* get space for calculation */
3696 x = (double *) blas_malloc(n * 2 * sizeof(double));
3697 if (n * 2 > 0 && x == NULL) {
3698 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3699 }
3700 x_gen = (double *) blas_malloc(n * sizeof(double));
3701 if (n > 0 && x_gen == NULL) {
3702 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3703 }
3704 T = (float *) blas_malloc(4 * n * n * sizeof(float));
3705 if (4 * n * n > 0 && T == NULL) {
3706 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3707 }
3708 temp = (float *) blas_malloc(n * sizeof(float));
3709 if (n > 0 && temp == NULL) {
3710 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3711 }
3712 head_r_true = (double *) blas_malloc(n * sizeof(double));
3713 tail_r_true = (double *) blas_malloc(n * sizeof(double));
3714 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
3715 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3716 }
3717 ratios = (double *) blas_malloc(n * sizeof(double));
3718 if (n > 0 && ratios == NULL) {
3719 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3720 }
3721
3722 /* initialization */
3723 saved_seed = *seed;
3724 ratio_min = 1e308;
3725 ratio_max = 0.0;
3726 tot_tests = 0;
3727 p_count = 0;
3728 count = 0;
3729 find_max_ratio = 0;
3730 beta = 0.0;
3731 beta_flag = 1;
3732 if (debug == 3)
3733 find_max_ratio = 1;
3734
3735 /* The debug iteration:
3736 If debug=1, then will execute the iteration twice. First, compute the
3737 max ratio. Second, print info if ratio > (50% * ratio_max). */
3738 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
3739 bad_ratios = 0; /* set to zero */
3740
3741 if ((debug == 3) && (d_count == find_max_ratio))
3742 *seed = saved_seed; /* restore the original seed */
3743
3744 /* varying alpha */
3745 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
3746 alpha_flag = 0;
3747 switch (alpha_val) {
3748 case 0:
3749 alpha = 0.0;
3750 alpha_flag = 1;
3751 break;
3752 case 1:
3753 alpha = 1.0;
3754 alpha_flag = 1;
3755 break;
3756 }
3757
3758
3759 /* varying extra precs */
3760 for (prec_val = 0; prec_val <= 2; prec_val++) {
3761 switch (prec_val) {
3762 case 0:
3763 eps_int = power(2, -BITS_D);
3764 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3765 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3766 prec = blas_prec_double;
3767 break;
3768 case 1:
3769 eps_int = power(2, -BITS_D);
3770 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3771 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3772 prec = blas_prec_double;
3773 break;
3774 case 2:
3775 default:
3776 eps_int = power(2, -BITS_E);
3777 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
3778 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
3779 prec = blas_prec_extra;
3780 break;
3781 }
3782
3783 /* values near underflow, 1, or overflow */
3784 for (norm = -1; norm <= 1; norm++) {
3785
3786 /* row or col major */
3787 for (order_val = 0; order_val < 2; order_val++) {
3788 switch (order_val) {
3789 case 0:
3790 order_type = blas_rowmajor;
3791 break;
3792 case 1:
3793 order_type = blas_colmajor;
3794 break;
3795 }
3796
3797 /* upper or lower */
3798 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
3799 switch (uplo_val) {
3800 case 0:
3801 uplo_type = blas_upper;
3802 break;
3803 case 1:
3804 uplo_type = blas_lower;
3805 break;
3806 }
3807
3808 /* no trans, trans, or conj trans */
3809 for (trans_val = 0; trans_val < 3; trans_val++) {
3810 switch (trans_val) {
3811 case 0:
3812 trans_type = blas_no_trans;
3813 break;
3814 case 1:
3815 trans_type = blas_trans;
3816 break;
3817 case 2:
3818 trans_type = blas_conj_trans;
3819 break;
3820 }
3821
3822 /* non_unit_diag, unit_diag */
3823 for (diag_val = 0; diag_val < 2; diag_val++) {
3824 switch (diag_val) {
3825 case 0:
3826 diag_type = blas_non_unit_diag;
3827 break;
3828 case 1:
3829 diag_type = blas_unit_diag;
3830 break;
3831 }
3832
3833 /* number of tests */
3834 for (i = 0; i < ntests; i++) {
3835
3836 for (lda_val = 0; lda_val < 3; lda_val++) {
3837 switch (lda_val) {
3838 case 0:
3839 lda = n;
3840 break;
3841 case 1:
3842 lda = n + 1;
3843 break;
3844 case 2:
3845 lda = 2 * n;
3846 break;
3847 }
3848
3849 /* For the sake of speed, we throw out this case at random */
3850 if (xrand(seed) >= test_prob)
3851 continue;
3852
3853 for (row = 0; row < n; row++) {
3854 BLAS_dtrsv_s_testgen(norm, order_type, uplo_type,
3855 trans_type, diag_type, n,
3856 &alpha, alpha_flag, T, lda,
3857 x_gen, seed, head_r_true,
3858 tail_r_true, row, prec);
3859
3860 count++;
3861
3862 /* varying incx */
3863 for (incx_val = -2; incx_val <= 2; incx_val++) {
3864 if (incx_val == 0)
3865 continue;
3866
3867
3868 /* setting incx */
3869 incx = incx_val;
3870
3871
3872 /* set x starting index */
3873 ix = 0;
3874 if (incx < 0)
3875 ix = -(n - 1) * incx;
3876
3877 /* copy x_gen to x */
3878 for (j = 0; j < n * incx_gen; j += incx_gen) {
3879 x[ix] = x_gen[j];
3880
3881 ix += incx;
3882 }
3883
3884 /* call BLAS_dtrsv_s_x */
3885 FPU_FIX_STOP;
3886 BLAS_dtrsv_s_x(order_type, uplo_type, trans_type,
3887 diag_type, n, alpha, T, lda, x,
3888 incx_val, prec);
3889 FPU_FIX_START;
3890
3891 ix = 0;
3892 if (incx < 0)
3893 ix = -(n - 1) * incx;
3894
3895 /* computing the ratio */
3896 for (j = 0; j < n; j++) {
3897 if (j == row) {
3898 int minus_one = -1.0;
3899 /* copy row j of T to temp */
3900 str_copy_row(order_type, uplo_type, trans_type,
3901 n, T, lda, temp, j);
3902
3903 test_BLAS_ddot_d_s(n, blas_no_conj,
3904 minus_one, alpha,
3905 x_gen[j * incx_gen],
3906 x[ix],
3907 head_r_true[j * incx_gen],
3908 tail_r_true[j * incx_gen], x,
3909 incx_val, temp, 1, eps_int,
3910 un_int, &ratios[j]);
3911 } else {
3912 double eps_out = power(2, -BITS_D);
3913
3914 double tmp =
3915 fabs((x[ix] - head_r_true[j]) -
3916 tail_r_true[j]);
3917
3918 if (head_r_true[j] == 0.0)
3919 ratios[j] = 0.0;
3920 else
3921 ratios[j] =
3922 tmp / (head_r_true[j] * 2.0 * eps_out);
3923 }
3924
3925 /* take the max ratio */
3926 if (j == 0) {
3927 ratio = ratios[0];
3928 } else if (!(ratios[j] <= ratio)) {
3929 /* The !<= test causes NaN error to be detected.
3930 Note that (NaN > thresh) is always false. */
3931 ratio = ratios[j];
3932 }
3933 ix += incx;
3934 }
3935
3936 /* Increase the number of bad ratio, if the ratio
3937 is bigger than the threshold.
3938 The !<= below causes NaN error to be detected.
3939 Note that (NaN > thresh) is always false. */
3940 if (!(ratio <= thresh)) {
3941
3942 bad_ratios++;
3943
3944 if ((debug == 3) && /* print only when debug is on */
3945 (count != old_count) && /* print if old vector is different
3946 from the current one */
3947 (d_count == find_max_ratio) &&
3948 (p_count <= max_print) &&
3949 (ratio > 0.5 * ratio_max)) {
3950 p_count++;
3951 old_count = count;
3952
3953 printf
3954 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
3955 fname, n, ntests, thresh);
3956
3957 /* Print test info */
3958 switch (prec) {
3959 case blas_prec_single:
3960 printf("single ");
3961 break;
3962 case blas_prec_double:
3963 printf("double ");
3964 break;
3965 case blas_prec_indigenous:
3966 printf("indigenous ");
3967 break;
3968 case blas_prec_extra:
3969 printf("extra ");
3970 break;
3971 }
3972 switch (norm) {
3973 case -1:
3974 printf("near_underflow ");
3975 break;
3976 case 0:
3977 printf("near_one ");
3978 break;
3979 case 1:
3980 printf("near_overflow ");
3981 break;
3982 }
3983 switch (order_type) {
3984 case blas_rowmajor:
3985 printf("row_major ");
3986 break;
3987 case blas_colmajor:
3988 printf("col_major ");
3989 break;
3990 }
3991 switch (uplo_type) {
3992 case blas_upper:
3993 printf("upper ");
3994 break;
3995 case blas_lower:
3996 printf("lower ");
3997 break;
3998 }
3999 switch (trans_type) {
4000 case blas_no_trans:
4001 printf("no_trans ");
4002 break;
4003 case blas_trans:
4004 printf("trans ");
4005 break;
4006 case blas_conj_trans:
4007 printf("conj_trans ");
4008 break;
4009 }
4010 switch (diag_type) {
4011 case blas_non_unit_diag:
4012 printf("non_unit_diag ");
4013 break;
4014 case blas_unit_diag:
4015 printf("unit_diag ");
4016 break;
4017 }
4018
4019 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
4020 incx);
4021
4022 ix = 0;
4023 if (incx < 0)
4024 ix = -(n - 1) * incx;
4025
4026 printf(" T=");
4027 for (j = 0; j < n; j++) {
4028 /* copy row j of T to temp */
4029 str_copy_row(order_type, uplo_type,
4030 trans_type, n, T, lda, temp, j);
4031
4032 if (j > 0)
4033 printf(" ");
4034 sprint_vector(temp, n, 1, NULL);
4035 }
4036
4037 ix = 0;
4038 if (incx < 0)
4039 ix = -(n - 1) * incx;
4040
4041 for (j = 0; j < n; j++) {
4042 printf(" ");
4043 printf("x[%d] = ", j * incx_gen);
4044 printf("%24.16e", x_gen[j * incx_gen]);
4045 printf("; ");
4046 printf("x_final[%d] = ", ix);
4047 printf("%24.16e", x[ix]);
4048 printf("\n");
4049 ix += incx;
4050 }
4051
4052 printf(" ");
4053 printf("alpha = ");
4054 printf("%24.16e", alpha);
4055 printf("; ");
4056 printf("\n");
4057 for (j = 0; j < n; j++) {
4058 if (j == row)
4059 printf(" =>");
4060 else
4061 printf(" ");
4062 printf("[%24.16e, %24.16e]",
4063 head_r_true[j * incx_gen],
4064 tail_r_true[j * incx_gen]);
4065 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
4066 }
4067 printf(" ratio=%.4e\n", ratio);
4068 }
4069 if (bad_ratios >= MAX_BAD_TESTS) {
4070 printf("\ntoo many failures, exiting....");
4071 printf("\nTesting and compilation");
4072 printf(" are incomplete\n\n");
4073 goto end;
4074 }
4075 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4076 printf("\nFlagrant ratio %e, exiting...",
4077 ratio);
4078 printf("\nTesting and compilation");
4079 printf(" are incomplete\n\n");
4080 goto end;
4081 }
4082 }
4083
4084 if (d_count == 0) {
4085 if (ratio > ratio_max)
4086 ratio_max = ratio;
4087
4088 if (ratio != 0.0 && ratio < ratio_min)
4089 ratio_min = ratio;
4090
4091 tot_tests++;
4092 }
4093 } /* incx */
4094 } /* row */
4095 } /* lda */
4096 } /* numtests */
4097 } /* diag */
4098 } /* trans */
4099 } /* uplo */
4100 } /* order */
4101 } /* norm */
4102 } /* prec */
4103 } /* alpha */
4104 } /* debug */
4105
4106 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
4107 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
4108 fname, n, ntests, thresh);
4109 printf
4110 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
4111 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
4112 ratio_min, ratio_max);
4113 }
4114
4115 end:
4116 FPU_FIX_STOP;
4117
4118 blas_free(x);
4119 blas_free(x_gen);
4120 blas_free(temp);
4121 blas_free(T);
4122 blas_free(head_r_true);
4123 blas_free(tail_r_true);
4124 blas_free(ratios);
4125
4126 *min_ratio = ratio_min;
4127 *num_bad_ratio = bad_ratios;
4128 *num_tests = tot_tests;
4129 return ratio_max;
4130 } /* end of do_test_dtrsv_s_x */
4131
do_test_ctrsv_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)4132 double do_test_ctrsv_x(int n,
4133 int ntests,
4134 int *seed,
4135 double thresh,
4136 int debug,
4137 float test_prob,
4138 double *min_ratio, int *num_bad_ratio, int *num_tests)
4139
4140 /*
4141 * Purpose
4142 * =======
4143 *
4144 * Runs a series of tests on TRSV.
4145 *
4146 * Arguments
4147 * =========
4148 *
4149 * n (input) int
4150 * The size of vector being tested
4151 *
4152 * ntests (input) int
4153 * The number of tests to run for each set of attributes.
4154 *
4155 * seed (input/output) int
4156 * The seed for the random number generator used in testgen().
4157 *
4158 * thresh (input) double
4159 * When the ratio returned from test() exceeds the specified
4160 * threshold, the current size, r_true, r_comp, and ratio will be
4161 * printed. (Since ratio is supposed to be O(1), we can set thresh
4162 * to ~10.)
4163 *
4164 * debug (input) int
4165 * If debug=3, print summary
4166 * If debug=2, print summary only if the number of bad ratios > 0
4167 * If debug=1, print complete info if tests fail
4168 * If debug=0, return max ratio
4169 *
4170 * test_prob (input) float
4171 * The specified test will be performed only if the generated
4172 * random exceeds this threshold.
4173 *
4174 * min_ratio (output) double
4175 * The minimum ratio
4176 *
4177 * num_bad_ratio (output) int
4178 * The number of tests fail; they are above the threshold.
4179 *
4180 * num_tests (output) int
4181 * The number of tests is being performed.
4182 *
4183 * Return value
4184 * ============
4185 *
4186 * The maximum ratio if run successfully, otherwise return -1
4187 *
4188 * Code structure
4189 * ==============
4190 *
4191 * debug loop -- if debug is one, the first loop computes the max ratio
4192 * -- and the last(second) loop outputs debugging information,
4193 * -- if the test fail and its ratio > 0.5 * max ratio.
4194 * -- if debug is zero, the loop is executed once
4195 * alpha loop -- varying alpha: 0, 1, or random
4196 * prec loop -- varying internal prec: single, double, or extra
4197 * norm loop -- varying norm: near undeflow, near one, or
4198 * -- near overflow
4199 * order loop -- varying order type: rowmajor or colmajor
4200 * uplo loop -- varying uplo type: upper or lower
4201 * trans loop -- varying trans type: no trans, trans, and conj trans
4202 * diag loop -- varying diag type: non unit, and unit
4203 * numtest loop -- how many times the test is perform with
4204 * -- above set of attributes
4205 * lda loop -- varying lda: n, n+1, and 2n
4206 * incx loop -- varying incx: -2, -1, 1, 2
4207 */
4208 {
4209 /* function name */
4210 const char fname[] = "BLAS_ctrsv_x";
4211
4212 /* max number of debug lines to print */
4213 const int max_print = 3;
4214
4215 /* Variables in the "x_val" form are loop vars for corresponding
4216 variables */
4217 int i; /* iterate through the repeating tests */
4218 int j; /* multipurpose counters */
4219 int ix; /* use to index x */
4220 int lda_val, lda = 0; /* for testing different values for lda */
4221 int incx_val, incx; /* for testing different inc values */
4222 int incx_gen = 1; /* 1 if real, 2 if complex */
4223 int d_count; /* counter for debug */
4224 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
4225 int p_count; /* counter for the number of debug lines printed */
4226 int tot_tests; /* total number of tests to be done */
4227 int norm; /* input values of near underflow/one/overflow */
4228 double ratio_max; /* the current maximum ratio */
4229 double ratio_min; /* the current minimum ratio */
4230 double *ratios; /* a temporary variable for calculating ratio */
4231 double ratio = 0.0; /* the per-use test ratio from test() */
4232 int bad_ratios = 0; /* the number of ratios over the threshold */
4233 double eps_int; /* the internal epsilon expected--2^(-24) for float */
4234 double un_int; /* the internal underflow threshold */
4235 float alpha[2];
4236 float beta[2];
4237 float *T;
4238 float *x;
4239 float *x_gen;
4240 float *temp; /* use for calculating ratio */
4241
4242 /* x_gen and y_gen are used to store vectors generated by testgen.
4243 they eventually are copied back to x and y */
4244
4245
4246 /* the true r calculated by testgen(), in double-double */
4247 double *head_r_true, *tail_r_true;
4248
4249 int alpha_val;
4250 int alpha_flag = 0, beta_flag = 0;
4251 int order_val;
4252 enum blas_order_type order_type = 0;
4253 int prec_val;
4254 enum blas_prec_type prec = 0;
4255 int uplo_val;
4256 enum blas_uplo_type uplo_type = 0;
4257 int trans_val;
4258 enum blas_trans_type trans_type = 0;
4259 int diag_val;
4260 enum blas_diag_type diag_type = 0;
4261 int row;
4262 int saved_seed; /* for saving the original seed */
4263 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
4264 FPU_FIX_DECL;
4265
4266 /* test for bad arguments */
4267 if (n < 0 || ntests < 0)
4268 BLAS_error(fname, 0, 0, NULL);
4269
4270 /* if there is nothing to test, return all zero */
4271 if (n == 0 || ntests == 0) {
4272 *min_ratio = 0.0;
4273 *num_bad_ratio = 0;
4274 *num_tests = 0;
4275 return 0.0;
4276 }
4277
4278 FPU_FIX_START;
4279
4280 incx_gen *= 2;
4281
4282 /* get space for calculation */
4283 x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
4284 if (n * 2 > 0 && x == NULL) {
4285 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4286 }
4287 x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
4288 if (n > 0 && x_gen == NULL) {
4289 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4290 }
4291 T = (float *) blas_malloc(4 * n * n * sizeof(float) * 2);
4292 if (4 * n * n > 0 && T == NULL) {
4293 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4294 }
4295 temp = (float *) blas_malloc(n * sizeof(float) * 2);
4296 if (n > 0 && temp == NULL) {
4297 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4298 }
4299 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
4300 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
4301 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4302 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4303 }
4304 ratios = (double *) blas_malloc(n * sizeof(double));
4305 if (n > 0 && ratios == NULL) {
4306 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4307 }
4308
4309 /* initialization */
4310 saved_seed = *seed;
4311 ratio_min = 1e308;
4312 ratio_max = 0.0;
4313 tot_tests = 0;
4314 p_count = 0;
4315 count = 0;
4316 find_max_ratio = 0;
4317 beta[0] = beta[1] = 0.0;
4318 beta_flag = 1;
4319 if (debug == 3)
4320 find_max_ratio = 1;
4321
4322 /* The debug iteration:
4323 If debug=1, then will execute the iteration twice. First, compute the
4324 max ratio. Second, print info if ratio > (50% * ratio_max). */
4325 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
4326 bad_ratios = 0; /* set to zero */
4327
4328 if ((debug == 3) && (d_count == find_max_ratio))
4329 *seed = saved_seed; /* restore the original seed */
4330
4331 /* varying alpha */
4332 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
4333 alpha_flag = 0;
4334 switch (alpha_val) {
4335 case 0:
4336 alpha[0] = alpha[1] = 0.0;
4337 alpha_flag = 1;
4338 break;
4339 case 1:
4340 alpha[0] = 1.0;
4341 alpha[1] = 0.0;
4342 alpha_flag = 1;
4343 break;
4344 }
4345
4346
4347 /* varying extra precs */
4348 for (prec_val = 0; prec_val <= 2; prec_val++) {
4349 switch (prec_val) {
4350 case 0:
4351 eps_int = power(2, -BITS_S);
4352 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
4353 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
4354 prec = blas_prec_single;
4355 break;
4356 case 1:
4357 eps_int = power(2, -BITS_D);
4358 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4359 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4360 prec = blas_prec_double;
4361 break;
4362 case 2:
4363 default:
4364 eps_int = power(2, -BITS_E);
4365 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
4366 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
4367 prec = blas_prec_extra;
4368 break;
4369 }
4370
4371 /* values near underflow, 1, or overflow */
4372 for (norm = -1; norm <= 1; norm++) {
4373
4374 /* row or col major */
4375 for (order_val = 0; order_val < 2; order_val++) {
4376 switch (order_val) {
4377 case 0:
4378 order_type = blas_rowmajor;
4379 break;
4380 case 1:
4381 order_type = blas_colmajor;
4382 break;
4383 }
4384
4385 /* upper or lower */
4386 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
4387 switch (uplo_val) {
4388 case 0:
4389 uplo_type = blas_upper;
4390 break;
4391 case 1:
4392 uplo_type = blas_lower;
4393 break;
4394 }
4395
4396 /* no trans, trans, or conj trans */
4397 for (trans_val = 0; trans_val < 3; trans_val++) {
4398 switch (trans_val) {
4399 case 0:
4400 trans_type = blas_no_trans;
4401 break;
4402 case 1:
4403 trans_type = blas_trans;
4404 break;
4405 case 2:
4406 trans_type = blas_conj_trans;
4407 break;
4408 }
4409
4410 /* non_unit_diag, unit_diag */
4411 for (diag_val = 0; diag_val < 2; diag_val++) {
4412 switch (diag_val) {
4413 case 0:
4414 diag_type = blas_non_unit_diag;
4415 break;
4416 case 1:
4417 diag_type = blas_unit_diag;
4418 break;
4419 }
4420
4421 /* number of tests */
4422 for (i = 0; i < ntests; i++) {
4423
4424 for (lda_val = 0; lda_val < 3; lda_val++) {
4425 switch (lda_val) {
4426 case 0:
4427 lda = n;
4428 break;
4429 case 1:
4430 lda = n + 1;
4431 break;
4432 case 2:
4433 lda = 2 * n;
4434 break;
4435 }
4436
4437 /* For the sake of speed, we throw out this case at random */
4438 if (xrand(seed) >= test_prob)
4439 continue;
4440
4441 for (row = 0; row < n; row++) {
4442 BLAS_ctrsv_testgen(norm, order_type, uplo_type,
4443 trans_type, diag_type, n,
4444 &alpha, alpha_flag, T, lda, x_gen,
4445 seed, head_r_true, tail_r_true,
4446 row, prec);
4447
4448 count++;
4449
4450 /* varying incx */
4451 for (incx_val = -2; incx_val <= 2; incx_val++) {
4452 if (incx_val == 0)
4453 continue;
4454
4455
4456 /* setting incx */
4457 incx = incx_val;
4458 incx *= 2;
4459
4460 /* set x starting index */
4461 ix = 0;
4462 if (incx < 0)
4463 ix = -(n - 1) * incx;
4464
4465 /* copy x_gen to x */
4466 for (j = 0; j < n * incx_gen; j += incx_gen) {
4467 x[ix] = x_gen[j];
4468 x[ix + 1] = x_gen[j + 1];
4469
4470 ix += incx;
4471 }
4472
4473 /* call BLAS_ctrsv_x */
4474 FPU_FIX_STOP;
4475 BLAS_ctrsv_x(order_type, uplo_type, trans_type,
4476 diag_type, n, alpha, T, lda, x,
4477 incx_val, prec);
4478 FPU_FIX_START;
4479
4480 ix = 0;
4481 if (incx < 0)
4482 ix = -(n - 1) * incx;
4483
4484 /* computing the ratio */
4485 for (j = 0; j < n; j++) {
4486 if (j == row) {
4487 float minus_one[2] = { -1.0, 0.0 };
4488 /* copy row j of T to temp */
4489 ctr_copy_row(order_type, uplo_type, trans_type,
4490 n, T, lda, temp, j);
4491
4492 test_BLAS_cdot(n, blas_no_conj,
4493 minus_one, alpha,
4494 &x_gen[j * incx_gen],
4495 &x[ix],
4496 &head_r_true[j * incx_gen],
4497 &tail_r_true[j * incx_gen], x,
4498 incx_val, temp, 1, eps_int,
4499 un_int, &ratios[j]);
4500 } else {
4501 double eps_out = power(2, -BITS_S);
4502
4503 double tmp =
4504 sqrt(((x[ix] - head_r_true[j * incx_gen]) -
4505 tail_r_true[j * incx_gen]) * ((x[ix] -
4506 head_r_true
4507 [j *
4508 incx_gen])
4509 -
4510 tail_r_true
4511 [j *
4512 incx_gen])
4513 +
4514 ((x[ix + 1] -
4515 head_r_true[j * incx_gen + 1]) -
4516 tail_r_true[j * incx_gen +
4517 1]) * ((x[ix + 1] -
4518 head_r_true[j *
4519 incx_gen
4520 + 1]) -
4521 tail_r_true[j *
4522 incx_gen
4523 + 1]));
4524
4525 if (head_r_true[j * incx_gen] == 0.0
4526 && head_r_true[j * incx_gen + 1] == 0.0)
4527 ratios[j] = 0.0;
4528 else
4529 ratios[j] =
4530 tmp /
4531 (sqrt
4532 (head_r_true[j * incx_gen] *
4533 head_r_true[j * incx_gen] +
4534 head_r_true[j * incx_gen +
4535 1] * head_r_true[j *
4536 incx_gen +
4537 1]) * 8.0 *
4538 sqrt(2.0) * eps_out);
4539 }
4540
4541 /* take the max ratio */
4542 if (j == 0) {
4543 ratio = ratios[0];
4544 } else if (!(ratios[j] <= ratio)) {
4545 /* The !<= test causes NaN error to be detected.
4546 Note that (NaN > thresh) is always false. */
4547 ratio = ratios[j];
4548 }
4549 ix += incx;
4550 }
4551
4552 /* Increase the number of bad ratio, if the ratio
4553 is bigger than the threshold.
4554 The !<= below causes NaN error to be detected.
4555 Note that (NaN > thresh) is always false. */
4556 if (!(ratio <= thresh)) {
4557
4558 bad_ratios++;
4559
4560 if ((debug == 3) && /* print only when debug is on */
4561 (count != old_count) && /* print if old vector is different
4562 from the current one */
4563 (d_count == find_max_ratio) &&
4564 (p_count <= max_print) &&
4565 (ratio > 0.5 * ratio_max)) {
4566 p_count++;
4567 old_count = count;
4568
4569 printf
4570 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
4571 fname, n, ntests, thresh);
4572
4573 /* Print test info */
4574 switch (prec) {
4575 case blas_prec_single:
4576 printf("single ");
4577 break;
4578 case blas_prec_double:
4579 printf("double ");
4580 break;
4581 case blas_prec_indigenous:
4582 printf("indigenous ");
4583 break;
4584 case blas_prec_extra:
4585 printf("extra ");
4586 break;
4587 }
4588 switch (norm) {
4589 case -1:
4590 printf("near_underflow ");
4591 break;
4592 case 0:
4593 printf("near_one ");
4594 break;
4595 case 1:
4596 printf("near_overflow ");
4597 break;
4598 }
4599 switch (order_type) {
4600 case blas_rowmajor:
4601 printf("row_major ");
4602 break;
4603 case blas_colmajor:
4604 printf("col_major ");
4605 break;
4606 }
4607 switch (uplo_type) {
4608 case blas_upper:
4609 printf("upper ");
4610 break;
4611 case blas_lower:
4612 printf("lower ");
4613 break;
4614 }
4615 switch (trans_type) {
4616 case blas_no_trans:
4617 printf("no_trans ");
4618 break;
4619 case blas_trans:
4620 printf("trans ");
4621 break;
4622 case blas_conj_trans:
4623 printf("conj_trans ");
4624 break;
4625 }
4626 switch (diag_type) {
4627 case blas_non_unit_diag:
4628 printf("non_unit_diag ");
4629 break;
4630 case blas_unit_diag:
4631 printf("unit_diag ");
4632 break;
4633 }
4634
4635 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
4636 incx);
4637
4638 ix = 0;
4639 if (incx < 0)
4640 ix = -(n - 1) * incx;
4641
4642 printf(" T=");
4643 for (j = 0; j < n; j++) {
4644 /* copy row j of T to temp */
4645 ctr_copy_row(order_type, uplo_type,
4646 trans_type, n, T, lda, temp, j);
4647
4648 if (j > 0)
4649 printf(" ");
4650 cprint_vector(temp, n, 1, NULL);
4651 }
4652
4653 ix = 0;
4654 if (incx < 0)
4655 ix = -(n - 1) * incx;
4656
4657 for (j = 0; j < n; j++) {
4658 printf(" ");
4659 printf("x[%d] = ", j * incx_gen);
4660 printf("(%16.8e, %16.8e)",
4661 x_gen[j * incx_gen],
4662 x_gen[j * incx_gen + 1]);
4663 printf("; ");
4664 printf("x_final[%d] = ", ix);
4665 printf("(%16.8e, %16.8e)", x[ix], x[ix + 1]);
4666 printf("\n");
4667 ix += incx;
4668 }
4669
4670 printf(" ");
4671 printf("alpha = ");
4672 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
4673 printf("; ");
4674 printf("\n");
4675 for (j = 0; j < n; j++) {
4676 if (j == row)
4677 printf(" =>");
4678 else
4679 printf(" ");
4680 printf
4681 ("([%24.16e %24.16e], [%24.16e %24.16e])",
4682 head_r_true[j * incx_gen],
4683 tail_r_true[j * incx_gen],
4684 head_r_true[j * incx_gen + 1],
4685 tail_r_true[j * incx_gen + 1]);
4686 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
4687 }
4688 printf(" ratio=%.4e\n", ratio);
4689 }
4690 if (bad_ratios >= MAX_BAD_TESTS) {
4691 printf("\ntoo many failures, exiting....");
4692 printf("\nTesting and compilation");
4693 printf(" are incomplete\n\n");
4694 goto end;
4695 }
4696 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4697 printf("\nFlagrant ratio %e, exiting...",
4698 ratio);
4699 printf("\nTesting and compilation");
4700 printf(" are incomplete\n\n");
4701 goto end;
4702 }
4703 }
4704
4705 if (d_count == 0) {
4706 if (ratio > ratio_max)
4707 ratio_max = ratio;
4708
4709 if (ratio != 0.0 && ratio < ratio_min)
4710 ratio_min = ratio;
4711
4712 tot_tests++;
4713 }
4714 } /* incx */
4715 } /* row */
4716 } /* lda */
4717 } /* numtests */
4718 } /* diag */
4719 } /* trans */
4720 } /* uplo */
4721 } /* order */
4722 } /* norm */
4723 } /* prec */
4724 } /* alpha */
4725 } /* debug */
4726
4727 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
4728 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
4729 fname, n, ntests, thresh);
4730 printf
4731 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
4732 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
4733 ratio_min, ratio_max);
4734 }
4735
4736 end:
4737 FPU_FIX_STOP;
4738
4739 blas_free(x);
4740 blas_free(x_gen);
4741 blas_free(temp);
4742 blas_free(T);
4743 blas_free(head_r_true);
4744 blas_free(tail_r_true);
4745 blas_free(ratios);
4746
4747 *min_ratio = ratio_min;
4748 *num_bad_ratio = bad_ratios;
4749 *num_tests = tot_tests;
4750 return ratio_max;
4751 } /* end of do_test_ctrsv_x */
4752
do_test_ztrsv_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)4753 double do_test_ztrsv_x(int n,
4754 int ntests,
4755 int *seed,
4756 double thresh,
4757 int debug,
4758 float test_prob,
4759 double *min_ratio, int *num_bad_ratio, int *num_tests)
4760
4761 /*
4762 * Purpose
4763 * =======
4764 *
4765 * Runs a series of tests on TRSV.
4766 *
4767 * Arguments
4768 * =========
4769 *
4770 * n (input) int
4771 * The size of vector being tested
4772 *
4773 * ntests (input) int
4774 * The number of tests to run for each set of attributes.
4775 *
4776 * seed (input/output) int
4777 * The seed for the random number generator used in testgen().
4778 *
4779 * thresh (input) double
4780 * When the ratio returned from test() exceeds the specified
4781 * threshold, the current size, r_true, r_comp, and ratio will be
4782 * printed. (Since ratio is supposed to be O(1), we can set thresh
4783 * to ~10.)
4784 *
4785 * debug (input) int
4786 * If debug=3, print summary
4787 * If debug=2, print summary only if the number of bad ratios > 0
4788 * If debug=1, print complete info if tests fail
4789 * If debug=0, return max ratio
4790 *
4791 * test_prob (input) float
4792 * The specified test will be performed only if the generated
4793 * random exceeds this threshold.
4794 *
4795 * min_ratio (output) double
4796 * The minimum ratio
4797 *
4798 * num_bad_ratio (output) int
4799 * The number of tests fail; they are above the threshold.
4800 *
4801 * num_tests (output) int
4802 * The number of tests is being performed.
4803 *
4804 * Return value
4805 * ============
4806 *
4807 * The maximum ratio if run successfully, otherwise return -1
4808 *
4809 * Code structure
4810 * ==============
4811 *
4812 * debug loop -- if debug is one, the first loop computes the max ratio
4813 * -- and the last(second) loop outputs debugging information,
4814 * -- if the test fail and its ratio > 0.5 * max ratio.
4815 * -- if debug is zero, the loop is executed once
4816 * alpha loop -- varying alpha: 0, 1, or random
4817 * prec loop -- varying internal prec: single, double, or extra
4818 * norm loop -- varying norm: near undeflow, near one, or
4819 * -- near overflow
4820 * order loop -- varying order type: rowmajor or colmajor
4821 * uplo loop -- varying uplo type: upper or lower
4822 * trans loop -- varying trans type: no trans, trans, and conj trans
4823 * diag loop -- varying diag type: non unit, and unit
4824 * numtest loop -- how many times the test is perform with
4825 * -- above set of attributes
4826 * lda loop -- varying lda: n, n+1, and 2n
4827 * incx loop -- varying incx: -2, -1, 1, 2
4828 */
4829 {
4830 /* function name */
4831 const char fname[] = "BLAS_ztrsv_x";
4832
4833 /* max number of debug lines to print */
4834 const int max_print = 3;
4835
4836 /* Variables in the "x_val" form are loop vars for corresponding
4837 variables */
4838 int i; /* iterate through the repeating tests */
4839 int j; /* multipurpose counters */
4840 int ix; /* use to index x */
4841 int lda_val, lda = 0; /* for testing different values for lda */
4842 int incx_val, incx; /* for testing different inc values */
4843 int incx_gen = 1; /* 1 if real, 2 if complex */
4844 int d_count; /* counter for debug */
4845 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
4846 int p_count; /* counter for the number of debug lines printed */
4847 int tot_tests; /* total number of tests to be done */
4848 int norm; /* input values of near underflow/one/overflow */
4849 double ratio_max; /* the current maximum ratio */
4850 double ratio_min; /* the current minimum ratio */
4851 double *ratios; /* a temporary variable for calculating ratio */
4852 double ratio = 0.0; /* the per-use test ratio from test() */
4853 int bad_ratios = 0; /* the number of ratios over the threshold */
4854 double eps_int; /* the internal epsilon expected--2^(-24) for float */
4855 double un_int; /* the internal underflow threshold */
4856 double alpha[2];
4857 double beta[2];
4858 double *T;
4859 double *x;
4860 double *x_gen;
4861 double *temp; /* use for calculating ratio */
4862
4863 /* x_gen and y_gen are used to store vectors generated by testgen.
4864 they eventually are copied back to x and y */
4865
4866
4867 /* the true r calculated by testgen(), in double-double */
4868 double *head_r_true, *tail_r_true;
4869
4870 int alpha_val;
4871 int alpha_flag = 0, beta_flag = 0;
4872 int order_val;
4873 enum blas_order_type order_type = 0;
4874 int prec_val;
4875 enum blas_prec_type prec = 0;
4876 int uplo_val;
4877 enum blas_uplo_type uplo_type = 0;
4878 int trans_val;
4879 enum blas_trans_type trans_type = 0;
4880 int diag_val;
4881 enum blas_diag_type diag_type = 0;
4882 int row;
4883 int saved_seed; /* for saving the original seed */
4884 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
4885 FPU_FIX_DECL;
4886
4887 /* test for bad arguments */
4888 if (n < 0 || ntests < 0)
4889 BLAS_error(fname, 0, 0, NULL);
4890
4891 /* if there is nothing to test, return all zero */
4892 if (n == 0 || ntests == 0) {
4893 *min_ratio = 0.0;
4894 *num_bad_ratio = 0;
4895 *num_tests = 0;
4896 return 0.0;
4897 }
4898
4899 FPU_FIX_START;
4900
4901 incx_gen *= 2;
4902
4903 /* get space for calculation */
4904 x = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
4905 if (n * 2 > 0 && x == NULL) {
4906 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4907 }
4908 x_gen = (double *) blas_malloc(n * sizeof(double) * 2);
4909 if (n > 0 && x_gen == NULL) {
4910 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4911 }
4912 T = (double *) blas_malloc(4 * n * n * sizeof(double) * 2);
4913 if (4 * n * n > 0 && T == NULL) {
4914 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4915 }
4916 temp = (double *) blas_malloc(n * sizeof(double) * 2);
4917 if (n > 0 && temp == NULL) {
4918 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4919 }
4920 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
4921 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
4922 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4923 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4924 }
4925 ratios = (double *) blas_malloc(n * sizeof(double));
4926 if (n > 0 && ratios == NULL) {
4927 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4928 }
4929
4930 /* initialization */
4931 saved_seed = *seed;
4932 ratio_min = 1e308;
4933 ratio_max = 0.0;
4934 tot_tests = 0;
4935 p_count = 0;
4936 count = 0;
4937 find_max_ratio = 0;
4938 beta[0] = beta[1] = 0.0;
4939 beta_flag = 1;
4940 if (debug == 3)
4941 find_max_ratio = 1;
4942
4943 /* The debug iteration:
4944 If debug=1, then will execute the iteration twice. First, compute the
4945 max ratio. Second, print info if ratio > (50% * ratio_max). */
4946 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
4947 bad_ratios = 0; /* set to zero */
4948
4949 if ((debug == 3) && (d_count == find_max_ratio))
4950 *seed = saved_seed; /* restore the original seed */
4951
4952 /* varying alpha */
4953 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
4954 alpha_flag = 0;
4955 switch (alpha_val) {
4956 case 0:
4957 alpha[0] = alpha[1] = 0.0;
4958 alpha_flag = 1;
4959 break;
4960 case 1:
4961 alpha[0] = 1.0;
4962 alpha[1] = 0.0;
4963 alpha_flag = 1;
4964 break;
4965 }
4966
4967
4968 /* varying extra precs */
4969 for (prec_val = 0; prec_val <= 2; prec_val++) {
4970 switch (prec_val) {
4971 case 0:
4972 eps_int = power(2, -BITS_D);
4973 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4974 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4975 prec = blas_prec_double;
4976 break;
4977 case 1:
4978 eps_int = power(2, -BITS_D);
4979 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4980 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4981 prec = blas_prec_double;
4982 break;
4983 case 2:
4984 default:
4985 eps_int = power(2, -BITS_E);
4986 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
4987 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
4988 prec = blas_prec_extra;
4989 break;
4990 }
4991
4992 /* values near underflow, 1, or overflow */
4993 for (norm = -1; norm <= 1; norm++) {
4994
4995 /* row or col major */
4996 for (order_val = 0; order_val < 2; order_val++) {
4997 switch (order_val) {
4998 case 0:
4999 order_type = blas_rowmajor;
5000 break;
5001 case 1:
5002 order_type = blas_colmajor;
5003 break;
5004 }
5005
5006 /* upper or lower */
5007 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
5008 switch (uplo_val) {
5009 case 0:
5010 uplo_type = blas_upper;
5011 break;
5012 case 1:
5013 uplo_type = blas_lower;
5014 break;
5015 }
5016
5017 /* no trans, trans, or conj trans */
5018 for (trans_val = 0; trans_val < 3; trans_val++) {
5019 switch (trans_val) {
5020 case 0:
5021 trans_type = blas_no_trans;
5022 break;
5023 case 1:
5024 trans_type = blas_trans;
5025 break;
5026 case 2:
5027 trans_type = blas_conj_trans;
5028 break;
5029 }
5030
5031 /* non_unit_diag, unit_diag */
5032 for (diag_val = 0; diag_val < 2; diag_val++) {
5033 switch (diag_val) {
5034 case 0:
5035 diag_type = blas_non_unit_diag;
5036 break;
5037 case 1:
5038 diag_type = blas_unit_diag;
5039 break;
5040 }
5041
5042 /* number of tests */
5043 for (i = 0; i < ntests; i++) {
5044
5045 for (lda_val = 0; lda_val < 3; lda_val++) {
5046 switch (lda_val) {
5047 case 0:
5048 lda = n;
5049 break;
5050 case 1:
5051 lda = n + 1;
5052 break;
5053 case 2:
5054 lda = 2 * n;
5055 break;
5056 }
5057
5058 /* For the sake of speed, we throw out this case at random */
5059 if (xrand(seed) >= test_prob)
5060 continue;
5061
5062 for (row = 0; row < n; row++) {
5063 BLAS_ztrsv_testgen(norm, order_type, uplo_type,
5064 trans_type, diag_type, n,
5065 &alpha, alpha_flag, T, lda, x_gen,
5066 seed, head_r_true, tail_r_true,
5067 row, prec);
5068
5069 count++;
5070
5071 /* varying incx */
5072 for (incx_val = -2; incx_val <= 2; incx_val++) {
5073 if (incx_val == 0)
5074 continue;
5075
5076
5077 /* setting incx */
5078 incx = incx_val;
5079 incx *= 2;
5080
5081 /* set x starting index */
5082 ix = 0;
5083 if (incx < 0)
5084 ix = -(n - 1) * incx;
5085
5086 /* copy x_gen to x */
5087 for (j = 0; j < n * incx_gen; j += incx_gen) {
5088 x[ix] = x_gen[j];
5089 x[ix + 1] = x_gen[j + 1];
5090
5091 ix += incx;
5092 }
5093
5094 /* call BLAS_ztrsv_x */
5095 FPU_FIX_STOP;
5096 BLAS_ztrsv_x(order_type, uplo_type, trans_type,
5097 diag_type, n, alpha, T, lda, x,
5098 incx_val, prec);
5099 FPU_FIX_START;
5100
5101 ix = 0;
5102 if (incx < 0)
5103 ix = -(n - 1) * incx;
5104
5105 /* computing the ratio */
5106 for (j = 0; j < n; j++) {
5107 if (j == row) {
5108 double minus_one[2] = { -1.0, 0.0 };
5109 /* copy row j of T to temp */
5110 ztr_copy_row(order_type, uplo_type, trans_type,
5111 n, T, lda, temp, j);
5112
5113 test_BLAS_zdot(n, blas_no_conj,
5114 minus_one, alpha,
5115 &x_gen[j * incx_gen],
5116 &x[ix],
5117 &head_r_true[j * incx_gen],
5118 &tail_r_true[j * incx_gen], x,
5119 incx_val, temp, 1, eps_int,
5120 un_int, &ratios[j]);
5121 } else {
5122 double eps_out = power(2, -BITS_D);
5123
5124 double tmp =
5125 sqrt(((x[ix] - head_r_true[j * incx_gen]) -
5126 tail_r_true[j * incx_gen]) * ((x[ix] -
5127 head_r_true
5128 [j *
5129 incx_gen])
5130 -
5131 tail_r_true
5132 [j *
5133 incx_gen])
5134 +
5135 ((x[ix + 1] -
5136 head_r_true[j * incx_gen + 1]) -
5137 tail_r_true[j * incx_gen +
5138 1]) * ((x[ix + 1] -
5139 head_r_true[j *
5140 incx_gen
5141 + 1]) -
5142 tail_r_true[j *
5143 incx_gen
5144 + 1]));
5145
5146 if (head_r_true[j * incx_gen] == 0.0
5147 && head_r_true[j * incx_gen + 1] == 0.0)
5148 ratios[j] = 0.0;
5149 else
5150 ratios[j] =
5151 tmp /
5152 (sqrt
5153 (head_r_true[j * incx_gen] *
5154 head_r_true[j * incx_gen] +
5155 head_r_true[j * incx_gen +
5156 1] * head_r_true[j *
5157 incx_gen +
5158 1]) * 8.0 *
5159 sqrt(2.0) * eps_out);
5160 }
5161
5162 /* take the max ratio */
5163 if (j == 0) {
5164 ratio = ratios[0];
5165 } else if (!(ratios[j] <= ratio)) {
5166 /* The !<= test causes NaN error to be detected.
5167 Note that (NaN > thresh) is always false. */
5168 ratio = ratios[j];
5169 }
5170 ix += incx;
5171 }
5172
5173 /* Increase the number of bad ratio, if the ratio
5174 is bigger than the threshold.
5175 The !<= below causes NaN error to be detected.
5176 Note that (NaN > thresh) is always false. */
5177 if (!(ratio <= thresh)) {
5178
5179 bad_ratios++;
5180
5181 if ((debug == 3) && /* print only when debug is on */
5182 (count != old_count) && /* print if old vector is different
5183 from the current one */
5184 (d_count == find_max_ratio) &&
5185 (p_count <= max_print) &&
5186 (ratio > 0.5 * ratio_max)) {
5187 p_count++;
5188 old_count = count;
5189
5190 printf
5191 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
5192 fname, n, ntests, thresh);
5193
5194 /* Print test info */
5195 switch (prec) {
5196 case blas_prec_single:
5197 printf("single ");
5198 break;
5199 case blas_prec_double:
5200 printf("double ");
5201 break;
5202 case blas_prec_indigenous:
5203 printf("indigenous ");
5204 break;
5205 case blas_prec_extra:
5206 printf("extra ");
5207 break;
5208 }
5209 switch (norm) {
5210 case -1:
5211 printf("near_underflow ");
5212 break;
5213 case 0:
5214 printf("near_one ");
5215 break;
5216 case 1:
5217 printf("near_overflow ");
5218 break;
5219 }
5220 switch (order_type) {
5221 case blas_rowmajor:
5222 printf("row_major ");
5223 break;
5224 case blas_colmajor:
5225 printf("col_major ");
5226 break;
5227 }
5228 switch (uplo_type) {
5229 case blas_upper:
5230 printf("upper ");
5231 break;
5232 case blas_lower:
5233 printf("lower ");
5234 break;
5235 }
5236 switch (trans_type) {
5237 case blas_no_trans:
5238 printf("no_trans ");
5239 break;
5240 case blas_trans:
5241 printf("trans ");
5242 break;
5243 case blas_conj_trans:
5244 printf("conj_trans ");
5245 break;
5246 }
5247 switch (diag_type) {
5248 case blas_non_unit_diag:
5249 printf("non_unit_diag ");
5250 break;
5251 case blas_unit_diag:
5252 printf("unit_diag ");
5253 break;
5254 }
5255
5256 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
5257 incx);
5258
5259 ix = 0;
5260 if (incx < 0)
5261 ix = -(n - 1) * incx;
5262
5263 printf(" T=");
5264 for (j = 0; j < n; j++) {
5265 /* copy row j of T to temp */
5266 ztr_copy_row(order_type, uplo_type,
5267 trans_type, n, T, lda, temp, j);
5268
5269 if (j > 0)
5270 printf(" ");
5271 zprint_vector(temp, n, 1, NULL);
5272 }
5273
5274 ix = 0;
5275 if (incx < 0)
5276 ix = -(n - 1) * incx;
5277
5278 for (j = 0; j < n; j++) {
5279 printf(" ");
5280 printf("x[%d] = ", j * incx_gen);
5281 printf("(%24.16e, %24.16e)",
5282 x_gen[j * incx_gen],
5283 x_gen[j * incx_gen + 1]);
5284 printf("; ");
5285 printf("x_final[%d] = ", ix);
5286 printf("(%24.16e, %24.16e)", x[ix],
5287 x[ix + 1]);
5288 printf("\n");
5289 ix += incx;
5290 }
5291
5292 printf(" ");
5293 printf("alpha = ");
5294 printf("(%24.16e, %24.16e)", alpha[0],
5295 alpha[1]);
5296 printf("; ");
5297 printf("\n");
5298 for (j = 0; j < n; j++) {
5299 if (j == row)
5300 printf(" =>");
5301 else
5302 printf(" ");
5303 printf
5304 ("([%24.16e %24.16e], [%24.16e %24.16e])",
5305 head_r_true[j * incx_gen],
5306 tail_r_true[j * incx_gen],
5307 head_r_true[j * incx_gen + 1],
5308 tail_r_true[j * incx_gen + 1]);
5309 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
5310 }
5311 printf(" ratio=%.4e\n", ratio);
5312 }
5313 if (bad_ratios >= MAX_BAD_TESTS) {
5314 printf("\ntoo many failures, exiting....");
5315 printf("\nTesting and compilation");
5316 printf(" are incomplete\n\n");
5317 goto end;
5318 }
5319 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
5320 printf("\nFlagrant ratio %e, exiting...",
5321 ratio);
5322 printf("\nTesting and compilation");
5323 printf(" are incomplete\n\n");
5324 goto end;
5325 }
5326 }
5327
5328 if (d_count == 0) {
5329 if (ratio > ratio_max)
5330 ratio_max = ratio;
5331
5332 if (ratio != 0.0 && ratio < ratio_min)
5333 ratio_min = ratio;
5334
5335 tot_tests++;
5336 }
5337 } /* incx */
5338 } /* row */
5339 } /* lda */
5340 } /* numtests */
5341 } /* diag */
5342 } /* trans */
5343 } /* uplo */
5344 } /* order */
5345 } /* norm */
5346 } /* prec */
5347 } /* alpha */
5348 } /* debug */
5349
5350 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
5351 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
5352 fname, n, ntests, thresh);
5353 printf
5354 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
5355 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
5356 ratio_min, ratio_max);
5357 }
5358
5359 end:
5360 FPU_FIX_STOP;
5361
5362 blas_free(x);
5363 blas_free(x_gen);
5364 blas_free(temp);
5365 blas_free(T);
5366 blas_free(head_r_true);
5367 blas_free(tail_r_true);
5368 blas_free(ratios);
5369
5370 *min_ratio = ratio_min;
5371 *num_bad_ratio = bad_ratios;
5372 *num_tests = tot_tests;
5373 return ratio_max;
5374 } /* end of do_test_ztrsv_x */
5375
do_test_ztrsv_c_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)5376 double do_test_ztrsv_c_x(int n,
5377 int ntests,
5378 int *seed,
5379 double thresh,
5380 int debug,
5381 float test_prob,
5382 double *min_ratio,
5383 int *num_bad_ratio, int *num_tests)
5384
5385 /*
5386 * Purpose
5387 * =======
5388 *
5389 * Runs a series of tests on TRSV.
5390 *
5391 * Arguments
5392 * =========
5393 *
5394 * n (input) int
5395 * The size of vector being tested
5396 *
5397 * ntests (input) int
5398 * The number of tests to run for each set of attributes.
5399 *
5400 * seed (input/output) int
5401 * The seed for the random number generator used in testgen().
5402 *
5403 * thresh (input) double
5404 * When the ratio returned from test() exceeds the specified
5405 * threshold, the current size, r_true, r_comp, and ratio will be
5406 * printed. (Since ratio is supposed to be O(1), we can set thresh
5407 * to ~10.)
5408 *
5409 * debug (input) int
5410 * If debug=3, print summary
5411 * If debug=2, print summary only if the number of bad ratios > 0
5412 * If debug=1, print complete info if tests fail
5413 * If debug=0, return max ratio
5414 *
5415 * test_prob (input) float
5416 * The specified test will be performed only if the generated
5417 * random exceeds this threshold.
5418 *
5419 * min_ratio (output) double
5420 * The minimum ratio
5421 *
5422 * num_bad_ratio (output) int
5423 * The number of tests fail; they are above the threshold.
5424 *
5425 * num_tests (output) int
5426 * The number of tests is being performed.
5427 *
5428 * Return value
5429 * ============
5430 *
5431 * The maximum ratio if run successfully, otherwise return -1
5432 *
5433 * Code structure
5434 * ==============
5435 *
5436 * debug loop -- if debug is one, the first loop computes the max ratio
5437 * -- and the last(second) loop outputs debugging information,
5438 * -- if the test fail and its ratio > 0.5 * max ratio.
5439 * -- if debug is zero, the loop is executed once
5440 * alpha loop -- varying alpha: 0, 1, or random
5441 * prec loop -- varying internal prec: single, double, or extra
5442 * norm loop -- varying norm: near undeflow, near one, or
5443 * -- near overflow
5444 * order loop -- varying order type: rowmajor or colmajor
5445 * uplo loop -- varying uplo type: upper or lower
5446 * trans loop -- varying trans type: no trans, trans, and conj trans
5447 * diag loop -- varying diag type: non unit, and unit
5448 * numtest loop -- how many times the test is perform with
5449 * -- above set of attributes
5450 * lda loop -- varying lda: n, n+1, and 2n
5451 * incx loop -- varying incx: -2, -1, 1, 2
5452 */
5453 {
5454 /* function name */
5455 const char fname[] = "BLAS_ztrsv_c_x";
5456
5457 /* max number of debug lines to print */
5458 const int max_print = 3;
5459
5460 /* Variables in the "x_val" form are loop vars for corresponding
5461 variables */
5462 int i; /* iterate through the repeating tests */
5463 int j; /* multipurpose counters */
5464 int ix; /* use to index x */
5465 int lda_val, lda = 0; /* for testing different values for lda */
5466 int incx_val, incx; /* for testing different inc values */
5467 int incx_gen = 1; /* 1 if real, 2 if complex */
5468 int d_count; /* counter for debug */
5469 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
5470 int p_count; /* counter for the number of debug lines printed */
5471 int tot_tests; /* total number of tests to be done */
5472 int norm; /* input values of near underflow/one/overflow */
5473 double ratio_max; /* the current maximum ratio */
5474 double ratio_min; /* the current minimum ratio */
5475 double *ratios; /* a temporary variable for calculating ratio */
5476 double ratio = 0.0; /* the per-use test ratio from test() */
5477 int bad_ratios = 0; /* the number of ratios over the threshold */
5478 double eps_int; /* the internal epsilon expected--2^(-24) for float */
5479 double un_int; /* the internal underflow threshold */
5480 double alpha[2];
5481 double beta[2];
5482 float *T;
5483 double *x;
5484 double *x_gen;
5485 float *temp; /* use for calculating ratio */
5486
5487 /* x_gen and y_gen are used to store vectors generated by testgen.
5488 they eventually are copied back to x and y */
5489
5490
5491 /* the true r calculated by testgen(), in double-double */
5492 double *head_r_true, *tail_r_true;
5493
5494 int alpha_val;
5495 int alpha_flag = 0, beta_flag = 0;
5496 int order_val;
5497 enum blas_order_type order_type = 0;
5498 int prec_val;
5499 enum blas_prec_type prec = 0;
5500 int uplo_val;
5501 enum blas_uplo_type uplo_type = 0;
5502 int trans_val;
5503 enum blas_trans_type trans_type = 0;
5504 int diag_val;
5505 enum blas_diag_type diag_type = 0;
5506 int row;
5507 int saved_seed; /* for saving the original seed */
5508 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
5509 FPU_FIX_DECL;
5510
5511 /* test for bad arguments */
5512 if (n < 0 || ntests < 0)
5513 BLAS_error(fname, 0, 0, NULL);
5514
5515 /* if there is nothing to test, return all zero */
5516 if (n == 0 || ntests == 0) {
5517 *min_ratio = 0.0;
5518 *num_bad_ratio = 0;
5519 *num_tests = 0;
5520 return 0.0;
5521 }
5522
5523 FPU_FIX_START;
5524
5525 incx_gen *= 2;
5526
5527 /* get space for calculation */
5528 x = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
5529 if (n * 2 > 0 && x == NULL) {
5530 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5531 }
5532 x_gen = (double *) blas_malloc(n * sizeof(double) * 2);
5533 if (n > 0 && x_gen == NULL) {
5534 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5535 }
5536 T = (float *) blas_malloc(4 * n * n * sizeof(float) * 2);
5537 if (4 * n * n > 0 && T == NULL) {
5538 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5539 }
5540 temp = (float *) blas_malloc(n * sizeof(float) * 2);
5541 if (n > 0 && temp == NULL) {
5542 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5543 }
5544 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
5545 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
5546 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
5547 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5548 }
5549 ratios = (double *) blas_malloc(n * sizeof(double));
5550 if (n > 0 && ratios == NULL) {
5551 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5552 }
5553
5554 /* initialization */
5555 saved_seed = *seed;
5556 ratio_min = 1e308;
5557 ratio_max = 0.0;
5558 tot_tests = 0;
5559 p_count = 0;
5560 count = 0;
5561 find_max_ratio = 0;
5562 beta[0] = beta[1] = 0.0;
5563 beta_flag = 1;
5564 if (debug == 3)
5565 find_max_ratio = 1;
5566
5567 /* The debug iteration:
5568 If debug=1, then will execute the iteration twice. First, compute the
5569 max ratio. Second, print info if ratio > (50% * ratio_max). */
5570 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
5571 bad_ratios = 0; /* set to zero */
5572
5573 if ((debug == 3) && (d_count == find_max_ratio))
5574 *seed = saved_seed; /* restore the original seed */
5575
5576 /* varying alpha */
5577 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
5578 alpha_flag = 0;
5579 switch (alpha_val) {
5580 case 0:
5581 alpha[0] = alpha[1] = 0.0;
5582 alpha_flag = 1;
5583 break;
5584 case 1:
5585 alpha[0] = 1.0;
5586 alpha[1] = 0.0;
5587 alpha_flag = 1;
5588 break;
5589 }
5590
5591
5592 /* varying extra precs */
5593 for (prec_val = 0; prec_val <= 2; prec_val++) {
5594 switch (prec_val) {
5595 case 0:
5596 eps_int = power(2, -BITS_D);
5597 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5598 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5599 prec = blas_prec_double;
5600 break;
5601 case 1:
5602 eps_int = power(2, -BITS_D);
5603 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5604 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5605 prec = blas_prec_double;
5606 break;
5607 case 2:
5608 default:
5609 eps_int = power(2, -BITS_E);
5610 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
5611 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
5612 prec = blas_prec_extra;
5613 break;
5614 }
5615
5616 /* values near underflow, 1, or overflow */
5617 for (norm = -1; norm <= 1; norm++) {
5618
5619 /* row or col major */
5620 for (order_val = 0; order_val < 2; order_val++) {
5621 switch (order_val) {
5622 case 0:
5623 order_type = blas_rowmajor;
5624 break;
5625 case 1:
5626 order_type = blas_colmajor;
5627 break;
5628 }
5629
5630 /* upper or lower */
5631 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
5632 switch (uplo_val) {
5633 case 0:
5634 uplo_type = blas_upper;
5635 break;
5636 case 1:
5637 uplo_type = blas_lower;
5638 break;
5639 }
5640
5641 /* no trans, trans, or conj trans */
5642 for (trans_val = 0; trans_val < 3; trans_val++) {
5643 switch (trans_val) {
5644 case 0:
5645 trans_type = blas_no_trans;
5646 break;
5647 case 1:
5648 trans_type = blas_trans;
5649 break;
5650 case 2:
5651 trans_type = blas_conj_trans;
5652 break;
5653 }
5654
5655 /* non_unit_diag, unit_diag */
5656 for (diag_val = 0; diag_val < 2; diag_val++) {
5657 switch (diag_val) {
5658 case 0:
5659 diag_type = blas_non_unit_diag;
5660 break;
5661 case 1:
5662 diag_type = blas_unit_diag;
5663 break;
5664 }
5665
5666 /* number of tests */
5667 for (i = 0; i < ntests; i++) {
5668
5669 for (lda_val = 0; lda_val < 3; lda_val++) {
5670 switch (lda_val) {
5671 case 0:
5672 lda = n;
5673 break;
5674 case 1:
5675 lda = n + 1;
5676 break;
5677 case 2:
5678 lda = 2 * n;
5679 break;
5680 }
5681
5682 /* For the sake of speed, we throw out this case at random */
5683 if (xrand(seed) >= test_prob)
5684 continue;
5685
5686 for (row = 0; row < n; row++) {
5687 BLAS_ztrsv_c_testgen(norm, order_type, uplo_type,
5688 trans_type, diag_type, n,
5689 &alpha, alpha_flag, T, lda,
5690 x_gen, seed, head_r_true,
5691 tail_r_true, row, prec);
5692
5693 count++;
5694
5695 /* varying incx */
5696 for (incx_val = -2; incx_val <= 2; incx_val++) {
5697 if (incx_val == 0)
5698 continue;
5699
5700
5701 /* setting incx */
5702 incx = incx_val;
5703 incx *= 2;
5704
5705 /* set x starting index */
5706 ix = 0;
5707 if (incx < 0)
5708 ix = -(n - 1) * incx;
5709
5710 /* copy x_gen to x */
5711 for (j = 0; j < n * incx_gen; j += incx_gen) {
5712 x[ix] = x_gen[j];
5713 x[ix + 1] = x_gen[j + 1];
5714
5715 ix += incx;
5716 }
5717
5718 /* call BLAS_ztrsv_c_x */
5719 FPU_FIX_STOP;
5720 BLAS_ztrsv_c_x(order_type, uplo_type, trans_type,
5721 diag_type, n, alpha, T, lda, x,
5722 incx_val, prec);
5723 FPU_FIX_START;
5724
5725 ix = 0;
5726 if (incx < 0)
5727 ix = -(n - 1) * incx;
5728
5729 /* computing the ratio */
5730 for (j = 0; j < n; j++) {
5731 if (j == row) {
5732 double minus_one[2] = { -1.0, 0.0 };
5733 /* copy row j of T to temp */
5734 ctr_copy_row(order_type, uplo_type, trans_type,
5735 n, T, lda, temp, j);
5736
5737 test_BLAS_zdot_z_c(n, blas_no_conj,
5738 minus_one, alpha,
5739 &x_gen[j * incx_gen],
5740 &x[ix],
5741 &head_r_true[j * incx_gen],
5742 &tail_r_true[j * incx_gen],
5743 x, incx_val, temp, 1,
5744 eps_int, un_int, &ratios[j]);
5745 } else {
5746 double eps_out = power(2, -BITS_D);
5747
5748 double tmp =
5749 sqrt(((x[ix] - head_r_true[j * incx_gen]) -
5750 tail_r_true[j * incx_gen]) * ((x[ix] -
5751 head_r_true
5752 [j *
5753 incx_gen])
5754 -
5755 tail_r_true
5756 [j *
5757 incx_gen])
5758 +
5759 ((x[ix + 1] -
5760 head_r_true[j * incx_gen + 1]) -
5761 tail_r_true[j * incx_gen +
5762 1]) * ((x[ix + 1] -
5763 head_r_true[j *
5764 incx_gen
5765 + 1]) -
5766 tail_r_true[j *
5767 incx_gen
5768 + 1]));
5769
5770 if (head_r_true[j * incx_gen] == 0.0
5771 && head_r_true[j * incx_gen + 1] == 0.0)
5772 ratios[j] = 0.0;
5773 else
5774 ratios[j] =
5775 tmp /
5776 (sqrt
5777 (head_r_true[j * incx_gen] *
5778 head_r_true[j * incx_gen] +
5779 head_r_true[j * incx_gen +
5780 1] * head_r_true[j *
5781 incx_gen +
5782 1]) * 8.0 *
5783 sqrt(2.0) * eps_out);
5784 }
5785
5786 /* take the max ratio */
5787 if (j == 0) {
5788 ratio = ratios[0];
5789 } else if (!(ratios[j] <= ratio)) {
5790 /* The !<= test causes NaN error to be detected.
5791 Note that (NaN > thresh) is always false. */
5792 ratio = ratios[j];
5793 }
5794 ix += incx;
5795 }
5796
5797 /* Increase the number of bad ratio, if the ratio
5798 is bigger than the threshold.
5799 The !<= below causes NaN error to be detected.
5800 Note that (NaN > thresh) is always false. */
5801 if (!(ratio <= thresh)) {
5802
5803 bad_ratios++;
5804
5805 if ((debug == 3) && /* print only when debug is on */
5806 (count != old_count) && /* print if old vector is different
5807 from the current one */
5808 (d_count == find_max_ratio) &&
5809 (p_count <= max_print) &&
5810 (ratio > 0.5 * ratio_max)) {
5811 p_count++;
5812 old_count = count;
5813
5814 printf
5815 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
5816 fname, n, ntests, thresh);
5817
5818 /* Print test info */
5819 switch (prec) {
5820 case blas_prec_single:
5821 printf("single ");
5822 break;
5823 case blas_prec_double:
5824 printf("double ");
5825 break;
5826 case blas_prec_indigenous:
5827 printf("indigenous ");
5828 break;
5829 case blas_prec_extra:
5830 printf("extra ");
5831 break;
5832 }
5833 switch (norm) {
5834 case -1:
5835 printf("near_underflow ");
5836 break;
5837 case 0:
5838 printf("near_one ");
5839 break;
5840 case 1:
5841 printf("near_overflow ");
5842 break;
5843 }
5844 switch (order_type) {
5845 case blas_rowmajor:
5846 printf("row_major ");
5847 break;
5848 case blas_colmajor:
5849 printf("col_major ");
5850 break;
5851 }
5852 switch (uplo_type) {
5853 case blas_upper:
5854 printf("upper ");
5855 break;
5856 case blas_lower:
5857 printf("lower ");
5858 break;
5859 }
5860 switch (trans_type) {
5861 case blas_no_trans:
5862 printf("no_trans ");
5863 break;
5864 case blas_trans:
5865 printf("trans ");
5866 break;
5867 case blas_conj_trans:
5868 printf("conj_trans ");
5869 break;
5870 }
5871 switch (diag_type) {
5872 case blas_non_unit_diag:
5873 printf("non_unit_diag ");
5874 break;
5875 case blas_unit_diag:
5876 printf("unit_diag ");
5877 break;
5878 }
5879
5880 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
5881 incx);
5882
5883 ix = 0;
5884 if (incx < 0)
5885 ix = -(n - 1) * incx;
5886
5887 printf(" T=");
5888 for (j = 0; j < n; j++) {
5889 /* copy row j of T to temp */
5890 ctr_copy_row(order_type, uplo_type,
5891 trans_type, n, T, lda, temp, j);
5892
5893 if (j > 0)
5894 printf(" ");
5895 cprint_vector(temp, n, 1, NULL);
5896 }
5897
5898 ix = 0;
5899 if (incx < 0)
5900 ix = -(n - 1) * incx;
5901
5902 for (j = 0; j < n; j++) {
5903 printf(" ");
5904 printf("x[%d] = ", j * incx_gen);
5905 printf("(%24.16e, %24.16e)",
5906 x_gen[j * incx_gen],
5907 x_gen[j * incx_gen + 1]);
5908 printf("; ");
5909 printf("x_final[%d] = ", ix);
5910 printf("(%24.16e, %24.16e)", x[ix],
5911 x[ix + 1]);
5912 printf("\n");
5913 ix += incx;
5914 }
5915
5916 printf(" ");
5917 printf("alpha = ");
5918 printf("(%24.16e, %24.16e)", alpha[0],
5919 alpha[1]);
5920 printf("; ");
5921 printf("\n");
5922 for (j = 0; j < n; j++) {
5923 if (j == row)
5924 printf(" =>");
5925 else
5926 printf(" ");
5927 printf
5928 ("([%24.16e %24.16e], [%24.16e %24.16e])",
5929 head_r_true[j * incx_gen],
5930 tail_r_true[j * incx_gen],
5931 head_r_true[j * incx_gen + 1],
5932 tail_r_true[j * incx_gen + 1]);
5933 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
5934 }
5935 printf(" ratio=%.4e\n", ratio);
5936 }
5937 if (bad_ratios >= MAX_BAD_TESTS) {
5938 printf("\ntoo many failures, exiting....");
5939 printf("\nTesting and compilation");
5940 printf(" are incomplete\n\n");
5941 goto end;
5942 }
5943 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
5944 printf("\nFlagrant ratio %e, exiting...",
5945 ratio);
5946 printf("\nTesting and compilation");
5947 printf(" are incomplete\n\n");
5948 goto end;
5949 }
5950 }
5951
5952 if (d_count == 0) {
5953 if (ratio > ratio_max)
5954 ratio_max = ratio;
5955
5956 if (ratio != 0.0 && ratio < ratio_min)
5957 ratio_min = ratio;
5958
5959 tot_tests++;
5960 }
5961 } /* incx */
5962 } /* row */
5963 } /* lda */
5964 } /* numtests */
5965 } /* diag */
5966 } /* trans */
5967 } /* uplo */
5968 } /* order */
5969 } /* norm */
5970 } /* prec */
5971 } /* alpha */
5972 } /* debug */
5973
5974 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
5975 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
5976 fname, n, ntests, thresh);
5977 printf
5978 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
5979 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
5980 ratio_min, ratio_max);
5981 }
5982
5983 end:
5984 FPU_FIX_STOP;
5985
5986 blas_free(x);
5987 blas_free(x_gen);
5988 blas_free(temp);
5989 blas_free(T);
5990 blas_free(head_r_true);
5991 blas_free(tail_r_true);
5992 blas_free(ratios);
5993
5994 *min_ratio = ratio_min;
5995 *num_bad_ratio = bad_ratios;
5996 *num_tests = tot_tests;
5997 return ratio_max;
5998 } /* end of do_test_ztrsv_c_x */
5999
do_test_ctrsv_s_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)6000 double do_test_ctrsv_s_x(int n,
6001 int ntests,
6002 int *seed,
6003 double thresh,
6004 int debug,
6005 float test_prob,
6006 double *min_ratio,
6007 int *num_bad_ratio, int *num_tests)
6008
6009 /*
6010 * Purpose
6011 * =======
6012 *
6013 * Runs a series of tests on TRSV.
6014 *
6015 * Arguments
6016 * =========
6017 *
6018 * n (input) int
6019 * The size of vector being tested
6020 *
6021 * ntests (input) int
6022 * The number of tests to run for each set of attributes.
6023 *
6024 * seed (input/output) int
6025 * The seed for the random number generator used in testgen().
6026 *
6027 * thresh (input) double
6028 * When the ratio returned from test() exceeds the specified
6029 * threshold, the current size, r_true, r_comp, and ratio will be
6030 * printed. (Since ratio is supposed to be O(1), we can set thresh
6031 * to ~10.)
6032 *
6033 * debug (input) int
6034 * If debug=3, print summary
6035 * If debug=2, print summary only if the number of bad ratios > 0
6036 * If debug=1, print complete info if tests fail
6037 * If debug=0, return max ratio
6038 *
6039 * test_prob (input) float
6040 * The specified test will be performed only if the generated
6041 * random exceeds this threshold.
6042 *
6043 * min_ratio (output) double
6044 * The minimum ratio
6045 *
6046 * num_bad_ratio (output) int
6047 * The number of tests fail; they are above the threshold.
6048 *
6049 * num_tests (output) int
6050 * The number of tests is being performed.
6051 *
6052 * Return value
6053 * ============
6054 *
6055 * The maximum ratio if run successfully, otherwise return -1
6056 *
6057 * Code structure
6058 * ==============
6059 *
6060 * debug loop -- if debug is one, the first loop computes the max ratio
6061 * -- and the last(second) loop outputs debugging information,
6062 * -- if the test fail and its ratio > 0.5 * max ratio.
6063 * -- if debug is zero, the loop is executed once
6064 * alpha loop -- varying alpha: 0, 1, or random
6065 * prec loop -- varying internal prec: single, double, or extra
6066 * norm loop -- varying norm: near undeflow, near one, or
6067 * -- near overflow
6068 * order loop -- varying order type: rowmajor or colmajor
6069 * uplo loop -- varying uplo type: upper or lower
6070 * trans loop -- varying trans type: no trans, trans, and conj trans
6071 * diag loop -- varying diag type: non unit, and unit
6072 * numtest loop -- how many times the test is perform with
6073 * -- above set of attributes
6074 * lda loop -- varying lda: n, n+1, and 2n
6075 * incx loop -- varying incx: -2, -1, 1, 2
6076 */
6077 {
6078 /* function name */
6079 const char fname[] = "BLAS_ctrsv_s_x";
6080
6081 /* max number of debug lines to print */
6082 const int max_print = 3;
6083
6084 /* Variables in the "x_val" form are loop vars for corresponding
6085 variables */
6086 int i; /* iterate through the repeating tests */
6087 int j; /* multipurpose counters */
6088 int ix; /* use to index x */
6089 int lda_val, lda = 0; /* for testing different values for lda */
6090 int incx_val, incx; /* for testing different inc values */
6091 int incx_gen = 1; /* 1 if real, 2 if complex */
6092 int d_count; /* counter for debug */
6093 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
6094 int p_count; /* counter for the number of debug lines printed */
6095 int tot_tests; /* total number of tests to be done */
6096 int norm; /* input values of near underflow/one/overflow */
6097 double ratio_max; /* the current maximum ratio */
6098 double ratio_min; /* the current minimum ratio */
6099 double *ratios; /* a temporary variable for calculating ratio */
6100 double ratio = 0.0; /* the per-use test ratio from test() */
6101 int bad_ratios = 0; /* the number of ratios over the threshold */
6102 double eps_int; /* the internal epsilon expected--2^(-24) for float */
6103 double un_int; /* the internal underflow threshold */
6104 float alpha[2];
6105 float beta[2];
6106 float *T;
6107 float *x;
6108 float *x_gen;
6109 float *temp; /* use for calculating ratio */
6110
6111 /* x_gen and y_gen are used to store vectors generated by testgen.
6112 they eventually are copied back to x and y */
6113
6114
6115 /* the true r calculated by testgen(), in double-double */
6116 double *head_r_true, *tail_r_true;
6117
6118 int alpha_val;
6119 int alpha_flag = 0, beta_flag = 0;
6120 int order_val;
6121 enum blas_order_type order_type = 0;
6122 int prec_val;
6123 enum blas_prec_type prec = 0;
6124 int uplo_val;
6125 enum blas_uplo_type uplo_type = 0;
6126 int trans_val;
6127 enum blas_trans_type trans_type = 0;
6128 int diag_val;
6129 enum blas_diag_type diag_type = 0;
6130 int row;
6131 int saved_seed; /* for saving the original seed */
6132 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
6133 FPU_FIX_DECL;
6134
6135 /* test for bad arguments */
6136 if (n < 0 || ntests < 0)
6137 BLAS_error(fname, 0, 0, NULL);
6138
6139 /* if there is nothing to test, return all zero */
6140 if (n == 0 || ntests == 0) {
6141 *min_ratio = 0.0;
6142 *num_bad_ratio = 0;
6143 *num_tests = 0;
6144 return 0.0;
6145 }
6146
6147 FPU_FIX_START;
6148
6149 incx_gen *= 2;
6150
6151 /* get space for calculation */
6152 x = (float *) blas_malloc(n * 2 * sizeof(float) * 2);
6153 if (n * 2 > 0 && x == NULL) {
6154 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6155 }
6156 x_gen = (float *) blas_malloc(n * sizeof(float) * 2);
6157 if (n > 0 && x_gen == NULL) {
6158 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6159 }
6160 T = (float *) blas_malloc(4 * n * n * sizeof(float));
6161 if (4 * n * n > 0 && T == NULL) {
6162 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6163 }
6164 temp = (float *) blas_malloc(n * sizeof(float));
6165 if (n > 0 && temp == NULL) {
6166 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6167 }
6168 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
6169 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
6170 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
6171 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6172 }
6173 ratios = (double *) blas_malloc(n * sizeof(double));
6174 if (n > 0 && ratios == NULL) {
6175 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6176 }
6177
6178 /* initialization */
6179 saved_seed = *seed;
6180 ratio_min = 1e308;
6181 ratio_max = 0.0;
6182 tot_tests = 0;
6183 p_count = 0;
6184 count = 0;
6185 find_max_ratio = 0;
6186 beta[0] = beta[1] = 0.0;
6187 beta_flag = 1;
6188 if (debug == 3)
6189 find_max_ratio = 1;
6190
6191 /* The debug iteration:
6192 If debug=1, then will execute the iteration twice. First, compute the
6193 max ratio. Second, print info if ratio > (50% * ratio_max). */
6194 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
6195 bad_ratios = 0; /* set to zero */
6196
6197 if ((debug == 3) && (d_count == find_max_ratio))
6198 *seed = saved_seed; /* restore the original seed */
6199
6200 /* varying alpha */
6201 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
6202 alpha_flag = 0;
6203 switch (alpha_val) {
6204 case 0:
6205 alpha[0] = alpha[1] = 0.0;
6206 alpha_flag = 1;
6207 break;
6208 case 1:
6209 alpha[0] = 1.0;
6210 alpha[1] = 0.0;
6211 alpha_flag = 1;
6212 break;
6213 }
6214
6215
6216 /* varying extra precs */
6217 for (prec_val = 0; prec_val <= 2; prec_val++) {
6218 switch (prec_val) {
6219 case 0:
6220 eps_int = power(2, -BITS_S);
6221 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
6222 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
6223 prec = blas_prec_single;
6224 break;
6225 case 1:
6226 eps_int = power(2, -BITS_D);
6227 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6228 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6229 prec = blas_prec_double;
6230 break;
6231 case 2:
6232 default:
6233 eps_int = power(2, -BITS_E);
6234 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
6235 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
6236 prec = blas_prec_extra;
6237 break;
6238 }
6239
6240 /* values near underflow, 1, or overflow */
6241 for (norm = -1; norm <= 1; norm++) {
6242
6243 /* row or col major */
6244 for (order_val = 0; order_val < 2; order_val++) {
6245 switch (order_val) {
6246 case 0:
6247 order_type = blas_rowmajor;
6248 break;
6249 case 1:
6250 order_type = blas_colmajor;
6251 break;
6252 }
6253
6254 /* upper or lower */
6255 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
6256 switch (uplo_val) {
6257 case 0:
6258 uplo_type = blas_upper;
6259 break;
6260 case 1:
6261 uplo_type = blas_lower;
6262 break;
6263 }
6264
6265 /* no trans, trans, or conj trans */
6266 for (trans_val = 0; trans_val < 3; trans_val++) {
6267 switch (trans_val) {
6268 case 0:
6269 trans_type = blas_no_trans;
6270 break;
6271 case 1:
6272 trans_type = blas_trans;
6273 break;
6274 case 2:
6275 trans_type = blas_conj_trans;
6276 break;
6277 }
6278
6279 /* non_unit_diag, unit_diag */
6280 for (diag_val = 0; diag_val < 2; diag_val++) {
6281 switch (diag_val) {
6282 case 0:
6283 diag_type = blas_non_unit_diag;
6284 break;
6285 case 1:
6286 diag_type = blas_unit_diag;
6287 break;
6288 }
6289
6290 /* number of tests */
6291 for (i = 0; i < ntests; i++) {
6292
6293 for (lda_val = 0; lda_val < 3; lda_val++) {
6294 switch (lda_val) {
6295 case 0:
6296 lda = n;
6297 break;
6298 case 1:
6299 lda = n + 1;
6300 break;
6301 case 2:
6302 lda = 2 * n;
6303 break;
6304 }
6305
6306 /* For the sake of speed, we throw out this case at random */
6307 if (xrand(seed) >= test_prob)
6308 continue;
6309
6310 for (row = 0; row < n; row++) {
6311 BLAS_ctrsv_s_testgen(norm, order_type, uplo_type,
6312 trans_type, diag_type, n,
6313 &alpha, alpha_flag, T, lda,
6314 x_gen, seed, head_r_true,
6315 tail_r_true, row, prec);
6316
6317 count++;
6318
6319 /* varying incx */
6320 for (incx_val = -2; incx_val <= 2; incx_val++) {
6321 if (incx_val == 0)
6322 continue;
6323
6324
6325 /* setting incx */
6326 incx = incx_val;
6327 incx *= 2;
6328
6329 /* set x starting index */
6330 ix = 0;
6331 if (incx < 0)
6332 ix = -(n - 1) * incx;
6333
6334 /* copy x_gen to x */
6335 for (j = 0; j < n * incx_gen; j += incx_gen) {
6336 x[ix] = x_gen[j];
6337 x[ix + 1] = x_gen[j + 1];
6338
6339 ix += incx;
6340 }
6341
6342 /* call BLAS_ctrsv_s_x */
6343 FPU_FIX_STOP;
6344 BLAS_ctrsv_s_x(order_type, uplo_type, trans_type,
6345 diag_type, n, alpha, T, lda, x,
6346 incx_val, prec);
6347 FPU_FIX_START;
6348
6349 ix = 0;
6350 if (incx < 0)
6351 ix = -(n - 1) * incx;
6352
6353 /* computing the ratio */
6354 for (j = 0; j < n; j++) {
6355 if (j == row) {
6356 float minus_one[2] = { -1.0, 0.0 };
6357 /* copy row j of T to temp */
6358 str_copy_row(order_type, uplo_type, trans_type,
6359 n, T, lda, temp, j);
6360
6361 test_BLAS_cdot_c_s(n, blas_no_conj,
6362 minus_one, alpha,
6363 &x_gen[j * incx_gen],
6364 &x[ix],
6365 &head_r_true[j * incx_gen],
6366 &tail_r_true[j * incx_gen],
6367 x, incx_val, temp, 1,
6368 eps_int, un_int, &ratios[j]);
6369 } else {
6370 double eps_out = power(2, -BITS_S);
6371
6372 double tmp =
6373 sqrt(((x[ix] - head_r_true[j * incx_gen]) -
6374 tail_r_true[j * incx_gen]) * ((x[ix] -
6375 head_r_true
6376 [j *
6377 incx_gen])
6378 -
6379 tail_r_true
6380 [j *
6381 incx_gen])
6382 +
6383 ((x[ix + 1] -
6384 head_r_true[j * incx_gen + 1]) -
6385 tail_r_true[j * incx_gen +
6386 1]) * ((x[ix + 1] -
6387 head_r_true[j *
6388 incx_gen
6389 + 1]) -
6390 tail_r_true[j *
6391 incx_gen
6392 + 1]));
6393 if (head_r_true[j * incx_gen] == 0.0
6394 && head_r_true[j * incx_gen + 1] == 0.0)
6395 ratios[j] = 0.0;
6396 else
6397 ratios[j] =
6398 tmp /
6399 (sqrt
6400 (head_r_true[j * incx_gen] *
6401 head_r_true[j * incx_gen] +
6402 head_r_true[j * incx_gen +
6403 1] * head_r_true[j *
6404 incx_gen +
6405 1]) * 2.0 *
6406 sqrt(2.0) * eps_out);
6407 }
6408
6409 /* take the max ratio */
6410 if (j == 0) {
6411 ratio = ratios[0];
6412 } else if (!(ratios[j] <= ratio)) {
6413 /* The !<= test causes NaN error to be detected.
6414 Note that (NaN > thresh) is always false. */
6415 ratio = ratios[j];
6416 }
6417 ix += incx;
6418 }
6419
6420 /* Increase the number of bad ratio, if the ratio
6421 is bigger than the threshold.
6422 The !<= below causes NaN error to be detected.
6423 Note that (NaN > thresh) is always false. */
6424 if (!(ratio <= thresh)) {
6425
6426 bad_ratios++;
6427
6428 if ((debug == 3) && /* print only when debug is on */
6429 (count != old_count) && /* print if old vector is different
6430 from the current one */
6431 (d_count == find_max_ratio) &&
6432 (p_count <= max_print) &&
6433 (ratio > 0.5 * ratio_max)) {
6434 p_count++;
6435 old_count = count;
6436
6437 printf
6438 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
6439 fname, n, ntests, thresh);
6440
6441 /* Print test info */
6442 switch (prec) {
6443 case blas_prec_single:
6444 printf("single ");
6445 break;
6446 case blas_prec_double:
6447 printf("double ");
6448 break;
6449 case blas_prec_indigenous:
6450 printf("indigenous ");
6451 break;
6452 case blas_prec_extra:
6453 printf("extra ");
6454 break;
6455 }
6456 switch (norm) {
6457 case -1:
6458 printf("near_underflow ");
6459 break;
6460 case 0:
6461 printf("near_one ");
6462 break;
6463 case 1:
6464 printf("near_overflow ");
6465 break;
6466 }
6467 switch (order_type) {
6468 case blas_rowmajor:
6469 printf("row_major ");
6470 break;
6471 case blas_colmajor:
6472 printf("col_major ");
6473 break;
6474 }
6475 switch (uplo_type) {
6476 case blas_upper:
6477 printf("upper ");
6478 break;
6479 case blas_lower:
6480 printf("lower ");
6481 break;
6482 }
6483 switch (trans_type) {
6484 case blas_no_trans:
6485 printf("no_trans ");
6486 break;
6487 case blas_trans:
6488 printf("trans ");
6489 break;
6490 case blas_conj_trans:
6491 printf("conj_trans ");
6492 break;
6493 }
6494 switch (diag_type) {
6495 case blas_non_unit_diag:
6496 printf("non_unit_diag ");
6497 break;
6498 case blas_unit_diag:
6499 printf("unit_diag ");
6500 break;
6501 }
6502
6503 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
6504 incx);
6505
6506 ix = 0;
6507 if (incx < 0)
6508 ix = -(n - 1) * incx;
6509
6510 printf(" T=");
6511 for (j = 0; j < n; j++) {
6512 /* copy row j of T to temp */
6513 str_copy_row(order_type, uplo_type,
6514 trans_type, n, T, lda, temp, j);
6515
6516 if (j > 0)
6517 printf(" ");
6518 sprint_vector(temp, n, 1, NULL);
6519 }
6520
6521 ix = 0;
6522 if (incx < 0)
6523 ix = -(n - 1) * incx;
6524
6525 for (j = 0; j < n; j++) {
6526 printf(" ");
6527 printf("x[%d] = ", j * incx_gen);
6528 printf("(%16.8e, %16.8e)",
6529 x_gen[j * incx_gen],
6530 x_gen[j * incx_gen + 1]);
6531 printf("; ");
6532 printf("x_final[%d] = ", ix);
6533 printf("(%16.8e, %16.8e)", x[ix], x[ix + 1]);
6534 printf("\n");
6535 ix += incx;
6536 }
6537
6538 printf(" ");
6539 printf("alpha = ");
6540 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
6541 printf("; ");
6542 printf("\n");
6543 for (j = 0; j < n; j++) {
6544 if (j == row)
6545 printf(" =>");
6546 else
6547 printf(" ");
6548 printf
6549 ("([%24.16e %24.16e], [%24.16e %24.16e])",
6550 head_r_true[j * incx_gen],
6551 tail_r_true[j * incx_gen],
6552 head_r_true[j * incx_gen + 1],
6553 tail_r_true[j * incx_gen + 1]);
6554 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
6555 }
6556 printf(" ratio=%.4e\n", ratio);
6557 }
6558 if (bad_ratios >= MAX_BAD_TESTS) {
6559 printf("\ntoo many failures, exiting....");
6560 printf("\nTesting and compilation");
6561 printf(" are incomplete\n\n");
6562 goto end;
6563 }
6564 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
6565 printf("\nFlagrant ratio %e, exiting...",
6566 ratio);
6567 printf("\nTesting and compilation");
6568 printf(" are incomplete\n\n");
6569 goto end;
6570 }
6571 }
6572
6573 if (d_count == 0) {
6574 if (ratio > ratio_max)
6575 ratio_max = ratio;
6576
6577 if (ratio != 0.0 && ratio < ratio_min)
6578 ratio_min = ratio;
6579
6580 tot_tests++;
6581 }
6582 } /* incx */
6583 } /* row */
6584 } /* lda */
6585 } /* numtests */
6586 } /* diag */
6587 } /* trans */
6588 } /* uplo */
6589 } /* order */
6590 } /* norm */
6591 } /* prec */
6592 } /* alpha */
6593 } /* debug */
6594
6595 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
6596 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
6597 fname, n, ntests, thresh);
6598 printf
6599 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
6600 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
6601 ratio_min, ratio_max);
6602 }
6603
6604 end:
6605 FPU_FIX_STOP;
6606
6607 blas_free(x);
6608 blas_free(x_gen);
6609 blas_free(temp);
6610 blas_free(T);
6611 blas_free(head_r_true);
6612 blas_free(tail_r_true);
6613 blas_free(ratios);
6614
6615 *min_ratio = ratio_min;
6616 *num_bad_ratio = bad_ratios;
6617 *num_tests = tot_tests;
6618 return ratio_max;
6619 } /* end of do_test_ctrsv_s_x */
6620
do_test_ztrsv_d_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)6621 double do_test_ztrsv_d_x(int n,
6622 int ntests,
6623 int *seed,
6624 double thresh,
6625 int debug,
6626 float test_prob,
6627 double *min_ratio,
6628 int *num_bad_ratio, int *num_tests)
6629
6630 /*
6631 * Purpose
6632 * =======
6633 *
6634 * Runs a series of tests on TRSV.
6635 *
6636 * Arguments
6637 * =========
6638 *
6639 * n (input) int
6640 * The size of vector being tested
6641 *
6642 * ntests (input) int
6643 * The number of tests to run for each set of attributes.
6644 *
6645 * seed (input/output) int
6646 * The seed for the random number generator used in testgen().
6647 *
6648 * thresh (input) double
6649 * When the ratio returned from test() exceeds the specified
6650 * threshold, the current size, r_true, r_comp, and ratio will be
6651 * printed. (Since ratio is supposed to be O(1), we can set thresh
6652 * to ~10.)
6653 *
6654 * debug (input) int
6655 * If debug=3, print summary
6656 * If debug=2, print summary only if the number of bad ratios > 0
6657 * If debug=1, print complete info if tests fail
6658 * If debug=0, return max ratio
6659 *
6660 * test_prob (input) float
6661 * The specified test will be performed only if the generated
6662 * random exceeds this threshold.
6663 *
6664 * min_ratio (output) double
6665 * The minimum ratio
6666 *
6667 * num_bad_ratio (output) int
6668 * The number of tests fail; they are above the threshold.
6669 *
6670 * num_tests (output) int
6671 * The number of tests is being performed.
6672 *
6673 * Return value
6674 * ============
6675 *
6676 * The maximum ratio if run successfully, otherwise return -1
6677 *
6678 * Code structure
6679 * ==============
6680 *
6681 * debug loop -- if debug is one, the first loop computes the max ratio
6682 * -- and the last(second) loop outputs debugging information,
6683 * -- if the test fail and its ratio > 0.5 * max ratio.
6684 * -- if debug is zero, the loop is executed once
6685 * alpha loop -- varying alpha: 0, 1, or random
6686 * prec loop -- varying internal prec: single, double, or extra
6687 * norm loop -- varying norm: near undeflow, near one, or
6688 * -- near overflow
6689 * order loop -- varying order type: rowmajor or colmajor
6690 * uplo loop -- varying uplo type: upper or lower
6691 * trans loop -- varying trans type: no trans, trans, and conj trans
6692 * diag loop -- varying diag type: non unit, and unit
6693 * numtest loop -- how many times the test is perform with
6694 * -- above set of attributes
6695 * lda loop -- varying lda: n, n+1, and 2n
6696 * incx loop -- varying incx: -2, -1, 1, 2
6697 */
6698 {
6699 /* function name */
6700 const char fname[] = "BLAS_ztrsv_d_x";
6701
6702 /* max number of debug lines to print */
6703 const int max_print = 3;
6704
6705 /* Variables in the "x_val" form are loop vars for corresponding
6706 variables */
6707 int i; /* iterate through the repeating tests */
6708 int j; /* multipurpose counters */
6709 int ix; /* use to index x */
6710 int lda_val, lda = 0; /* for testing different values for lda */
6711 int incx_val, incx; /* for testing different inc values */
6712 int incx_gen = 1; /* 1 if real, 2 if complex */
6713 int d_count; /* counter for debug */
6714 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
6715 int p_count; /* counter for the number of debug lines printed */
6716 int tot_tests; /* total number of tests to be done */
6717 int norm; /* input values of near underflow/one/overflow */
6718 double ratio_max; /* the current maximum ratio */
6719 double ratio_min; /* the current minimum ratio */
6720 double *ratios; /* a temporary variable for calculating ratio */
6721 double ratio = 0.0; /* the per-use test ratio from test() */
6722 int bad_ratios = 0; /* the number of ratios over the threshold */
6723 double eps_int; /* the internal epsilon expected--2^(-24) for float */
6724 double un_int; /* the internal underflow threshold */
6725 double alpha[2];
6726 double beta[2];
6727 double *T;
6728 double *x;
6729 double *x_gen;
6730 double *temp; /* use for calculating ratio */
6731
6732 /* x_gen and y_gen are used to store vectors generated by testgen.
6733 they eventually are copied back to x and y */
6734
6735
6736 /* the true r calculated by testgen(), in double-double */
6737 double *head_r_true, *tail_r_true;
6738
6739 int alpha_val;
6740 int alpha_flag = 0, beta_flag = 0;
6741 int order_val;
6742 enum blas_order_type order_type = 0;
6743 int prec_val;
6744 enum blas_prec_type prec = 0;
6745 int uplo_val;
6746 enum blas_uplo_type uplo_type = 0;
6747 int trans_val;
6748 enum blas_trans_type trans_type = 0;
6749 int diag_val;
6750 enum blas_diag_type diag_type = 0;
6751 int row;
6752 int saved_seed; /* for saving the original seed */
6753 int count, old_count = -1; /* use for counting the number of testgen calls * 2 */
6754 FPU_FIX_DECL;
6755
6756 /* test for bad arguments */
6757 if (n < 0 || ntests < 0)
6758 BLAS_error(fname, 0, 0, NULL);
6759
6760 /* if there is nothing to test, return all zero */
6761 if (n == 0 || ntests == 0) {
6762 *min_ratio = 0.0;
6763 *num_bad_ratio = 0;
6764 *num_tests = 0;
6765 return 0.0;
6766 }
6767
6768 FPU_FIX_START;
6769
6770 incx_gen *= 2;
6771
6772 /* get space for calculation */
6773 x = (double *) blas_malloc(n * 2 * sizeof(double) * 2);
6774 if (n * 2 > 0 && x == NULL) {
6775 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6776 }
6777 x_gen = (double *) blas_malloc(n * sizeof(double) * 2);
6778 if (n > 0 && x_gen == NULL) {
6779 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6780 }
6781 T = (double *) blas_malloc(4 * n * n * sizeof(double));
6782 if (4 * n * n > 0 && T == NULL) {
6783 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6784 }
6785 temp = (double *) blas_malloc(n * sizeof(double));
6786 if (n > 0 && temp == NULL) {
6787 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6788 }
6789 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
6790 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
6791 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
6792 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6793 }
6794 ratios = (double *) blas_malloc(n * sizeof(double));
6795 if (n > 0 && ratios == NULL) {
6796 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6797 }
6798
6799 /* initialization */
6800 saved_seed = *seed;
6801 ratio_min = 1e308;
6802 ratio_max = 0.0;
6803 tot_tests = 0;
6804 p_count = 0;
6805 count = 0;
6806 find_max_ratio = 0;
6807 beta[0] = beta[1] = 0.0;
6808 beta_flag = 1;
6809 if (debug == 3)
6810 find_max_ratio = 1;
6811
6812 /* The debug iteration:
6813 If debug=1, then will execute the iteration twice. First, compute the
6814 max ratio. Second, print info if ratio > (50% * ratio_max). */
6815 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
6816 bad_ratios = 0; /* set to zero */
6817
6818 if ((debug == 3) && (d_count == find_max_ratio))
6819 *seed = saved_seed; /* restore the original seed */
6820
6821 /* varying alpha */
6822 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
6823 alpha_flag = 0;
6824 switch (alpha_val) {
6825 case 0:
6826 alpha[0] = alpha[1] = 0.0;
6827 alpha_flag = 1;
6828 break;
6829 case 1:
6830 alpha[0] = 1.0;
6831 alpha[1] = 0.0;
6832 alpha_flag = 1;
6833 break;
6834 }
6835
6836
6837 /* varying extra precs */
6838 for (prec_val = 0; prec_val <= 2; prec_val++) {
6839 switch (prec_val) {
6840 case 0:
6841 eps_int = power(2, -BITS_D);
6842 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6843 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6844 prec = blas_prec_double;
6845 break;
6846 case 1:
6847 eps_int = power(2, -BITS_D);
6848 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6849 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6850 prec = blas_prec_double;
6851 break;
6852 case 2:
6853 default:
6854 eps_int = power(2, -BITS_E);
6855 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
6856 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
6857 prec = blas_prec_extra;
6858 break;
6859 }
6860
6861 /* values near underflow, 1, or overflow */
6862 for (norm = -1; norm <= 1; norm++) {
6863
6864 /* row or col major */
6865 for (order_val = 0; order_val < 2; order_val++) {
6866 switch (order_val) {
6867 case 0:
6868 order_type = blas_rowmajor;
6869 break;
6870 case 1:
6871 order_type = blas_colmajor;
6872 break;
6873 }
6874
6875 /* upper or lower */
6876 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
6877 switch (uplo_val) {
6878 case 0:
6879 uplo_type = blas_upper;
6880 break;
6881 case 1:
6882 uplo_type = blas_lower;
6883 break;
6884 }
6885
6886 /* no trans, trans, or conj trans */
6887 for (trans_val = 0; trans_val < 3; trans_val++) {
6888 switch (trans_val) {
6889 case 0:
6890 trans_type = blas_no_trans;
6891 break;
6892 case 1:
6893 trans_type = blas_trans;
6894 break;
6895 case 2:
6896 trans_type = blas_conj_trans;
6897 break;
6898 }
6899
6900 /* non_unit_diag, unit_diag */
6901 for (diag_val = 0; diag_val < 2; diag_val++) {
6902 switch (diag_val) {
6903 case 0:
6904 diag_type = blas_non_unit_diag;
6905 break;
6906 case 1:
6907 diag_type = blas_unit_diag;
6908 break;
6909 }
6910
6911 /* number of tests */
6912 for (i = 0; i < ntests; i++) {
6913
6914 for (lda_val = 0; lda_val < 3; lda_val++) {
6915 switch (lda_val) {
6916 case 0:
6917 lda = n;
6918 break;
6919 case 1:
6920 lda = n + 1;
6921 break;
6922 case 2:
6923 lda = 2 * n;
6924 break;
6925 }
6926
6927 /* For the sake of speed, we throw out this case at random */
6928 if (xrand(seed) >= test_prob)
6929 continue;
6930
6931 for (row = 0; row < n; row++) {
6932 BLAS_ztrsv_d_testgen(norm, order_type, uplo_type,
6933 trans_type, diag_type, n,
6934 &alpha, alpha_flag, T, lda,
6935 x_gen, seed, head_r_true,
6936 tail_r_true, row, prec);
6937
6938 count++;
6939
6940 /* varying incx */
6941 for (incx_val = -2; incx_val <= 2; incx_val++) {
6942 if (incx_val == 0)
6943 continue;
6944
6945
6946 /* setting incx */
6947 incx = incx_val;
6948 incx *= 2;
6949
6950 /* set x starting index */
6951 ix = 0;
6952 if (incx < 0)
6953 ix = -(n - 1) * incx;
6954
6955 /* copy x_gen to x */
6956 for (j = 0; j < n * incx_gen; j += incx_gen) {
6957 x[ix] = x_gen[j];
6958 x[ix + 1] = x_gen[j + 1];
6959
6960 ix += incx;
6961 }
6962
6963 /* call BLAS_ztrsv_d_x */
6964 FPU_FIX_STOP;
6965 BLAS_ztrsv_d_x(order_type, uplo_type, trans_type,
6966 diag_type, n, alpha, T, lda, x,
6967 incx_val, prec);
6968 FPU_FIX_START;
6969
6970 ix = 0;
6971 if (incx < 0)
6972 ix = -(n - 1) * incx;
6973
6974 /* computing the ratio */
6975 for (j = 0; j < n; j++) {
6976 if (j == row) {
6977 double minus_one[2] = { -1.0, 0.0 };
6978 /* copy row j of T to temp */
6979 dtr_copy_row(order_type, uplo_type, trans_type,
6980 n, T, lda, temp, j);
6981
6982 test_BLAS_zdot_z_d(n, blas_no_conj,
6983 minus_one, alpha,
6984 &x_gen[j * incx_gen],
6985 &x[ix],
6986 &head_r_true[j * incx_gen],
6987 &tail_r_true[j * incx_gen],
6988 x, incx_val, temp, 1,
6989 eps_int, un_int, &ratios[j]);
6990 } else {
6991 double eps_out = power(2, -BITS_D);
6992
6993 double tmp =
6994 sqrt(((x[ix] - head_r_true[j * incx_gen]) -
6995 tail_r_true[j * incx_gen]) * ((x[ix] -
6996 head_r_true
6997 [j *
6998 incx_gen])
6999 -
7000 tail_r_true
7001 [j *
7002 incx_gen])
7003 +
7004 ((x[ix + 1] -
7005 head_r_true[j * incx_gen + 1]) -
7006 tail_r_true[j * incx_gen +
7007 1]) * ((x[ix + 1] -
7008 head_r_true[j *
7009 incx_gen
7010 + 1]) -
7011 tail_r_true[j *
7012 incx_gen
7013 + 1]));
7014 if (head_r_true[j * incx_gen] == 0.0
7015 && head_r_true[j * incx_gen + 1] == 0.0)
7016 ratios[j] = 0.0;
7017 else
7018 ratios[j] =
7019 tmp /
7020 (sqrt
7021 (head_r_true[j * incx_gen] *
7022 head_r_true[j * incx_gen] +
7023 head_r_true[j * incx_gen +
7024 1] * head_r_true[j *
7025 incx_gen +
7026 1]) * 2.0 *
7027 sqrt(2.0) * eps_out);
7028 }
7029
7030 /* take the max ratio */
7031 if (j == 0) {
7032 ratio = ratios[0];
7033 } else if (!(ratios[j] <= ratio)) {
7034 /* The !<= test causes NaN error to be detected.
7035 Note that (NaN > thresh) is always false. */
7036 ratio = ratios[j];
7037 }
7038 ix += incx;
7039 }
7040
7041 /* Increase the number of bad ratio, if the ratio
7042 is bigger than the threshold.
7043 The !<= below causes NaN error to be detected.
7044 Note that (NaN > thresh) is always false. */
7045 if (!(ratio <= thresh)) {
7046
7047 bad_ratios++;
7048
7049 if ((debug == 3) && /* print only when debug is on */
7050 (count != old_count) && /* print if old vector is different
7051 from the current one */
7052 (d_count == find_max_ratio) &&
7053 (p_count <= max_print) &&
7054 (ratio > 0.5 * ratio_max)) {
7055 p_count++;
7056 old_count = count;
7057
7058 printf
7059 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
7060 fname, n, ntests, thresh);
7061
7062 /* Print test info */
7063 switch (prec) {
7064 case blas_prec_single:
7065 printf("single ");
7066 break;
7067 case blas_prec_double:
7068 printf("double ");
7069 break;
7070 case blas_prec_indigenous:
7071 printf("indigenous ");
7072 break;
7073 case blas_prec_extra:
7074 printf("extra ");
7075 break;
7076 }
7077 switch (norm) {
7078 case -1:
7079 printf("near_underflow ");
7080 break;
7081 case 0:
7082 printf("near_one ");
7083 break;
7084 case 1:
7085 printf("near_overflow ");
7086 break;
7087 }
7088 switch (order_type) {
7089 case blas_rowmajor:
7090 printf("row_major ");
7091 break;
7092 case blas_colmajor:
7093 printf("col_major ");
7094 break;
7095 }
7096 switch (uplo_type) {
7097 case blas_upper:
7098 printf("upper ");
7099 break;
7100 case blas_lower:
7101 printf("lower ");
7102 break;
7103 }
7104 switch (trans_type) {
7105 case blas_no_trans:
7106 printf("no_trans ");
7107 break;
7108 case blas_trans:
7109 printf("trans ");
7110 break;
7111 case blas_conj_trans:
7112 printf("conj_trans ");
7113 break;
7114 }
7115 switch (diag_type) {
7116 case blas_non_unit_diag:
7117 printf("non_unit_diag ");
7118 break;
7119 case blas_unit_diag:
7120 printf("unit_diag ");
7121 break;
7122 }
7123
7124 printf("row=%d, lda=%d, incx=%d:\n", row, lda,
7125 incx);
7126
7127 ix = 0;
7128 if (incx < 0)
7129 ix = -(n - 1) * incx;
7130
7131 printf(" T=");
7132 for (j = 0; j < n; j++) {
7133 /* copy row j of T to temp */
7134 dtr_copy_row(order_type, uplo_type,
7135 trans_type, n, T, lda, temp, j);
7136
7137 if (j > 0)
7138 printf(" ");
7139 dprint_vector(temp, n, 1, NULL);
7140 }
7141
7142 ix = 0;
7143 if (incx < 0)
7144 ix = -(n - 1) * incx;
7145
7146 for (j = 0; j < n; j++) {
7147 printf(" ");
7148 printf("x[%d] = ", j * incx_gen);
7149 printf("(%24.16e, %24.16e)",
7150 x_gen[j * incx_gen],
7151 x_gen[j * incx_gen + 1]);
7152 printf("; ");
7153 printf("x_final[%d] = ", ix);
7154 printf("(%24.16e, %24.16e)", x[ix],
7155 x[ix + 1]);
7156 printf("\n");
7157 ix += incx;
7158 }
7159
7160 printf(" ");
7161 printf("alpha = ");
7162 printf("(%24.16e, %24.16e)", alpha[0],
7163 alpha[1]);
7164 printf("; ");
7165 printf("\n");
7166 for (j = 0; j < n; j++) {
7167 if (j == row)
7168 printf(" =>");
7169 else
7170 printf(" ");
7171 printf
7172 ("([%24.16e %24.16e], [%24.16e %24.16e])",
7173 head_r_true[j * incx_gen],
7174 tail_r_true[j * incx_gen],
7175 head_r_true[j * incx_gen + 1],
7176 tail_r_true[j * incx_gen + 1]);
7177 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
7178 }
7179 printf(" ratio=%.4e\n", ratio);
7180 }
7181 if (bad_ratios >= MAX_BAD_TESTS) {
7182 printf("\ntoo many failures, exiting....");
7183 printf("\nTesting and compilation");
7184 printf(" are incomplete\n\n");
7185 goto end;
7186 }
7187 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
7188 printf("\nFlagrant ratio %e, exiting...",
7189 ratio);
7190 printf("\nTesting and compilation");
7191 printf(" are incomplete\n\n");
7192 goto end;
7193 }
7194 }
7195
7196 if (d_count == 0) {
7197 if (ratio > ratio_max)
7198 ratio_max = ratio;
7199
7200 if (ratio != 0.0 && ratio < ratio_min)
7201 ratio_min = ratio;
7202
7203 tot_tests++;
7204 }
7205 } /* incx */
7206 } /* row */
7207 } /* lda */
7208 } /* numtests */
7209 } /* diag */
7210 } /* trans */
7211 } /* uplo */
7212 } /* order */
7213 } /* norm */
7214 } /* prec */
7215 } /* alpha */
7216 } /* debug */
7217
7218 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
7219 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
7220 fname, n, ntests, thresh);
7221 printf
7222 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
7223 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
7224 ratio_min, ratio_max);
7225 }
7226
7227 end:
7228 FPU_FIX_STOP;
7229
7230 blas_free(x);
7231 blas_free(x_gen);
7232 blas_free(temp);
7233 blas_free(T);
7234 blas_free(head_r_true);
7235 blas_free(tail_r_true);
7236 blas_free(ratios);
7237
7238 *min_ratio = ratio_min;
7239 *num_bad_ratio = bad_ratios;
7240 *num_tests = tot_tests;
7241 return ratio_max;
7242 } /* end of do_test_ztrsv_d_x */
7243
7244 #define NSIZES 12
7245
main(int argc,char ** argv)7246 int main(int argc, char **argv)
7247 {
7248 int nsizes, ntests, debug;
7249 double thresh, test_prob;
7250 double total_min_ratio, total_max_ratio;
7251 int total_bad_ratios;
7252 int seed, num_bad_ratio, num_tests;
7253 int total_tests, nr_failed_routines = 0, nr_routines = 0;
7254 double min_ratio, max_ratio;
7255 const char *base_routine = "trsv";
7256 char *fname;
7257 int n;
7258
7259 int i;
7260 int sizes[NSIZES] = { 0, 1, 2, 3, 4, 5, 6, 10, 17, 25, 37, 46 };
7261
7262 if (argc != 6) {
7263 printf("Usage:\n");
7264 printf("do_test_trsv <nsizes> <ntests> <thresh> <debug> <test_prob>\n");
7265 printf(" <nsizes>: number of sizes to be run.\n");
7266 printf
7267 (" <ntests>: the number of tests performed for each set of attributes\n");
7268 printf
7269 (" <thresh>: to catch bad ratios if it is greater than <thresh>\n");
7270 printf(" <debug>: 0, 1, 2, or 3; \n");
7271 printf(" if 0, no printing \n");
7272 printf(" if 1, print error summary only if tests fail\n");
7273 printf(" if 2, print error summary for each n\n");
7274 printf(" if 3, print complete info each test fails \n");
7275 printf("<test_prob>: probability of preforming a given \n");
7276 printf(" test case: 0.0 does no tests, 1.0 does all tests\n");
7277 return -1;
7278 } else {
7279 nsizes = atoi(argv[1]);
7280 ntests = atoi(argv[2]);
7281 thresh = atof(argv[3]);
7282 debug = atoi(argv[4]);
7283 test_prob = atof(argv[5]);
7284 }
7285
7286 seed = 1999;
7287
7288 if (nsizes < 0 || ntests < 0 || debug < 0 || debug > 3)
7289 BLAS_error("Testing trsv", 0, 0, NULL);
7290
7291 printf("Testing %s...\n", base_routine);
7292 printf("INPUT: nsizes = %d, ntests = %d, thresh = %4.2f, debug = %d\n\n",
7293 nsizes, ntests, thresh, debug);
7294
7295
7296
7297
7298
7299 fname = "BLAS_dtrsv_s";
7300 printf("Testing %s...\n", fname);
7301 min_ratio = 1e308;
7302 max_ratio = 0.0;
7303 total_bad_ratios = 0;
7304 total_tests = 0;
7305 for (i = 0; i < nsizes; i++) {
7306
7307 n = sizes[i];
7308 total_max_ratio = do_test_dtrsv_s(n, ntests, &seed, thresh, debug,
7309 test_prob, &total_min_ratio,
7310 &num_bad_ratio, &num_tests);
7311 if (total_max_ratio > max_ratio)
7312 max_ratio = total_max_ratio;
7313
7314 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7315 min_ratio = total_min_ratio;
7316
7317 total_bad_ratios += num_bad_ratio;
7318 total_tests += num_tests;
7319 }
7320
7321 nr_routines++;
7322 if (total_bad_ratios == 0)
7323 printf("PASS> ");
7324 else {
7325 nr_failed_routines++;
7326 printf("FAIL> ");
7327 }
7328
7329 if (min_ratio == 1e308)
7330 min_ratio = 0.0;
7331
7332 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7333 fname, total_bad_ratios, total_tests, max_ratio);
7334
7335 fname = "BLAS_ztrsv_c";
7336 printf("Testing %s...\n", fname);
7337 min_ratio = 1e308;
7338 max_ratio = 0.0;
7339 total_bad_ratios = 0;
7340 total_tests = 0;
7341 for (i = 0; i < nsizes; i++) {
7342
7343 n = sizes[i];
7344 total_max_ratio = do_test_ztrsv_c(n, ntests, &seed, thresh, debug,
7345 test_prob, &total_min_ratio,
7346 &num_bad_ratio, &num_tests);
7347 if (total_max_ratio > max_ratio)
7348 max_ratio = total_max_ratio;
7349
7350 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7351 min_ratio = total_min_ratio;
7352
7353 total_bad_ratios += num_bad_ratio;
7354 total_tests += num_tests;
7355 }
7356
7357 nr_routines++;
7358 if (total_bad_ratios == 0)
7359 printf("PASS> ");
7360 else {
7361 nr_failed_routines++;
7362 printf("FAIL> ");
7363 }
7364
7365 if (min_ratio == 1e308)
7366 min_ratio = 0.0;
7367
7368 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7369 fname, total_bad_ratios, total_tests, max_ratio);
7370
7371 fname = "BLAS_ctrsv_s";
7372 printf("Testing %s...\n", fname);
7373 min_ratio = 1e308;
7374 max_ratio = 0.0;
7375 total_bad_ratios = 0;
7376 total_tests = 0;
7377 for (i = 0; i < nsizes; i++) {
7378
7379 n = sizes[i];
7380 total_max_ratio = do_test_ctrsv_s(n, ntests, &seed, thresh, debug,
7381 test_prob, &total_min_ratio,
7382 &num_bad_ratio, &num_tests);
7383 if (total_max_ratio > max_ratio)
7384 max_ratio = total_max_ratio;
7385
7386 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7387 min_ratio = total_min_ratio;
7388
7389 total_bad_ratios += num_bad_ratio;
7390 total_tests += num_tests;
7391 }
7392
7393 nr_routines++;
7394 if (total_bad_ratios == 0)
7395 printf("PASS> ");
7396 else {
7397 nr_failed_routines++;
7398 printf("FAIL> ");
7399 }
7400
7401 if (min_ratio == 1e308)
7402 min_ratio = 0.0;
7403
7404 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7405 fname, total_bad_ratios, total_tests, max_ratio);
7406
7407 fname = "BLAS_ztrsv_d";
7408 printf("Testing %s...\n", fname);
7409 min_ratio = 1e308;
7410 max_ratio = 0.0;
7411 total_bad_ratios = 0;
7412 total_tests = 0;
7413 for (i = 0; i < nsizes; i++) {
7414
7415 n = sizes[i];
7416 total_max_ratio = do_test_ztrsv_d(n, ntests, &seed, thresh, debug,
7417 test_prob, &total_min_ratio,
7418 &num_bad_ratio, &num_tests);
7419 if (total_max_ratio > max_ratio)
7420 max_ratio = total_max_ratio;
7421
7422 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7423 min_ratio = total_min_ratio;
7424
7425 total_bad_ratios += num_bad_ratio;
7426 total_tests += num_tests;
7427 }
7428
7429 nr_routines++;
7430 if (total_bad_ratios == 0)
7431 printf("PASS> ");
7432 else {
7433 nr_failed_routines++;
7434 printf("FAIL> ");
7435 }
7436
7437 if (min_ratio == 1e308)
7438 min_ratio = 0.0;
7439
7440 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7441 fname, total_bad_ratios, total_tests, max_ratio);
7442
7443 fname = "BLAS_strsv_x";
7444 printf("Testing %s...\n", fname);
7445 min_ratio = 1e308;
7446 max_ratio = 0.0;
7447 total_bad_ratios = 0;
7448 total_tests = 0;
7449 for (i = 0; i < nsizes; i++) {
7450
7451 n = sizes[i];
7452 total_max_ratio = do_test_strsv_x(n, ntests, &seed, thresh, debug,
7453 test_prob, &total_min_ratio,
7454 &num_bad_ratio, &num_tests);
7455 if (total_max_ratio > max_ratio)
7456 max_ratio = total_max_ratio;
7457
7458 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7459 min_ratio = total_min_ratio;
7460
7461 total_bad_ratios += num_bad_ratio;
7462 total_tests += num_tests;
7463 }
7464
7465 nr_routines++;
7466 if (total_bad_ratios == 0)
7467 printf("PASS> ");
7468 else {
7469 nr_failed_routines++;
7470 printf("FAIL> ");
7471 }
7472
7473 if (min_ratio == 1e308)
7474 min_ratio = 0.0;
7475
7476 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7477 fname, total_bad_ratios, total_tests, max_ratio);
7478
7479 fname = "BLAS_dtrsv_x";
7480 printf("Testing %s...\n", fname);
7481 min_ratio = 1e308;
7482 max_ratio = 0.0;
7483 total_bad_ratios = 0;
7484 total_tests = 0;
7485 for (i = 0; i < nsizes; i++) {
7486
7487 n = sizes[i];
7488 total_max_ratio = do_test_dtrsv_x(n, ntests, &seed, thresh, debug,
7489 test_prob, &total_min_ratio,
7490 &num_bad_ratio, &num_tests);
7491 if (total_max_ratio > max_ratio)
7492 max_ratio = total_max_ratio;
7493
7494 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7495 min_ratio = total_min_ratio;
7496
7497 total_bad_ratios += num_bad_ratio;
7498 total_tests += num_tests;
7499 }
7500
7501 nr_routines++;
7502 if (total_bad_ratios == 0)
7503 printf("PASS> ");
7504 else {
7505 nr_failed_routines++;
7506 printf("FAIL> ");
7507 }
7508
7509 if (min_ratio == 1e308)
7510 min_ratio = 0.0;
7511
7512 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7513 fname, total_bad_ratios, total_tests, max_ratio);
7514
7515 fname = "BLAS_dtrsv_s_x";
7516 printf("Testing %s...\n", fname);
7517 min_ratio = 1e308;
7518 max_ratio = 0.0;
7519 total_bad_ratios = 0;
7520 total_tests = 0;
7521 for (i = 0; i < nsizes; i++) {
7522
7523 n = sizes[i];
7524 total_max_ratio = do_test_dtrsv_s_x(n, ntests, &seed, thresh, debug,
7525 test_prob, &total_min_ratio,
7526 &num_bad_ratio, &num_tests);
7527 if (total_max_ratio > max_ratio)
7528 max_ratio = total_max_ratio;
7529
7530 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7531 min_ratio = total_min_ratio;
7532
7533 total_bad_ratios += num_bad_ratio;
7534 total_tests += num_tests;
7535 }
7536
7537 nr_routines++;
7538 if (total_bad_ratios == 0)
7539 printf("PASS> ");
7540 else {
7541 nr_failed_routines++;
7542 printf("FAIL> ");
7543 }
7544
7545 if (min_ratio == 1e308)
7546 min_ratio = 0.0;
7547
7548 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7549 fname, total_bad_ratios, total_tests, max_ratio);
7550
7551 fname = "BLAS_ctrsv_x";
7552 printf("Testing %s...\n", fname);
7553 min_ratio = 1e308;
7554 max_ratio = 0.0;
7555 total_bad_ratios = 0;
7556 total_tests = 0;
7557 for (i = 0; i < nsizes; i++) {
7558
7559 n = sizes[i];
7560 total_max_ratio = do_test_ctrsv_x(n, ntests, &seed, thresh, debug,
7561 test_prob, &total_min_ratio,
7562 &num_bad_ratio, &num_tests);
7563 if (total_max_ratio > max_ratio)
7564 max_ratio = total_max_ratio;
7565
7566 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7567 min_ratio = total_min_ratio;
7568
7569 total_bad_ratios += num_bad_ratio;
7570 total_tests += num_tests;
7571 }
7572
7573 nr_routines++;
7574 if (total_bad_ratios == 0)
7575 printf("PASS> ");
7576 else {
7577 nr_failed_routines++;
7578 printf("FAIL> ");
7579 }
7580
7581 if (min_ratio == 1e308)
7582 min_ratio = 0.0;
7583
7584 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7585 fname, total_bad_ratios, total_tests, max_ratio);
7586
7587 fname = "BLAS_ztrsv_x";
7588 printf("Testing %s...\n", fname);
7589 min_ratio = 1e308;
7590 max_ratio = 0.0;
7591 total_bad_ratios = 0;
7592 total_tests = 0;
7593 for (i = 0; i < nsizes; i++) {
7594
7595 n = sizes[i];
7596 total_max_ratio = do_test_ztrsv_x(n, ntests, &seed, thresh, debug,
7597 test_prob, &total_min_ratio,
7598 &num_bad_ratio, &num_tests);
7599 if (total_max_ratio > max_ratio)
7600 max_ratio = total_max_ratio;
7601
7602 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7603 min_ratio = total_min_ratio;
7604
7605 total_bad_ratios += num_bad_ratio;
7606 total_tests += num_tests;
7607 }
7608
7609 nr_routines++;
7610 if (total_bad_ratios == 0)
7611 printf("PASS> ");
7612 else {
7613 nr_failed_routines++;
7614 printf("FAIL> ");
7615 }
7616
7617 if (min_ratio == 1e308)
7618 min_ratio = 0.0;
7619
7620 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7621 fname, total_bad_ratios, total_tests, max_ratio);
7622
7623 fname = "BLAS_ztrsv_c_x";
7624 printf("Testing %s...\n", fname);
7625 min_ratio = 1e308;
7626 max_ratio = 0.0;
7627 total_bad_ratios = 0;
7628 total_tests = 0;
7629 for (i = 0; i < nsizes; i++) {
7630
7631 n = sizes[i];
7632 total_max_ratio = do_test_ztrsv_c_x(n, ntests, &seed, thresh, debug,
7633 test_prob, &total_min_ratio,
7634 &num_bad_ratio, &num_tests);
7635 if (total_max_ratio > max_ratio)
7636 max_ratio = total_max_ratio;
7637
7638 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7639 min_ratio = total_min_ratio;
7640
7641 total_bad_ratios += num_bad_ratio;
7642 total_tests += num_tests;
7643 }
7644
7645 nr_routines++;
7646 if (total_bad_ratios == 0)
7647 printf("PASS> ");
7648 else {
7649 nr_failed_routines++;
7650 printf("FAIL> ");
7651 }
7652
7653 if (min_ratio == 1e308)
7654 min_ratio = 0.0;
7655
7656 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7657 fname, total_bad_ratios, total_tests, max_ratio);
7658
7659 fname = "BLAS_ctrsv_s_x";
7660 printf("Testing %s...\n", fname);
7661 min_ratio = 1e308;
7662 max_ratio = 0.0;
7663 total_bad_ratios = 0;
7664 total_tests = 0;
7665 for (i = 0; i < nsizes; i++) {
7666
7667 n = sizes[i];
7668 total_max_ratio = do_test_ctrsv_s_x(n, ntests, &seed, thresh, debug,
7669 test_prob, &total_min_ratio,
7670 &num_bad_ratio, &num_tests);
7671 if (total_max_ratio > max_ratio)
7672 max_ratio = total_max_ratio;
7673
7674 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7675 min_ratio = total_min_ratio;
7676
7677 total_bad_ratios += num_bad_ratio;
7678 total_tests += num_tests;
7679 }
7680
7681 nr_routines++;
7682 if (total_bad_ratios == 0)
7683 printf("PASS> ");
7684 else {
7685 nr_failed_routines++;
7686 printf("FAIL> ");
7687 }
7688
7689 if (min_ratio == 1e308)
7690 min_ratio = 0.0;
7691
7692 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7693 fname, total_bad_ratios, total_tests, max_ratio);
7694
7695 fname = "BLAS_ztrsv_d_x";
7696 printf("Testing %s...\n", fname);
7697 min_ratio = 1e308;
7698 max_ratio = 0.0;
7699 total_bad_ratios = 0;
7700 total_tests = 0;
7701 for (i = 0; i < nsizes; i++) {
7702
7703 n = sizes[i];
7704 total_max_ratio = do_test_ztrsv_d_x(n, ntests, &seed, thresh, debug,
7705 test_prob, &total_min_ratio,
7706 &num_bad_ratio, &num_tests);
7707 if (total_max_ratio > max_ratio)
7708 max_ratio = total_max_ratio;
7709
7710 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
7711 min_ratio = total_min_ratio;
7712
7713 total_bad_ratios += num_bad_ratio;
7714 total_tests += num_tests;
7715 }
7716
7717 nr_routines++;
7718 if (total_bad_ratios == 0)
7719 printf("PASS> ");
7720 else {
7721 nr_failed_routines++;
7722 printf("FAIL> ");
7723 }
7724
7725 if (min_ratio == 1e308)
7726 min_ratio = 0.0;
7727
7728 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
7729 fname, total_bad_ratios, total_tests, max_ratio);
7730
7731
7732
7733 printf("\n");
7734 if (nr_failed_routines)
7735 printf("FAILED ");
7736 else
7737 printf("PASSED ");
7738 printf("%-10s: FAIL/TOTAL = %d/%d\n",
7739 base_routine, nr_failed_routines, nr_routines);
7740
7741 return 0;
7742 }
7743