1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <math.h>
4 #include "blas_extended.h"
5 #include "blas_extended_private.h"
6 #include "blas_extended_test.h"
7
8
9
do_test_dtpmv_s(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)10 double do_test_dtpmv_s(int n,
11 int ntests,
12 int *seed,
13 double thresh,
14 int debug,
15 float test_prob,
16 double *min_ratio, int *num_bad_ratio, int *num_tests)
17
18 /*
19 * Purpose
20 * =======
21 *
22 * Runs a series of tests on TPMV.
23 *
24 * Arguments
25 * =========
26 *
27 * n (input) int
28 * The size of vector being tested
29 *
30 * ntests (input) int
31 * The number of tests to run for each set of attributes.
32 *
33 * seed (input/output) int
34 * The seed for the random number generator used in testgen().
35 *
36 * thresh (input) double
37 * When the ratio returned from test() exceeds the specified
38 * threshold, the current size, r_true, r_comp, and ratio will be
39 * printed. (Since ratio is supposed to be O(1), we can set thresh
40 * to ~10.)
41 *
42 * debug (input) int
43 * If debug=3, print summary
44 * If debug=2, print summary only if the number of bad ratios > 0
45 * If debug=1, print complete info if tests fail
46 * If debug=0, return max ratio
47 *
48 * min_ratio (output) double
49 * The minimum ratio
50 *
51 * num_bad_ratio (output) int
52 * The number of tests fail; they are above the threshold.
53 *
54 * num_tests (output) int
55 * The number of tests is being performed.
56 *
57 * Return value
58 * ============
59 *
60 * The maximum ratio if run successfully, otherwise return -1
61 *
62 * Code structure
63 * ==============
64 *
65 * debug loop -- if debug is one, the first loop computes the max ratio
66 * -- and the last(second) loop outputs debugging information,
67 * -- if the test fail and its ratio > 0.5 * max ratio.
68 * -- if debug is zero, the loop is executed once
69 * alpha loop -- varying alpha: 0, 1, or random
70 *
71 * norm loop -- varying norm: near undeflow, near one, or
72 * -- near overflow
73 * order loop -- varying order type: rowmajor or colmajor
74 * uplo loop -- varying uplo type: upper or lower
75 * trans loop -- varying trans type: no trans, trans, and conj trans
76 * diag loop -- varying diag type: non unit, and unit
77 * numtest loop -- how many times the test is perform with
78 * -- above set of attributes
79 * incx loop -- varying incx: -2, -1, 1, 2
80 */
81 {
82 /* function name */
83 const char fname[] = "BLAS_dtpmv_s";
84
85 /* max number of debug lines to print */
86 const int max_print = 3;
87
88 /* Variables in the "x_val" form are loop vars for corresponding
89 variables */
90 int i; /* iterate through the repeating tests */
91 int j; /* multipurpose counters */
92 int ix; /* use to index x */
93 int incx_val, incx; /* for testing different inc values */
94 int inctp, inc_index; /* 1 if real, 2 if complex */
95 int d_count; /* counter for debug */
96 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
97 int p_count; /* counter for the number of debug lines printed */
98 int tot_tests; /* total number of tests to be done */
99 int norm; /* input values of near underflow/one/overflow */
100 double ratio_max; /* the current maximum ratio */
101 double ratio_min; /* the current minimum ratio */
102 double *ratios; /* a temporary variable for calculating ratio */
103 double ratio = 0.0; /* the per-use test ratio from test() */
104 int bad_ratios = 0; /* the number of ratios over the threshold */
105 double eps_int; /* the internal epsilon expected--2^(-24) for float */
106 double un_int; /* the internal underflow threshold */
107 double alpha;
108 double beta;
109 float *tp;
110 double *x;
111 double *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
112 float *temp; /* use for calculating ratio */
113
114 /* the true r calculated by testgen(), in double-double */
115 double *head_r_true, *tail_r_true;
116
117 int alpha_val;
118 int alpha_flag = 0, beta_flag = 0;
119 int order_val;
120 enum blas_order_type order_type = 0;
121
122 enum blas_prec_type prec = 0;
123 int uplo_val;
124 enum blas_uplo_type uplo_type = 0;
125 int trans_val;
126 enum blas_trans_type trans_type = 0;
127 int diag_val;
128 enum blas_diag_type diag_type = 0;
129
130 double beta_zero_fake;
131 double rin_zero_fake;
132
133 int saved_seed; /* for saving the original seed */
134 int count, old_count = -1;
135 FPU_FIX_DECL;
136
137 /* test for bad arguments */
138 if (n < 0 || ntests < 0)
139 BLAS_error(fname, 0, 0, NULL);
140
141 /* if there is nothing to test, return all zero */
142 if (n == 0 || ntests == 0) {
143 *min_ratio = 0.0;
144 *num_bad_ratio = 0;
145 *num_tests = 0;
146 return 0.0;
147 }
148
149 FPU_FIX_START;
150
151
152
153 incx = inctp = inc_index = 1;
154
155
156
157
158 /* get space for calculation */
159 x = (double *) blas_malloc(3 * n * 2 * sizeof(double));
160 if (3 * n * 2 > 0 && x == NULL) {
161 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
162 }
163 x_gen = (double *) blas_malloc(3 * n * 2 * sizeof(double));
164 if (3 * n * 2 > 0 && x_gen == NULL) {
165 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
166 }
167 tp = (float *) blas_malloc(2 * n * n * n * inctp * sizeof(float));
168 if (2 * n * n * n * inctp > 0 && tp == NULL) {
169 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
170 }
171 temp = (float *) blas_malloc(3 * n * 2 * sizeof(float));
172 if (3 * n * 2 > 0 && temp == NULL) {
173 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
174 }
175 head_r_true = (double *) blas_malloc(n * sizeof(double));
176 tail_r_true = (double *) blas_malloc(n * sizeof(double));
177 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
178 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
179 }
180 ratios = (double *) blas_malloc(n * sizeof(double));
181 if (n > 0 && ratios == NULL) {
182 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
183 }
184
185
186 /* initialization */
187 saved_seed = *seed;
188 ratio_min = 1e308;
189 ratio_max = 0.0;
190 tot_tests = 0;
191 p_count = 0;
192 count = 0;
193 find_max_ratio = 0;
194 beta = 0.0;
195 beta_flag = 1;
196
197 beta_zero_fake = 0.0;
198 rin_zero_fake = 0.0;
199
200 if (debug == 3)
201 find_max_ratio = 1;
202
203 /* The debug iteration:
204 If debug=1, then will execute the iteration twice. First, compute the
205 max ratio. Second, print info if ratio > (50% * ratio_max). */
206 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
207 bad_ratios = 0; /* set to zero */
208
209 if ((debug == 3) && (d_count == find_max_ratio))
210 *seed = saved_seed; /* restore the original seed */
211
212 /* varying alpha */
213 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
214 alpha_flag = 0;
215 switch (alpha_val) {
216 case 0:
217 alpha = 0.0;
218 alpha_flag = 1;
219 break;
220 case 1:
221 alpha = 1.0;
222 alpha_flag = 1;
223 break;
224 }
225
226
227 eps_int = power(2, -BITS_D);
228 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
229 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
230 prec = blas_prec_double;
231
232 /* values near underflow, 1, or overflow */
233 for (norm = -1; norm <= 1; norm++) {
234
235 /* row or col major */
236 for (order_val = 0; order_val < 2; order_val++) {
237 switch (order_val) {
238 case 0:
239 order_type = blas_rowmajor;
240 break;
241 case 1:
242 order_type = blas_colmajor;
243 break;
244 }
245
246 /* upper or lower */
247 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
248 switch (uplo_val) {
249 case 0:
250 uplo_type = blas_upper;
251 break;
252 case 1:
253 uplo_type = blas_lower;
254 break;
255 }
256
257 /* no trans, trans, or conj trans */
258 for (trans_val = 0; trans_val < 3; trans_val++) {
259 switch (trans_val) {
260 case 0:
261 trans_type = blas_no_trans;
262 break;
263 case 1:
264 trans_type = blas_trans;
265 break;
266 case 2:
267 trans_type = blas_conj_trans;
268 break;
269 }
270
271 /* non_unit_diag, unit_diag */
272 for (diag_val = 0; diag_val < 2; diag_val++) {
273 switch (diag_val) {
274 case 0:
275 diag_type = blas_non_unit_diag;
276 break;
277 case 1:
278 diag_type = blas_unit_diag;
279 break;
280 }
281
282 /* number of tests */
283 for (i = 0; i < ntests; i++) {
284 /* For the sake of speed, we throw out this case at random */
285 if (xrand(seed) >= test_prob)
286 continue;
287
288 BLAS_dtpmv_s_testgen(norm, order_type, uplo_type,
289 trans_type, diag_type, n,
290 &alpha, alpha_flag, tp, x_gen,
291 seed, head_r_true, tail_r_true);
292
293 count++;
294
295 /* varying incx */
296 for (incx_val = -2; incx_val <= 2; incx_val++) {
297 if (incx_val == 0)
298 continue;
299
300 /* setting incx */
301 incx = incx_val;
302
303
304 /* set x starting index */
305 ix = 0;
306 if (incx < 0)
307 ix = -(n - 1) * incx;
308
309 /* copy x_gen to x */
310 for (j = 0; j < n; j++) {
311 x[ix] = x_gen[j * inc_index];
312 ix += incx;
313 }
314
315 /* call BLAS_dtpmv_s */
316 FPU_FIX_STOP;
317 BLAS_dtpmv_s(order_type, uplo_type, trans_type, diag_type,
318 n, alpha, tp, x, incx_val);
319 FPU_FIX_START;
320
321 /* set x starting index */
322 ix = 0;
323 if (incx < 0)
324 ix = -(n - 1) * incx;
325
326 /* computing the ratio */
327 for (j = 0; j < n; j++) {
328 /* copy row j of tp to temp */
329 stpmv_copy_row(order_type, uplo_type, trans_type, n, tp,
330 temp, j);
331
332 test_BLAS_ddot_s_d(n, blas_no_conj,
333 alpha, beta_zero_fake, rin_zero_fake,
334 x[ix],
335 head_r_true[j * inc_index],
336 tail_r_true[j * inc_index], temp, 1,
337 x_gen, 1, eps_int, un_int,
338 &ratios[j]);
339
340 /* take the max ratio */
341 if (j == 0)
342 ratio = ratios[0];
343 else if (ratios[j] > ratio)
344 ratio = ratios[j];
345
346 ix += incx;
347 }
348
349 /* increase the number of bad ratio, if the ratio is
350 bigger than the threshold */
351 if (ratio > thresh) {
352 bad_ratios++;
353
354 if ((debug == 3) && /* print only when debug is on */
355 (count != old_count) && /* print if old vector is different
356 from the current one */
357 (d_count == find_max_ratio) &&
358 (p_count <= max_print) &&
359 (ratio > 0.5 * ratio_max)) {
360 p_count++;
361 old_count = count;
362
363 printf
364 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
365 fname, n, ntests, thresh);
366
367 /* Print test info */
368 switch (prec) {
369 case blas_prec_single:
370 printf("single ");
371 break;
372 case blas_prec_double:
373 printf("double ");
374 break;
375 case blas_prec_indigenous:
376 printf("indigenous ");
377 break;
378 case blas_prec_extra:
379 printf("extra ");
380 break;
381 }
382 switch (norm) {
383 case -1:
384 printf("near_underflow ");
385 break;
386 case 0:
387 printf("near_one ");
388 break;
389 case 1:
390 printf("near_overflow ");
391 break;
392 }
393 switch (order_type) {
394 case blas_rowmajor:
395 printf("row_major ");
396 break;
397 case blas_colmajor:
398 printf("col_major ");
399 break;
400 }
401 switch (uplo_type) {
402 case blas_upper:
403 printf("upper ");
404 break;
405 case blas_lower:
406 printf("lower ");
407 break;
408 }
409 switch (trans_type) {
410 case blas_no_trans:
411 printf("no_trans ");
412 break;
413 case blas_trans:
414 printf("trans ");
415 break;
416 case blas_conj_trans:
417 printf("conj_trans ");
418 break;
419 }
420 switch (diag_type) {
421 case blas_non_unit_diag:
422 printf("non_unit_diag ");
423 break;
424 case blas_unit_diag:
425 printf("unit_diag ");
426 break;
427 }
428
429 printf(" incx=%d:\n", incx);
430 ix = 0;
431 if (incx < 0)
432 ix = -(n - 1) * incx;
433
434 printf(" TP=");
435 for (j = 0; j < n; j++) {
436 /* copy row j of tp to temp */
437 stpmv_copy_row(order_type, uplo_type, trans_type, n,
438 tp, temp, j);
439
440 if (j > 0)
441 printf(" ");
442 sprint_vector(temp, n, 1, NULL);
443 }
444
445 ix = 0;
446 if (incx < 0)
447 ix = -(n - 1) * incx;
448
449 for (j = 0; j < n; j++) {
450 printf(" ");
451 printf("x[%d] = ", j * inc_index);
452 printf("%24.16e", x_gen[j * inc_index]);
453 printf("; ");
454 printf("x_final[%d] = ", ix);
455 printf("%24.16e", x[ix]);
456 printf("\n");
457 ix += incx;
458 }
459
460 printf(" ");
461 printf("alpha = ");
462 printf("%24.16e", alpha);
463 printf("; ");
464 printf("\n");
465 for (j = 0; j < n; j++) {
466 printf(" ");
467
468 printf("[%24.16e, %24.16e]",
469 head_r_true[j * inc_index],
470 tail_r_true[j * inc_index]);
471 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
472 }
473 printf(" ratio=%.4e\n", ratio);
474 }
475 }
476 if (d_count == 0) {
477 if (ratio > ratio_max)
478 ratio_max = ratio;
479 if (ratio != 0.0 && ratio < ratio_min)
480 ratio_min = ratio;
481 tot_tests++;
482 }
483 } /* incx */
484
485 } /* numtests */
486 } /* diag */
487 } /* trans */
488 } /* uplo */
489 } /* order */
490 } /* norm */
491
492 } /* alpha */
493 } /* debug */
494
495 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
496 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
497 fname, n, ntests, thresh);
498 printf
499 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
500 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
501 ratio_min, ratio_max);
502 }
503
504 FPU_FIX_STOP;
505
506 blas_free(x);
507 blas_free(x_gen);
508 blas_free(temp);
509 blas_free(tp);
510 blas_free(head_r_true);
511 blas_free(tail_r_true);
512 blas_free(ratios);
513
514 *min_ratio = ratio_min;
515 *num_bad_ratio = bad_ratios;
516 *num_tests = tot_tests;
517 return ratio_max;
518 } /* end of do_test_dtpmv_s */
519
do_test_ztpmv_c(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)520 double do_test_ztpmv_c(int n,
521 int ntests,
522 int *seed,
523 double thresh,
524 int debug,
525 float test_prob,
526 double *min_ratio, int *num_bad_ratio, int *num_tests)
527
528 /*
529 * Purpose
530 * =======
531 *
532 * Runs a series of tests on TPMV.
533 *
534 * Arguments
535 * =========
536 *
537 * n (input) int
538 * The size of vector being tested
539 *
540 * ntests (input) int
541 * The number of tests to run for each set of attributes.
542 *
543 * seed (input/output) int
544 * The seed for the random number generator used in testgen().
545 *
546 * thresh (input) double
547 * When the ratio returned from test() exceeds the specified
548 * threshold, the current size, r_true, r_comp, and ratio will be
549 * printed. (Since ratio is supposed to be O(1), we can set thresh
550 * to ~10.)
551 *
552 * debug (input) int
553 * If debug=3, print summary
554 * If debug=2, print summary only if the number of bad ratios > 0
555 * If debug=1, print complete info if tests fail
556 * If debug=0, return max ratio
557 *
558 * min_ratio (output) double
559 * The minimum ratio
560 *
561 * num_bad_ratio (output) int
562 * The number of tests fail; they are above the threshold.
563 *
564 * num_tests (output) int
565 * The number of tests is being performed.
566 *
567 * Return value
568 * ============
569 *
570 * The maximum ratio if run successfully, otherwise return -1
571 *
572 * Code structure
573 * ==============
574 *
575 * debug loop -- if debug is one, the first loop computes the max ratio
576 * -- and the last(second) loop outputs debugging information,
577 * -- if the test fail and its ratio > 0.5 * max ratio.
578 * -- if debug is zero, the loop is executed once
579 * alpha loop -- varying alpha: 0, 1, or random
580 *
581 * norm loop -- varying norm: near undeflow, near one, or
582 * -- near overflow
583 * order loop -- varying order type: rowmajor or colmajor
584 * uplo loop -- varying uplo type: upper or lower
585 * trans loop -- varying trans type: no trans, trans, and conj trans
586 * diag loop -- varying diag type: non unit, and unit
587 * numtest loop -- how many times the test is perform with
588 * -- above set of attributes
589 * incx loop -- varying incx: -2, -1, 1, 2
590 */
591 {
592 /* function name */
593 const char fname[] = "BLAS_ztpmv_c";
594
595 /* max number of debug lines to print */
596 const int max_print = 3;
597
598 /* Variables in the "x_val" form are loop vars for corresponding
599 variables */
600 int i; /* iterate through the repeating tests */
601 int j; /* multipurpose counters */
602 int ix; /* use to index x */
603 int incx_val, incx; /* for testing different inc values */
604 int inctp, inc_index; /* 1 if real, 2 if complex */
605 int d_count; /* counter for debug */
606 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
607 int p_count; /* counter for the number of debug lines printed */
608 int tot_tests; /* total number of tests to be done */
609 int norm; /* input values of near underflow/one/overflow */
610 double ratio_max; /* the current maximum ratio */
611 double ratio_min; /* the current minimum ratio */
612 double *ratios; /* a temporary variable for calculating ratio */
613 double ratio = 0.0; /* the per-use test ratio from test() */
614 int bad_ratios = 0; /* the number of ratios over the threshold */
615 double eps_int; /* the internal epsilon expected--2^(-24) for float */
616 double un_int; /* the internal underflow threshold */
617 double alpha[2];
618 double beta[2];
619 float *tp;
620 double *x;
621 double *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
622 float *temp; /* use for calculating ratio */
623
624 /* the true r calculated by testgen(), in double-double */
625 double *head_r_true, *tail_r_true;
626
627
628 int alpha_val;
629 int alpha_flag = 0, beta_flag = 0;
630 int order_val;
631 enum blas_order_type order_type = 0;
632
633 enum blas_prec_type prec = 0;
634 int uplo_val;
635 enum blas_uplo_type uplo_type = 0;
636 int trans_val;
637 enum blas_trans_type trans_type = 0;
638 int diag_val;
639 enum blas_diag_type diag_type = 0;
640
641 double beta_zero_fake[2];
642 double rin_zero_fake[2];
643
644 int saved_seed; /* for saving the original seed */
645 int count, old_count = -1;
646 FPU_FIX_DECL;
647
648 /* test for bad arguments */
649 if (n < 0 || ntests < 0)
650 BLAS_error(fname, 0, 0, NULL);
651
652 /* if there is nothing to test, return all zero */
653 if (n == 0 || ntests == 0) {
654 *min_ratio = 0.0;
655 *num_bad_ratio = 0;
656 *num_tests = 0;
657 return 0.0;
658 }
659
660 FPU_FIX_START;
661
662
663
664 incx = inctp = inc_index = 1;
665 incx *= 2;
666 inctp *= 2;
667 inc_index *= 2;
668
669 /* get space for calculation */
670 x = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
671 if (3 * n * 2 > 0 && x == NULL) {
672 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
673 }
674 x_gen = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
675 if (3 * n * 2 > 0 && x_gen == NULL) {
676 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
677 }
678 tp = (float *) blas_malloc(2 * n * n * n * inctp * sizeof(float) * 2);
679 if (2 * n * n * n * inctp > 0 && tp == NULL) {
680 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
681 }
682 temp = (float *) blas_malloc(3 * n * 2 * sizeof(float) * 2);
683 if (3 * n * 2 > 0 && temp == NULL) {
684 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
685 }
686 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
687 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
688 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
689 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
690 }
691 ratios = (double *) blas_malloc(n * sizeof(double));
692 if (n > 0 && ratios == NULL) {
693 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
694 }
695
696
697 /* initialization */
698 saved_seed = *seed;
699 ratio_min = 1e308;
700 ratio_max = 0.0;
701 tot_tests = 0;
702 p_count = 0;
703 count = 0;
704 find_max_ratio = 0;
705 beta[0] = beta[1] = 0.0;
706 beta_flag = 1;
707
708 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
709 rin_zero_fake[0] = rin_zero_fake[1] = 0.0;
710
711 if (debug == 3)
712 find_max_ratio = 1;
713
714 /* The debug iteration:
715 If debug=1, then will execute the iteration twice. First, compute the
716 max ratio. Second, print info if ratio > (50% * ratio_max). */
717 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
718 bad_ratios = 0; /* set to zero */
719
720 if ((debug == 3) && (d_count == find_max_ratio))
721 *seed = saved_seed; /* restore the original seed */
722
723 /* varying alpha */
724 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
725 alpha_flag = 0;
726 switch (alpha_val) {
727 case 0:
728 alpha[0] = alpha[1] = 0.0;
729 alpha_flag = 1;
730 break;
731 case 1:
732 alpha[0] = 1.0;
733 alpha[1] = 0.0;
734 alpha_flag = 1;
735 break;
736 }
737
738
739 eps_int = power(2, -BITS_D);
740 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
741 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
742 prec = blas_prec_double;
743
744 /* values near underflow, 1, or overflow */
745 for (norm = -1; norm <= 1; norm++) {
746
747 /* row or col major */
748 for (order_val = 0; order_val < 2; order_val++) {
749 switch (order_val) {
750 case 0:
751 order_type = blas_rowmajor;
752 break;
753 case 1:
754 order_type = blas_colmajor;
755 break;
756 }
757
758 /* upper or lower */
759 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
760 switch (uplo_val) {
761 case 0:
762 uplo_type = blas_upper;
763 break;
764 case 1:
765 uplo_type = blas_lower;
766 break;
767 }
768
769 /* no trans, trans, or conj trans */
770 for (trans_val = 0; trans_val < 3; trans_val++) {
771 switch (trans_val) {
772 case 0:
773 trans_type = blas_no_trans;
774 break;
775 case 1:
776 trans_type = blas_trans;
777 break;
778 case 2:
779 trans_type = blas_conj_trans;
780 break;
781 }
782
783 /* non_unit_diag, unit_diag */
784 for (diag_val = 0; diag_val < 2; diag_val++) {
785 switch (diag_val) {
786 case 0:
787 diag_type = blas_non_unit_diag;
788 break;
789 case 1:
790 diag_type = blas_unit_diag;
791 break;
792 }
793
794 /* number of tests */
795 for (i = 0; i < ntests; i++) {
796 /* For the sake of speed, we throw out this case at random */
797 if (xrand(seed) >= test_prob)
798 continue;
799
800 BLAS_ztpmv_c_testgen(norm, order_type, uplo_type,
801 trans_type, diag_type, n,
802 &alpha, alpha_flag, tp, x_gen,
803 seed, head_r_true, tail_r_true);
804
805 count++;
806
807 /* varying incx */
808 for (incx_val = -2; incx_val <= 2; incx_val++) {
809 if (incx_val == 0)
810 continue;
811
812 /* setting incx */
813 incx = incx_val;
814 incx *= 2;
815
816 /* set x starting index */
817 ix = 0;
818 if (incx < 0)
819 ix = -(n - 1) * incx;
820
821 /* copy x_gen to x */
822 for (j = 0; j < n; j++) {
823 x[ix] = x_gen[j * inc_index];
824 x[ix + 1] = x_gen[j * inc_index + 1];
825 ix += incx;
826 }
827
828 /* call BLAS_ztpmv_c */
829 FPU_FIX_STOP;
830 BLAS_ztpmv_c(order_type, uplo_type, trans_type, diag_type,
831 n, alpha, tp, x, incx_val);
832 FPU_FIX_START;
833
834 /* set x starting index */
835 ix = 0;
836 if (incx < 0)
837 ix = -(n - 1) * incx;
838
839 /* computing the ratio */
840 for (j = 0; j < n; j++) {
841 /* copy row j of tp to temp */
842 ctpmv_copy_row(order_type, uplo_type, trans_type, n, tp,
843 temp, j);
844
845 test_BLAS_zdot_c_z(n, blas_no_conj,
846 alpha, beta_zero_fake, rin_zero_fake,
847 &x[ix],
848 &head_r_true[j * inc_index],
849 &tail_r_true[j * inc_index], temp, 1,
850 x_gen, 1, eps_int, un_int,
851 &ratios[j]);
852
853 /* take the max ratio */
854 if (j == 0)
855 ratio = ratios[0];
856 else if (ratios[j] > ratio)
857 ratio = ratios[j];
858
859 ix += incx;
860 }
861
862 /* increase the number of bad ratio, if the ratio is
863 bigger than the threshold */
864 if (ratio > thresh) {
865 bad_ratios++;
866
867 if ((debug == 3) && /* print only when debug is on */
868 (count != old_count) && /* print if old vector is different
869 from the current one */
870 (d_count == find_max_ratio) &&
871 (p_count <= max_print) &&
872 (ratio > 0.5 * ratio_max)) {
873 p_count++;
874 old_count = count;
875
876 printf
877 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
878 fname, n, ntests, thresh);
879
880 /* Print test info */
881 switch (prec) {
882 case blas_prec_single:
883 printf("single ");
884 break;
885 case blas_prec_double:
886 printf("double ");
887 break;
888 case blas_prec_indigenous:
889 printf("indigenous ");
890 break;
891 case blas_prec_extra:
892 printf("extra ");
893 break;
894 }
895 switch (norm) {
896 case -1:
897 printf("near_underflow ");
898 break;
899 case 0:
900 printf("near_one ");
901 break;
902 case 1:
903 printf("near_overflow ");
904 break;
905 }
906 switch (order_type) {
907 case blas_rowmajor:
908 printf("row_major ");
909 break;
910 case blas_colmajor:
911 printf("col_major ");
912 break;
913 }
914 switch (uplo_type) {
915 case blas_upper:
916 printf("upper ");
917 break;
918 case blas_lower:
919 printf("lower ");
920 break;
921 }
922 switch (trans_type) {
923 case blas_no_trans:
924 printf("no_trans ");
925 break;
926 case blas_trans:
927 printf("trans ");
928 break;
929 case blas_conj_trans:
930 printf("conj_trans ");
931 break;
932 }
933 switch (diag_type) {
934 case blas_non_unit_diag:
935 printf("non_unit_diag ");
936 break;
937 case blas_unit_diag:
938 printf("unit_diag ");
939 break;
940 }
941
942 printf(" incx=%d:\n", incx);
943 ix = 0;
944 if (incx < 0)
945 ix = -(n - 1) * incx;
946
947 printf(" TP=");
948 for (j = 0; j < n; j++) {
949 /* copy row j of tp to temp */
950 ctpmv_copy_row(order_type, uplo_type, trans_type, n,
951 tp, temp, j);
952
953 if (j > 0)
954 printf(" ");
955 cprint_vector(temp, n, 1, NULL);
956 }
957
958 ix = 0;
959 if (incx < 0)
960 ix = -(n - 1) * incx;
961
962 for (j = 0; j < n; j++) {
963 printf(" ");
964 printf("x[%d] = ", j * inc_index);
965 printf("(%24.16e, %24.16e)", x_gen[j * inc_index],
966 x_gen[j * inc_index + 1]);
967 printf("; ");
968 printf("x_final[%d] = ", ix);
969 printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
970 printf("\n");
971 ix += incx;
972 }
973
974 printf(" ");
975 printf("alpha = ");
976 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
977 printf("; ");
978 printf("\n");
979 for (j = 0; j < n; j++) {
980 printf(" ");
981
982 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
983 head_r_true[j * inc_index],
984 tail_r_true[j * inc_index],
985 head_r_true[j * inc_index + 1],
986 tail_r_true[j * inc_index + 1]);
987 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
988 }
989 printf(" ratio=%.4e\n", ratio);
990 }
991 }
992 if (d_count == 0) {
993 if (ratio > ratio_max)
994 ratio_max = ratio;
995 if (ratio != 0.0 && ratio < ratio_min)
996 ratio_min = ratio;
997 tot_tests++;
998 }
999 } /* incx */
1000
1001 } /* numtests */
1002 } /* diag */
1003 } /* trans */
1004 } /* uplo */
1005 } /* order */
1006 } /* norm */
1007
1008 } /* alpha */
1009 } /* debug */
1010
1011 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
1012 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
1013 fname, n, ntests, thresh);
1014 printf
1015 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
1016 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
1017 ratio_min, ratio_max);
1018 }
1019
1020 FPU_FIX_STOP;
1021
1022 blas_free(x);
1023 blas_free(x_gen);
1024 blas_free(temp);
1025 blas_free(tp);
1026 blas_free(head_r_true);
1027 blas_free(tail_r_true);
1028 blas_free(ratios);
1029
1030 *min_ratio = ratio_min;
1031 *num_bad_ratio = bad_ratios;
1032 *num_tests = tot_tests;
1033 return ratio_max;
1034 } /* end of do_test_ztpmv_c */
1035
do_test_ctpmv_s(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)1036 double do_test_ctpmv_s(int n,
1037 int ntests,
1038 int *seed,
1039 double thresh,
1040 int debug,
1041 float test_prob,
1042 double *min_ratio, int *num_bad_ratio, int *num_tests)
1043
1044 /*
1045 * Purpose
1046 * =======
1047 *
1048 * Runs a series of tests on TPMV.
1049 *
1050 * Arguments
1051 * =========
1052 *
1053 * n (input) int
1054 * The size of vector being tested
1055 *
1056 * ntests (input) int
1057 * The number of tests to run for each set of attributes.
1058 *
1059 * seed (input/output) int
1060 * The seed for the random number generator used in testgen().
1061 *
1062 * thresh (input) double
1063 * When the ratio returned from test() exceeds the specified
1064 * threshold, the current size, r_true, r_comp, and ratio will be
1065 * printed. (Since ratio is supposed to be O(1), we can set thresh
1066 * to ~10.)
1067 *
1068 * debug (input) int
1069 * If debug=3, print summary
1070 * If debug=2, print summary only if the number of bad ratios > 0
1071 * If debug=1, print complete info if tests fail
1072 * If debug=0, return max ratio
1073 *
1074 * min_ratio (output) double
1075 * The minimum ratio
1076 *
1077 * num_bad_ratio (output) int
1078 * The number of tests fail; they are above the threshold.
1079 *
1080 * num_tests (output) int
1081 * The number of tests is being performed.
1082 *
1083 * Return value
1084 * ============
1085 *
1086 * The maximum ratio if run successfully, otherwise return -1
1087 *
1088 * Code structure
1089 * ==============
1090 *
1091 * debug loop -- if debug is one, the first loop computes the max ratio
1092 * -- and the last(second) loop outputs debugging information,
1093 * -- if the test fail and its ratio > 0.5 * max ratio.
1094 * -- if debug is zero, the loop is executed once
1095 * alpha loop -- varying alpha: 0, 1, or random
1096 *
1097 * norm loop -- varying norm: near undeflow, near one, or
1098 * -- near overflow
1099 * order loop -- varying order type: rowmajor or colmajor
1100 * uplo loop -- varying uplo type: upper or lower
1101 * trans loop -- varying trans type: no trans, trans, and conj trans
1102 * diag loop -- varying diag type: non unit, and unit
1103 * numtest loop -- how many times the test is perform with
1104 * -- above set of attributes
1105 * incx loop -- varying incx: -2, -1, 1, 2
1106 */
1107 {
1108 /* function name */
1109 const char fname[] = "BLAS_ctpmv_s";
1110
1111 /* max number of debug lines to print */
1112 const int max_print = 3;
1113
1114 /* Variables in the "x_val" form are loop vars for corresponding
1115 variables */
1116 int i; /* iterate through the repeating tests */
1117 int j; /* multipurpose counters */
1118 int ix; /* use to index x */
1119 int incx_val, incx; /* for testing different inc values */
1120 int inctp, inc_index; /* 1 if real, 2 if complex */
1121 int d_count; /* counter for debug */
1122 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
1123 int p_count; /* counter for the number of debug lines printed */
1124 int tot_tests; /* total number of tests to be done */
1125 int norm; /* input values of near underflow/one/overflow */
1126 double ratio_max; /* the current maximum ratio */
1127 double ratio_min; /* the current minimum ratio */
1128 double *ratios; /* a temporary variable for calculating ratio */
1129 double ratio = 0.0; /* the per-use test ratio from test() */
1130 int bad_ratios = 0; /* the number of ratios over the threshold */
1131 double eps_int; /* the internal epsilon expected--2^(-24) for float */
1132 double un_int; /* the internal underflow threshold */
1133 float alpha[2];
1134 float beta[2];
1135 float *tp;
1136 float *x;
1137 float *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
1138 float *temp; /* use for calculating ratio */
1139
1140 /* the true r calculated by testgen(), in double-double */
1141 double *head_r_true, *tail_r_true;
1142
1143
1144 int alpha_val;
1145 int alpha_flag = 0, beta_flag = 0;
1146 int order_val;
1147 enum blas_order_type order_type = 0;
1148
1149 enum blas_prec_type prec = 0;
1150 int uplo_val;
1151 enum blas_uplo_type uplo_type = 0;
1152 int trans_val;
1153 enum blas_trans_type trans_type = 0;
1154 int diag_val;
1155 enum blas_diag_type diag_type = 0;
1156
1157 float beta_zero_fake[2];
1158 float rin_zero_fake[2];
1159
1160 int saved_seed; /* for saving the original seed */
1161 int count, old_count = -1;
1162 FPU_FIX_DECL;
1163
1164 /* test for bad arguments */
1165 if (n < 0 || ntests < 0)
1166 BLAS_error(fname, 0, 0, NULL);
1167
1168 /* if there is nothing to test, return all zero */
1169 if (n == 0 || ntests == 0) {
1170 *min_ratio = 0.0;
1171 *num_bad_ratio = 0;
1172 *num_tests = 0;
1173 return 0.0;
1174 }
1175
1176 FPU_FIX_START;
1177
1178
1179
1180 incx = inctp = inc_index = 1;
1181 incx *= 2;
1182
1183 inc_index *= 2;
1184
1185 /* get space for calculation */
1186 x = (float *) blas_malloc(3 * n * 2 * sizeof(float) * 2);
1187 if (3 * n * 2 > 0 && x == NULL) {
1188 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1189 }
1190 x_gen = (float *) blas_malloc(3 * n * 2 * sizeof(float) * 2);
1191 if (3 * n * 2 > 0 && x_gen == NULL) {
1192 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1193 }
1194 tp = (float *) blas_malloc(2 * n * n * n * inctp * sizeof(float));
1195 if (2 * n * n * n * inctp > 0 && tp == NULL) {
1196 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1197 }
1198 temp = (float *) blas_malloc(3 * n * 2 * sizeof(float));
1199 if (3 * n * 2 > 0 && temp == NULL) {
1200 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1201 }
1202 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
1203 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
1204 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1205 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1206 }
1207 ratios = (double *) blas_malloc(n * sizeof(double));
1208 if (n > 0 && ratios == NULL) {
1209 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1210 }
1211
1212
1213 /* initialization */
1214 saved_seed = *seed;
1215 ratio_min = 1e308;
1216 ratio_max = 0.0;
1217 tot_tests = 0;
1218 p_count = 0;
1219 count = 0;
1220 find_max_ratio = 0;
1221 beta[0] = beta[1] = 0.0;
1222 beta_flag = 1;
1223
1224 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
1225 rin_zero_fake[0] = rin_zero_fake[1] = 0.0;
1226
1227 if (debug == 3)
1228 find_max_ratio = 1;
1229
1230 /* The debug iteration:
1231 If debug=1, then will execute the iteration twice. First, compute the
1232 max ratio. Second, print info if ratio > (50% * ratio_max). */
1233 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
1234 bad_ratios = 0; /* set to zero */
1235
1236 if ((debug == 3) && (d_count == find_max_ratio))
1237 *seed = saved_seed; /* restore the original seed */
1238
1239 /* varying alpha */
1240 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
1241 alpha_flag = 0;
1242 switch (alpha_val) {
1243 case 0:
1244 alpha[0] = alpha[1] = 0.0;
1245 alpha_flag = 1;
1246 break;
1247 case 1:
1248 alpha[0] = 1.0;
1249 alpha[1] = 0.0;
1250 alpha_flag = 1;
1251 break;
1252 }
1253
1254
1255 eps_int = power(2, -BITS_S);
1256 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
1257 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
1258 prec = blas_prec_single;
1259
1260 /* values near underflow, 1, or overflow */
1261 for (norm = -1; norm <= 1; norm++) {
1262
1263 /* row or col major */
1264 for (order_val = 0; order_val < 2; order_val++) {
1265 switch (order_val) {
1266 case 0:
1267 order_type = blas_rowmajor;
1268 break;
1269 case 1:
1270 order_type = blas_colmajor;
1271 break;
1272 }
1273
1274 /* upper or lower */
1275 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
1276 switch (uplo_val) {
1277 case 0:
1278 uplo_type = blas_upper;
1279 break;
1280 case 1:
1281 uplo_type = blas_lower;
1282 break;
1283 }
1284
1285 /* no trans, trans, or conj trans */
1286 for (trans_val = 0; trans_val < 3; trans_val++) {
1287 switch (trans_val) {
1288 case 0:
1289 trans_type = blas_no_trans;
1290 break;
1291 case 1:
1292 trans_type = blas_trans;
1293 break;
1294 case 2:
1295 trans_type = blas_conj_trans;
1296 break;
1297 }
1298
1299 /* non_unit_diag, unit_diag */
1300 for (diag_val = 0; diag_val < 2; diag_val++) {
1301 switch (diag_val) {
1302 case 0:
1303 diag_type = blas_non_unit_diag;
1304 break;
1305 case 1:
1306 diag_type = blas_unit_diag;
1307 break;
1308 }
1309
1310 /* number of tests */
1311 for (i = 0; i < ntests; i++) {
1312 /* For the sake of speed, we throw out this case at random */
1313 if (xrand(seed) >= test_prob)
1314 continue;
1315
1316 BLAS_ctpmv_s_testgen(norm, order_type, uplo_type,
1317 trans_type, diag_type, n,
1318 &alpha, alpha_flag, tp, x_gen,
1319 seed, head_r_true, tail_r_true);
1320
1321 count++;
1322
1323 /* varying incx */
1324 for (incx_val = -2; incx_val <= 2; incx_val++) {
1325 if (incx_val == 0)
1326 continue;
1327
1328 /* setting incx */
1329 incx = incx_val;
1330 incx *= 2;
1331
1332 /* set x starting index */
1333 ix = 0;
1334 if (incx < 0)
1335 ix = -(n - 1) * incx;
1336
1337 /* copy x_gen to x */
1338 for (j = 0; j < n; j++) {
1339 x[ix] = x_gen[j * inc_index];
1340 x[ix + 1] = x_gen[j * inc_index + 1];
1341 ix += incx;
1342 }
1343
1344 /* call BLAS_ctpmv_s */
1345 FPU_FIX_STOP;
1346 BLAS_ctpmv_s(order_type, uplo_type, trans_type, diag_type,
1347 n, alpha, tp, x, incx_val);
1348 FPU_FIX_START;
1349
1350 /* set x starting index */
1351 ix = 0;
1352 if (incx < 0)
1353 ix = -(n - 1) * incx;
1354
1355 /* computing the ratio */
1356 for (j = 0; j < n; j++) {
1357 /* copy row j of tp to temp */
1358 stpmv_copy_row(order_type, uplo_type, trans_type, n, tp,
1359 temp, j);
1360
1361 test_BLAS_cdot_s_c(n, blas_no_conj,
1362 alpha, beta_zero_fake, rin_zero_fake,
1363 &x[ix],
1364 &head_r_true[j * inc_index],
1365 &tail_r_true[j * inc_index], temp, 1,
1366 x_gen, 1, eps_int, un_int,
1367 &ratios[j]);
1368
1369 /* take the max ratio */
1370 if (j == 0)
1371 ratio = ratios[0];
1372 else if (ratios[j] > ratio)
1373 ratio = ratios[j];
1374
1375 ix += incx;
1376 }
1377
1378 /* increase the number of bad ratio, if the ratio is
1379 bigger than the threshold */
1380 if (ratio > thresh) {
1381 bad_ratios++;
1382
1383 if ((debug == 3) && /* print only when debug is on */
1384 (count != old_count) && /* print if old vector is different
1385 from the current one */
1386 (d_count == find_max_ratio) &&
1387 (p_count <= max_print) &&
1388 (ratio > 0.5 * ratio_max)) {
1389 p_count++;
1390 old_count = count;
1391
1392 printf
1393 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
1394 fname, n, ntests, thresh);
1395
1396 /* Print test info */
1397 switch (prec) {
1398 case blas_prec_single:
1399 printf("single ");
1400 break;
1401 case blas_prec_double:
1402 printf("double ");
1403 break;
1404 case blas_prec_indigenous:
1405 printf("indigenous ");
1406 break;
1407 case blas_prec_extra:
1408 printf("extra ");
1409 break;
1410 }
1411 switch (norm) {
1412 case -1:
1413 printf("near_underflow ");
1414 break;
1415 case 0:
1416 printf("near_one ");
1417 break;
1418 case 1:
1419 printf("near_overflow ");
1420 break;
1421 }
1422 switch (order_type) {
1423 case blas_rowmajor:
1424 printf("row_major ");
1425 break;
1426 case blas_colmajor:
1427 printf("col_major ");
1428 break;
1429 }
1430 switch (uplo_type) {
1431 case blas_upper:
1432 printf("upper ");
1433 break;
1434 case blas_lower:
1435 printf("lower ");
1436 break;
1437 }
1438 switch (trans_type) {
1439 case blas_no_trans:
1440 printf("no_trans ");
1441 break;
1442 case blas_trans:
1443 printf("trans ");
1444 break;
1445 case blas_conj_trans:
1446 printf("conj_trans ");
1447 break;
1448 }
1449 switch (diag_type) {
1450 case blas_non_unit_diag:
1451 printf("non_unit_diag ");
1452 break;
1453 case blas_unit_diag:
1454 printf("unit_diag ");
1455 break;
1456 }
1457
1458 printf(" incx=%d:\n", incx);
1459 ix = 0;
1460 if (incx < 0)
1461 ix = -(n - 1) * incx;
1462
1463 printf(" TP=");
1464 for (j = 0; j < n; j++) {
1465 /* copy row j of tp to temp */
1466 stpmv_copy_row(order_type, uplo_type, trans_type, n,
1467 tp, temp, j);
1468
1469 if (j > 0)
1470 printf(" ");
1471 sprint_vector(temp, n, 1, NULL);
1472 }
1473
1474 ix = 0;
1475 if (incx < 0)
1476 ix = -(n - 1) * incx;
1477
1478 for (j = 0; j < n; j++) {
1479 printf(" ");
1480 printf("x[%d] = ", j * inc_index);
1481 printf("(%16.8e, %16.8e)", x_gen[j * inc_index],
1482 x_gen[j * inc_index + 1]);
1483 printf("; ");
1484 printf("x_final[%d] = ", ix);
1485 printf("(%16.8e, %16.8e)", x[ix], x[ix + 1]);
1486 printf("\n");
1487 ix += incx;
1488 }
1489
1490 printf(" ");
1491 printf("alpha = ");
1492 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
1493 printf("; ");
1494 printf("\n");
1495 for (j = 0; j < n; j++) {
1496 printf(" ");
1497
1498 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
1499 head_r_true[j * inc_index],
1500 tail_r_true[j * inc_index],
1501 head_r_true[j * inc_index + 1],
1502 tail_r_true[j * inc_index + 1]);
1503 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
1504 }
1505 printf(" ratio=%.4e\n", ratio);
1506 }
1507 }
1508 if (d_count == 0) {
1509 if (ratio > ratio_max)
1510 ratio_max = ratio;
1511 if (ratio != 0.0 && ratio < ratio_min)
1512 ratio_min = ratio;
1513 tot_tests++;
1514 }
1515 } /* incx */
1516
1517 } /* numtests */
1518 } /* diag */
1519 } /* trans */
1520 } /* uplo */
1521 } /* order */
1522 } /* norm */
1523
1524 } /* alpha */
1525 } /* debug */
1526
1527 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
1528 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
1529 fname, n, ntests, thresh);
1530 printf
1531 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
1532 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
1533 ratio_min, ratio_max);
1534 }
1535
1536 FPU_FIX_STOP;
1537
1538 blas_free(x);
1539 blas_free(x_gen);
1540 blas_free(temp);
1541 blas_free(tp);
1542 blas_free(head_r_true);
1543 blas_free(tail_r_true);
1544 blas_free(ratios);
1545
1546 *min_ratio = ratio_min;
1547 *num_bad_ratio = bad_ratios;
1548 *num_tests = tot_tests;
1549 return ratio_max;
1550 } /* end of do_test_ctpmv_s */
1551
do_test_ztpmv_d(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)1552 double do_test_ztpmv_d(int n,
1553 int ntests,
1554 int *seed,
1555 double thresh,
1556 int debug,
1557 float test_prob,
1558 double *min_ratio, int *num_bad_ratio, int *num_tests)
1559
1560 /*
1561 * Purpose
1562 * =======
1563 *
1564 * Runs a series of tests on TPMV.
1565 *
1566 * Arguments
1567 * =========
1568 *
1569 * n (input) int
1570 * The size of vector being tested
1571 *
1572 * ntests (input) int
1573 * The number of tests to run for each set of attributes.
1574 *
1575 * seed (input/output) int
1576 * The seed for the random number generator used in testgen().
1577 *
1578 * thresh (input) double
1579 * When the ratio returned from test() exceeds the specified
1580 * threshold, the current size, r_true, r_comp, and ratio will be
1581 * printed. (Since ratio is supposed to be O(1), we can set thresh
1582 * to ~10.)
1583 *
1584 * debug (input) int
1585 * If debug=3, print summary
1586 * If debug=2, print summary only if the number of bad ratios > 0
1587 * If debug=1, print complete info if tests fail
1588 * If debug=0, return max ratio
1589 *
1590 * min_ratio (output) double
1591 * The minimum ratio
1592 *
1593 * num_bad_ratio (output) int
1594 * The number of tests fail; they are above the threshold.
1595 *
1596 * num_tests (output) int
1597 * The number of tests is being performed.
1598 *
1599 * Return value
1600 * ============
1601 *
1602 * The maximum ratio if run successfully, otherwise return -1
1603 *
1604 * Code structure
1605 * ==============
1606 *
1607 * debug loop -- if debug is one, the first loop computes the max ratio
1608 * -- and the last(second) loop outputs debugging information,
1609 * -- if the test fail and its ratio > 0.5 * max ratio.
1610 * -- if debug is zero, the loop is executed once
1611 * alpha loop -- varying alpha: 0, 1, or random
1612 *
1613 * norm loop -- varying norm: near undeflow, near one, or
1614 * -- near overflow
1615 * order loop -- varying order type: rowmajor or colmajor
1616 * uplo loop -- varying uplo type: upper or lower
1617 * trans loop -- varying trans type: no trans, trans, and conj trans
1618 * diag loop -- varying diag type: non unit, and unit
1619 * numtest loop -- how many times the test is perform with
1620 * -- above set of attributes
1621 * incx loop -- varying incx: -2, -1, 1, 2
1622 */
1623 {
1624 /* function name */
1625 const char fname[] = "BLAS_ztpmv_d";
1626
1627 /* max number of debug lines to print */
1628 const int max_print = 3;
1629
1630 /* Variables in the "x_val" form are loop vars for corresponding
1631 variables */
1632 int i; /* iterate through the repeating tests */
1633 int j; /* multipurpose counters */
1634 int ix; /* use to index x */
1635 int incx_val, incx; /* for testing different inc values */
1636 int inctp, inc_index; /* 1 if real, 2 if complex */
1637 int d_count; /* counter for debug */
1638 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
1639 int p_count; /* counter for the number of debug lines printed */
1640 int tot_tests; /* total number of tests to be done */
1641 int norm; /* input values of near underflow/one/overflow */
1642 double ratio_max; /* the current maximum ratio */
1643 double ratio_min; /* the current minimum ratio */
1644 double *ratios; /* a temporary variable for calculating ratio */
1645 double ratio = 0.0; /* the per-use test ratio from test() */
1646 int bad_ratios = 0; /* the number of ratios over the threshold */
1647 double eps_int; /* the internal epsilon expected--2^(-24) for float */
1648 double un_int; /* the internal underflow threshold */
1649 double alpha[2];
1650 double beta[2];
1651 double *tp;
1652 double *x;
1653 double *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
1654 double *temp; /* use for calculating ratio */
1655
1656 /* the true r calculated by testgen(), in double-double */
1657 double *head_r_true, *tail_r_true;
1658
1659
1660 int alpha_val;
1661 int alpha_flag = 0, beta_flag = 0;
1662 int order_val;
1663 enum blas_order_type order_type = 0;
1664
1665 enum blas_prec_type prec = 0;
1666 int uplo_val;
1667 enum blas_uplo_type uplo_type = 0;
1668 int trans_val;
1669 enum blas_trans_type trans_type = 0;
1670 int diag_val;
1671 enum blas_diag_type diag_type = 0;
1672
1673 double beta_zero_fake[2];
1674 double rin_zero_fake[2];
1675
1676 int saved_seed; /* for saving the original seed */
1677 int count, old_count = -1;
1678 FPU_FIX_DECL;
1679
1680 /* test for bad arguments */
1681 if (n < 0 || ntests < 0)
1682 BLAS_error(fname, 0, 0, NULL);
1683
1684 /* if there is nothing to test, return all zero */
1685 if (n == 0 || ntests == 0) {
1686 *min_ratio = 0.0;
1687 *num_bad_ratio = 0;
1688 *num_tests = 0;
1689 return 0.0;
1690 }
1691
1692 FPU_FIX_START;
1693
1694
1695
1696 incx = inctp = inc_index = 1;
1697 incx *= 2;
1698
1699 inc_index *= 2;
1700
1701 /* get space for calculation */
1702 x = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
1703 if (3 * n * 2 > 0 && x == NULL) {
1704 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1705 }
1706 x_gen = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
1707 if (3 * n * 2 > 0 && x_gen == NULL) {
1708 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1709 }
1710 tp = (double *) blas_malloc(2 * n * n * n * inctp * sizeof(double));
1711 if (2 * n * n * n * inctp > 0 && tp == NULL) {
1712 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1713 }
1714 temp = (double *) blas_malloc(3 * n * 2 * sizeof(double));
1715 if (3 * n * 2 > 0 && temp == NULL) {
1716 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1717 }
1718 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
1719 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
1720 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1721 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1722 }
1723 ratios = (double *) blas_malloc(n * sizeof(double));
1724 if (n > 0 && ratios == NULL) {
1725 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1726 }
1727
1728
1729 /* initialization */
1730 saved_seed = *seed;
1731 ratio_min = 1e308;
1732 ratio_max = 0.0;
1733 tot_tests = 0;
1734 p_count = 0;
1735 count = 0;
1736 find_max_ratio = 0;
1737 beta[0] = beta[1] = 0.0;
1738 beta_flag = 1;
1739
1740 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
1741 rin_zero_fake[0] = rin_zero_fake[1] = 0.0;
1742
1743 if (debug == 3)
1744 find_max_ratio = 1;
1745
1746 /* The debug iteration:
1747 If debug=1, then will execute the iteration twice. First, compute the
1748 max ratio. Second, print info if ratio > (50% * ratio_max). */
1749 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
1750 bad_ratios = 0; /* set to zero */
1751
1752 if ((debug == 3) && (d_count == find_max_ratio))
1753 *seed = saved_seed; /* restore the original seed */
1754
1755 /* varying alpha */
1756 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
1757 alpha_flag = 0;
1758 switch (alpha_val) {
1759 case 0:
1760 alpha[0] = alpha[1] = 0.0;
1761 alpha_flag = 1;
1762 break;
1763 case 1:
1764 alpha[0] = 1.0;
1765 alpha[1] = 0.0;
1766 alpha_flag = 1;
1767 break;
1768 }
1769
1770
1771 eps_int = power(2, -BITS_D);
1772 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1773 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1774 prec = blas_prec_double;
1775
1776 /* values near underflow, 1, or overflow */
1777 for (norm = -1; norm <= 1; norm++) {
1778
1779 /* row or col major */
1780 for (order_val = 0; order_val < 2; order_val++) {
1781 switch (order_val) {
1782 case 0:
1783 order_type = blas_rowmajor;
1784 break;
1785 case 1:
1786 order_type = blas_colmajor;
1787 break;
1788 }
1789
1790 /* upper or lower */
1791 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
1792 switch (uplo_val) {
1793 case 0:
1794 uplo_type = blas_upper;
1795 break;
1796 case 1:
1797 uplo_type = blas_lower;
1798 break;
1799 }
1800
1801 /* no trans, trans, or conj trans */
1802 for (trans_val = 0; trans_val < 3; trans_val++) {
1803 switch (trans_val) {
1804 case 0:
1805 trans_type = blas_no_trans;
1806 break;
1807 case 1:
1808 trans_type = blas_trans;
1809 break;
1810 case 2:
1811 trans_type = blas_conj_trans;
1812 break;
1813 }
1814
1815 /* non_unit_diag, unit_diag */
1816 for (diag_val = 0; diag_val < 2; diag_val++) {
1817 switch (diag_val) {
1818 case 0:
1819 diag_type = blas_non_unit_diag;
1820 break;
1821 case 1:
1822 diag_type = blas_unit_diag;
1823 break;
1824 }
1825
1826 /* number of tests */
1827 for (i = 0; i < ntests; i++) {
1828 /* For the sake of speed, we throw out this case at random */
1829 if (xrand(seed) >= test_prob)
1830 continue;
1831
1832 BLAS_ztpmv_d_testgen(norm, order_type, uplo_type,
1833 trans_type, diag_type, n,
1834 &alpha, alpha_flag, tp, x_gen,
1835 seed, head_r_true, tail_r_true);
1836
1837 count++;
1838
1839 /* varying incx */
1840 for (incx_val = -2; incx_val <= 2; incx_val++) {
1841 if (incx_val == 0)
1842 continue;
1843
1844 /* setting incx */
1845 incx = incx_val;
1846 incx *= 2;
1847
1848 /* set x starting index */
1849 ix = 0;
1850 if (incx < 0)
1851 ix = -(n - 1) * incx;
1852
1853 /* copy x_gen to x */
1854 for (j = 0; j < n; j++) {
1855 x[ix] = x_gen[j * inc_index];
1856 x[ix + 1] = x_gen[j * inc_index + 1];
1857 ix += incx;
1858 }
1859
1860 /* call BLAS_ztpmv_d */
1861 FPU_FIX_STOP;
1862 BLAS_ztpmv_d(order_type, uplo_type, trans_type, diag_type,
1863 n, alpha, tp, x, incx_val);
1864 FPU_FIX_START;
1865
1866 /* set x starting index */
1867 ix = 0;
1868 if (incx < 0)
1869 ix = -(n - 1) * incx;
1870
1871 /* computing the ratio */
1872 for (j = 0; j < n; j++) {
1873 /* copy row j of tp to temp */
1874 dtpmv_copy_row(order_type, uplo_type, trans_type, n, tp,
1875 temp, j);
1876
1877 test_BLAS_zdot_d_z(n, blas_no_conj,
1878 alpha, beta_zero_fake, rin_zero_fake,
1879 &x[ix],
1880 &head_r_true[j * inc_index],
1881 &tail_r_true[j * inc_index], temp, 1,
1882 x_gen, 1, eps_int, un_int,
1883 &ratios[j]);
1884
1885 /* take the max ratio */
1886 if (j == 0)
1887 ratio = ratios[0];
1888 else if (ratios[j] > ratio)
1889 ratio = ratios[j];
1890
1891 ix += incx;
1892 }
1893
1894 /* increase the number of bad ratio, if the ratio is
1895 bigger than the threshold */
1896 if (ratio > thresh) {
1897 bad_ratios++;
1898
1899 if ((debug == 3) && /* print only when debug is on */
1900 (count != old_count) && /* print if old vector is different
1901 from the current one */
1902 (d_count == find_max_ratio) &&
1903 (p_count <= max_print) &&
1904 (ratio > 0.5 * ratio_max)) {
1905 p_count++;
1906 old_count = count;
1907
1908 printf
1909 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
1910 fname, n, ntests, thresh);
1911
1912 /* Print test info */
1913 switch (prec) {
1914 case blas_prec_single:
1915 printf("single ");
1916 break;
1917 case blas_prec_double:
1918 printf("double ");
1919 break;
1920 case blas_prec_indigenous:
1921 printf("indigenous ");
1922 break;
1923 case blas_prec_extra:
1924 printf("extra ");
1925 break;
1926 }
1927 switch (norm) {
1928 case -1:
1929 printf("near_underflow ");
1930 break;
1931 case 0:
1932 printf("near_one ");
1933 break;
1934 case 1:
1935 printf("near_overflow ");
1936 break;
1937 }
1938 switch (order_type) {
1939 case blas_rowmajor:
1940 printf("row_major ");
1941 break;
1942 case blas_colmajor:
1943 printf("col_major ");
1944 break;
1945 }
1946 switch (uplo_type) {
1947 case blas_upper:
1948 printf("upper ");
1949 break;
1950 case blas_lower:
1951 printf("lower ");
1952 break;
1953 }
1954 switch (trans_type) {
1955 case blas_no_trans:
1956 printf("no_trans ");
1957 break;
1958 case blas_trans:
1959 printf("trans ");
1960 break;
1961 case blas_conj_trans:
1962 printf("conj_trans ");
1963 break;
1964 }
1965 switch (diag_type) {
1966 case blas_non_unit_diag:
1967 printf("non_unit_diag ");
1968 break;
1969 case blas_unit_diag:
1970 printf("unit_diag ");
1971 break;
1972 }
1973
1974 printf(" incx=%d:\n", incx);
1975 ix = 0;
1976 if (incx < 0)
1977 ix = -(n - 1) * incx;
1978
1979 printf(" TP=");
1980 for (j = 0; j < n; j++) {
1981 /* copy row j of tp to temp */
1982 dtpmv_copy_row(order_type, uplo_type, trans_type, n,
1983 tp, temp, j);
1984
1985 if (j > 0)
1986 printf(" ");
1987 dprint_vector(temp, n, 1, NULL);
1988 }
1989
1990 ix = 0;
1991 if (incx < 0)
1992 ix = -(n - 1) * incx;
1993
1994 for (j = 0; j < n; j++) {
1995 printf(" ");
1996 printf("x[%d] = ", j * inc_index);
1997 printf("(%24.16e, %24.16e)", x_gen[j * inc_index],
1998 x_gen[j * inc_index + 1]);
1999 printf("; ");
2000 printf("x_final[%d] = ", ix);
2001 printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
2002 printf("\n");
2003 ix += incx;
2004 }
2005
2006 printf(" ");
2007 printf("alpha = ");
2008 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
2009 printf("; ");
2010 printf("\n");
2011 for (j = 0; j < n; j++) {
2012 printf(" ");
2013
2014 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
2015 head_r_true[j * inc_index],
2016 tail_r_true[j * inc_index],
2017 head_r_true[j * inc_index + 1],
2018 tail_r_true[j * inc_index + 1]);
2019 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
2020 }
2021 printf(" ratio=%.4e\n", ratio);
2022 }
2023 }
2024 if (d_count == 0) {
2025 if (ratio > ratio_max)
2026 ratio_max = ratio;
2027 if (ratio != 0.0 && ratio < ratio_min)
2028 ratio_min = ratio;
2029 tot_tests++;
2030 }
2031 } /* incx */
2032
2033 } /* numtests */
2034 } /* diag */
2035 } /* trans */
2036 } /* uplo */
2037 } /* order */
2038 } /* norm */
2039
2040 } /* alpha */
2041 } /* debug */
2042
2043 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
2044 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
2045 fname, n, ntests, thresh);
2046 printf
2047 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
2048 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
2049 ratio_min, ratio_max);
2050 }
2051
2052 FPU_FIX_STOP;
2053
2054 blas_free(x);
2055 blas_free(x_gen);
2056 blas_free(temp);
2057 blas_free(tp);
2058 blas_free(head_r_true);
2059 blas_free(tail_r_true);
2060 blas_free(ratios);
2061
2062 *min_ratio = ratio_min;
2063 *num_bad_ratio = bad_ratios;
2064 *num_tests = tot_tests;
2065 return ratio_max;
2066 } /* end of do_test_ztpmv_d */
2067
do_test_stpmv_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)2068 double do_test_stpmv_x(int n,
2069 int ntests,
2070 int *seed,
2071 double thresh,
2072 int debug,
2073 float test_prob,
2074 double *min_ratio, int *num_bad_ratio, int *num_tests)
2075
2076 /*
2077 * Purpose
2078 * =======
2079 *
2080 * Runs a series of tests on TPMV.
2081 *
2082 * Arguments
2083 * =========
2084 *
2085 * n (input) int
2086 * The size of vector being tested
2087 *
2088 * ntests (input) int
2089 * The number of tests to run for each set of attributes.
2090 *
2091 * seed (input/output) int
2092 * The seed for the random number generator used in testgen().
2093 *
2094 * thresh (input) double
2095 * When the ratio returned from test() exceeds the specified
2096 * threshold, the current size, r_true, r_comp, and ratio will be
2097 * printed. (Since ratio is supposed to be O(1), we can set thresh
2098 * to ~10.)
2099 *
2100 * debug (input) int
2101 * If debug=3, print summary
2102 * If debug=2, print summary only if the number of bad ratios > 0
2103 * If debug=1, print complete info if tests fail
2104 * If debug=0, return max ratio
2105 *
2106 * min_ratio (output) double
2107 * The minimum ratio
2108 *
2109 * num_bad_ratio (output) int
2110 * The number of tests fail; they are above the threshold.
2111 *
2112 * num_tests (output) int
2113 * The number of tests is being performed.
2114 *
2115 * Return value
2116 * ============
2117 *
2118 * The maximum ratio if run successfully, otherwise return -1
2119 *
2120 * Code structure
2121 * ==============
2122 *
2123 * debug loop -- if debug is one, the first loop computes the max ratio
2124 * -- and the last(second) loop outputs debugging information,
2125 * -- if the test fail and its ratio > 0.5 * max ratio.
2126 * -- if debug is zero, the loop is executed once
2127 * alpha loop -- varying alpha: 0, 1, or random
2128 * prec loop -- varying internal prec: single, double, or extra
2129 * norm loop -- varying norm: near undeflow, near one, or
2130 * -- near overflow
2131 * order loop -- varying order type: rowmajor or colmajor
2132 * uplo loop -- varying uplo type: upper or lower
2133 * trans loop -- varying trans type: no trans, trans, and conj trans
2134 * diag loop -- varying diag type: non unit, and unit
2135 * numtest loop -- how many times the test is perform with
2136 * -- above set of attributes
2137 * incx loop -- varying incx: -2, -1, 1, 2
2138 */
2139 {
2140 /* function name */
2141 const char fname[] = "BLAS_stpmv_x";
2142
2143 /* max number of debug lines to print */
2144 const int max_print = 3;
2145
2146 /* Variables in the "x_val" form are loop vars for corresponding
2147 variables */
2148 int i; /* iterate through the repeating tests */
2149 int j; /* multipurpose counters */
2150 int ix; /* use to index x */
2151 int incx_val, incx; /* for testing different inc values */
2152 int inctp, inc_index; /* 1 if real, 2 if complex */
2153 int d_count; /* counter for debug */
2154 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
2155 int p_count; /* counter for the number of debug lines printed */
2156 int tot_tests; /* total number of tests to be done */
2157 int norm; /* input values of near underflow/one/overflow */
2158 double ratio_max; /* the current maximum ratio */
2159 double ratio_min; /* the current minimum ratio */
2160 double *ratios; /* a temporary variable for calculating ratio */
2161 double ratio = 0.0; /* the per-use test ratio from test() */
2162 int bad_ratios = 0; /* the number of ratios over the threshold */
2163 double eps_int; /* the internal epsilon expected--2^(-24) for float */
2164 double un_int; /* the internal underflow threshold */
2165 float alpha;
2166 float beta;
2167 float *tp;
2168 float *x;
2169 float *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
2170 float *temp; /* use for calculating ratio */
2171
2172 /* the true r calculated by testgen(), in double-double */
2173 double *head_r_true, *tail_r_true;
2174
2175 int alpha_val;
2176 int alpha_flag = 0, beta_flag = 0;
2177 int order_val;
2178 enum blas_order_type order_type = 0;
2179 int prec_val;
2180 enum blas_prec_type prec = 0;
2181 int uplo_val;
2182 enum blas_uplo_type uplo_type = 0;
2183 int trans_val;
2184 enum blas_trans_type trans_type = 0;
2185 int diag_val;
2186 enum blas_diag_type diag_type = 0;
2187
2188 float beta_zero_fake;
2189 float rin_zero_fake;
2190
2191 int saved_seed; /* for saving the original seed */
2192 int count, old_count = -1;
2193 FPU_FIX_DECL;
2194
2195 /* test for bad arguments */
2196 if (n < 0 || ntests < 0)
2197 BLAS_error(fname, 0, 0, NULL);
2198
2199 /* if there is nothing to test, return all zero */
2200 if (n == 0 || ntests == 0) {
2201 *min_ratio = 0.0;
2202 *num_bad_ratio = 0;
2203 *num_tests = 0;
2204 return 0.0;
2205 }
2206
2207 FPU_FIX_START;
2208
2209
2210
2211 incx = inctp = inc_index = 1;
2212
2213
2214
2215
2216 /* get space for calculation */
2217 x = (float *) blas_malloc(3 * n * 2 * sizeof(float));
2218 if (3 * n * 2 > 0 && x == NULL) {
2219 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2220 }
2221 x_gen = (float *) blas_malloc(3 * n * 2 * sizeof(float));
2222 if (3 * n * 2 > 0 && x_gen == NULL) {
2223 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2224 }
2225 tp = (float *) blas_malloc(2 * n * n * n * inctp * sizeof(float));
2226 if (2 * n * n * n * inctp > 0 && tp == NULL) {
2227 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2228 }
2229 temp = (float *) blas_malloc(3 * n * 2 * sizeof(float));
2230 if (3 * n * 2 > 0 && temp == NULL) {
2231 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2232 }
2233 head_r_true = (double *) blas_malloc(n * sizeof(double));
2234 tail_r_true = (double *) blas_malloc(n * sizeof(double));
2235 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2236 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2237 }
2238 ratios = (double *) blas_malloc(n * sizeof(double));
2239 if (n > 0 && ratios == NULL) {
2240 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2241 }
2242
2243
2244 /* initialization */
2245 saved_seed = *seed;
2246 ratio_min = 1e308;
2247 ratio_max = 0.0;
2248 tot_tests = 0;
2249 p_count = 0;
2250 count = 0;
2251 find_max_ratio = 0;
2252 beta = 0.0;
2253 beta_flag = 1;
2254
2255 beta_zero_fake = 0.0;
2256 rin_zero_fake = 0.0;
2257
2258 if (debug == 3)
2259 find_max_ratio = 1;
2260
2261 /* The debug iteration:
2262 If debug=1, then will execute the iteration twice. First, compute the
2263 max ratio. Second, print info if ratio > (50% * ratio_max). */
2264 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
2265 bad_ratios = 0; /* set to zero */
2266
2267 if ((debug == 3) && (d_count == find_max_ratio))
2268 *seed = saved_seed; /* restore the original seed */
2269
2270 /* varying alpha */
2271 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
2272 alpha_flag = 0;
2273 switch (alpha_val) {
2274 case 0:
2275 alpha = 0.0;
2276 alpha_flag = 1;
2277 break;
2278 case 1:
2279 alpha = 1.0;
2280 alpha_flag = 1;
2281 break;
2282 }
2283
2284
2285 /* varying extra precs */
2286 for (prec_val = 0; prec_val <= 2; prec_val++) {
2287 switch (prec_val) {
2288 case 0:
2289 eps_int = power(2, -BITS_S);
2290 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
2291 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
2292 prec = blas_prec_single;
2293 break;
2294 case 1:
2295 eps_int = power(2, -BITS_D);
2296 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2297 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2298 prec = blas_prec_double;
2299 break;
2300 case 2:
2301 default:
2302 eps_int = power(2, -BITS_E);
2303 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
2304 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
2305 prec = blas_prec_extra;
2306 break;
2307 }
2308
2309 /* values near underflow, 1, or overflow */
2310 for (norm = -1; norm <= 1; norm++) {
2311
2312 /* row or col major */
2313 for (order_val = 0; order_val < 2; order_val++) {
2314 switch (order_val) {
2315 case 0:
2316 order_type = blas_rowmajor;
2317 break;
2318 case 1:
2319 order_type = blas_colmajor;
2320 break;
2321 }
2322
2323 /* upper or lower */
2324 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
2325 switch (uplo_val) {
2326 case 0:
2327 uplo_type = blas_upper;
2328 break;
2329 case 1:
2330 uplo_type = blas_lower;
2331 break;
2332 }
2333
2334 /* no trans, trans, or conj trans */
2335 for (trans_val = 0; trans_val < 3; trans_val++) {
2336 switch (trans_val) {
2337 case 0:
2338 trans_type = blas_no_trans;
2339 break;
2340 case 1:
2341 trans_type = blas_trans;
2342 break;
2343 case 2:
2344 trans_type = blas_conj_trans;
2345 break;
2346 }
2347
2348 /* non_unit_diag, unit_diag */
2349 for (diag_val = 0; diag_val < 2; diag_val++) {
2350 switch (diag_val) {
2351 case 0:
2352 diag_type = blas_non_unit_diag;
2353 break;
2354 case 1:
2355 diag_type = blas_unit_diag;
2356 break;
2357 }
2358
2359 /* number of tests */
2360 for (i = 0; i < ntests; i++) {
2361 /* For the sake of speed, we throw out this case at random */
2362 if (xrand(seed) >= test_prob)
2363 continue;
2364
2365 BLAS_stpmv_testgen(norm, order_type, uplo_type,
2366 trans_type, diag_type, n,
2367 &alpha, alpha_flag, tp, x_gen,
2368 seed, head_r_true, tail_r_true);
2369
2370 count++;
2371
2372 /* varying incx */
2373 for (incx_val = -2; incx_val <= 2; incx_val++) {
2374 if (incx_val == 0)
2375 continue;
2376
2377 /* setting incx */
2378 incx = incx_val;
2379
2380
2381 /* set x starting index */
2382 ix = 0;
2383 if (incx < 0)
2384 ix = -(n - 1) * incx;
2385
2386 /* copy x_gen to x */
2387 for (j = 0; j < n; j++) {
2388 x[ix] = x_gen[j * inc_index];
2389 ix += incx;
2390 }
2391
2392 /* call BLAS_stpmv_x */
2393 FPU_FIX_STOP;
2394 BLAS_stpmv_x(order_type, uplo_type, trans_type,
2395 diag_type, n, alpha, tp, x, incx_val,
2396 prec);
2397 FPU_FIX_START;
2398
2399 /* set x starting index */
2400 ix = 0;
2401 if (incx < 0)
2402 ix = -(n - 1) * incx;
2403
2404 /* computing the ratio */
2405 for (j = 0; j < n; j++) {
2406 /* copy row j of tp to temp */
2407 stpmv_copy_row(order_type, uplo_type, trans_type, n,
2408 tp, temp, j);
2409
2410 test_BLAS_sdot(n, blas_no_conj,
2411 alpha, beta_zero_fake, rin_zero_fake,
2412 x[ix],
2413 head_r_true[j * inc_index],
2414 tail_r_true[j * inc_index], temp, 1,
2415 x_gen, 1, eps_int, un_int, &ratios[j]);
2416
2417 /* take the max ratio */
2418 if (j == 0)
2419 ratio = ratios[0];
2420 else if (ratios[j] > ratio)
2421 ratio = ratios[j];
2422
2423 ix += incx;
2424 }
2425
2426 /* increase the number of bad ratio, if the ratio is
2427 bigger than the threshold */
2428 if (ratio > thresh) {
2429 bad_ratios++;
2430
2431 if ((debug == 3) && /* print only when debug is on */
2432 (count != old_count) && /* print if old vector is different
2433 from the current one */
2434 (d_count == find_max_ratio) &&
2435 (p_count <= max_print) &&
2436 (ratio > 0.5 * ratio_max)) {
2437 p_count++;
2438 old_count = count;
2439
2440 printf
2441 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
2442 fname, n, ntests, thresh);
2443
2444 /* Print test info */
2445 switch (prec) {
2446 case blas_prec_single:
2447 printf("single ");
2448 break;
2449 case blas_prec_double:
2450 printf("double ");
2451 break;
2452 case blas_prec_indigenous:
2453 printf("indigenous ");
2454 break;
2455 case blas_prec_extra:
2456 printf("extra ");
2457 break;
2458 }
2459 switch (norm) {
2460 case -1:
2461 printf("near_underflow ");
2462 break;
2463 case 0:
2464 printf("near_one ");
2465 break;
2466 case 1:
2467 printf("near_overflow ");
2468 break;
2469 }
2470 switch (order_type) {
2471 case blas_rowmajor:
2472 printf("row_major ");
2473 break;
2474 case blas_colmajor:
2475 printf("col_major ");
2476 break;
2477 }
2478 switch (uplo_type) {
2479 case blas_upper:
2480 printf("upper ");
2481 break;
2482 case blas_lower:
2483 printf("lower ");
2484 break;
2485 }
2486 switch (trans_type) {
2487 case blas_no_trans:
2488 printf("no_trans ");
2489 break;
2490 case blas_trans:
2491 printf("trans ");
2492 break;
2493 case blas_conj_trans:
2494 printf("conj_trans ");
2495 break;
2496 }
2497 switch (diag_type) {
2498 case blas_non_unit_diag:
2499 printf("non_unit_diag ");
2500 break;
2501 case blas_unit_diag:
2502 printf("unit_diag ");
2503 break;
2504 }
2505
2506 printf(" incx=%d:\n", incx);
2507 ix = 0;
2508 if (incx < 0)
2509 ix = -(n - 1) * incx;
2510
2511 printf(" TP=");
2512 for (j = 0; j < n; j++) {
2513 /* copy row j of tp to temp */
2514 stpmv_copy_row(order_type, uplo_type, trans_type,
2515 n, tp, temp, j);
2516
2517 if (j > 0)
2518 printf(" ");
2519 sprint_vector(temp, n, 1, NULL);
2520 }
2521
2522 ix = 0;
2523 if (incx < 0)
2524 ix = -(n - 1) * incx;
2525
2526 for (j = 0; j < n; j++) {
2527 printf(" ");
2528 printf("x[%d] = ", j * inc_index);
2529 printf("%16.8e", x_gen[j * inc_index]);
2530 printf("; ");
2531 printf("x_final[%d] = ", ix);
2532 printf("%16.8e", x[ix]);
2533 printf("\n");
2534 ix += incx;
2535 }
2536
2537 printf(" ");
2538 printf("alpha = ");
2539 printf("%16.8e", alpha);
2540 printf("; ");
2541 printf("\n");
2542 for (j = 0; j < n; j++) {
2543 printf(" ");
2544
2545 printf("[%24.16e, %24.16e]",
2546 head_r_true[j * inc_index],
2547 tail_r_true[j * inc_index]);
2548 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
2549 }
2550 printf(" ratio=%.4e\n", ratio);
2551 }
2552 }
2553 if (d_count == 0) {
2554 if (ratio > ratio_max)
2555 ratio_max = ratio;
2556 if (ratio != 0.0 && ratio < ratio_min)
2557 ratio_min = ratio;
2558 tot_tests++;
2559 }
2560 } /* incx */
2561
2562 } /* numtests */
2563 } /* diag */
2564 } /* trans */
2565 } /* uplo */
2566 } /* order */
2567 } /* norm */
2568 } /* prec */
2569 } /* alpha */
2570 } /* debug */
2571
2572 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
2573 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
2574 fname, n, ntests, thresh);
2575 printf
2576 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
2577 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
2578 ratio_min, ratio_max);
2579 }
2580
2581 FPU_FIX_STOP;
2582
2583 blas_free(x);
2584 blas_free(x_gen);
2585 blas_free(temp);
2586 blas_free(tp);
2587 blas_free(head_r_true);
2588 blas_free(tail_r_true);
2589 blas_free(ratios);
2590
2591 *min_ratio = ratio_min;
2592 *num_bad_ratio = bad_ratios;
2593 *num_tests = tot_tests;
2594 return ratio_max;
2595 } /* end of do_test_stpmv_x */
2596
do_test_dtpmv_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)2597 double do_test_dtpmv_x(int n,
2598 int ntests,
2599 int *seed,
2600 double thresh,
2601 int debug,
2602 float test_prob,
2603 double *min_ratio, int *num_bad_ratio, int *num_tests)
2604
2605 /*
2606 * Purpose
2607 * =======
2608 *
2609 * Runs a series of tests on TPMV.
2610 *
2611 * Arguments
2612 * =========
2613 *
2614 * n (input) int
2615 * The size of vector being tested
2616 *
2617 * ntests (input) int
2618 * The number of tests to run for each set of attributes.
2619 *
2620 * seed (input/output) int
2621 * The seed for the random number generator used in testgen().
2622 *
2623 * thresh (input) double
2624 * When the ratio returned from test() exceeds the specified
2625 * threshold, the current size, r_true, r_comp, and ratio will be
2626 * printed. (Since ratio is supposed to be O(1), we can set thresh
2627 * to ~10.)
2628 *
2629 * debug (input) int
2630 * If debug=3, print summary
2631 * If debug=2, print summary only if the number of bad ratios > 0
2632 * If debug=1, print complete info if tests fail
2633 * If debug=0, return max ratio
2634 *
2635 * min_ratio (output) double
2636 * The minimum ratio
2637 *
2638 * num_bad_ratio (output) int
2639 * The number of tests fail; they are above the threshold.
2640 *
2641 * num_tests (output) int
2642 * The number of tests is being performed.
2643 *
2644 * Return value
2645 * ============
2646 *
2647 * The maximum ratio if run successfully, otherwise return -1
2648 *
2649 * Code structure
2650 * ==============
2651 *
2652 * debug loop -- if debug is one, the first loop computes the max ratio
2653 * -- and the last(second) loop outputs debugging information,
2654 * -- if the test fail and its ratio > 0.5 * max ratio.
2655 * -- if debug is zero, the loop is executed once
2656 * alpha loop -- varying alpha: 0, 1, or random
2657 * prec loop -- varying internal prec: single, double, or extra
2658 * norm loop -- varying norm: near undeflow, near one, or
2659 * -- near overflow
2660 * order loop -- varying order type: rowmajor or colmajor
2661 * uplo loop -- varying uplo type: upper or lower
2662 * trans loop -- varying trans type: no trans, trans, and conj trans
2663 * diag loop -- varying diag type: non unit, and unit
2664 * numtest loop -- how many times the test is perform with
2665 * -- above set of attributes
2666 * incx loop -- varying incx: -2, -1, 1, 2
2667 */
2668 {
2669 /* function name */
2670 const char fname[] = "BLAS_dtpmv_x";
2671
2672 /* max number of debug lines to print */
2673 const int max_print = 3;
2674
2675 /* Variables in the "x_val" form are loop vars for corresponding
2676 variables */
2677 int i; /* iterate through the repeating tests */
2678 int j; /* multipurpose counters */
2679 int ix; /* use to index x */
2680 int incx_val, incx; /* for testing different inc values */
2681 int inctp, inc_index; /* 1 if real, 2 if complex */
2682 int d_count; /* counter for debug */
2683 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
2684 int p_count; /* counter for the number of debug lines printed */
2685 int tot_tests; /* total number of tests to be done */
2686 int norm; /* input values of near underflow/one/overflow */
2687 double ratio_max; /* the current maximum ratio */
2688 double ratio_min; /* the current minimum ratio */
2689 double *ratios; /* a temporary variable for calculating ratio */
2690 double ratio = 0.0; /* the per-use test ratio from test() */
2691 int bad_ratios = 0; /* the number of ratios over the threshold */
2692 double eps_int; /* the internal epsilon expected--2^(-24) for float */
2693 double un_int; /* the internal underflow threshold */
2694 double alpha;
2695 double beta;
2696 double *tp;
2697 double *x;
2698 double *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
2699 double *temp; /* use for calculating ratio */
2700
2701 /* the true r calculated by testgen(), in double-double */
2702 double *head_r_true, *tail_r_true;
2703
2704 int alpha_val;
2705 int alpha_flag = 0, beta_flag = 0;
2706 int order_val;
2707 enum blas_order_type order_type = 0;
2708 int prec_val;
2709 enum blas_prec_type prec = 0;
2710 int uplo_val;
2711 enum blas_uplo_type uplo_type = 0;
2712 int trans_val;
2713 enum blas_trans_type trans_type = 0;
2714 int diag_val;
2715 enum blas_diag_type diag_type = 0;
2716
2717 double beta_zero_fake;
2718 double rin_zero_fake;
2719
2720 int saved_seed; /* for saving the original seed */
2721 int count, old_count = -1;
2722 FPU_FIX_DECL;
2723
2724 /* test for bad arguments */
2725 if (n < 0 || ntests < 0)
2726 BLAS_error(fname, 0, 0, NULL);
2727
2728 /* if there is nothing to test, return all zero */
2729 if (n == 0 || ntests == 0) {
2730 *min_ratio = 0.0;
2731 *num_bad_ratio = 0;
2732 *num_tests = 0;
2733 return 0.0;
2734 }
2735
2736 FPU_FIX_START;
2737
2738
2739
2740 incx = inctp = inc_index = 1;
2741
2742
2743
2744
2745 /* get space for calculation */
2746 x = (double *) blas_malloc(3 * n * 2 * sizeof(double));
2747 if (3 * n * 2 > 0 && x == NULL) {
2748 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2749 }
2750 x_gen = (double *) blas_malloc(3 * n * 2 * sizeof(double));
2751 if (3 * n * 2 > 0 && x_gen == NULL) {
2752 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2753 }
2754 tp = (double *) blas_malloc(2 * n * n * n * inctp * sizeof(double));
2755 if (2 * n * n * n * inctp > 0 && tp == NULL) {
2756 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2757 }
2758 temp = (double *) blas_malloc(3 * n * 2 * sizeof(double));
2759 if (3 * n * 2 > 0 && temp == NULL) {
2760 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2761 }
2762 head_r_true = (double *) blas_malloc(n * sizeof(double));
2763 tail_r_true = (double *) blas_malloc(n * sizeof(double));
2764 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2765 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2766 }
2767 ratios = (double *) blas_malloc(n * sizeof(double));
2768 if (n > 0 && ratios == NULL) {
2769 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2770 }
2771
2772
2773 /* initialization */
2774 saved_seed = *seed;
2775 ratio_min = 1e308;
2776 ratio_max = 0.0;
2777 tot_tests = 0;
2778 p_count = 0;
2779 count = 0;
2780 find_max_ratio = 0;
2781 beta = 0.0;
2782 beta_flag = 1;
2783
2784 beta_zero_fake = 0.0;
2785 rin_zero_fake = 0.0;
2786
2787 if (debug == 3)
2788 find_max_ratio = 1;
2789
2790 /* The debug iteration:
2791 If debug=1, then will execute the iteration twice. First, compute the
2792 max ratio. Second, print info if ratio > (50% * ratio_max). */
2793 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
2794 bad_ratios = 0; /* set to zero */
2795
2796 if ((debug == 3) && (d_count == find_max_ratio))
2797 *seed = saved_seed; /* restore the original seed */
2798
2799 /* varying alpha */
2800 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
2801 alpha_flag = 0;
2802 switch (alpha_val) {
2803 case 0:
2804 alpha = 0.0;
2805 alpha_flag = 1;
2806 break;
2807 case 1:
2808 alpha = 1.0;
2809 alpha_flag = 1;
2810 break;
2811 }
2812
2813
2814 /* varying extra precs */
2815 for (prec_val = 0; prec_val <= 2; prec_val++) {
2816 switch (prec_val) {
2817 case 0:
2818 eps_int = power(2, -BITS_D);
2819 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2820 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2821 prec = blas_prec_double;
2822 break;
2823 case 1:
2824 eps_int = power(2, -BITS_D);
2825 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2826 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2827 prec = blas_prec_double;
2828 break;
2829 case 2:
2830 default:
2831 eps_int = power(2, -BITS_E);
2832 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
2833 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
2834 prec = blas_prec_extra;
2835 break;
2836 }
2837
2838 /* values near underflow, 1, or overflow */
2839 for (norm = -1; norm <= 1; norm++) {
2840
2841 /* row or col major */
2842 for (order_val = 0; order_val < 2; order_val++) {
2843 switch (order_val) {
2844 case 0:
2845 order_type = blas_rowmajor;
2846 break;
2847 case 1:
2848 order_type = blas_colmajor;
2849 break;
2850 }
2851
2852 /* upper or lower */
2853 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
2854 switch (uplo_val) {
2855 case 0:
2856 uplo_type = blas_upper;
2857 break;
2858 case 1:
2859 uplo_type = blas_lower;
2860 break;
2861 }
2862
2863 /* no trans, trans, or conj trans */
2864 for (trans_val = 0; trans_val < 3; trans_val++) {
2865 switch (trans_val) {
2866 case 0:
2867 trans_type = blas_no_trans;
2868 break;
2869 case 1:
2870 trans_type = blas_trans;
2871 break;
2872 case 2:
2873 trans_type = blas_conj_trans;
2874 break;
2875 }
2876
2877 /* non_unit_diag, unit_diag */
2878 for (diag_val = 0; diag_val < 2; diag_val++) {
2879 switch (diag_val) {
2880 case 0:
2881 diag_type = blas_non_unit_diag;
2882 break;
2883 case 1:
2884 diag_type = blas_unit_diag;
2885 break;
2886 }
2887
2888 /* number of tests */
2889 for (i = 0; i < ntests; i++) {
2890 /* For the sake of speed, we throw out this case at random */
2891 if (xrand(seed) >= test_prob)
2892 continue;
2893
2894 BLAS_dtpmv_testgen(norm, order_type, uplo_type,
2895 trans_type, diag_type, n,
2896 &alpha, alpha_flag, tp, x_gen,
2897 seed, head_r_true, tail_r_true);
2898
2899 count++;
2900
2901 /* varying incx */
2902 for (incx_val = -2; incx_val <= 2; incx_val++) {
2903 if (incx_val == 0)
2904 continue;
2905
2906 /* setting incx */
2907 incx = incx_val;
2908
2909
2910 /* set x starting index */
2911 ix = 0;
2912 if (incx < 0)
2913 ix = -(n - 1) * incx;
2914
2915 /* copy x_gen to x */
2916 for (j = 0; j < n; j++) {
2917 x[ix] = x_gen[j * inc_index];
2918 ix += incx;
2919 }
2920
2921 /* call BLAS_dtpmv_x */
2922 FPU_FIX_STOP;
2923 BLAS_dtpmv_x(order_type, uplo_type, trans_type,
2924 diag_type, n, alpha, tp, x, incx_val,
2925 prec);
2926 FPU_FIX_START;
2927
2928 /* set x starting index */
2929 ix = 0;
2930 if (incx < 0)
2931 ix = -(n - 1) * incx;
2932
2933 /* computing the ratio */
2934 for (j = 0; j < n; j++) {
2935 /* copy row j of tp to temp */
2936 dtpmv_copy_row(order_type, uplo_type, trans_type, n,
2937 tp, temp, j);
2938
2939 test_BLAS_ddot(n, blas_no_conj,
2940 alpha, beta_zero_fake, rin_zero_fake,
2941 x[ix],
2942 head_r_true[j * inc_index],
2943 tail_r_true[j * inc_index], temp, 1,
2944 x_gen, 1, eps_int, un_int, &ratios[j]);
2945
2946 /* take the max ratio */
2947 if (j == 0)
2948 ratio = ratios[0];
2949 else if (ratios[j] > ratio)
2950 ratio = ratios[j];
2951
2952 ix += incx;
2953 }
2954
2955 /* increase the number of bad ratio, if the ratio is
2956 bigger than the threshold */
2957 if (ratio > thresh) {
2958 bad_ratios++;
2959
2960 if ((debug == 3) && /* print only when debug is on */
2961 (count != old_count) && /* print if old vector is different
2962 from the current one */
2963 (d_count == find_max_ratio) &&
2964 (p_count <= max_print) &&
2965 (ratio > 0.5 * ratio_max)) {
2966 p_count++;
2967 old_count = count;
2968
2969 printf
2970 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
2971 fname, n, ntests, thresh);
2972
2973 /* Print test info */
2974 switch (prec) {
2975 case blas_prec_single:
2976 printf("single ");
2977 break;
2978 case blas_prec_double:
2979 printf("double ");
2980 break;
2981 case blas_prec_indigenous:
2982 printf("indigenous ");
2983 break;
2984 case blas_prec_extra:
2985 printf("extra ");
2986 break;
2987 }
2988 switch (norm) {
2989 case -1:
2990 printf("near_underflow ");
2991 break;
2992 case 0:
2993 printf("near_one ");
2994 break;
2995 case 1:
2996 printf("near_overflow ");
2997 break;
2998 }
2999 switch (order_type) {
3000 case blas_rowmajor:
3001 printf("row_major ");
3002 break;
3003 case blas_colmajor:
3004 printf("col_major ");
3005 break;
3006 }
3007 switch (uplo_type) {
3008 case blas_upper:
3009 printf("upper ");
3010 break;
3011 case blas_lower:
3012 printf("lower ");
3013 break;
3014 }
3015 switch (trans_type) {
3016 case blas_no_trans:
3017 printf("no_trans ");
3018 break;
3019 case blas_trans:
3020 printf("trans ");
3021 break;
3022 case blas_conj_trans:
3023 printf("conj_trans ");
3024 break;
3025 }
3026 switch (diag_type) {
3027 case blas_non_unit_diag:
3028 printf("non_unit_diag ");
3029 break;
3030 case blas_unit_diag:
3031 printf("unit_diag ");
3032 break;
3033 }
3034
3035 printf(" incx=%d:\n", incx);
3036 ix = 0;
3037 if (incx < 0)
3038 ix = -(n - 1) * incx;
3039
3040 printf(" TP=");
3041 for (j = 0; j < n; j++) {
3042 /* copy row j of tp to temp */
3043 dtpmv_copy_row(order_type, uplo_type, trans_type,
3044 n, tp, temp, j);
3045
3046 if (j > 0)
3047 printf(" ");
3048 dprint_vector(temp, n, 1, NULL);
3049 }
3050
3051 ix = 0;
3052 if (incx < 0)
3053 ix = -(n - 1) * incx;
3054
3055 for (j = 0; j < n; j++) {
3056 printf(" ");
3057 printf("x[%d] = ", j * inc_index);
3058 printf("%24.16e", x_gen[j * inc_index]);
3059 printf("; ");
3060 printf("x_final[%d] = ", ix);
3061 printf("%24.16e", x[ix]);
3062 printf("\n");
3063 ix += incx;
3064 }
3065
3066 printf(" ");
3067 printf("alpha = ");
3068 printf("%24.16e", alpha);
3069 printf("; ");
3070 printf("\n");
3071 for (j = 0; j < n; j++) {
3072 printf(" ");
3073
3074 printf("[%24.16e, %24.16e]",
3075 head_r_true[j * inc_index],
3076 tail_r_true[j * inc_index]);
3077 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
3078 }
3079 printf(" ratio=%.4e\n", ratio);
3080 }
3081 }
3082 if (d_count == 0) {
3083 if (ratio > ratio_max)
3084 ratio_max = ratio;
3085 if (ratio != 0.0 && ratio < ratio_min)
3086 ratio_min = ratio;
3087 tot_tests++;
3088 }
3089 } /* incx */
3090
3091 } /* numtests */
3092 } /* diag */
3093 } /* trans */
3094 } /* uplo */
3095 } /* order */
3096 } /* norm */
3097 } /* prec */
3098 } /* alpha */
3099 } /* debug */
3100
3101 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
3102 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
3103 fname, n, ntests, thresh);
3104 printf
3105 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
3106 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
3107 ratio_min, ratio_max);
3108 }
3109
3110 FPU_FIX_STOP;
3111
3112 blas_free(x);
3113 blas_free(x_gen);
3114 blas_free(temp);
3115 blas_free(tp);
3116 blas_free(head_r_true);
3117 blas_free(tail_r_true);
3118 blas_free(ratios);
3119
3120 *min_ratio = ratio_min;
3121 *num_bad_ratio = bad_ratios;
3122 *num_tests = tot_tests;
3123 return ratio_max;
3124 } /* end of do_test_dtpmv_x */
3125
do_test_ctpmv_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)3126 double do_test_ctpmv_x(int n,
3127 int ntests,
3128 int *seed,
3129 double thresh,
3130 int debug,
3131 float test_prob,
3132 double *min_ratio, int *num_bad_ratio, int *num_tests)
3133
3134 /*
3135 * Purpose
3136 * =======
3137 *
3138 * Runs a series of tests on TPMV.
3139 *
3140 * Arguments
3141 * =========
3142 *
3143 * n (input) int
3144 * The size of vector being tested
3145 *
3146 * ntests (input) int
3147 * The number of tests to run for each set of attributes.
3148 *
3149 * seed (input/output) int
3150 * The seed for the random number generator used in testgen().
3151 *
3152 * thresh (input) double
3153 * When the ratio returned from test() exceeds the specified
3154 * threshold, the current size, r_true, r_comp, and ratio will be
3155 * printed. (Since ratio is supposed to be O(1), we can set thresh
3156 * to ~10.)
3157 *
3158 * debug (input) int
3159 * If debug=3, print summary
3160 * If debug=2, print summary only if the number of bad ratios > 0
3161 * If debug=1, print complete info if tests fail
3162 * If debug=0, return max ratio
3163 *
3164 * min_ratio (output) double
3165 * The minimum ratio
3166 *
3167 * num_bad_ratio (output) int
3168 * The number of tests fail; they are above the threshold.
3169 *
3170 * num_tests (output) int
3171 * The number of tests is being performed.
3172 *
3173 * Return value
3174 * ============
3175 *
3176 * The maximum ratio if run successfully, otherwise return -1
3177 *
3178 * Code structure
3179 * ==============
3180 *
3181 * debug loop -- if debug is one, the first loop computes the max ratio
3182 * -- and the last(second) loop outputs debugging information,
3183 * -- if the test fail and its ratio > 0.5 * max ratio.
3184 * -- if debug is zero, the loop is executed once
3185 * alpha loop -- varying alpha: 0, 1, or random
3186 * prec loop -- varying internal prec: single, double, or extra
3187 * norm loop -- varying norm: near undeflow, near one, or
3188 * -- near overflow
3189 * order loop -- varying order type: rowmajor or colmajor
3190 * uplo loop -- varying uplo type: upper or lower
3191 * trans loop -- varying trans type: no trans, trans, and conj trans
3192 * diag loop -- varying diag type: non unit, and unit
3193 * numtest loop -- how many times the test is perform with
3194 * -- above set of attributes
3195 * incx loop -- varying incx: -2, -1, 1, 2
3196 */
3197 {
3198 /* function name */
3199 const char fname[] = "BLAS_ctpmv_x";
3200
3201 /* max number of debug lines to print */
3202 const int max_print = 3;
3203
3204 /* Variables in the "x_val" form are loop vars for corresponding
3205 variables */
3206 int i; /* iterate through the repeating tests */
3207 int j; /* multipurpose counters */
3208 int ix; /* use to index x */
3209 int incx_val, incx; /* for testing different inc values */
3210 int inctp, inc_index; /* 1 if real, 2 if complex */
3211 int d_count; /* counter for debug */
3212 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
3213 int p_count; /* counter for the number of debug lines printed */
3214 int tot_tests; /* total number of tests to be done */
3215 int norm; /* input values of near underflow/one/overflow */
3216 double ratio_max; /* the current maximum ratio */
3217 double ratio_min; /* the current minimum ratio */
3218 double *ratios; /* a temporary variable for calculating ratio */
3219 double ratio = 0.0; /* the per-use test ratio from test() */
3220 int bad_ratios = 0; /* the number of ratios over the threshold */
3221 double eps_int; /* the internal epsilon expected--2^(-24) for float */
3222 double un_int; /* the internal underflow threshold */
3223 float alpha[2];
3224 float beta[2];
3225 float *tp;
3226 float *x;
3227 float *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
3228 float *temp; /* use for calculating ratio */
3229
3230 /* the true r calculated by testgen(), in double-double */
3231 double *head_r_true, *tail_r_true;
3232
3233
3234 int alpha_val;
3235 int alpha_flag = 0, beta_flag = 0;
3236 int order_val;
3237 enum blas_order_type order_type = 0;
3238 int prec_val;
3239 enum blas_prec_type prec = 0;
3240 int uplo_val;
3241 enum blas_uplo_type uplo_type = 0;
3242 int trans_val;
3243 enum blas_trans_type trans_type = 0;
3244 int diag_val;
3245 enum blas_diag_type diag_type = 0;
3246
3247 float beta_zero_fake[2];
3248 float rin_zero_fake[2];
3249
3250 int saved_seed; /* for saving the original seed */
3251 int count, old_count = -1;
3252 FPU_FIX_DECL;
3253
3254 /* test for bad arguments */
3255 if (n < 0 || ntests < 0)
3256 BLAS_error(fname, 0, 0, NULL);
3257
3258 /* if there is nothing to test, return all zero */
3259 if (n == 0 || ntests == 0) {
3260 *min_ratio = 0.0;
3261 *num_bad_ratio = 0;
3262 *num_tests = 0;
3263 return 0.0;
3264 }
3265
3266 FPU_FIX_START;
3267
3268
3269
3270 incx = inctp = inc_index = 1;
3271 incx *= 2;
3272 inctp *= 2;
3273 inc_index *= 2;
3274
3275 /* get space for calculation */
3276 x = (float *) blas_malloc(3 * n * 2 * sizeof(float) * 2);
3277 if (3 * n * 2 > 0 && x == NULL) {
3278 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3279 }
3280 x_gen = (float *) blas_malloc(3 * n * 2 * sizeof(float) * 2);
3281 if (3 * n * 2 > 0 && x_gen == NULL) {
3282 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3283 }
3284 tp = (float *) blas_malloc(2 * n * n * n * inctp * sizeof(float) * 2);
3285 if (2 * n * n * n * inctp > 0 && tp == NULL) {
3286 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3287 }
3288 temp = (float *) blas_malloc(3 * n * 2 * sizeof(float) * 2);
3289 if (3 * n * 2 > 0 && temp == NULL) {
3290 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3291 }
3292 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
3293 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
3294 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
3295 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3296 }
3297 ratios = (double *) blas_malloc(n * sizeof(double));
3298 if (n > 0 && ratios == NULL) {
3299 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3300 }
3301
3302
3303 /* initialization */
3304 saved_seed = *seed;
3305 ratio_min = 1e308;
3306 ratio_max = 0.0;
3307 tot_tests = 0;
3308 p_count = 0;
3309 count = 0;
3310 find_max_ratio = 0;
3311 beta[0] = beta[1] = 0.0;
3312 beta_flag = 1;
3313
3314 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
3315 rin_zero_fake[0] = rin_zero_fake[1] = 0.0;
3316
3317 if (debug == 3)
3318 find_max_ratio = 1;
3319
3320 /* The debug iteration:
3321 If debug=1, then will execute the iteration twice. First, compute the
3322 max ratio. Second, print info if ratio > (50% * ratio_max). */
3323 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
3324 bad_ratios = 0; /* set to zero */
3325
3326 if ((debug == 3) && (d_count == find_max_ratio))
3327 *seed = saved_seed; /* restore the original seed */
3328
3329 /* varying alpha */
3330 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
3331 alpha_flag = 0;
3332 switch (alpha_val) {
3333 case 0:
3334 alpha[0] = alpha[1] = 0.0;
3335 alpha_flag = 1;
3336 break;
3337 case 1:
3338 alpha[0] = 1.0;
3339 alpha[1] = 0.0;
3340 alpha_flag = 1;
3341 break;
3342 }
3343
3344
3345 /* varying extra precs */
3346 for (prec_val = 0; prec_val <= 2; prec_val++) {
3347 switch (prec_val) {
3348 case 0:
3349 eps_int = power(2, -BITS_S);
3350 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
3351 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
3352 prec = blas_prec_single;
3353 break;
3354 case 1:
3355 eps_int = power(2, -BITS_D);
3356 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3357 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3358 prec = blas_prec_double;
3359 break;
3360 case 2:
3361 default:
3362 eps_int = power(2, -BITS_E);
3363 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
3364 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
3365 prec = blas_prec_extra;
3366 break;
3367 }
3368
3369 /* values near underflow, 1, or overflow */
3370 for (norm = -1; norm <= 1; norm++) {
3371
3372 /* row or col major */
3373 for (order_val = 0; order_val < 2; order_val++) {
3374 switch (order_val) {
3375 case 0:
3376 order_type = blas_rowmajor;
3377 break;
3378 case 1:
3379 order_type = blas_colmajor;
3380 break;
3381 }
3382
3383 /* upper or lower */
3384 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
3385 switch (uplo_val) {
3386 case 0:
3387 uplo_type = blas_upper;
3388 break;
3389 case 1:
3390 uplo_type = blas_lower;
3391 break;
3392 }
3393
3394 /* no trans, trans, or conj trans */
3395 for (trans_val = 0; trans_val < 3; trans_val++) {
3396 switch (trans_val) {
3397 case 0:
3398 trans_type = blas_no_trans;
3399 break;
3400 case 1:
3401 trans_type = blas_trans;
3402 break;
3403 case 2:
3404 trans_type = blas_conj_trans;
3405 break;
3406 }
3407
3408 /* non_unit_diag, unit_diag */
3409 for (diag_val = 0; diag_val < 2; diag_val++) {
3410 switch (diag_val) {
3411 case 0:
3412 diag_type = blas_non_unit_diag;
3413 break;
3414 case 1:
3415 diag_type = blas_unit_diag;
3416 break;
3417 }
3418
3419 /* number of tests */
3420 for (i = 0; i < ntests; i++) {
3421 /* For the sake of speed, we throw out this case at random */
3422 if (xrand(seed) >= test_prob)
3423 continue;
3424
3425 BLAS_ctpmv_testgen(norm, order_type, uplo_type,
3426 trans_type, diag_type, n,
3427 &alpha, alpha_flag, tp, x_gen,
3428 seed, head_r_true, tail_r_true);
3429
3430 count++;
3431
3432 /* varying incx */
3433 for (incx_val = -2; incx_val <= 2; incx_val++) {
3434 if (incx_val == 0)
3435 continue;
3436
3437 /* setting incx */
3438 incx = incx_val;
3439 incx *= 2;
3440
3441 /* set x starting index */
3442 ix = 0;
3443 if (incx < 0)
3444 ix = -(n - 1) * incx;
3445
3446 /* copy x_gen to x */
3447 for (j = 0; j < n; j++) {
3448 x[ix] = x_gen[j * inc_index];
3449 x[ix + 1] = x_gen[j * inc_index + 1];
3450 ix += incx;
3451 }
3452
3453 /* call BLAS_ctpmv_x */
3454 FPU_FIX_STOP;
3455 BLAS_ctpmv_x(order_type, uplo_type, trans_type,
3456 diag_type, n, alpha, tp, x, incx_val,
3457 prec);
3458 FPU_FIX_START;
3459
3460 /* set x starting index */
3461 ix = 0;
3462 if (incx < 0)
3463 ix = -(n - 1) * incx;
3464
3465 /* computing the ratio */
3466 for (j = 0; j < n; j++) {
3467 /* copy row j of tp to temp */
3468 ctpmv_copy_row(order_type, uplo_type, trans_type, n,
3469 tp, temp, j);
3470
3471 test_BLAS_cdot(n, blas_no_conj,
3472 alpha, beta_zero_fake, rin_zero_fake,
3473 &x[ix],
3474 &head_r_true[j * inc_index],
3475 &tail_r_true[j * inc_index], temp, 1,
3476 x_gen, 1, eps_int, un_int, &ratios[j]);
3477
3478 /* take the max ratio */
3479 if (j == 0)
3480 ratio = ratios[0];
3481 else if (ratios[j] > ratio)
3482 ratio = ratios[j];
3483
3484 ix += incx;
3485 }
3486
3487 /* increase the number of bad ratio, if the ratio is
3488 bigger than the threshold */
3489 if (ratio > thresh) {
3490 bad_ratios++;
3491
3492 if ((debug == 3) && /* print only when debug is on */
3493 (count != old_count) && /* print if old vector is different
3494 from the current one */
3495 (d_count == find_max_ratio) &&
3496 (p_count <= max_print) &&
3497 (ratio > 0.5 * ratio_max)) {
3498 p_count++;
3499 old_count = count;
3500
3501 printf
3502 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
3503 fname, n, ntests, thresh);
3504
3505 /* Print test info */
3506 switch (prec) {
3507 case blas_prec_single:
3508 printf("single ");
3509 break;
3510 case blas_prec_double:
3511 printf("double ");
3512 break;
3513 case blas_prec_indigenous:
3514 printf("indigenous ");
3515 break;
3516 case blas_prec_extra:
3517 printf("extra ");
3518 break;
3519 }
3520 switch (norm) {
3521 case -1:
3522 printf("near_underflow ");
3523 break;
3524 case 0:
3525 printf("near_one ");
3526 break;
3527 case 1:
3528 printf("near_overflow ");
3529 break;
3530 }
3531 switch (order_type) {
3532 case blas_rowmajor:
3533 printf("row_major ");
3534 break;
3535 case blas_colmajor:
3536 printf("col_major ");
3537 break;
3538 }
3539 switch (uplo_type) {
3540 case blas_upper:
3541 printf("upper ");
3542 break;
3543 case blas_lower:
3544 printf("lower ");
3545 break;
3546 }
3547 switch (trans_type) {
3548 case blas_no_trans:
3549 printf("no_trans ");
3550 break;
3551 case blas_trans:
3552 printf("trans ");
3553 break;
3554 case blas_conj_trans:
3555 printf("conj_trans ");
3556 break;
3557 }
3558 switch (diag_type) {
3559 case blas_non_unit_diag:
3560 printf("non_unit_diag ");
3561 break;
3562 case blas_unit_diag:
3563 printf("unit_diag ");
3564 break;
3565 }
3566
3567 printf(" incx=%d:\n", incx);
3568 ix = 0;
3569 if (incx < 0)
3570 ix = -(n - 1) * incx;
3571
3572 printf(" TP=");
3573 for (j = 0; j < n; j++) {
3574 /* copy row j of tp to temp */
3575 ctpmv_copy_row(order_type, uplo_type, trans_type,
3576 n, tp, temp, j);
3577
3578 if (j > 0)
3579 printf(" ");
3580 cprint_vector(temp, n, 1, NULL);
3581 }
3582
3583 ix = 0;
3584 if (incx < 0)
3585 ix = -(n - 1) * incx;
3586
3587 for (j = 0; j < n; j++) {
3588 printf(" ");
3589 printf("x[%d] = ", j * inc_index);
3590 printf("(%16.8e, %16.8e)", x_gen[j * inc_index],
3591 x_gen[j * inc_index + 1]);
3592 printf("; ");
3593 printf("x_final[%d] = ", ix);
3594 printf("(%16.8e, %16.8e)", x[ix], x[ix + 1]);
3595 printf("\n");
3596 ix += incx;
3597 }
3598
3599 printf(" ");
3600 printf("alpha = ");
3601 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
3602 printf("; ");
3603 printf("\n");
3604 for (j = 0; j < n; j++) {
3605 printf(" ");
3606
3607 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
3608 head_r_true[j * inc_index],
3609 tail_r_true[j * inc_index],
3610 head_r_true[j * inc_index + 1],
3611 tail_r_true[j * inc_index + 1]);
3612 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
3613 }
3614 printf(" ratio=%.4e\n", ratio);
3615 }
3616 }
3617 if (d_count == 0) {
3618 if (ratio > ratio_max)
3619 ratio_max = ratio;
3620 if (ratio != 0.0 && ratio < ratio_min)
3621 ratio_min = ratio;
3622 tot_tests++;
3623 }
3624 } /* incx */
3625
3626 } /* numtests */
3627 } /* diag */
3628 } /* trans */
3629 } /* uplo */
3630 } /* order */
3631 } /* norm */
3632 } /* prec */
3633 } /* alpha */
3634 } /* debug */
3635
3636 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
3637 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
3638 fname, n, ntests, thresh);
3639 printf
3640 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
3641 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
3642 ratio_min, ratio_max);
3643 }
3644
3645 FPU_FIX_STOP;
3646
3647 blas_free(x);
3648 blas_free(x_gen);
3649 blas_free(temp);
3650 blas_free(tp);
3651 blas_free(head_r_true);
3652 blas_free(tail_r_true);
3653 blas_free(ratios);
3654
3655 *min_ratio = ratio_min;
3656 *num_bad_ratio = bad_ratios;
3657 *num_tests = tot_tests;
3658 return ratio_max;
3659 } /* end of do_test_ctpmv_x */
3660
do_test_ztpmv_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)3661 double do_test_ztpmv_x(int n,
3662 int ntests,
3663 int *seed,
3664 double thresh,
3665 int debug,
3666 float test_prob,
3667 double *min_ratio, int *num_bad_ratio, int *num_tests)
3668
3669 /*
3670 * Purpose
3671 * =======
3672 *
3673 * Runs a series of tests on TPMV.
3674 *
3675 * Arguments
3676 * =========
3677 *
3678 * n (input) int
3679 * The size of vector being tested
3680 *
3681 * ntests (input) int
3682 * The number of tests to run for each set of attributes.
3683 *
3684 * seed (input/output) int
3685 * The seed for the random number generator used in testgen().
3686 *
3687 * thresh (input) double
3688 * When the ratio returned from test() exceeds the specified
3689 * threshold, the current size, r_true, r_comp, and ratio will be
3690 * printed. (Since ratio is supposed to be O(1), we can set thresh
3691 * to ~10.)
3692 *
3693 * debug (input) int
3694 * If debug=3, print summary
3695 * If debug=2, print summary only if the number of bad ratios > 0
3696 * If debug=1, print complete info if tests fail
3697 * If debug=0, return max ratio
3698 *
3699 * min_ratio (output) double
3700 * The minimum ratio
3701 *
3702 * num_bad_ratio (output) int
3703 * The number of tests fail; they are above the threshold.
3704 *
3705 * num_tests (output) int
3706 * The number of tests is being performed.
3707 *
3708 * Return value
3709 * ============
3710 *
3711 * The maximum ratio if run successfully, otherwise return -1
3712 *
3713 * Code structure
3714 * ==============
3715 *
3716 * debug loop -- if debug is one, the first loop computes the max ratio
3717 * -- and the last(second) loop outputs debugging information,
3718 * -- if the test fail and its ratio > 0.5 * max ratio.
3719 * -- if debug is zero, the loop is executed once
3720 * alpha loop -- varying alpha: 0, 1, or random
3721 * prec loop -- varying internal prec: single, double, or extra
3722 * norm loop -- varying norm: near undeflow, near one, or
3723 * -- near overflow
3724 * order loop -- varying order type: rowmajor or colmajor
3725 * uplo loop -- varying uplo type: upper or lower
3726 * trans loop -- varying trans type: no trans, trans, and conj trans
3727 * diag loop -- varying diag type: non unit, and unit
3728 * numtest loop -- how many times the test is perform with
3729 * -- above set of attributes
3730 * incx loop -- varying incx: -2, -1, 1, 2
3731 */
3732 {
3733 /* function name */
3734 const char fname[] = "BLAS_ztpmv_x";
3735
3736 /* max number of debug lines to print */
3737 const int max_print = 3;
3738
3739 /* Variables in the "x_val" form are loop vars for corresponding
3740 variables */
3741 int i; /* iterate through the repeating tests */
3742 int j; /* multipurpose counters */
3743 int ix; /* use to index x */
3744 int incx_val, incx; /* for testing different inc values */
3745 int inctp, inc_index; /* 1 if real, 2 if complex */
3746 int d_count; /* counter for debug */
3747 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
3748 int p_count; /* counter for the number of debug lines printed */
3749 int tot_tests; /* total number of tests to be done */
3750 int norm; /* input values of near underflow/one/overflow */
3751 double ratio_max; /* the current maximum ratio */
3752 double ratio_min; /* the current minimum ratio */
3753 double *ratios; /* a temporary variable for calculating ratio */
3754 double ratio = 0.0; /* the per-use test ratio from test() */
3755 int bad_ratios = 0; /* the number of ratios over the threshold */
3756 double eps_int; /* the internal epsilon expected--2^(-24) for float */
3757 double un_int; /* the internal underflow threshold */
3758 double alpha[2];
3759 double beta[2];
3760 double *tp;
3761 double *x;
3762 double *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
3763 double *temp; /* use for calculating ratio */
3764
3765 /* the true r calculated by testgen(), in double-double */
3766 double *head_r_true, *tail_r_true;
3767
3768
3769 int alpha_val;
3770 int alpha_flag = 0, beta_flag = 0;
3771 int order_val;
3772 enum blas_order_type order_type = 0;
3773 int prec_val;
3774 enum blas_prec_type prec = 0;
3775 int uplo_val;
3776 enum blas_uplo_type uplo_type = 0;
3777 int trans_val;
3778 enum blas_trans_type trans_type = 0;
3779 int diag_val;
3780 enum blas_diag_type diag_type = 0;
3781
3782 double beta_zero_fake[2];
3783 double rin_zero_fake[2];
3784
3785 int saved_seed; /* for saving the original seed */
3786 int count, old_count = -1;
3787 FPU_FIX_DECL;
3788
3789 /* test for bad arguments */
3790 if (n < 0 || ntests < 0)
3791 BLAS_error(fname, 0, 0, NULL);
3792
3793 /* if there is nothing to test, return all zero */
3794 if (n == 0 || ntests == 0) {
3795 *min_ratio = 0.0;
3796 *num_bad_ratio = 0;
3797 *num_tests = 0;
3798 return 0.0;
3799 }
3800
3801 FPU_FIX_START;
3802
3803
3804
3805 incx = inctp = inc_index = 1;
3806 incx *= 2;
3807 inctp *= 2;
3808 inc_index *= 2;
3809
3810 /* get space for calculation */
3811 x = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
3812 if (3 * n * 2 > 0 && x == NULL) {
3813 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3814 }
3815 x_gen = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
3816 if (3 * n * 2 > 0 && x_gen == NULL) {
3817 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3818 }
3819 tp = (double *) blas_malloc(2 * n * n * n * inctp * sizeof(double) * 2);
3820 if (2 * n * n * n * inctp > 0 && tp == NULL) {
3821 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3822 }
3823 temp = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
3824 if (3 * n * 2 > 0 && temp == NULL) {
3825 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3826 }
3827 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
3828 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
3829 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
3830 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3831 }
3832 ratios = (double *) blas_malloc(n * sizeof(double));
3833 if (n > 0 && ratios == NULL) {
3834 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3835 }
3836
3837
3838 /* initialization */
3839 saved_seed = *seed;
3840 ratio_min = 1e308;
3841 ratio_max = 0.0;
3842 tot_tests = 0;
3843 p_count = 0;
3844 count = 0;
3845 find_max_ratio = 0;
3846 beta[0] = beta[1] = 0.0;
3847 beta_flag = 1;
3848
3849 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
3850 rin_zero_fake[0] = rin_zero_fake[1] = 0.0;
3851
3852 if (debug == 3)
3853 find_max_ratio = 1;
3854
3855 /* The debug iteration:
3856 If debug=1, then will execute the iteration twice. First, compute the
3857 max ratio. Second, print info if ratio > (50% * ratio_max). */
3858 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
3859 bad_ratios = 0; /* set to zero */
3860
3861 if ((debug == 3) && (d_count == find_max_ratio))
3862 *seed = saved_seed; /* restore the original seed */
3863
3864 /* varying alpha */
3865 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
3866 alpha_flag = 0;
3867 switch (alpha_val) {
3868 case 0:
3869 alpha[0] = alpha[1] = 0.0;
3870 alpha_flag = 1;
3871 break;
3872 case 1:
3873 alpha[0] = 1.0;
3874 alpha[1] = 0.0;
3875 alpha_flag = 1;
3876 break;
3877 }
3878
3879
3880 /* varying extra precs */
3881 for (prec_val = 0; prec_val <= 2; prec_val++) {
3882 switch (prec_val) {
3883 case 0:
3884 eps_int = power(2, -BITS_D);
3885 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3886 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3887 prec = blas_prec_double;
3888 break;
3889 case 1:
3890 eps_int = power(2, -BITS_D);
3891 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3892 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3893 prec = blas_prec_double;
3894 break;
3895 case 2:
3896 default:
3897 eps_int = power(2, -BITS_E);
3898 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
3899 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
3900 prec = blas_prec_extra;
3901 break;
3902 }
3903
3904 /* values near underflow, 1, or overflow */
3905 for (norm = -1; norm <= 1; norm++) {
3906
3907 /* row or col major */
3908 for (order_val = 0; order_val < 2; order_val++) {
3909 switch (order_val) {
3910 case 0:
3911 order_type = blas_rowmajor;
3912 break;
3913 case 1:
3914 order_type = blas_colmajor;
3915 break;
3916 }
3917
3918 /* upper or lower */
3919 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
3920 switch (uplo_val) {
3921 case 0:
3922 uplo_type = blas_upper;
3923 break;
3924 case 1:
3925 uplo_type = blas_lower;
3926 break;
3927 }
3928
3929 /* no trans, trans, or conj trans */
3930 for (trans_val = 0; trans_val < 3; trans_val++) {
3931 switch (trans_val) {
3932 case 0:
3933 trans_type = blas_no_trans;
3934 break;
3935 case 1:
3936 trans_type = blas_trans;
3937 break;
3938 case 2:
3939 trans_type = blas_conj_trans;
3940 break;
3941 }
3942
3943 /* non_unit_diag, unit_diag */
3944 for (diag_val = 0; diag_val < 2; diag_val++) {
3945 switch (diag_val) {
3946 case 0:
3947 diag_type = blas_non_unit_diag;
3948 break;
3949 case 1:
3950 diag_type = blas_unit_diag;
3951 break;
3952 }
3953
3954 /* number of tests */
3955 for (i = 0; i < ntests; i++) {
3956 /* For the sake of speed, we throw out this case at random */
3957 if (xrand(seed) >= test_prob)
3958 continue;
3959
3960 BLAS_ztpmv_testgen(norm, order_type, uplo_type,
3961 trans_type, diag_type, n,
3962 &alpha, alpha_flag, tp, x_gen,
3963 seed, head_r_true, tail_r_true);
3964
3965 count++;
3966
3967 /* varying incx */
3968 for (incx_val = -2; incx_val <= 2; incx_val++) {
3969 if (incx_val == 0)
3970 continue;
3971
3972 /* setting incx */
3973 incx = incx_val;
3974 incx *= 2;
3975
3976 /* set x starting index */
3977 ix = 0;
3978 if (incx < 0)
3979 ix = -(n - 1) * incx;
3980
3981 /* copy x_gen to x */
3982 for (j = 0; j < n; j++) {
3983 x[ix] = x_gen[j * inc_index];
3984 x[ix + 1] = x_gen[j * inc_index + 1];
3985 ix += incx;
3986 }
3987
3988 /* call BLAS_ztpmv_x */
3989 FPU_FIX_STOP;
3990 BLAS_ztpmv_x(order_type, uplo_type, trans_type,
3991 diag_type, n, alpha, tp, x, incx_val,
3992 prec);
3993 FPU_FIX_START;
3994
3995 /* set x starting index */
3996 ix = 0;
3997 if (incx < 0)
3998 ix = -(n - 1) * incx;
3999
4000 /* computing the ratio */
4001 for (j = 0; j < n; j++) {
4002 /* copy row j of tp to temp */
4003 ztpmv_copy_row(order_type, uplo_type, trans_type, n,
4004 tp, temp, j);
4005
4006 test_BLAS_zdot(n, blas_no_conj,
4007 alpha, beta_zero_fake, rin_zero_fake,
4008 &x[ix],
4009 &head_r_true[j * inc_index],
4010 &tail_r_true[j * inc_index], temp, 1,
4011 x_gen, 1, eps_int, un_int, &ratios[j]);
4012
4013 /* take the max ratio */
4014 if (j == 0)
4015 ratio = ratios[0];
4016 else if (ratios[j] > ratio)
4017 ratio = ratios[j];
4018
4019 ix += incx;
4020 }
4021
4022 /* increase the number of bad ratio, if the ratio is
4023 bigger than the threshold */
4024 if (ratio > thresh) {
4025 bad_ratios++;
4026
4027 if ((debug == 3) && /* print only when debug is on */
4028 (count != old_count) && /* print if old vector is different
4029 from the current one */
4030 (d_count == find_max_ratio) &&
4031 (p_count <= max_print) &&
4032 (ratio > 0.5 * ratio_max)) {
4033 p_count++;
4034 old_count = count;
4035
4036 printf
4037 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
4038 fname, n, ntests, thresh);
4039
4040 /* Print test info */
4041 switch (prec) {
4042 case blas_prec_single:
4043 printf("single ");
4044 break;
4045 case blas_prec_double:
4046 printf("double ");
4047 break;
4048 case blas_prec_indigenous:
4049 printf("indigenous ");
4050 break;
4051 case blas_prec_extra:
4052 printf("extra ");
4053 break;
4054 }
4055 switch (norm) {
4056 case -1:
4057 printf("near_underflow ");
4058 break;
4059 case 0:
4060 printf("near_one ");
4061 break;
4062 case 1:
4063 printf("near_overflow ");
4064 break;
4065 }
4066 switch (order_type) {
4067 case blas_rowmajor:
4068 printf("row_major ");
4069 break;
4070 case blas_colmajor:
4071 printf("col_major ");
4072 break;
4073 }
4074 switch (uplo_type) {
4075 case blas_upper:
4076 printf("upper ");
4077 break;
4078 case blas_lower:
4079 printf("lower ");
4080 break;
4081 }
4082 switch (trans_type) {
4083 case blas_no_trans:
4084 printf("no_trans ");
4085 break;
4086 case blas_trans:
4087 printf("trans ");
4088 break;
4089 case blas_conj_trans:
4090 printf("conj_trans ");
4091 break;
4092 }
4093 switch (diag_type) {
4094 case blas_non_unit_diag:
4095 printf("non_unit_diag ");
4096 break;
4097 case blas_unit_diag:
4098 printf("unit_diag ");
4099 break;
4100 }
4101
4102 printf(" incx=%d:\n", incx);
4103 ix = 0;
4104 if (incx < 0)
4105 ix = -(n - 1) * incx;
4106
4107 printf(" TP=");
4108 for (j = 0; j < n; j++) {
4109 /* copy row j of tp to temp */
4110 ztpmv_copy_row(order_type, uplo_type, trans_type,
4111 n, tp, temp, j);
4112
4113 if (j > 0)
4114 printf(" ");
4115 zprint_vector(temp, n, 1, NULL);
4116 }
4117
4118 ix = 0;
4119 if (incx < 0)
4120 ix = -(n - 1) * incx;
4121
4122 for (j = 0; j < n; j++) {
4123 printf(" ");
4124 printf("x[%d] = ", j * inc_index);
4125 printf("(%24.16e, %24.16e)", x_gen[j * inc_index],
4126 x_gen[j * inc_index + 1]);
4127 printf("; ");
4128 printf("x_final[%d] = ", ix);
4129 printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
4130 printf("\n");
4131 ix += incx;
4132 }
4133
4134 printf(" ");
4135 printf("alpha = ");
4136 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
4137 printf("; ");
4138 printf("\n");
4139 for (j = 0; j < n; j++) {
4140 printf(" ");
4141
4142 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
4143 head_r_true[j * inc_index],
4144 tail_r_true[j * inc_index],
4145 head_r_true[j * inc_index + 1],
4146 tail_r_true[j * inc_index + 1]);
4147 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
4148 }
4149 printf(" ratio=%.4e\n", ratio);
4150 }
4151 }
4152 if (d_count == 0) {
4153 if (ratio > ratio_max)
4154 ratio_max = ratio;
4155 if (ratio != 0.0 && ratio < ratio_min)
4156 ratio_min = ratio;
4157 tot_tests++;
4158 }
4159 } /* incx */
4160
4161 } /* numtests */
4162 } /* diag */
4163 } /* trans */
4164 } /* uplo */
4165 } /* order */
4166 } /* norm */
4167 } /* prec */
4168 } /* alpha */
4169 } /* debug */
4170
4171 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
4172 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
4173 fname, n, ntests, thresh);
4174 printf
4175 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
4176 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
4177 ratio_min, ratio_max);
4178 }
4179
4180 FPU_FIX_STOP;
4181
4182 blas_free(x);
4183 blas_free(x_gen);
4184 blas_free(temp);
4185 blas_free(tp);
4186 blas_free(head_r_true);
4187 blas_free(tail_r_true);
4188 blas_free(ratios);
4189
4190 *min_ratio = ratio_min;
4191 *num_bad_ratio = bad_ratios;
4192 *num_tests = tot_tests;
4193 return ratio_max;
4194 } /* end of do_test_ztpmv_x */
4195
do_test_dtpmv_s_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)4196 double do_test_dtpmv_s_x(int n,
4197 int ntests,
4198 int *seed,
4199 double thresh,
4200 int debug,
4201 float test_prob,
4202 double *min_ratio,
4203 int *num_bad_ratio, int *num_tests)
4204
4205 /*
4206 * Purpose
4207 * =======
4208 *
4209 * Runs a series of tests on TPMV.
4210 *
4211 * Arguments
4212 * =========
4213 *
4214 * n (input) int
4215 * The size of vector being tested
4216 *
4217 * ntests (input) int
4218 * The number of tests to run for each set of attributes.
4219 *
4220 * seed (input/output) int
4221 * The seed for the random number generator used in testgen().
4222 *
4223 * thresh (input) double
4224 * When the ratio returned from test() exceeds the specified
4225 * threshold, the current size, r_true, r_comp, and ratio will be
4226 * printed. (Since ratio is supposed to be O(1), we can set thresh
4227 * to ~10.)
4228 *
4229 * debug (input) int
4230 * If debug=3, print summary
4231 * If debug=2, print summary only if the number of bad ratios > 0
4232 * If debug=1, print complete info if tests fail
4233 * If debug=0, return max ratio
4234 *
4235 * min_ratio (output) double
4236 * The minimum ratio
4237 *
4238 * num_bad_ratio (output) int
4239 * The number of tests fail; they are above the threshold.
4240 *
4241 * num_tests (output) int
4242 * The number of tests is being performed.
4243 *
4244 * Return value
4245 * ============
4246 *
4247 * The maximum ratio if run successfully, otherwise return -1
4248 *
4249 * Code structure
4250 * ==============
4251 *
4252 * debug loop -- if debug is one, the first loop computes the max ratio
4253 * -- and the last(second) loop outputs debugging information,
4254 * -- if the test fail and its ratio > 0.5 * max ratio.
4255 * -- if debug is zero, the loop is executed once
4256 * alpha loop -- varying alpha: 0, 1, or random
4257 * prec loop -- varying internal prec: single, double, or extra
4258 * norm loop -- varying norm: near undeflow, near one, or
4259 * -- near overflow
4260 * order loop -- varying order type: rowmajor or colmajor
4261 * uplo loop -- varying uplo type: upper or lower
4262 * trans loop -- varying trans type: no trans, trans, and conj trans
4263 * diag loop -- varying diag type: non unit, and unit
4264 * numtest loop -- how many times the test is perform with
4265 * -- above set of attributes
4266 * incx loop -- varying incx: -2, -1, 1, 2
4267 */
4268 {
4269 /* function name */
4270 const char fname[] = "BLAS_dtpmv_s_x";
4271
4272 /* max number of debug lines to print */
4273 const int max_print = 3;
4274
4275 /* Variables in the "x_val" form are loop vars for corresponding
4276 variables */
4277 int i; /* iterate through the repeating tests */
4278 int j; /* multipurpose counters */
4279 int ix; /* use to index x */
4280 int incx_val, incx; /* for testing different inc values */
4281 int inctp, inc_index; /* 1 if real, 2 if complex */
4282 int d_count; /* counter for debug */
4283 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
4284 int p_count; /* counter for the number of debug lines printed */
4285 int tot_tests; /* total number of tests to be done */
4286 int norm; /* input values of near underflow/one/overflow */
4287 double ratio_max; /* the current maximum ratio */
4288 double ratio_min; /* the current minimum ratio */
4289 double *ratios; /* a temporary variable for calculating ratio */
4290 double ratio = 0.0; /* the per-use test ratio from test() */
4291 int bad_ratios = 0; /* the number of ratios over the threshold */
4292 double eps_int; /* the internal epsilon expected--2^(-24) for float */
4293 double un_int; /* the internal underflow threshold */
4294 double alpha;
4295 double beta;
4296 float *tp;
4297 double *x;
4298 double *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
4299 float *temp; /* use for calculating ratio */
4300
4301 /* the true r calculated by testgen(), in double-double */
4302 double *head_r_true, *tail_r_true;
4303
4304 int alpha_val;
4305 int alpha_flag = 0, beta_flag = 0;
4306 int order_val;
4307 enum blas_order_type order_type = 0;
4308 int prec_val;
4309 enum blas_prec_type prec = 0;
4310 int uplo_val;
4311 enum blas_uplo_type uplo_type = 0;
4312 int trans_val;
4313 enum blas_trans_type trans_type = 0;
4314 int diag_val;
4315 enum blas_diag_type diag_type = 0;
4316
4317 double beta_zero_fake;
4318 double rin_zero_fake;
4319
4320 int saved_seed; /* for saving the original seed */
4321 int count, old_count = -1;
4322 FPU_FIX_DECL;
4323
4324 /* test for bad arguments */
4325 if (n < 0 || ntests < 0)
4326 BLAS_error(fname, 0, 0, NULL);
4327
4328 /* if there is nothing to test, return all zero */
4329 if (n == 0 || ntests == 0) {
4330 *min_ratio = 0.0;
4331 *num_bad_ratio = 0;
4332 *num_tests = 0;
4333 return 0.0;
4334 }
4335
4336 FPU_FIX_START;
4337
4338
4339
4340 incx = inctp = inc_index = 1;
4341
4342
4343
4344
4345 /* get space for calculation */
4346 x = (double *) blas_malloc(3 * n * 2 * sizeof(double));
4347 if (3 * n * 2 > 0 && x == NULL) {
4348 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4349 }
4350 x_gen = (double *) blas_malloc(3 * n * 2 * sizeof(double));
4351 if (3 * n * 2 > 0 && x_gen == NULL) {
4352 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4353 }
4354 tp = (float *) blas_malloc(2 * n * n * n * inctp * sizeof(float));
4355 if (2 * n * n * n * inctp > 0 && tp == NULL) {
4356 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4357 }
4358 temp = (float *) blas_malloc(3 * n * 2 * sizeof(float));
4359 if (3 * n * 2 > 0 && temp == NULL) {
4360 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4361 }
4362 head_r_true = (double *) blas_malloc(n * sizeof(double));
4363 tail_r_true = (double *) blas_malloc(n * sizeof(double));
4364 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4365 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4366 }
4367 ratios = (double *) blas_malloc(n * sizeof(double));
4368 if (n > 0 && ratios == NULL) {
4369 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4370 }
4371
4372
4373 /* initialization */
4374 saved_seed = *seed;
4375 ratio_min = 1e308;
4376 ratio_max = 0.0;
4377 tot_tests = 0;
4378 p_count = 0;
4379 count = 0;
4380 find_max_ratio = 0;
4381 beta = 0.0;
4382 beta_flag = 1;
4383
4384 beta_zero_fake = 0.0;
4385 rin_zero_fake = 0.0;
4386
4387 if (debug == 3)
4388 find_max_ratio = 1;
4389
4390 /* The debug iteration:
4391 If debug=1, then will execute the iteration twice. First, compute the
4392 max ratio. Second, print info if ratio > (50% * ratio_max). */
4393 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
4394 bad_ratios = 0; /* set to zero */
4395
4396 if ((debug == 3) && (d_count == find_max_ratio))
4397 *seed = saved_seed; /* restore the original seed */
4398
4399 /* varying alpha */
4400 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
4401 alpha_flag = 0;
4402 switch (alpha_val) {
4403 case 0:
4404 alpha = 0.0;
4405 alpha_flag = 1;
4406 break;
4407 case 1:
4408 alpha = 1.0;
4409 alpha_flag = 1;
4410 break;
4411 }
4412
4413
4414 /* varying extra precs */
4415 for (prec_val = 0; prec_val <= 2; prec_val++) {
4416 switch (prec_val) {
4417 case 0:
4418 eps_int = power(2, -BITS_D);
4419 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4420 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4421 prec = blas_prec_double;
4422 break;
4423 case 1:
4424 eps_int = power(2, -BITS_D);
4425 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4426 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4427 prec = blas_prec_double;
4428 break;
4429 case 2:
4430 default:
4431 eps_int = power(2, -BITS_E);
4432 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
4433 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
4434 prec = blas_prec_extra;
4435 break;
4436 }
4437
4438 /* values near underflow, 1, or overflow */
4439 for (norm = -1; norm <= 1; norm++) {
4440
4441 /* row or col major */
4442 for (order_val = 0; order_val < 2; order_val++) {
4443 switch (order_val) {
4444 case 0:
4445 order_type = blas_rowmajor;
4446 break;
4447 case 1:
4448 order_type = blas_colmajor;
4449 break;
4450 }
4451
4452 /* upper or lower */
4453 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
4454 switch (uplo_val) {
4455 case 0:
4456 uplo_type = blas_upper;
4457 break;
4458 case 1:
4459 uplo_type = blas_lower;
4460 break;
4461 }
4462
4463 /* no trans, trans, or conj trans */
4464 for (trans_val = 0; trans_val < 3; trans_val++) {
4465 switch (trans_val) {
4466 case 0:
4467 trans_type = blas_no_trans;
4468 break;
4469 case 1:
4470 trans_type = blas_trans;
4471 break;
4472 case 2:
4473 trans_type = blas_conj_trans;
4474 break;
4475 }
4476
4477 /* non_unit_diag, unit_diag */
4478 for (diag_val = 0; diag_val < 2; diag_val++) {
4479 switch (diag_val) {
4480 case 0:
4481 diag_type = blas_non_unit_diag;
4482 break;
4483 case 1:
4484 diag_type = blas_unit_diag;
4485 break;
4486 }
4487
4488 /* number of tests */
4489 for (i = 0; i < ntests; i++) {
4490 /* For the sake of speed, we throw out this case at random */
4491 if (xrand(seed) >= test_prob)
4492 continue;
4493
4494 BLAS_dtpmv_s_testgen(norm, order_type, uplo_type,
4495 trans_type, diag_type, n,
4496 &alpha, alpha_flag, tp, x_gen,
4497 seed, head_r_true, tail_r_true);
4498
4499 count++;
4500
4501 /* varying incx */
4502 for (incx_val = -2; incx_val <= 2; incx_val++) {
4503 if (incx_val == 0)
4504 continue;
4505
4506 /* setting incx */
4507 incx = incx_val;
4508
4509
4510 /* set x starting index */
4511 ix = 0;
4512 if (incx < 0)
4513 ix = -(n - 1) * incx;
4514
4515 /* copy x_gen to x */
4516 for (j = 0; j < n; j++) {
4517 x[ix] = x_gen[j * inc_index];
4518 ix += incx;
4519 }
4520
4521 /* call BLAS_dtpmv_s_x */
4522 FPU_FIX_STOP;
4523 BLAS_dtpmv_s_x(order_type, uplo_type, trans_type,
4524 diag_type, n, alpha, tp, x, incx_val,
4525 prec);
4526 FPU_FIX_START;
4527
4528 /* set x starting index */
4529 ix = 0;
4530 if (incx < 0)
4531 ix = -(n - 1) * incx;
4532
4533 /* computing the ratio */
4534 for (j = 0; j < n; j++) {
4535 /* copy row j of tp to temp */
4536 stpmv_copy_row(order_type, uplo_type, trans_type, n,
4537 tp, temp, j);
4538
4539 test_BLAS_ddot_s_d(n, blas_no_conj,
4540 alpha, beta_zero_fake,
4541 rin_zero_fake, x[ix],
4542 head_r_true[j * inc_index],
4543 tail_r_true[j * inc_index], temp,
4544 1, x_gen, 1, eps_int, un_int,
4545 &ratios[j]);
4546
4547 /* take the max ratio */
4548 if (j == 0)
4549 ratio = ratios[0];
4550 else if (ratios[j] > ratio)
4551 ratio = ratios[j];
4552
4553 ix += incx;
4554 }
4555
4556 /* increase the number of bad ratio, if the ratio is
4557 bigger than the threshold */
4558 if (ratio > thresh) {
4559 bad_ratios++;
4560
4561 if ((debug == 3) && /* print only when debug is on */
4562 (count != old_count) && /* print if old vector is different
4563 from the current one */
4564 (d_count == find_max_ratio) &&
4565 (p_count <= max_print) &&
4566 (ratio > 0.5 * ratio_max)) {
4567 p_count++;
4568 old_count = count;
4569
4570 printf
4571 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
4572 fname, n, ntests, thresh);
4573
4574 /* Print test info */
4575 switch (prec) {
4576 case blas_prec_single:
4577 printf("single ");
4578 break;
4579 case blas_prec_double:
4580 printf("double ");
4581 break;
4582 case blas_prec_indigenous:
4583 printf("indigenous ");
4584 break;
4585 case blas_prec_extra:
4586 printf("extra ");
4587 break;
4588 }
4589 switch (norm) {
4590 case -1:
4591 printf("near_underflow ");
4592 break;
4593 case 0:
4594 printf("near_one ");
4595 break;
4596 case 1:
4597 printf("near_overflow ");
4598 break;
4599 }
4600 switch (order_type) {
4601 case blas_rowmajor:
4602 printf("row_major ");
4603 break;
4604 case blas_colmajor:
4605 printf("col_major ");
4606 break;
4607 }
4608 switch (uplo_type) {
4609 case blas_upper:
4610 printf("upper ");
4611 break;
4612 case blas_lower:
4613 printf("lower ");
4614 break;
4615 }
4616 switch (trans_type) {
4617 case blas_no_trans:
4618 printf("no_trans ");
4619 break;
4620 case blas_trans:
4621 printf("trans ");
4622 break;
4623 case blas_conj_trans:
4624 printf("conj_trans ");
4625 break;
4626 }
4627 switch (diag_type) {
4628 case blas_non_unit_diag:
4629 printf("non_unit_diag ");
4630 break;
4631 case blas_unit_diag:
4632 printf("unit_diag ");
4633 break;
4634 }
4635
4636 printf(" incx=%d:\n", incx);
4637 ix = 0;
4638 if (incx < 0)
4639 ix = -(n - 1) * incx;
4640
4641 printf(" TP=");
4642 for (j = 0; j < n; j++) {
4643 /* copy row j of tp to temp */
4644 stpmv_copy_row(order_type, uplo_type, trans_type,
4645 n, tp, temp, j);
4646
4647 if (j > 0)
4648 printf(" ");
4649 sprint_vector(temp, n, 1, NULL);
4650 }
4651
4652 ix = 0;
4653 if (incx < 0)
4654 ix = -(n - 1) * incx;
4655
4656 for (j = 0; j < n; j++) {
4657 printf(" ");
4658 printf("x[%d] = ", j * inc_index);
4659 printf("%24.16e", x_gen[j * inc_index]);
4660 printf("; ");
4661 printf("x_final[%d] = ", ix);
4662 printf("%24.16e", x[ix]);
4663 printf("\n");
4664 ix += incx;
4665 }
4666
4667 printf(" ");
4668 printf("alpha = ");
4669 printf("%24.16e", alpha);
4670 printf("; ");
4671 printf("\n");
4672 for (j = 0; j < n; j++) {
4673 printf(" ");
4674
4675 printf("[%24.16e, %24.16e]",
4676 head_r_true[j * inc_index],
4677 tail_r_true[j * inc_index]);
4678 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
4679 }
4680 printf(" ratio=%.4e\n", ratio);
4681 }
4682 }
4683 if (d_count == 0) {
4684 if (ratio > ratio_max)
4685 ratio_max = ratio;
4686 if (ratio != 0.0 && ratio < ratio_min)
4687 ratio_min = ratio;
4688 tot_tests++;
4689 }
4690 } /* incx */
4691
4692 } /* numtests */
4693 } /* diag */
4694 } /* trans */
4695 } /* uplo */
4696 } /* order */
4697 } /* norm */
4698 } /* prec */
4699 } /* alpha */
4700 } /* debug */
4701
4702 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
4703 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
4704 fname, n, ntests, thresh);
4705 printf
4706 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
4707 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
4708 ratio_min, ratio_max);
4709 }
4710
4711 FPU_FIX_STOP;
4712
4713 blas_free(x);
4714 blas_free(x_gen);
4715 blas_free(temp);
4716 blas_free(tp);
4717 blas_free(head_r_true);
4718 blas_free(tail_r_true);
4719 blas_free(ratios);
4720
4721 *min_ratio = ratio_min;
4722 *num_bad_ratio = bad_ratios;
4723 *num_tests = tot_tests;
4724 return ratio_max;
4725 } /* end of do_test_dtpmv_s_x */
4726
do_test_ztpmv_c_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)4727 double do_test_ztpmv_c_x(int n,
4728 int ntests,
4729 int *seed,
4730 double thresh,
4731 int debug,
4732 float test_prob,
4733 double *min_ratio,
4734 int *num_bad_ratio, int *num_tests)
4735
4736 /*
4737 * Purpose
4738 * =======
4739 *
4740 * Runs a series of tests on TPMV.
4741 *
4742 * Arguments
4743 * =========
4744 *
4745 * n (input) int
4746 * The size of vector being tested
4747 *
4748 * ntests (input) int
4749 * The number of tests to run for each set of attributes.
4750 *
4751 * seed (input/output) int
4752 * The seed for the random number generator used in testgen().
4753 *
4754 * thresh (input) double
4755 * When the ratio returned from test() exceeds the specified
4756 * threshold, the current size, r_true, r_comp, and ratio will be
4757 * printed. (Since ratio is supposed to be O(1), we can set thresh
4758 * to ~10.)
4759 *
4760 * debug (input) int
4761 * If debug=3, print summary
4762 * If debug=2, print summary only if the number of bad ratios > 0
4763 * If debug=1, print complete info if tests fail
4764 * If debug=0, return max ratio
4765 *
4766 * min_ratio (output) double
4767 * The minimum ratio
4768 *
4769 * num_bad_ratio (output) int
4770 * The number of tests fail; they are above the threshold.
4771 *
4772 * num_tests (output) int
4773 * The number of tests is being performed.
4774 *
4775 * Return value
4776 * ============
4777 *
4778 * The maximum ratio if run successfully, otherwise return -1
4779 *
4780 * Code structure
4781 * ==============
4782 *
4783 * debug loop -- if debug is one, the first loop computes the max ratio
4784 * -- and the last(second) loop outputs debugging information,
4785 * -- if the test fail and its ratio > 0.5 * max ratio.
4786 * -- if debug is zero, the loop is executed once
4787 * alpha loop -- varying alpha: 0, 1, or random
4788 * prec loop -- varying internal prec: single, double, or extra
4789 * norm loop -- varying norm: near undeflow, near one, or
4790 * -- near overflow
4791 * order loop -- varying order type: rowmajor or colmajor
4792 * uplo loop -- varying uplo type: upper or lower
4793 * trans loop -- varying trans type: no trans, trans, and conj trans
4794 * diag loop -- varying diag type: non unit, and unit
4795 * numtest loop -- how many times the test is perform with
4796 * -- above set of attributes
4797 * incx loop -- varying incx: -2, -1, 1, 2
4798 */
4799 {
4800 /* function name */
4801 const char fname[] = "BLAS_ztpmv_c_x";
4802
4803 /* max number of debug lines to print */
4804 const int max_print = 3;
4805
4806 /* Variables in the "x_val" form are loop vars for corresponding
4807 variables */
4808 int i; /* iterate through the repeating tests */
4809 int j; /* multipurpose counters */
4810 int ix; /* use to index x */
4811 int incx_val, incx; /* for testing different inc values */
4812 int inctp, inc_index; /* 1 if real, 2 if complex */
4813 int d_count; /* counter for debug */
4814 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
4815 int p_count; /* counter for the number of debug lines printed */
4816 int tot_tests; /* total number of tests to be done */
4817 int norm; /* input values of near underflow/one/overflow */
4818 double ratio_max; /* the current maximum ratio */
4819 double ratio_min; /* the current minimum ratio */
4820 double *ratios; /* a temporary variable for calculating ratio */
4821 double ratio = 0.0; /* the per-use test ratio from test() */
4822 int bad_ratios = 0; /* the number of ratios over the threshold */
4823 double eps_int; /* the internal epsilon expected--2^(-24) for float */
4824 double un_int; /* the internal underflow threshold */
4825 double alpha[2];
4826 double beta[2];
4827 float *tp;
4828 double *x;
4829 double *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
4830 float *temp; /* use for calculating ratio */
4831
4832 /* the true r calculated by testgen(), in double-double */
4833 double *head_r_true, *tail_r_true;
4834
4835
4836 int alpha_val;
4837 int alpha_flag = 0, beta_flag = 0;
4838 int order_val;
4839 enum blas_order_type order_type = 0;
4840 int prec_val;
4841 enum blas_prec_type prec = 0;
4842 int uplo_val;
4843 enum blas_uplo_type uplo_type = 0;
4844 int trans_val;
4845 enum blas_trans_type trans_type = 0;
4846 int diag_val;
4847 enum blas_diag_type diag_type = 0;
4848
4849 double beta_zero_fake[2];
4850 double rin_zero_fake[2];
4851
4852 int saved_seed; /* for saving the original seed */
4853 int count, old_count = -1;
4854 FPU_FIX_DECL;
4855
4856 /* test for bad arguments */
4857 if (n < 0 || ntests < 0)
4858 BLAS_error(fname, 0, 0, NULL);
4859
4860 /* if there is nothing to test, return all zero */
4861 if (n == 0 || ntests == 0) {
4862 *min_ratio = 0.0;
4863 *num_bad_ratio = 0;
4864 *num_tests = 0;
4865 return 0.0;
4866 }
4867
4868 FPU_FIX_START;
4869
4870
4871
4872 incx = inctp = inc_index = 1;
4873 incx *= 2;
4874 inctp *= 2;
4875 inc_index *= 2;
4876
4877 /* get space for calculation */
4878 x = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
4879 if (3 * n * 2 > 0 && x == NULL) {
4880 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4881 }
4882 x_gen = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
4883 if (3 * n * 2 > 0 && x_gen == NULL) {
4884 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4885 }
4886 tp = (float *) blas_malloc(2 * n * n * n * inctp * sizeof(float) * 2);
4887 if (2 * n * n * n * inctp > 0 && tp == NULL) {
4888 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4889 }
4890 temp = (float *) blas_malloc(3 * n * 2 * sizeof(float) * 2);
4891 if (3 * n * 2 > 0 && temp == NULL) {
4892 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4893 }
4894 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
4895 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
4896 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4897 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4898 }
4899 ratios = (double *) blas_malloc(n * sizeof(double));
4900 if (n > 0 && ratios == NULL) {
4901 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4902 }
4903
4904
4905 /* initialization */
4906 saved_seed = *seed;
4907 ratio_min = 1e308;
4908 ratio_max = 0.0;
4909 tot_tests = 0;
4910 p_count = 0;
4911 count = 0;
4912 find_max_ratio = 0;
4913 beta[0] = beta[1] = 0.0;
4914 beta_flag = 1;
4915
4916 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
4917 rin_zero_fake[0] = rin_zero_fake[1] = 0.0;
4918
4919 if (debug == 3)
4920 find_max_ratio = 1;
4921
4922 /* The debug iteration:
4923 If debug=1, then will execute the iteration twice. First, compute the
4924 max ratio. Second, print info if ratio > (50% * ratio_max). */
4925 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
4926 bad_ratios = 0; /* set to zero */
4927
4928 if ((debug == 3) && (d_count == find_max_ratio))
4929 *seed = saved_seed; /* restore the original seed */
4930
4931 /* varying alpha */
4932 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
4933 alpha_flag = 0;
4934 switch (alpha_val) {
4935 case 0:
4936 alpha[0] = alpha[1] = 0.0;
4937 alpha_flag = 1;
4938 break;
4939 case 1:
4940 alpha[0] = 1.0;
4941 alpha[1] = 0.0;
4942 alpha_flag = 1;
4943 break;
4944 }
4945
4946
4947 /* varying extra precs */
4948 for (prec_val = 0; prec_val <= 2; prec_val++) {
4949 switch (prec_val) {
4950 case 0:
4951 eps_int = power(2, -BITS_D);
4952 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4953 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4954 prec = blas_prec_double;
4955 break;
4956 case 1:
4957 eps_int = power(2, -BITS_D);
4958 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4959 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4960 prec = blas_prec_double;
4961 break;
4962 case 2:
4963 default:
4964 eps_int = power(2, -BITS_E);
4965 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
4966 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
4967 prec = blas_prec_extra;
4968 break;
4969 }
4970
4971 /* values near underflow, 1, or overflow */
4972 for (norm = -1; norm <= 1; norm++) {
4973
4974 /* row or col major */
4975 for (order_val = 0; order_val < 2; order_val++) {
4976 switch (order_val) {
4977 case 0:
4978 order_type = blas_rowmajor;
4979 break;
4980 case 1:
4981 order_type = blas_colmajor;
4982 break;
4983 }
4984
4985 /* upper or lower */
4986 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
4987 switch (uplo_val) {
4988 case 0:
4989 uplo_type = blas_upper;
4990 break;
4991 case 1:
4992 uplo_type = blas_lower;
4993 break;
4994 }
4995
4996 /* no trans, trans, or conj trans */
4997 for (trans_val = 0; trans_val < 3; trans_val++) {
4998 switch (trans_val) {
4999 case 0:
5000 trans_type = blas_no_trans;
5001 break;
5002 case 1:
5003 trans_type = blas_trans;
5004 break;
5005 case 2:
5006 trans_type = blas_conj_trans;
5007 break;
5008 }
5009
5010 /* non_unit_diag, unit_diag */
5011 for (diag_val = 0; diag_val < 2; diag_val++) {
5012 switch (diag_val) {
5013 case 0:
5014 diag_type = blas_non_unit_diag;
5015 break;
5016 case 1:
5017 diag_type = blas_unit_diag;
5018 break;
5019 }
5020
5021 /* number of tests */
5022 for (i = 0; i < ntests; i++) {
5023 /* For the sake of speed, we throw out this case at random */
5024 if (xrand(seed) >= test_prob)
5025 continue;
5026
5027 BLAS_ztpmv_c_testgen(norm, order_type, uplo_type,
5028 trans_type, diag_type, n,
5029 &alpha, alpha_flag, tp, x_gen,
5030 seed, head_r_true, tail_r_true);
5031
5032 count++;
5033
5034 /* varying incx */
5035 for (incx_val = -2; incx_val <= 2; incx_val++) {
5036 if (incx_val == 0)
5037 continue;
5038
5039 /* setting incx */
5040 incx = incx_val;
5041 incx *= 2;
5042
5043 /* set x starting index */
5044 ix = 0;
5045 if (incx < 0)
5046 ix = -(n - 1) * incx;
5047
5048 /* copy x_gen to x */
5049 for (j = 0; j < n; j++) {
5050 x[ix] = x_gen[j * inc_index];
5051 x[ix + 1] = x_gen[j * inc_index + 1];
5052 ix += incx;
5053 }
5054
5055 /* call BLAS_ztpmv_c_x */
5056 FPU_FIX_STOP;
5057 BLAS_ztpmv_c_x(order_type, uplo_type, trans_type,
5058 diag_type, n, alpha, tp, x, incx_val,
5059 prec);
5060 FPU_FIX_START;
5061
5062 /* set x starting index */
5063 ix = 0;
5064 if (incx < 0)
5065 ix = -(n - 1) * incx;
5066
5067 /* computing the ratio */
5068 for (j = 0; j < n; j++) {
5069 /* copy row j of tp to temp */
5070 ctpmv_copy_row(order_type, uplo_type, trans_type, n,
5071 tp, temp, j);
5072
5073 test_BLAS_zdot_c_z(n, blas_no_conj,
5074 alpha, beta_zero_fake,
5075 rin_zero_fake, &x[ix],
5076 &head_r_true[j * inc_index],
5077 &tail_r_true[j * inc_index], temp,
5078 1, x_gen, 1, eps_int, un_int,
5079 &ratios[j]);
5080
5081 /* take the max ratio */
5082 if (j == 0)
5083 ratio = ratios[0];
5084 else if (ratios[j] > ratio)
5085 ratio = ratios[j];
5086
5087 ix += incx;
5088 }
5089
5090 /* increase the number of bad ratio, if the ratio is
5091 bigger than the threshold */
5092 if (ratio > thresh) {
5093 bad_ratios++;
5094
5095 if ((debug == 3) && /* print only when debug is on */
5096 (count != old_count) && /* print if old vector is different
5097 from the current one */
5098 (d_count == find_max_ratio) &&
5099 (p_count <= max_print) &&
5100 (ratio > 0.5 * ratio_max)) {
5101 p_count++;
5102 old_count = count;
5103
5104 printf
5105 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
5106 fname, n, ntests, thresh);
5107
5108 /* Print test info */
5109 switch (prec) {
5110 case blas_prec_single:
5111 printf("single ");
5112 break;
5113 case blas_prec_double:
5114 printf("double ");
5115 break;
5116 case blas_prec_indigenous:
5117 printf("indigenous ");
5118 break;
5119 case blas_prec_extra:
5120 printf("extra ");
5121 break;
5122 }
5123 switch (norm) {
5124 case -1:
5125 printf("near_underflow ");
5126 break;
5127 case 0:
5128 printf("near_one ");
5129 break;
5130 case 1:
5131 printf("near_overflow ");
5132 break;
5133 }
5134 switch (order_type) {
5135 case blas_rowmajor:
5136 printf("row_major ");
5137 break;
5138 case blas_colmajor:
5139 printf("col_major ");
5140 break;
5141 }
5142 switch (uplo_type) {
5143 case blas_upper:
5144 printf("upper ");
5145 break;
5146 case blas_lower:
5147 printf("lower ");
5148 break;
5149 }
5150 switch (trans_type) {
5151 case blas_no_trans:
5152 printf("no_trans ");
5153 break;
5154 case blas_trans:
5155 printf("trans ");
5156 break;
5157 case blas_conj_trans:
5158 printf("conj_trans ");
5159 break;
5160 }
5161 switch (diag_type) {
5162 case blas_non_unit_diag:
5163 printf("non_unit_diag ");
5164 break;
5165 case blas_unit_diag:
5166 printf("unit_diag ");
5167 break;
5168 }
5169
5170 printf(" incx=%d:\n", incx);
5171 ix = 0;
5172 if (incx < 0)
5173 ix = -(n - 1) * incx;
5174
5175 printf(" TP=");
5176 for (j = 0; j < n; j++) {
5177 /* copy row j of tp to temp */
5178 ctpmv_copy_row(order_type, uplo_type, trans_type,
5179 n, tp, temp, j);
5180
5181 if (j > 0)
5182 printf(" ");
5183 cprint_vector(temp, n, 1, NULL);
5184 }
5185
5186 ix = 0;
5187 if (incx < 0)
5188 ix = -(n - 1) * incx;
5189
5190 for (j = 0; j < n; j++) {
5191 printf(" ");
5192 printf("x[%d] = ", j * inc_index);
5193 printf("(%24.16e, %24.16e)", x_gen[j * inc_index],
5194 x_gen[j * inc_index + 1]);
5195 printf("; ");
5196 printf("x_final[%d] = ", ix);
5197 printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
5198 printf("\n");
5199 ix += incx;
5200 }
5201
5202 printf(" ");
5203 printf("alpha = ");
5204 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
5205 printf("; ");
5206 printf("\n");
5207 for (j = 0; j < n; j++) {
5208 printf(" ");
5209
5210 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
5211 head_r_true[j * inc_index],
5212 tail_r_true[j * inc_index],
5213 head_r_true[j * inc_index + 1],
5214 tail_r_true[j * inc_index + 1]);
5215 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
5216 }
5217 printf(" ratio=%.4e\n", ratio);
5218 }
5219 }
5220 if (d_count == 0) {
5221 if (ratio > ratio_max)
5222 ratio_max = ratio;
5223 if (ratio != 0.0 && ratio < ratio_min)
5224 ratio_min = ratio;
5225 tot_tests++;
5226 }
5227 } /* incx */
5228
5229 } /* numtests */
5230 } /* diag */
5231 } /* trans */
5232 } /* uplo */
5233 } /* order */
5234 } /* norm */
5235 } /* prec */
5236 } /* alpha */
5237 } /* debug */
5238
5239 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
5240 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
5241 fname, n, ntests, thresh);
5242 printf
5243 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
5244 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
5245 ratio_min, ratio_max);
5246 }
5247
5248 FPU_FIX_STOP;
5249
5250 blas_free(x);
5251 blas_free(x_gen);
5252 blas_free(temp);
5253 blas_free(tp);
5254 blas_free(head_r_true);
5255 blas_free(tail_r_true);
5256 blas_free(ratios);
5257
5258 *min_ratio = ratio_min;
5259 *num_bad_ratio = bad_ratios;
5260 *num_tests = tot_tests;
5261 return ratio_max;
5262 } /* end of do_test_ztpmv_c_x */
5263
do_test_ctpmv_s_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)5264 double do_test_ctpmv_s_x(int n,
5265 int ntests,
5266 int *seed,
5267 double thresh,
5268 int debug,
5269 float test_prob,
5270 double *min_ratio,
5271 int *num_bad_ratio, int *num_tests)
5272
5273 /*
5274 * Purpose
5275 * =======
5276 *
5277 * Runs a series of tests on TPMV.
5278 *
5279 * Arguments
5280 * =========
5281 *
5282 * n (input) int
5283 * The size of vector being tested
5284 *
5285 * ntests (input) int
5286 * The number of tests to run for each set of attributes.
5287 *
5288 * seed (input/output) int
5289 * The seed for the random number generator used in testgen().
5290 *
5291 * thresh (input) double
5292 * When the ratio returned from test() exceeds the specified
5293 * threshold, the current size, r_true, r_comp, and ratio will be
5294 * printed. (Since ratio is supposed to be O(1), we can set thresh
5295 * to ~10.)
5296 *
5297 * debug (input) int
5298 * If debug=3, print summary
5299 * If debug=2, print summary only if the number of bad ratios > 0
5300 * If debug=1, print complete info if tests fail
5301 * If debug=0, return max ratio
5302 *
5303 * min_ratio (output) double
5304 * The minimum ratio
5305 *
5306 * num_bad_ratio (output) int
5307 * The number of tests fail; they are above the threshold.
5308 *
5309 * num_tests (output) int
5310 * The number of tests is being performed.
5311 *
5312 * Return value
5313 * ============
5314 *
5315 * The maximum ratio if run successfully, otherwise return -1
5316 *
5317 * Code structure
5318 * ==============
5319 *
5320 * debug loop -- if debug is one, the first loop computes the max ratio
5321 * -- and the last(second) loop outputs debugging information,
5322 * -- if the test fail and its ratio > 0.5 * max ratio.
5323 * -- if debug is zero, the loop is executed once
5324 * alpha loop -- varying alpha: 0, 1, or random
5325 * prec loop -- varying internal prec: single, double, or extra
5326 * norm loop -- varying norm: near undeflow, near one, or
5327 * -- near overflow
5328 * order loop -- varying order type: rowmajor or colmajor
5329 * uplo loop -- varying uplo type: upper or lower
5330 * trans loop -- varying trans type: no trans, trans, and conj trans
5331 * diag loop -- varying diag type: non unit, and unit
5332 * numtest loop -- how many times the test is perform with
5333 * -- above set of attributes
5334 * incx loop -- varying incx: -2, -1, 1, 2
5335 */
5336 {
5337 /* function name */
5338 const char fname[] = "BLAS_ctpmv_s_x";
5339
5340 /* max number of debug lines to print */
5341 const int max_print = 3;
5342
5343 /* Variables in the "x_val" form are loop vars for corresponding
5344 variables */
5345 int i; /* iterate through the repeating tests */
5346 int j; /* multipurpose counters */
5347 int ix; /* use to index x */
5348 int incx_val, incx; /* for testing different inc values */
5349 int inctp, inc_index; /* 1 if real, 2 if complex */
5350 int d_count; /* counter for debug */
5351 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
5352 int p_count; /* counter for the number of debug lines printed */
5353 int tot_tests; /* total number of tests to be done */
5354 int norm; /* input values of near underflow/one/overflow */
5355 double ratio_max; /* the current maximum ratio */
5356 double ratio_min; /* the current minimum ratio */
5357 double *ratios; /* a temporary variable for calculating ratio */
5358 double ratio = 0.0; /* the per-use test ratio from test() */
5359 int bad_ratios = 0; /* the number of ratios over the threshold */
5360 double eps_int; /* the internal epsilon expected--2^(-24) for float */
5361 double un_int; /* the internal underflow threshold */
5362 float alpha[2];
5363 float beta[2];
5364 float *tp;
5365 float *x;
5366 float *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
5367 float *temp; /* use for calculating ratio */
5368
5369 /* the true r calculated by testgen(), in double-double */
5370 double *head_r_true, *tail_r_true;
5371
5372
5373 int alpha_val;
5374 int alpha_flag = 0, beta_flag = 0;
5375 int order_val;
5376 enum blas_order_type order_type = 0;
5377 int prec_val;
5378 enum blas_prec_type prec = 0;
5379 int uplo_val;
5380 enum blas_uplo_type uplo_type = 0;
5381 int trans_val;
5382 enum blas_trans_type trans_type = 0;
5383 int diag_val;
5384 enum blas_diag_type diag_type = 0;
5385
5386 float beta_zero_fake[2];
5387 float rin_zero_fake[2];
5388
5389 int saved_seed; /* for saving the original seed */
5390 int count, old_count = -1;
5391 FPU_FIX_DECL;
5392
5393 /* test for bad arguments */
5394 if (n < 0 || ntests < 0)
5395 BLAS_error(fname, 0, 0, NULL);
5396
5397 /* if there is nothing to test, return all zero */
5398 if (n == 0 || ntests == 0) {
5399 *min_ratio = 0.0;
5400 *num_bad_ratio = 0;
5401 *num_tests = 0;
5402 return 0.0;
5403 }
5404
5405 FPU_FIX_START;
5406
5407
5408
5409 incx = inctp = inc_index = 1;
5410 incx *= 2;
5411
5412 inc_index *= 2;
5413
5414 /* get space for calculation */
5415 x = (float *) blas_malloc(3 * n * 2 * sizeof(float) * 2);
5416 if (3 * n * 2 > 0 && x == NULL) {
5417 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5418 }
5419 x_gen = (float *) blas_malloc(3 * n * 2 * sizeof(float) * 2);
5420 if (3 * n * 2 > 0 && x_gen == NULL) {
5421 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5422 }
5423 tp = (float *) blas_malloc(2 * n * n * n * inctp * sizeof(float));
5424 if (2 * n * n * n * inctp > 0 && tp == NULL) {
5425 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5426 }
5427 temp = (float *) blas_malloc(3 * n * 2 * sizeof(float));
5428 if (3 * n * 2 > 0 && temp == NULL) {
5429 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5430 }
5431 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
5432 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
5433 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
5434 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5435 }
5436 ratios = (double *) blas_malloc(n * sizeof(double));
5437 if (n > 0 && ratios == NULL) {
5438 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5439 }
5440
5441
5442 /* initialization */
5443 saved_seed = *seed;
5444 ratio_min = 1e308;
5445 ratio_max = 0.0;
5446 tot_tests = 0;
5447 p_count = 0;
5448 count = 0;
5449 find_max_ratio = 0;
5450 beta[0] = beta[1] = 0.0;
5451 beta_flag = 1;
5452
5453 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
5454 rin_zero_fake[0] = rin_zero_fake[1] = 0.0;
5455
5456 if (debug == 3)
5457 find_max_ratio = 1;
5458
5459 /* The debug iteration:
5460 If debug=1, then will execute the iteration twice. First, compute the
5461 max ratio. Second, print info if ratio > (50% * ratio_max). */
5462 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
5463 bad_ratios = 0; /* set to zero */
5464
5465 if ((debug == 3) && (d_count == find_max_ratio))
5466 *seed = saved_seed; /* restore the original seed */
5467
5468 /* varying alpha */
5469 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
5470 alpha_flag = 0;
5471 switch (alpha_val) {
5472 case 0:
5473 alpha[0] = alpha[1] = 0.0;
5474 alpha_flag = 1;
5475 break;
5476 case 1:
5477 alpha[0] = 1.0;
5478 alpha[1] = 0.0;
5479 alpha_flag = 1;
5480 break;
5481 }
5482
5483
5484 /* varying extra precs */
5485 for (prec_val = 0; prec_val <= 2; prec_val++) {
5486 switch (prec_val) {
5487 case 0:
5488 eps_int = power(2, -BITS_S);
5489 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
5490 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
5491 prec = blas_prec_single;
5492 break;
5493 case 1:
5494 eps_int = power(2, -BITS_D);
5495 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5496 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5497 prec = blas_prec_double;
5498 break;
5499 case 2:
5500 default:
5501 eps_int = power(2, -BITS_E);
5502 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
5503 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
5504 prec = blas_prec_extra;
5505 break;
5506 }
5507
5508 /* values near underflow, 1, or overflow */
5509 for (norm = -1; norm <= 1; norm++) {
5510
5511 /* row or col major */
5512 for (order_val = 0; order_val < 2; order_val++) {
5513 switch (order_val) {
5514 case 0:
5515 order_type = blas_rowmajor;
5516 break;
5517 case 1:
5518 order_type = blas_colmajor;
5519 break;
5520 }
5521
5522 /* upper or lower */
5523 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
5524 switch (uplo_val) {
5525 case 0:
5526 uplo_type = blas_upper;
5527 break;
5528 case 1:
5529 uplo_type = blas_lower;
5530 break;
5531 }
5532
5533 /* no trans, trans, or conj trans */
5534 for (trans_val = 0; trans_val < 3; trans_val++) {
5535 switch (trans_val) {
5536 case 0:
5537 trans_type = blas_no_trans;
5538 break;
5539 case 1:
5540 trans_type = blas_trans;
5541 break;
5542 case 2:
5543 trans_type = blas_conj_trans;
5544 break;
5545 }
5546
5547 /* non_unit_diag, unit_diag */
5548 for (diag_val = 0; diag_val < 2; diag_val++) {
5549 switch (diag_val) {
5550 case 0:
5551 diag_type = blas_non_unit_diag;
5552 break;
5553 case 1:
5554 diag_type = blas_unit_diag;
5555 break;
5556 }
5557
5558 /* number of tests */
5559 for (i = 0; i < ntests; i++) {
5560 /* For the sake of speed, we throw out this case at random */
5561 if (xrand(seed) >= test_prob)
5562 continue;
5563
5564 BLAS_ctpmv_s_testgen(norm, order_type, uplo_type,
5565 trans_type, diag_type, n,
5566 &alpha, alpha_flag, tp, x_gen,
5567 seed, head_r_true, tail_r_true);
5568
5569 count++;
5570
5571 /* varying incx */
5572 for (incx_val = -2; incx_val <= 2; incx_val++) {
5573 if (incx_val == 0)
5574 continue;
5575
5576 /* setting incx */
5577 incx = incx_val;
5578 incx *= 2;
5579
5580 /* set x starting index */
5581 ix = 0;
5582 if (incx < 0)
5583 ix = -(n - 1) * incx;
5584
5585 /* copy x_gen to x */
5586 for (j = 0; j < n; j++) {
5587 x[ix] = x_gen[j * inc_index];
5588 x[ix + 1] = x_gen[j * inc_index + 1];
5589 ix += incx;
5590 }
5591
5592 /* call BLAS_ctpmv_s_x */
5593 FPU_FIX_STOP;
5594 BLAS_ctpmv_s_x(order_type, uplo_type, trans_type,
5595 diag_type, n, alpha, tp, x, incx_val,
5596 prec);
5597 FPU_FIX_START;
5598
5599 /* set x starting index */
5600 ix = 0;
5601 if (incx < 0)
5602 ix = -(n - 1) * incx;
5603
5604 /* computing the ratio */
5605 for (j = 0; j < n; j++) {
5606 /* copy row j of tp to temp */
5607 stpmv_copy_row(order_type, uplo_type, trans_type, n,
5608 tp, temp, j);
5609
5610 test_BLAS_cdot_s_c(n, blas_no_conj,
5611 alpha, beta_zero_fake,
5612 rin_zero_fake, &x[ix],
5613 &head_r_true[j * inc_index],
5614 &tail_r_true[j * inc_index], temp,
5615 1, x_gen, 1, eps_int, un_int,
5616 &ratios[j]);
5617
5618 /* take the max ratio */
5619 if (j == 0)
5620 ratio = ratios[0];
5621 else if (ratios[j] > ratio)
5622 ratio = ratios[j];
5623
5624 ix += incx;
5625 }
5626
5627 /* increase the number of bad ratio, if the ratio is
5628 bigger than the threshold */
5629 if (ratio > thresh) {
5630 bad_ratios++;
5631
5632 if ((debug == 3) && /* print only when debug is on */
5633 (count != old_count) && /* print if old vector is different
5634 from the current one */
5635 (d_count == find_max_ratio) &&
5636 (p_count <= max_print) &&
5637 (ratio > 0.5 * ratio_max)) {
5638 p_count++;
5639 old_count = count;
5640
5641 printf
5642 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
5643 fname, n, ntests, thresh);
5644
5645 /* Print test info */
5646 switch (prec) {
5647 case blas_prec_single:
5648 printf("single ");
5649 break;
5650 case blas_prec_double:
5651 printf("double ");
5652 break;
5653 case blas_prec_indigenous:
5654 printf("indigenous ");
5655 break;
5656 case blas_prec_extra:
5657 printf("extra ");
5658 break;
5659 }
5660 switch (norm) {
5661 case -1:
5662 printf("near_underflow ");
5663 break;
5664 case 0:
5665 printf("near_one ");
5666 break;
5667 case 1:
5668 printf("near_overflow ");
5669 break;
5670 }
5671 switch (order_type) {
5672 case blas_rowmajor:
5673 printf("row_major ");
5674 break;
5675 case blas_colmajor:
5676 printf("col_major ");
5677 break;
5678 }
5679 switch (uplo_type) {
5680 case blas_upper:
5681 printf("upper ");
5682 break;
5683 case blas_lower:
5684 printf("lower ");
5685 break;
5686 }
5687 switch (trans_type) {
5688 case blas_no_trans:
5689 printf("no_trans ");
5690 break;
5691 case blas_trans:
5692 printf("trans ");
5693 break;
5694 case blas_conj_trans:
5695 printf("conj_trans ");
5696 break;
5697 }
5698 switch (diag_type) {
5699 case blas_non_unit_diag:
5700 printf("non_unit_diag ");
5701 break;
5702 case blas_unit_diag:
5703 printf("unit_diag ");
5704 break;
5705 }
5706
5707 printf(" incx=%d:\n", incx);
5708 ix = 0;
5709 if (incx < 0)
5710 ix = -(n - 1) * incx;
5711
5712 printf(" TP=");
5713 for (j = 0; j < n; j++) {
5714 /* copy row j of tp to temp */
5715 stpmv_copy_row(order_type, uplo_type, trans_type,
5716 n, tp, temp, j);
5717
5718 if (j > 0)
5719 printf(" ");
5720 sprint_vector(temp, n, 1, NULL);
5721 }
5722
5723 ix = 0;
5724 if (incx < 0)
5725 ix = -(n - 1) * incx;
5726
5727 for (j = 0; j < n; j++) {
5728 printf(" ");
5729 printf("x[%d] = ", j * inc_index);
5730 printf("(%16.8e, %16.8e)", x_gen[j * inc_index],
5731 x_gen[j * inc_index + 1]);
5732 printf("; ");
5733 printf("x_final[%d] = ", ix);
5734 printf("(%16.8e, %16.8e)", x[ix], x[ix + 1]);
5735 printf("\n");
5736 ix += incx;
5737 }
5738
5739 printf(" ");
5740 printf("alpha = ");
5741 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
5742 printf("; ");
5743 printf("\n");
5744 for (j = 0; j < n; j++) {
5745 printf(" ");
5746
5747 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
5748 head_r_true[j * inc_index],
5749 tail_r_true[j * inc_index],
5750 head_r_true[j * inc_index + 1],
5751 tail_r_true[j * inc_index + 1]);
5752 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
5753 }
5754 printf(" ratio=%.4e\n", ratio);
5755 }
5756 }
5757 if (d_count == 0) {
5758 if (ratio > ratio_max)
5759 ratio_max = ratio;
5760 if (ratio != 0.0 && ratio < ratio_min)
5761 ratio_min = ratio;
5762 tot_tests++;
5763 }
5764 } /* incx */
5765
5766 } /* numtests */
5767 } /* diag */
5768 } /* trans */
5769 } /* uplo */
5770 } /* order */
5771 } /* norm */
5772 } /* prec */
5773 } /* alpha */
5774 } /* debug */
5775
5776 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
5777 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
5778 fname, n, ntests, thresh);
5779 printf
5780 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
5781 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
5782 ratio_min, ratio_max);
5783 }
5784
5785 FPU_FIX_STOP;
5786
5787 blas_free(x);
5788 blas_free(x_gen);
5789 blas_free(temp);
5790 blas_free(tp);
5791 blas_free(head_r_true);
5792 blas_free(tail_r_true);
5793 blas_free(ratios);
5794
5795 *min_ratio = ratio_min;
5796 *num_bad_ratio = bad_ratios;
5797 *num_tests = tot_tests;
5798 return ratio_max;
5799 } /* end of do_test_ctpmv_s_x */
5800
do_test_ztpmv_d_x(int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)5801 double do_test_ztpmv_d_x(int n,
5802 int ntests,
5803 int *seed,
5804 double thresh,
5805 int debug,
5806 float test_prob,
5807 double *min_ratio,
5808 int *num_bad_ratio, int *num_tests)
5809
5810 /*
5811 * Purpose
5812 * =======
5813 *
5814 * Runs a series of tests on TPMV.
5815 *
5816 * Arguments
5817 * =========
5818 *
5819 * n (input) int
5820 * The size of vector being tested
5821 *
5822 * ntests (input) int
5823 * The number of tests to run for each set of attributes.
5824 *
5825 * seed (input/output) int
5826 * The seed for the random number generator used in testgen().
5827 *
5828 * thresh (input) double
5829 * When the ratio returned from test() exceeds the specified
5830 * threshold, the current size, r_true, r_comp, and ratio will be
5831 * printed. (Since ratio is supposed to be O(1), we can set thresh
5832 * to ~10.)
5833 *
5834 * debug (input) int
5835 * If debug=3, print summary
5836 * If debug=2, print summary only if the number of bad ratios > 0
5837 * If debug=1, print complete info if tests fail
5838 * If debug=0, return max ratio
5839 *
5840 * min_ratio (output) double
5841 * The minimum ratio
5842 *
5843 * num_bad_ratio (output) int
5844 * The number of tests fail; they are above the threshold.
5845 *
5846 * num_tests (output) int
5847 * The number of tests is being performed.
5848 *
5849 * Return value
5850 * ============
5851 *
5852 * The maximum ratio if run successfully, otherwise return -1
5853 *
5854 * Code structure
5855 * ==============
5856 *
5857 * debug loop -- if debug is one, the first loop computes the max ratio
5858 * -- and the last(second) loop outputs debugging information,
5859 * -- if the test fail and its ratio > 0.5 * max ratio.
5860 * -- if debug is zero, the loop is executed once
5861 * alpha loop -- varying alpha: 0, 1, or random
5862 * prec loop -- varying internal prec: single, double, or extra
5863 * norm loop -- varying norm: near undeflow, near one, or
5864 * -- near overflow
5865 * order loop -- varying order type: rowmajor or colmajor
5866 * uplo loop -- varying uplo type: upper or lower
5867 * trans loop -- varying trans type: no trans, trans, and conj trans
5868 * diag loop -- varying diag type: non unit, and unit
5869 * numtest loop -- how many times the test is perform with
5870 * -- above set of attributes
5871 * incx loop -- varying incx: -2, -1, 1, 2
5872 */
5873 {
5874 /* function name */
5875 const char fname[] = "BLAS_ztpmv_d_x";
5876
5877 /* max number of debug lines to print */
5878 const int max_print = 3;
5879
5880 /* Variables in the "x_val" form are loop vars for corresponding
5881 variables */
5882 int i; /* iterate through the repeating tests */
5883 int j; /* multipurpose counters */
5884 int ix; /* use to index x */
5885 int incx_val, incx; /* for testing different inc values */
5886 int inctp, inc_index; /* 1 if real, 2 if complex */
5887 int d_count; /* counter for debug */
5888 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
5889 int p_count; /* counter for the number of debug lines printed */
5890 int tot_tests; /* total number of tests to be done */
5891 int norm; /* input values of near underflow/one/overflow */
5892 double ratio_max; /* the current maximum ratio */
5893 double ratio_min; /* the current minimum ratio */
5894 double *ratios; /* a temporary variable for calculating ratio */
5895 double ratio = 0.0; /* the per-use test ratio from test() */
5896 int bad_ratios = 0; /* the number of ratios over the threshold */
5897 double eps_int; /* the internal epsilon expected--2^(-24) for float */
5898 double un_int; /* the internal underflow threshold */
5899 double alpha[2];
5900 double beta[2];
5901 double *tp;
5902 double *x;
5903 double *x_gen; /* used to store vectors generated by testgen; eventually copied to x */
5904 double *temp; /* use for calculating ratio */
5905
5906 /* the true r calculated by testgen(), in double-double */
5907 double *head_r_true, *tail_r_true;
5908
5909
5910 int alpha_val;
5911 int alpha_flag = 0, beta_flag = 0;
5912 int order_val;
5913 enum blas_order_type order_type = 0;
5914 int prec_val;
5915 enum blas_prec_type prec = 0;
5916 int uplo_val;
5917 enum blas_uplo_type uplo_type = 0;
5918 int trans_val;
5919 enum blas_trans_type trans_type = 0;
5920 int diag_val;
5921 enum blas_diag_type diag_type = 0;
5922
5923 double beta_zero_fake[2];
5924 double rin_zero_fake[2];
5925
5926 int saved_seed; /* for saving the original seed */
5927 int count, old_count = -1;
5928 FPU_FIX_DECL;
5929
5930 /* test for bad arguments */
5931 if (n < 0 || ntests < 0)
5932 BLAS_error(fname, 0, 0, NULL);
5933
5934 /* if there is nothing to test, return all zero */
5935 if (n == 0 || ntests == 0) {
5936 *min_ratio = 0.0;
5937 *num_bad_ratio = 0;
5938 *num_tests = 0;
5939 return 0.0;
5940 }
5941
5942 FPU_FIX_START;
5943
5944
5945
5946 incx = inctp = inc_index = 1;
5947 incx *= 2;
5948
5949 inc_index *= 2;
5950
5951 /* get space for calculation */
5952 x = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
5953 if (3 * n * 2 > 0 && x == NULL) {
5954 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5955 }
5956 x_gen = (double *) blas_malloc(3 * n * 2 * sizeof(double) * 2);
5957 if (3 * n * 2 > 0 && x_gen == NULL) {
5958 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5959 }
5960 tp = (double *) blas_malloc(2 * n * n * n * inctp * sizeof(double));
5961 if (2 * n * n * n * inctp > 0 && tp == NULL) {
5962 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5963 }
5964 temp = (double *) blas_malloc(3 * n * 2 * sizeof(double));
5965 if (3 * n * 2 > 0 && temp == NULL) {
5966 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5967 }
5968 head_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
5969 tail_r_true = (double *) blas_malloc(n * sizeof(double) * 2);
5970 if (n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
5971 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5972 }
5973 ratios = (double *) blas_malloc(n * sizeof(double));
5974 if (n > 0 && ratios == NULL) {
5975 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5976 }
5977
5978
5979 /* initialization */
5980 saved_seed = *seed;
5981 ratio_min = 1e308;
5982 ratio_max = 0.0;
5983 tot_tests = 0;
5984 p_count = 0;
5985 count = 0;
5986 find_max_ratio = 0;
5987 beta[0] = beta[1] = 0.0;
5988 beta_flag = 1;
5989
5990 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
5991 rin_zero_fake[0] = rin_zero_fake[1] = 0.0;
5992
5993 if (debug == 3)
5994 find_max_ratio = 1;
5995
5996 /* The debug iteration:
5997 If debug=1, then will execute the iteration twice. First, compute the
5998 max ratio. Second, print info if ratio > (50% * ratio_max). */
5999 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
6000 bad_ratios = 0; /* set to zero */
6001
6002 if ((debug == 3) && (d_count == find_max_ratio))
6003 *seed = saved_seed; /* restore the original seed */
6004
6005 /* varying alpha */
6006 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
6007 alpha_flag = 0;
6008 switch (alpha_val) {
6009 case 0:
6010 alpha[0] = alpha[1] = 0.0;
6011 alpha_flag = 1;
6012 break;
6013 case 1:
6014 alpha[0] = 1.0;
6015 alpha[1] = 0.0;
6016 alpha_flag = 1;
6017 break;
6018 }
6019
6020
6021 /* varying extra precs */
6022 for (prec_val = 0; prec_val <= 2; prec_val++) {
6023 switch (prec_val) {
6024 case 0:
6025 eps_int = power(2, -BITS_D);
6026 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6027 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6028 prec = blas_prec_double;
6029 break;
6030 case 1:
6031 eps_int = power(2, -BITS_D);
6032 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6033 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6034 prec = blas_prec_double;
6035 break;
6036 case 2:
6037 default:
6038 eps_int = power(2, -BITS_E);
6039 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
6040 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
6041 prec = blas_prec_extra;
6042 break;
6043 }
6044
6045 /* values near underflow, 1, or overflow */
6046 for (norm = -1; norm <= 1; norm++) {
6047
6048 /* row or col major */
6049 for (order_val = 0; order_val < 2; order_val++) {
6050 switch (order_val) {
6051 case 0:
6052 order_type = blas_rowmajor;
6053 break;
6054 case 1:
6055 order_type = blas_colmajor;
6056 break;
6057 }
6058
6059 /* upper or lower */
6060 for (uplo_val = 0; uplo_val < 2; uplo_val++) {
6061 switch (uplo_val) {
6062 case 0:
6063 uplo_type = blas_upper;
6064 break;
6065 case 1:
6066 uplo_type = blas_lower;
6067 break;
6068 }
6069
6070 /* no trans, trans, or conj trans */
6071 for (trans_val = 0; trans_val < 3; trans_val++) {
6072 switch (trans_val) {
6073 case 0:
6074 trans_type = blas_no_trans;
6075 break;
6076 case 1:
6077 trans_type = blas_trans;
6078 break;
6079 case 2:
6080 trans_type = blas_conj_trans;
6081 break;
6082 }
6083
6084 /* non_unit_diag, unit_diag */
6085 for (diag_val = 0; diag_val < 2; diag_val++) {
6086 switch (diag_val) {
6087 case 0:
6088 diag_type = blas_non_unit_diag;
6089 break;
6090 case 1:
6091 diag_type = blas_unit_diag;
6092 break;
6093 }
6094
6095 /* number of tests */
6096 for (i = 0; i < ntests; i++) {
6097 /* For the sake of speed, we throw out this case at random */
6098 if (xrand(seed) >= test_prob)
6099 continue;
6100
6101 BLAS_ztpmv_d_testgen(norm, order_type, uplo_type,
6102 trans_type, diag_type, n,
6103 &alpha, alpha_flag, tp, x_gen,
6104 seed, head_r_true, tail_r_true);
6105
6106 count++;
6107
6108 /* varying incx */
6109 for (incx_val = -2; incx_val <= 2; incx_val++) {
6110 if (incx_val == 0)
6111 continue;
6112
6113 /* setting incx */
6114 incx = incx_val;
6115 incx *= 2;
6116
6117 /* set x starting index */
6118 ix = 0;
6119 if (incx < 0)
6120 ix = -(n - 1) * incx;
6121
6122 /* copy x_gen to x */
6123 for (j = 0; j < n; j++) {
6124 x[ix] = x_gen[j * inc_index];
6125 x[ix + 1] = x_gen[j * inc_index + 1];
6126 ix += incx;
6127 }
6128
6129 /* call BLAS_ztpmv_d_x */
6130 FPU_FIX_STOP;
6131 BLAS_ztpmv_d_x(order_type, uplo_type, trans_type,
6132 diag_type, n, alpha, tp, x, incx_val,
6133 prec);
6134 FPU_FIX_START;
6135
6136 /* set x starting index */
6137 ix = 0;
6138 if (incx < 0)
6139 ix = -(n - 1) * incx;
6140
6141 /* computing the ratio */
6142 for (j = 0; j < n; j++) {
6143 /* copy row j of tp to temp */
6144 dtpmv_copy_row(order_type, uplo_type, trans_type, n,
6145 tp, temp, j);
6146
6147 test_BLAS_zdot_d_z(n, blas_no_conj,
6148 alpha, beta_zero_fake,
6149 rin_zero_fake, &x[ix],
6150 &head_r_true[j * inc_index],
6151 &tail_r_true[j * inc_index], temp,
6152 1, x_gen, 1, eps_int, un_int,
6153 &ratios[j]);
6154
6155 /* take the max ratio */
6156 if (j == 0)
6157 ratio = ratios[0];
6158 else if (ratios[j] > ratio)
6159 ratio = ratios[j];
6160
6161 ix += incx;
6162 }
6163
6164 /* increase the number of bad ratio, if the ratio is
6165 bigger than the threshold */
6166 if (ratio > thresh) {
6167 bad_ratios++;
6168
6169 if ((debug == 3) && /* print only when debug is on */
6170 (count != old_count) && /* print if old vector is different
6171 from the current one */
6172 (d_count == find_max_ratio) &&
6173 (p_count <= max_print) &&
6174 (ratio > 0.5 * ratio_max)) {
6175 p_count++;
6176 old_count = count;
6177
6178 printf
6179 ("FAIL> %s: n = %d, ntests = %d, threshold = %4.2f,\n",
6180 fname, n, ntests, thresh);
6181
6182 /* Print test info */
6183 switch (prec) {
6184 case blas_prec_single:
6185 printf("single ");
6186 break;
6187 case blas_prec_double:
6188 printf("double ");
6189 break;
6190 case blas_prec_indigenous:
6191 printf("indigenous ");
6192 break;
6193 case blas_prec_extra:
6194 printf("extra ");
6195 break;
6196 }
6197 switch (norm) {
6198 case -1:
6199 printf("near_underflow ");
6200 break;
6201 case 0:
6202 printf("near_one ");
6203 break;
6204 case 1:
6205 printf("near_overflow ");
6206 break;
6207 }
6208 switch (order_type) {
6209 case blas_rowmajor:
6210 printf("row_major ");
6211 break;
6212 case blas_colmajor:
6213 printf("col_major ");
6214 break;
6215 }
6216 switch (uplo_type) {
6217 case blas_upper:
6218 printf("upper ");
6219 break;
6220 case blas_lower:
6221 printf("lower ");
6222 break;
6223 }
6224 switch (trans_type) {
6225 case blas_no_trans:
6226 printf("no_trans ");
6227 break;
6228 case blas_trans:
6229 printf("trans ");
6230 break;
6231 case blas_conj_trans:
6232 printf("conj_trans ");
6233 break;
6234 }
6235 switch (diag_type) {
6236 case blas_non_unit_diag:
6237 printf("non_unit_diag ");
6238 break;
6239 case blas_unit_diag:
6240 printf("unit_diag ");
6241 break;
6242 }
6243
6244 printf(" incx=%d:\n", incx);
6245 ix = 0;
6246 if (incx < 0)
6247 ix = -(n - 1) * incx;
6248
6249 printf(" TP=");
6250 for (j = 0; j < n; j++) {
6251 /* copy row j of tp to temp */
6252 dtpmv_copy_row(order_type, uplo_type, trans_type,
6253 n, tp, temp, j);
6254
6255 if (j > 0)
6256 printf(" ");
6257 dprint_vector(temp, n, 1, NULL);
6258 }
6259
6260 ix = 0;
6261 if (incx < 0)
6262 ix = -(n - 1) * incx;
6263
6264 for (j = 0; j < n; j++) {
6265 printf(" ");
6266 printf("x[%d] = ", j * inc_index);
6267 printf("(%24.16e, %24.16e)", x_gen[j * inc_index],
6268 x_gen[j * inc_index + 1]);
6269 printf("; ");
6270 printf("x_final[%d] = ", ix);
6271 printf("(%24.16e, %24.16e)", x[ix], x[ix + 1]);
6272 printf("\n");
6273 ix += incx;
6274 }
6275
6276 printf(" ");
6277 printf("alpha = ");
6278 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
6279 printf("; ");
6280 printf("\n");
6281 for (j = 0; j < n; j++) {
6282 printf(" ");
6283
6284 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
6285 head_r_true[j * inc_index],
6286 tail_r_true[j * inc_index],
6287 head_r_true[j * inc_index + 1],
6288 tail_r_true[j * inc_index + 1]);
6289 printf(", ratio[%d]=%.4e\n", j, ratios[j]);
6290 }
6291 printf(" ratio=%.4e\n", ratio);
6292 }
6293 }
6294 if (d_count == 0) {
6295 if (ratio > ratio_max)
6296 ratio_max = ratio;
6297 if (ratio != 0.0 && ratio < ratio_min)
6298 ratio_min = ratio;
6299 tot_tests++;
6300 }
6301 } /* incx */
6302
6303 } /* numtests */
6304 } /* diag */
6305 } /* trans */
6306 } /* uplo */
6307 } /* order */
6308 } /* norm */
6309 } /* prec */
6310 } /* alpha */
6311 } /* debug */
6312
6313 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
6314 printf(" %s: n = %d, ntests = %d, thresh = %4.2f\n",
6315 fname, n, ntests, thresh);
6316 printf
6317 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
6318 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
6319 ratio_min, ratio_max);
6320 }
6321
6322 FPU_FIX_STOP;
6323
6324 blas_free(x);
6325 blas_free(x_gen);
6326 blas_free(temp);
6327 blas_free(tp);
6328 blas_free(head_r_true);
6329 blas_free(tail_r_true);
6330 blas_free(ratios);
6331
6332 *min_ratio = ratio_min;
6333 *num_bad_ratio = bad_ratios;
6334 *num_tests = tot_tests;
6335 return ratio_max;
6336 } /* end of do_test_ztpmv_d_x */
6337
6338 #define NSIZES 12
6339
main(int argc,char ** argv)6340 int main(int argc, char **argv)
6341 {
6342 int nsizes, ntests, debug;
6343 double thresh, test_prob;
6344 double total_min_ratio, total_max_ratio;
6345 int total_bad_ratios;
6346 int seed, num_bad_ratio, num_tests;
6347 int total_tests, nr_failed_routines = 0, nr_routines = 0;
6348 double min_ratio, max_ratio;
6349 const char *base_routine = "tpmv";
6350 char *fname;
6351 int n;
6352
6353 int i;
6354 int sizes[NSIZES] = { 0, 1, 2, 3, 4, 5, 6, 10, 17, 28, 37, 58 };
6355
6356 if (argc != 6) {
6357 printf("Usage:\n");
6358 printf("do_test_tpmv <nsizes> <ntests> <thresh> <debug> <test_prob>\n");
6359 printf(" <nsizes>: number of sizes to be run.\n");
6360 printf
6361 (" <ntests>: the number of tests performed for each set of attributes\n");
6362 printf
6363 (" <thresh>: to catch bad ratios if it is greater than <thresh>\n");
6364 printf(" <debug>: 0, 1, 2, or 3; \n");
6365 printf(" if 0, no printing \n");
6366 printf(" if 1, print error summary only if tests fail\n");
6367 printf(" if 2, print error summary for each n\n");
6368 printf(" if 3, print complete info each test fails \n");
6369 printf("<test_prob>: probability of preforming a given \n");
6370 printf(" test case: 0.0 does no tests, 1.0 does all tests\n");
6371 return -1;
6372 } else {
6373 nsizes = atoi(argv[1]);
6374 ntests = atoi(argv[2]);
6375 thresh = atof(argv[3]);
6376 debug = atoi(argv[4]);
6377 test_prob = atof(argv[5]);
6378 }
6379
6380 seed = 1999;
6381
6382 if (nsizes < 0 || ntests < 0 || debug < 0 || debug > 3)
6383 BLAS_error("Testing tpmv", 0, 0, NULL);
6384
6385 printf("Testing %s...\n", base_routine);
6386 printf("INPUT: nsizes = %d, ntests = %d, thresh = %4.2f, debug = %d\n\n",
6387 nsizes, ntests, thresh, debug);
6388
6389
6390
6391
6392
6393 fname = "BLAS_dtpmv_s";
6394 printf("Testing %s...\n", fname);
6395 min_ratio = 1e308;
6396 max_ratio = 0.0;
6397 total_bad_ratios = 0;
6398 total_tests = 0;
6399 for (i = 0; i < nsizes; i++) {
6400
6401 n = sizes[i];
6402 total_max_ratio = do_test_dtpmv_s(n, ntests, &seed, thresh, debug,
6403 test_prob,
6404 &total_min_ratio, &num_bad_ratio,
6405 &num_tests);
6406 if (total_max_ratio > max_ratio)
6407 max_ratio = total_max_ratio;
6408
6409 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6410 min_ratio = total_min_ratio;
6411
6412 total_bad_ratios += num_bad_ratio;
6413 total_tests += num_tests;
6414 }
6415
6416 nr_routines++;
6417 if (total_bad_ratios == 0)
6418 printf("PASS> ");
6419 else {
6420 nr_failed_routines++;
6421 printf("FAIL> ");
6422 }
6423
6424 if (min_ratio == 1e308)
6425 min_ratio = 0.0;
6426 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6427 fname, total_bad_ratios, total_tests, max_ratio);
6428
6429 fname = "BLAS_ztpmv_c";
6430 printf("Testing %s...\n", fname);
6431 min_ratio = 1e308;
6432 max_ratio = 0.0;
6433 total_bad_ratios = 0;
6434 total_tests = 0;
6435 for (i = 0; i < nsizes; i++) {
6436
6437 n = sizes[i];
6438 total_max_ratio = do_test_ztpmv_c(n, ntests, &seed, thresh, debug,
6439 test_prob,
6440 &total_min_ratio, &num_bad_ratio,
6441 &num_tests);
6442 if (total_max_ratio > max_ratio)
6443 max_ratio = total_max_ratio;
6444
6445 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6446 min_ratio = total_min_ratio;
6447
6448 total_bad_ratios += num_bad_ratio;
6449 total_tests += num_tests;
6450 }
6451
6452 nr_routines++;
6453 if (total_bad_ratios == 0)
6454 printf("PASS> ");
6455 else {
6456 nr_failed_routines++;
6457 printf("FAIL> ");
6458 }
6459
6460 if (min_ratio == 1e308)
6461 min_ratio = 0.0;
6462 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6463 fname, total_bad_ratios, total_tests, max_ratio);
6464
6465 fname = "BLAS_ctpmv_s";
6466 printf("Testing %s...\n", fname);
6467 min_ratio = 1e308;
6468 max_ratio = 0.0;
6469 total_bad_ratios = 0;
6470 total_tests = 0;
6471 for (i = 0; i < nsizes; i++) {
6472
6473 n = sizes[i];
6474 total_max_ratio = do_test_ctpmv_s(n, ntests, &seed, thresh, debug,
6475 test_prob,
6476 &total_min_ratio, &num_bad_ratio,
6477 &num_tests);
6478 if (total_max_ratio > max_ratio)
6479 max_ratio = total_max_ratio;
6480
6481 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6482 min_ratio = total_min_ratio;
6483
6484 total_bad_ratios += num_bad_ratio;
6485 total_tests += num_tests;
6486 }
6487
6488 nr_routines++;
6489 if (total_bad_ratios == 0)
6490 printf("PASS> ");
6491 else {
6492 nr_failed_routines++;
6493 printf("FAIL> ");
6494 }
6495
6496 if (min_ratio == 1e308)
6497 min_ratio = 0.0;
6498 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6499 fname, total_bad_ratios, total_tests, max_ratio);
6500
6501 fname = "BLAS_ztpmv_d";
6502 printf("Testing %s...\n", fname);
6503 min_ratio = 1e308;
6504 max_ratio = 0.0;
6505 total_bad_ratios = 0;
6506 total_tests = 0;
6507 for (i = 0; i < nsizes; i++) {
6508
6509 n = sizes[i];
6510 total_max_ratio = do_test_ztpmv_d(n, ntests, &seed, thresh, debug,
6511 test_prob,
6512 &total_min_ratio, &num_bad_ratio,
6513 &num_tests);
6514 if (total_max_ratio > max_ratio)
6515 max_ratio = total_max_ratio;
6516
6517 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6518 min_ratio = total_min_ratio;
6519
6520 total_bad_ratios += num_bad_ratio;
6521 total_tests += num_tests;
6522 }
6523
6524 nr_routines++;
6525 if (total_bad_ratios == 0)
6526 printf("PASS> ");
6527 else {
6528 nr_failed_routines++;
6529 printf("FAIL> ");
6530 }
6531
6532 if (min_ratio == 1e308)
6533 min_ratio = 0.0;
6534 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6535 fname, total_bad_ratios, total_tests, max_ratio);
6536
6537 fname = "BLAS_stpmv_x";
6538 printf("Testing %s...\n", fname);
6539 min_ratio = 1e308;
6540 max_ratio = 0.0;
6541 total_bad_ratios = 0;
6542 total_tests = 0;
6543 for (i = 0; i < nsizes; i++) {
6544
6545 n = sizes[i];
6546 total_max_ratio = do_test_stpmv_x(n, ntests, &seed, thresh, debug,
6547 test_prob,
6548 &total_min_ratio, &num_bad_ratio,
6549 &num_tests);
6550 if (total_max_ratio > max_ratio)
6551 max_ratio = total_max_ratio;
6552
6553 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6554 min_ratio = total_min_ratio;
6555
6556 total_bad_ratios += num_bad_ratio;
6557 total_tests += num_tests;
6558 }
6559
6560 nr_routines++;
6561 if (total_bad_ratios == 0)
6562 printf("PASS> ");
6563 else {
6564 nr_failed_routines++;
6565 printf("FAIL> ");
6566 }
6567
6568 if (min_ratio == 1e308)
6569 min_ratio = 0.0;
6570 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6571 fname, total_bad_ratios, total_tests, max_ratio);
6572
6573 fname = "BLAS_dtpmv_x";
6574 printf("Testing %s...\n", fname);
6575 min_ratio = 1e308;
6576 max_ratio = 0.0;
6577 total_bad_ratios = 0;
6578 total_tests = 0;
6579 for (i = 0; i < nsizes; i++) {
6580
6581 n = sizes[i];
6582 total_max_ratio = do_test_dtpmv_x(n, ntests, &seed, thresh, debug,
6583 test_prob,
6584 &total_min_ratio, &num_bad_ratio,
6585 &num_tests);
6586 if (total_max_ratio > max_ratio)
6587 max_ratio = total_max_ratio;
6588
6589 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6590 min_ratio = total_min_ratio;
6591
6592 total_bad_ratios += num_bad_ratio;
6593 total_tests += num_tests;
6594 }
6595
6596 nr_routines++;
6597 if (total_bad_ratios == 0)
6598 printf("PASS> ");
6599 else {
6600 nr_failed_routines++;
6601 printf("FAIL> ");
6602 }
6603
6604 if (min_ratio == 1e308)
6605 min_ratio = 0.0;
6606 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6607 fname, total_bad_ratios, total_tests, max_ratio);
6608
6609 fname = "BLAS_ctpmv_x";
6610 printf("Testing %s...\n", fname);
6611 min_ratio = 1e308;
6612 max_ratio = 0.0;
6613 total_bad_ratios = 0;
6614 total_tests = 0;
6615 for (i = 0; i < nsizes; i++) {
6616
6617 n = sizes[i];
6618 total_max_ratio = do_test_ctpmv_x(n, ntests, &seed, thresh, debug,
6619 test_prob,
6620 &total_min_ratio, &num_bad_ratio,
6621 &num_tests);
6622 if (total_max_ratio > max_ratio)
6623 max_ratio = total_max_ratio;
6624
6625 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6626 min_ratio = total_min_ratio;
6627
6628 total_bad_ratios += num_bad_ratio;
6629 total_tests += num_tests;
6630 }
6631
6632 nr_routines++;
6633 if (total_bad_ratios == 0)
6634 printf("PASS> ");
6635 else {
6636 nr_failed_routines++;
6637 printf("FAIL> ");
6638 }
6639
6640 if (min_ratio == 1e308)
6641 min_ratio = 0.0;
6642 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6643 fname, total_bad_ratios, total_tests, max_ratio);
6644
6645 fname = "BLAS_ztpmv_x";
6646 printf("Testing %s...\n", fname);
6647 min_ratio = 1e308;
6648 max_ratio = 0.0;
6649 total_bad_ratios = 0;
6650 total_tests = 0;
6651 for (i = 0; i < nsizes; i++) {
6652
6653 n = sizes[i];
6654 total_max_ratio = do_test_ztpmv_x(n, ntests, &seed, thresh, debug,
6655 test_prob,
6656 &total_min_ratio, &num_bad_ratio,
6657 &num_tests);
6658 if (total_max_ratio > max_ratio)
6659 max_ratio = total_max_ratio;
6660
6661 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6662 min_ratio = total_min_ratio;
6663
6664 total_bad_ratios += num_bad_ratio;
6665 total_tests += num_tests;
6666 }
6667
6668 nr_routines++;
6669 if (total_bad_ratios == 0)
6670 printf("PASS> ");
6671 else {
6672 nr_failed_routines++;
6673 printf("FAIL> ");
6674 }
6675
6676 if (min_ratio == 1e308)
6677 min_ratio = 0.0;
6678 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6679 fname, total_bad_ratios, total_tests, max_ratio);
6680
6681 fname = "BLAS_dtpmv_s_x";
6682 printf("Testing %s...\n", fname);
6683 min_ratio = 1e308;
6684 max_ratio = 0.0;
6685 total_bad_ratios = 0;
6686 total_tests = 0;
6687 for (i = 0; i < nsizes; i++) {
6688
6689 n = sizes[i];
6690 total_max_ratio = do_test_dtpmv_s_x(n, ntests, &seed, thresh, debug,
6691 test_prob,
6692 &total_min_ratio, &num_bad_ratio,
6693 &num_tests);
6694 if (total_max_ratio > max_ratio)
6695 max_ratio = total_max_ratio;
6696
6697 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6698 min_ratio = total_min_ratio;
6699
6700 total_bad_ratios += num_bad_ratio;
6701 total_tests += num_tests;
6702 }
6703
6704 nr_routines++;
6705 if (total_bad_ratios == 0)
6706 printf("PASS> ");
6707 else {
6708 nr_failed_routines++;
6709 printf("FAIL> ");
6710 }
6711
6712 if (min_ratio == 1e308)
6713 min_ratio = 0.0;
6714 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6715 fname, total_bad_ratios, total_tests, max_ratio);
6716
6717 fname = "BLAS_ztpmv_c_x";
6718 printf("Testing %s...\n", fname);
6719 min_ratio = 1e308;
6720 max_ratio = 0.0;
6721 total_bad_ratios = 0;
6722 total_tests = 0;
6723 for (i = 0; i < nsizes; i++) {
6724
6725 n = sizes[i];
6726 total_max_ratio = do_test_ztpmv_c_x(n, ntests, &seed, thresh, debug,
6727 test_prob,
6728 &total_min_ratio, &num_bad_ratio,
6729 &num_tests);
6730 if (total_max_ratio > max_ratio)
6731 max_ratio = total_max_ratio;
6732
6733 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6734 min_ratio = total_min_ratio;
6735
6736 total_bad_ratios += num_bad_ratio;
6737 total_tests += num_tests;
6738 }
6739
6740 nr_routines++;
6741 if (total_bad_ratios == 0)
6742 printf("PASS> ");
6743 else {
6744 nr_failed_routines++;
6745 printf("FAIL> ");
6746 }
6747
6748 if (min_ratio == 1e308)
6749 min_ratio = 0.0;
6750 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6751 fname, total_bad_ratios, total_tests, max_ratio);
6752
6753 fname = "BLAS_ctpmv_s_x";
6754 printf("Testing %s...\n", fname);
6755 min_ratio = 1e308;
6756 max_ratio = 0.0;
6757 total_bad_ratios = 0;
6758 total_tests = 0;
6759 for (i = 0; i < nsizes; i++) {
6760
6761 n = sizes[i];
6762 total_max_ratio = do_test_ctpmv_s_x(n, ntests, &seed, thresh, debug,
6763 test_prob,
6764 &total_min_ratio, &num_bad_ratio,
6765 &num_tests);
6766 if (total_max_ratio > max_ratio)
6767 max_ratio = total_max_ratio;
6768
6769 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6770 min_ratio = total_min_ratio;
6771
6772 total_bad_ratios += num_bad_ratio;
6773 total_tests += num_tests;
6774 }
6775
6776 nr_routines++;
6777 if (total_bad_ratios == 0)
6778 printf("PASS> ");
6779 else {
6780 nr_failed_routines++;
6781 printf("FAIL> ");
6782 }
6783
6784 if (min_ratio == 1e308)
6785 min_ratio = 0.0;
6786 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6787 fname, total_bad_ratios, total_tests, max_ratio);
6788
6789 fname = "BLAS_ztpmv_d_x";
6790 printf("Testing %s...\n", fname);
6791 min_ratio = 1e308;
6792 max_ratio = 0.0;
6793 total_bad_ratios = 0;
6794 total_tests = 0;
6795 for (i = 0; i < nsizes; i++) {
6796
6797 n = sizes[i];
6798 total_max_ratio = do_test_ztpmv_d_x(n, ntests, &seed, thresh, debug,
6799 test_prob,
6800 &total_min_ratio, &num_bad_ratio,
6801 &num_tests);
6802 if (total_max_ratio > max_ratio)
6803 max_ratio = total_max_ratio;
6804
6805 if (total_min_ratio != 0.0 && total_min_ratio < min_ratio)
6806 min_ratio = total_min_ratio;
6807
6808 total_bad_ratios += num_bad_ratio;
6809 total_tests += num_tests;
6810 }
6811
6812 nr_routines++;
6813 if (total_bad_ratios == 0)
6814 printf("PASS> ");
6815 else {
6816 nr_failed_routines++;
6817 printf("FAIL> ");
6818 }
6819
6820 if (min_ratio == 1e308)
6821 min_ratio = 0.0;
6822 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
6823 fname, total_bad_ratios, total_tests, max_ratio);
6824
6825
6826
6827 printf("\n");
6828 if (nr_failed_routines)
6829 printf("FAILED ");
6830 else
6831 printf("PASSED ");
6832 printf("%-10s: FAIL/TOTAL = %d/%d\n",
6833 base_routine, nr_failed_routines, nr_routines);
6834
6835 return 0;
6836 }
6837