1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <math.h>
4 #include "blas_extended.h"
5 #include "blas_extended_private.h"
6 #include "blas_extended_test.h"
7
do_test_dgemv2_d_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)8 double do_test_dgemv2_d_s(int m, int n, int ntests, int *seed, double thresh,
9 int debug, float test_prob, double *min_ratio,
10 int *num_bad_ratio, int *num_tests)
11
12 /*
13 * Purpose
14 * =======
15 *
16 * Runs a series of tests on GEMV2.
17 *
18 * Arguments
19 * =========
20 *
21 * m (input) int
22 * The number of rows
23 *
24 * n (input) int
25 * The number of columns
26 *
27 * ntests (input) int
28 * The number of tests to run for each set of attributes.
29 *
30 * seed (input/output) int
31 * The seed for the random number generator used in testgen().
32 *
33 * thresh (input) double
34 * When the ratio returned from test() exceeds the specified
35 * threshold, the current size, r_true, r_comp, and ratio will be
36 * printed. (Since ratio is supposed to be O(1), we can set thresh
37 * to ~10.)
38 *
39 * debug (input) int
40 * If debug=3, print summary
41 * If debug=2, print summary only if the number of bad ratios > 0
42 * If debug=1, print complete info if tests fail
43 * If debug=0, return max ratio
44 *
45 * test_prob (input) float
46 * The specified test will be performed only if the generated
47 * random exceeds this threshold.
48 *
49 * min_ratio (output) double
50 * The minimum ratio
51 *
52 * num_bad_ratio (output) int
53 * The number of tests fail; they are above the threshold.
54 *
55 * num_tests (output) int
56 * The number of tests is being performed.
57 *
58 * Return value
59 * ============
60 *
61 * The maximum ratio if run successfully, otherwise return -1
62 *
63 * Code structure
64 * ==============
65 *
66 * debug loop -- if debug is one, the first loop computes the max ratio
67 * -- and the last(second) loop outputs debugging information,
68 * -- if the test fail and its ratio > 0.5 * max ratio.
69 * -- if debug is zero, the loop is executed once
70 * alpha loop -- varying alpha: 0, 1, or random
71 * beta loop -- varying beta: 0, 1, or random
72
73 * norm loop -- varying norm: near undeflow, near one, or
74 * -- near overflow
75 * numtest loop -- how many times the test is perform with
76 * -- above set of attributes
77 * order loop -- varying order type: rowmajor or colmajor
78 * trans loop -- varying uplo type: upper or lower
79 * lda loop -- varying lda: m, m+1, 2m
80 * incx loop -- varying incx: -2, -1, 1, 2
81 * incy loop -- varying incy: -2, -1, 1, 2
82 */
83 {
84 /* function name */
85 const char fname[] = "BLAS_dgemv2_d_s";
86
87 /* max number of debug lines to print */
88 const int max_print = 8;
89
90 /* Variables in the "x_val" form are loop vars for corresponding
91 variables */
92 int i; /* iterate through the repeating tests */
93 int j, k; /* multipurpose counters or variables */
94 int iy; /* use to index y */
95 int incx_val, incy_val, /* for testing different inc values */
96 incx, incy;
97 int incy_gen; /* for complex case inc=2, for real case inc=1 */
98 int d_count; /* counter for debug */
99 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
100 int p_count; /* counter for the number of debug lines printed */
101 int tot_tests; /* total number of tests to be done */
102 int norm; /* input values of near underflow/one/overflow */
103 double ratio_max; /* the current maximum ratio */
104 double ratio_min; /* the current minimum ratio */
105 double *ratios; /* a temporary variable for calculating ratio */
106 double ratio; /* the per-use test ratio from test() */
107 int bad_ratios; /* the number of ratios over the threshold */
108 double eps_int; /* the internal epsilon expected--2^(-24) for float */
109 double un_int; /* the internal underflow threshold */
110 double alpha;
111 double beta;
112 double *A;
113 float *head_x;
114 float *tail_x;
115 double *y;
116 double *temp; /* use for calculating ratio */
117
118 /* x_gen and y_gen are used to store vectors generated by testgen.
119 they eventually are copied back to x and y */
120 float *head_x_gen;
121 float *tail_x_gen;
122 double *y_gen;
123
124 /* the true r calculated by testgen(), in double-double */
125 double *head_r_true, *tail_r_true;
126 int alpha_val;
127 int alpha_flag; /* input flag for BLAS_dgemv2_d_s_testgen */
128 int beta_val;
129 int beta_flag; /* input flag for BLAS_dgemv2_d_s_testgen */
130 int order_val;
131 enum blas_order_type order_type;
132
133 enum blas_prec_type prec;
134 int trans_val;
135 enum blas_trans_type trans_type;
136 int m_i;
137 int n_i;
138 int max_mn; /* the max of m and n */
139 int lda_val;
140 int lda;
141 int saved_seed; /* for saving the original seed */
142 int count, old_count; /* use for counting the number of testgen calls * 2 */
143
144 FPU_FIX_DECL;
145
146 /* test for bad arguments */
147 if (n < 0 || m < 0 || ntests < 0)
148 BLAS_error(fname, 0, 0, NULL);
149
150 /* initialization */
151 *num_bad_ratio = 0;
152 *num_tests = 0;
153 *min_ratio = 0.0;
154
155 saved_seed = *seed;
156 ratio_min = 1e308;
157 ratio_max = 0.0;
158 ratio = 0.0;
159 tot_tests = 0;
160 p_count = 0;
161 count = 0;
162 find_max_ratio = 0;
163 bad_ratios = 0;
164 old_count = 0;
165
166 if (debug == 3)
167 find_max_ratio = 1;
168 max_mn = MAX(m, n);
169 if (m == 0 || n == 0) {
170 return 0.0;
171 }
172
173 FPU_FIX_START;
174
175 incy_gen = 1;
176
177
178 /* get space for calculation */
179 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
180 if (max_mn * 2 > 0 && head_x == NULL) {
181 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
182 }
183 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
184 if (max_mn * 2 > 0 && tail_x == NULL) {
185 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
186 }
187 y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
188 if (max_mn * 2 > 0 && y == NULL) {
189 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
190 }
191 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
192 if (max_mn > 0 && head_x_gen == NULL) {
193 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
194 }
195 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
196 if (max_mn > 0 && tail_x_gen == NULL) {
197 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
198 }
199 y_gen = (double *) blas_malloc(max_mn * sizeof(double));
200 if (max_mn > 0 && y_gen == NULL) {
201 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
202 }
203 temp = (double *) blas_malloc(max_mn * sizeof(double));
204 if (max_mn > 0 && temp == NULL) {
205 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
206 }
207 head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
208 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
209 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
210 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
211 }
212 ratios = (double *) blas_malloc(max_mn * sizeof(double));
213 if (max_mn > 0 && ratios == NULL) {
214 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
215 }
216 A =
217 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
218 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
219 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
220 }
221
222 /* The debug iteration:
223 If debug=1, then will execute the iteration twice. First, compute the
224 max ratio. Second, print info if ratio > (50% * ratio_max). */
225 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
226 bad_ratios = 0; /* set to zero */
227
228 if ((debug == 3) && (d_count == find_max_ratio))
229 *seed = saved_seed; /* restore the original seed */
230
231 /* varying alpha */
232 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
233 alpha_flag = 0;
234 switch (alpha_val) {
235 case 0:
236 alpha = 0.0;
237 alpha_flag = 1;
238 break;
239 case 1:
240 alpha = 1.0;
241 alpha_flag = 1;
242 break;
243 }
244
245 /* varying beta */
246 for (beta_val = 0; beta_val < 3; beta_val++) {
247 beta_flag = 0;
248 switch (beta_val) {
249 case 0:
250 beta = 0.0;
251 beta_flag = 1;
252 break;
253 case 1:
254 beta = 1.0;
255 beta_flag = 1;
256 break;
257 }
258
259
260 eps_int = power(2, -BITS_D);
261 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
262 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
263 prec = blas_prec_double;
264
265 /* values near underflow, 1, or overflow */
266 for (norm = -1; norm <= 1; norm++) {
267
268 /* number of tests */
269 for (i = 0; i < ntests; i++) {
270
271 /* row or col major */
272 for (order_val = 0; order_val < 2; order_val++) {
273 switch (order_val) {
274 case 0:
275 order_type = blas_rowmajor;
276 break;
277 case 1:
278 default:
279 order_type = blas_colmajor;
280 break;
281 }
282
283 /* no_trans, trans, or conj_trans */
284 for (trans_val = 0; trans_val < 3; trans_val++) {
285 switch (trans_val) {
286 case 0:
287 trans_type = blas_no_trans;
288 m_i = m;
289 n_i = n;
290 break;
291 case 1:
292 trans_type = blas_trans;
293 m_i = n;
294 n_i = m;
295 break;
296 case 2:
297 default:
298 trans_type = blas_conj_trans;
299 m_i = n;
300 n_i = m;
301 break;
302 }
303
304 /* lda=n, n+1, or 2n */
305 for (lda_val = 0; lda_val < 3; lda_val++) {
306 switch (lda_val) {
307 case 0:
308 lda = m_i;
309 break;
310 case 1:
311 lda = m_i + 1;
312 break;
313 case 2:
314 default:
315 lda = 2 * m_i;
316 break;
317 }
318 if ((order_type == blas_rowmajor && lda < n) ||
319 (order_type == blas_colmajor && lda < m))
320 continue;
321
322 /* For the sake of speed, we throw out this case at random */
323 if (xrand(seed) >= test_prob)
324 continue;
325
326 /* in the trivial cases, no need to run testgen */
327 if (m > 0 && n > 0)
328 BLAS_dgemv2_d_s_testgen(norm, order_type, trans_type, m,
329 n, &alpha, alpha_flag, A, lda,
330 head_x_gen, tail_x_gen, &beta,
331 beta_flag, y_gen, seed,
332 head_r_true, tail_r_true);
333
334 count++;
335
336 /* varying incx */
337 for (incx_val = -2; incx_val <= 2; incx_val++) {
338 if (incx_val == 0)
339 continue;
340
341 /* setting incx */
342 incx = incx_val;
343
344
345 scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
346 scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
347
348 /* varying incy */
349 for (incy_val = -2; incy_val <= 2; incy_val++) {
350 if (incy_val == 0)
351 continue;
352
353 /* setting incy */
354 incy = incy_val;
355
356
357 dcopy_vector(y_gen, m_i, 1, y, incy_val);
358
359 /* call BLAS_dgemv2_d_s */
360 FPU_FIX_STOP;
361 BLAS_dgemv2_d_s(order_type, trans_type, m, n, alpha, A,
362 lda, head_x, tail_x, incx_val, beta, y,
363 incy_val);
364 FPU_FIX_START;
365
366 /* set y starting index */
367 iy = 0;
368 if (incy < 0)
369 iy = -(m_i - 1) * incy;
370
371 /* computing the ratio */
372 if (m > 0 && n > 0)
373 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
374 /* copy row j of A to temp */
375 dge_copy_row(order_type, trans_type, m_i, n_i, A,
376 lda, temp, j);
377
378 test_BLAS_ddot2_d_s(n_i, blas_no_conj, alpha, beta,
379 y_gen[k], y[iy], head_r_true[k],
380 tail_r_true[k], temp, 1, head_x,
381 tail_x, incx_val, eps_int,
382 un_int, &ratios[j]);
383
384 /* take the max ratio */
385 if (j == 0) {
386 ratio = ratios[0];
387 /* The !<= below causes NaN error to be detected.
388 Note that (NaN > thresh) is always false. */
389 } else if (!(ratios[j] <= ratio)) {
390 ratio = ratios[j];
391 }
392 iy += incy;
393 }
394
395 /* Increase the number of bad ratio, if the ratio
396 is bigger than the threshold.
397 The !<= below causes NaN error to be detected.
398 Note that (NaN > thresh) is always false. */
399 if (!(ratio <= thresh)) {
400 bad_ratios++;
401
402 if ((debug == 3) && /* print only when debug is on */
403 (count != old_count) && /* print if old vector is different
404 from the current one */
405 (d_count == find_max_ratio) &&
406 (p_count <= max_print) &&
407 (ratio > 0.5 * ratio_max)) {
408 old_count = count;
409
410 printf
411 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
412 fname, m, n, ntests, thresh);
413
414 /* Print test info */
415 switch (prec) {
416 case blas_prec_single:
417 printf("single ");
418 break;
419 case blas_prec_double:
420 printf("double ");
421 break;
422 case blas_prec_indigenous:
423 printf("indigenous ");
424 break;
425 case blas_prec_extra:
426 printf("extra ");
427 break;
428 }
429 switch (norm) {
430 case -1:
431 printf("near_underflow ");
432 break;
433 case 0:
434 printf("near_one ");
435 break;
436 case 1:
437 printf("near_overflow ");
438 break;
439 }
440 switch (order_type) {
441 case blas_rowmajor:
442 printf("row_major ");
443 break;
444 case blas_colmajor:
445 printf("col_major ");
446 break;
447 }
448 switch (trans_type) {
449 case blas_no_trans:
450 printf("no_trans ");
451 break;
452 case blas_trans:
453 printf("trans ");
454 break;
455 case blas_conj_trans:
456 printf("conj_trans ");
457 break;
458 }
459
460 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
461 incy);
462
463 dge_print_matrix(A, m_i, n_i, lda, order_type, "A");
464
465 sprint_vector(head_x, n_i, incx_val, "head_x");
466 sprint_vector(tail_x, n_i, incx_val, "tail_x");
467 dprint_vector(y_gen, m_i, 1, "y_gen");
468 dprint_vector(y, m_i, incy_val, "y_final");
469
470 printf(" ");
471 printf("alpha = ");
472 printf("%24.16e", alpha);
473 printf("\n ");
474 printf("beta = ");
475 printf("%24.16e", beta);
476 printf("\n");
477 for (j = 0, k = 0; j < m_i * incy_gen;
478 j += incy_gen, k++) {
479 printf(" ");
480 printf("[%24.16e, %24.16e]", head_r_true[j],
481 tail_r_true[j]);
482 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
483 }
484
485 printf(" ratio=%.4e\n", ratio);
486 p_count++;
487 }
488 if (bad_ratios >= MAX_BAD_TESTS) {
489 printf("\ntoo many failures, exiting....");
490 printf("\nTesting and compilation");
491 printf(" are incomplete\n\n");
492 goto end;
493 }
494 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
495 printf("\nFlagrant ratio error, exiting...");
496 printf("\nTesting and compilation");
497 printf(" are incomplete\n\n");
498 goto end;
499 }
500 }
501 if (d_count == 0) {
502 if (ratio > ratio_max)
503 ratio_max = ratio;
504
505 if (ratio != 0.0 && ratio < ratio_min)
506 ratio_min = ratio;
507
508 tot_tests++;
509 }
510 } /* incy */
511 } /* incx */
512 } /* lda */
513 } /* trans */
514 } /* order */
515 } /* tests */
516 } /* norm */
517
518 } /* beta */
519 } /* alpha */
520 } /* debug */
521
522 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
523 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
524 fname, m, n, ntests, thresh);
525 printf
526 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
527 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
528 ratio_min, ratio_max);
529 }
530
531 end:
532 FPU_FIX_STOP;
533
534 blas_free(head_x);
535 blas_free(tail_x);
536 blas_free(y);
537 blas_free(head_x_gen);
538 blas_free(tail_x_gen);
539 blas_free(y_gen);
540 blas_free(temp);
541 blas_free(A);
542 blas_free(head_r_true);
543 blas_free(tail_r_true);
544 blas_free(ratios);
545
546 *min_ratio = ratio_min;
547 *num_bad_ratio = bad_ratios;
548 *num_tests = tot_tests;
549 return ratio_max;
550 }
do_test_dgemv2_s_d(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)551 double do_test_dgemv2_s_d(int m, int n, int ntests, int *seed, double thresh,
552 int debug, float test_prob, double *min_ratio,
553 int *num_bad_ratio, int *num_tests)
554
555 /*
556 * Purpose
557 * =======
558 *
559 * Runs a series of tests on GEMV2.
560 *
561 * Arguments
562 * =========
563 *
564 * m (input) int
565 * The number of rows
566 *
567 * n (input) int
568 * The number of columns
569 *
570 * ntests (input) int
571 * The number of tests to run for each set of attributes.
572 *
573 * seed (input/output) int
574 * The seed for the random number generator used in testgen().
575 *
576 * thresh (input) double
577 * When the ratio returned from test() exceeds the specified
578 * threshold, the current size, r_true, r_comp, and ratio will be
579 * printed. (Since ratio is supposed to be O(1), we can set thresh
580 * to ~10.)
581 *
582 * debug (input) int
583 * If debug=3, print summary
584 * If debug=2, print summary only if the number of bad ratios > 0
585 * If debug=1, print complete info if tests fail
586 * If debug=0, return max ratio
587 *
588 * test_prob (input) float
589 * The specified test will be performed only if the generated
590 * random exceeds this threshold.
591 *
592 * min_ratio (output) double
593 * The minimum ratio
594 *
595 * num_bad_ratio (output) int
596 * The number of tests fail; they are above the threshold.
597 *
598 * num_tests (output) int
599 * The number of tests is being performed.
600 *
601 * Return value
602 * ============
603 *
604 * The maximum ratio if run successfully, otherwise return -1
605 *
606 * Code structure
607 * ==============
608 *
609 * debug loop -- if debug is one, the first loop computes the max ratio
610 * -- and the last(second) loop outputs debugging information,
611 * -- if the test fail and its ratio > 0.5 * max ratio.
612 * -- if debug is zero, the loop is executed once
613 * alpha loop -- varying alpha: 0, 1, or random
614 * beta loop -- varying beta: 0, 1, or random
615
616 * norm loop -- varying norm: near undeflow, near one, or
617 * -- near overflow
618 * numtest loop -- how many times the test is perform with
619 * -- above set of attributes
620 * order loop -- varying order type: rowmajor or colmajor
621 * trans loop -- varying uplo type: upper or lower
622 * lda loop -- varying lda: m, m+1, 2m
623 * incx loop -- varying incx: -2, -1, 1, 2
624 * incy loop -- varying incy: -2, -1, 1, 2
625 */
626 {
627 /* function name */
628 const char fname[] = "BLAS_dgemv2_s_d";
629
630 /* max number of debug lines to print */
631 const int max_print = 8;
632
633 /* Variables in the "x_val" form are loop vars for corresponding
634 variables */
635 int i; /* iterate through the repeating tests */
636 int j, k; /* multipurpose counters or variables */
637 int iy; /* use to index y */
638 int incx_val, incy_val, /* for testing different inc values */
639 incx, incy;
640 int incy_gen; /* for complex case inc=2, for real case inc=1 */
641 int d_count; /* counter for debug */
642 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
643 int p_count; /* counter for the number of debug lines printed */
644 int tot_tests; /* total number of tests to be done */
645 int norm; /* input values of near underflow/one/overflow */
646 double ratio_max; /* the current maximum ratio */
647 double ratio_min; /* the current minimum ratio */
648 double *ratios; /* a temporary variable for calculating ratio */
649 double ratio; /* the per-use test ratio from test() */
650 int bad_ratios; /* the number of ratios over the threshold */
651 double eps_int; /* the internal epsilon expected--2^(-24) for float */
652 double un_int; /* the internal underflow threshold */
653 double alpha;
654 double beta;
655 float *A;
656 double *head_x;
657 double *tail_x;
658 double *y;
659 float *temp; /* use for calculating ratio */
660
661 /* x_gen and y_gen are used to store vectors generated by testgen.
662 they eventually are copied back to x and y */
663 double *head_x_gen;
664 double *tail_x_gen;
665 double *y_gen;
666
667 /* the true r calculated by testgen(), in double-double */
668 double *head_r_true, *tail_r_true;
669 int alpha_val;
670 int alpha_flag; /* input flag for BLAS_dgemv2_s_d_testgen */
671 int beta_val;
672 int beta_flag; /* input flag for BLAS_dgemv2_s_d_testgen */
673 int order_val;
674 enum blas_order_type order_type;
675
676 enum blas_prec_type prec;
677 int trans_val;
678 enum blas_trans_type trans_type;
679 int m_i;
680 int n_i;
681 int max_mn; /* the max of m and n */
682 int lda_val;
683 int lda;
684 int saved_seed; /* for saving the original seed */
685 int count, old_count; /* use for counting the number of testgen calls * 2 */
686
687 FPU_FIX_DECL;
688
689 /* test for bad arguments */
690 if (n < 0 || m < 0 || ntests < 0)
691 BLAS_error(fname, 0, 0, NULL);
692
693 /* initialization */
694 *num_bad_ratio = 0;
695 *num_tests = 0;
696 *min_ratio = 0.0;
697
698 saved_seed = *seed;
699 ratio_min = 1e308;
700 ratio_max = 0.0;
701 ratio = 0.0;
702 tot_tests = 0;
703 p_count = 0;
704 count = 0;
705 find_max_ratio = 0;
706 bad_ratios = 0;
707 old_count = 0;
708
709 if (debug == 3)
710 find_max_ratio = 1;
711 max_mn = MAX(m, n);
712 if (m == 0 || n == 0) {
713 return 0.0;
714 }
715
716 FPU_FIX_START;
717
718 incy_gen = 1;
719
720
721 /* get space for calculation */
722 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
723 if (max_mn * 2 > 0 && head_x == NULL) {
724 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
725 }
726 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
727 if (max_mn * 2 > 0 && tail_x == NULL) {
728 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
729 }
730 y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
731 if (max_mn * 2 > 0 && y == NULL) {
732 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
733 }
734 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
735 if (max_mn > 0 && head_x_gen == NULL) {
736 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
737 }
738 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
739 if (max_mn > 0 && tail_x_gen == NULL) {
740 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
741 }
742 y_gen = (double *) blas_malloc(max_mn * sizeof(double));
743 if (max_mn > 0 && y_gen == NULL) {
744 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
745 }
746 temp = (float *) blas_malloc(max_mn * sizeof(float));
747 if (max_mn > 0 && temp == NULL) {
748 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
749 }
750 head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
751 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
752 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
753 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
754 }
755 ratios = (double *) blas_malloc(max_mn * sizeof(double));
756 if (max_mn > 0 && ratios == NULL) {
757 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
758 }
759 A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
760 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
761 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
762 }
763
764 /* The debug iteration:
765 If debug=1, then will execute the iteration twice. First, compute the
766 max ratio. Second, print info if ratio > (50% * ratio_max). */
767 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
768 bad_ratios = 0; /* set to zero */
769
770 if ((debug == 3) && (d_count == find_max_ratio))
771 *seed = saved_seed; /* restore the original seed */
772
773 /* varying alpha */
774 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
775 alpha_flag = 0;
776 switch (alpha_val) {
777 case 0:
778 alpha = 0.0;
779 alpha_flag = 1;
780 break;
781 case 1:
782 alpha = 1.0;
783 alpha_flag = 1;
784 break;
785 }
786
787 /* varying beta */
788 for (beta_val = 0; beta_val < 3; beta_val++) {
789 beta_flag = 0;
790 switch (beta_val) {
791 case 0:
792 beta = 0.0;
793 beta_flag = 1;
794 break;
795 case 1:
796 beta = 1.0;
797 beta_flag = 1;
798 break;
799 }
800
801
802 eps_int = power(2, -BITS_D);
803 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
804 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
805 prec = blas_prec_double;
806
807 /* values near underflow, 1, or overflow */
808 for (norm = -1; norm <= 1; norm++) {
809
810 /* number of tests */
811 for (i = 0; i < ntests; i++) {
812
813 /* row or col major */
814 for (order_val = 0; order_val < 2; order_val++) {
815 switch (order_val) {
816 case 0:
817 order_type = blas_rowmajor;
818 break;
819 case 1:
820 default:
821 order_type = blas_colmajor;
822 break;
823 }
824
825 /* no_trans, trans, or conj_trans */
826 for (trans_val = 0; trans_val < 3; trans_val++) {
827 switch (trans_val) {
828 case 0:
829 trans_type = blas_no_trans;
830 m_i = m;
831 n_i = n;
832 break;
833 case 1:
834 trans_type = blas_trans;
835 m_i = n;
836 n_i = m;
837 break;
838 case 2:
839 default:
840 trans_type = blas_conj_trans;
841 m_i = n;
842 n_i = m;
843 break;
844 }
845
846 /* lda=n, n+1, or 2n */
847 for (lda_val = 0; lda_val < 3; lda_val++) {
848 switch (lda_val) {
849 case 0:
850 lda = m_i;
851 break;
852 case 1:
853 lda = m_i + 1;
854 break;
855 case 2:
856 default:
857 lda = 2 * m_i;
858 break;
859 }
860 if ((order_type == blas_rowmajor && lda < n) ||
861 (order_type == blas_colmajor && lda < m))
862 continue;
863
864 /* For the sake of speed, we throw out this case at random */
865 if (xrand(seed) >= test_prob)
866 continue;
867
868 /* in the trivial cases, no need to run testgen */
869 if (m > 0 && n > 0)
870 BLAS_dgemv2_s_d_testgen(norm, order_type, trans_type, m,
871 n, &alpha, alpha_flag, A, lda,
872 head_x_gen, tail_x_gen, &beta,
873 beta_flag, y_gen, seed,
874 head_r_true, tail_r_true);
875
876 count++;
877
878 /* varying incx */
879 for (incx_val = -2; incx_val <= 2; incx_val++) {
880 if (incx_val == 0)
881 continue;
882
883 /* setting incx */
884 incx = incx_val;
885
886
887 dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
888 dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
889
890 /* varying incy */
891 for (incy_val = -2; incy_val <= 2; incy_val++) {
892 if (incy_val == 0)
893 continue;
894
895 /* setting incy */
896 incy = incy_val;
897
898
899 dcopy_vector(y_gen, m_i, 1, y, incy_val);
900
901 /* call BLAS_dgemv2_s_d */
902 FPU_FIX_STOP;
903 BLAS_dgemv2_s_d(order_type, trans_type, m, n, alpha, A,
904 lda, head_x, tail_x, incx_val, beta, y,
905 incy_val);
906 FPU_FIX_START;
907
908 /* set y starting index */
909 iy = 0;
910 if (incy < 0)
911 iy = -(m_i - 1) * incy;
912
913 /* computing the ratio */
914 if (m > 0 && n > 0)
915 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
916 /* copy row j of A to temp */
917 sge_copy_row(order_type, trans_type, m_i, n_i, A,
918 lda, temp, j);
919
920 test_BLAS_ddot2_s_d(n_i, blas_no_conj, alpha, beta,
921 y_gen[k], y[iy], head_r_true[k],
922 tail_r_true[k], temp, 1, head_x,
923 tail_x, incx_val, eps_int,
924 un_int, &ratios[j]);
925
926 /* take the max ratio */
927 if (j == 0) {
928 ratio = ratios[0];
929 /* The !<= below causes NaN error to be detected.
930 Note that (NaN > thresh) is always false. */
931 } else if (!(ratios[j] <= ratio)) {
932 ratio = ratios[j];
933 }
934 iy += incy;
935 }
936
937 /* Increase the number of bad ratio, if the ratio
938 is bigger than the threshold.
939 The !<= below causes NaN error to be detected.
940 Note that (NaN > thresh) is always false. */
941 if (!(ratio <= thresh)) {
942 bad_ratios++;
943
944 if ((debug == 3) && /* print only when debug is on */
945 (count != old_count) && /* print if old vector is different
946 from the current one */
947 (d_count == find_max_ratio) &&
948 (p_count <= max_print) &&
949 (ratio > 0.5 * ratio_max)) {
950 old_count = count;
951
952 printf
953 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
954 fname, m, n, ntests, thresh);
955
956 /* Print test info */
957 switch (prec) {
958 case blas_prec_single:
959 printf("single ");
960 break;
961 case blas_prec_double:
962 printf("double ");
963 break;
964 case blas_prec_indigenous:
965 printf("indigenous ");
966 break;
967 case blas_prec_extra:
968 printf("extra ");
969 break;
970 }
971 switch (norm) {
972 case -1:
973 printf("near_underflow ");
974 break;
975 case 0:
976 printf("near_one ");
977 break;
978 case 1:
979 printf("near_overflow ");
980 break;
981 }
982 switch (order_type) {
983 case blas_rowmajor:
984 printf("row_major ");
985 break;
986 case blas_colmajor:
987 printf("col_major ");
988 break;
989 }
990 switch (trans_type) {
991 case blas_no_trans:
992 printf("no_trans ");
993 break;
994 case blas_trans:
995 printf("trans ");
996 break;
997 case blas_conj_trans:
998 printf("conj_trans ");
999 break;
1000 }
1001
1002 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
1003 incy);
1004
1005 sge_print_matrix(A, m_i, n_i, lda, order_type, "A");
1006
1007 dprint_vector(head_x, n_i, incx_val, "head_x");
1008 dprint_vector(tail_x, n_i, incx_val, "tail_x");
1009 dprint_vector(y_gen, m_i, 1, "y_gen");
1010 dprint_vector(y, m_i, incy_val, "y_final");
1011
1012 printf(" ");
1013 printf("alpha = ");
1014 printf("%24.16e", alpha);
1015 printf("\n ");
1016 printf("beta = ");
1017 printf("%24.16e", beta);
1018 printf("\n");
1019 for (j = 0, k = 0; j < m_i * incy_gen;
1020 j += incy_gen, k++) {
1021 printf(" ");
1022 printf("[%24.16e, %24.16e]", head_r_true[j],
1023 tail_r_true[j]);
1024 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
1025 }
1026
1027 printf(" ratio=%.4e\n", ratio);
1028 p_count++;
1029 }
1030 if (bad_ratios >= MAX_BAD_TESTS) {
1031 printf("\ntoo many failures, exiting....");
1032 printf("\nTesting and compilation");
1033 printf(" are incomplete\n\n");
1034 goto end;
1035 }
1036 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
1037 printf("\nFlagrant ratio error, exiting...");
1038 printf("\nTesting and compilation");
1039 printf(" are incomplete\n\n");
1040 goto end;
1041 }
1042 }
1043 if (d_count == 0) {
1044 if (ratio > ratio_max)
1045 ratio_max = ratio;
1046
1047 if (ratio != 0.0 && ratio < ratio_min)
1048 ratio_min = ratio;
1049
1050 tot_tests++;
1051 }
1052 } /* incy */
1053 } /* incx */
1054 } /* lda */
1055 } /* trans */
1056 } /* order */
1057 } /* tests */
1058 } /* norm */
1059
1060 } /* beta */
1061 } /* alpha */
1062 } /* debug */
1063
1064 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
1065 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
1066 fname, m, n, ntests, thresh);
1067 printf
1068 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
1069 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
1070 ratio_min, ratio_max);
1071 }
1072
1073 end:
1074 FPU_FIX_STOP;
1075
1076 blas_free(head_x);
1077 blas_free(tail_x);
1078 blas_free(y);
1079 blas_free(head_x_gen);
1080 blas_free(tail_x_gen);
1081 blas_free(y_gen);
1082 blas_free(temp);
1083 blas_free(A);
1084 blas_free(head_r_true);
1085 blas_free(tail_r_true);
1086 blas_free(ratios);
1087
1088 *min_ratio = ratio_min;
1089 *num_bad_ratio = bad_ratios;
1090 *num_tests = tot_tests;
1091 return ratio_max;
1092 }
do_test_dgemv2_s_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)1093 double do_test_dgemv2_s_s(int m, int n, int ntests, int *seed, double thresh,
1094 int debug, float test_prob, double *min_ratio,
1095 int *num_bad_ratio, int *num_tests)
1096
1097 /*
1098 * Purpose
1099 * =======
1100 *
1101 * Runs a series of tests on GEMV2.
1102 *
1103 * Arguments
1104 * =========
1105 *
1106 * m (input) int
1107 * The number of rows
1108 *
1109 * n (input) int
1110 * The number of columns
1111 *
1112 * ntests (input) int
1113 * The number of tests to run for each set of attributes.
1114 *
1115 * seed (input/output) int
1116 * The seed for the random number generator used in testgen().
1117 *
1118 * thresh (input) double
1119 * When the ratio returned from test() exceeds the specified
1120 * threshold, the current size, r_true, r_comp, and ratio will be
1121 * printed. (Since ratio is supposed to be O(1), we can set thresh
1122 * to ~10.)
1123 *
1124 * debug (input) int
1125 * If debug=3, print summary
1126 * If debug=2, print summary only if the number of bad ratios > 0
1127 * If debug=1, print complete info if tests fail
1128 * If debug=0, return max ratio
1129 *
1130 * test_prob (input) float
1131 * The specified test will be performed only if the generated
1132 * random exceeds this threshold.
1133 *
1134 * min_ratio (output) double
1135 * The minimum ratio
1136 *
1137 * num_bad_ratio (output) int
1138 * The number of tests fail; they are above the threshold.
1139 *
1140 * num_tests (output) int
1141 * The number of tests is being performed.
1142 *
1143 * Return value
1144 * ============
1145 *
1146 * The maximum ratio if run successfully, otherwise return -1
1147 *
1148 * Code structure
1149 * ==============
1150 *
1151 * debug loop -- if debug is one, the first loop computes the max ratio
1152 * -- and the last(second) loop outputs debugging information,
1153 * -- if the test fail and its ratio > 0.5 * max ratio.
1154 * -- if debug is zero, the loop is executed once
1155 * alpha loop -- varying alpha: 0, 1, or random
1156 * beta loop -- varying beta: 0, 1, or random
1157
1158 * norm loop -- varying norm: near undeflow, near one, or
1159 * -- near overflow
1160 * numtest loop -- how many times the test is perform with
1161 * -- above set of attributes
1162 * order loop -- varying order type: rowmajor or colmajor
1163 * trans loop -- varying uplo type: upper or lower
1164 * lda loop -- varying lda: m, m+1, 2m
1165 * incx loop -- varying incx: -2, -1, 1, 2
1166 * incy loop -- varying incy: -2, -1, 1, 2
1167 */
1168 {
1169 /* function name */
1170 const char fname[] = "BLAS_dgemv2_s_s";
1171
1172 /* max number of debug lines to print */
1173 const int max_print = 8;
1174
1175 /* Variables in the "x_val" form are loop vars for corresponding
1176 variables */
1177 int i; /* iterate through the repeating tests */
1178 int j, k; /* multipurpose counters or variables */
1179 int iy; /* use to index y */
1180 int incx_val, incy_val, /* for testing different inc values */
1181 incx, incy;
1182 int incy_gen; /* for complex case inc=2, for real case inc=1 */
1183 int d_count; /* counter for debug */
1184 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
1185 int p_count; /* counter for the number of debug lines printed */
1186 int tot_tests; /* total number of tests to be done */
1187 int norm; /* input values of near underflow/one/overflow */
1188 double ratio_max; /* the current maximum ratio */
1189 double ratio_min; /* the current minimum ratio */
1190 double *ratios; /* a temporary variable for calculating ratio */
1191 double ratio; /* the per-use test ratio from test() */
1192 int bad_ratios; /* the number of ratios over the threshold */
1193 double eps_int; /* the internal epsilon expected--2^(-24) for float */
1194 double un_int; /* the internal underflow threshold */
1195 double alpha;
1196 double beta;
1197 float *A;
1198 float *head_x;
1199 float *tail_x;
1200 double *y;
1201 float *temp; /* use for calculating ratio */
1202
1203 /* x_gen and y_gen are used to store vectors generated by testgen.
1204 they eventually are copied back to x and y */
1205 float *head_x_gen;
1206 float *tail_x_gen;
1207 double *y_gen;
1208
1209 /* the true r calculated by testgen(), in double-double */
1210 double *head_r_true, *tail_r_true;
1211 int alpha_val;
1212 int alpha_flag; /* input flag for BLAS_dgemv2_s_s_testgen */
1213 int beta_val;
1214 int beta_flag; /* input flag for BLAS_dgemv2_s_s_testgen */
1215 int order_val;
1216 enum blas_order_type order_type;
1217
1218 enum blas_prec_type prec;
1219 int trans_val;
1220 enum blas_trans_type trans_type;
1221 int m_i;
1222 int n_i;
1223 int max_mn; /* the max of m and n */
1224 int lda_val;
1225 int lda;
1226 int saved_seed; /* for saving the original seed */
1227 int count, old_count; /* use for counting the number of testgen calls * 2 */
1228
1229 FPU_FIX_DECL;
1230
1231 /* test for bad arguments */
1232 if (n < 0 || m < 0 || ntests < 0)
1233 BLAS_error(fname, 0, 0, NULL);
1234
1235 /* initialization */
1236 *num_bad_ratio = 0;
1237 *num_tests = 0;
1238 *min_ratio = 0.0;
1239
1240 saved_seed = *seed;
1241 ratio_min = 1e308;
1242 ratio_max = 0.0;
1243 ratio = 0.0;
1244 tot_tests = 0;
1245 p_count = 0;
1246 count = 0;
1247 find_max_ratio = 0;
1248 bad_ratios = 0;
1249 old_count = 0;
1250
1251 if (debug == 3)
1252 find_max_ratio = 1;
1253 max_mn = MAX(m, n);
1254 if (m == 0 || n == 0) {
1255 return 0.0;
1256 }
1257
1258 FPU_FIX_START;
1259
1260 incy_gen = 1;
1261
1262
1263 /* get space for calculation */
1264 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
1265 if (max_mn * 2 > 0 && head_x == NULL) {
1266 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1267 }
1268 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
1269 if (max_mn * 2 > 0 && tail_x == NULL) {
1270 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1271 }
1272 y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
1273 if (max_mn * 2 > 0 && y == NULL) {
1274 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1275 }
1276 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
1277 if (max_mn > 0 && head_x_gen == NULL) {
1278 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1279 }
1280 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
1281 if (max_mn > 0 && tail_x_gen == NULL) {
1282 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1283 }
1284 y_gen = (double *) blas_malloc(max_mn * sizeof(double));
1285 if (max_mn > 0 && y_gen == NULL) {
1286 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1287 }
1288 temp = (float *) blas_malloc(max_mn * sizeof(float));
1289 if (max_mn > 0 && temp == NULL) {
1290 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1291 }
1292 head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
1293 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
1294 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1295 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1296 }
1297 ratios = (double *) blas_malloc(max_mn * sizeof(double));
1298 if (max_mn > 0 && ratios == NULL) {
1299 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1300 }
1301 A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
1302 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
1303 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1304 }
1305
1306 /* The debug iteration:
1307 If debug=1, then will execute the iteration twice. First, compute the
1308 max ratio. Second, print info if ratio > (50% * ratio_max). */
1309 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
1310 bad_ratios = 0; /* set to zero */
1311
1312 if ((debug == 3) && (d_count == find_max_ratio))
1313 *seed = saved_seed; /* restore the original seed */
1314
1315 /* varying alpha */
1316 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
1317 alpha_flag = 0;
1318 switch (alpha_val) {
1319 case 0:
1320 alpha = 0.0;
1321 alpha_flag = 1;
1322 break;
1323 case 1:
1324 alpha = 1.0;
1325 alpha_flag = 1;
1326 break;
1327 }
1328
1329 /* varying beta */
1330 for (beta_val = 0; beta_val < 3; beta_val++) {
1331 beta_flag = 0;
1332 switch (beta_val) {
1333 case 0:
1334 beta = 0.0;
1335 beta_flag = 1;
1336 break;
1337 case 1:
1338 beta = 1.0;
1339 beta_flag = 1;
1340 break;
1341 }
1342
1343
1344 eps_int = power(2, -BITS_D);
1345 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1346 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1347 prec = blas_prec_double;
1348
1349 /* values near underflow, 1, or overflow */
1350 for (norm = -1; norm <= 1; norm++) {
1351
1352 /* number of tests */
1353 for (i = 0; i < ntests; i++) {
1354
1355 /* row or col major */
1356 for (order_val = 0; order_val < 2; order_val++) {
1357 switch (order_val) {
1358 case 0:
1359 order_type = blas_rowmajor;
1360 break;
1361 case 1:
1362 default:
1363 order_type = blas_colmajor;
1364 break;
1365 }
1366
1367 /* no_trans, trans, or conj_trans */
1368 for (trans_val = 0; trans_val < 3; trans_val++) {
1369 switch (trans_val) {
1370 case 0:
1371 trans_type = blas_no_trans;
1372 m_i = m;
1373 n_i = n;
1374 break;
1375 case 1:
1376 trans_type = blas_trans;
1377 m_i = n;
1378 n_i = m;
1379 break;
1380 case 2:
1381 default:
1382 trans_type = blas_conj_trans;
1383 m_i = n;
1384 n_i = m;
1385 break;
1386 }
1387
1388 /* lda=n, n+1, or 2n */
1389 for (lda_val = 0; lda_val < 3; lda_val++) {
1390 switch (lda_val) {
1391 case 0:
1392 lda = m_i;
1393 break;
1394 case 1:
1395 lda = m_i + 1;
1396 break;
1397 case 2:
1398 default:
1399 lda = 2 * m_i;
1400 break;
1401 }
1402 if ((order_type == blas_rowmajor && lda < n) ||
1403 (order_type == blas_colmajor && lda < m))
1404 continue;
1405
1406 /* For the sake of speed, we throw out this case at random */
1407 if (xrand(seed) >= test_prob)
1408 continue;
1409
1410 /* in the trivial cases, no need to run testgen */
1411 if (m > 0 && n > 0)
1412 BLAS_dgemv2_s_s_testgen(norm, order_type, trans_type, m,
1413 n, &alpha, alpha_flag, A, lda,
1414 head_x_gen, tail_x_gen, &beta,
1415 beta_flag, y_gen, seed,
1416 head_r_true, tail_r_true);
1417
1418 count++;
1419
1420 /* varying incx */
1421 for (incx_val = -2; incx_val <= 2; incx_val++) {
1422 if (incx_val == 0)
1423 continue;
1424
1425 /* setting incx */
1426 incx = incx_val;
1427
1428
1429 scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
1430 scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
1431
1432 /* varying incy */
1433 for (incy_val = -2; incy_val <= 2; incy_val++) {
1434 if (incy_val == 0)
1435 continue;
1436
1437 /* setting incy */
1438 incy = incy_val;
1439
1440
1441 dcopy_vector(y_gen, m_i, 1, y, incy_val);
1442
1443 /* call BLAS_dgemv2_s_s */
1444 FPU_FIX_STOP;
1445 BLAS_dgemv2_s_s(order_type, trans_type, m, n, alpha, A,
1446 lda, head_x, tail_x, incx_val, beta, y,
1447 incy_val);
1448 FPU_FIX_START;
1449
1450 /* set y starting index */
1451 iy = 0;
1452 if (incy < 0)
1453 iy = -(m_i - 1) * incy;
1454
1455 /* computing the ratio */
1456 if (m > 0 && n > 0)
1457 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
1458 /* copy row j of A to temp */
1459 sge_copy_row(order_type, trans_type, m_i, n_i, A,
1460 lda, temp, j);
1461
1462 test_BLAS_ddot2_s_s(n_i, blas_no_conj, alpha, beta,
1463 y_gen[k], y[iy], head_r_true[k],
1464 tail_r_true[k], temp, 1, head_x,
1465 tail_x, incx_val, eps_int,
1466 un_int, &ratios[j]);
1467
1468 /* take the max ratio */
1469 if (j == 0) {
1470 ratio = ratios[0];
1471 /* The !<= below causes NaN error to be detected.
1472 Note that (NaN > thresh) is always false. */
1473 } else if (!(ratios[j] <= ratio)) {
1474 ratio = ratios[j];
1475 }
1476 iy += incy;
1477 }
1478
1479 /* Increase the number of bad ratio, if the ratio
1480 is bigger than the threshold.
1481 The !<= below causes NaN error to be detected.
1482 Note that (NaN > thresh) is always false. */
1483 if (!(ratio <= thresh)) {
1484 bad_ratios++;
1485
1486 if ((debug == 3) && /* print only when debug is on */
1487 (count != old_count) && /* print if old vector is different
1488 from the current one */
1489 (d_count == find_max_ratio) &&
1490 (p_count <= max_print) &&
1491 (ratio > 0.5 * ratio_max)) {
1492 old_count = count;
1493
1494 printf
1495 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
1496 fname, m, n, ntests, thresh);
1497
1498 /* Print test info */
1499 switch (prec) {
1500 case blas_prec_single:
1501 printf("single ");
1502 break;
1503 case blas_prec_double:
1504 printf("double ");
1505 break;
1506 case blas_prec_indigenous:
1507 printf("indigenous ");
1508 break;
1509 case blas_prec_extra:
1510 printf("extra ");
1511 break;
1512 }
1513 switch (norm) {
1514 case -1:
1515 printf("near_underflow ");
1516 break;
1517 case 0:
1518 printf("near_one ");
1519 break;
1520 case 1:
1521 printf("near_overflow ");
1522 break;
1523 }
1524 switch (order_type) {
1525 case blas_rowmajor:
1526 printf("row_major ");
1527 break;
1528 case blas_colmajor:
1529 printf("col_major ");
1530 break;
1531 }
1532 switch (trans_type) {
1533 case blas_no_trans:
1534 printf("no_trans ");
1535 break;
1536 case blas_trans:
1537 printf("trans ");
1538 break;
1539 case blas_conj_trans:
1540 printf("conj_trans ");
1541 break;
1542 }
1543
1544 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
1545 incy);
1546
1547 sge_print_matrix(A, m_i, n_i, lda, order_type, "A");
1548
1549 sprint_vector(head_x, n_i, incx_val, "head_x");
1550 sprint_vector(tail_x, n_i, incx_val, "tail_x");
1551 dprint_vector(y_gen, m_i, 1, "y_gen");
1552 dprint_vector(y, m_i, incy_val, "y_final");
1553
1554 printf(" ");
1555 printf("alpha = ");
1556 printf("%24.16e", alpha);
1557 printf("\n ");
1558 printf("beta = ");
1559 printf("%24.16e", beta);
1560 printf("\n");
1561 for (j = 0, k = 0; j < m_i * incy_gen;
1562 j += incy_gen, k++) {
1563 printf(" ");
1564 printf("[%24.16e, %24.16e]", head_r_true[j],
1565 tail_r_true[j]);
1566 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
1567 }
1568
1569 printf(" ratio=%.4e\n", ratio);
1570 p_count++;
1571 }
1572 if (bad_ratios >= MAX_BAD_TESTS) {
1573 printf("\ntoo many failures, exiting....");
1574 printf("\nTesting and compilation");
1575 printf(" are incomplete\n\n");
1576 goto end;
1577 }
1578 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
1579 printf("\nFlagrant ratio error, exiting...");
1580 printf("\nTesting and compilation");
1581 printf(" are incomplete\n\n");
1582 goto end;
1583 }
1584 }
1585 if (d_count == 0) {
1586 if (ratio > ratio_max)
1587 ratio_max = ratio;
1588
1589 if (ratio != 0.0 && ratio < ratio_min)
1590 ratio_min = ratio;
1591
1592 tot_tests++;
1593 }
1594 } /* incy */
1595 } /* incx */
1596 } /* lda */
1597 } /* trans */
1598 } /* order */
1599 } /* tests */
1600 } /* norm */
1601
1602 } /* beta */
1603 } /* alpha */
1604 } /* debug */
1605
1606 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
1607 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
1608 fname, m, n, ntests, thresh);
1609 printf
1610 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
1611 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
1612 ratio_min, ratio_max);
1613 }
1614
1615 end:
1616 FPU_FIX_STOP;
1617
1618 blas_free(head_x);
1619 blas_free(tail_x);
1620 blas_free(y);
1621 blas_free(head_x_gen);
1622 blas_free(tail_x_gen);
1623 blas_free(y_gen);
1624 blas_free(temp);
1625 blas_free(A);
1626 blas_free(head_r_true);
1627 blas_free(tail_r_true);
1628 blas_free(ratios);
1629
1630 *min_ratio = ratio_min;
1631 *num_bad_ratio = bad_ratios;
1632 *num_tests = tot_tests;
1633 return ratio_max;
1634 }
do_test_zgemv2_z_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)1635 double do_test_zgemv2_z_c(int m, int n, int ntests, int *seed, double thresh,
1636 int debug, float test_prob, double *min_ratio,
1637 int *num_bad_ratio, int *num_tests)
1638
1639 /*
1640 * Purpose
1641 * =======
1642 *
1643 * Runs a series of tests on GEMV2.
1644 *
1645 * Arguments
1646 * =========
1647 *
1648 * m (input) int
1649 * The number of rows
1650 *
1651 * n (input) int
1652 * The number of columns
1653 *
1654 * ntests (input) int
1655 * The number of tests to run for each set of attributes.
1656 *
1657 * seed (input/output) int
1658 * The seed for the random number generator used in testgen().
1659 *
1660 * thresh (input) double
1661 * When the ratio returned from test() exceeds the specified
1662 * threshold, the current size, r_true, r_comp, and ratio will be
1663 * printed. (Since ratio is supposed to be O(1), we can set thresh
1664 * to ~10.)
1665 *
1666 * debug (input) int
1667 * If debug=3, print summary
1668 * If debug=2, print summary only if the number of bad ratios > 0
1669 * If debug=1, print complete info if tests fail
1670 * If debug=0, return max ratio
1671 *
1672 * test_prob (input) float
1673 * The specified test will be performed only if the generated
1674 * random exceeds this threshold.
1675 *
1676 * min_ratio (output) double
1677 * The minimum ratio
1678 *
1679 * num_bad_ratio (output) int
1680 * The number of tests fail; they are above the threshold.
1681 *
1682 * num_tests (output) int
1683 * The number of tests is being performed.
1684 *
1685 * Return value
1686 * ============
1687 *
1688 * The maximum ratio if run successfully, otherwise return -1
1689 *
1690 * Code structure
1691 * ==============
1692 *
1693 * debug loop -- if debug is one, the first loop computes the max ratio
1694 * -- and the last(second) loop outputs debugging information,
1695 * -- if the test fail and its ratio > 0.5 * max ratio.
1696 * -- if debug is zero, the loop is executed once
1697 * alpha loop -- varying alpha: 0, 1, or random
1698 * beta loop -- varying beta: 0, 1, or random
1699
1700 * norm loop -- varying norm: near undeflow, near one, or
1701 * -- near overflow
1702 * numtest loop -- how many times the test is perform with
1703 * -- above set of attributes
1704 * order loop -- varying order type: rowmajor or colmajor
1705 * trans loop -- varying uplo type: upper or lower
1706 * lda loop -- varying lda: m, m+1, 2m
1707 * incx loop -- varying incx: -2, -1, 1, 2
1708 * incy loop -- varying incy: -2, -1, 1, 2
1709 */
1710 {
1711 /* function name */
1712 const char fname[] = "BLAS_zgemv2_z_c";
1713
1714 /* max number of debug lines to print */
1715 const int max_print = 8;
1716
1717 /* Variables in the "x_val" form are loop vars for corresponding
1718 variables */
1719 int i; /* iterate through the repeating tests */
1720 int j, k; /* multipurpose counters or variables */
1721 int iy; /* use to index y */
1722 int incx_val, incy_val, /* for testing different inc values */
1723 incx, incy;
1724 int incy_gen; /* for complex case inc=2, for real case inc=1 */
1725 int d_count; /* counter for debug */
1726 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
1727 int p_count; /* counter for the number of debug lines printed */
1728 int tot_tests; /* total number of tests to be done */
1729 int norm; /* input values of near underflow/one/overflow */
1730 double ratio_max; /* the current maximum ratio */
1731 double ratio_min; /* the current minimum ratio */
1732 double *ratios; /* a temporary variable for calculating ratio */
1733 double ratio; /* the per-use test ratio from test() */
1734 int bad_ratios; /* the number of ratios over the threshold */
1735 double eps_int; /* the internal epsilon expected--2^(-24) for float */
1736 double un_int; /* the internal underflow threshold */
1737 double alpha[2];
1738 double beta[2];
1739 double *A;
1740 float *head_x;
1741 float *tail_x;
1742 double *y;
1743 double *temp; /* use for calculating ratio */
1744
1745 /* x_gen and y_gen are used to store vectors generated by testgen.
1746 they eventually are copied back to x and y */
1747 float *head_x_gen;
1748 float *tail_x_gen;
1749 double *y_gen;
1750
1751 /* the true r calculated by testgen(), in double-double */
1752 double *head_r_true, *tail_r_true;
1753
1754 int alpha_val;
1755 int alpha_flag; /* input flag for BLAS_zgemv2_z_c_testgen */
1756 int beta_val;
1757 int beta_flag; /* input flag for BLAS_zgemv2_z_c_testgen */
1758 int order_val;
1759 enum blas_order_type order_type;
1760
1761 enum blas_prec_type prec;
1762 int trans_val;
1763 enum blas_trans_type trans_type;
1764 int m_i;
1765 int n_i;
1766 int max_mn; /* the max of m and n */
1767 int lda_val;
1768 int lda;
1769 int saved_seed; /* for saving the original seed */
1770 int count, old_count; /* use for counting the number of testgen calls * 2 */
1771
1772 FPU_FIX_DECL;
1773
1774 /* test for bad arguments */
1775 if (n < 0 || m < 0 || ntests < 0)
1776 BLAS_error(fname, 0, 0, NULL);
1777
1778 /* initialization */
1779 *num_bad_ratio = 0;
1780 *num_tests = 0;
1781 *min_ratio = 0.0;
1782
1783 saved_seed = *seed;
1784 ratio_min = 1e308;
1785 ratio_max = 0.0;
1786 ratio = 0.0;
1787 tot_tests = 0;
1788 p_count = 0;
1789 count = 0;
1790 find_max_ratio = 0;
1791 bad_ratios = 0;
1792 old_count = 0;
1793
1794 if (debug == 3)
1795 find_max_ratio = 1;
1796 max_mn = MAX(m, n);
1797 if (m == 0 || n == 0) {
1798 return 0.0;
1799 }
1800
1801 FPU_FIX_START;
1802
1803 incy_gen = 1;
1804 incy_gen *= 2;
1805
1806 /* get space for calculation */
1807 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
1808 if (max_mn * 2 > 0 && head_x == NULL) {
1809 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1810 }
1811 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
1812 if (max_mn * 2 > 0 && tail_x == NULL) {
1813 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1814 }
1815 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
1816 if (max_mn * 2 > 0 && y == NULL) {
1817 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1818 }
1819 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
1820 if (max_mn > 0 && head_x_gen == NULL) {
1821 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1822 }
1823 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
1824 if (max_mn > 0 && tail_x_gen == NULL) {
1825 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1826 }
1827 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
1828 if (max_mn > 0 && y_gen == NULL) {
1829 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1830 }
1831 temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
1832 if (max_mn > 0 && temp == NULL) {
1833 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1834 }
1835 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
1836 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
1837 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1838 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1839 }
1840 ratios = (double *) blas_malloc(max_mn * sizeof(double));
1841 if (max_mn > 0 && ratios == NULL) {
1842 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1843 }
1844 A =
1845 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
1846 2);
1847 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
1848 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1849 }
1850
1851 /* The debug iteration:
1852 If debug=1, then will execute the iteration twice. First, compute the
1853 max ratio. Second, print info if ratio > (50% * ratio_max). */
1854 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
1855 bad_ratios = 0; /* set to zero */
1856
1857 if ((debug == 3) && (d_count == find_max_ratio))
1858 *seed = saved_seed; /* restore the original seed */
1859
1860 /* varying alpha */
1861 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
1862 alpha_flag = 0;
1863 switch (alpha_val) {
1864 case 0:
1865 alpha[0] = alpha[1] = 0.0;
1866 alpha_flag = 1;
1867 break;
1868 case 1:
1869 alpha[0] = 1.0;
1870 alpha[1] = 0.0;
1871 alpha_flag = 1;
1872 break;
1873 }
1874
1875 /* varying beta */
1876 for (beta_val = 0; beta_val < 3; beta_val++) {
1877 beta_flag = 0;
1878 switch (beta_val) {
1879 case 0:
1880 beta[0] = beta[1] = 0.0;
1881 beta_flag = 1;
1882 break;
1883 case 1:
1884 beta[0] = 1.0;
1885 beta[1] = 0.0;
1886 beta_flag = 1;
1887 break;
1888 }
1889
1890
1891 eps_int = power(2, -BITS_D);
1892 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1893 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1894 prec = blas_prec_double;
1895
1896 /* values near underflow, 1, or overflow */
1897 for (norm = -1; norm <= 1; norm++) {
1898
1899 /* number of tests */
1900 for (i = 0; i < ntests; i++) {
1901
1902 /* row or col major */
1903 for (order_val = 0; order_val < 2; order_val++) {
1904 switch (order_val) {
1905 case 0:
1906 order_type = blas_rowmajor;
1907 break;
1908 case 1:
1909 default:
1910 order_type = blas_colmajor;
1911 break;
1912 }
1913
1914 /* no_trans, trans, or conj_trans */
1915 for (trans_val = 0; trans_val < 3; trans_val++) {
1916 switch (trans_val) {
1917 case 0:
1918 trans_type = blas_no_trans;
1919 m_i = m;
1920 n_i = n;
1921 break;
1922 case 1:
1923 trans_type = blas_trans;
1924 m_i = n;
1925 n_i = m;
1926 break;
1927 case 2:
1928 default:
1929 trans_type = blas_conj_trans;
1930 m_i = n;
1931 n_i = m;
1932 break;
1933 }
1934
1935 /* lda=n, n+1, or 2n */
1936 for (lda_val = 0; lda_val < 3; lda_val++) {
1937 switch (lda_val) {
1938 case 0:
1939 lda = m_i;
1940 break;
1941 case 1:
1942 lda = m_i + 1;
1943 break;
1944 case 2:
1945 default:
1946 lda = 2 * m_i;
1947 break;
1948 }
1949 if ((order_type == blas_rowmajor && lda < n) ||
1950 (order_type == blas_colmajor && lda < m))
1951 continue;
1952
1953 /* For the sake of speed, we throw out this case at random */
1954 if (xrand(seed) >= test_prob)
1955 continue;
1956
1957 /* in the trivial cases, no need to run testgen */
1958 if (m > 0 && n > 0)
1959 BLAS_zgemv2_z_c_testgen(norm, order_type, trans_type, m,
1960 n, &alpha, alpha_flag, A, lda,
1961 head_x_gen, tail_x_gen, &beta,
1962 beta_flag, y_gen, seed,
1963 head_r_true, tail_r_true);
1964
1965 count++;
1966
1967 /* varying incx */
1968 for (incx_val = -2; incx_val <= 2; incx_val++) {
1969 if (incx_val == 0)
1970 continue;
1971
1972 /* setting incx */
1973 incx = incx_val;
1974 incx *= 2;
1975
1976 ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
1977 ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
1978
1979 /* varying incy */
1980 for (incy_val = -2; incy_val <= 2; incy_val++) {
1981 if (incy_val == 0)
1982 continue;
1983
1984 /* setting incy */
1985 incy = incy_val;
1986 incy *= 2;
1987
1988 zcopy_vector(y_gen, m_i, 1, y, incy_val);
1989
1990 /* call BLAS_zgemv2_z_c */
1991 FPU_FIX_STOP;
1992 BLAS_zgemv2_z_c(order_type, trans_type, m, n, alpha, A,
1993 lda, head_x, tail_x, incx_val, beta, y,
1994 incy_val);
1995 FPU_FIX_START;
1996
1997 /* set y starting index */
1998 iy = 0;
1999 if (incy < 0)
2000 iy = -(m_i - 1) * incy;
2001
2002 /* computing the ratio */
2003 if (m > 0 && n > 0)
2004 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
2005 /* copy row j of A to temp */
2006 zge_copy_row(order_type, trans_type, m_i, n_i, A,
2007 lda, temp, j);
2008
2009 test_BLAS_zdot2_z_c(n_i, blas_no_conj, alpha, beta,
2010 &y_gen[k], &y[iy],
2011 &head_r_true[k],
2012 &tail_r_true[k], temp, 1,
2013 head_x, tail_x, incx_val,
2014 eps_int, un_int, &ratios[j]);
2015
2016 /* take the max ratio */
2017 if (j == 0) {
2018 ratio = ratios[0];
2019 /* The !<= below causes NaN error to be detected.
2020 Note that (NaN > thresh) is always false. */
2021 } else if (!(ratios[j] <= ratio)) {
2022 ratio = ratios[j];
2023 }
2024 iy += incy;
2025 }
2026
2027 /* Increase the number of bad ratio, if the ratio
2028 is bigger than the threshold.
2029 The !<= below causes NaN error to be detected.
2030 Note that (NaN > thresh) is always false. */
2031 if (!(ratio <= thresh)) {
2032 bad_ratios++;
2033
2034 if ((debug == 3) && /* print only when debug is on */
2035 (count != old_count) && /* print if old vector is different
2036 from the current one */
2037 (d_count == find_max_ratio) &&
2038 (p_count <= max_print) &&
2039 (ratio > 0.5 * ratio_max)) {
2040 old_count = count;
2041
2042 printf
2043 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
2044 fname, m, n, ntests, thresh);
2045
2046 /* Print test info */
2047 switch (prec) {
2048 case blas_prec_single:
2049 printf("single ");
2050 break;
2051 case blas_prec_double:
2052 printf("double ");
2053 break;
2054 case blas_prec_indigenous:
2055 printf("indigenous ");
2056 break;
2057 case blas_prec_extra:
2058 printf("extra ");
2059 break;
2060 }
2061 switch (norm) {
2062 case -1:
2063 printf("near_underflow ");
2064 break;
2065 case 0:
2066 printf("near_one ");
2067 break;
2068 case 1:
2069 printf("near_overflow ");
2070 break;
2071 }
2072 switch (order_type) {
2073 case blas_rowmajor:
2074 printf("row_major ");
2075 break;
2076 case blas_colmajor:
2077 printf("col_major ");
2078 break;
2079 }
2080 switch (trans_type) {
2081 case blas_no_trans:
2082 printf("no_trans ");
2083 break;
2084 case blas_trans:
2085 printf("trans ");
2086 break;
2087 case blas_conj_trans:
2088 printf("conj_trans ");
2089 break;
2090 }
2091
2092 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
2093 incy);
2094
2095 zge_print_matrix(A, m_i, n_i, lda, order_type, "A");
2096
2097 cprint_vector(head_x, n_i, incx_val, "head_x");
2098 cprint_vector(tail_x, n_i, incx_val, "tail_x");
2099 zprint_vector(y_gen, m_i, 1, "y_gen");
2100 zprint_vector(y, m_i, incy_val, "y_final");
2101
2102 printf(" ");
2103 printf("alpha = ");
2104 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
2105 printf("\n ");
2106 printf("beta = ");
2107 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
2108 printf("\n");
2109 for (j = 0, k = 0; j < m_i * incy_gen;
2110 j += incy_gen, k++) {
2111 printf(" ");
2112 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
2113 head_r_true[j], tail_r_true[j],
2114 head_r_true[j + 1], tail_r_true[j + 1]);
2115 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
2116 }
2117
2118 printf(" ratio=%.4e\n", ratio);
2119 p_count++;
2120 }
2121 if (bad_ratios >= MAX_BAD_TESTS) {
2122 printf("\ntoo many failures, exiting....");
2123 printf("\nTesting and compilation");
2124 printf(" are incomplete\n\n");
2125 goto end;
2126 }
2127 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
2128 printf("\nFlagrant ratio error, exiting...");
2129 printf("\nTesting and compilation");
2130 printf(" are incomplete\n\n");
2131 goto end;
2132 }
2133 }
2134 if (d_count == 0) {
2135 if (ratio > ratio_max)
2136 ratio_max = ratio;
2137
2138 if (ratio != 0.0 && ratio < ratio_min)
2139 ratio_min = ratio;
2140
2141 tot_tests++;
2142 }
2143 } /* incy */
2144 } /* incx */
2145 } /* lda */
2146 } /* trans */
2147 } /* order */
2148 } /* tests */
2149 } /* norm */
2150
2151 } /* beta */
2152 } /* alpha */
2153 } /* debug */
2154
2155 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
2156 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
2157 fname, m, n, ntests, thresh);
2158 printf
2159 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
2160 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
2161 ratio_min, ratio_max);
2162 }
2163
2164 end:
2165 FPU_FIX_STOP;
2166
2167 blas_free(head_x);
2168 blas_free(tail_x);
2169 blas_free(y);
2170 blas_free(head_x_gen);
2171 blas_free(tail_x_gen);
2172 blas_free(y_gen);
2173 blas_free(temp);
2174 blas_free(A);
2175 blas_free(head_r_true);
2176 blas_free(tail_r_true);
2177 blas_free(ratios);
2178
2179 *min_ratio = ratio_min;
2180 *num_bad_ratio = bad_ratios;
2181 *num_tests = tot_tests;
2182 return ratio_max;
2183 }
do_test_zgemv2_c_z(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)2184 double do_test_zgemv2_c_z(int m, int n, int ntests, int *seed, double thresh,
2185 int debug, float test_prob, double *min_ratio,
2186 int *num_bad_ratio, int *num_tests)
2187
2188 /*
2189 * Purpose
2190 * =======
2191 *
2192 * Runs a series of tests on GEMV2.
2193 *
2194 * Arguments
2195 * =========
2196 *
2197 * m (input) int
2198 * The number of rows
2199 *
2200 * n (input) int
2201 * The number of columns
2202 *
2203 * ntests (input) int
2204 * The number of tests to run for each set of attributes.
2205 *
2206 * seed (input/output) int
2207 * The seed for the random number generator used in testgen().
2208 *
2209 * thresh (input) double
2210 * When the ratio returned from test() exceeds the specified
2211 * threshold, the current size, r_true, r_comp, and ratio will be
2212 * printed. (Since ratio is supposed to be O(1), we can set thresh
2213 * to ~10.)
2214 *
2215 * debug (input) int
2216 * If debug=3, print summary
2217 * If debug=2, print summary only if the number of bad ratios > 0
2218 * If debug=1, print complete info if tests fail
2219 * If debug=0, return max ratio
2220 *
2221 * test_prob (input) float
2222 * The specified test will be performed only if the generated
2223 * random exceeds this threshold.
2224 *
2225 * min_ratio (output) double
2226 * The minimum ratio
2227 *
2228 * num_bad_ratio (output) int
2229 * The number of tests fail; they are above the threshold.
2230 *
2231 * num_tests (output) int
2232 * The number of tests is being performed.
2233 *
2234 * Return value
2235 * ============
2236 *
2237 * The maximum ratio if run successfully, otherwise return -1
2238 *
2239 * Code structure
2240 * ==============
2241 *
2242 * debug loop -- if debug is one, the first loop computes the max ratio
2243 * -- and the last(second) loop outputs debugging information,
2244 * -- if the test fail and its ratio > 0.5 * max ratio.
2245 * -- if debug is zero, the loop is executed once
2246 * alpha loop -- varying alpha: 0, 1, or random
2247 * beta loop -- varying beta: 0, 1, or random
2248
2249 * norm loop -- varying norm: near undeflow, near one, or
2250 * -- near overflow
2251 * numtest loop -- how many times the test is perform with
2252 * -- above set of attributes
2253 * order loop -- varying order type: rowmajor or colmajor
2254 * trans loop -- varying uplo type: upper or lower
2255 * lda loop -- varying lda: m, m+1, 2m
2256 * incx loop -- varying incx: -2, -1, 1, 2
2257 * incy loop -- varying incy: -2, -1, 1, 2
2258 */
2259 {
2260 /* function name */
2261 const char fname[] = "BLAS_zgemv2_c_z";
2262
2263 /* max number of debug lines to print */
2264 const int max_print = 8;
2265
2266 /* Variables in the "x_val" form are loop vars for corresponding
2267 variables */
2268 int i; /* iterate through the repeating tests */
2269 int j, k; /* multipurpose counters or variables */
2270 int iy; /* use to index y */
2271 int incx_val, incy_val, /* for testing different inc values */
2272 incx, incy;
2273 int incy_gen; /* for complex case inc=2, for real case inc=1 */
2274 int d_count; /* counter for debug */
2275 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
2276 int p_count; /* counter for the number of debug lines printed */
2277 int tot_tests; /* total number of tests to be done */
2278 int norm; /* input values of near underflow/one/overflow */
2279 double ratio_max; /* the current maximum ratio */
2280 double ratio_min; /* the current minimum ratio */
2281 double *ratios; /* a temporary variable for calculating ratio */
2282 double ratio; /* the per-use test ratio from test() */
2283 int bad_ratios; /* the number of ratios over the threshold */
2284 double eps_int; /* the internal epsilon expected--2^(-24) for float */
2285 double un_int; /* the internal underflow threshold */
2286 double alpha[2];
2287 double beta[2];
2288 float *A;
2289 double *head_x;
2290 double *tail_x;
2291 double *y;
2292 float *temp; /* use for calculating ratio */
2293
2294 /* x_gen and y_gen are used to store vectors generated by testgen.
2295 they eventually are copied back to x and y */
2296 double *head_x_gen;
2297 double *tail_x_gen;
2298 double *y_gen;
2299
2300 /* the true r calculated by testgen(), in double-double */
2301 double *head_r_true, *tail_r_true;
2302
2303 int alpha_val;
2304 int alpha_flag; /* input flag for BLAS_zgemv2_c_z_testgen */
2305 int beta_val;
2306 int beta_flag; /* input flag for BLAS_zgemv2_c_z_testgen */
2307 int order_val;
2308 enum blas_order_type order_type;
2309
2310 enum blas_prec_type prec;
2311 int trans_val;
2312 enum blas_trans_type trans_type;
2313 int m_i;
2314 int n_i;
2315 int max_mn; /* the max of m and n */
2316 int lda_val;
2317 int lda;
2318 int saved_seed; /* for saving the original seed */
2319 int count, old_count; /* use for counting the number of testgen calls * 2 */
2320
2321 FPU_FIX_DECL;
2322
2323 /* test for bad arguments */
2324 if (n < 0 || m < 0 || ntests < 0)
2325 BLAS_error(fname, 0, 0, NULL);
2326
2327 /* initialization */
2328 *num_bad_ratio = 0;
2329 *num_tests = 0;
2330 *min_ratio = 0.0;
2331
2332 saved_seed = *seed;
2333 ratio_min = 1e308;
2334 ratio_max = 0.0;
2335 ratio = 0.0;
2336 tot_tests = 0;
2337 p_count = 0;
2338 count = 0;
2339 find_max_ratio = 0;
2340 bad_ratios = 0;
2341 old_count = 0;
2342
2343 if (debug == 3)
2344 find_max_ratio = 1;
2345 max_mn = MAX(m, n);
2346 if (m == 0 || n == 0) {
2347 return 0.0;
2348 }
2349
2350 FPU_FIX_START;
2351
2352 incy_gen = 1;
2353 incy_gen *= 2;
2354
2355 /* get space for calculation */
2356 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
2357 if (max_mn * 2 > 0 && head_x == NULL) {
2358 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2359 }
2360 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
2361 if (max_mn * 2 > 0 && tail_x == NULL) {
2362 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2363 }
2364 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
2365 if (max_mn * 2 > 0 && y == NULL) {
2366 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2367 }
2368 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2369 if (max_mn > 0 && head_x_gen == NULL) {
2370 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2371 }
2372 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2373 if (max_mn > 0 && tail_x_gen == NULL) {
2374 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2375 }
2376 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2377 if (max_mn > 0 && y_gen == NULL) {
2378 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2379 }
2380 temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2381 if (max_mn > 0 && temp == NULL) {
2382 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2383 }
2384 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2385 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2386 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2387 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2388 }
2389 ratios = (double *) blas_malloc(max_mn * sizeof(double));
2390 if (max_mn > 0 && ratios == NULL) {
2391 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2392 }
2393 A =
2394 (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
2395 2);
2396 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
2397 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2398 }
2399
2400 /* The debug iteration:
2401 If debug=1, then will execute the iteration twice. First, compute the
2402 max ratio. Second, print info if ratio > (50% * ratio_max). */
2403 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
2404 bad_ratios = 0; /* set to zero */
2405
2406 if ((debug == 3) && (d_count == find_max_ratio))
2407 *seed = saved_seed; /* restore the original seed */
2408
2409 /* varying alpha */
2410 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
2411 alpha_flag = 0;
2412 switch (alpha_val) {
2413 case 0:
2414 alpha[0] = alpha[1] = 0.0;
2415 alpha_flag = 1;
2416 break;
2417 case 1:
2418 alpha[0] = 1.0;
2419 alpha[1] = 0.0;
2420 alpha_flag = 1;
2421 break;
2422 }
2423
2424 /* varying beta */
2425 for (beta_val = 0; beta_val < 3; beta_val++) {
2426 beta_flag = 0;
2427 switch (beta_val) {
2428 case 0:
2429 beta[0] = beta[1] = 0.0;
2430 beta_flag = 1;
2431 break;
2432 case 1:
2433 beta[0] = 1.0;
2434 beta[1] = 0.0;
2435 beta_flag = 1;
2436 break;
2437 }
2438
2439
2440 eps_int = power(2, -BITS_D);
2441 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2442 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2443 prec = blas_prec_double;
2444
2445 /* values near underflow, 1, or overflow */
2446 for (norm = -1; norm <= 1; norm++) {
2447
2448 /* number of tests */
2449 for (i = 0; i < ntests; i++) {
2450
2451 /* row or col major */
2452 for (order_val = 0; order_val < 2; order_val++) {
2453 switch (order_val) {
2454 case 0:
2455 order_type = blas_rowmajor;
2456 break;
2457 case 1:
2458 default:
2459 order_type = blas_colmajor;
2460 break;
2461 }
2462
2463 /* no_trans, trans, or conj_trans */
2464 for (trans_val = 0; trans_val < 3; trans_val++) {
2465 switch (trans_val) {
2466 case 0:
2467 trans_type = blas_no_trans;
2468 m_i = m;
2469 n_i = n;
2470 break;
2471 case 1:
2472 trans_type = blas_trans;
2473 m_i = n;
2474 n_i = m;
2475 break;
2476 case 2:
2477 default:
2478 trans_type = blas_conj_trans;
2479 m_i = n;
2480 n_i = m;
2481 break;
2482 }
2483
2484 /* lda=n, n+1, or 2n */
2485 for (lda_val = 0; lda_val < 3; lda_val++) {
2486 switch (lda_val) {
2487 case 0:
2488 lda = m_i;
2489 break;
2490 case 1:
2491 lda = m_i + 1;
2492 break;
2493 case 2:
2494 default:
2495 lda = 2 * m_i;
2496 break;
2497 }
2498 if ((order_type == blas_rowmajor && lda < n) ||
2499 (order_type == blas_colmajor && lda < m))
2500 continue;
2501
2502 /* For the sake of speed, we throw out this case at random */
2503 if (xrand(seed) >= test_prob)
2504 continue;
2505
2506 /* in the trivial cases, no need to run testgen */
2507 if (m > 0 && n > 0)
2508 BLAS_zgemv2_c_z_testgen(norm, order_type, trans_type, m,
2509 n, &alpha, alpha_flag, A, lda,
2510 head_x_gen, tail_x_gen, &beta,
2511 beta_flag, y_gen, seed,
2512 head_r_true, tail_r_true);
2513
2514 count++;
2515
2516 /* varying incx */
2517 for (incx_val = -2; incx_val <= 2; incx_val++) {
2518 if (incx_val == 0)
2519 continue;
2520
2521 /* setting incx */
2522 incx = incx_val;
2523 incx *= 2;
2524
2525 zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
2526 zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
2527
2528 /* varying incy */
2529 for (incy_val = -2; incy_val <= 2; incy_val++) {
2530 if (incy_val == 0)
2531 continue;
2532
2533 /* setting incy */
2534 incy = incy_val;
2535 incy *= 2;
2536
2537 zcopy_vector(y_gen, m_i, 1, y, incy_val);
2538
2539 /* call BLAS_zgemv2_c_z */
2540 FPU_FIX_STOP;
2541 BLAS_zgemv2_c_z(order_type, trans_type, m, n, alpha, A,
2542 lda, head_x, tail_x, incx_val, beta, y,
2543 incy_val);
2544 FPU_FIX_START;
2545
2546 /* set y starting index */
2547 iy = 0;
2548 if (incy < 0)
2549 iy = -(m_i - 1) * incy;
2550
2551 /* computing the ratio */
2552 if (m > 0 && n > 0)
2553 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
2554 /* copy row j of A to temp */
2555 cge_copy_row(order_type, trans_type, m_i, n_i, A,
2556 lda, temp, j);
2557
2558 test_BLAS_zdot2_c_z(n_i, blas_no_conj, alpha, beta,
2559 &y_gen[k], &y[iy],
2560 &head_r_true[k],
2561 &tail_r_true[k], temp, 1,
2562 head_x, tail_x, incx_val,
2563 eps_int, un_int, &ratios[j]);
2564
2565 /* take the max ratio */
2566 if (j == 0) {
2567 ratio = ratios[0];
2568 /* The !<= below causes NaN error to be detected.
2569 Note that (NaN > thresh) is always false. */
2570 } else if (!(ratios[j] <= ratio)) {
2571 ratio = ratios[j];
2572 }
2573 iy += incy;
2574 }
2575
2576 /* Increase the number of bad ratio, if the ratio
2577 is bigger than the threshold.
2578 The !<= below causes NaN error to be detected.
2579 Note that (NaN > thresh) is always false. */
2580 if (!(ratio <= thresh)) {
2581 bad_ratios++;
2582
2583 if ((debug == 3) && /* print only when debug is on */
2584 (count != old_count) && /* print if old vector is different
2585 from the current one */
2586 (d_count == find_max_ratio) &&
2587 (p_count <= max_print) &&
2588 (ratio > 0.5 * ratio_max)) {
2589 old_count = count;
2590
2591 printf
2592 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
2593 fname, m, n, ntests, thresh);
2594
2595 /* Print test info */
2596 switch (prec) {
2597 case blas_prec_single:
2598 printf("single ");
2599 break;
2600 case blas_prec_double:
2601 printf("double ");
2602 break;
2603 case blas_prec_indigenous:
2604 printf("indigenous ");
2605 break;
2606 case blas_prec_extra:
2607 printf("extra ");
2608 break;
2609 }
2610 switch (norm) {
2611 case -1:
2612 printf("near_underflow ");
2613 break;
2614 case 0:
2615 printf("near_one ");
2616 break;
2617 case 1:
2618 printf("near_overflow ");
2619 break;
2620 }
2621 switch (order_type) {
2622 case blas_rowmajor:
2623 printf("row_major ");
2624 break;
2625 case blas_colmajor:
2626 printf("col_major ");
2627 break;
2628 }
2629 switch (trans_type) {
2630 case blas_no_trans:
2631 printf("no_trans ");
2632 break;
2633 case blas_trans:
2634 printf("trans ");
2635 break;
2636 case blas_conj_trans:
2637 printf("conj_trans ");
2638 break;
2639 }
2640
2641 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
2642 incy);
2643
2644 cge_print_matrix(A, m_i, n_i, lda, order_type, "A");
2645
2646 zprint_vector(head_x, n_i, incx_val, "head_x");
2647 zprint_vector(tail_x, n_i, incx_val, "tail_x");
2648 zprint_vector(y_gen, m_i, 1, "y_gen");
2649 zprint_vector(y, m_i, incy_val, "y_final");
2650
2651 printf(" ");
2652 printf("alpha = ");
2653 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
2654 printf("\n ");
2655 printf("beta = ");
2656 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
2657 printf("\n");
2658 for (j = 0, k = 0; j < m_i * incy_gen;
2659 j += incy_gen, k++) {
2660 printf(" ");
2661 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
2662 head_r_true[j], tail_r_true[j],
2663 head_r_true[j + 1], tail_r_true[j + 1]);
2664 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
2665 }
2666
2667 printf(" ratio=%.4e\n", ratio);
2668 p_count++;
2669 }
2670 if (bad_ratios >= MAX_BAD_TESTS) {
2671 printf("\ntoo many failures, exiting....");
2672 printf("\nTesting and compilation");
2673 printf(" are incomplete\n\n");
2674 goto end;
2675 }
2676 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
2677 printf("\nFlagrant ratio error, exiting...");
2678 printf("\nTesting and compilation");
2679 printf(" are incomplete\n\n");
2680 goto end;
2681 }
2682 }
2683 if (d_count == 0) {
2684 if (ratio > ratio_max)
2685 ratio_max = ratio;
2686
2687 if (ratio != 0.0 && ratio < ratio_min)
2688 ratio_min = ratio;
2689
2690 tot_tests++;
2691 }
2692 } /* incy */
2693 } /* incx */
2694 } /* lda */
2695 } /* trans */
2696 } /* order */
2697 } /* tests */
2698 } /* norm */
2699
2700 } /* beta */
2701 } /* alpha */
2702 } /* debug */
2703
2704 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
2705 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
2706 fname, m, n, ntests, thresh);
2707 printf
2708 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
2709 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
2710 ratio_min, ratio_max);
2711 }
2712
2713 end:
2714 FPU_FIX_STOP;
2715
2716 blas_free(head_x);
2717 blas_free(tail_x);
2718 blas_free(y);
2719 blas_free(head_x_gen);
2720 blas_free(tail_x_gen);
2721 blas_free(y_gen);
2722 blas_free(temp);
2723 blas_free(A);
2724 blas_free(head_r_true);
2725 blas_free(tail_r_true);
2726 blas_free(ratios);
2727
2728 *min_ratio = ratio_min;
2729 *num_bad_ratio = bad_ratios;
2730 *num_tests = tot_tests;
2731 return ratio_max;
2732 }
do_test_zgemv2_c_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)2733 double do_test_zgemv2_c_c(int m, int n, int ntests, int *seed, double thresh,
2734 int debug, float test_prob, double *min_ratio,
2735 int *num_bad_ratio, int *num_tests)
2736
2737 /*
2738 * Purpose
2739 * =======
2740 *
2741 * Runs a series of tests on GEMV2.
2742 *
2743 * Arguments
2744 * =========
2745 *
2746 * m (input) int
2747 * The number of rows
2748 *
2749 * n (input) int
2750 * The number of columns
2751 *
2752 * ntests (input) int
2753 * The number of tests to run for each set of attributes.
2754 *
2755 * seed (input/output) int
2756 * The seed for the random number generator used in testgen().
2757 *
2758 * thresh (input) double
2759 * When the ratio returned from test() exceeds the specified
2760 * threshold, the current size, r_true, r_comp, and ratio will be
2761 * printed. (Since ratio is supposed to be O(1), we can set thresh
2762 * to ~10.)
2763 *
2764 * debug (input) int
2765 * If debug=3, print summary
2766 * If debug=2, print summary only if the number of bad ratios > 0
2767 * If debug=1, print complete info if tests fail
2768 * If debug=0, return max ratio
2769 *
2770 * test_prob (input) float
2771 * The specified test will be performed only if the generated
2772 * random exceeds this threshold.
2773 *
2774 * min_ratio (output) double
2775 * The minimum ratio
2776 *
2777 * num_bad_ratio (output) int
2778 * The number of tests fail; they are above the threshold.
2779 *
2780 * num_tests (output) int
2781 * The number of tests is being performed.
2782 *
2783 * Return value
2784 * ============
2785 *
2786 * The maximum ratio if run successfully, otherwise return -1
2787 *
2788 * Code structure
2789 * ==============
2790 *
2791 * debug loop -- if debug is one, the first loop computes the max ratio
2792 * -- and the last(second) loop outputs debugging information,
2793 * -- if the test fail and its ratio > 0.5 * max ratio.
2794 * -- if debug is zero, the loop is executed once
2795 * alpha loop -- varying alpha: 0, 1, or random
2796 * beta loop -- varying beta: 0, 1, or random
2797
2798 * norm loop -- varying norm: near undeflow, near one, or
2799 * -- near overflow
2800 * numtest loop -- how many times the test is perform with
2801 * -- above set of attributes
2802 * order loop -- varying order type: rowmajor or colmajor
2803 * trans loop -- varying uplo type: upper or lower
2804 * lda loop -- varying lda: m, m+1, 2m
2805 * incx loop -- varying incx: -2, -1, 1, 2
2806 * incy loop -- varying incy: -2, -1, 1, 2
2807 */
2808 {
2809 /* function name */
2810 const char fname[] = "BLAS_zgemv2_c_c";
2811
2812 /* max number of debug lines to print */
2813 const int max_print = 8;
2814
2815 /* Variables in the "x_val" form are loop vars for corresponding
2816 variables */
2817 int i; /* iterate through the repeating tests */
2818 int j, k; /* multipurpose counters or variables */
2819 int iy; /* use to index y */
2820 int incx_val, incy_val, /* for testing different inc values */
2821 incx, incy;
2822 int incy_gen; /* for complex case inc=2, for real case inc=1 */
2823 int d_count; /* counter for debug */
2824 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
2825 int p_count; /* counter for the number of debug lines printed */
2826 int tot_tests; /* total number of tests to be done */
2827 int norm; /* input values of near underflow/one/overflow */
2828 double ratio_max; /* the current maximum ratio */
2829 double ratio_min; /* the current minimum ratio */
2830 double *ratios; /* a temporary variable for calculating ratio */
2831 double ratio; /* the per-use test ratio from test() */
2832 int bad_ratios; /* the number of ratios over the threshold */
2833 double eps_int; /* the internal epsilon expected--2^(-24) for float */
2834 double un_int; /* the internal underflow threshold */
2835 double alpha[2];
2836 double beta[2];
2837 float *A;
2838 float *head_x;
2839 float *tail_x;
2840 double *y;
2841 float *temp; /* use for calculating ratio */
2842
2843 /* x_gen and y_gen are used to store vectors generated by testgen.
2844 they eventually are copied back to x and y */
2845 float *head_x_gen;
2846 float *tail_x_gen;
2847 double *y_gen;
2848
2849 /* the true r calculated by testgen(), in double-double */
2850 double *head_r_true, *tail_r_true;
2851
2852 int alpha_val;
2853 int alpha_flag; /* input flag for BLAS_zgemv2_c_c_testgen */
2854 int beta_val;
2855 int beta_flag; /* input flag for BLAS_zgemv2_c_c_testgen */
2856 int order_val;
2857 enum blas_order_type order_type;
2858
2859 enum blas_prec_type prec;
2860 int trans_val;
2861 enum blas_trans_type trans_type;
2862 int m_i;
2863 int n_i;
2864 int max_mn; /* the max of m and n */
2865 int lda_val;
2866 int lda;
2867 int saved_seed; /* for saving the original seed */
2868 int count, old_count; /* use for counting the number of testgen calls * 2 */
2869
2870 FPU_FIX_DECL;
2871
2872 /* test for bad arguments */
2873 if (n < 0 || m < 0 || ntests < 0)
2874 BLAS_error(fname, 0, 0, NULL);
2875
2876 /* initialization */
2877 *num_bad_ratio = 0;
2878 *num_tests = 0;
2879 *min_ratio = 0.0;
2880
2881 saved_seed = *seed;
2882 ratio_min = 1e308;
2883 ratio_max = 0.0;
2884 ratio = 0.0;
2885 tot_tests = 0;
2886 p_count = 0;
2887 count = 0;
2888 find_max_ratio = 0;
2889 bad_ratios = 0;
2890 old_count = 0;
2891
2892 if (debug == 3)
2893 find_max_ratio = 1;
2894 max_mn = MAX(m, n);
2895 if (m == 0 || n == 0) {
2896 return 0.0;
2897 }
2898
2899 FPU_FIX_START;
2900
2901 incy_gen = 1;
2902 incy_gen *= 2;
2903
2904 /* get space for calculation */
2905 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
2906 if (max_mn * 2 > 0 && head_x == NULL) {
2907 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2908 }
2909 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
2910 if (max_mn * 2 > 0 && tail_x == NULL) {
2911 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2912 }
2913 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
2914 if (max_mn * 2 > 0 && y == NULL) {
2915 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2916 }
2917 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2918 if (max_mn > 0 && head_x_gen == NULL) {
2919 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2920 }
2921 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2922 if (max_mn > 0 && tail_x_gen == NULL) {
2923 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2924 }
2925 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2926 if (max_mn > 0 && y_gen == NULL) {
2927 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2928 }
2929 temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2930 if (max_mn > 0 && temp == NULL) {
2931 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2932 }
2933 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2934 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2935 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2936 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2937 }
2938 ratios = (double *) blas_malloc(max_mn * sizeof(double));
2939 if (max_mn > 0 && ratios == NULL) {
2940 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2941 }
2942 A =
2943 (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
2944 2);
2945 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
2946 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2947 }
2948
2949 /* The debug iteration:
2950 If debug=1, then will execute the iteration twice. First, compute the
2951 max ratio. Second, print info if ratio > (50% * ratio_max). */
2952 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
2953 bad_ratios = 0; /* set to zero */
2954
2955 if ((debug == 3) && (d_count == find_max_ratio))
2956 *seed = saved_seed; /* restore the original seed */
2957
2958 /* varying alpha */
2959 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
2960 alpha_flag = 0;
2961 switch (alpha_val) {
2962 case 0:
2963 alpha[0] = alpha[1] = 0.0;
2964 alpha_flag = 1;
2965 break;
2966 case 1:
2967 alpha[0] = 1.0;
2968 alpha[1] = 0.0;
2969 alpha_flag = 1;
2970 break;
2971 }
2972
2973 /* varying beta */
2974 for (beta_val = 0; beta_val < 3; beta_val++) {
2975 beta_flag = 0;
2976 switch (beta_val) {
2977 case 0:
2978 beta[0] = beta[1] = 0.0;
2979 beta_flag = 1;
2980 break;
2981 case 1:
2982 beta[0] = 1.0;
2983 beta[1] = 0.0;
2984 beta_flag = 1;
2985 break;
2986 }
2987
2988
2989 eps_int = power(2, -BITS_D);
2990 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2991 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2992 prec = blas_prec_double;
2993
2994 /* values near underflow, 1, or overflow */
2995 for (norm = -1; norm <= 1; norm++) {
2996
2997 /* number of tests */
2998 for (i = 0; i < ntests; i++) {
2999
3000 /* row or col major */
3001 for (order_val = 0; order_val < 2; order_val++) {
3002 switch (order_val) {
3003 case 0:
3004 order_type = blas_rowmajor;
3005 break;
3006 case 1:
3007 default:
3008 order_type = blas_colmajor;
3009 break;
3010 }
3011
3012 /* no_trans, trans, or conj_trans */
3013 for (trans_val = 0; trans_val < 3; trans_val++) {
3014 switch (trans_val) {
3015 case 0:
3016 trans_type = blas_no_trans;
3017 m_i = m;
3018 n_i = n;
3019 break;
3020 case 1:
3021 trans_type = blas_trans;
3022 m_i = n;
3023 n_i = m;
3024 break;
3025 case 2:
3026 default:
3027 trans_type = blas_conj_trans;
3028 m_i = n;
3029 n_i = m;
3030 break;
3031 }
3032
3033 /* lda=n, n+1, or 2n */
3034 for (lda_val = 0; lda_val < 3; lda_val++) {
3035 switch (lda_val) {
3036 case 0:
3037 lda = m_i;
3038 break;
3039 case 1:
3040 lda = m_i + 1;
3041 break;
3042 case 2:
3043 default:
3044 lda = 2 * m_i;
3045 break;
3046 }
3047 if ((order_type == blas_rowmajor && lda < n) ||
3048 (order_type == blas_colmajor && lda < m))
3049 continue;
3050
3051 /* For the sake of speed, we throw out this case at random */
3052 if (xrand(seed) >= test_prob)
3053 continue;
3054
3055 /* in the trivial cases, no need to run testgen */
3056 if (m > 0 && n > 0)
3057 BLAS_zgemv2_c_c_testgen(norm, order_type, trans_type, m,
3058 n, &alpha, alpha_flag, A, lda,
3059 head_x_gen, tail_x_gen, &beta,
3060 beta_flag, y_gen, seed,
3061 head_r_true, tail_r_true);
3062
3063 count++;
3064
3065 /* varying incx */
3066 for (incx_val = -2; incx_val <= 2; incx_val++) {
3067 if (incx_val == 0)
3068 continue;
3069
3070 /* setting incx */
3071 incx = incx_val;
3072 incx *= 2;
3073
3074 ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
3075 ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
3076
3077 /* varying incy */
3078 for (incy_val = -2; incy_val <= 2; incy_val++) {
3079 if (incy_val == 0)
3080 continue;
3081
3082 /* setting incy */
3083 incy = incy_val;
3084 incy *= 2;
3085
3086 zcopy_vector(y_gen, m_i, 1, y, incy_val);
3087
3088 /* call BLAS_zgemv2_c_c */
3089 FPU_FIX_STOP;
3090 BLAS_zgemv2_c_c(order_type, trans_type, m, n, alpha, A,
3091 lda, head_x, tail_x, incx_val, beta, y,
3092 incy_val);
3093 FPU_FIX_START;
3094
3095 /* set y starting index */
3096 iy = 0;
3097 if (incy < 0)
3098 iy = -(m_i - 1) * incy;
3099
3100 /* computing the ratio */
3101 if (m > 0 && n > 0)
3102 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
3103 /* copy row j of A to temp */
3104 cge_copy_row(order_type, trans_type, m_i, n_i, A,
3105 lda, temp, j);
3106
3107 test_BLAS_zdot2_c_c(n_i, blas_no_conj, alpha, beta,
3108 &y_gen[k], &y[iy],
3109 &head_r_true[k],
3110 &tail_r_true[k], temp, 1,
3111 head_x, tail_x, incx_val,
3112 eps_int, un_int, &ratios[j]);
3113
3114 /* take the max ratio */
3115 if (j == 0) {
3116 ratio = ratios[0];
3117 /* The !<= below causes NaN error to be detected.
3118 Note that (NaN > thresh) is always false. */
3119 } else if (!(ratios[j] <= ratio)) {
3120 ratio = ratios[j];
3121 }
3122 iy += incy;
3123 }
3124
3125 /* Increase the number of bad ratio, if the ratio
3126 is bigger than the threshold.
3127 The !<= below causes NaN error to be detected.
3128 Note that (NaN > thresh) is always false. */
3129 if (!(ratio <= thresh)) {
3130 bad_ratios++;
3131
3132 if ((debug == 3) && /* print only when debug is on */
3133 (count != old_count) && /* print if old vector is different
3134 from the current one */
3135 (d_count == find_max_ratio) &&
3136 (p_count <= max_print) &&
3137 (ratio > 0.5 * ratio_max)) {
3138 old_count = count;
3139
3140 printf
3141 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
3142 fname, m, n, ntests, thresh);
3143
3144 /* Print test info */
3145 switch (prec) {
3146 case blas_prec_single:
3147 printf("single ");
3148 break;
3149 case blas_prec_double:
3150 printf("double ");
3151 break;
3152 case blas_prec_indigenous:
3153 printf("indigenous ");
3154 break;
3155 case blas_prec_extra:
3156 printf("extra ");
3157 break;
3158 }
3159 switch (norm) {
3160 case -1:
3161 printf("near_underflow ");
3162 break;
3163 case 0:
3164 printf("near_one ");
3165 break;
3166 case 1:
3167 printf("near_overflow ");
3168 break;
3169 }
3170 switch (order_type) {
3171 case blas_rowmajor:
3172 printf("row_major ");
3173 break;
3174 case blas_colmajor:
3175 printf("col_major ");
3176 break;
3177 }
3178 switch (trans_type) {
3179 case blas_no_trans:
3180 printf("no_trans ");
3181 break;
3182 case blas_trans:
3183 printf("trans ");
3184 break;
3185 case blas_conj_trans:
3186 printf("conj_trans ");
3187 break;
3188 }
3189
3190 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
3191 incy);
3192
3193 cge_print_matrix(A, m_i, n_i, lda, order_type, "A");
3194
3195 cprint_vector(head_x, n_i, incx_val, "head_x");
3196 cprint_vector(tail_x, n_i, incx_val, "tail_x");
3197 zprint_vector(y_gen, m_i, 1, "y_gen");
3198 zprint_vector(y, m_i, incy_val, "y_final");
3199
3200 printf(" ");
3201 printf("alpha = ");
3202 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
3203 printf("\n ");
3204 printf("beta = ");
3205 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
3206 printf("\n");
3207 for (j = 0, k = 0; j < m_i * incy_gen;
3208 j += incy_gen, k++) {
3209 printf(" ");
3210 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
3211 head_r_true[j], tail_r_true[j],
3212 head_r_true[j + 1], tail_r_true[j + 1]);
3213 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
3214 }
3215
3216 printf(" ratio=%.4e\n", ratio);
3217 p_count++;
3218 }
3219 if (bad_ratios >= MAX_BAD_TESTS) {
3220 printf("\ntoo many failures, exiting....");
3221 printf("\nTesting and compilation");
3222 printf(" are incomplete\n\n");
3223 goto end;
3224 }
3225 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
3226 printf("\nFlagrant ratio error, exiting...");
3227 printf("\nTesting and compilation");
3228 printf(" are incomplete\n\n");
3229 goto end;
3230 }
3231 }
3232 if (d_count == 0) {
3233 if (ratio > ratio_max)
3234 ratio_max = ratio;
3235
3236 if (ratio != 0.0 && ratio < ratio_min)
3237 ratio_min = ratio;
3238
3239 tot_tests++;
3240 }
3241 } /* incy */
3242 } /* incx */
3243 } /* lda */
3244 } /* trans */
3245 } /* order */
3246 } /* tests */
3247 } /* norm */
3248
3249 } /* beta */
3250 } /* alpha */
3251 } /* debug */
3252
3253 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
3254 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
3255 fname, m, n, ntests, thresh);
3256 printf
3257 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
3258 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
3259 ratio_min, ratio_max);
3260 }
3261
3262 end:
3263 FPU_FIX_STOP;
3264
3265 blas_free(head_x);
3266 blas_free(tail_x);
3267 blas_free(y);
3268 blas_free(head_x_gen);
3269 blas_free(tail_x_gen);
3270 blas_free(y_gen);
3271 blas_free(temp);
3272 blas_free(A);
3273 blas_free(head_r_true);
3274 blas_free(tail_r_true);
3275 blas_free(ratios);
3276
3277 *min_ratio = ratio_min;
3278 *num_bad_ratio = bad_ratios;
3279 *num_tests = tot_tests;
3280 return ratio_max;
3281 }
do_test_cgemv2_c_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)3282 double do_test_cgemv2_c_s(int m, int n, int ntests, int *seed, double thresh,
3283 int debug, float test_prob, double *min_ratio,
3284 int *num_bad_ratio, int *num_tests)
3285
3286 /*
3287 * Purpose
3288 * =======
3289 *
3290 * Runs a series of tests on GEMV2.
3291 *
3292 * Arguments
3293 * =========
3294 *
3295 * m (input) int
3296 * The number of rows
3297 *
3298 * n (input) int
3299 * The number of columns
3300 *
3301 * ntests (input) int
3302 * The number of tests to run for each set of attributes.
3303 *
3304 * seed (input/output) int
3305 * The seed for the random number generator used in testgen().
3306 *
3307 * thresh (input) double
3308 * When the ratio returned from test() exceeds the specified
3309 * threshold, the current size, r_true, r_comp, and ratio will be
3310 * printed. (Since ratio is supposed to be O(1), we can set thresh
3311 * to ~10.)
3312 *
3313 * debug (input) int
3314 * If debug=3, print summary
3315 * If debug=2, print summary only if the number of bad ratios > 0
3316 * If debug=1, print complete info if tests fail
3317 * If debug=0, return max ratio
3318 *
3319 * test_prob (input) float
3320 * The specified test will be performed only if the generated
3321 * random exceeds this threshold.
3322 *
3323 * min_ratio (output) double
3324 * The minimum ratio
3325 *
3326 * num_bad_ratio (output) int
3327 * The number of tests fail; they are above the threshold.
3328 *
3329 * num_tests (output) int
3330 * The number of tests is being performed.
3331 *
3332 * Return value
3333 * ============
3334 *
3335 * The maximum ratio if run successfully, otherwise return -1
3336 *
3337 * Code structure
3338 * ==============
3339 *
3340 * debug loop -- if debug is one, the first loop computes the max ratio
3341 * -- and the last(second) loop outputs debugging information,
3342 * -- if the test fail and its ratio > 0.5 * max ratio.
3343 * -- if debug is zero, the loop is executed once
3344 * alpha loop -- varying alpha: 0, 1, or random
3345 * beta loop -- varying beta: 0, 1, or random
3346
3347 * norm loop -- varying norm: near undeflow, near one, or
3348 * -- near overflow
3349 * numtest loop -- how many times the test is perform with
3350 * -- above set of attributes
3351 * order loop -- varying order type: rowmajor or colmajor
3352 * trans loop -- varying uplo type: upper or lower
3353 * lda loop -- varying lda: m, m+1, 2m
3354 * incx loop -- varying incx: -2, -1, 1, 2
3355 * incy loop -- varying incy: -2, -1, 1, 2
3356 */
3357 {
3358 /* function name */
3359 const char fname[] = "BLAS_cgemv2_c_s";
3360
3361 /* max number of debug lines to print */
3362 const int max_print = 8;
3363
3364 /* Variables in the "x_val" form are loop vars for corresponding
3365 variables */
3366 int i; /* iterate through the repeating tests */
3367 int j, k; /* multipurpose counters or variables */
3368 int iy; /* use to index y */
3369 int incx_val, incy_val, /* for testing different inc values */
3370 incx, incy;
3371 int incy_gen; /* for complex case inc=2, for real case inc=1 */
3372 int d_count; /* counter for debug */
3373 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
3374 int p_count; /* counter for the number of debug lines printed */
3375 int tot_tests; /* total number of tests to be done */
3376 int norm; /* input values of near underflow/one/overflow */
3377 double ratio_max; /* the current maximum ratio */
3378 double ratio_min; /* the current minimum ratio */
3379 double *ratios; /* a temporary variable for calculating ratio */
3380 double ratio; /* the per-use test ratio from test() */
3381 int bad_ratios; /* the number of ratios over the threshold */
3382 double eps_int; /* the internal epsilon expected--2^(-24) for float */
3383 double un_int; /* the internal underflow threshold */
3384 float alpha[2];
3385 float beta[2];
3386 float *A;
3387 float *head_x;
3388 float *tail_x;
3389 float *y;
3390 float *temp; /* use for calculating ratio */
3391
3392 /* x_gen and y_gen are used to store vectors generated by testgen.
3393 they eventually are copied back to x and y */
3394 float *head_x_gen;
3395 float *tail_x_gen;
3396 float *y_gen;
3397
3398 /* the true r calculated by testgen(), in double-double */
3399 double *head_r_true, *tail_r_true;
3400
3401 int alpha_val;
3402 int alpha_flag; /* input flag for BLAS_cgemv2_c_s_testgen */
3403 int beta_val;
3404 int beta_flag; /* input flag for BLAS_cgemv2_c_s_testgen */
3405 int order_val;
3406 enum blas_order_type order_type;
3407
3408 enum blas_prec_type prec;
3409 int trans_val;
3410 enum blas_trans_type trans_type;
3411 int m_i;
3412 int n_i;
3413 int max_mn; /* the max of m and n */
3414 int lda_val;
3415 int lda;
3416 int saved_seed; /* for saving the original seed */
3417 int count, old_count; /* use for counting the number of testgen calls * 2 */
3418
3419 FPU_FIX_DECL;
3420
3421 /* test for bad arguments */
3422 if (n < 0 || m < 0 || ntests < 0)
3423 BLAS_error(fname, 0, 0, NULL);
3424
3425 /* initialization */
3426 *num_bad_ratio = 0;
3427 *num_tests = 0;
3428 *min_ratio = 0.0;
3429
3430 saved_seed = *seed;
3431 ratio_min = 1e308;
3432 ratio_max = 0.0;
3433 ratio = 0.0;
3434 tot_tests = 0;
3435 p_count = 0;
3436 count = 0;
3437 find_max_ratio = 0;
3438 bad_ratios = 0;
3439 old_count = 0;
3440
3441 if (debug == 3)
3442 find_max_ratio = 1;
3443 max_mn = MAX(m, n);
3444 if (m == 0 || n == 0) {
3445 return 0.0;
3446 }
3447
3448 FPU_FIX_START;
3449
3450 incy_gen = 1;
3451 incy_gen *= 2;
3452
3453 /* get space for calculation */
3454 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
3455 if (max_mn * 2 > 0 && head_x == NULL) {
3456 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3457 }
3458 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
3459 if (max_mn * 2 > 0 && tail_x == NULL) {
3460 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3461 }
3462 y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
3463 if (max_mn * 2 > 0 && y == NULL) {
3464 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3465 }
3466 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
3467 if (max_mn > 0 && head_x_gen == NULL) {
3468 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3469 }
3470 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
3471 if (max_mn > 0 && tail_x_gen == NULL) {
3472 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3473 }
3474 y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
3475 if (max_mn > 0 && y_gen == NULL) {
3476 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3477 }
3478 temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
3479 if (max_mn > 0 && temp == NULL) {
3480 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3481 }
3482 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
3483 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
3484 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
3485 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3486 }
3487 ratios = (double *) blas_malloc(max_mn * sizeof(double));
3488 if (max_mn > 0 && ratios == NULL) {
3489 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3490 }
3491 A =
3492 (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
3493 2);
3494 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
3495 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3496 }
3497
3498 /* The debug iteration:
3499 If debug=1, then will execute the iteration twice. First, compute the
3500 max ratio. Second, print info if ratio > (50% * ratio_max). */
3501 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
3502 bad_ratios = 0; /* set to zero */
3503
3504 if ((debug == 3) && (d_count == find_max_ratio))
3505 *seed = saved_seed; /* restore the original seed */
3506
3507 /* varying alpha */
3508 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
3509 alpha_flag = 0;
3510 switch (alpha_val) {
3511 case 0:
3512 alpha[0] = alpha[1] = 0.0;
3513 alpha_flag = 1;
3514 break;
3515 case 1:
3516 alpha[0] = 1.0;
3517 alpha[1] = 0.0;
3518 alpha_flag = 1;
3519 break;
3520 }
3521
3522 /* varying beta */
3523 for (beta_val = 0; beta_val < 3; beta_val++) {
3524 beta_flag = 0;
3525 switch (beta_val) {
3526 case 0:
3527 beta[0] = beta[1] = 0.0;
3528 beta_flag = 1;
3529 break;
3530 case 1:
3531 beta[0] = 1.0;
3532 beta[1] = 0.0;
3533 beta_flag = 1;
3534 break;
3535 }
3536
3537
3538 eps_int = power(2, -BITS_S);
3539 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
3540 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
3541 prec = blas_prec_single;
3542
3543 /* values near underflow, 1, or overflow */
3544 for (norm = -1; norm <= 1; norm++) {
3545
3546 /* number of tests */
3547 for (i = 0; i < ntests; i++) {
3548
3549 /* row or col major */
3550 for (order_val = 0; order_val < 2; order_val++) {
3551 switch (order_val) {
3552 case 0:
3553 order_type = blas_rowmajor;
3554 break;
3555 case 1:
3556 default:
3557 order_type = blas_colmajor;
3558 break;
3559 }
3560
3561 /* no_trans, trans, or conj_trans */
3562 for (trans_val = 0; trans_val < 3; trans_val++) {
3563 switch (trans_val) {
3564 case 0:
3565 trans_type = blas_no_trans;
3566 m_i = m;
3567 n_i = n;
3568 break;
3569 case 1:
3570 trans_type = blas_trans;
3571 m_i = n;
3572 n_i = m;
3573 break;
3574 case 2:
3575 default:
3576 trans_type = blas_conj_trans;
3577 m_i = n;
3578 n_i = m;
3579 break;
3580 }
3581
3582 /* lda=n, n+1, or 2n */
3583 for (lda_val = 0; lda_val < 3; lda_val++) {
3584 switch (lda_val) {
3585 case 0:
3586 lda = m_i;
3587 break;
3588 case 1:
3589 lda = m_i + 1;
3590 break;
3591 case 2:
3592 default:
3593 lda = 2 * m_i;
3594 break;
3595 }
3596 if ((order_type == blas_rowmajor && lda < n) ||
3597 (order_type == blas_colmajor && lda < m))
3598 continue;
3599
3600 /* For the sake of speed, we throw out this case at random */
3601 if (xrand(seed) >= test_prob)
3602 continue;
3603
3604 /* in the trivial cases, no need to run testgen */
3605 if (m > 0 && n > 0)
3606 BLAS_cgemv2_c_s_testgen(norm, order_type, trans_type, m,
3607 n, &alpha, alpha_flag, A, lda,
3608 head_x_gen, tail_x_gen, &beta,
3609 beta_flag, y_gen, seed,
3610 head_r_true, tail_r_true);
3611
3612 count++;
3613
3614 /* varying incx */
3615 for (incx_val = -2; incx_val <= 2; incx_val++) {
3616 if (incx_val == 0)
3617 continue;
3618
3619 /* setting incx */
3620 incx = incx_val;
3621
3622
3623 scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
3624 scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
3625
3626 /* varying incy */
3627 for (incy_val = -2; incy_val <= 2; incy_val++) {
3628 if (incy_val == 0)
3629 continue;
3630
3631 /* setting incy */
3632 incy = incy_val;
3633 incy *= 2;
3634
3635 ccopy_vector(y_gen, m_i, 1, y, incy_val);
3636
3637 /* call BLAS_cgemv2_c_s */
3638 FPU_FIX_STOP;
3639 BLAS_cgemv2_c_s(order_type, trans_type, m, n, alpha, A,
3640 lda, head_x, tail_x, incx_val, beta, y,
3641 incy_val);
3642 FPU_FIX_START;
3643
3644 /* set y starting index */
3645 iy = 0;
3646 if (incy < 0)
3647 iy = -(m_i - 1) * incy;
3648
3649 /* computing the ratio */
3650 if (m > 0 && n > 0)
3651 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
3652 /* copy row j of A to temp */
3653 cge_copy_row(order_type, trans_type, m_i, n_i, A,
3654 lda, temp, j);
3655
3656 test_BLAS_cdot2_c_s(n_i, blas_no_conj, alpha, beta,
3657 &y_gen[k], &y[iy],
3658 &head_r_true[k],
3659 &tail_r_true[k], temp, 1,
3660 head_x, tail_x, incx_val,
3661 eps_int, un_int, &ratios[j]);
3662
3663 /* take the max ratio */
3664 if (j == 0) {
3665 ratio = ratios[0];
3666 /* The !<= below causes NaN error to be detected.
3667 Note that (NaN > thresh) is always false. */
3668 } else if (!(ratios[j] <= ratio)) {
3669 ratio = ratios[j];
3670 }
3671 iy += incy;
3672 }
3673
3674 /* Increase the number of bad ratio, if the ratio
3675 is bigger than the threshold.
3676 The !<= below causes NaN error to be detected.
3677 Note that (NaN > thresh) is always false. */
3678 if (!(ratio <= thresh)) {
3679 bad_ratios++;
3680
3681 if ((debug == 3) && /* print only when debug is on */
3682 (count != old_count) && /* print if old vector is different
3683 from the current one */
3684 (d_count == find_max_ratio) &&
3685 (p_count <= max_print) &&
3686 (ratio > 0.5 * ratio_max)) {
3687 old_count = count;
3688
3689 printf
3690 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
3691 fname, m, n, ntests, thresh);
3692
3693 /* Print test info */
3694 switch (prec) {
3695 case blas_prec_single:
3696 printf("single ");
3697 break;
3698 case blas_prec_double:
3699 printf("double ");
3700 break;
3701 case blas_prec_indigenous:
3702 printf("indigenous ");
3703 break;
3704 case blas_prec_extra:
3705 printf("extra ");
3706 break;
3707 }
3708 switch (norm) {
3709 case -1:
3710 printf("near_underflow ");
3711 break;
3712 case 0:
3713 printf("near_one ");
3714 break;
3715 case 1:
3716 printf("near_overflow ");
3717 break;
3718 }
3719 switch (order_type) {
3720 case blas_rowmajor:
3721 printf("row_major ");
3722 break;
3723 case blas_colmajor:
3724 printf("col_major ");
3725 break;
3726 }
3727 switch (trans_type) {
3728 case blas_no_trans:
3729 printf("no_trans ");
3730 break;
3731 case blas_trans:
3732 printf("trans ");
3733 break;
3734 case blas_conj_trans:
3735 printf("conj_trans ");
3736 break;
3737 }
3738
3739 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
3740 incy);
3741
3742 cge_print_matrix(A, m_i, n_i, lda, order_type, "A");
3743
3744 sprint_vector(head_x, n_i, incx_val, "head_x");
3745 sprint_vector(tail_x, n_i, incx_val, "tail_x");
3746 cprint_vector(y_gen, m_i, 1, "y_gen");
3747 cprint_vector(y, m_i, incy_val, "y_final");
3748
3749 printf(" ");
3750 printf("alpha = ");
3751 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
3752 printf("\n ");
3753 printf("beta = ");
3754 printf("(%16.8e, %16.8e)", beta[0], beta[1]);
3755 printf("\n");
3756 for (j = 0, k = 0; j < m_i * incy_gen;
3757 j += incy_gen, k++) {
3758 printf(" ");
3759 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
3760 head_r_true[j], tail_r_true[j],
3761 head_r_true[j + 1], tail_r_true[j + 1]);
3762 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
3763 }
3764
3765 printf(" ratio=%.4e\n", ratio);
3766 p_count++;
3767 }
3768 if (bad_ratios >= MAX_BAD_TESTS) {
3769 printf("\ntoo many failures, exiting....");
3770 printf("\nTesting and compilation");
3771 printf(" are incomplete\n\n");
3772 goto end;
3773 }
3774 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
3775 printf("\nFlagrant ratio error, exiting...");
3776 printf("\nTesting and compilation");
3777 printf(" are incomplete\n\n");
3778 goto end;
3779 }
3780 }
3781 if (d_count == 0) {
3782 if (ratio > ratio_max)
3783 ratio_max = ratio;
3784
3785 if (ratio != 0.0 && ratio < ratio_min)
3786 ratio_min = ratio;
3787
3788 tot_tests++;
3789 }
3790 } /* incy */
3791 } /* incx */
3792 } /* lda */
3793 } /* trans */
3794 } /* order */
3795 } /* tests */
3796 } /* norm */
3797
3798 } /* beta */
3799 } /* alpha */
3800 } /* debug */
3801
3802 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
3803 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
3804 fname, m, n, ntests, thresh);
3805 printf
3806 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
3807 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
3808 ratio_min, ratio_max);
3809 }
3810
3811 end:
3812 FPU_FIX_STOP;
3813
3814 blas_free(head_x);
3815 blas_free(tail_x);
3816 blas_free(y);
3817 blas_free(head_x_gen);
3818 blas_free(tail_x_gen);
3819 blas_free(y_gen);
3820 blas_free(temp);
3821 blas_free(A);
3822 blas_free(head_r_true);
3823 blas_free(tail_r_true);
3824 blas_free(ratios);
3825
3826 *min_ratio = ratio_min;
3827 *num_bad_ratio = bad_ratios;
3828 *num_tests = tot_tests;
3829 return ratio_max;
3830 }
do_test_cgemv2_s_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)3831 double do_test_cgemv2_s_c(int m, int n, int ntests, int *seed, double thresh,
3832 int debug, float test_prob, double *min_ratio,
3833 int *num_bad_ratio, int *num_tests)
3834
3835 /*
3836 * Purpose
3837 * =======
3838 *
3839 * Runs a series of tests on GEMV2.
3840 *
3841 * Arguments
3842 * =========
3843 *
3844 * m (input) int
3845 * The number of rows
3846 *
3847 * n (input) int
3848 * The number of columns
3849 *
3850 * ntests (input) int
3851 * The number of tests to run for each set of attributes.
3852 *
3853 * seed (input/output) int
3854 * The seed for the random number generator used in testgen().
3855 *
3856 * thresh (input) double
3857 * When the ratio returned from test() exceeds the specified
3858 * threshold, the current size, r_true, r_comp, and ratio will be
3859 * printed. (Since ratio is supposed to be O(1), we can set thresh
3860 * to ~10.)
3861 *
3862 * debug (input) int
3863 * If debug=3, print summary
3864 * If debug=2, print summary only if the number of bad ratios > 0
3865 * If debug=1, print complete info if tests fail
3866 * If debug=0, return max ratio
3867 *
3868 * test_prob (input) float
3869 * The specified test will be performed only if the generated
3870 * random exceeds this threshold.
3871 *
3872 * min_ratio (output) double
3873 * The minimum ratio
3874 *
3875 * num_bad_ratio (output) int
3876 * The number of tests fail; they are above the threshold.
3877 *
3878 * num_tests (output) int
3879 * The number of tests is being performed.
3880 *
3881 * Return value
3882 * ============
3883 *
3884 * The maximum ratio if run successfully, otherwise return -1
3885 *
3886 * Code structure
3887 * ==============
3888 *
3889 * debug loop -- if debug is one, the first loop computes the max ratio
3890 * -- and the last(second) loop outputs debugging information,
3891 * -- if the test fail and its ratio > 0.5 * max ratio.
3892 * -- if debug is zero, the loop is executed once
3893 * alpha loop -- varying alpha: 0, 1, or random
3894 * beta loop -- varying beta: 0, 1, or random
3895
3896 * norm loop -- varying norm: near undeflow, near one, or
3897 * -- near overflow
3898 * numtest loop -- how many times the test is perform with
3899 * -- above set of attributes
3900 * order loop -- varying order type: rowmajor or colmajor
3901 * trans loop -- varying uplo type: upper or lower
3902 * lda loop -- varying lda: m, m+1, 2m
3903 * incx loop -- varying incx: -2, -1, 1, 2
3904 * incy loop -- varying incy: -2, -1, 1, 2
3905 */
3906 {
3907 /* function name */
3908 const char fname[] = "BLAS_cgemv2_s_c";
3909
3910 /* max number of debug lines to print */
3911 const int max_print = 8;
3912
3913 /* Variables in the "x_val" form are loop vars for corresponding
3914 variables */
3915 int i; /* iterate through the repeating tests */
3916 int j, k; /* multipurpose counters or variables */
3917 int iy; /* use to index y */
3918 int incx_val, incy_val, /* for testing different inc values */
3919 incx, incy;
3920 int incy_gen; /* for complex case inc=2, for real case inc=1 */
3921 int d_count; /* counter for debug */
3922 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
3923 int p_count; /* counter for the number of debug lines printed */
3924 int tot_tests; /* total number of tests to be done */
3925 int norm; /* input values of near underflow/one/overflow */
3926 double ratio_max; /* the current maximum ratio */
3927 double ratio_min; /* the current minimum ratio */
3928 double *ratios; /* a temporary variable for calculating ratio */
3929 double ratio; /* the per-use test ratio from test() */
3930 int bad_ratios; /* the number of ratios over the threshold */
3931 double eps_int; /* the internal epsilon expected--2^(-24) for float */
3932 double un_int; /* the internal underflow threshold */
3933 float alpha[2];
3934 float beta[2];
3935 float *A;
3936 float *head_x;
3937 float *tail_x;
3938 float *y;
3939 float *temp; /* use for calculating ratio */
3940
3941 /* x_gen and y_gen are used to store vectors generated by testgen.
3942 they eventually are copied back to x and y */
3943 float *head_x_gen;
3944 float *tail_x_gen;
3945 float *y_gen;
3946
3947 /* the true r calculated by testgen(), in double-double */
3948 double *head_r_true, *tail_r_true;
3949
3950 int alpha_val;
3951 int alpha_flag; /* input flag for BLAS_cgemv2_s_c_testgen */
3952 int beta_val;
3953 int beta_flag; /* input flag for BLAS_cgemv2_s_c_testgen */
3954 int order_val;
3955 enum blas_order_type order_type;
3956
3957 enum blas_prec_type prec;
3958 int trans_val;
3959 enum blas_trans_type trans_type;
3960 int m_i;
3961 int n_i;
3962 int max_mn; /* the max of m and n */
3963 int lda_val;
3964 int lda;
3965 int saved_seed; /* for saving the original seed */
3966 int count, old_count; /* use for counting the number of testgen calls * 2 */
3967
3968 FPU_FIX_DECL;
3969
3970 /* test for bad arguments */
3971 if (n < 0 || m < 0 || ntests < 0)
3972 BLAS_error(fname, 0, 0, NULL);
3973
3974 /* initialization */
3975 *num_bad_ratio = 0;
3976 *num_tests = 0;
3977 *min_ratio = 0.0;
3978
3979 saved_seed = *seed;
3980 ratio_min = 1e308;
3981 ratio_max = 0.0;
3982 ratio = 0.0;
3983 tot_tests = 0;
3984 p_count = 0;
3985 count = 0;
3986 find_max_ratio = 0;
3987 bad_ratios = 0;
3988 old_count = 0;
3989
3990 if (debug == 3)
3991 find_max_ratio = 1;
3992 max_mn = MAX(m, n);
3993 if (m == 0 || n == 0) {
3994 return 0.0;
3995 }
3996
3997 FPU_FIX_START;
3998
3999 incy_gen = 1;
4000 incy_gen *= 2;
4001
4002 /* get space for calculation */
4003 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
4004 if (max_mn * 2 > 0 && head_x == NULL) {
4005 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4006 }
4007 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
4008 if (max_mn * 2 > 0 && tail_x == NULL) {
4009 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4010 }
4011 y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
4012 if (max_mn * 2 > 0 && y == NULL) {
4013 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4014 }
4015 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4016 if (max_mn > 0 && head_x_gen == NULL) {
4017 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4018 }
4019 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4020 if (max_mn > 0 && tail_x_gen == NULL) {
4021 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4022 }
4023 y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4024 if (max_mn > 0 && y_gen == NULL) {
4025 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4026 }
4027 temp = (float *) blas_malloc(max_mn * sizeof(float));
4028 if (max_mn > 0 && temp == NULL) {
4029 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4030 }
4031 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
4032 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
4033 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4034 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4035 }
4036 ratios = (double *) blas_malloc(max_mn * sizeof(double));
4037 if (max_mn > 0 && ratios == NULL) {
4038 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4039 }
4040 A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
4041 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
4042 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4043 }
4044
4045 /* The debug iteration:
4046 If debug=1, then will execute the iteration twice. First, compute the
4047 max ratio. Second, print info if ratio > (50% * ratio_max). */
4048 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
4049 bad_ratios = 0; /* set to zero */
4050
4051 if ((debug == 3) && (d_count == find_max_ratio))
4052 *seed = saved_seed; /* restore the original seed */
4053
4054 /* varying alpha */
4055 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
4056 alpha_flag = 0;
4057 switch (alpha_val) {
4058 case 0:
4059 alpha[0] = alpha[1] = 0.0;
4060 alpha_flag = 1;
4061 break;
4062 case 1:
4063 alpha[0] = 1.0;
4064 alpha[1] = 0.0;
4065 alpha_flag = 1;
4066 break;
4067 }
4068
4069 /* varying beta */
4070 for (beta_val = 0; beta_val < 3; beta_val++) {
4071 beta_flag = 0;
4072 switch (beta_val) {
4073 case 0:
4074 beta[0] = beta[1] = 0.0;
4075 beta_flag = 1;
4076 break;
4077 case 1:
4078 beta[0] = 1.0;
4079 beta[1] = 0.0;
4080 beta_flag = 1;
4081 break;
4082 }
4083
4084
4085 eps_int = power(2, -BITS_S);
4086 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
4087 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
4088 prec = blas_prec_single;
4089
4090 /* values near underflow, 1, or overflow */
4091 for (norm = -1; norm <= 1; norm++) {
4092
4093 /* number of tests */
4094 for (i = 0; i < ntests; i++) {
4095
4096 /* row or col major */
4097 for (order_val = 0; order_val < 2; order_val++) {
4098 switch (order_val) {
4099 case 0:
4100 order_type = blas_rowmajor;
4101 break;
4102 case 1:
4103 default:
4104 order_type = blas_colmajor;
4105 break;
4106 }
4107
4108 /* no_trans, trans, or conj_trans */
4109 for (trans_val = 0; trans_val < 3; trans_val++) {
4110 switch (trans_val) {
4111 case 0:
4112 trans_type = blas_no_trans;
4113 m_i = m;
4114 n_i = n;
4115 break;
4116 case 1:
4117 trans_type = blas_trans;
4118 m_i = n;
4119 n_i = m;
4120 break;
4121 case 2:
4122 default:
4123 trans_type = blas_conj_trans;
4124 m_i = n;
4125 n_i = m;
4126 break;
4127 }
4128
4129 /* lda=n, n+1, or 2n */
4130 for (lda_val = 0; lda_val < 3; lda_val++) {
4131 switch (lda_val) {
4132 case 0:
4133 lda = m_i;
4134 break;
4135 case 1:
4136 lda = m_i + 1;
4137 break;
4138 case 2:
4139 default:
4140 lda = 2 * m_i;
4141 break;
4142 }
4143 if ((order_type == blas_rowmajor && lda < n) ||
4144 (order_type == blas_colmajor && lda < m))
4145 continue;
4146
4147 /* For the sake of speed, we throw out this case at random */
4148 if (xrand(seed) >= test_prob)
4149 continue;
4150
4151 /* in the trivial cases, no need to run testgen */
4152 if (m > 0 && n > 0)
4153 BLAS_cgemv2_s_c_testgen(norm, order_type, trans_type, m,
4154 n, &alpha, alpha_flag, A, lda,
4155 head_x_gen, tail_x_gen, &beta,
4156 beta_flag, y_gen, seed,
4157 head_r_true, tail_r_true);
4158
4159 count++;
4160
4161 /* varying incx */
4162 for (incx_val = -2; incx_val <= 2; incx_val++) {
4163 if (incx_val == 0)
4164 continue;
4165
4166 /* setting incx */
4167 incx = incx_val;
4168 incx *= 2;
4169
4170 ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
4171 ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
4172
4173 /* varying incy */
4174 for (incy_val = -2; incy_val <= 2; incy_val++) {
4175 if (incy_val == 0)
4176 continue;
4177
4178 /* setting incy */
4179 incy = incy_val;
4180 incy *= 2;
4181
4182 ccopy_vector(y_gen, m_i, 1, y, incy_val);
4183
4184 /* call BLAS_cgemv2_s_c */
4185 FPU_FIX_STOP;
4186 BLAS_cgemv2_s_c(order_type, trans_type, m, n, alpha, A,
4187 lda, head_x, tail_x, incx_val, beta, y,
4188 incy_val);
4189 FPU_FIX_START;
4190
4191 /* set y starting index */
4192 iy = 0;
4193 if (incy < 0)
4194 iy = -(m_i - 1) * incy;
4195
4196 /* computing the ratio */
4197 if (m > 0 && n > 0)
4198 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
4199 /* copy row j of A to temp */
4200 sge_copy_row(order_type, trans_type, m_i, n_i, A,
4201 lda, temp, j);
4202
4203 test_BLAS_cdot2_s_c(n_i, blas_no_conj, alpha, beta,
4204 &y_gen[k], &y[iy],
4205 &head_r_true[k],
4206 &tail_r_true[k], temp, 1,
4207 head_x, tail_x, incx_val,
4208 eps_int, un_int, &ratios[j]);
4209
4210 /* take the max ratio */
4211 if (j == 0) {
4212 ratio = ratios[0];
4213 /* The !<= below causes NaN error to be detected.
4214 Note that (NaN > thresh) is always false. */
4215 } else if (!(ratios[j] <= ratio)) {
4216 ratio = ratios[j];
4217 }
4218 iy += incy;
4219 }
4220
4221 /* Increase the number of bad ratio, if the ratio
4222 is bigger than the threshold.
4223 The !<= below causes NaN error to be detected.
4224 Note that (NaN > thresh) is always false. */
4225 if (!(ratio <= thresh)) {
4226 bad_ratios++;
4227
4228 if ((debug == 3) && /* print only when debug is on */
4229 (count != old_count) && /* print if old vector is different
4230 from the current one */
4231 (d_count == find_max_ratio) &&
4232 (p_count <= max_print) &&
4233 (ratio > 0.5 * ratio_max)) {
4234 old_count = count;
4235
4236 printf
4237 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
4238 fname, m, n, ntests, thresh);
4239
4240 /* Print test info */
4241 switch (prec) {
4242 case blas_prec_single:
4243 printf("single ");
4244 break;
4245 case blas_prec_double:
4246 printf("double ");
4247 break;
4248 case blas_prec_indigenous:
4249 printf("indigenous ");
4250 break;
4251 case blas_prec_extra:
4252 printf("extra ");
4253 break;
4254 }
4255 switch (norm) {
4256 case -1:
4257 printf("near_underflow ");
4258 break;
4259 case 0:
4260 printf("near_one ");
4261 break;
4262 case 1:
4263 printf("near_overflow ");
4264 break;
4265 }
4266 switch (order_type) {
4267 case blas_rowmajor:
4268 printf("row_major ");
4269 break;
4270 case blas_colmajor:
4271 printf("col_major ");
4272 break;
4273 }
4274 switch (trans_type) {
4275 case blas_no_trans:
4276 printf("no_trans ");
4277 break;
4278 case blas_trans:
4279 printf("trans ");
4280 break;
4281 case blas_conj_trans:
4282 printf("conj_trans ");
4283 break;
4284 }
4285
4286 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
4287 incy);
4288
4289 sge_print_matrix(A, m_i, n_i, lda, order_type, "A");
4290
4291 cprint_vector(head_x, n_i, incx_val, "head_x");
4292 cprint_vector(tail_x, n_i, incx_val, "tail_x");
4293 cprint_vector(y_gen, m_i, 1, "y_gen");
4294 cprint_vector(y, m_i, incy_val, "y_final");
4295
4296 printf(" ");
4297 printf("alpha = ");
4298 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
4299 printf("\n ");
4300 printf("beta = ");
4301 printf("(%16.8e, %16.8e)", beta[0], beta[1]);
4302 printf("\n");
4303 for (j = 0, k = 0; j < m_i * incy_gen;
4304 j += incy_gen, k++) {
4305 printf(" ");
4306 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
4307 head_r_true[j], tail_r_true[j],
4308 head_r_true[j + 1], tail_r_true[j + 1]);
4309 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
4310 }
4311
4312 printf(" ratio=%.4e\n", ratio);
4313 p_count++;
4314 }
4315 if (bad_ratios >= MAX_BAD_TESTS) {
4316 printf("\ntoo many failures, exiting....");
4317 printf("\nTesting and compilation");
4318 printf(" are incomplete\n\n");
4319 goto end;
4320 }
4321 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4322 printf("\nFlagrant ratio error, exiting...");
4323 printf("\nTesting and compilation");
4324 printf(" are incomplete\n\n");
4325 goto end;
4326 }
4327 }
4328 if (d_count == 0) {
4329 if (ratio > ratio_max)
4330 ratio_max = ratio;
4331
4332 if (ratio != 0.0 && ratio < ratio_min)
4333 ratio_min = ratio;
4334
4335 tot_tests++;
4336 }
4337 } /* incy */
4338 } /* incx */
4339 } /* lda */
4340 } /* trans */
4341 } /* order */
4342 } /* tests */
4343 } /* norm */
4344
4345 } /* beta */
4346 } /* alpha */
4347 } /* debug */
4348
4349 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
4350 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
4351 fname, m, n, ntests, thresh);
4352 printf
4353 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
4354 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
4355 ratio_min, ratio_max);
4356 }
4357
4358 end:
4359 FPU_FIX_STOP;
4360
4361 blas_free(head_x);
4362 blas_free(tail_x);
4363 blas_free(y);
4364 blas_free(head_x_gen);
4365 blas_free(tail_x_gen);
4366 blas_free(y_gen);
4367 blas_free(temp);
4368 blas_free(A);
4369 blas_free(head_r_true);
4370 blas_free(tail_r_true);
4371 blas_free(ratios);
4372
4373 *min_ratio = ratio_min;
4374 *num_bad_ratio = bad_ratios;
4375 *num_tests = tot_tests;
4376 return ratio_max;
4377 }
do_test_cgemv2_s_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)4378 double do_test_cgemv2_s_s(int m, int n, int ntests, int *seed, double thresh,
4379 int debug, float test_prob, double *min_ratio,
4380 int *num_bad_ratio, int *num_tests)
4381
4382 /*
4383 * Purpose
4384 * =======
4385 *
4386 * Runs a series of tests on GEMV2.
4387 *
4388 * Arguments
4389 * =========
4390 *
4391 * m (input) int
4392 * The number of rows
4393 *
4394 * n (input) int
4395 * The number of columns
4396 *
4397 * ntests (input) int
4398 * The number of tests to run for each set of attributes.
4399 *
4400 * seed (input/output) int
4401 * The seed for the random number generator used in testgen().
4402 *
4403 * thresh (input) double
4404 * When the ratio returned from test() exceeds the specified
4405 * threshold, the current size, r_true, r_comp, and ratio will be
4406 * printed. (Since ratio is supposed to be O(1), we can set thresh
4407 * to ~10.)
4408 *
4409 * debug (input) int
4410 * If debug=3, print summary
4411 * If debug=2, print summary only if the number of bad ratios > 0
4412 * If debug=1, print complete info if tests fail
4413 * If debug=0, return max ratio
4414 *
4415 * test_prob (input) float
4416 * The specified test will be performed only if the generated
4417 * random exceeds this threshold.
4418 *
4419 * min_ratio (output) double
4420 * The minimum ratio
4421 *
4422 * num_bad_ratio (output) int
4423 * The number of tests fail; they are above the threshold.
4424 *
4425 * num_tests (output) int
4426 * The number of tests is being performed.
4427 *
4428 * Return value
4429 * ============
4430 *
4431 * The maximum ratio if run successfully, otherwise return -1
4432 *
4433 * Code structure
4434 * ==============
4435 *
4436 * debug loop -- if debug is one, the first loop computes the max ratio
4437 * -- and the last(second) loop outputs debugging information,
4438 * -- if the test fail and its ratio > 0.5 * max ratio.
4439 * -- if debug is zero, the loop is executed once
4440 * alpha loop -- varying alpha: 0, 1, or random
4441 * beta loop -- varying beta: 0, 1, or random
4442
4443 * norm loop -- varying norm: near undeflow, near one, or
4444 * -- near overflow
4445 * numtest loop -- how many times the test is perform with
4446 * -- above set of attributes
4447 * order loop -- varying order type: rowmajor or colmajor
4448 * trans loop -- varying uplo type: upper or lower
4449 * lda loop -- varying lda: m, m+1, 2m
4450 * incx loop -- varying incx: -2, -1, 1, 2
4451 * incy loop -- varying incy: -2, -1, 1, 2
4452 */
4453 {
4454 /* function name */
4455 const char fname[] = "BLAS_cgemv2_s_s";
4456
4457 /* max number of debug lines to print */
4458 const int max_print = 8;
4459
4460 /* Variables in the "x_val" form are loop vars for corresponding
4461 variables */
4462 int i; /* iterate through the repeating tests */
4463 int j, k; /* multipurpose counters or variables */
4464 int iy; /* use to index y */
4465 int incx_val, incy_val, /* for testing different inc values */
4466 incx, incy;
4467 int incy_gen; /* for complex case inc=2, for real case inc=1 */
4468 int d_count; /* counter for debug */
4469 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
4470 int p_count; /* counter for the number of debug lines printed */
4471 int tot_tests; /* total number of tests to be done */
4472 int norm; /* input values of near underflow/one/overflow */
4473 double ratio_max; /* the current maximum ratio */
4474 double ratio_min; /* the current minimum ratio */
4475 double *ratios; /* a temporary variable for calculating ratio */
4476 double ratio; /* the per-use test ratio from test() */
4477 int bad_ratios; /* the number of ratios over the threshold */
4478 double eps_int; /* the internal epsilon expected--2^(-24) for float */
4479 double un_int; /* the internal underflow threshold */
4480 float alpha[2];
4481 float beta[2];
4482 float *A;
4483 float *head_x;
4484 float *tail_x;
4485 float *y;
4486 float *temp; /* use for calculating ratio */
4487
4488 /* x_gen and y_gen are used to store vectors generated by testgen.
4489 they eventually are copied back to x and y */
4490 float *head_x_gen;
4491 float *tail_x_gen;
4492 float *y_gen;
4493
4494 /* the true r calculated by testgen(), in double-double */
4495 double *head_r_true, *tail_r_true;
4496
4497 int alpha_val;
4498 int alpha_flag; /* input flag for BLAS_cgemv2_s_s_testgen */
4499 int beta_val;
4500 int beta_flag; /* input flag for BLAS_cgemv2_s_s_testgen */
4501 int order_val;
4502 enum blas_order_type order_type;
4503
4504 enum blas_prec_type prec;
4505 int trans_val;
4506 enum blas_trans_type trans_type;
4507 int m_i;
4508 int n_i;
4509 int max_mn; /* the max of m and n */
4510 int lda_val;
4511 int lda;
4512 int saved_seed; /* for saving the original seed */
4513 int count, old_count; /* use for counting the number of testgen calls * 2 */
4514
4515 FPU_FIX_DECL;
4516
4517 /* test for bad arguments */
4518 if (n < 0 || m < 0 || ntests < 0)
4519 BLAS_error(fname, 0, 0, NULL);
4520
4521 /* initialization */
4522 *num_bad_ratio = 0;
4523 *num_tests = 0;
4524 *min_ratio = 0.0;
4525
4526 saved_seed = *seed;
4527 ratio_min = 1e308;
4528 ratio_max = 0.0;
4529 ratio = 0.0;
4530 tot_tests = 0;
4531 p_count = 0;
4532 count = 0;
4533 find_max_ratio = 0;
4534 bad_ratios = 0;
4535 old_count = 0;
4536
4537 if (debug == 3)
4538 find_max_ratio = 1;
4539 max_mn = MAX(m, n);
4540 if (m == 0 || n == 0) {
4541 return 0.0;
4542 }
4543
4544 FPU_FIX_START;
4545
4546 incy_gen = 1;
4547 incy_gen *= 2;
4548
4549 /* get space for calculation */
4550 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
4551 if (max_mn * 2 > 0 && head_x == NULL) {
4552 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4553 }
4554 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
4555 if (max_mn * 2 > 0 && tail_x == NULL) {
4556 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4557 }
4558 y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
4559 if (max_mn * 2 > 0 && y == NULL) {
4560 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4561 }
4562 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
4563 if (max_mn > 0 && head_x_gen == NULL) {
4564 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4565 }
4566 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
4567 if (max_mn > 0 && tail_x_gen == NULL) {
4568 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4569 }
4570 y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4571 if (max_mn > 0 && y_gen == NULL) {
4572 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4573 }
4574 temp = (float *) blas_malloc(max_mn * sizeof(float));
4575 if (max_mn > 0 && temp == NULL) {
4576 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4577 }
4578 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
4579 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
4580 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4581 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4582 }
4583 ratios = (double *) blas_malloc(max_mn * sizeof(double));
4584 if (max_mn > 0 && ratios == NULL) {
4585 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4586 }
4587 A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
4588 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
4589 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4590 }
4591
4592 /* The debug iteration:
4593 If debug=1, then will execute the iteration twice. First, compute the
4594 max ratio. Second, print info if ratio > (50% * ratio_max). */
4595 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
4596 bad_ratios = 0; /* set to zero */
4597
4598 if ((debug == 3) && (d_count == find_max_ratio))
4599 *seed = saved_seed; /* restore the original seed */
4600
4601 /* varying alpha */
4602 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
4603 alpha_flag = 0;
4604 switch (alpha_val) {
4605 case 0:
4606 alpha[0] = alpha[1] = 0.0;
4607 alpha_flag = 1;
4608 break;
4609 case 1:
4610 alpha[0] = 1.0;
4611 alpha[1] = 0.0;
4612 alpha_flag = 1;
4613 break;
4614 }
4615
4616 /* varying beta */
4617 for (beta_val = 0; beta_val < 3; beta_val++) {
4618 beta_flag = 0;
4619 switch (beta_val) {
4620 case 0:
4621 beta[0] = beta[1] = 0.0;
4622 beta_flag = 1;
4623 break;
4624 case 1:
4625 beta[0] = 1.0;
4626 beta[1] = 0.0;
4627 beta_flag = 1;
4628 break;
4629 }
4630
4631
4632 eps_int = power(2, -BITS_S);
4633 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
4634 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
4635 prec = blas_prec_single;
4636
4637 /* values near underflow, 1, or overflow */
4638 for (norm = -1; norm <= 1; norm++) {
4639
4640 /* number of tests */
4641 for (i = 0; i < ntests; i++) {
4642
4643 /* row or col major */
4644 for (order_val = 0; order_val < 2; order_val++) {
4645 switch (order_val) {
4646 case 0:
4647 order_type = blas_rowmajor;
4648 break;
4649 case 1:
4650 default:
4651 order_type = blas_colmajor;
4652 break;
4653 }
4654
4655 /* no_trans, trans, or conj_trans */
4656 for (trans_val = 0; trans_val < 3; trans_val++) {
4657 switch (trans_val) {
4658 case 0:
4659 trans_type = blas_no_trans;
4660 m_i = m;
4661 n_i = n;
4662 break;
4663 case 1:
4664 trans_type = blas_trans;
4665 m_i = n;
4666 n_i = m;
4667 break;
4668 case 2:
4669 default:
4670 trans_type = blas_conj_trans;
4671 m_i = n;
4672 n_i = m;
4673 break;
4674 }
4675
4676 /* lda=n, n+1, or 2n */
4677 for (lda_val = 0; lda_val < 3; lda_val++) {
4678 switch (lda_val) {
4679 case 0:
4680 lda = m_i;
4681 break;
4682 case 1:
4683 lda = m_i + 1;
4684 break;
4685 case 2:
4686 default:
4687 lda = 2 * m_i;
4688 break;
4689 }
4690 if ((order_type == blas_rowmajor && lda < n) ||
4691 (order_type == blas_colmajor && lda < m))
4692 continue;
4693
4694 /* For the sake of speed, we throw out this case at random */
4695 if (xrand(seed) >= test_prob)
4696 continue;
4697
4698 /* in the trivial cases, no need to run testgen */
4699 if (m > 0 && n > 0)
4700 BLAS_cgemv2_s_s_testgen(norm, order_type, trans_type, m,
4701 n, &alpha, alpha_flag, A, lda,
4702 head_x_gen, tail_x_gen, &beta,
4703 beta_flag, y_gen, seed,
4704 head_r_true, tail_r_true);
4705
4706 count++;
4707
4708 /* varying incx */
4709 for (incx_val = -2; incx_val <= 2; incx_val++) {
4710 if (incx_val == 0)
4711 continue;
4712
4713 /* setting incx */
4714 incx = incx_val;
4715
4716
4717 scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
4718 scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
4719
4720 /* varying incy */
4721 for (incy_val = -2; incy_val <= 2; incy_val++) {
4722 if (incy_val == 0)
4723 continue;
4724
4725 /* setting incy */
4726 incy = incy_val;
4727 incy *= 2;
4728
4729 ccopy_vector(y_gen, m_i, 1, y, incy_val);
4730
4731 /* call BLAS_cgemv2_s_s */
4732 FPU_FIX_STOP;
4733 BLAS_cgemv2_s_s(order_type, trans_type, m, n, alpha, A,
4734 lda, head_x, tail_x, incx_val, beta, y,
4735 incy_val);
4736 FPU_FIX_START;
4737
4738 /* set y starting index */
4739 iy = 0;
4740 if (incy < 0)
4741 iy = -(m_i - 1) * incy;
4742
4743 /* computing the ratio */
4744 if (m > 0 && n > 0)
4745 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
4746 /* copy row j of A to temp */
4747 sge_copy_row(order_type, trans_type, m_i, n_i, A,
4748 lda, temp, j);
4749
4750 test_BLAS_cdot2_s_s(n_i, blas_no_conj, alpha, beta,
4751 &y_gen[k], &y[iy],
4752 &head_r_true[k],
4753 &tail_r_true[k], temp, 1,
4754 head_x, tail_x, incx_val,
4755 eps_int, un_int, &ratios[j]);
4756
4757 /* take the max ratio */
4758 if (j == 0) {
4759 ratio = ratios[0];
4760 /* The !<= below causes NaN error to be detected.
4761 Note that (NaN > thresh) is always false. */
4762 } else if (!(ratios[j] <= ratio)) {
4763 ratio = ratios[j];
4764 }
4765 iy += incy;
4766 }
4767
4768 /* Increase the number of bad ratio, if the ratio
4769 is bigger than the threshold.
4770 The !<= below causes NaN error to be detected.
4771 Note that (NaN > thresh) is always false. */
4772 if (!(ratio <= thresh)) {
4773 bad_ratios++;
4774
4775 if ((debug == 3) && /* print only when debug is on */
4776 (count != old_count) && /* print if old vector is different
4777 from the current one */
4778 (d_count == find_max_ratio) &&
4779 (p_count <= max_print) &&
4780 (ratio > 0.5 * ratio_max)) {
4781 old_count = count;
4782
4783 printf
4784 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
4785 fname, m, n, ntests, thresh);
4786
4787 /* Print test info */
4788 switch (prec) {
4789 case blas_prec_single:
4790 printf("single ");
4791 break;
4792 case blas_prec_double:
4793 printf("double ");
4794 break;
4795 case blas_prec_indigenous:
4796 printf("indigenous ");
4797 break;
4798 case blas_prec_extra:
4799 printf("extra ");
4800 break;
4801 }
4802 switch (norm) {
4803 case -1:
4804 printf("near_underflow ");
4805 break;
4806 case 0:
4807 printf("near_one ");
4808 break;
4809 case 1:
4810 printf("near_overflow ");
4811 break;
4812 }
4813 switch (order_type) {
4814 case blas_rowmajor:
4815 printf("row_major ");
4816 break;
4817 case blas_colmajor:
4818 printf("col_major ");
4819 break;
4820 }
4821 switch (trans_type) {
4822 case blas_no_trans:
4823 printf("no_trans ");
4824 break;
4825 case blas_trans:
4826 printf("trans ");
4827 break;
4828 case blas_conj_trans:
4829 printf("conj_trans ");
4830 break;
4831 }
4832
4833 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
4834 incy);
4835
4836 sge_print_matrix(A, m_i, n_i, lda, order_type, "A");
4837
4838 sprint_vector(head_x, n_i, incx_val, "head_x");
4839 sprint_vector(tail_x, n_i, incx_val, "tail_x");
4840 cprint_vector(y_gen, m_i, 1, "y_gen");
4841 cprint_vector(y, m_i, incy_val, "y_final");
4842
4843 printf(" ");
4844 printf("alpha = ");
4845 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
4846 printf("\n ");
4847 printf("beta = ");
4848 printf("(%16.8e, %16.8e)", beta[0], beta[1]);
4849 printf("\n");
4850 for (j = 0, k = 0; j < m_i * incy_gen;
4851 j += incy_gen, k++) {
4852 printf(" ");
4853 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
4854 head_r_true[j], tail_r_true[j],
4855 head_r_true[j + 1], tail_r_true[j + 1]);
4856 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
4857 }
4858
4859 printf(" ratio=%.4e\n", ratio);
4860 p_count++;
4861 }
4862 if (bad_ratios >= MAX_BAD_TESTS) {
4863 printf("\ntoo many failures, exiting....");
4864 printf("\nTesting and compilation");
4865 printf(" are incomplete\n\n");
4866 goto end;
4867 }
4868 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4869 printf("\nFlagrant ratio error, exiting...");
4870 printf("\nTesting and compilation");
4871 printf(" are incomplete\n\n");
4872 goto end;
4873 }
4874 }
4875 if (d_count == 0) {
4876 if (ratio > ratio_max)
4877 ratio_max = ratio;
4878
4879 if (ratio != 0.0 && ratio < ratio_min)
4880 ratio_min = ratio;
4881
4882 tot_tests++;
4883 }
4884 } /* incy */
4885 } /* incx */
4886 } /* lda */
4887 } /* trans */
4888 } /* order */
4889 } /* tests */
4890 } /* norm */
4891
4892 } /* beta */
4893 } /* alpha */
4894 } /* debug */
4895
4896 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
4897 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
4898 fname, m, n, ntests, thresh);
4899 printf
4900 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
4901 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
4902 ratio_min, ratio_max);
4903 }
4904
4905 end:
4906 FPU_FIX_STOP;
4907
4908 blas_free(head_x);
4909 blas_free(tail_x);
4910 blas_free(y);
4911 blas_free(head_x_gen);
4912 blas_free(tail_x_gen);
4913 blas_free(y_gen);
4914 blas_free(temp);
4915 blas_free(A);
4916 blas_free(head_r_true);
4917 blas_free(tail_r_true);
4918 blas_free(ratios);
4919
4920 *min_ratio = ratio_min;
4921 *num_bad_ratio = bad_ratios;
4922 *num_tests = tot_tests;
4923 return ratio_max;
4924 }
do_test_zgemv2_z_d(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)4925 double do_test_zgemv2_z_d(int m, int n, int ntests, int *seed, double thresh,
4926 int debug, float test_prob, double *min_ratio,
4927 int *num_bad_ratio, int *num_tests)
4928
4929 /*
4930 * Purpose
4931 * =======
4932 *
4933 * Runs a series of tests on GEMV2.
4934 *
4935 * Arguments
4936 * =========
4937 *
4938 * m (input) int
4939 * The number of rows
4940 *
4941 * n (input) int
4942 * The number of columns
4943 *
4944 * ntests (input) int
4945 * The number of tests to run for each set of attributes.
4946 *
4947 * seed (input/output) int
4948 * The seed for the random number generator used in testgen().
4949 *
4950 * thresh (input) double
4951 * When the ratio returned from test() exceeds the specified
4952 * threshold, the current size, r_true, r_comp, and ratio will be
4953 * printed. (Since ratio is supposed to be O(1), we can set thresh
4954 * to ~10.)
4955 *
4956 * debug (input) int
4957 * If debug=3, print summary
4958 * If debug=2, print summary only if the number of bad ratios > 0
4959 * If debug=1, print complete info if tests fail
4960 * If debug=0, return max ratio
4961 *
4962 * test_prob (input) float
4963 * The specified test will be performed only if the generated
4964 * random exceeds this threshold.
4965 *
4966 * min_ratio (output) double
4967 * The minimum ratio
4968 *
4969 * num_bad_ratio (output) int
4970 * The number of tests fail; they are above the threshold.
4971 *
4972 * num_tests (output) int
4973 * The number of tests is being performed.
4974 *
4975 * Return value
4976 * ============
4977 *
4978 * The maximum ratio if run successfully, otherwise return -1
4979 *
4980 * Code structure
4981 * ==============
4982 *
4983 * debug loop -- if debug is one, the first loop computes the max ratio
4984 * -- and the last(second) loop outputs debugging information,
4985 * -- if the test fail and its ratio > 0.5 * max ratio.
4986 * -- if debug is zero, the loop is executed once
4987 * alpha loop -- varying alpha: 0, 1, or random
4988 * beta loop -- varying beta: 0, 1, or random
4989
4990 * norm loop -- varying norm: near undeflow, near one, or
4991 * -- near overflow
4992 * numtest loop -- how many times the test is perform with
4993 * -- above set of attributes
4994 * order loop -- varying order type: rowmajor or colmajor
4995 * trans loop -- varying uplo type: upper or lower
4996 * lda loop -- varying lda: m, m+1, 2m
4997 * incx loop -- varying incx: -2, -1, 1, 2
4998 * incy loop -- varying incy: -2, -1, 1, 2
4999 */
5000 {
5001 /* function name */
5002 const char fname[] = "BLAS_zgemv2_z_d";
5003
5004 /* max number of debug lines to print */
5005 const int max_print = 8;
5006
5007 /* Variables in the "x_val" form are loop vars for corresponding
5008 variables */
5009 int i; /* iterate through the repeating tests */
5010 int j, k; /* multipurpose counters or variables */
5011 int iy; /* use to index y */
5012 int incx_val, incy_val, /* for testing different inc values */
5013 incx, incy;
5014 int incy_gen; /* for complex case inc=2, for real case inc=1 */
5015 int d_count; /* counter for debug */
5016 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
5017 int p_count; /* counter for the number of debug lines printed */
5018 int tot_tests; /* total number of tests to be done */
5019 int norm; /* input values of near underflow/one/overflow */
5020 double ratio_max; /* the current maximum ratio */
5021 double ratio_min; /* the current minimum ratio */
5022 double *ratios; /* a temporary variable for calculating ratio */
5023 double ratio; /* the per-use test ratio from test() */
5024 int bad_ratios; /* the number of ratios over the threshold */
5025 double eps_int; /* the internal epsilon expected--2^(-24) for float */
5026 double un_int; /* the internal underflow threshold */
5027 double alpha[2];
5028 double beta[2];
5029 double *A;
5030 double *head_x;
5031 double *tail_x;
5032 double *y;
5033 double *temp; /* use for calculating ratio */
5034
5035 /* x_gen and y_gen are used to store vectors generated by testgen.
5036 they eventually are copied back to x and y */
5037 double *head_x_gen;
5038 double *tail_x_gen;
5039 double *y_gen;
5040
5041 /* the true r calculated by testgen(), in double-double */
5042 double *head_r_true, *tail_r_true;
5043
5044 int alpha_val;
5045 int alpha_flag; /* input flag for BLAS_zgemv2_z_d_testgen */
5046 int beta_val;
5047 int beta_flag; /* input flag for BLAS_zgemv2_z_d_testgen */
5048 int order_val;
5049 enum blas_order_type order_type;
5050
5051 enum blas_prec_type prec;
5052 int trans_val;
5053 enum blas_trans_type trans_type;
5054 int m_i;
5055 int n_i;
5056 int max_mn; /* the max of m and n */
5057 int lda_val;
5058 int lda;
5059 int saved_seed; /* for saving the original seed */
5060 int count, old_count; /* use for counting the number of testgen calls * 2 */
5061
5062 FPU_FIX_DECL;
5063
5064 /* test for bad arguments */
5065 if (n < 0 || m < 0 || ntests < 0)
5066 BLAS_error(fname, 0, 0, NULL);
5067
5068 /* initialization */
5069 *num_bad_ratio = 0;
5070 *num_tests = 0;
5071 *min_ratio = 0.0;
5072
5073 saved_seed = *seed;
5074 ratio_min = 1e308;
5075 ratio_max = 0.0;
5076 ratio = 0.0;
5077 tot_tests = 0;
5078 p_count = 0;
5079 count = 0;
5080 find_max_ratio = 0;
5081 bad_ratios = 0;
5082 old_count = 0;
5083
5084 if (debug == 3)
5085 find_max_ratio = 1;
5086 max_mn = MAX(m, n);
5087 if (m == 0 || n == 0) {
5088 return 0.0;
5089 }
5090
5091 FPU_FIX_START;
5092
5093 incy_gen = 1;
5094 incy_gen *= 2;
5095
5096 /* get space for calculation */
5097 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
5098 if (max_mn * 2 > 0 && head_x == NULL) {
5099 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5100 }
5101 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
5102 if (max_mn * 2 > 0 && tail_x == NULL) {
5103 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5104 }
5105 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
5106 if (max_mn * 2 > 0 && y == NULL) {
5107 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5108 }
5109 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
5110 if (max_mn > 0 && head_x_gen == NULL) {
5111 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5112 }
5113 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
5114 if (max_mn > 0 && tail_x_gen == NULL) {
5115 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5116 }
5117 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5118 if (max_mn > 0 && y_gen == NULL) {
5119 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5120 }
5121 temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5122 if (max_mn > 0 && temp == NULL) {
5123 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5124 }
5125 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5126 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5127 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
5128 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5129 }
5130 ratios = (double *) blas_malloc(max_mn * sizeof(double));
5131 if (max_mn > 0 && ratios == NULL) {
5132 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5133 }
5134 A =
5135 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
5136 2);
5137 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
5138 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5139 }
5140
5141 /* The debug iteration:
5142 If debug=1, then will execute the iteration twice. First, compute the
5143 max ratio. Second, print info if ratio > (50% * ratio_max). */
5144 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
5145 bad_ratios = 0; /* set to zero */
5146
5147 if ((debug == 3) && (d_count == find_max_ratio))
5148 *seed = saved_seed; /* restore the original seed */
5149
5150 /* varying alpha */
5151 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
5152 alpha_flag = 0;
5153 switch (alpha_val) {
5154 case 0:
5155 alpha[0] = alpha[1] = 0.0;
5156 alpha_flag = 1;
5157 break;
5158 case 1:
5159 alpha[0] = 1.0;
5160 alpha[1] = 0.0;
5161 alpha_flag = 1;
5162 break;
5163 }
5164
5165 /* varying beta */
5166 for (beta_val = 0; beta_val < 3; beta_val++) {
5167 beta_flag = 0;
5168 switch (beta_val) {
5169 case 0:
5170 beta[0] = beta[1] = 0.0;
5171 beta_flag = 1;
5172 break;
5173 case 1:
5174 beta[0] = 1.0;
5175 beta[1] = 0.0;
5176 beta_flag = 1;
5177 break;
5178 }
5179
5180
5181 eps_int = power(2, -BITS_D);
5182 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5183 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5184 prec = blas_prec_double;
5185
5186 /* values near underflow, 1, or overflow */
5187 for (norm = -1; norm <= 1; norm++) {
5188
5189 /* number of tests */
5190 for (i = 0; i < ntests; i++) {
5191
5192 /* row or col major */
5193 for (order_val = 0; order_val < 2; order_val++) {
5194 switch (order_val) {
5195 case 0:
5196 order_type = blas_rowmajor;
5197 break;
5198 case 1:
5199 default:
5200 order_type = blas_colmajor;
5201 break;
5202 }
5203
5204 /* no_trans, trans, or conj_trans */
5205 for (trans_val = 0; trans_val < 3; trans_val++) {
5206 switch (trans_val) {
5207 case 0:
5208 trans_type = blas_no_trans;
5209 m_i = m;
5210 n_i = n;
5211 break;
5212 case 1:
5213 trans_type = blas_trans;
5214 m_i = n;
5215 n_i = m;
5216 break;
5217 case 2:
5218 default:
5219 trans_type = blas_conj_trans;
5220 m_i = n;
5221 n_i = m;
5222 break;
5223 }
5224
5225 /* lda=n, n+1, or 2n */
5226 for (lda_val = 0; lda_val < 3; lda_val++) {
5227 switch (lda_val) {
5228 case 0:
5229 lda = m_i;
5230 break;
5231 case 1:
5232 lda = m_i + 1;
5233 break;
5234 case 2:
5235 default:
5236 lda = 2 * m_i;
5237 break;
5238 }
5239 if ((order_type == blas_rowmajor && lda < n) ||
5240 (order_type == blas_colmajor && lda < m))
5241 continue;
5242
5243 /* For the sake of speed, we throw out this case at random */
5244 if (xrand(seed) >= test_prob)
5245 continue;
5246
5247 /* in the trivial cases, no need to run testgen */
5248 if (m > 0 && n > 0)
5249 BLAS_zgemv2_z_d_testgen(norm, order_type, trans_type, m,
5250 n, &alpha, alpha_flag, A, lda,
5251 head_x_gen, tail_x_gen, &beta,
5252 beta_flag, y_gen, seed,
5253 head_r_true, tail_r_true);
5254
5255 count++;
5256
5257 /* varying incx */
5258 for (incx_val = -2; incx_val <= 2; incx_val++) {
5259 if (incx_val == 0)
5260 continue;
5261
5262 /* setting incx */
5263 incx = incx_val;
5264
5265
5266 dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
5267 dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
5268
5269 /* varying incy */
5270 for (incy_val = -2; incy_val <= 2; incy_val++) {
5271 if (incy_val == 0)
5272 continue;
5273
5274 /* setting incy */
5275 incy = incy_val;
5276 incy *= 2;
5277
5278 zcopy_vector(y_gen, m_i, 1, y, incy_val);
5279
5280 /* call BLAS_zgemv2_z_d */
5281 FPU_FIX_STOP;
5282 BLAS_zgemv2_z_d(order_type, trans_type, m, n, alpha, A,
5283 lda, head_x, tail_x, incx_val, beta, y,
5284 incy_val);
5285 FPU_FIX_START;
5286
5287 /* set y starting index */
5288 iy = 0;
5289 if (incy < 0)
5290 iy = -(m_i - 1) * incy;
5291
5292 /* computing the ratio */
5293 if (m > 0 && n > 0)
5294 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
5295 /* copy row j of A to temp */
5296 zge_copy_row(order_type, trans_type, m_i, n_i, A,
5297 lda, temp, j);
5298
5299 test_BLAS_zdot2_z_d(n_i, blas_no_conj, alpha, beta,
5300 &y_gen[k], &y[iy],
5301 &head_r_true[k],
5302 &tail_r_true[k], temp, 1,
5303 head_x, tail_x, incx_val,
5304 eps_int, un_int, &ratios[j]);
5305
5306 /* take the max ratio */
5307 if (j == 0) {
5308 ratio = ratios[0];
5309 /* The !<= below causes NaN error to be detected.
5310 Note that (NaN > thresh) is always false. */
5311 } else if (!(ratios[j] <= ratio)) {
5312 ratio = ratios[j];
5313 }
5314 iy += incy;
5315 }
5316
5317 /* Increase the number of bad ratio, if the ratio
5318 is bigger than the threshold.
5319 The !<= below causes NaN error to be detected.
5320 Note that (NaN > thresh) is always false. */
5321 if (!(ratio <= thresh)) {
5322 bad_ratios++;
5323
5324 if ((debug == 3) && /* print only when debug is on */
5325 (count != old_count) && /* print if old vector is different
5326 from the current one */
5327 (d_count == find_max_ratio) &&
5328 (p_count <= max_print) &&
5329 (ratio > 0.5 * ratio_max)) {
5330 old_count = count;
5331
5332 printf
5333 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
5334 fname, m, n, ntests, thresh);
5335
5336 /* Print test info */
5337 switch (prec) {
5338 case blas_prec_single:
5339 printf("single ");
5340 break;
5341 case blas_prec_double:
5342 printf("double ");
5343 break;
5344 case blas_prec_indigenous:
5345 printf("indigenous ");
5346 break;
5347 case blas_prec_extra:
5348 printf("extra ");
5349 break;
5350 }
5351 switch (norm) {
5352 case -1:
5353 printf("near_underflow ");
5354 break;
5355 case 0:
5356 printf("near_one ");
5357 break;
5358 case 1:
5359 printf("near_overflow ");
5360 break;
5361 }
5362 switch (order_type) {
5363 case blas_rowmajor:
5364 printf("row_major ");
5365 break;
5366 case blas_colmajor:
5367 printf("col_major ");
5368 break;
5369 }
5370 switch (trans_type) {
5371 case blas_no_trans:
5372 printf("no_trans ");
5373 break;
5374 case blas_trans:
5375 printf("trans ");
5376 break;
5377 case blas_conj_trans:
5378 printf("conj_trans ");
5379 break;
5380 }
5381
5382 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
5383 incy);
5384
5385 zge_print_matrix(A, m_i, n_i, lda, order_type, "A");
5386
5387 dprint_vector(head_x, n_i, incx_val, "head_x");
5388 dprint_vector(tail_x, n_i, incx_val, "tail_x");
5389 zprint_vector(y_gen, m_i, 1, "y_gen");
5390 zprint_vector(y, m_i, incy_val, "y_final");
5391
5392 printf(" ");
5393 printf("alpha = ");
5394 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
5395 printf("\n ");
5396 printf("beta = ");
5397 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
5398 printf("\n");
5399 for (j = 0, k = 0; j < m_i * incy_gen;
5400 j += incy_gen, k++) {
5401 printf(" ");
5402 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
5403 head_r_true[j], tail_r_true[j],
5404 head_r_true[j + 1], tail_r_true[j + 1]);
5405 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
5406 }
5407
5408 printf(" ratio=%.4e\n", ratio);
5409 p_count++;
5410 }
5411 if (bad_ratios >= MAX_BAD_TESTS) {
5412 printf("\ntoo many failures, exiting....");
5413 printf("\nTesting and compilation");
5414 printf(" are incomplete\n\n");
5415 goto end;
5416 }
5417 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
5418 printf("\nFlagrant ratio error, exiting...");
5419 printf("\nTesting and compilation");
5420 printf(" are incomplete\n\n");
5421 goto end;
5422 }
5423 }
5424 if (d_count == 0) {
5425 if (ratio > ratio_max)
5426 ratio_max = ratio;
5427
5428 if (ratio != 0.0 && ratio < ratio_min)
5429 ratio_min = ratio;
5430
5431 tot_tests++;
5432 }
5433 } /* incy */
5434 } /* incx */
5435 } /* lda */
5436 } /* trans */
5437 } /* order */
5438 } /* tests */
5439 } /* norm */
5440
5441 } /* beta */
5442 } /* alpha */
5443 } /* debug */
5444
5445 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
5446 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
5447 fname, m, n, ntests, thresh);
5448 printf
5449 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
5450 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
5451 ratio_min, ratio_max);
5452 }
5453
5454 end:
5455 FPU_FIX_STOP;
5456
5457 blas_free(head_x);
5458 blas_free(tail_x);
5459 blas_free(y);
5460 blas_free(head_x_gen);
5461 blas_free(tail_x_gen);
5462 blas_free(y_gen);
5463 blas_free(temp);
5464 blas_free(A);
5465 blas_free(head_r_true);
5466 blas_free(tail_r_true);
5467 blas_free(ratios);
5468
5469 *min_ratio = ratio_min;
5470 *num_bad_ratio = bad_ratios;
5471 *num_tests = tot_tests;
5472 return ratio_max;
5473 }
do_test_zgemv2_d_z(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)5474 double do_test_zgemv2_d_z(int m, int n, int ntests, int *seed, double thresh,
5475 int debug, float test_prob, double *min_ratio,
5476 int *num_bad_ratio, int *num_tests)
5477
5478 /*
5479 * Purpose
5480 * =======
5481 *
5482 * Runs a series of tests on GEMV2.
5483 *
5484 * Arguments
5485 * =========
5486 *
5487 * m (input) int
5488 * The number of rows
5489 *
5490 * n (input) int
5491 * The number of columns
5492 *
5493 * ntests (input) int
5494 * The number of tests to run for each set of attributes.
5495 *
5496 * seed (input/output) int
5497 * The seed for the random number generator used in testgen().
5498 *
5499 * thresh (input) double
5500 * When the ratio returned from test() exceeds the specified
5501 * threshold, the current size, r_true, r_comp, and ratio will be
5502 * printed. (Since ratio is supposed to be O(1), we can set thresh
5503 * to ~10.)
5504 *
5505 * debug (input) int
5506 * If debug=3, print summary
5507 * If debug=2, print summary only if the number of bad ratios > 0
5508 * If debug=1, print complete info if tests fail
5509 * If debug=0, return max ratio
5510 *
5511 * test_prob (input) float
5512 * The specified test will be performed only if the generated
5513 * random exceeds this threshold.
5514 *
5515 * min_ratio (output) double
5516 * The minimum ratio
5517 *
5518 * num_bad_ratio (output) int
5519 * The number of tests fail; they are above the threshold.
5520 *
5521 * num_tests (output) int
5522 * The number of tests is being performed.
5523 *
5524 * Return value
5525 * ============
5526 *
5527 * The maximum ratio if run successfully, otherwise return -1
5528 *
5529 * Code structure
5530 * ==============
5531 *
5532 * debug loop -- if debug is one, the first loop computes the max ratio
5533 * -- and the last(second) loop outputs debugging information,
5534 * -- if the test fail and its ratio > 0.5 * max ratio.
5535 * -- if debug is zero, the loop is executed once
5536 * alpha loop -- varying alpha: 0, 1, or random
5537 * beta loop -- varying beta: 0, 1, or random
5538
5539 * norm loop -- varying norm: near undeflow, near one, or
5540 * -- near overflow
5541 * numtest loop -- how many times the test is perform with
5542 * -- above set of attributes
5543 * order loop -- varying order type: rowmajor or colmajor
5544 * trans loop -- varying uplo type: upper or lower
5545 * lda loop -- varying lda: m, m+1, 2m
5546 * incx loop -- varying incx: -2, -1, 1, 2
5547 * incy loop -- varying incy: -2, -1, 1, 2
5548 */
5549 {
5550 /* function name */
5551 const char fname[] = "BLAS_zgemv2_d_z";
5552
5553 /* max number of debug lines to print */
5554 const int max_print = 8;
5555
5556 /* Variables in the "x_val" form are loop vars for corresponding
5557 variables */
5558 int i; /* iterate through the repeating tests */
5559 int j, k; /* multipurpose counters or variables */
5560 int iy; /* use to index y */
5561 int incx_val, incy_val, /* for testing different inc values */
5562 incx, incy;
5563 int incy_gen; /* for complex case inc=2, for real case inc=1 */
5564 int d_count; /* counter for debug */
5565 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
5566 int p_count; /* counter for the number of debug lines printed */
5567 int tot_tests; /* total number of tests to be done */
5568 int norm; /* input values of near underflow/one/overflow */
5569 double ratio_max; /* the current maximum ratio */
5570 double ratio_min; /* the current minimum ratio */
5571 double *ratios; /* a temporary variable for calculating ratio */
5572 double ratio; /* the per-use test ratio from test() */
5573 int bad_ratios; /* the number of ratios over the threshold */
5574 double eps_int; /* the internal epsilon expected--2^(-24) for float */
5575 double un_int; /* the internal underflow threshold */
5576 double alpha[2];
5577 double beta[2];
5578 double *A;
5579 double *head_x;
5580 double *tail_x;
5581 double *y;
5582 double *temp; /* use for calculating ratio */
5583
5584 /* x_gen and y_gen are used to store vectors generated by testgen.
5585 they eventually are copied back to x and y */
5586 double *head_x_gen;
5587 double *tail_x_gen;
5588 double *y_gen;
5589
5590 /* the true r calculated by testgen(), in double-double */
5591 double *head_r_true, *tail_r_true;
5592
5593 int alpha_val;
5594 int alpha_flag; /* input flag for BLAS_zgemv2_d_z_testgen */
5595 int beta_val;
5596 int beta_flag; /* input flag for BLAS_zgemv2_d_z_testgen */
5597 int order_val;
5598 enum blas_order_type order_type;
5599
5600 enum blas_prec_type prec;
5601 int trans_val;
5602 enum blas_trans_type trans_type;
5603 int m_i;
5604 int n_i;
5605 int max_mn; /* the max of m and n */
5606 int lda_val;
5607 int lda;
5608 int saved_seed; /* for saving the original seed */
5609 int count, old_count; /* use for counting the number of testgen calls * 2 */
5610
5611 FPU_FIX_DECL;
5612
5613 /* test for bad arguments */
5614 if (n < 0 || m < 0 || ntests < 0)
5615 BLAS_error(fname, 0, 0, NULL);
5616
5617 /* initialization */
5618 *num_bad_ratio = 0;
5619 *num_tests = 0;
5620 *min_ratio = 0.0;
5621
5622 saved_seed = *seed;
5623 ratio_min = 1e308;
5624 ratio_max = 0.0;
5625 ratio = 0.0;
5626 tot_tests = 0;
5627 p_count = 0;
5628 count = 0;
5629 find_max_ratio = 0;
5630 bad_ratios = 0;
5631 old_count = 0;
5632
5633 if (debug == 3)
5634 find_max_ratio = 1;
5635 max_mn = MAX(m, n);
5636 if (m == 0 || n == 0) {
5637 return 0.0;
5638 }
5639
5640 FPU_FIX_START;
5641
5642 incy_gen = 1;
5643 incy_gen *= 2;
5644
5645 /* get space for calculation */
5646 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
5647 if (max_mn * 2 > 0 && head_x == NULL) {
5648 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5649 }
5650 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
5651 if (max_mn * 2 > 0 && tail_x == NULL) {
5652 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5653 }
5654 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
5655 if (max_mn * 2 > 0 && y == NULL) {
5656 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5657 }
5658 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5659 if (max_mn > 0 && head_x_gen == NULL) {
5660 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5661 }
5662 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5663 if (max_mn > 0 && tail_x_gen == NULL) {
5664 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5665 }
5666 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5667 if (max_mn > 0 && y_gen == NULL) {
5668 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5669 }
5670 temp = (double *) blas_malloc(max_mn * sizeof(double));
5671 if (max_mn > 0 && temp == NULL) {
5672 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5673 }
5674 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5675 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5676 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
5677 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5678 }
5679 ratios = (double *) blas_malloc(max_mn * sizeof(double));
5680 if (max_mn > 0 && ratios == NULL) {
5681 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5682 }
5683 A =
5684 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
5685 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
5686 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5687 }
5688
5689 /* The debug iteration:
5690 If debug=1, then will execute the iteration twice. First, compute the
5691 max ratio. Second, print info if ratio > (50% * ratio_max). */
5692 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
5693 bad_ratios = 0; /* set to zero */
5694
5695 if ((debug == 3) && (d_count == find_max_ratio))
5696 *seed = saved_seed; /* restore the original seed */
5697
5698 /* varying alpha */
5699 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
5700 alpha_flag = 0;
5701 switch (alpha_val) {
5702 case 0:
5703 alpha[0] = alpha[1] = 0.0;
5704 alpha_flag = 1;
5705 break;
5706 case 1:
5707 alpha[0] = 1.0;
5708 alpha[1] = 0.0;
5709 alpha_flag = 1;
5710 break;
5711 }
5712
5713 /* varying beta */
5714 for (beta_val = 0; beta_val < 3; beta_val++) {
5715 beta_flag = 0;
5716 switch (beta_val) {
5717 case 0:
5718 beta[0] = beta[1] = 0.0;
5719 beta_flag = 1;
5720 break;
5721 case 1:
5722 beta[0] = 1.0;
5723 beta[1] = 0.0;
5724 beta_flag = 1;
5725 break;
5726 }
5727
5728
5729 eps_int = power(2, -BITS_D);
5730 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5731 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5732 prec = blas_prec_double;
5733
5734 /* values near underflow, 1, or overflow */
5735 for (norm = -1; norm <= 1; norm++) {
5736
5737 /* number of tests */
5738 for (i = 0; i < ntests; i++) {
5739
5740 /* row or col major */
5741 for (order_val = 0; order_val < 2; order_val++) {
5742 switch (order_val) {
5743 case 0:
5744 order_type = blas_rowmajor;
5745 break;
5746 case 1:
5747 default:
5748 order_type = blas_colmajor;
5749 break;
5750 }
5751
5752 /* no_trans, trans, or conj_trans */
5753 for (trans_val = 0; trans_val < 3; trans_val++) {
5754 switch (trans_val) {
5755 case 0:
5756 trans_type = blas_no_trans;
5757 m_i = m;
5758 n_i = n;
5759 break;
5760 case 1:
5761 trans_type = blas_trans;
5762 m_i = n;
5763 n_i = m;
5764 break;
5765 case 2:
5766 default:
5767 trans_type = blas_conj_trans;
5768 m_i = n;
5769 n_i = m;
5770 break;
5771 }
5772
5773 /* lda=n, n+1, or 2n */
5774 for (lda_val = 0; lda_val < 3; lda_val++) {
5775 switch (lda_val) {
5776 case 0:
5777 lda = m_i;
5778 break;
5779 case 1:
5780 lda = m_i + 1;
5781 break;
5782 case 2:
5783 default:
5784 lda = 2 * m_i;
5785 break;
5786 }
5787 if ((order_type == blas_rowmajor && lda < n) ||
5788 (order_type == blas_colmajor && lda < m))
5789 continue;
5790
5791 /* For the sake of speed, we throw out this case at random */
5792 if (xrand(seed) >= test_prob)
5793 continue;
5794
5795 /* in the trivial cases, no need to run testgen */
5796 if (m > 0 && n > 0)
5797 BLAS_zgemv2_d_z_testgen(norm, order_type, trans_type, m,
5798 n, &alpha, alpha_flag, A, lda,
5799 head_x_gen, tail_x_gen, &beta,
5800 beta_flag, y_gen, seed,
5801 head_r_true, tail_r_true);
5802
5803 count++;
5804
5805 /* varying incx */
5806 for (incx_val = -2; incx_val <= 2; incx_val++) {
5807 if (incx_val == 0)
5808 continue;
5809
5810 /* setting incx */
5811 incx = incx_val;
5812 incx *= 2;
5813
5814 zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
5815 zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
5816
5817 /* varying incy */
5818 for (incy_val = -2; incy_val <= 2; incy_val++) {
5819 if (incy_val == 0)
5820 continue;
5821
5822 /* setting incy */
5823 incy = incy_val;
5824 incy *= 2;
5825
5826 zcopy_vector(y_gen, m_i, 1, y, incy_val);
5827
5828 /* call BLAS_zgemv2_d_z */
5829 FPU_FIX_STOP;
5830 BLAS_zgemv2_d_z(order_type, trans_type, m, n, alpha, A,
5831 lda, head_x, tail_x, incx_val, beta, y,
5832 incy_val);
5833 FPU_FIX_START;
5834
5835 /* set y starting index */
5836 iy = 0;
5837 if (incy < 0)
5838 iy = -(m_i - 1) * incy;
5839
5840 /* computing the ratio */
5841 if (m > 0 && n > 0)
5842 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
5843 /* copy row j of A to temp */
5844 dge_copy_row(order_type, trans_type, m_i, n_i, A,
5845 lda, temp, j);
5846
5847 test_BLAS_zdot2_d_z(n_i, blas_no_conj, alpha, beta,
5848 &y_gen[k], &y[iy],
5849 &head_r_true[k],
5850 &tail_r_true[k], temp, 1,
5851 head_x, tail_x, incx_val,
5852 eps_int, un_int, &ratios[j]);
5853
5854 /* take the max ratio */
5855 if (j == 0) {
5856 ratio = ratios[0];
5857 /* The !<= below causes NaN error to be detected.
5858 Note that (NaN > thresh) is always false. */
5859 } else if (!(ratios[j] <= ratio)) {
5860 ratio = ratios[j];
5861 }
5862 iy += incy;
5863 }
5864
5865 /* Increase the number of bad ratio, if the ratio
5866 is bigger than the threshold.
5867 The !<= below causes NaN error to be detected.
5868 Note that (NaN > thresh) is always false. */
5869 if (!(ratio <= thresh)) {
5870 bad_ratios++;
5871
5872 if ((debug == 3) && /* print only when debug is on */
5873 (count != old_count) && /* print if old vector is different
5874 from the current one */
5875 (d_count == find_max_ratio) &&
5876 (p_count <= max_print) &&
5877 (ratio > 0.5 * ratio_max)) {
5878 old_count = count;
5879
5880 printf
5881 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
5882 fname, m, n, ntests, thresh);
5883
5884 /* Print test info */
5885 switch (prec) {
5886 case blas_prec_single:
5887 printf("single ");
5888 break;
5889 case blas_prec_double:
5890 printf("double ");
5891 break;
5892 case blas_prec_indigenous:
5893 printf("indigenous ");
5894 break;
5895 case blas_prec_extra:
5896 printf("extra ");
5897 break;
5898 }
5899 switch (norm) {
5900 case -1:
5901 printf("near_underflow ");
5902 break;
5903 case 0:
5904 printf("near_one ");
5905 break;
5906 case 1:
5907 printf("near_overflow ");
5908 break;
5909 }
5910 switch (order_type) {
5911 case blas_rowmajor:
5912 printf("row_major ");
5913 break;
5914 case blas_colmajor:
5915 printf("col_major ");
5916 break;
5917 }
5918 switch (trans_type) {
5919 case blas_no_trans:
5920 printf("no_trans ");
5921 break;
5922 case blas_trans:
5923 printf("trans ");
5924 break;
5925 case blas_conj_trans:
5926 printf("conj_trans ");
5927 break;
5928 }
5929
5930 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
5931 incy);
5932
5933 dge_print_matrix(A, m_i, n_i, lda, order_type, "A");
5934
5935 zprint_vector(head_x, n_i, incx_val, "head_x");
5936 zprint_vector(tail_x, n_i, incx_val, "tail_x");
5937 zprint_vector(y_gen, m_i, 1, "y_gen");
5938 zprint_vector(y, m_i, incy_val, "y_final");
5939
5940 printf(" ");
5941 printf("alpha = ");
5942 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
5943 printf("\n ");
5944 printf("beta = ");
5945 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
5946 printf("\n");
5947 for (j = 0, k = 0; j < m_i * incy_gen;
5948 j += incy_gen, k++) {
5949 printf(" ");
5950 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
5951 head_r_true[j], tail_r_true[j],
5952 head_r_true[j + 1], tail_r_true[j + 1]);
5953 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
5954 }
5955
5956 printf(" ratio=%.4e\n", ratio);
5957 p_count++;
5958 }
5959 if (bad_ratios >= MAX_BAD_TESTS) {
5960 printf("\ntoo many failures, exiting....");
5961 printf("\nTesting and compilation");
5962 printf(" are incomplete\n\n");
5963 goto end;
5964 }
5965 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
5966 printf("\nFlagrant ratio error, exiting...");
5967 printf("\nTesting and compilation");
5968 printf(" are incomplete\n\n");
5969 goto end;
5970 }
5971 }
5972 if (d_count == 0) {
5973 if (ratio > ratio_max)
5974 ratio_max = ratio;
5975
5976 if (ratio != 0.0 && ratio < ratio_min)
5977 ratio_min = ratio;
5978
5979 tot_tests++;
5980 }
5981 } /* incy */
5982 } /* incx */
5983 } /* lda */
5984 } /* trans */
5985 } /* order */
5986 } /* tests */
5987 } /* norm */
5988
5989 } /* beta */
5990 } /* alpha */
5991 } /* debug */
5992
5993 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
5994 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
5995 fname, m, n, ntests, thresh);
5996 printf
5997 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
5998 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
5999 ratio_min, ratio_max);
6000 }
6001
6002 end:
6003 FPU_FIX_STOP;
6004
6005 blas_free(head_x);
6006 blas_free(tail_x);
6007 blas_free(y);
6008 blas_free(head_x_gen);
6009 blas_free(tail_x_gen);
6010 blas_free(y_gen);
6011 blas_free(temp);
6012 blas_free(A);
6013 blas_free(head_r_true);
6014 blas_free(tail_r_true);
6015 blas_free(ratios);
6016
6017 *min_ratio = ratio_min;
6018 *num_bad_ratio = bad_ratios;
6019 *num_tests = tot_tests;
6020 return ratio_max;
6021 }
do_test_zgemv2_d_d(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)6022 double do_test_zgemv2_d_d(int m, int n, int ntests, int *seed, double thresh,
6023 int debug, float test_prob, double *min_ratio,
6024 int *num_bad_ratio, int *num_tests)
6025
6026 /*
6027 * Purpose
6028 * =======
6029 *
6030 * Runs a series of tests on GEMV2.
6031 *
6032 * Arguments
6033 * =========
6034 *
6035 * m (input) int
6036 * The number of rows
6037 *
6038 * n (input) int
6039 * The number of columns
6040 *
6041 * ntests (input) int
6042 * The number of tests to run for each set of attributes.
6043 *
6044 * seed (input/output) int
6045 * The seed for the random number generator used in testgen().
6046 *
6047 * thresh (input) double
6048 * When the ratio returned from test() exceeds the specified
6049 * threshold, the current size, r_true, r_comp, and ratio will be
6050 * printed. (Since ratio is supposed to be O(1), we can set thresh
6051 * to ~10.)
6052 *
6053 * debug (input) int
6054 * If debug=3, print summary
6055 * If debug=2, print summary only if the number of bad ratios > 0
6056 * If debug=1, print complete info if tests fail
6057 * If debug=0, return max ratio
6058 *
6059 * test_prob (input) float
6060 * The specified test will be performed only if the generated
6061 * random exceeds this threshold.
6062 *
6063 * min_ratio (output) double
6064 * The minimum ratio
6065 *
6066 * num_bad_ratio (output) int
6067 * The number of tests fail; they are above the threshold.
6068 *
6069 * num_tests (output) int
6070 * The number of tests is being performed.
6071 *
6072 * Return value
6073 * ============
6074 *
6075 * The maximum ratio if run successfully, otherwise return -1
6076 *
6077 * Code structure
6078 * ==============
6079 *
6080 * debug loop -- if debug is one, the first loop computes the max ratio
6081 * -- and the last(second) loop outputs debugging information,
6082 * -- if the test fail and its ratio > 0.5 * max ratio.
6083 * -- if debug is zero, the loop is executed once
6084 * alpha loop -- varying alpha: 0, 1, or random
6085 * beta loop -- varying beta: 0, 1, or random
6086
6087 * norm loop -- varying norm: near undeflow, near one, or
6088 * -- near overflow
6089 * numtest loop -- how many times the test is perform with
6090 * -- above set of attributes
6091 * order loop -- varying order type: rowmajor or colmajor
6092 * trans loop -- varying uplo type: upper or lower
6093 * lda loop -- varying lda: m, m+1, 2m
6094 * incx loop -- varying incx: -2, -1, 1, 2
6095 * incy loop -- varying incy: -2, -1, 1, 2
6096 */
6097 {
6098 /* function name */
6099 const char fname[] = "BLAS_zgemv2_d_d";
6100
6101 /* max number of debug lines to print */
6102 const int max_print = 8;
6103
6104 /* Variables in the "x_val" form are loop vars for corresponding
6105 variables */
6106 int i; /* iterate through the repeating tests */
6107 int j, k; /* multipurpose counters or variables */
6108 int iy; /* use to index y */
6109 int incx_val, incy_val, /* for testing different inc values */
6110 incx, incy;
6111 int incy_gen; /* for complex case inc=2, for real case inc=1 */
6112 int d_count; /* counter for debug */
6113 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
6114 int p_count; /* counter for the number of debug lines printed */
6115 int tot_tests; /* total number of tests to be done */
6116 int norm; /* input values of near underflow/one/overflow */
6117 double ratio_max; /* the current maximum ratio */
6118 double ratio_min; /* the current minimum ratio */
6119 double *ratios; /* a temporary variable for calculating ratio */
6120 double ratio; /* the per-use test ratio from test() */
6121 int bad_ratios; /* the number of ratios over the threshold */
6122 double eps_int; /* the internal epsilon expected--2^(-24) for float */
6123 double un_int; /* the internal underflow threshold */
6124 double alpha[2];
6125 double beta[2];
6126 double *A;
6127 double *head_x;
6128 double *tail_x;
6129 double *y;
6130 double *temp; /* use for calculating ratio */
6131
6132 /* x_gen and y_gen are used to store vectors generated by testgen.
6133 they eventually are copied back to x and y */
6134 double *head_x_gen;
6135 double *tail_x_gen;
6136 double *y_gen;
6137
6138 /* the true r calculated by testgen(), in double-double */
6139 double *head_r_true, *tail_r_true;
6140
6141 int alpha_val;
6142 int alpha_flag; /* input flag for BLAS_zgemv2_d_d_testgen */
6143 int beta_val;
6144 int beta_flag; /* input flag for BLAS_zgemv2_d_d_testgen */
6145 int order_val;
6146 enum blas_order_type order_type;
6147
6148 enum blas_prec_type prec;
6149 int trans_val;
6150 enum blas_trans_type trans_type;
6151 int m_i;
6152 int n_i;
6153 int max_mn; /* the max of m and n */
6154 int lda_val;
6155 int lda;
6156 int saved_seed; /* for saving the original seed */
6157 int count, old_count; /* use for counting the number of testgen calls * 2 */
6158
6159 FPU_FIX_DECL;
6160
6161 /* test for bad arguments */
6162 if (n < 0 || m < 0 || ntests < 0)
6163 BLAS_error(fname, 0, 0, NULL);
6164
6165 /* initialization */
6166 *num_bad_ratio = 0;
6167 *num_tests = 0;
6168 *min_ratio = 0.0;
6169
6170 saved_seed = *seed;
6171 ratio_min = 1e308;
6172 ratio_max = 0.0;
6173 ratio = 0.0;
6174 tot_tests = 0;
6175 p_count = 0;
6176 count = 0;
6177 find_max_ratio = 0;
6178 bad_ratios = 0;
6179 old_count = 0;
6180
6181 if (debug == 3)
6182 find_max_ratio = 1;
6183 max_mn = MAX(m, n);
6184 if (m == 0 || n == 0) {
6185 return 0.0;
6186 }
6187
6188 FPU_FIX_START;
6189
6190 incy_gen = 1;
6191 incy_gen *= 2;
6192
6193 /* get space for calculation */
6194 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
6195 if (max_mn * 2 > 0 && head_x == NULL) {
6196 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6197 }
6198 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
6199 if (max_mn * 2 > 0 && tail_x == NULL) {
6200 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6201 }
6202 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
6203 if (max_mn * 2 > 0 && y == NULL) {
6204 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6205 }
6206 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
6207 if (max_mn > 0 && head_x_gen == NULL) {
6208 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6209 }
6210 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
6211 if (max_mn > 0 && tail_x_gen == NULL) {
6212 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6213 }
6214 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
6215 if (max_mn > 0 && y_gen == NULL) {
6216 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6217 }
6218 temp = (double *) blas_malloc(max_mn * sizeof(double));
6219 if (max_mn > 0 && temp == NULL) {
6220 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6221 }
6222 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
6223 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
6224 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
6225 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6226 }
6227 ratios = (double *) blas_malloc(max_mn * sizeof(double));
6228 if (max_mn > 0 && ratios == NULL) {
6229 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6230 }
6231 A =
6232 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
6233 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
6234 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6235 }
6236
6237 /* The debug iteration:
6238 If debug=1, then will execute the iteration twice. First, compute the
6239 max ratio. Second, print info if ratio > (50% * ratio_max). */
6240 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
6241 bad_ratios = 0; /* set to zero */
6242
6243 if ((debug == 3) && (d_count == find_max_ratio))
6244 *seed = saved_seed; /* restore the original seed */
6245
6246 /* varying alpha */
6247 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
6248 alpha_flag = 0;
6249 switch (alpha_val) {
6250 case 0:
6251 alpha[0] = alpha[1] = 0.0;
6252 alpha_flag = 1;
6253 break;
6254 case 1:
6255 alpha[0] = 1.0;
6256 alpha[1] = 0.0;
6257 alpha_flag = 1;
6258 break;
6259 }
6260
6261 /* varying beta */
6262 for (beta_val = 0; beta_val < 3; beta_val++) {
6263 beta_flag = 0;
6264 switch (beta_val) {
6265 case 0:
6266 beta[0] = beta[1] = 0.0;
6267 beta_flag = 1;
6268 break;
6269 case 1:
6270 beta[0] = 1.0;
6271 beta[1] = 0.0;
6272 beta_flag = 1;
6273 break;
6274 }
6275
6276
6277 eps_int = power(2, -BITS_D);
6278 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6279 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6280 prec = blas_prec_double;
6281
6282 /* values near underflow, 1, or overflow */
6283 for (norm = -1; norm <= 1; norm++) {
6284
6285 /* number of tests */
6286 for (i = 0; i < ntests; i++) {
6287
6288 /* row or col major */
6289 for (order_val = 0; order_val < 2; order_val++) {
6290 switch (order_val) {
6291 case 0:
6292 order_type = blas_rowmajor;
6293 break;
6294 case 1:
6295 default:
6296 order_type = blas_colmajor;
6297 break;
6298 }
6299
6300 /* no_trans, trans, or conj_trans */
6301 for (trans_val = 0; trans_val < 3; trans_val++) {
6302 switch (trans_val) {
6303 case 0:
6304 trans_type = blas_no_trans;
6305 m_i = m;
6306 n_i = n;
6307 break;
6308 case 1:
6309 trans_type = blas_trans;
6310 m_i = n;
6311 n_i = m;
6312 break;
6313 case 2:
6314 default:
6315 trans_type = blas_conj_trans;
6316 m_i = n;
6317 n_i = m;
6318 break;
6319 }
6320
6321 /* lda=n, n+1, or 2n */
6322 for (lda_val = 0; lda_val < 3; lda_val++) {
6323 switch (lda_val) {
6324 case 0:
6325 lda = m_i;
6326 break;
6327 case 1:
6328 lda = m_i + 1;
6329 break;
6330 case 2:
6331 default:
6332 lda = 2 * m_i;
6333 break;
6334 }
6335 if ((order_type == blas_rowmajor && lda < n) ||
6336 (order_type == blas_colmajor && lda < m))
6337 continue;
6338
6339 /* For the sake of speed, we throw out this case at random */
6340 if (xrand(seed) >= test_prob)
6341 continue;
6342
6343 /* in the trivial cases, no need to run testgen */
6344 if (m > 0 && n > 0)
6345 BLAS_zgemv2_d_d_testgen(norm, order_type, trans_type, m,
6346 n, &alpha, alpha_flag, A, lda,
6347 head_x_gen, tail_x_gen, &beta,
6348 beta_flag, y_gen, seed,
6349 head_r_true, tail_r_true);
6350
6351 count++;
6352
6353 /* varying incx */
6354 for (incx_val = -2; incx_val <= 2; incx_val++) {
6355 if (incx_val == 0)
6356 continue;
6357
6358 /* setting incx */
6359 incx = incx_val;
6360
6361
6362 dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
6363 dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
6364
6365 /* varying incy */
6366 for (incy_val = -2; incy_val <= 2; incy_val++) {
6367 if (incy_val == 0)
6368 continue;
6369
6370 /* setting incy */
6371 incy = incy_val;
6372 incy *= 2;
6373
6374 zcopy_vector(y_gen, m_i, 1, y, incy_val);
6375
6376 /* call BLAS_zgemv2_d_d */
6377 FPU_FIX_STOP;
6378 BLAS_zgemv2_d_d(order_type, trans_type, m, n, alpha, A,
6379 lda, head_x, tail_x, incx_val, beta, y,
6380 incy_val);
6381 FPU_FIX_START;
6382
6383 /* set y starting index */
6384 iy = 0;
6385 if (incy < 0)
6386 iy = -(m_i - 1) * incy;
6387
6388 /* computing the ratio */
6389 if (m > 0 && n > 0)
6390 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
6391 /* copy row j of A to temp */
6392 dge_copy_row(order_type, trans_type, m_i, n_i, A,
6393 lda, temp, j);
6394
6395 test_BLAS_zdot2_d_d(n_i, blas_no_conj, alpha, beta,
6396 &y_gen[k], &y[iy],
6397 &head_r_true[k],
6398 &tail_r_true[k], temp, 1,
6399 head_x, tail_x, incx_val,
6400 eps_int, un_int, &ratios[j]);
6401
6402 /* take the max ratio */
6403 if (j == 0) {
6404 ratio = ratios[0];
6405 /* The !<= below causes NaN error to be detected.
6406 Note that (NaN > thresh) is always false. */
6407 } else if (!(ratios[j] <= ratio)) {
6408 ratio = ratios[j];
6409 }
6410 iy += incy;
6411 }
6412
6413 /* Increase the number of bad ratio, if the ratio
6414 is bigger than the threshold.
6415 The !<= below causes NaN error to be detected.
6416 Note that (NaN > thresh) is always false. */
6417 if (!(ratio <= thresh)) {
6418 bad_ratios++;
6419
6420 if ((debug == 3) && /* print only when debug is on */
6421 (count != old_count) && /* print if old vector is different
6422 from the current one */
6423 (d_count == find_max_ratio) &&
6424 (p_count <= max_print) &&
6425 (ratio > 0.5 * ratio_max)) {
6426 old_count = count;
6427
6428 printf
6429 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
6430 fname, m, n, ntests, thresh);
6431
6432 /* Print test info */
6433 switch (prec) {
6434 case blas_prec_single:
6435 printf("single ");
6436 break;
6437 case blas_prec_double:
6438 printf("double ");
6439 break;
6440 case blas_prec_indigenous:
6441 printf("indigenous ");
6442 break;
6443 case blas_prec_extra:
6444 printf("extra ");
6445 break;
6446 }
6447 switch (norm) {
6448 case -1:
6449 printf("near_underflow ");
6450 break;
6451 case 0:
6452 printf("near_one ");
6453 break;
6454 case 1:
6455 printf("near_overflow ");
6456 break;
6457 }
6458 switch (order_type) {
6459 case blas_rowmajor:
6460 printf("row_major ");
6461 break;
6462 case blas_colmajor:
6463 printf("col_major ");
6464 break;
6465 }
6466 switch (trans_type) {
6467 case blas_no_trans:
6468 printf("no_trans ");
6469 break;
6470 case blas_trans:
6471 printf("trans ");
6472 break;
6473 case blas_conj_trans:
6474 printf("conj_trans ");
6475 break;
6476 }
6477
6478 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
6479 incy);
6480
6481 dge_print_matrix(A, m_i, n_i, lda, order_type, "A");
6482
6483 dprint_vector(head_x, n_i, incx_val, "head_x");
6484 dprint_vector(tail_x, n_i, incx_val, "tail_x");
6485 zprint_vector(y_gen, m_i, 1, "y_gen");
6486 zprint_vector(y, m_i, incy_val, "y_final");
6487
6488 printf(" ");
6489 printf("alpha = ");
6490 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
6491 printf("\n ");
6492 printf("beta = ");
6493 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
6494 printf("\n");
6495 for (j = 0, k = 0; j < m_i * incy_gen;
6496 j += incy_gen, k++) {
6497 printf(" ");
6498 printf("([%24.16e %24.16e], [%24.16e %24.16e])",
6499 head_r_true[j], tail_r_true[j],
6500 head_r_true[j + 1], tail_r_true[j + 1]);
6501 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
6502 }
6503
6504 printf(" ratio=%.4e\n", ratio);
6505 p_count++;
6506 }
6507 if (bad_ratios >= MAX_BAD_TESTS) {
6508 printf("\ntoo many failures, exiting....");
6509 printf("\nTesting and compilation");
6510 printf(" are incomplete\n\n");
6511 goto end;
6512 }
6513 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
6514 printf("\nFlagrant ratio error, exiting...");
6515 printf("\nTesting and compilation");
6516 printf(" are incomplete\n\n");
6517 goto end;
6518 }
6519 }
6520 if (d_count == 0) {
6521 if (ratio > ratio_max)
6522 ratio_max = ratio;
6523
6524 if (ratio != 0.0 && ratio < ratio_min)
6525 ratio_min = ratio;
6526
6527 tot_tests++;
6528 }
6529 } /* incy */
6530 } /* incx */
6531 } /* lda */
6532 } /* trans */
6533 } /* order */
6534 } /* tests */
6535 } /* norm */
6536
6537 } /* beta */
6538 } /* alpha */
6539 } /* debug */
6540
6541 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
6542 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
6543 fname, m, n, ntests, thresh);
6544 printf
6545 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
6546 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
6547 ratio_min, ratio_max);
6548 }
6549
6550 end:
6551 FPU_FIX_STOP;
6552
6553 blas_free(head_x);
6554 blas_free(tail_x);
6555 blas_free(y);
6556 blas_free(head_x_gen);
6557 blas_free(tail_x_gen);
6558 blas_free(y_gen);
6559 blas_free(temp);
6560 blas_free(A);
6561 blas_free(head_r_true);
6562 blas_free(tail_r_true);
6563 blas_free(ratios);
6564
6565 *min_ratio = ratio_min;
6566 *num_bad_ratio = bad_ratios;
6567 *num_tests = tot_tests;
6568 return ratio_max;
6569 }
do_test_sgemv2_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)6570 double do_test_sgemv2_x(int m, int n, int ntests, int *seed, double thresh,
6571 int debug, float test_prob, double *min_ratio,
6572 int *num_bad_ratio, int *num_tests)
6573
6574 /*
6575 * Purpose
6576 * =======
6577 *
6578 * Runs a series of tests on GEMV2.
6579 *
6580 * Arguments
6581 * =========
6582 *
6583 * m (input) int
6584 * The number of rows
6585 *
6586 * n (input) int
6587 * The number of columns
6588 *
6589 * ntests (input) int
6590 * The number of tests to run for each set of attributes.
6591 *
6592 * seed (input/output) int
6593 * The seed for the random number generator used in testgen().
6594 *
6595 * thresh (input) double
6596 * When the ratio returned from test() exceeds the specified
6597 * threshold, the current size, r_true, r_comp, and ratio will be
6598 * printed. (Since ratio is supposed to be O(1), we can set thresh
6599 * to ~10.)
6600 *
6601 * debug (input) int
6602 * If debug=3, print summary
6603 * If debug=2, print summary only if the number of bad ratios > 0
6604 * If debug=1, print complete info if tests fail
6605 * If debug=0, return max ratio
6606 *
6607 * test_prob (input) float
6608 * The specified test will be performed only if the generated
6609 * random exceeds this threshold.
6610 *
6611 * min_ratio (output) double
6612 * The minimum ratio
6613 *
6614 * num_bad_ratio (output) int
6615 * The number of tests fail; they are above the threshold.
6616 *
6617 * num_tests (output) int
6618 * The number of tests is being performed.
6619 *
6620 * Return value
6621 * ============
6622 *
6623 * The maximum ratio if run successfully, otherwise return -1
6624 *
6625 * Code structure
6626 * ==============
6627 *
6628 * debug loop -- if debug is one, the first loop computes the max ratio
6629 * -- and the last(second) loop outputs debugging information,
6630 * -- if the test fail and its ratio > 0.5 * max ratio.
6631 * -- if debug is zero, the loop is executed once
6632 * alpha loop -- varying alpha: 0, 1, or random
6633 * beta loop -- varying beta: 0, 1, or random
6634 * prec loop -- varying internal prec: single, double, or extra
6635 * norm loop -- varying norm: near undeflow, near one, or
6636 * -- near overflow
6637 * numtest loop -- how many times the test is perform with
6638 * -- above set of attributes
6639 * order loop -- varying order type: rowmajor or colmajor
6640 * trans loop -- varying uplo type: upper or lower
6641 * lda loop -- varying lda: m, m+1, 2m
6642 * incx loop -- varying incx: -2, -1, 1, 2
6643 * incy loop -- varying incy: -2, -1, 1, 2
6644 */
6645 {
6646 /* function name */
6647 const char fname[] = "BLAS_sgemv2_x";
6648
6649 /* max number of debug lines to print */
6650 const int max_print = 8;
6651
6652 /* Variables in the "x_val" form are loop vars for corresponding
6653 variables */
6654 int i; /* iterate through the repeating tests */
6655 int j, k; /* multipurpose counters or variables */
6656 int iy; /* use to index y */
6657 int incx_val, incy_val, /* for testing different inc values */
6658 incx, incy;
6659 int incy_gen; /* for complex case inc=2, for real case inc=1 */
6660 int d_count; /* counter for debug */
6661 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
6662 int p_count; /* counter for the number of debug lines printed */
6663 int tot_tests; /* total number of tests to be done */
6664 int norm; /* input values of near underflow/one/overflow */
6665 double ratio_max; /* the current maximum ratio */
6666 double ratio_min; /* the current minimum ratio */
6667 double *ratios; /* a temporary variable for calculating ratio */
6668 double ratio; /* the per-use test ratio from test() */
6669 int bad_ratios; /* the number of ratios over the threshold */
6670 double eps_int; /* the internal epsilon expected--2^(-24) for float */
6671 double un_int; /* the internal underflow threshold */
6672 float alpha;
6673 float beta;
6674 float *A;
6675 float *head_x;
6676 float *tail_x;
6677 float *y;
6678 float *temp; /* use for calculating ratio */
6679
6680 /* x_gen and y_gen are used to store vectors generated by testgen.
6681 they eventually are copied back to x and y */
6682 float *head_x_gen;
6683 float *tail_x_gen;
6684 float *y_gen;
6685
6686 /* the true r calculated by testgen(), in double-double */
6687 double *head_r_true, *tail_r_true;
6688 int alpha_val;
6689 int alpha_flag; /* input flag for BLAS_sgemv2_testgen */
6690 int beta_val;
6691 int beta_flag; /* input flag for BLAS_sgemv2_testgen */
6692 int order_val;
6693 enum blas_order_type order_type;
6694 int prec_val;
6695 enum blas_prec_type prec;
6696 int trans_val;
6697 enum blas_trans_type trans_type;
6698 int m_i;
6699 int n_i;
6700 int max_mn; /* the max of m and n */
6701 int lda_val;
6702 int lda;
6703 int saved_seed; /* for saving the original seed */
6704 int count, old_count; /* use for counting the number of testgen calls * 2 */
6705
6706 FPU_FIX_DECL;
6707
6708 /* test for bad arguments */
6709 if (n < 0 || m < 0 || ntests < 0)
6710 BLAS_error(fname, 0, 0, NULL);
6711
6712 /* initialization */
6713 *num_bad_ratio = 0;
6714 *num_tests = 0;
6715 *min_ratio = 0.0;
6716
6717 saved_seed = *seed;
6718 ratio_min = 1e308;
6719 ratio_max = 0.0;
6720 ratio = 0.0;
6721 tot_tests = 0;
6722 p_count = 0;
6723 count = 0;
6724 find_max_ratio = 0;
6725 bad_ratios = 0;
6726 old_count = 0;
6727
6728 if (debug == 3)
6729 find_max_ratio = 1;
6730 max_mn = MAX(m, n);
6731 if (m == 0 || n == 0) {
6732 return 0.0;
6733 }
6734
6735 FPU_FIX_START;
6736
6737 incy_gen = 1;
6738
6739
6740 /* get space for calculation */
6741 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
6742 if (max_mn * 2 > 0 && head_x == NULL) {
6743 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6744 }
6745 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
6746 if (max_mn * 2 > 0 && tail_x == NULL) {
6747 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6748 }
6749 y = (float *) blas_malloc(max_mn * 2 * sizeof(float));
6750 if (max_mn * 2 > 0 && y == NULL) {
6751 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6752 }
6753 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
6754 if (max_mn > 0 && head_x_gen == NULL) {
6755 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6756 }
6757 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
6758 if (max_mn > 0 && tail_x_gen == NULL) {
6759 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6760 }
6761 y_gen = (float *) blas_malloc(max_mn * sizeof(float));
6762 if (max_mn > 0 && y_gen == NULL) {
6763 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6764 }
6765 temp = (float *) blas_malloc(max_mn * sizeof(float));
6766 if (max_mn > 0 && temp == NULL) {
6767 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6768 }
6769 head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
6770 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
6771 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
6772 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6773 }
6774 ratios = (double *) blas_malloc(max_mn * sizeof(double));
6775 if (max_mn > 0 && ratios == NULL) {
6776 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6777 }
6778 A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
6779 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
6780 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6781 }
6782
6783 /* The debug iteration:
6784 If debug=1, then will execute the iteration twice. First, compute the
6785 max ratio. Second, print info if ratio > (50% * ratio_max). */
6786 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
6787 bad_ratios = 0; /* set to zero */
6788
6789 if ((debug == 3) && (d_count == find_max_ratio))
6790 *seed = saved_seed; /* restore the original seed */
6791
6792 /* varying alpha */
6793 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
6794 alpha_flag = 0;
6795 switch (alpha_val) {
6796 case 0:
6797 alpha = 0.0;
6798 alpha_flag = 1;
6799 break;
6800 case 1:
6801 alpha = 1.0;
6802 alpha_flag = 1;
6803 break;
6804 }
6805
6806 /* varying beta */
6807 for (beta_val = 0; beta_val < 3; beta_val++) {
6808 beta_flag = 0;
6809 switch (beta_val) {
6810 case 0:
6811 beta = 0.0;
6812 beta_flag = 1;
6813 break;
6814 case 1:
6815 beta = 1.0;
6816 beta_flag = 1;
6817 break;
6818 }
6819
6820
6821 /* varying extra precs */
6822 for (prec_val = 0; prec_val <= 2; prec_val++) {
6823 switch (prec_val) {
6824 case 0:
6825 eps_int = power(2, -BITS_S);
6826 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
6827 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
6828 prec = blas_prec_single;
6829 break;
6830 case 1:
6831 eps_int = power(2, -BITS_D);
6832 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6833 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6834 prec = blas_prec_double;
6835 break;
6836 case 2:
6837 default:
6838 eps_int = power(2, -BITS_E);
6839 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
6840 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
6841 prec = blas_prec_extra;
6842 break;
6843 }
6844
6845 /* values near underflow, 1, or overflow */
6846 for (norm = -1; norm <= 1; norm++) {
6847
6848 /* number of tests */
6849 for (i = 0; i < ntests; i++) {
6850
6851 /* row or col major */
6852 for (order_val = 0; order_val < 2; order_val++) {
6853 switch (order_val) {
6854 case 0:
6855 order_type = blas_rowmajor;
6856 break;
6857 case 1:
6858 default:
6859 order_type = blas_colmajor;
6860 break;
6861 }
6862
6863 /* no_trans, trans, or conj_trans */
6864 for (trans_val = 0; trans_val < 3; trans_val++) {
6865 switch (trans_val) {
6866 case 0:
6867 trans_type = blas_no_trans;
6868 m_i = m;
6869 n_i = n;
6870 break;
6871 case 1:
6872 trans_type = blas_trans;
6873 m_i = n;
6874 n_i = m;
6875 break;
6876 case 2:
6877 default:
6878 trans_type = blas_conj_trans;
6879 m_i = n;
6880 n_i = m;
6881 break;
6882 }
6883
6884 /* lda=n, n+1, or 2n */
6885 for (lda_val = 0; lda_val < 3; lda_val++) {
6886 switch (lda_val) {
6887 case 0:
6888 lda = m_i;
6889 break;
6890 case 1:
6891 lda = m_i + 1;
6892 break;
6893 case 2:
6894 default:
6895 lda = 2 * m_i;
6896 break;
6897 }
6898 if ((order_type == blas_rowmajor && lda < n) ||
6899 (order_type == blas_colmajor && lda < m))
6900 continue;
6901
6902 /* For the sake of speed, we throw out this case at random */
6903 if (xrand(seed) >= test_prob)
6904 continue;
6905
6906 /* in the trivial cases, no need to run testgen */
6907 if (m > 0 && n > 0)
6908 BLAS_sgemv2_testgen(norm, order_type, trans_type, m, n,
6909 &alpha, alpha_flag, A, lda,
6910 head_x_gen, tail_x_gen, &beta,
6911 beta_flag, y_gen, seed, head_r_true,
6912 tail_r_true);
6913
6914 count++;
6915
6916 /* varying incx */
6917 for (incx_val = -2; incx_val <= 2; incx_val++) {
6918 if (incx_val == 0)
6919 continue;
6920
6921 /* setting incx */
6922 incx = incx_val;
6923
6924
6925 scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
6926 scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
6927
6928 /* varying incy */
6929 for (incy_val = -2; incy_val <= 2; incy_val++) {
6930 if (incy_val == 0)
6931 continue;
6932
6933 /* setting incy */
6934 incy = incy_val;
6935
6936
6937 scopy_vector(y_gen, m_i, 1, y, incy_val);
6938
6939 /* call BLAS_sgemv2_x */
6940 FPU_FIX_STOP;
6941 BLAS_sgemv2_x(order_type, trans_type, m, n, alpha, A,
6942 lda, head_x, tail_x, incx_val, beta, y,
6943 incy_val, prec);
6944 FPU_FIX_START;
6945
6946 /* set y starting index */
6947 iy = 0;
6948 if (incy < 0)
6949 iy = -(m_i - 1) * incy;
6950
6951 /* computing the ratio */
6952 if (m > 0 && n > 0)
6953 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
6954 /* copy row j of A to temp */
6955 sge_copy_row(order_type, trans_type, m_i, n_i, A,
6956 lda, temp, j);
6957
6958 test_BLAS_sdot2(n_i, blas_no_conj, alpha, beta,
6959 y_gen[k], y[iy], head_r_true[k],
6960 tail_r_true[k], temp, 1, head_x,
6961 tail_x, incx_val, eps_int, un_int,
6962 &ratios[j]);
6963
6964 /* take the max ratio */
6965 if (j == 0) {
6966 ratio = ratios[0];
6967 /* The !<= below causes NaN error to be detected.
6968 Note that (NaN > thresh) is always false. */
6969 } else if (!(ratios[j] <= ratio)) {
6970 ratio = ratios[j];
6971 }
6972 iy += incy;
6973 }
6974
6975 /* Increase the number of bad ratio, if the ratio
6976 is bigger than the threshold.
6977 The !<= below causes NaN error to be detected.
6978 Note that (NaN > thresh) is always false. */
6979 if (!(ratio <= thresh)) {
6980 bad_ratios++;
6981
6982 if ((debug == 3) && /* print only when debug is on */
6983 (count != old_count) && /* print if old vector is different
6984 from the current one */
6985 (d_count == find_max_ratio) &&
6986 (p_count <= max_print) &&
6987 (ratio > 0.5 * ratio_max)) {
6988 old_count = count;
6989
6990 printf
6991 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
6992 fname, m, n, ntests, thresh);
6993
6994 /* Print test info */
6995 switch (prec) {
6996 case blas_prec_single:
6997 printf("single ");
6998 break;
6999 case blas_prec_double:
7000 printf("double ");
7001 break;
7002 case blas_prec_indigenous:
7003 printf("indigenous ");
7004 break;
7005 case blas_prec_extra:
7006 printf("extra ");
7007 break;
7008 }
7009 switch (norm) {
7010 case -1:
7011 printf("near_underflow ");
7012 break;
7013 case 0:
7014 printf("near_one ");
7015 break;
7016 case 1:
7017 printf("near_overflow ");
7018 break;
7019 }
7020 switch (order_type) {
7021 case blas_rowmajor:
7022 printf("row_major ");
7023 break;
7024 case blas_colmajor:
7025 printf("col_major ");
7026 break;
7027 }
7028 switch (trans_type) {
7029 case blas_no_trans:
7030 printf("no_trans ");
7031 break;
7032 case blas_trans:
7033 printf("trans ");
7034 break;
7035 case blas_conj_trans:
7036 printf("conj_trans ");
7037 break;
7038 }
7039
7040 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
7041 incy);
7042
7043 sge_print_matrix(A, m_i, n_i, lda, order_type,
7044 "A");
7045
7046 sprint_vector(head_x, n_i, incx_val, "head_x");
7047 sprint_vector(tail_x, n_i, incx_val, "tail_x");
7048 sprint_vector(y_gen, m_i, 1, "y_gen");
7049 sprint_vector(y, m_i, incy_val, "y_final");
7050
7051 printf(" ");
7052 printf("alpha = ");
7053 printf("%16.8e", alpha);
7054 printf("\n ");
7055 printf("beta = ");
7056 printf("%16.8e", beta);
7057 printf("\n");
7058 for (j = 0, k = 0; j < m_i * incy_gen;
7059 j += incy_gen, k++) {
7060 printf(" ");
7061 printf("[%24.16e, %24.16e]", head_r_true[j],
7062 tail_r_true[j]);
7063 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
7064 }
7065
7066 printf(" ratio=%.4e\n", ratio);
7067 p_count++;
7068 }
7069 if (bad_ratios >= MAX_BAD_TESTS) {
7070 printf("\ntoo many failures, exiting....");
7071 printf("\nTesting and compilation");
7072 printf(" are incomplete\n\n");
7073 goto end;
7074 }
7075 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
7076 printf("\nFlagrant ratio error, exiting...");
7077 printf("\nTesting and compilation");
7078 printf(" are incomplete\n\n");
7079 goto end;
7080 }
7081 }
7082 if (d_count == 0) {
7083 if (ratio > ratio_max)
7084 ratio_max = ratio;
7085
7086 if (ratio != 0.0 && ratio < ratio_min)
7087 ratio_min = ratio;
7088
7089 tot_tests++;
7090 }
7091 } /* incy */
7092 } /* incx */
7093 } /* lda */
7094 } /* trans */
7095 } /* order */
7096 } /* tests */
7097 } /* norm */
7098 } /* prec */
7099 } /* beta */
7100 } /* alpha */
7101 } /* debug */
7102
7103 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
7104 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
7105 fname, m, n, ntests, thresh);
7106 printf
7107 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
7108 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
7109 ratio_min, ratio_max);
7110 }
7111
7112 end:
7113 FPU_FIX_STOP;
7114
7115 blas_free(head_x);
7116 blas_free(tail_x);
7117 blas_free(y);
7118 blas_free(head_x_gen);
7119 blas_free(tail_x_gen);
7120 blas_free(y_gen);
7121 blas_free(temp);
7122 blas_free(A);
7123 blas_free(head_r_true);
7124 blas_free(tail_r_true);
7125 blas_free(ratios);
7126
7127 *min_ratio = ratio_min;
7128 *num_bad_ratio = bad_ratios;
7129 *num_tests = tot_tests;
7130 return ratio_max;
7131 }
do_test_dgemv2_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)7132 double do_test_dgemv2_x(int m, int n, int ntests, int *seed, double thresh,
7133 int debug, float test_prob, double *min_ratio,
7134 int *num_bad_ratio, int *num_tests)
7135
7136 /*
7137 * Purpose
7138 * =======
7139 *
7140 * Runs a series of tests on GEMV2.
7141 *
7142 * Arguments
7143 * =========
7144 *
7145 * m (input) int
7146 * The number of rows
7147 *
7148 * n (input) int
7149 * The number of columns
7150 *
7151 * ntests (input) int
7152 * The number of tests to run for each set of attributes.
7153 *
7154 * seed (input/output) int
7155 * The seed for the random number generator used in testgen().
7156 *
7157 * thresh (input) double
7158 * When the ratio returned from test() exceeds the specified
7159 * threshold, the current size, r_true, r_comp, and ratio will be
7160 * printed. (Since ratio is supposed to be O(1), we can set thresh
7161 * to ~10.)
7162 *
7163 * debug (input) int
7164 * If debug=3, print summary
7165 * If debug=2, print summary only if the number of bad ratios > 0
7166 * If debug=1, print complete info if tests fail
7167 * If debug=0, return max ratio
7168 *
7169 * test_prob (input) float
7170 * The specified test will be performed only if the generated
7171 * random exceeds this threshold.
7172 *
7173 * min_ratio (output) double
7174 * The minimum ratio
7175 *
7176 * num_bad_ratio (output) int
7177 * The number of tests fail; they are above the threshold.
7178 *
7179 * num_tests (output) int
7180 * The number of tests is being performed.
7181 *
7182 * Return value
7183 * ============
7184 *
7185 * The maximum ratio if run successfully, otherwise return -1
7186 *
7187 * Code structure
7188 * ==============
7189 *
7190 * debug loop -- if debug is one, the first loop computes the max ratio
7191 * -- and the last(second) loop outputs debugging information,
7192 * -- if the test fail and its ratio > 0.5 * max ratio.
7193 * -- if debug is zero, the loop is executed once
7194 * alpha loop -- varying alpha: 0, 1, or random
7195 * beta loop -- varying beta: 0, 1, or random
7196 * prec loop -- varying internal prec: single, double, or extra
7197 * norm loop -- varying norm: near undeflow, near one, or
7198 * -- near overflow
7199 * numtest loop -- how many times the test is perform with
7200 * -- above set of attributes
7201 * order loop -- varying order type: rowmajor or colmajor
7202 * trans loop -- varying uplo type: upper or lower
7203 * lda loop -- varying lda: m, m+1, 2m
7204 * incx loop -- varying incx: -2, -1, 1, 2
7205 * incy loop -- varying incy: -2, -1, 1, 2
7206 */
7207 {
7208 /* function name */
7209 const char fname[] = "BLAS_dgemv2_x";
7210
7211 /* max number of debug lines to print */
7212 const int max_print = 8;
7213
7214 /* Variables in the "x_val" form are loop vars for corresponding
7215 variables */
7216 int i; /* iterate through the repeating tests */
7217 int j, k; /* multipurpose counters or variables */
7218 int iy; /* use to index y */
7219 int incx_val, incy_val, /* for testing different inc values */
7220 incx, incy;
7221 int incy_gen; /* for complex case inc=2, for real case inc=1 */
7222 int d_count; /* counter for debug */
7223 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
7224 int p_count; /* counter for the number of debug lines printed */
7225 int tot_tests; /* total number of tests to be done */
7226 int norm; /* input values of near underflow/one/overflow */
7227 double ratio_max; /* the current maximum ratio */
7228 double ratio_min; /* the current minimum ratio */
7229 double *ratios; /* a temporary variable for calculating ratio */
7230 double ratio; /* the per-use test ratio from test() */
7231 int bad_ratios; /* the number of ratios over the threshold */
7232 double eps_int; /* the internal epsilon expected--2^(-24) for float */
7233 double un_int; /* the internal underflow threshold */
7234 double alpha;
7235 double beta;
7236 double *A;
7237 double *head_x;
7238 double *tail_x;
7239 double *y;
7240 double *temp; /* use for calculating ratio */
7241
7242 /* x_gen and y_gen are used to store vectors generated by testgen.
7243 they eventually are copied back to x and y */
7244 double *head_x_gen;
7245 double *tail_x_gen;
7246 double *y_gen;
7247
7248 /* the true r calculated by testgen(), in double-double */
7249 double *head_r_true, *tail_r_true;
7250 int alpha_val;
7251 int alpha_flag; /* input flag for BLAS_dgemv2_testgen */
7252 int beta_val;
7253 int beta_flag; /* input flag for BLAS_dgemv2_testgen */
7254 int order_val;
7255 enum blas_order_type order_type;
7256 int prec_val;
7257 enum blas_prec_type prec;
7258 int trans_val;
7259 enum blas_trans_type trans_type;
7260 int m_i;
7261 int n_i;
7262 int max_mn; /* the max of m and n */
7263 int lda_val;
7264 int lda;
7265 int saved_seed; /* for saving the original seed */
7266 int count, old_count; /* use for counting the number of testgen calls * 2 */
7267
7268 FPU_FIX_DECL;
7269
7270 /* test for bad arguments */
7271 if (n < 0 || m < 0 || ntests < 0)
7272 BLAS_error(fname, 0, 0, NULL);
7273
7274 /* initialization */
7275 *num_bad_ratio = 0;
7276 *num_tests = 0;
7277 *min_ratio = 0.0;
7278
7279 saved_seed = *seed;
7280 ratio_min = 1e308;
7281 ratio_max = 0.0;
7282 ratio = 0.0;
7283 tot_tests = 0;
7284 p_count = 0;
7285 count = 0;
7286 find_max_ratio = 0;
7287 bad_ratios = 0;
7288 old_count = 0;
7289
7290 if (debug == 3)
7291 find_max_ratio = 1;
7292 max_mn = MAX(m, n);
7293 if (m == 0 || n == 0) {
7294 return 0.0;
7295 }
7296
7297 FPU_FIX_START;
7298
7299 incy_gen = 1;
7300
7301
7302 /* get space for calculation */
7303 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
7304 if (max_mn * 2 > 0 && head_x == NULL) {
7305 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7306 }
7307 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
7308 if (max_mn * 2 > 0 && tail_x == NULL) {
7309 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7310 }
7311 y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
7312 if (max_mn * 2 > 0 && y == NULL) {
7313 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7314 }
7315 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
7316 if (max_mn > 0 && head_x_gen == NULL) {
7317 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7318 }
7319 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
7320 if (max_mn > 0 && tail_x_gen == NULL) {
7321 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7322 }
7323 y_gen = (double *) blas_malloc(max_mn * sizeof(double));
7324 if (max_mn > 0 && y_gen == NULL) {
7325 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7326 }
7327 temp = (double *) blas_malloc(max_mn * sizeof(double));
7328 if (max_mn > 0 && temp == NULL) {
7329 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7330 }
7331 head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
7332 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
7333 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
7334 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7335 }
7336 ratios = (double *) blas_malloc(max_mn * sizeof(double));
7337 if (max_mn > 0 && ratios == NULL) {
7338 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7339 }
7340 A =
7341 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
7342 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
7343 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7344 }
7345
7346 /* The debug iteration:
7347 If debug=1, then will execute the iteration twice. First, compute the
7348 max ratio. Second, print info if ratio > (50% * ratio_max). */
7349 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
7350 bad_ratios = 0; /* set to zero */
7351
7352 if ((debug == 3) && (d_count == find_max_ratio))
7353 *seed = saved_seed; /* restore the original seed */
7354
7355 /* varying alpha */
7356 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
7357 alpha_flag = 0;
7358 switch (alpha_val) {
7359 case 0:
7360 alpha = 0.0;
7361 alpha_flag = 1;
7362 break;
7363 case 1:
7364 alpha = 1.0;
7365 alpha_flag = 1;
7366 break;
7367 }
7368
7369 /* varying beta */
7370 for (beta_val = 0; beta_val < 3; beta_val++) {
7371 beta_flag = 0;
7372 switch (beta_val) {
7373 case 0:
7374 beta = 0.0;
7375 beta_flag = 1;
7376 break;
7377 case 1:
7378 beta = 1.0;
7379 beta_flag = 1;
7380 break;
7381 }
7382
7383
7384 /* varying extra precs */
7385 for (prec_val = 0; prec_val <= 2; prec_val++) {
7386 switch (prec_val) {
7387 case 0:
7388 eps_int = power(2, -BITS_D);
7389 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
7390 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
7391 prec = blas_prec_double;
7392 break;
7393 case 1:
7394 eps_int = power(2, -BITS_D);
7395 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
7396 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
7397 prec = blas_prec_double;
7398 break;
7399 case 2:
7400 default:
7401 eps_int = power(2, -BITS_E);
7402 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
7403 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
7404 prec = blas_prec_extra;
7405 break;
7406 }
7407
7408 /* values near underflow, 1, or overflow */
7409 for (norm = -1; norm <= 1; norm++) {
7410
7411 /* number of tests */
7412 for (i = 0; i < ntests; i++) {
7413
7414 /* row or col major */
7415 for (order_val = 0; order_val < 2; order_val++) {
7416 switch (order_val) {
7417 case 0:
7418 order_type = blas_rowmajor;
7419 break;
7420 case 1:
7421 default:
7422 order_type = blas_colmajor;
7423 break;
7424 }
7425
7426 /* no_trans, trans, or conj_trans */
7427 for (trans_val = 0; trans_val < 3; trans_val++) {
7428 switch (trans_val) {
7429 case 0:
7430 trans_type = blas_no_trans;
7431 m_i = m;
7432 n_i = n;
7433 break;
7434 case 1:
7435 trans_type = blas_trans;
7436 m_i = n;
7437 n_i = m;
7438 break;
7439 case 2:
7440 default:
7441 trans_type = blas_conj_trans;
7442 m_i = n;
7443 n_i = m;
7444 break;
7445 }
7446
7447 /* lda=n, n+1, or 2n */
7448 for (lda_val = 0; lda_val < 3; lda_val++) {
7449 switch (lda_val) {
7450 case 0:
7451 lda = m_i;
7452 break;
7453 case 1:
7454 lda = m_i + 1;
7455 break;
7456 case 2:
7457 default:
7458 lda = 2 * m_i;
7459 break;
7460 }
7461 if ((order_type == blas_rowmajor && lda < n) ||
7462 (order_type == blas_colmajor && lda < m))
7463 continue;
7464
7465 /* For the sake of speed, we throw out this case at random */
7466 if (xrand(seed) >= test_prob)
7467 continue;
7468
7469 /* in the trivial cases, no need to run testgen */
7470 if (m > 0 && n > 0)
7471 BLAS_dgemv2_testgen(norm, order_type, trans_type, m, n,
7472 &alpha, alpha_flag, A, lda,
7473 head_x_gen, tail_x_gen, &beta,
7474 beta_flag, y_gen, seed, head_r_true,
7475 tail_r_true);
7476
7477 count++;
7478
7479 /* varying incx */
7480 for (incx_val = -2; incx_val <= 2; incx_val++) {
7481 if (incx_val == 0)
7482 continue;
7483
7484 /* setting incx */
7485 incx = incx_val;
7486
7487
7488 dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
7489 dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
7490
7491 /* varying incy */
7492 for (incy_val = -2; incy_val <= 2; incy_val++) {
7493 if (incy_val == 0)
7494 continue;
7495
7496 /* setting incy */
7497 incy = incy_val;
7498
7499
7500 dcopy_vector(y_gen, m_i, 1, y, incy_val);
7501
7502 /* call BLAS_dgemv2_x */
7503 FPU_FIX_STOP;
7504 BLAS_dgemv2_x(order_type, trans_type, m, n, alpha, A,
7505 lda, head_x, tail_x, incx_val, beta, y,
7506 incy_val, prec);
7507 FPU_FIX_START;
7508
7509 /* set y starting index */
7510 iy = 0;
7511 if (incy < 0)
7512 iy = -(m_i - 1) * incy;
7513
7514 /* computing the ratio */
7515 if (m > 0 && n > 0)
7516 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
7517 /* copy row j of A to temp */
7518 dge_copy_row(order_type, trans_type, m_i, n_i, A,
7519 lda, temp, j);
7520
7521 test_BLAS_ddot2(n_i, blas_no_conj, alpha, beta,
7522 y_gen[k], y[iy], head_r_true[k],
7523 tail_r_true[k], temp, 1, head_x,
7524 tail_x, incx_val, eps_int, un_int,
7525 &ratios[j]);
7526
7527 /* take the max ratio */
7528 if (j == 0) {
7529 ratio = ratios[0];
7530 /* The !<= below causes NaN error to be detected.
7531 Note that (NaN > thresh) is always false. */
7532 } else if (!(ratios[j] <= ratio)) {
7533 ratio = ratios[j];
7534 }
7535 iy += incy;
7536 }
7537
7538 /* Increase the number of bad ratio, if the ratio
7539 is bigger than the threshold.
7540 The !<= below causes NaN error to be detected.
7541 Note that (NaN > thresh) is always false. */
7542 if (!(ratio <= thresh)) {
7543 bad_ratios++;
7544
7545 if ((debug == 3) && /* print only when debug is on */
7546 (count != old_count) && /* print if old vector is different
7547 from the current one */
7548 (d_count == find_max_ratio) &&
7549 (p_count <= max_print) &&
7550 (ratio > 0.5 * ratio_max)) {
7551 old_count = count;
7552
7553 printf
7554 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
7555 fname, m, n, ntests, thresh);
7556
7557 /* Print test info */
7558 switch (prec) {
7559 case blas_prec_single:
7560 printf("single ");
7561 break;
7562 case blas_prec_double:
7563 printf("double ");
7564 break;
7565 case blas_prec_indigenous:
7566 printf("indigenous ");
7567 break;
7568 case blas_prec_extra:
7569 printf("extra ");
7570 break;
7571 }
7572 switch (norm) {
7573 case -1:
7574 printf("near_underflow ");
7575 break;
7576 case 0:
7577 printf("near_one ");
7578 break;
7579 case 1:
7580 printf("near_overflow ");
7581 break;
7582 }
7583 switch (order_type) {
7584 case blas_rowmajor:
7585 printf("row_major ");
7586 break;
7587 case blas_colmajor:
7588 printf("col_major ");
7589 break;
7590 }
7591 switch (trans_type) {
7592 case blas_no_trans:
7593 printf("no_trans ");
7594 break;
7595 case blas_trans:
7596 printf("trans ");
7597 break;
7598 case blas_conj_trans:
7599 printf("conj_trans ");
7600 break;
7601 }
7602
7603 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
7604 incy);
7605
7606 dge_print_matrix(A, m_i, n_i, lda, order_type,
7607 "A");
7608
7609 dprint_vector(head_x, n_i, incx_val, "head_x");
7610 dprint_vector(tail_x, n_i, incx_val, "tail_x");
7611 dprint_vector(y_gen, m_i, 1, "y_gen");
7612 dprint_vector(y, m_i, incy_val, "y_final");
7613
7614 printf(" ");
7615 printf("alpha = ");
7616 printf("%24.16e", alpha);
7617 printf("\n ");
7618 printf("beta = ");
7619 printf("%24.16e", beta);
7620 printf("\n");
7621 for (j = 0, k = 0; j < m_i * incy_gen;
7622 j += incy_gen, k++) {
7623 printf(" ");
7624 printf("[%24.16e, %24.16e]", head_r_true[j],
7625 tail_r_true[j]);
7626 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
7627 }
7628
7629 printf(" ratio=%.4e\n", ratio);
7630 p_count++;
7631 }
7632 if (bad_ratios >= MAX_BAD_TESTS) {
7633 printf("\ntoo many failures, exiting....");
7634 printf("\nTesting and compilation");
7635 printf(" are incomplete\n\n");
7636 goto end;
7637 }
7638 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
7639 printf("\nFlagrant ratio error, exiting...");
7640 printf("\nTesting and compilation");
7641 printf(" are incomplete\n\n");
7642 goto end;
7643 }
7644 }
7645 if (d_count == 0) {
7646 if (ratio > ratio_max)
7647 ratio_max = ratio;
7648
7649 if (ratio != 0.0 && ratio < ratio_min)
7650 ratio_min = ratio;
7651
7652 tot_tests++;
7653 }
7654 } /* incy */
7655 } /* incx */
7656 } /* lda */
7657 } /* trans */
7658 } /* order */
7659 } /* tests */
7660 } /* norm */
7661 } /* prec */
7662 } /* beta */
7663 } /* alpha */
7664 } /* debug */
7665
7666 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
7667 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
7668 fname, m, n, ntests, thresh);
7669 printf
7670 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
7671 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
7672 ratio_min, ratio_max);
7673 }
7674
7675 end:
7676 FPU_FIX_STOP;
7677
7678 blas_free(head_x);
7679 blas_free(tail_x);
7680 blas_free(y);
7681 blas_free(head_x_gen);
7682 blas_free(tail_x_gen);
7683 blas_free(y_gen);
7684 blas_free(temp);
7685 blas_free(A);
7686 blas_free(head_r_true);
7687 blas_free(tail_r_true);
7688 blas_free(ratios);
7689
7690 *min_ratio = ratio_min;
7691 *num_bad_ratio = bad_ratios;
7692 *num_tests = tot_tests;
7693 return ratio_max;
7694 }
do_test_cgemv2_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)7695 double do_test_cgemv2_x(int m, int n, int ntests, int *seed, double thresh,
7696 int debug, float test_prob, double *min_ratio,
7697 int *num_bad_ratio, int *num_tests)
7698
7699 /*
7700 * Purpose
7701 * =======
7702 *
7703 * Runs a series of tests on GEMV2.
7704 *
7705 * Arguments
7706 * =========
7707 *
7708 * m (input) int
7709 * The number of rows
7710 *
7711 * n (input) int
7712 * The number of columns
7713 *
7714 * ntests (input) int
7715 * The number of tests to run for each set of attributes.
7716 *
7717 * seed (input/output) int
7718 * The seed for the random number generator used in testgen().
7719 *
7720 * thresh (input) double
7721 * When the ratio returned from test() exceeds the specified
7722 * threshold, the current size, r_true, r_comp, and ratio will be
7723 * printed. (Since ratio is supposed to be O(1), we can set thresh
7724 * to ~10.)
7725 *
7726 * debug (input) int
7727 * If debug=3, print summary
7728 * If debug=2, print summary only if the number of bad ratios > 0
7729 * If debug=1, print complete info if tests fail
7730 * If debug=0, return max ratio
7731 *
7732 * test_prob (input) float
7733 * The specified test will be performed only if the generated
7734 * random exceeds this threshold.
7735 *
7736 * min_ratio (output) double
7737 * The minimum ratio
7738 *
7739 * num_bad_ratio (output) int
7740 * The number of tests fail; they are above the threshold.
7741 *
7742 * num_tests (output) int
7743 * The number of tests is being performed.
7744 *
7745 * Return value
7746 * ============
7747 *
7748 * The maximum ratio if run successfully, otherwise return -1
7749 *
7750 * Code structure
7751 * ==============
7752 *
7753 * debug loop -- if debug is one, the first loop computes the max ratio
7754 * -- and the last(second) loop outputs debugging information,
7755 * -- if the test fail and its ratio > 0.5 * max ratio.
7756 * -- if debug is zero, the loop is executed once
7757 * alpha loop -- varying alpha: 0, 1, or random
7758 * beta loop -- varying beta: 0, 1, or random
7759 * prec loop -- varying internal prec: single, double, or extra
7760 * norm loop -- varying norm: near undeflow, near one, or
7761 * -- near overflow
7762 * numtest loop -- how many times the test is perform with
7763 * -- above set of attributes
7764 * order loop -- varying order type: rowmajor or colmajor
7765 * trans loop -- varying uplo type: upper or lower
7766 * lda loop -- varying lda: m, m+1, 2m
7767 * incx loop -- varying incx: -2, -1, 1, 2
7768 * incy loop -- varying incy: -2, -1, 1, 2
7769 */
7770 {
7771 /* function name */
7772 const char fname[] = "BLAS_cgemv2_x";
7773
7774 /* max number of debug lines to print */
7775 const int max_print = 8;
7776
7777 /* Variables in the "x_val" form are loop vars for corresponding
7778 variables */
7779 int i; /* iterate through the repeating tests */
7780 int j, k; /* multipurpose counters or variables */
7781 int iy; /* use to index y */
7782 int incx_val, incy_val, /* for testing different inc values */
7783 incx, incy;
7784 int incy_gen; /* for complex case inc=2, for real case inc=1 */
7785 int d_count; /* counter for debug */
7786 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
7787 int p_count; /* counter for the number of debug lines printed */
7788 int tot_tests; /* total number of tests to be done */
7789 int norm; /* input values of near underflow/one/overflow */
7790 double ratio_max; /* the current maximum ratio */
7791 double ratio_min; /* the current minimum ratio */
7792 double *ratios; /* a temporary variable for calculating ratio */
7793 double ratio; /* the per-use test ratio from test() */
7794 int bad_ratios; /* the number of ratios over the threshold */
7795 double eps_int; /* the internal epsilon expected--2^(-24) for float */
7796 double un_int; /* the internal underflow threshold */
7797 float alpha[2];
7798 float beta[2];
7799 float *A;
7800 float *head_x;
7801 float *tail_x;
7802 float *y;
7803 float *temp; /* use for calculating ratio */
7804
7805 /* x_gen and y_gen are used to store vectors generated by testgen.
7806 they eventually are copied back to x and y */
7807 float *head_x_gen;
7808 float *tail_x_gen;
7809 float *y_gen;
7810
7811 /* the true r calculated by testgen(), in double-double */
7812 double *head_r_true, *tail_r_true;
7813
7814 int alpha_val;
7815 int alpha_flag; /* input flag for BLAS_cgemv2_testgen */
7816 int beta_val;
7817 int beta_flag; /* input flag for BLAS_cgemv2_testgen */
7818 int order_val;
7819 enum blas_order_type order_type;
7820 int prec_val;
7821 enum blas_prec_type prec;
7822 int trans_val;
7823 enum blas_trans_type trans_type;
7824 int m_i;
7825 int n_i;
7826 int max_mn; /* the max of m and n */
7827 int lda_val;
7828 int lda;
7829 int saved_seed; /* for saving the original seed */
7830 int count, old_count; /* use for counting the number of testgen calls * 2 */
7831
7832 FPU_FIX_DECL;
7833
7834 /* test for bad arguments */
7835 if (n < 0 || m < 0 || ntests < 0)
7836 BLAS_error(fname, 0, 0, NULL);
7837
7838 /* initialization */
7839 *num_bad_ratio = 0;
7840 *num_tests = 0;
7841 *min_ratio = 0.0;
7842
7843 saved_seed = *seed;
7844 ratio_min = 1e308;
7845 ratio_max = 0.0;
7846 ratio = 0.0;
7847 tot_tests = 0;
7848 p_count = 0;
7849 count = 0;
7850 find_max_ratio = 0;
7851 bad_ratios = 0;
7852 old_count = 0;
7853
7854 if (debug == 3)
7855 find_max_ratio = 1;
7856 max_mn = MAX(m, n);
7857 if (m == 0 || n == 0) {
7858 return 0.0;
7859 }
7860
7861 FPU_FIX_START;
7862
7863 incy_gen = 1;
7864 incy_gen *= 2;
7865
7866 /* get space for calculation */
7867 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
7868 if (max_mn * 2 > 0 && head_x == NULL) {
7869 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7870 }
7871 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
7872 if (max_mn * 2 > 0 && tail_x == NULL) {
7873 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7874 }
7875 y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
7876 if (max_mn * 2 > 0 && y == NULL) {
7877 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7878 }
7879 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
7880 if (max_mn > 0 && head_x_gen == NULL) {
7881 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7882 }
7883 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
7884 if (max_mn > 0 && tail_x_gen == NULL) {
7885 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7886 }
7887 y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
7888 if (max_mn > 0 && y_gen == NULL) {
7889 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7890 }
7891 temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
7892 if (max_mn > 0 && temp == NULL) {
7893 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7894 }
7895 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
7896 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
7897 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
7898 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7899 }
7900 ratios = (double *) blas_malloc(max_mn * sizeof(double));
7901 if (max_mn > 0 && ratios == NULL) {
7902 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7903 }
7904 A =
7905 (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
7906 2);
7907 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
7908 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7909 }
7910
7911 /* The debug iteration:
7912 If debug=1, then will execute the iteration twice. First, compute the
7913 max ratio. Second, print info if ratio > (50% * ratio_max). */
7914 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
7915 bad_ratios = 0; /* set to zero */
7916
7917 if ((debug == 3) && (d_count == find_max_ratio))
7918 *seed = saved_seed; /* restore the original seed */
7919
7920 /* varying alpha */
7921 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
7922 alpha_flag = 0;
7923 switch (alpha_val) {
7924 case 0:
7925 alpha[0] = alpha[1] = 0.0;
7926 alpha_flag = 1;
7927 break;
7928 case 1:
7929 alpha[0] = 1.0;
7930 alpha[1] = 0.0;
7931 alpha_flag = 1;
7932 break;
7933 }
7934
7935 /* varying beta */
7936 for (beta_val = 0; beta_val < 3; beta_val++) {
7937 beta_flag = 0;
7938 switch (beta_val) {
7939 case 0:
7940 beta[0] = beta[1] = 0.0;
7941 beta_flag = 1;
7942 break;
7943 case 1:
7944 beta[0] = 1.0;
7945 beta[1] = 0.0;
7946 beta_flag = 1;
7947 break;
7948 }
7949
7950
7951 /* varying extra precs */
7952 for (prec_val = 0; prec_val <= 2; prec_val++) {
7953 switch (prec_val) {
7954 case 0:
7955 eps_int = power(2, -BITS_S);
7956 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
7957 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
7958 prec = blas_prec_single;
7959 break;
7960 case 1:
7961 eps_int = power(2, -BITS_D);
7962 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
7963 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
7964 prec = blas_prec_double;
7965 break;
7966 case 2:
7967 default:
7968 eps_int = power(2, -BITS_E);
7969 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
7970 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
7971 prec = blas_prec_extra;
7972 break;
7973 }
7974
7975 /* values near underflow, 1, or overflow */
7976 for (norm = -1; norm <= 1; norm++) {
7977
7978 /* number of tests */
7979 for (i = 0; i < ntests; i++) {
7980
7981 /* row or col major */
7982 for (order_val = 0; order_val < 2; order_val++) {
7983 switch (order_val) {
7984 case 0:
7985 order_type = blas_rowmajor;
7986 break;
7987 case 1:
7988 default:
7989 order_type = blas_colmajor;
7990 break;
7991 }
7992
7993 /* no_trans, trans, or conj_trans */
7994 for (trans_val = 0; trans_val < 3; trans_val++) {
7995 switch (trans_val) {
7996 case 0:
7997 trans_type = blas_no_trans;
7998 m_i = m;
7999 n_i = n;
8000 break;
8001 case 1:
8002 trans_type = blas_trans;
8003 m_i = n;
8004 n_i = m;
8005 break;
8006 case 2:
8007 default:
8008 trans_type = blas_conj_trans;
8009 m_i = n;
8010 n_i = m;
8011 break;
8012 }
8013
8014 /* lda=n, n+1, or 2n */
8015 for (lda_val = 0; lda_val < 3; lda_val++) {
8016 switch (lda_val) {
8017 case 0:
8018 lda = m_i;
8019 break;
8020 case 1:
8021 lda = m_i + 1;
8022 break;
8023 case 2:
8024 default:
8025 lda = 2 * m_i;
8026 break;
8027 }
8028 if ((order_type == blas_rowmajor && lda < n) ||
8029 (order_type == blas_colmajor && lda < m))
8030 continue;
8031
8032 /* For the sake of speed, we throw out this case at random */
8033 if (xrand(seed) >= test_prob)
8034 continue;
8035
8036 /* in the trivial cases, no need to run testgen */
8037 if (m > 0 && n > 0)
8038 BLAS_cgemv2_testgen(norm, order_type, trans_type, m, n,
8039 &alpha, alpha_flag, A, lda,
8040 head_x_gen, tail_x_gen, &beta,
8041 beta_flag, y_gen, seed, head_r_true,
8042 tail_r_true);
8043
8044 count++;
8045
8046 /* varying incx */
8047 for (incx_val = -2; incx_val <= 2; incx_val++) {
8048 if (incx_val == 0)
8049 continue;
8050
8051 /* setting incx */
8052 incx = incx_val;
8053 incx *= 2;
8054
8055 ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
8056 ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
8057
8058 /* varying incy */
8059 for (incy_val = -2; incy_val <= 2; incy_val++) {
8060 if (incy_val == 0)
8061 continue;
8062
8063 /* setting incy */
8064 incy = incy_val;
8065 incy *= 2;
8066
8067 ccopy_vector(y_gen, m_i, 1, y, incy_val);
8068
8069 /* call BLAS_cgemv2_x */
8070 FPU_FIX_STOP;
8071 BLAS_cgemv2_x(order_type, trans_type, m, n, alpha, A,
8072 lda, head_x, tail_x, incx_val, beta, y,
8073 incy_val, prec);
8074 FPU_FIX_START;
8075
8076 /* set y starting index */
8077 iy = 0;
8078 if (incy < 0)
8079 iy = -(m_i - 1) * incy;
8080
8081 /* computing the ratio */
8082 if (m > 0 && n > 0)
8083 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
8084 /* copy row j of A to temp */
8085 cge_copy_row(order_type, trans_type, m_i, n_i, A,
8086 lda, temp, j);
8087
8088 test_BLAS_cdot2(n_i, blas_no_conj, alpha, beta,
8089 &y_gen[k], &y[iy],
8090 &head_r_true[k], &tail_r_true[k],
8091 temp, 1, head_x, tail_x, incx_val,
8092 eps_int, un_int, &ratios[j]);
8093
8094 /* take the max ratio */
8095 if (j == 0) {
8096 ratio = ratios[0];
8097 /* The !<= below causes NaN error to be detected.
8098 Note that (NaN > thresh) is always false. */
8099 } else if (!(ratios[j] <= ratio)) {
8100 ratio = ratios[j];
8101 }
8102 iy += incy;
8103 }
8104
8105 /* Increase the number of bad ratio, if the ratio
8106 is bigger than the threshold.
8107 The !<= below causes NaN error to be detected.
8108 Note that (NaN > thresh) is always false. */
8109 if (!(ratio <= thresh)) {
8110 bad_ratios++;
8111
8112 if ((debug == 3) && /* print only when debug is on */
8113 (count != old_count) && /* print if old vector is different
8114 from the current one */
8115 (d_count == find_max_ratio) &&
8116 (p_count <= max_print) &&
8117 (ratio > 0.5 * ratio_max)) {
8118 old_count = count;
8119
8120 printf
8121 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
8122 fname, m, n, ntests, thresh);
8123
8124 /* Print test info */
8125 switch (prec) {
8126 case blas_prec_single:
8127 printf("single ");
8128 break;
8129 case blas_prec_double:
8130 printf("double ");
8131 break;
8132 case blas_prec_indigenous:
8133 printf("indigenous ");
8134 break;
8135 case blas_prec_extra:
8136 printf("extra ");
8137 break;
8138 }
8139 switch (norm) {
8140 case -1:
8141 printf("near_underflow ");
8142 break;
8143 case 0:
8144 printf("near_one ");
8145 break;
8146 case 1:
8147 printf("near_overflow ");
8148 break;
8149 }
8150 switch (order_type) {
8151 case blas_rowmajor:
8152 printf("row_major ");
8153 break;
8154 case blas_colmajor:
8155 printf("col_major ");
8156 break;
8157 }
8158 switch (trans_type) {
8159 case blas_no_trans:
8160 printf("no_trans ");
8161 break;
8162 case blas_trans:
8163 printf("trans ");
8164 break;
8165 case blas_conj_trans:
8166 printf("conj_trans ");
8167 break;
8168 }
8169
8170 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
8171 incy);
8172
8173 cge_print_matrix(A, m_i, n_i, lda, order_type,
8174 "A");
8175
8176 cprint_vector(head_x, n_i, incx_val, "head_x");
8177 cprint_vector(tail_x, n_i, incx_val, "tail_x");
8178 cprint_vector(y_gen, m_i, 1, "y_gen");
8179 cprint_vector(y, m_i, incy_val, "y_final");
8180
8181 printf(" ");
8182 printf("alpha = ");
8183 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
8184 printf("\n ");
8185 printf("beta = ");
8186 printf("(%16.8e, %16.8e)", beta[0], beta[1]);
8187 printf("\n");
8188 for (j = 0, k = 0; j < m_i * incy_gen;
8189 j += incy_gen, k++) {
8190 printf(" ");
8191 printf
8192 ("([%24.16e %24.16e], [%24.16e %24.16e])",
8193 head_r_true[j], tail_r_true[j],
8194 head_r_true[j + 1], tail_r_true[j + 1]);
8195 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
8196 }
8197
8198 printf(" ratio=%.4e\n", ratio);
8199 p_count++;
8200 }
8201 if (bad_ratios >= MAX_BAD_TESTS) {
8202 printf("\ntoo many failures, exiting....");
8203 printf("\nTesting and compilation");
8204 printf(" are incomplete\n\n");
8205 goto end;
8206 }
8207 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
8208 printf("\nFlagrant ratio error, exiting...");
8209 printf("\nTesting and compilation");
8210 printf(" are incomplete\n\n");
8211 goto end;
8212 }
8213 }
8214 if (d_count == 0) {
8215 if (ratio > ratio_max)
8216 ratio_max = ratio;
8217
8218 if (ratio != 0.0 && ratio < ratio_min)
8219 ratio_min = ratio;
8220
8221 tot_tests++;
8222 }
8223 } /* incy */
8224 } /* incx */
8225 } /* lda */
8226 } /* trans */
8227 } /* order */
8228 } /* tests */
8229 } /* norm */
8230 } /* prec */
8231 } /* beta */
8232 } /* alpha */
8233 } /* debug */
8234
8235 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
8236 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
8237 fname, m, n, ntests, thresh);
8238 printf
8239 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
8240 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
8241 ratio_min, ratio_max);
8242 }
8243
8244 end:
8245 FPU_FIX_STOP;
8246
8247 blas_free(head_x);
8248 blas_free(tail_x);
8249 blas_free(y);
8250 blas_free(head_x_gen);
8251 blas_free(tail_x_gen);
8252 blas_free(y_gen);
8253 blas_free(temp);
8254 blas_free(A);
8255 blas_free(head_r_true);
8256 blas_free(tail_r_true);
8257 blas_free(ratios);
8258
8259 *min_ratio = ratio_min;
8260 *num_bad_ratio = bad_ratios;
8261 *num_tests = tot_tests;
8262 return ratio_max;
8263 }
do_test_zgemv2_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)8264 double do_test_zgemv2_x(int m, int n, int ntests, int *seed, double thresh,
8265 int debug, float test_prob, double *min_ratio,
8266 int *num_bad_ratio, int *num_tests)
8267
8268 /*
8269 * Purpose
8270 * =======
8271 *
8272 * Runs a series of tests on GEMV2.
8273 *
8274 * Arguments
8275 * =========
8276 *
8277 * m (input) int
8278 * The number of rows
8279 *
8280 * n (input) int
8281 * The number of columns
8282 *
8283 * ntests (input) int
8284 * The number of tests to run for each set of attributes.
8285 *
8286 * seed (input/output) int
8287 * The seed for the random number generator used in testgen().
8288 *
8289 * thresh (input) double
8290 * When the ratio returned from test() exceeds the specified
8291 * threshold, the current size, r_true, r_comp, and ratio will be
8292 * printed. (Since ratio is supposed to be O(1), we can set thresh
8293 * to ~10.)
8294 *
8295 * debug (input) int
8296 * If debug=3, print summary
8297 * If debug=2, print summary only if the number of bad ratios > 0
8298 * If debug=1, print complete info if tests fail
8299 * If debug=0, return max ratio
8300 *
8301 * test_prob (input) float
8302 * The specified test will be performed only if the generated
8303 * random exceeds this threshold.
8304 *
8305 * min_ratio (output) double
8306 * The minimum ratio
8307 *
8308 * num_bad_ratio (output) int
8309 * The number of tests fail; they are above the threshold.
8310 *
8311 * num_tests (output) int
8312 * The number of tests is being performed.
8313 *
8314 * Return value
8315 * ============
8316 *
8317 * The maximum ratio if run successfully, otherwise return -1
8318 *
8319 * Code structure
8320 * ==============
8321 *
8322 * debug loop -- if debug is one, the first loop computes the max ratio
8323 * -- and the last(second) loop outputs debugging information,
8324 * -- if the test fail and its ratio > 0.5 * max ratio.
8325 * -- if debug is zero, the loop is executed once
8326 * alpha loop -- varying alpha: 0, 1, or random
8327 * beta loop -- varying beta: 0, 1, or random
8328 * prec loop -- varying internal prec: single, double, or extra
8329 * norm loop -- varying norm: near undeflow, near one, or
8330 * -- near overflow
8331 * numtest loop -- how many times the test is perform with
8332 * -- above set of attributes
8333 * order loop -- varying order type: rowmajor or colmajor
8334 * trans loop -- varying uplo type: upper or lower
8335 * lda loop -- varying lda: m, m+1, 2m
8336 * incx loop -- varying incx: -2, -1, 1, 2
8337 * incy loop -- varying incy: -2, -1, 1, 2
8338 */
8339 {
8340 /* function name */
8341 const char fname[] = "BLAS_zgemv2_x";
8342
8343 /* max number of debug lines to print */
8344 const int max_print = 8;
8345
8346 /* Variables in the "x_val" form are loop vars for corresponding
8347 variables */
8348 int i; /* iterate through the repeating tests */
8349 int j, k; /* multipurpose counters or variables */
8350 int iy; /* use to index y */
8351 int incx_val, incy_val, /* for testing different inc values */
8352 incx, incy;
8353 int incy_gen; /* for complex case inc=2, for real case inc=1 */
8354 int d_count; /* counter for debug */
8355 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
8356 int p_count; /* counter for the number of debug lines printed */
8357 int tot_tests; /* total number of tests to be done */
8358 int norm; /* input values of near underflow/one/overflow */
8359 double ratio_max; /* the current maximum ratio */
8360 double ratio_min; /* the current minimum ratio */
8361 double *ratios; /* a temporary variable for calculating ratio */
8362 double ratio; /* the per-use test ratio from test() */
8363 int bad_ratios; /* the number of ratios over the threshold */
8364 double eps_int; /* the internal epsilon expected--2^(-24) for float */
8365 double un_int; /* the internal underflow threshold */
8366 double alpha[2];
8367 double beta[2];
8368 double *A;
8369 double *head_x;
8370 double *tail_x;
8371 double *y;
8372 double *temp; /* use for calculating ratio */
8373
8374 /* x_gen and y_gen are used to store vectors generated by testgen.
8375 they eventually are copied back to x and y */
8376 double *head_x_gen;
8377 double *tail_x_gen;
8378 double *y_gen;
8379
8380 /* the true r calculated by testgen(), in double-double */
8381 double *head_r_true, *tail_r_true;
8382
8383 int alpha_val;
8384 int alpha_flag; /* input flag for BLAS_zgemv2_testgen */
8385 int beta_val;
8386 int beta_flag; /* input flag for BLAS_zgemv2_testgen */
8387 int order_val;
8388 enum blas_order_type order_type;
8389 int prec_val;
8390 enum blas_prec_type prec;
8391 int trans_val;
8392 enum blas_trans_type trans_type;
8393 int m_i;
8394 int n_i;
8395 int max_mn; /* the max of m and n */
8396 int lda_val;
8397 int lda;
8398 int saved_seed; /* for saving the original seed */
8399 int count, old_count; /* use for counting the number of testgen calls * 2 */
8400
8401 FPU_FIX_DECL;
8402
8403 /* test for bad arguments */
8404 if (n < 0 || m < 0 || ntests < 0)
8405 BLAS_error(fname, 0, 0, NULL);
8406
8407 /* initialization */
8408 *num_bad_ratio = 0;
8409 *num_tests = 0;
8410 *min_ratio = 0.0;
8411
8412 saved_seed = *seed;
8413 ratio_min = 1e308;
8414 ratio_max = 0.0;
8415 ratio = 0.0;
8416 tot_tests = 0;
8417 p_count = 0;
8418 count = 0;
8419 find_max_ratio = 0;
8420 bad_ratios = 0;
8421 old_count = 0;
8422
8423 if (debug == 3)
8424 find_max_ratio = 1;
8425 max_mn = MAX(m, n);
8426 if (m == 0 || n == 0) {
8427 return 0.0;
8428 }
8429
8430 FPU_FIX_START;
8431
8432 incy_gen = 1;
8433 incy_gen *= 2;
8434
8435 /* get space for calculation */
8436 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
8437 if (max_mn * 2 > 0 && head_x == NULL) {
8438 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8439 }
8440 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
8441 if (max_mn * 2 > 0 && tail_x == NULL) {
8442 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8443 }
8444 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
8445 if (max_mn * 2 > 0 && y == NULL) {
8446 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8447 }
8448 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8449 if (max_mn > 0 && head_x_gen == NULL) {
8450 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8451 }
8452 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8453 if (max_mn > 0 && tail_x_gen == NULL) {
8454 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8455 }
8456 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8457 if (max_mn > 0 && y_gen == NULL) {
8458 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8459 }
8460 temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8461 if (max_mn > 0 && temp == NULL) {
8462 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8463 }
8464 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8465 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8466 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
8467 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8468 }
8469 ratios = (double *) blas_malloc(max_mn * sizeof(double));
8470 if (max_mn > 0 && ratios == NULL) {
8471 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8472 }
8473 A =
8474 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
8475 2);
8476 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
8477 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8478 }
8479
8480 /* The debug iteration:
8481 If debug=1, then will execute the iteration twice. First, compute the
8482 max ratio. Second, print info if ratio > (50% * ratio_max). */
8483 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
8484 bad_ratios = 0; /* set to zero */
8485
8486 if ((debug == 3) && (d_count == find_max_ratio))
8487 *seed = saved_seed; /* restore the original seed */
8488
8489 /* varying alpha */
8490 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
8491 alpha_flag = 0;
8492 switch (alpha_val) {
8493 case 0:
8494 alpha[0] = alpha[1] = 0.0;
8495 alpha_flag = 1;
8496 break;
8497 case 1:
8498 alpha[0] = 1.0;
8499 alpha[1] = 0.0;
8500 alpha_flag = 1;
8501 break;
8502 }
8503
8504 /* varying beta */
8505 for (beta_val = 0; beta_val < 3; beta_val++) {
8506 beta_flag = 0;
8507 switch (beta_val) {
8508 case 0:
8509 beta[0] = beta[1] = 0.0;
8510 beta_flag = 1;
8511 break;
8512 case 1:
8513 beta[0] = 1.0;
8514 beta[1] = 0.0;
8515 beta_flag = 1;
8516 break;
8517 }
8518
8519
8520 /* varying extra precs */
8521 for (prec_val = 0; prec_val <= 2; prec_val++) {
8522 switch (prec_val) {
8523 case 0:
8524 eps_int = power(2, -BITS_D);
8525 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
8526 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
8527 prec = blas_prec_double;
8528 break;
8529 case 1:
8530 eps_int = power(2, -BITS_D);
8531 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
8532 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
8533 prec = blas_prec_double;
8534 break;
8535 case 2:
8536 default:
8537 eps_int = power(2, -BITS_E);
8538 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
8539 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
8540 prec = blas_prec_extra;
8541 break;
8542 }
8543
8544 /* values near underflow, 1, or overflow */
8545 for (norm = -1; norm <= 1; norm++) {
8546
8547 /* number of tests */
8548 for (i = 0; i < ntests; i++) {
8549
8550 /* row or col major */
8551 for (order_val = 0; order_val < 2; order_val++) {
8552 switch (order_val) {
8553 case 0:
8554 order_type = blas_rowmajor;
8555 break;
8556 case 1:
8557 default:
8558 order_type = blas_colmajor;
8559 break;
8560 }
8561
8562 /* no_trans, trans, or conj_trans */
8563 for (trans_val = 0; trans_val < 3; trans_val++) {
8564 switch (trans_val) {
8565 case 0:
8566 trans_type = blas_no_trans;
8567 m_i = m;
8568 n_i = n;
8569 break;
8570 case 1:
8571 trans_type = blas_trans;
8572 m_i = n;
8573 n_i = m;
8574 break;
8575 case 2:
8576 default:
8577 trans_type = blas_conj_trans;
8578 m_i = n;
8579 n_i = m;
8580 break;
8581 }
8582
8583 /* lda=n, n+1, or 2n */
8584 for (lda_val = 0; lda_val < 3; lda_val++) {
8585 switch (lda_val) {
8586 case 0:
8587 lda = m_i;
8588 break;
8589 case 1:
8590 lda = m_i + 1;
8591 break;
8592 case 2:
8593 default:
8594 lda = 2 * m_i;
8595 break;
8596 }
8597 if ((order_type == blas_rowmajor && lda < n) ||
8598 (order_type == blas_colmajor && lda < m))
8599 continue;
8600
8601 /* For the sake of speed, we throw out this case at random */
8602 if (xrand(seed) >= test_prob)
8603 continue;
8604
8605 /* in the trivial cases, no need to run testgen */
8606 if (m > 0 && n > 0)
8607 BLAS_zgemv2_testgen(norm, order_type, trans_type, m, n,
8608 &alpha, alpha_flag, A, lda,
8609 head_x_gen, tail_x_gen, &beta,
8610 beta_flag, y_gen, seed, head_r_true,
8611 tail_r_true);
8612
8613 count++;
8614
8615 /* varying incx */
8616 for (incx_val = -2; incx_val <= 2; incx_val++) {
8617 if (incx_val == 0)
8618 continue;
8619
8620 /* setting incx */
8621 incx = incx_val;
8622 incx *= 2;
8623
8624 zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
8625 zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
8626
8627 /* varying incy */
8628 for (incy_val = -2; incy_val <= 2; incy_val++) {
8629 if (incy_val == 0)
8630 continue;
8631
8632 /* setting incy */
8633 incy = incy_val;
8634 incy *= 2;
8635
8636 zcopy_vector(y_gen, m_i, 1, y, incy_val);
8637
8638 /* call BLAS_zgemv2_x */
8639 FPU_FIX_STOP;
8640 BLAS_zgemv2_x(order_type, trans_type, m, n, alpha, A,
8641 lda, head_x, tail_x, incx_val, beta, y,
8642 incy_val, prec);
8643 FPU_FIX_START;
8644
8645 /* set y starting index */
8646 iy = 0;
8647 if (incy < 0)
8648 iy = -(m_i - 1) * incy;
8649
8650 /* computing the ratio */
8651 if (m > 0 && n > 0)
8652 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
8653 /* copy row j of A to temp */
8654 zge_copy_row(order_type, trans_type, m_i, n_i, A,
8655 lda, temp, j);
8656
8657 test_BLAS_zdot2(n_i, blas_no_conj, alpha, beta,
8658 &y_gen[k], &y[iy],
8659 &head_r_true[k], &tail_r_true[k],
8660 temp, 1, head_x, tail_x, incx_val,
8661 eps_int, un_int, &ratios[j]);
8662
8663 /* take the max ratio */
8664 if (j == 0) {
8665 ratio = ratios[0];
8666 /* The !<= below causes NaN error to be detected.
8667 Note that (NaN > thresh) is always false. */
8668 } else if (!(ratios[j] <= ratio)) {
8669 ratio = ratios[j];
8670 }
8671 iy += incy;
8672 }
8673
8674 /* Increase the number of bad ratio, if the ratio
8675 is bigger than the threshold.
8676 The !<= below causes NaN error to be detected.
8677 Note that (NaN > thresh) is always false. */
8678 if (!(ratio <= thresh)) {
8679 bad_ratios++;
8680
8681 if ((debug == 3) && /* print only when debug is on */
8682 (count != old_count) && /* print if old vector is different
8683 from the current one */
8684 (d_count == find_max_ratio) &&
8685 (p_count <= max_print) &&
8686 (ratio > 0.5 * ratio_max)) {
8687 old_count = count;
8688
8689 printf
8690 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
8691 fname, m, n, ntests, thresh);
8692
8693 /* Print test info */
8694 switch (prec) {
8695 case blas_prec_single:
8696 printf("single ");
8697 break;
8698 case blas_prec_double:
8699 printf("double ");
8700 break;
8701 case blas_prec_indigenous:
8702 printf("indigenous ");
8703 break;
8704 case blas_prec_extra:
8705 printf("extra ");
8706 break;
8707 }
8708 switch (norm) {
8709 case -1:
8710 printf("near_underflow ");
8711 break;
8712 case 0:
8713 printf("near_one ");
8714 break;
8715 case 1:
8716 printf("near_overflow ");
8717 break;
8718 }
8719 switch (order_type) {
8720 case blas_rowmajor:
8721 printf("row_major ");
8722 break;
8723 case blas_colmajor:
8724 printf("col_major ");
8725 break;
8726 }
8727 switch (trans_type) {
8728 case blas_no_trans:
8729 printf("no_trans ");
8730 break;
8731 case blas_trans:
8732 printf("trans ");
8733 break;
8734 case blas_conj_trans:
8735 printf("conj_trans ");
8736 break;
8737 }
8738
8739 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
8740 incy);
8741
8742 zge_print_matrix(A, m_i, n_i, lda, order_type,
8743 "A");
8744
8745 zprint_vector(head_x, n_i, incx_val, "head_x");
8746 zprint_vector(tail_x, n_i, incx_val, "tail_x");
8747 zprint_vector(y_gen, m_i, 1, "y_gen");
8748 zprint_vector(y, m_i, incy_val, "y_final");
8749
8750 printf(" ");
8751 printf("alpha = ");
8752 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
8753 printf("\n ");
8754 printf("beta = ");
8755 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
8756 printf("\n");
8757 for (j = 0, k = 0; j < m_i * incy_gen;
8758 j += incy_gen, k++) {
8759 printf(" ");
8760 printf
8761 ("([%24.16e %24.16e], [%24.16e %24.16e])",
8762 head_r_true[j], tail_r_true[j],
8763 head_r_true[j + 1], tail_r_true[j + 1]);
8764 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
8765 }
8766
8767 printf(" ratio=%.4e\n", ratio);
8768 p_count++;
8769 }
8770 if (bad_ratios >= MAX_BAD_TESTS) {
8771 printf("\ntoo many failures, exiting....");
8772 printf("\nTesting and compilation");
8773 printf(" are incomplete\n\n");
8774 goto end;
8775 }
8776 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
8777 printf("\nFlagrant ratio error, exiting...");
8778 printf("\nTesting and compilation");
8779 printf(" are incomplete\n\n");
8780 goto end;
8781 }
8782 }
8783 if (d_count == 0) {
8784 if (ratio > ratio_max)
8785 ratio_max = ratio;
8786
8787 if (ratio != 0.0 && ratio < ratio_min)
8788 ratio_min = ratio;
8789
8790 tot_tests++;
8791 }
8792 } /* incy */
8793 } /* incx */
8794 } /* lda */
8795 } /* trans */
8796 } /* order */
8797 } /* tests */
8798 } /* norm */
8799 } /* prec */
8800 } /* beta */
8801 } /* alpha */
8802 } /* debug */
8803
8804 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
8805 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
8806 fname, m, n, ntests, thresh);
8807 printf
8808 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
8809 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
8810 ratio_min, ratio_max);
8811 }
8812
8813 end:
8814 FPU_FIX_STOP;
8815
8816 blas_free(head_x);
8817 blas_free(tail_x);
8818 blas_free(y);
8819 blas_free(head_x_gen);
8820 blas_free(tail_x_gen);
8821 blas_free(y_gen);
8822 blas_free(temp);
8823 blas_free(A);
8824 blas_free(head_r_true);
8825 blas_free(tail_r_true);
8826 blas_free(ratios);
8827
8828 *min_ratio = ratio_min;
8829 *num_bad_ratio = bad_ratios;
8830 *num_tests = tot_tests;
8831 return ratio_max;
8832 }
do_test_dgemv2_d_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)8833 double do_test_dgemv2_d_s_x(int m, int n, int ntests, int *seed,
8834 double thresh, int debug, float test_prob,
8835 double *min_ratio, int *num_bad_ratio,
8836 int *num_tests)
8837
8838 /*
8839 * Purpose
8840 * =======
8841 *
8842 * Runs a series of tests on GEMV2.
8843 *
8844 * Arguments
8845 * =========
8846 *
8847 * m (input) int
8848 * The number of rows
8849 *
8850 * n (input) int
8851 * The number of columns
8852 *
8853 * ntests (input) int
8854 * The number of tests to run for each set of attributes.
8855 *
8856 * seed (input/output) int
8857 * The seed for the random number generator used in testgen().
8858 *
8859 * thresh (input) double
8860 * When the ratio returned from test() exceeds the specified
8861 * threshold, the current size, r_true, r_comp, and ratio will be
8862 * printed. (Since ratio is supposed to be O(1), we can set thresh
8863 * to ~10.)
8864 *
8865 * debug (input) int
8866 * If debug=3, print summary
8867 * If debug=2, print summary only if the number of bad ratios > 0
8868 * If debug=1, print complete info if tests fail
8869 * If debug=0, return max ratio
8870 *
8871 * test_prob (input) float
8872 * The specified test will be performed only if the generated
8873 * random exceeds this threshold.
8874 *
8875 * min_ratio (output) double
8876 * The minimum ratio
8877 *
8878 * num_bad_ratio (output) int
8879 * The number of tests fail; they are above the threshold.
8880 *
8881 * num_tests (output) int
8882 * The number of tests is being performed.
8883 *
8884 * Return value
8885 * ============
8886 *
8887 * The maximum ratio if run successfully, otherwise return -1
8888 *
8889 * Code structure
8890 * ==============
8891 *
8892 * debug loop -- if debug is one, the first loop computes the max ratio
8893 * -- and the last(second) loop outputs debugging information,
8894 * -- if the test fail and its ratio > 0.5 * max ratio.
8895 * -- if debug is zero, the loop is executed once
8896 * alpha loop -- varying alpha: 0, 1, or random
8897 * beta loop -- varying beta: 0, 1, or random
8898 * prec loop -- varying internal prec: single, double, or extra
8899 * norm loop -- varying norm: near undeflow, near one, or
8900 * -- near overflow
8901 * numtest loop -- how many times the test is perform with
8902 * -- above set of attributes
8903 * order loop -- varying order type: rowmajor or colmajor
8904 * trans loop -- varying uplo type: upper or lower
8905 * lda loop -- varying lda: m, m+1, 2m
8906 * incx loop -- varying incx: -2, -1, 1, 2
8907 * incy loop -- varying incy: -2, -1, 1, 2
8908 */
8909 {
8910 /* function name */
8911 const char fname[] = "BLAS_dgemv2_d_s_x";
8912
8913 /* max number of debug lines to print */
8914 const int max_print = 8;
8915
8916 /* Variables in the "x_val" form are loop vars for corresponding
8917 variables */
8918 int i; /* iterate through the repeating tests */
8919 int j, k; /* multipurpose counters or variables */
8920 int iy; /* use to index y */
8921 int incx_val, incy_val, /* for testing different inc values */
8922 incx, incy;
8923 int incy_gen; /* for complex case inc=2, for real case inc=1 */
8924 int d_count; /* counter for debug */
8925 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
8926 int p_count; /* counter for the number of debug lines printed */
8927 int tot_tests; /* total number of tests to be done */
8928 int norm; /* input values of near underflow/one/overflow */
8929 double ratio_max; /* the current maximum ratio */
8930 double ratio_min; /* the current minimum ratio */
8931 double *ratios; /* a temporary variable for calculating ratio */
8932 double ratio; /* the per-use test ratio from test() */
8933 int bad_ratios; /* the number of ratios over the threshold */
8934 double eps_int; /* the internal epsilon expected--2^(-24) for float */
8935 double un_int; /* the internal underflow threshold */
8936 double alpha;
8937 double beta;
8938 double *A;
8939 float *head_x;
8940 float *tail_x;
8941 double *y;
8942 double *temp; /* use for calculating ratio */
8943
8944 /* x_gen and y_gen are used to store vectors generated by testgen.
8945 they eventually are copied back to x and y */
8946 float *head_x_gen;
8947 float *tail_x_gen;
8948 double *y_gen;
8949
8950 /* the true r calculated by testgen(), in double-double */
8951 double *head_r_true, *tail_r_true;
8952 int alpha_val;
8953 int alpha_flag; /* input flag for BLAS_dgemv2_d_s_testgen */
8954 int beta_val;
8955 int beta_flag; /* input flag for BLAS_dgemv2_d_s_testgen */
8956 int order_val;
8957 enum blas_order_type order_type;
8958 int prec_val;
8959 enum blas_prec_type prec;
8960 int trans_val;
8961 enum blas_trans_type trans_type;
8962 int m_i;
8963 int n_i;
8964 int max_mn; /* the max of m and n */
8965 int lda_val;
8966 int lda;
8967 int saved_seed; /* for saving the original seed */
8968 int count, old_count; /* use for counting the number of testgen calls * 2 */
8969
8970 FPU_FIX_DECL;
8971
8972 /* test for bad arguments */
8973 if (n < 0 || m < 0 || ntests < 0)
8974 BLAS_error(fname, 0, 0, NULL);
8975
8976 /* initialization */
8977 *num_bad_ratio = 0;
8978 *num_tests = 0;
8979 *min_ratio = 0.0;
8980
8981 saved_seed = *seed;
8982 ratio_min = 1e308;
8983 ratio_max = 0.0;
8984 ratio = 0.0;
8985 tot_tests = 0;
8986 p_count = 0;
8987 count = 0;
8988 find_max_ratio = 0;
8989 bad_ratios = 0;
8990 old_count = 0;
8991
8992 if (debug == 3)
8993 find_max_ratio = 1;
8994 max_mn = MAX(m, n);
8995 if (m == 0 || n == 0) {
8996 return 0.0;
8997 }
8998
8999 FPU_FIX_START;
9000
9001 incy_gen = 1;
9002
9003
9004 /* get space for calculation */
9005 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
9006 if (max_mn * 2 > 0 && head_x == NULL) {
9007 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9008 }
9009 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
9010 if (max_mn * 2 > 0 && tail_x == NULL) {
9011 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9012 }
9013 y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
9014 if (max_mn * 2 > 0 && y == NULL) {
9015 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9016 }
9017 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
9018 if (max_mn > 0 && head_x_gen == NULL) {
9019 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9020 }
9021 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
9022 if (max_mn > 0 && tail_x_gen == NULL) {
9023 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9024 }
9025 y_gen = (double *) blas_malloc(max_mn * sizeof(double));
9026 if (max_mn > 0 && y_gen == NULL) {
9027 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9028 }
9029 temp = (double *) blas_malloc(max_mn * sizeof(double));
9030 if (max_mn > 0 && temp == NULL) {
9031 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9032 }
9033 head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
9034 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
9035 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
9036 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9037 }
9038 ratios = (double *) blas_malloc(max_mn * sizeof(double));
9039 if (max_mn > 0 && ratios == NULL) {
9040 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9041 }
9042 A =
9043 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
9044 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
9045 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9046 }
9047
9048 /* The debug iteration:
9049 If debug=1, then will execute the iteration twice. First, compute the
9050 max ratio. Second, print info if ratio > (50% * ratio_max). */
9051 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
9052 bad_ratios = 0; /* set to zero */
9053
9054 if ((debug == 3) && (d_count == find_max_ratio))
9055 *seed = saved_seed; /* restore the original seed */
9056
9057 /* varying alpha */
9058 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
9059 alpha_flag = 0;
9060 switch (alpha_val) {
9061 case 0:
9062 alpha = 0.0;
9063 alpha_flag = 1;
9064 break;
9065 case 1:
9066 alpha = 1.0;
9067 alpha_flag = 1;
9068 break;
9069 }
9070
9071 /* varying beta */
9072 for (beta_val = 0; beta_val < 3; beta_val++) {
9073 beta_flag = 0;
9074 switch (beta_val) {
9075 case 0:
9076 beta = 0.0;
9077 beta_flag = 1;
9078 break;
9079 case 1:
9080 beta = 1.0;
9081 beta_flag = 1;
9082 break;
9083 }
9084
9085
9086 /* varying extra precs */
9087 for (prec_val = 0; prec_val <= 2; prec_val++) {
9088 switch (prec_val) {
9089 case 0:
9090 eps_int = power(2, -BITS_D);
9091 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9092 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9093 prec = blas_prec_double;
9094 break;
9095 case 1:
9096 eps_int = power(2, -BITS_D);
9097 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9098 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9099 prec = blas_prec_double;
9100 break;
9101 case 2:
9102 default:
9103 eps_int = power(2, -BITS_E);
9104 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
9105 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
9106 prec = blas_prec_extra;
9107 break;
9108 }
9109
9110 /* values near underflow, 1, or overflow */
9111 for (norm = -1; norm <= 1; norm++) {
9112
9113 /* number of tests */
9114 for (i = 0; i < ntests; i++) {
9115
9116 /* row or col major */
9117 for (order_val = 0; order_val < 2; order_val++) {
9118 switch (order_val) {
9119 case 0:
9120 order_type = blas_rowmajor;
9121 break;
9122 case 1:
9123 default:
9124 order_type = blas_colmajor;
9125 break;
9126 }
9127
9128 /* no_trans, trans, or conj_trans */
9129 for (trans_val = 0; trans_val < 3; trans_val++) {
9130 switch (trans_val) {
9131 case 0:
9132 trans_type = blas_no_trans;
9133 m_i = m;
9134 n_i = n;
9135 break;
9136 case 1:
9137 trans_type = blas_trans;
9138 m_i = n;
9139 n_i = m;
9140 break;
9141 case 2:
9142 default:
9143 trans_type = blas_conj_trans;
9144 m_i = n;
9145 n_i = m;
9146 break;
9147 }
9148
9149 /* lda=n, n+1, or 2n */
9150 for (lda_val = 0; lda_val < 3; lda_val++) {
9151 switch (lda_val) {
9152 case 0:
9153 lda = m_i;
9154 break;
9155 case 1:
9156 lda = m_i + 1;
9157 break;
9158 case 2:
9159 default:
9160 lda = 2 * m_i;
9161 break;
9162 }
9163 if ((order_type == blas_rowmajor && lda < n) ||
9164 (order_type == blas_colmajor && lda < m))
9165 continue;
9166
9167 /* For the sake of speed, we throw out this case at random */
9168 if (xrand(seed) >= test_prob)
9169 continue;
9170
9171 /* in the trivial cases, no need to run testgen */
9172 if (m > 0 && n > 0)
9173 BLAS_dgemv2_d_s_testgen(norm, order_type, trans_type, m,
9174 n, &alpha, alpha_flag, A, lda,
9175 head_x_gen, tail_x_gen, &beta,
9176 beta_flag, y_gen, seed,
9177 head_r_true, tail_r_true);
9178
9179 count++;
9180
9181 /* varying incx */
9182 for (incx_val = -2; incx_val <= 2; incx_val++) {
9183 if (incx_val == 0)
9184 continue;
9185
9186 /* setting incx */
9187 incx = incx_val;
9188
9189
9190 scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
9191 scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
9192
9193 /* varying incy */
9194 for (incy_val = -2; incy_val <= 2; incy_val++) {
9195 if (incy_val == 0)
9196 continue;
9197
9198 /* setting incy */
9199 incy = incy_val;
9200
9201
9202 dcopy_vector(y_gen, m_i, 1, y, incy_val);
9203
9204 /* call BLAS_dgemv2_d_s_x */
9205 FPU_FIX_STOP;
9206 BLAS_dgemv2_d_s_x(order_type, trans_type, m, n, alpha,
9207 A, lda, head_x, tail_x, incx_val,
9208 beta, y, incy_val, prec);
9209 FPU_FIX_START;
9210
9211 /* set y starting index */
9212 iy = 0;
9213 if (incy < 0)
9214 iy = -(m_i - 1) * incy;
9215
9216 /* computing the ratio */
9217 if (m > 0 && n > 0)
9218 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
9219 /* copy row j of A to temp */
9220 dge_copy_row(order_type, trans_type, m_i, n_i, A,
9221 lda, temp, j);
9222
9223 test_BLAS_ddot2_d_s(n_i, blas_no_conj, alpha,
9224 beta, y_gen[k], y[iy],
9225 head_r_true[k],
9226 tail_r_true[k], temp, 1,
9227 head_x, tail_x, incx_val,
9228 eps_int, un_int, &ratios[j]);
9229
9230 /* take the max ratio */
9231 if (j == 0) {
9232 ratio = ratios[0];
9233 /* The !<= below causes NaN error to be detected.
9234 Note that (NaN > thresh) is always false. */
9235 } else if (!(ratios[j] <= ratio)) {
9236 ratio = ratios[j];
9237 }
9238 iy += incy;
9239 }
9240
9241 /* Increase the number of bad ratio, if the ratio
9242 is bigger than the threshold.
9243 The !<= below causes NaN error to be detected.
9244 Note that (NaN > thresh) is always false. */
9245 if (!(ratio <= thresh)) {
9246 bad_ratios++;
9247
9248 if ((debug == 3) && /* print only when debug is on */
9249 (count != old_count) && /* print if old vector is different
9250 from the current one */
9251 (d_count == find_max_ratio) &&
9252 (p_count <= max_print) &&
9253 (ratio > 0.5 * ratio_max)) {
9254 old_count = count;
9255
9256 printf
9257 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
9258 fname, m, n, ntests, thresh);
9259
9260 /* Print test info */
9261 switch (prec) {
9262 case blas_prec_single:
9263 printf("single ");
9264 break;
9265 case blas_prec_double:
9266 printf("double ");
9267 break;
9268 case blas_prec_indigenous:
9269 printf("indigenous ");
9270 break;
9271 case blas_prec_extra:
9272 printf("extra ");
9273 break;
9274 }
9275 switch (norm) {
9276 case -1:
9277 printf("near_underflow ");
9278 break;
9279 case 0:
9280 printf("near_one ");
9281 break;
9282 case 1:
9283 printf("near_overflow ");
9284 break;
9285 }
9286 switch (order_type) {
9287 case blas_rowmajor:
9288 printf("row_major ");
9289 break;
9290 case blas_colmajor:
9291 printf("col_major ");
9292 break;
9293 }
9294 switch (trans_type) {
9295 case blas_no_trans:
9296 printf("no_trans ");
9297 break;
9298 case blas_trans:
9299 printf("trans ");
9300 break;
9301 case blas_conj_trans:
9302 printf("conj_trans ");
9303 break;
9304 }
9305
9306 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
9307 incy);
9308
9309 dge_print_matrix(A, m_i, n_i, lda, order_type,
9310 "A");
9311
9312 sprint_vector(head_x, n_i, incx_val, "head_x");
9313 sprint_vector(tail_x, n_i, incx_val, "tail_x");
9314 dprint_vector(y_gen, m_i, 1, "y_gen");
9315 dprint_vector(y, m_i, incy_val, "y_final");
9316
9317 printf(" ");
9318 printf("alpha = ");
9319 printf("%24.16e", alpha);
9320 printf("\n ");
9321 printf("beta = ");
9322 printf("%24.16e", beta);
9323 printf("\n");
9324 for (j = 0, k = 0; j < m_i * incy_gen;
9325 j += incy_gen, k++) {
9326 printf(" ");
9327 printf("[%24.16e, %24.16e]", head_r_true[j],
9328 tail_r_true[j]);
9329 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
9330 }
9331
9332 printf(" ratio=%.4e\n", ratio);
9333 p_count++;
9334 }
9335 if (bad_ratios >= MAX_BAD_TESTS) {
9336 printf("\ntoo many failures, exiting....");
9337 printf("\nTesting and compilation");
9338 printf(" are incomplete\n\n");
9339 goto end;
9340 }
9341 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
9342 printf("\nFlagrant ratio error, exiting...");
9343 printf("\nTesting and compilation");
9344 printf(" are incomplete\n\n");
9345 goto end;
9346 }
9347 }
9348 if (d_count == 0) {
9349 if (ratio > ratio_max)
9350 ratio_max = ratio;
9351
9352 if (ratio != 0.0 && ratio < ratio_min)
9353 ratio_min = ratio;
9354
9355 tot_tests++;
9356 }
9357 } /* incy */
9358 } /* incx */
9359 } /* lda */
9360 } /* trans */
9361 } /* order */
9362 } /* tests */
9363 } /* norm */
9364 } /* prec */
9365 } /* beta */
9366 } /* alpha */
9367 } /* debug */
9368
9369 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
9370 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
9371 fname, m, n, ntests, thresh);
9372 printf
9373 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
9374 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
9375 ratio_min, ratio_max);
9376 }
9377
9378 end:
9379 FPU_FIX_STOP;
9380
9381 blas_free(head_x);
9382 blas_free(tail_x);
9383 blas_free(y);
9384 blas_free(head_x_gen);
9385 blas_free(tail_x_gen);
9386 blas_free(y_gen);
9387 blas_free(temp);
9388 blas_free(A);
9389 blas_free(head_r_true);
9390 blas_free(tail_r_true);
9391 blas_free(ratios);
9392
9393 *min_ratio = ratio_min;
9394 *num_bad_ratio = bad_ratios;
9395 *num_tests = tot_tests;
9396 return ratio_max;
9397 }
do_test_dgemv2_s_d_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)9398 double do_test_dgemv2_s_d_x(int m, int n, int ntests, int *seed,
9399 double thresh, int debug, float test_prob,
9400 double *min_ratio, int *num_bad_ratio,
9401 int *num_tests)
9402
9403 /*
9404 * Purpose
9405 * =======
9406 *
9407 * Runs a series of tests on GEMV2.
9408 *
9409 * Arguments
9410 * =========
9411 *
9412 * m (input) int
9413 * The number of rows
9414 *
9415 * n (input) int
9416 * The number of columns
9417 *
9418 * ntests (input) int
9419 * The number of tests to run for each set of attributes.
9420 *
9421 * seed (input/output) int
9422 * The seed for the random number generator used in testgen().
9423 *
9424 * thresh (input) double
9425 * When the ratio returned from test() exceeds the specified
9426 * threshold, the current size, r_true, r_comp, and ratio will be
9427 * printed. (Since ratio is supposed to be O(1), we can set thresh
9428 * to ~10.)
9429 *
9430 * debug (input) int
9431 * If debug=3, print summary
9432 * If debug=2, print summary only if the number of bad ratios > 0
9433 * If debug=1, print complete info if tests fail
9434 * If debug=0, return max ratio
9435 *
9436 * test_prob (input) float
9437 * The specified test will be performed only if the generated
9438 * random exceeds this threshold.
9439 *
9440 * min_ratio (output) double
9441 * The minimum ratio
9442 *
9443 * num_bad_ratio (output) int
9444 * The number of tests fail; they are above the threshold.
9445 *
9446 * num_tests (output) int
9447 * The number of tests is being performed.
9448 *
9449 * Return value
9450 * ============
9451 *
9452 * The maximum ratio if run successfully, otherwise return -1
9453 *
9454 * Code structure
9455 * ==============
9456 *
9457 * debug loop -- if debug is one, the first loop computes the max ratio
9458 * -- and the last(second) loop outputs debugging information,
9459 * -- if the test fail and its ratio > 0.5 * max ratio.
9460 * -- if debug is zero, the loop is executed once
9461 * alpha loop -- varying alpha: 0, 1, or random
9462 * beta loop -- varying beta: 0, 1, or random
9463 * prec loop -- varying internal prec: single, double, or extra
9464 * norm loop -- varying norm: near undeflow, near one, or
9465 * -- near overflow
9466 * numtest loop -- how many times the test is perform with
9467 * -- above set of attributes
9468 * order loop -- varying order type: rowmajor or colmajor
9469 * trans loop -- varying uplo type: upper or lower
9470 * lda loop -- varying lda: m, m+1, 2m
9471 * incx loop -- varying incx: -2, -1, 1, 2
9472 * incy loop -- varying incy: -2, -1, 1, 2
9473 */
9474 {
9475 /* function name */
9476 const char fname[] = "BLAS_dgemv2_s_d_x";
9477
9478 /* max number of debug lines to print */
9479 const int max_print = 8;
9480
9481 /* Variables in the "x_val" form are loop vars for corresponding
9482 variables */
9483 int i; /* iterate through the repeating tests */
9484 int j, k; /* multipurpose counters or variables */
9485 int iy; /* use to index y */
9486 int incx_val, incy_val, /* for testing different inc values */
9487 incx, incy;
9488 int incy_gen; /* for complex case inc=2, for real case inc=1 */
9489 int d_count; /* counter for debug */
9490 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
9491 int p_count; /* counter for the number of debug lines printed */
9492 int tot_tests; /* total number of tests to be done */
9493 int norm; /* input values of near underflow/one/overflow */
9494 double ratio_max; /* the current maximum ratio */
9495 double ratio_min; /* the current minimum ratio */
9496 double *ratios; /* a temporary variable for calculating ratio */
9497 double ratio; /* the per-use test ratio from test() */
9498 int bad_ratios; /* the number of ratios over the threshold */
9499 double eps_int; /* the internal epsilon expected--2^(-24) for float */
9500 double un_int; /* the internal underflow threshold */
9501 double alpha;
9502 double beta;
9503 float *A;
9504 double *head_x;
9505 double *tail_x;
9506 double *y;
9507 float *temp; /* use for calculating ratio */
9508
9509 /* x_gen and y_gen are used to store vectors generated by testgen.
9510 they eventually are copied back to x and y */
9511 double *head_x_gen;
9512 double *tail_x_gen;
9513 double *y_gen;
9514
9515 /* the true r calculated by testgen(), in double-double */
9516 double *head_r_true, *tail_r_true;
9517 int alpha_val;
9518 int alpha_flag; /* input flag for BLAS_dgemv2_s_d_testgen */
9519 int beta_val;
9520 int beta_flag; /* input flag for BLAS_dgemv2_s_d_testgen */
9521 int order_val;
9522 enum blas_order_type order_type;
9523 int prec_val;
9524 enum blas_prec_type prec;
9525 int trans_val;
9526 enum blas_trans_type trans_type;
9527 int m_i;
9528 int n_i;
9529 int max_mn; /* the max of m and n */
9530 int lda_val;
9531 int lda;
9532 int saved_seed; /* for saving the original seed */
9533 int count, old_count; /* use for counting the number of testgen calls * 2 */
9534
9535 FPU_FIX_DECL;
9536
9537 /* test for bad arguments */
9538 if (n < 0 || m < 0 || ntests < 0)
9539 BLAS_error(fname, 0, 0, NULL);
9540
9541 /* initialization */
9542 *num_bad_ratio = 0;
9543 *num_tests = 0;
9544 *min_ratio = 0.0;
9545
9546 saved_seed = *seed;
9547 ratio_min = 1e308;
9548 ratio_max = 0.0;
9549 ratio = 0.0;
9550 tot_tests = 0;
9551 p_count = 0;
9552 count = 0;
9553 find_max_ratio = 0;
9554 bad_ratios = 0;
9555 old_count = 0;
9556
9557 if (debug == 3)
9558 find_max_ratio = 1;
9559 max_mn = MAX(m, n);
9560 if (m == 0 || n == 0) {
9561 return 0.0;
9562 }
9563
9564 FPU_FIX_START;
9565
9566 incy_gen = 1;
9567
9568
9569 /* get space for calculation */
9570 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
9571 if (max_mn * 2 > 0 && head_x == NULL) {
9572 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9573 }
9574 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
9575 if (max_mn * 2 > 0 && tail_x == NULL) {
9576 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9577 }
9578 y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
9579 if (max_mn * 2 > 0 && y == NULL) {
9580 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9581 }
9582 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
9583 if (max_mn > 0 && head_x_gen == NULL) {
9584 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9585 }
9586 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
9587 if (max_mn > 0 && tail_x_gen == NULL) {
9588 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9589 }
9590 y_gen = (double *) blas_malloc(max_mn * sizeof(double));
9591 if (max_mn > 0 && y_gen == NULL) {
9592 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9593 }
9594 temp = (float *) blas_malloc(max_mn * sizeof(float));
9595 if (max_mn > 0 && temp == NULL) {
9596 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9597 }
9598 head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
9599 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
9600 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
9601 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9602 }
9603 ratios = (double *) blas_malloc(max_mn * sizeof(double));
9604 if (max_mn > 0 && ratios == NULL) {
9605 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9606 }
9607 A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
9608 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
9609 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9610 }
9611
9612 /* The debug iteration:
9613 If debug=1, then will execute the iteration twice. First, compute the
9614 max ratio. Second, print info if ratio > (50% * ratio_max). */
9615 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
9616 bad_ratios = 0; /* set to zero */
9617
9618 if ((debug == 3) && (d_count == find_max_ratio))
9619 *seed = saved_seed; /* restore the original seed */
9620
9621 /* varying alpha */
9622 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
9623 alpha_flag = 0;
9624 switch (alpha_val) {
9625 case 0:
9626 alpha = 0.0;
9627 alpha_flag = 1;
9628 break;
9629 case 1:
9630 alpha = 1.0;
9631 alpha_flag = 1;
9632 break;
9633 }
9634
9635 /* varying beta */
9636 for (beta_val = 0; beta_val < 3; beta_val++) {
9637 beta_flag = 0;
9638 switch (beta_val) {
9639 case 0:
9640 beta = 0.0;
9641 beta_flag = 1;
9642 break;
9643 case 1:
9644 beta = 1.0;
9645 beta_flag = 1;
9646 break;
9647 }
9648
9649
9650 /* varying extra precs */
9651 for (prec_val = 0; prec_val <= 2; prec_val++) {
9652 switch (prec_val) {
9653 case 0:
9654 eps_int = power(2, -BITS_D);
9655 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9656 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9657 prec = blas_prec_double;
9658 break;
9659 case 1:
9660 eps_int = power(2, -BITS_D);
9661 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9662 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9663 prec = blas_prec_double;
9664 break;
9665 case 2:
9666 default:
9667 eps_int = power(2, -BITS_E);
9668 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
9669 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
9670 prec = blas_prec_extra;
9671 break;
9672 }
9673
9674 /* values near underflow, 1, or overflow */
9675 for (norm = -1; norm <= 1; norm++) {
9676
9677 /* number of tests */
9678 for (i = 0; i < ntests; i++) {
9679
9680 /* row or col major */
9681 for (order_val = 0; order_val < 2; order_val++) {
9682 switch (order_val) {
9683 case 0:
9684 order_type = blas_rowmajor;
9685 break;
9686 case 1:
9687 default:
9688 order_type = blas_colmajor;
9689 break;
9690 }
9691
9692 /* no_trans, trans, or conj_trans */
9693 for (trans_val = 0; trans_val < 3; trans_val++) {
9694 switch (trans_val) {
9695 case 0:
9696 trans_type = blas_no_trans;
9697 m_i = m;
9698 n_i = n;
9699 break;
9700 case 1:
9701 trans_type = blas_trans;
9702 m_i = n;
9703 n_i = m;
9704 break;
9705 case 2:
9706 default:
9707 trans_type = blas_conj_trans;
9708 m_i = n;
9709 n_i = m;
9710 break;
9711 }
9712
9713 /* lda=n, n+1, or 2n */
9714 for (lda_val = 0; lda_val < 3; lda_val++) {
9715 switch (lda_val) {
9716 case 0:
9717 lda = m_i;
9718 break;
9719 case 1:
9720 lda = m_i + 1;
9721 break;
9722 case 2:
9723 default:
9724 lda = 2 * m_i;
9725 break;
9726 }
9727 if ((order_type == blas_rowmajor && lda < n) ||
9728 (order_type == blas_colmajor && lda < m))
9729 continue;
9730
9731 /* For the sake of speed, we throw out this case at random */
9732 if (xrand(seed) >= test_prob)
9733 continue;
9734
9735 /* in the trivial cases, no need to run testgen */
9736 if (m > 0 && n > 0)
9737 BLAS_dgemv2_s_d_testgen(norm, order_type, trans_type, m,
9738 n, &alpha, alpha_flag, A, lda,
9739 head_x_gen, tail_x_gen, &beta,
9740 beta_flag, y_gen, seed,
9741 head_r_true, tail_r_true);
9742
9743 count++;
9744
9745 /* varying incx */
9746 for (incx_val = -2; incx_val <= 2; incx_val++) {
9747 if (incx_val == 0)
9748 continue;
9749
9750 /* setting incx */
9751 incx = incx_val;
9752
9753
9754 dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
9755 dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
9756
9757 /* varying incy */
9758 for (incy_val = -2; incy_val <= 2; incy_val++) {
9759 if (incy_val == 0)
9760 continue;
9761
9762 /* setting incy */
9763 incy = incy_val;
9764
9765
9766 dcopy_vector(y_gen, m_i, 1, y, incy_val);
9767
9768 /* call BLAS_dgemv2_s_d_x */
9769 FPU_FIX_STOP;
9770 BLAS_dgemv2_s_d_x(order_type, trans_type, m, n, alpha,
9771 A, lda, head_x, tail_x, incx_val,
9772 beta, y, incy_val, prec);
9773 FPU_FIX_START;
9774
9775 /* set y starting index */
9776 iy = 0;
9777 if (incy < 0)
9778 iy = -(m_i - 1) * incy;
9779
9780 /* computing the ratio */
9781 if (m > 0 && n > 0)
9782 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
9783 /* copy row j of A to temp */
9784 sge_copy_row(order_type, trans_type, m_i, n_i, A,
9785 lda, temp, j);
9786
9787 test_BLAS_ddot2_s_d(n_i, blas_no_conj, alpha,
9788 beta, y_gen[k], y[iy],
9789 head_r_true[k],
9790 tail_r_true[k], temp, 1,
9791 head_x, tail_x, incx_val,
9792 eps_int, un_int, &ratios[j]);
9793
9794 /* take the max ratio */
9795 if (j == 0) {
9796 ratio = ratios[0];
9797 /* The !<= below causes NaN error to be detected.
9798 Note that (NaN > thresh) is always false. */
9799 } else if (!(ratios[j] <= ratio)) {
9800 ratio = ratios[j];
9801 }
9802 iy += incy;
9803 }
9804
9805 /* Increase the number of bad ratio, if the ratio
9806 is bigger than the threshold.
9807 The !<= below causes NaN error to be detected.
9808 Note that (NaN > thresh) is always false. */
9809 if (!(ratio <= thresh)) {
9810 bad_ratios++;
9811
9812 if ((debug == 3) && /* print only when debug is on */
9813 (count != old_count) && /* print if old vector is different
9814 from the current one */
9815 (d_count == find_max_ratio) &&
9816 (p_count <= max_print) &&
9817 (ratio > 0.5 * ratio_max)) {
9818 old_count = count;
9819
9820 printf
9821 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
9822 fname, m, n, ntests, thresh);
9823
9824 /* Print test info */
9825 switch (prec) {
9826 case blas_prec_single:
9827 printf("single ");
9828 break;
9829 case blas_prec_double:
9830 printf("double ");
9831 break;
9832 case blas_prec_indigenous:
9833 printf("indigenous ");
9834 break;
9835 case blas_prec_extra:
9836 printf("extra ");
9837 break;
9838 }
9839 switch (norm) {
9840 case -1:
9841 printf("near_underflow ");
9842 break;
9843 case 0:
9844 printf("near_one ");
9845 break;
9846 case 1:
9847 printf("near_overflow ");
9848 break;
9849 }
9850 switch (order_type) {
9851 case blas_rowmajor:
9852 printf("row_major ");
9853 break;
9854 case blas_colmajor:
9855 printf("col_major ");
9856 break;
9857 }
9858 switch (trans_type) {
9859 case blas_no_trans:
9860 printf("no_trans ");
9861 break;
9862 case blas_trans:
9863 printf("trans ");
9864 break;
9865 case blas_conj_trans:
9866 printf("conj_trans ");
9867 break;
9868 }
9869
9870 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
9871 incy);
9872
9873 sge_print_matrix(A, m_i, n_i, lda, order_type,
9874 "A");
9875
9876 dprint_vector(head_x, n_i, incx_val, "head_x");
9877 dprint_vector(tail_x, n_i, incx_val, "tail_x");
9878 dprint_vector(y_gen, m_i, 1, "y_gen");
9879 dprint_vector(y, m_i, incy_val, "y_final");
9880
9881 printf(" ");
9882 printf("alpha = ");
9883 printf("%24.16e", alpha);
9884 printf("\n ");
9885 printf("beta = ");
9886 printf("%24.16e", beta);
9887 printf("\n");
9888 for (j = 0, k = 0; j < m_i * incy_gen;
9889 j += incy_gen, k++) {
9890 printf(" ");
9891 printf("[%24.16e, %24.16e]", head_r_true[j],
9892 tail_r_true[j]);
9893 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
9894 }
9895
9896 printf(" ratio=%.4e\n", ratio);
9897 p_count++;
9898 }
9899 if (bad_ratios >= MAX_BAD_TESTS) {
9900 printf("\ntoo many failures, exiting....");
9901 printf("\nTesting and compilation");
9902 printf(" are incomplete\n\n");
9903 goto end;
9904 }
9905 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
9906 printf("\nFlagrant ratio error, exiting...");
9907 printf("\nTesting and compilation");
9908 printf(" are incomplete\n\n");
9909 goto end;
9910 }
9911 }
9912 if (d_count == 0) {
9913 if (ratio > ratio_max)
9914 ratio_max = ratio;
9915
9916 if (ratio != 0.0 && ratio < ratio_min)
9917 ratio_min = ratio;
9918
9919 tot_tests++;
9920 }
9921 } /* incy */
9922 } /* incx */
9923 } /* lda */
9924 } /* trans */
9925 } /* order */
9926 } /* tests */
9927 } /* norm */
9928 } /* prec */
9929 } /* beta */
9930 } /* alpha */
9931 } /* debug */
9932
9933 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
9934 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
9935 fname, m, n, ntests, thresh);
9936 printf
9937 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
9938 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
9939 ratio_min, ratio_max);
9940 }
9941
9942 end:
9943 FPU_FIX_STOP;
9944
9945 blas_free(head_x);
9946 blas_free(tail_x);
9947 blas_free(y);
9948 blas_free(head_x_gen);
9949 blas_free(tail_x_gen);
9950 blas_free(y_gen);
9951 blas_free(temp);
9952 blas_free(A);
9953 blas_free(head_r_true);
9954 blas_free(tail_r_true);
9955 blas_free(ratios);
9956
9957 *min_ratio = ratio_min;
9958 *num_bad_ratio = bad_ratios;
9959 *num_tests = tot_tests;
9960 return ratio_max;
9961 }
do_test_dgemv2_s_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)9962 double do_test_dgemv2_s_s_x(int m, int n, int ntests, int *seed,
9963 double thresh, int debug, float test_prob,
9964 double *min_ratio, int *num_bad_ratio,
9965 int *num_tests)
9966
9967 /*
9968 * Purpose
9969 * =======
9970 *
9971 * Runs a series of tests on GEMV2.
9972 *
9973 * Arguments
9974 * =========
9975 *
9976 * m (input) int
9977 * The number of rows
9978 *
9979 * n (input) int
9980 * The number of columns
9981 *
9982 * ntests (input) int
9983 * The number of tests to run for each set of attributes.
9984 *
9985 * seed (input/output) int
9986 * The seed for the random number generator used in testgen().
9987 *
9988 * thresh (input) double
9989 * When the ratio returned from test() exceeds the specified
9990 * threshold, the current size, r_true, r_comp, and ratio will be
9991 * printed. (Since ratio is supposed to be O(1), we can set thresh
9992 * to ~10.)
9993 *
9994 * debug (input) int
9995 * If debug=3, print summary
9996 * If debug=2, print summary only if the number of bad ratios > 0
9997 * If debug=1, print complete info if tests fail
9998 * If debug=0, return max ratio
9999 *
10000 * test_prob (input) float
10001 * The specified test will be performed only if the generated
10002 * random exceeds this threshold.
10003 *
10004 * min_ratio (output) double
10005 * The minimum ratio
10006 *
10007 * num_bad_ratio (output) int
10008 * The number of tests fail; they are above the threshold.
10009 *
10010 * num_tests (output) int
10011 * The number of tests is being performed.
10012 *
10013 * Return value
10014 * ============
10015 *
10016 * The maximum ratio if run successfully, otherwise return -1
10017 *
10018 * Code structure
10019 * ==============
10020 *
10021 * debug loop -- if debug is one, the first loop computes the max ratio
10022 * -- and the last(second) loop outputs debugging information,
10023 * -- if the test fail and its ratio > 0.5 * max ratio.
10024 * -- if debug is zero, the loop is executed once
10025 * alpha loop -- varying alpha: 0, 1, or random
10026 * beta loop -- varying beta: 0, 1, or random
10027 * prec loop -- varying internal prec: single, double, or extra
10028 * norm loop -- varying norm: near undeflow, near one, or
10029 * -- near overflow
10030 * numtest loop -- how many times the test is perform with
10031 * -- above set of attributes
10032 * order loop -- varying order type: rowmajor or colmajor
10033 * trans loop -- varying uplo type: upper or lower
10034 * lda loop -- varying lda: m, m+1, 2m
10035 * incx loop -- varying incx: -2, -1, 1, 2
10036 * incy loop -- varying incy: -2, -1, 1, 2
10037 */
10038 {
10039 /* function name */
10040 const char fname[] = "BLAS_dgemv2_s_s_x";
10041
10042 /* max number of debug lines to print */
10043 const int max_print = 8;
10044
10045 /* Variables in the "x_val" form are loop vars for corresponding
10046 variables */
10047 int i; /* iterate through the repeating tests */
10048 int j, k; /* multipurpose counters or variables */
10049 int iy; /* use to index y */
10050 int incx_val, incy_val, /* for testing different inc values */
10051 incx, incy;
10052 int incy_gen; /* for complex case inc=2, for real case inc=1 */
10053 int d_count; /* counter for debug */
10054 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
10055 int p_count; /* counter for the number of debug lines printed */
10056 int tot_tests; /* total number of tests to be done */
10057 int norm; /* input values of near underflow/one/overflow */
10058 double ratio_max; /* the current maximum ratio */
10059 double ratio_min; /* the current minimum ratio */
10060 double *ratios; /* a temporary variable for calculating ratio */
10061 double ratio; /* the per-use test ratio from test() */
10062 int bad_ratios; /* the number of ratios over the threshold */
10063 double eps_int; /* the internal epsilon expected--2^(-24) for float */
10064 double un_int; /* the internal underflow threshold */
10065 double alpha;
10066 double beta;
10067 float *A;
10068 float *head_x;
10069 float *tail_x;
10070 double *y;
10071 float *temp; /* use for calculating ratio */
10072
10073 /* x_gen and y_gen are used to store vectors generated by testgen.
10074 they eventually are copied back to x and y */
10075 float *head_x_gen;
10076 float *tail_x_gen;
10077 double *y_gen;
10078
10079 /* the true r calculated by testgen(), in double-double */
10080 double *head_r_true, *tail_r_true;
10081 int alpha_val;
10082 int alpha_flag; /* input flag for BLAS_dgemv2_s_s_testgen */
10083 int beta_val;
10084 int beta_flag; /* input flag for BLAS_dgemv2_s_s_testgen */
10085 int order_val;
10086 enum blas_order_type order_type;
10087 int prec_val;
10088 enum blas_prec_type prec;
10089 int trans_val;
10090 enum blas_trans_type trans_type;
10091 int m_i;
10092 int n_i;
10093 int max_mn; /* the max of m and n */
10094 int lda_val;
10095 int lda;
10096 int saved_seed; /* for saving the original seed */
10097 int count, old_count; /* use for counting the number of testgen calls * 2 */
10098
10099 FPU_FIX_DECL;
10100
10101 /* test for bad arguments */
10102 if (n < 0 || m < 0 || ntests < 0)
10103 BLAS_error(fname, 0, 0, NULL);
10104
10105 /* initialization */
10106 *num_bad_ratio = 0;
10107 *num_tests = 0;
10108 *min_ratio = 0.0;
10109
10110 saved_seed = *seed;
10111 ratio_min = 1e308;
10112 ratio_max = 0.0;
10113 ratio = 0.0;
10114 tot_tests = 0;
10115 p_count = 0;
10116 count = 0;
10117 find_max_ratio = 0;
10118 bad_ratios = 0;
10119 old_count = 0;
10120
10121 if (debug == 3)
10122 find_max_ratio = 1;
10123 max_mn = MAX(m, n);
10124 if (m == 0 || n == 0) {
10125 return 0.0;
10126 }
10127
10128 FPU_FIX_START;
10129
10130 incy_gen = 1;
10131
10132
10133 /* get space for calculation */
10134 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
10135 if (max_mn * 2 > 0 && head_x == NULL) {
10136 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10137 }
10138 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
10139 if (max_mn * 2 > 0 && tail_x == NULL) {
10140 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10141 }
10142 y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
10143 if (max_mn * 2 > 0 && y == NULL) {
10144 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10145 }
10146 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
10147 if (max_mn > 0 && head_x_gen == NULL) {
10148 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10149 }
10150 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
10151 if (max_mn > 0 && tail_x_gen == NULL) {
10152 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10153 }
10154 y_gen = (double *) blas_malloc(max_mn * sizeof(double));
10155 if (max_mn > 0 && y_gen == NULL) {
10156 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10157 }
10158 temp = (float *) blas_malloc(max_mn * sizeof(float));
10159 if (max_mn > 0 && temp == NULL) {
10160 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10161 }
10162 head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
10163 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
10164 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
10165 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10166 }
10167 ratios = (double *) blas_malloc(max_mn * sizeof(double));
10168 if (max_mn > 0 && ratios == NULL) {
10169 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10170 }
10171 A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
10172 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
10173 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10174 }
10175
10176 /* The debug iteration:
10177 If debug=1, then will execute the iteration twice. First, compute the
10178 max ratio. Second, print info if ratio > (50% * ratio_max). */
10179 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
10180 bad_ratios = 0; /* set to zero */
10181
10182 if ((debug == 3) && (d_count == find_max_ratio))
10183 *seed = saved_seed; /* restore the original seed */
10184
10185 /* varying alpha */
10186 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
10187 alpha_flag = 0;
10188 switch (alpha_val) {
10189 case 0:
10190 alpha = 0.0;
10191 alpha_flag = 1;
10192 break;
10193 case 1:
10194 alpha = 1.0;
10195 alpha_flag = 1;
10196 break;
10197 }
10198
10199 /* varying beta */
10200 for (beta_val = 0; beta_val < 3; beta_val++) {
10201 beta_flag = 0;
10202 switch (beta_val) {
10203 case 0:
10204 beta = 0.0;
10205 beta_flag = 1;
10206 break;
10207 case 1:
10208 beta = 1.0;
10209 beta_flag = 1;
10210 break;
10211 }
10212
10213
10214 /* varying extra precs */
10215 for (prec_val = 0; prec_val <= 2; prec_val++) {
10216 switch (prec_val) {
10217 case 0:
10218 eps_int = power(2, -BITS_D);
10219 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10220 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10221 prec = blas_prec_double;
10222 break;
10223 case 1:
10224 eps_int = power(2, -BITS_D);
10225 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10226 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10227 prec = blas_prec_double;
10228 break;
10229 case 2:
10230 default:
10231 eps_int = power(2, -BITS_E);
10232 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
10233 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
10234 prec = blas_prec_extra;
10235 break;
10236 }
10237
10238 /* values near underflow, 1, or overflow */
10239 for (norm = -1; norm <= 1; norm++) {
10240
10241 /* number of tests */
10242 for (i = 0; i < ntests; i++) {
10243
10244 /* row or col major */
10245 for (order_val = 0; order_val < 2; order_val++) {
10246 switch (order_val) {
10247 case 0:
10248 order_type = blas_rowmajor;
10249 break;
10250 case 1:
10251 default:
10252 order_type = blas_colmajor;
10253 break;
10254 }
10255
10256 /* no_trans, trans, or conj_trans */
10257 for (trans_val = 0; trans_val < 3; trans_val++) {
10258 switch (trans_val) {
10259 case 0:
10260 trans_type = blas_no_trans;
10261 m_i = m;
10262 n_i = n;
10263 break;
10264 case 1:
10265 trans_type = blas_trans;
10266 m_i = n;
10267 n_i = m;
10268 break;
10269 case 2:
10270 default:
10271 trans_type = blas_conj_trans;
10272 m_i = n;
10273 n_i = m;
10274 break;
10275 }
10276
10277 /* lda=n, n+1, or 2n */
10278 for (lda_val = 0; lda_val < 3; lda_val++) {
10279 switch (lda_val) {
10280 case 0:
10281 lda = m_i;
10282 break;
10283 case 1:
10284 lda = m_i + 1;
10285 break;
10286 case 2:
10287 default:
10288 lda = 2 * m_i;
10289 break;
10290 }
10291 if ((order_type == blas_rowmajor && lda < n) ||
10292 (order_type == blas_colmajor && lda < m))
10293 continue;
10294
10295 /* For the sake of speed, we throw out this case at random */
10296 if (xrand(seed) >= test_prob)
10297 continue;
10298
10299 /* in the trivial cases, no need to run testgen */
10300 if (m > 0 && n > 0)
10301 BLAS_dgemv2_s_s_testgen(norm, order_type, trans_type, m,
10302 n, &alpha, alpha_flag, A, lda,
10303 head_x_gen, tail_x_gen, &beta,
10304 beta_flag, y_gen, seed,
10305 head_r_true, tail_r_true);
10306
10307 count++;
10308
10309 /* varying incx */
10310 for (incx_val = -2; incx_val <= 2; incx_val++) {
10311 if (incx_val == 0)
10312 continue;
10313
10314 /* setting incx */
10315 incx = incx_val;
10316
10317
10318 scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
10319 scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
10320
10321 /* varying incy */
10322 for (incy_val = -2; incy_val <= 2; incy_val++) {
10323 if (incy_val == 0)
10324 continue;
10325
10326 /* setting incy */
10327 incy = incy_val;
10328
10329
10330 dcopy_vector(y_gen, m_i, 1, y, incy_val);
10331
10332 /* call BLAS_dgemv2_s_s_x */
10333 FPU_FIX_STOP;
10334 BLAS_dgemv2_s_s_x(order_type, trans_type, m, n, alpha,
10335 A, lda, head_x, tail_x, incx_val,
10336 beta, y, incy_val, prec);
10337 FPU_FIX_START;
10338
10339 /* set y starting index */
10340 iy = 0;
10341 if (incy < 0)
10342 iy = -(m_i - 1) * incy;
10343
10344 /* computing the ratio */
10345 if (m > 0 && n > 0)
10346 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
10347 /* copy row j of A to temp */
10348 sge_copy_row(order_type, trans_type, m_i, n_i, A,
10349 lda, temp, j);
10350
10351 test_BLAS_ddot2_s_s(n_i, blas_no_conj, alpha,
10352 beta, y_gen[k], y[iy],
10353 head_r_true[k],
10354 tail_r_true[k], temp, 1,
10355 head_x, tail_x, incx_val,
10356 eps_int, un_int, &ratios[j]);
10357
10358 /* take the max ratio */
10359 if (j == 0) {
10360 ratio = ratios[0];
10361 /* The !<= below causes NaN error to be detected.
10362 Note that (NaN > thresh) is always false. */
10363 } else if (!(ratios[j] <= ratio)) {
10364 ratio = ratios[j];
10365 }
10366 iy += incy;
10367 }
10368
10369 /* Increase the number of bad ratio, if the ratio
10370 is bigger than the threshold.
10371 The !<= below causes NaN error to be detected.
10372 Note that (NaN > thresh) is always false. */
10373 if (!(ratio <= thresh)) {
10374 bad_ratios++;
10375
10376 if ((debug == 3) && /* print only when debug is on */
10377 (count != old_count) && /* print if old vector is different
10378 from the current one */
10379 (d_count == find_max_ratio) &&
10380 (p_count <= max_print) &&
10381 (ratio > 0.5 * ratio_max)) {
10382 old_count = count;
10383
10384 printf
10385 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
10386 fname, m, n, ntests, thresh);
10387
10388 /* Print test info */
10389 switch (prec) {
10390 case blas_prec_single:
10391 printf("single ");
10392 break;
10393 case blas_prec_double:
10394 printf("double ");
10395 break;
10396 case blas_prec_indigenous:
10397 printf("indigenous ");
10398 break;
10399 case blas_prec_extra:
10400 printf("extra ");
10401 break;
10402 }
10403 switch (norm) {
10404 case -1:
10405 printf("near_underflow ");
10406 break;
10407 case 0:
10408 printf("near_one ");
10409 break;
10410 case 1:
10411 printf("near_overflow ");
10412 break;
10413 }
10414 switch (order_type) {
10415 case blas_rowmajor:
10416 printf("row_major ");
10417 break;
10418 case blas_colmajor:
10419 printf("col_major ");
10420 break;
10421 }
10422 switch (trans_type) {
10423 case blas_no_trans:
10424 printf("no_trans ");
10425 break;
10426 case blas_trans:
10427 printf("trans ");
10428 break;
10429 case blas_conj_trans:
10430 printf("conj_trans ");
10431 break;
10432 }
10433
10434 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
10435 incy);
10436
10437 sge_print_matrix(A, m_i, n_i, lda, order_type,
10438 "A");
10439
10440 sprint_vector(head_x, n_i, incx_val, "head_x");
10441 sprint_vector(tail_x, n_i, incx_val, "tail_x");
10442 dprint_vector(y_gen, m_i, 1, "y_gen");
10443 dprint_vector(y, m_i, incy_val, "y_final");
10444
10445 printf(" ");
10446 printf("alpha = ");
10447 printf("%24.16e", alpha);
10448 printf("\n ");
10449 printf("beta = ");
10450 printf("%24.16e", beta);
10451 printf("\n");
10452 for (j = 0, k = 0; j < m_i * incy_gen;
10453 j += incy_gen, k++) {
10454 printf(" ");
10455 printf("[%24.16e, %24.16e]", head_r_true[j],
10456 tail_r_true[j]);
10457 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
10458 }
10459
10460 printf(" ratio=%.4e\n", ratio);
10461 p_count++;
10462 }
10463 if (bad_ratios >= MAX_BAD_TESTS) {
10464 printf("\ntoo many failures, exiting....");
10465 printf("\nTesting and compilation");
10466 printf(" are incomplete\n\n");
10467 goto end;
10468 }
10469 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
10470 printf("\nFlagrant ratio error, exiting...");
10471 printf("\nTesting and compilation");
10472 printf(" are incomplete\n\n");
10473 goto end;
10474 }
10475 }
10476 if (d_count == 0) {
10477 if (ratio > ratio_max)
10478 ratio_max = ratio;
10479
10480 if (ratio != 0.0 && ratio < ratio_min)
10481 ratio_min = ratio;
10482
10483 tot_tests++;
10484 }
10485 } /* incy */
10486 } /* incx */
10487 } /* lda */
10488 } /* trans */
10489 } /* order */
10490 } /* tests */
10491 } /* norm */
10492 } /* prec */
10493 } /* beta */
10494 } /* alpha */
10495 } /* debug */
10496
10497 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
10498 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
10499 fname, m, n, ntests, thresh);
10500 printf
10501 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
10502 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
10503 ratio_min, ratio_max);
10504 }
10505
10506 end:
10507 FPU_FIX_STOP;
10508
10509 blas_free(head_x);
10510 blas_free(tail_x);
10511 blas_free(y);
10512 blas_free(head_x_gen);
10513 blas_free(tail_x_gen);
10514 blas_free(y_gen);
10515 blas_free(temp);
10516 blas_free(A);
10517 blas_free(head_r_true);
10518 blas_free(tail_r_true);
10519 blas_free(ratios);
10520
10521 *min_ratio = ratio_min;
10522 *num_bad_ratio = bad_ratios;
10523 *num_tests = tot_tests;
10524 return ratio_max;
10525 }
do_test_zgemv2_z_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)10526 double do_test_zgemv2_z_c_x(int m, int n, int ntests, int *seed,
10527 double thresh, int debug, float test_prob,
10528 double *min_ratio, int *num_bad_ratio,
10529 int *num_tests)
10530
10531 /*
10532 * Purpose
10533 * =======
10534 *
10535 * Runs a series of tests on GEMV2.
10536 *
10537 * Arguments
10538 * =========
10539 *
10540 * m (input) int
10541 * The number of rows
10542 *
10543 * n (input) int
10544 * The number of columns
10545 *
10546 * ntests (input) int
10547 * The number of tests to run for each set of attributes.
10548 *
10549 * seed (input/output) int
10550 * The seed for the random number generator used in testgen().
10551 *
10552 * thresh (input) double
10553 * When the ratio returned from test() exceeds the specified
10554 * threshold, the current size, r_true, r_comp, and ratio will be
10555 * printed. (Since ratio is supposed to be O(1), we can set thresh
10556 * to ~10.)
10557 *
10558 * debug (input) int
10559 * If debug=3, print summary
10560 * If debug=2, print summary only if the number of bad ratios > 0
10561 * If debug=1, print complete info if tests fail
10562 * If debug=0, return max ratio
10563 *
10564 * test_prob (input) float
10565 * The specified test will be performed only if the generated
10566 * random exceeds this threshold.
10567 *
10568 * min_ratio (output) double
10569 * The minimum ratio
10570 *
10571 * num_bad_ratio (output) int
10572 * The number of tests fail; they are above the threshold.
10573 *
10574 * num_tests (output) int
10575 * The number of tests is being performed.
10576 *
10577 * Return value
10578 * ============
10579 *
10580 * The maximum ratio if run successfully, otherwise return -1
10581 *
10582 * Code structure
10583 * ==============
10584 *
10585 * debug loop -- if debug is one, the first loop computes the max ratio
10586 * -- and the last(second) loop outputs debugging information,
10587 * -- if the test fail and its ratio > 0.5 * max ratio.
10588 * -- if debug is zero, the loop is executed once
10589 * alpha loop -- varying alpha: 0, 1, or random
10590 * beta loop -- varying beta: 0, 1, or random
10591 * prec loop -- varying internal prec: single, double, or extra
10592 * norm loop -- varying norm: near undeflow, near one, or
10593 * -- near overflow
10594 * numtest loop -- how many times the test is perform with
10595 * -- above set of attributes
10596 * order loop -- varying order type: rowmajor or colmajor
10597 * trans loop -- varying uplo type: upper or lower
10598 * lda loop -- varying lda: m, m+1, 2m
10599 * incx loop -- varying incx: -2, -1, 1, 2
10600 * incy loop -- varying incy: -2, -1, 1, 2
10601 */
10602 {
10603 /* function name */
10604 const char fname[] = "BLAS_zgemv2_z_c_x";
10605
10606 /* max number of debug lines to print */
10607 const int max_print = 8;
10608
10609 /* Variables in the "x_val" form are loop vars for corresponding
10610 variables */
10611 int i; /* iterate through the repeating tests */
10612 int j, k; /* multipurpose counters or variables */
10613 int iy; /* use to index y */
10614 int incx_val, incy_val, /* for testing different inc values */
10615 incx, incy;
10616 int incy_gen; /* for complex case inc=2, for real case inc=1 */
10617 int d_count; /* counter for debug */
10618 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
10619 int p_count; /* counter for the number of debug lines printed */
10620 int tot_tests; /* total number of tests to be done */
10621 int norm; /* input values of near underflow/one/overflow */
10622 double ratio_max; /* the current maximum ratio */
10623 double ratio_min; /* the current minimum ratio */
10624 double *ratios; /* a temporary variable for calculating ratio */
10625 double ratio; /* the per-use test ratio from test() */
10626 int bad_ratios; /* the number of ratios over the threshold */
10627 double eps_int; /* the internal epsilon expected--2^(-24) for float */
10628 double un_int; /* the internal underflow threshold */
10629 double alpha[2];
10630 double beta[2];
10631 double *A;
10632 float *head_x;
10633 float *tail_x;
10634 double *y;
10635 double *temp; /* use for calculating ratio */
10636
10637 /* x_gen and y_gen are used to store vectors generated by testgen.
10638 they eventually are copied back to x and y */
10639 float *head_x_gen;
10640 float *tail_x_gen;
10641 double *y_gen;
10642
10643 /* the true r calculated by testgen(), in double-double */
10644 double *head_r_true, *tail_r_true;
10645
10646 int alpha_val;
10647 int alpha_flag; /* input flag for BLAS_zgemv2_z_c_testgen */
10648 int beta_val;
10649 int beta_flag; /* input flag for BLAS_zgemv2_z_c_testgen */
10650 int order_val;
10651 enum blas_order_type order_type;
10652 int prec_val;
10653 enum blas_prec_type prec;
10654 int trans_val;
10655 enum blas_trans_type trans_type;
10656 int m_i;
10657 int n_i;
10658 int max_mn; /* the max of m and n */
10659 int lda_val;
10660 int lda;
10661 int saved_seed; /* for saving the original seed */
10662 int count, old_count; /* use for counting the number of testgen calls * 2 */
10663
10664 FPU_FIX_DECL;
10665
10666 /* test for bad arguments */
10667 if (n < 0 || m < 0 || ntests < 0)
10668 BLAS_error(fname, 0, 0, NULL);
10669
10670 /* initialization */
10671 *num_bad_ratio = 0;
10672 *num_tests = 0;
10673 *min_ratio = 0.0;
10674
10675 saved_seed = *seed;
10676 ratio_min = 1e308;
10677 ratio_max = 0.0;
10678 ratio = 0.0;
10679 tot_tests = 0;
10680 p_count = 0;
10681 count = 0;
10682 find_max_ratio = 0;
10683 bad_ratios = 0;
10684 old_count = 0;
10685
10686 if (debug == 3)
10687 find_max_ratio = 1;
10688 max_mn = MAX(m, n);
10689 if (m == 0 || n == 0) {
10690 return 0.0;
10691 }
10692
10693 FPU_FIX_START;
10694
10695 incy_gen = 1;
10696 incy_gen *= 2;
10697
10698 /* get space for calculation */
10699 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
10700 if (max_mn * 2 > 0 && head_x == NULL) {
10701 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10702 }
10703 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
10704 if (max_mn * 2 > 0 && tail_x == NULL) {
10705 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10706 }
10707 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
10708 if (max_mn * 2 > 0 && y == NULL) {
10709 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10710 }
10711 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
10712 if (max_mn > 0 && head_x_gen == NULL) {
10713 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10714 }
10715 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
10716 if (max_mn > 0 && tail_x_gen == NULL) {
10717 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10718 }
10719 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
10720 if (max_mn > 0 && y_gen == NULL) {
10721 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10722 }
10723 temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
10724 if (max_mn > 0 && temp == NULL) {
10725 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10726 }
10727 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
10728 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
10729 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
10730 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10731 }
10732 ratios = (double *) blas_malloc(max_mn * sizeof(double));
10733 if (max_mn > 0 && ratios == NULL) {
10734 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10735 }
10736 A =
10737 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
10738 2);
10739 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
10740 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10741 }
10742
10743 /* The debug iteration:
10744 If debug=1, then will execute the iteration twice. First, compute the
10745 max ratio. Second, print info if ratio > (50% * ratio_max). */
10746 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
10747 bad_ratios = 0; /* set to zero */
10748
10749 if ((debug == 3) && (d_count == find_max_ratio))
10750 *seed = saved_seed; /* restore the original seed */
10751
10752 /* varying alpha */
10753 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
10754 alpha_flag = 0;
10755 switch (alpha_val) {
10756 case 0:
10757 alpha[0] = alpha[1] = 0.0;
10758 alpha_flag = 1;
10759 break;
10760 case 1:
10761 alpha[0] = 1.0;
10762 alpha[1] = 0.0;
10763 alpha_flag = 1;
10764 break;
10765 }
10766
10767 /* varying beta */
10768 for (beta_val = 0; beta_val < 3; beta_val++) {
10769 beta_flag = 0;
10770 switch (beta_val) {
10771 case 0:
10772 beta[0] = beta[1] = 0.0;
10773 beta_flag = 1;
10774 break;
10775 case 1:
10776 beta[0] = 1.0;
10777 beta[1] = 0.0;
10778 beta_flag = 1;
10779 break;
10780 }
10781
10782
10783 /* varying extra precs */
10784 for (prec_val = 0; prec_val <= 2; prec_val++) {
10785 switch (prec_val) {
10786 case 0:
10787 eps_int = power(2, -BITS_D);
10788 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10789 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10790 prec = blas_prec_double;
10791 break;
10792 case 1:
10793 eps_int = power(2, -BITS_D);
10794 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10795 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10796 prec = blas_prec_double;
10797 break;
10798 case 2:
10799 default:
10800 eps_int = power(2, -BITS_E);
10801 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
10802 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
10803 prec = blas_prec_extra;
10804 break;
10805 }
10806
10807 /* values near underflow, 1, or overflow */
10808 for (norm = -1; norm <= 1; norm++) {
10809
10810 /* number of tests */
10811 for (i = 0; i < ntests; i++) {
10812
10813 /* row or col major */
10814 for (order_val = 0; order_val < 2; order_val++) {
10815 switch (order_val) {
10816 case 0:
10817 order_type = blas_rowmajor;
10818 break;
10819 case 1:
10820 default:
10821 order_type = blas_colmajor;
10822 break;
10823 }
10824
10825 /* no_trans, trans, or conj_trans */
10826 for (trans_val = 0; trans_val < 3; trans_val++) {
10827 switch (trans_val) {
10828 case 0:
10829 trans_type = blas_no_trans;
10830 m_i = m;
10831 n_i = n;
10832 break;
10833 case 1:
10834 trans_type = blas_trans;
10835 m_i = n;
10836 n_i = m;
10837 break;
10838 case 2:
10839 default:
10840 trans_type = blas_conj_trans;
10841 m_i = n;
10842 n_i = m;
10843 break;
10844 }
10845
10846 /* lda=n, n+1, or 2n */
10847 for (lda_val = 0; lda_val < 3; lda_val++) {
10848 switch (lda_val) {
10849 case 0:
10850 lda = m_i;
10851 break;
10852 case 1:
10853 lda = m_i + 1;
10854 break;
10855 case 2:
10856 default:
10857 lda = 2 * m_i;
10858 break;
10859 }
10860 if ((order_type == blas_rowmajor && lda < n) ||
10861 (order_type == blas_colmajor && lda < m))
10862 continue;
10863
10864 /* For the sake of speed, we throw out this case at random */
10865 if (xrand(seed) >= test_prob)
10866 continue;
10867
10868 /* in the trivial cases, no need to run testgen */
10869 if (m > 0 && n > 0)
10870 BLAS_zgemv2_z_c_testgen(norm, order_type, trans_type, m,
10871 n, &alpha, alpha_flag, A, lda,
10872 head_x_gen, tail_x_gen, &beta,
10873 beta_flag, y_gen, seed,
10874 head_r_true, tail_r_true);
10875
10876 count++;
10877
10878 /* varying incx */
10879 for (incx_val = -2; incx_val <= 2; incx_val++) {
10880 if (incx_val == 0)
10881 continue;
10882
10883 /* setting incx */
10884 incx = incx_val;
10885 incx *= 2;
10886
10887 ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
10888 ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
10889
10890 /* varying incy */
10891 for (incy_val = -2; incy_val <= 2; incy_val++) {
10892 if (incy_val == 0)
10893 continue;
10894
10895 /* setting incy */
10896 incy = incy_val;
10897 incy *= 2;
10898
10899 zcopy_vector(y_gen, m_i, 1, y, incy_val);
10900
10901 /* call BLAS_zgemv2_z_c_x */
10902 FPU_FIX_STOP;
10903 BLAS_zgemv2_z_c_x(order_type, trans_type, m, n, alpha,
10904 A, lda, head_x, tail_x, incx_val,
10905 beta, y, incy_val, prec);
10906 FPU_FIX_START;
10907
10908 /* set y starting index */
10909 iy = 0;
10910 if (incy < 0)
10911 iy = -(m_i - 1) * incy;
10912
10913 /* computing the ratio */
10914 if (m > 0 && n > 0)
10915 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
10916 /* copy row j of A to temp */
10917 zge_copy_row(order_type, trans_type, m_i, n_i, A,
10918 lda, temp, j);
10919
10920 test_BLAS_zdot2_z_c(n_i, blas_no_conj, alpha,
10921 beta, &y_gen[k], &y[iy],
10922 &head_r_true[k],
10923 &tail_r_true[k], temp, 1,
10924 head_x, tail_x, incx_val,
10925 eps_int, un_int, &ratios[j]);
10926
10927 /* take the max ratio */
10928 if (j == 0) {
10929 ratio = ratios[0];
10930 /* The !<= below causes NaN error to be detected.
10931 Note that (NaN > thresh) is always false. */
10932 } else if (!(ratios[j] <= ratio)) {
10933 ratio = ratios[j];
10934 }
10935 iy += incy;
10936 }
10937
10938 /* Increase the number of bad ratio, if the ratio
10939 is bigger than the threshold.
10940 The !<= below causes NaN error to be detected.
10941 Note that (NaN > thresh) is always false. */
10942 if (!(ratio <= thresh)) {
10943 bad_ratios++;
10944
10945 if ((debug == 3) && /* print only when debug is on */
10946 (count != old_count) && /* print if old vector is different
10947 from the current one */
10948 (d_count == find_max_ratio) &&
10949 (p_count <= max_print) &&
10950 (ratio > 0.5 * ratio_max)) {
10951 old_count = count;
10952
10953 printf
10954 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
10955 fname, m, n, ntests, thresh);
10956
10957 /* Print test info */
10958 switch (prec) {
10959 case blas_prec_single:
10960 printf("single ");
10961 break;
10962 case blas_prec_double:
10963 printf("double ");
10964 break;
10965 case blas_prec_indigenous:
10966 printf("indigenous ");
10967 break;
10968 case blas_prec_extra:
10969 printf("extra ");
10970 break;
10971 }
10972 switch (norm) {
10973 case -1:
10974 printf("near_underflow ");
10975 break;
10976 case 0:
10977 printf("near_one ");
10978 break;
10979 case 1:
10980 printf("near_overflow ");
10981 break;
10982 }
10983 switch (order_type) {
10984 case blas_rowmajor:
10985 printf("row_major ");
10986 break;
10987 case blas_colmajor:
10988 printf("col_major ");
10989 break;
10990 }
10991 switch (trans_type) {
10992 case blas_no_trans:
10993 printf("no_trans ");
10994 break;
10995 case blas_trans:
10996 printf("trans ");
10997 break;
10998 case blas_conj_trans:
10999 printf("conj_trans ");
11000 break;
11001 }
11002
11003 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
11004 incy);
11005
11006 zge_print_matrix(A, m_i, n_i, lda, order_type,
11007 "A");
11008
11009 cprint_vector(head_x, n_i, incx_val, "head_x");
11010 cprint_vector(tail_x, n_i, incx_val, "tail_x");
11011 zprint_vector(y_gen, m_i, 1, "y_gen");
11012 zprint_vector(y, m_i, incy_val, "y_final");
11013
11014 printf(" ");
11015 printf("alpha = ");
11016 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
11017 printf("\n ");
11018 printf("beta = ");
11019 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
11020 printf("\n");
11021 for (j = 0, k = 0; j < m_i * incy_gen;
11022 j += incy_gen, k++) {
11023 printf(" ");
11024 printf
11025 ("([%24.16e %24.16e], [%24.16e %24.16e])",
11026 head_r_true[j], tail_r_true[j],
11027 head_r_true[j + 1], tail_r_true[j + 1]);
11028 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
11029 }
11030
11031 printf(" ratio=%.4e\n", ratio);
11032 p_count++;
11033 }
11034 if (bad_ratios >= MAX_BAD_TESTS) {
11035 printf("\ntoo many failures, exiting....");
11036 printf("\nTesting and compilation");
11037 printf(" are incomplete\n\n");
11038 goto end;
11039 }
11040 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
11041 printf("\nFlagrant ratio error, exiting...");
11042 printf("\nTesting and compilation");
11043 printf(" are incomplete\n\n");
11044 goto end;
11045 }
11046 }
11047 if (d_count == 0) {
11048 if (ratio > ratio_max)
11049 ratio_max = ratio;
11050
11051 if (ratio != 0.0 && ratio < ratio_min)
11052 ratio_min = ratio;
11053
11054 tot_tests++;
11055 }
11056 } /* incy */
11057 } /* incx */
11058 } /* lda */
11059 } /* trans */
11060 } /* order */
11061 } /* tests */
11062 } /* norm */
11063 } /* prec */
11064 } /* beta */
11065 } /* alpha */
11066 } /* debug */
11067
11068 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
11069 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
11070 fname, m, n, ntests, thresh);
11071 printf
11072 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
11073 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
11074 ratio_min, ratio_max);
11075 }
11076
11077 end:
11078 FPU_FIX_STOP;
11079
11080 blas_free(head_x);
11081 blas_free(tail_x);
11082 blas_free(y);
11083 blas_free(head_x_gen);
11084 blas_free(tail_x_gen);
11085 blas_free(y_gen);
11086 blas_free(temp);
11087 blas_free(A);
11088 blas_free(head_r_true);
11089 blas_free(tail_r_true);
11090 blas_free(ratios);
11091
11092 *min_ratio = ratio_min;
11093 *num_bad_ratio = bad_ratios;
11094 *num_tests = tot_tests;
11095 return ratio_max;
11096 }
do_test_zgemv2_c_z_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)11097 double do_test_zgemv2_c_z_x(int m, int n, int ntests, int *seed,
11098 double thresh, int debug, float test_prob,
11099 double *min_ratio, int *num_bad_ratio,
11100 int *num_tests)
11101
11102 /*
11103 * Purpose
11104 * =======
11105 *
11106 * Runs a series of tests on GEMV2.
11107 *
11108 * Arguments
11109 * =========
11110 *
11111 * m (input) int
11112 * The number of rows
11113 *
11114 * n (input) int
11115 * The number of columns
11116 *
11117 * ntests (input) int
11118 * The number of tests to run for each set of attributes.
11119 *
11120 * seed (input/output) int
11121 * The seed for the random number generator used in testgen().
11122 *
11123 * thresh (input) double
11124 * When the ratio returned from test() exceeds the specified
11125 * threshold, the current size, r_true, r_comp, and ratio will be
11126 * printed. (Since ratio is supposed to be O(1), we can set thresh
11127 * to ~10.)
11128 *
11129 * debug (input) int
11130 * If debug=3, print summary
11131 * If debug=2, print summary only if the number of bad ratios > 0
11132 * If debug=1, print complete info if tests fail
11133 * If debug=0, return max ratio
11134 *
11135 * test_prob (input) float
11136 * The specified test will be performed only if the generated
11137 * random exceeds this threshold.
11138 *
11139 * min_ratio (output) double
11140 * The minimum ratio
11141 *
11142 * num_bad_ratio (output) int
11143 * The number of tests fail; they are above the threshold.
11144 *
11145 * num_tests (output) int
11146 * The number of tests is being performed.
11147 *
11148 * Return value
11149 * ============
11150 *
11151 * The maximum ratio if run successfully, otherwise return -1
11152 *
11153 * Code structure
11154 * ==============
11155 *
11156 * debug loop -- if debug is one, the first loop computes the max ratio
11157 * -- and the last(second) loop outputs debugging information,
11158 * -- if the test fail and its ratio > 0.5 * max ratio.
11159 * -- if debug is zero, the loop is executed once
11160 * alpha loop -- varying alpha: 0, 1, or random
11161 * beta loop -- varying beta: 0, 1, or random
11162 * prec loop -- varying internal prec: single, double, or extra
11163 * norm loop -- varying norm: near undeflow, near one, or
11164 * -- near overflow
11165 * numtest loop -- how many times the test is perform with
11166 * -- above set of attributes
11167 * order loop -- varying order type: rowmajor or colmajor
11168 * trans loop -- varying uplo type: upper or lower
11169 * lda loop -- varying lda: m, m+1, 2m
11170 * incx loop -- varying incx: -2, -1, 1, 2
11171 * incy loop -- varying incy: -2, -1, 1, 2
11172 */
11173 {
11174 /* function name */
11175 const char fname[] = "BLAS_zgemv2_c_z_x";
11176
11177 /* max number of debug lines to print */
11178 const int max_print = 8;
11179
11180 /* Variables in the "x_val" form are loop vars for corresponding
11181 variables */
11182 int i; /* iterate through the repeating tests */
11183 int j, k; /* multipurpose counters or variables */
11184 int iy; /* use to index y */
11185 int incx_val, incy_val, /* for testing different inc values */
11186 incx, incy;
11187 int incy_gen; /* for complex case inc=2, for real case inc=1 */
11188 int d_count; /* counter for debug */
11189 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
11190 int p_count; /* counter for the number of debug lines printed */
11191 int tot_tests; /* total number of tests to be done */
11192 int norm; /* input values of near underflow/one/overflow */
11193 double ratio_max; /* the current maximum ratio */
11194 double ratio_min; /* the current minimum ratio */
11195 double *ratios; /* a temporary variable for calculating ratio */
11196 double ratio; /* the per-use test ratio from test() */
11197 int bad_ratios; /* the number of ratios over the threshold */
11198 double eps_int; /* the internal epsilon expected--2^(-24) for float */
11199 double un_int; /* the internal underflow threshold */
11200 double alpha[2];
11201 double beta[2];
11202 float *A;
11203 double *head_x;
11204 double *tail_x;
11205 double *y;
11206 float *temp; /* use for calculating ratio */
11207
11208 /* x_gen and y_gen are used to store vectors generated by testgen.
11209 they eventually are copied back to x and y */
11210 double *head_x_gen;
11211 double *tail_x_gen;
11212 double *y_gen;
11213
11214 /* the true r calculated by testgen(), in double-double */
11215 double *head_r_true, *tail_r_true;
11216
11217 int alpha_val;
11218 int alpha_flag; /* input flag for BLAS_zgemv2_c_z_testgen */
11219 int beta_val;
11220 int beta_flag; /* input flag for BLAS_zgemv2_c_z_testgen */
11221 int order_val;
11222 enum blas_order_type order_type;
11223 int prec_val;
11224 enum blas_prec_type prec;
11225 int trans_val;
11226 enum blas_trans_type trans_type;
11227 int m_i;
11228 int n_i;
11229 int max_mn; /* the max of m and n */
11230 int lda_val;
11231 int lda;
11232 int saved_seed; /* for saving the original seed */
11233 int count, old_count; /* use for counting the number of testgen calls * 2 */
11234
11235 FPU_FIX_DECL;
11236
11237 /* test for bad arguments */
11238 if (n < 0 || m < 0 || ntests < 0)
11239 BLAS_error(fname, 0, 0, NULL);
11240
11241 /* initialization */
11242 *num_bad_ratio = 0;
11243 *num_tests = 0;
11244 *min_ratio = 0.0;
11245
11246 saved_seed = *seed;
11247 ratio_min = 1e308;
11248 ratio_max = 0.0;
11249 ratio = 0.0;
11250 tot_tests = 0;
11251 p_count = 0;
11252 count = 0;
11253 find_max_ratio = 0;
11254 bad_ratios = 0;
11255 old_count = 0;
11256
11257 if (debug == 3)
11258 find_max_ratio = 1;
11259 max_mn = MAX(m, n);
11260 if (m == 0 || n == 0) {
11261 return 0.0;
11262 }
11263
11264 FPU_FIX_START;
11265
11266 incy_gen = 1;
11267 incy_gen *= 2;
11268
11269 /* get space for calculation */
11270 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
11271 if (max_mn * 2 > 0 && head_x == NULL) {
11272 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11273 }
11274 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
11275 if (max_mn * 2 > 0 && tail_x == NULL) {
11276 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11277 }
11278 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
11279 if (max_mn * 2 > 0 && y == NULL) {
11280 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11281 }
11282 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11283 if (max_mn > 0 && head_x_gen == NULL) {
11284 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11285 }
11286 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11287 if (max_mn > 0 && tail_x_gen == NULL) {
11288 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11289 }
11290 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11291 if (max_mn > 0 && y_gen == NULL) {
11292 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11293 }
11294 temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
11295 if (max_mn > 0 && temp == NULL) {
11296 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11297 }
11298 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11299 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11300 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
11301 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11302 }
11303 ratios = (double *) blas_malloc(max_mn * sizeof(double));
11304 if (max_mn > 0 && ratios == NULL) {
11305 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11306 }
11307 A =
11308 (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
11309 2);
11310 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
11311 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11312 }
11313
11314 /* The debug iteration:
11315 If debug=1, then will execute the iteration twice. First, compute the
11316 max ratio. Second, print info if ratio > (50% * ratio_max). */
11317 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
11318 bad_ratios = 0; /* set to zero */
11319
11320 if ((debug == 3) && (d_count == find_max_ratio))
11321 *seed = saved_seed; /* restore the original seed */
11322
11323 /* varying alpha */
11324 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
11325 alpha_flag = 0;
11326 switch (alpha_val) {
11327 case 0:
11328 alpha[0] = alpha[1] = 0.0;
11329 alpha_flag = 1;
11330 break;
11331 case 1:
11332 alpha[0] = 1.0;
11333 alpha[1] = 0.0;
11334 alpha_flag = 1;
11335 break;
11336 }
11337
11338 /* varying beta */
11339 for (beta_val = 0; beta_val < 3; beta_val++) {
11340 beta_flag = 0;
11341 switch (beta_val) {
11342 case 0:
11343 beta[0] = beta[1] = 0.0;
11344 beta_flag = 1;
11345 break;
11346 case 1:
11347 beta[0] = 1.0;
11348 beta[1] = 0.0;
11349 beta_flag = 1;
11350 break;
11351 }
11352
11353
11354 /* varying extra precs */
11355 for (prec_val = 0; prec_val <= 2; prec_val++) {
11356 switch (prec_val) {
11357 case 0:
11358 eps_int = power(2, -BITS_D);
11359 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11360 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11361 prec = blas_prec_double;
11362 break;
11363 case 1:
11364 eps_int = power(2, -BITS_D);
11365 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11366 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11367 prec = blas_prec_double;
11368 break;
11369 case 2:
11370 default:
11371 eps_int = power(2, -BITS_E);
11372 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
11373 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
11374 prec = blas_prec_extra;
11375 break;
11376 }
11377
11378 /* values near underflow, 1, or overflow */
11379 for (norm = -1; norm <= 1; norm++) {
11380
11381 /* number of tests */
11382 for (i = 0; i < ntests; i++) {
11383
11384 /* row or col major */
11385 for (order_val = 0; order_val < 2; order_val++) {
11386 switch (order_val) {
11387 case 0:
11388 order_type = blas_rowmajor;
11389 break;
11390 case 1:
11391 default:
11392 order_type = blas_colmajor;
11393 break;
11394 }
11395
11396 /* no_trans, trans, or conj_trans */
11397 for (trans_val = 0; trans_val < 3; trans_val++) {
11398 switch (trans_val) {
11399 case 0:
11400 trans_type = blas_no_trans;
11401 m_i = m;
11402 n_i = n;
11403 break;
11404 case 1:
11405 trans_type = blas_trans;
11406 m_i = n;
11407 n_i = m;
11408 break;
11409 case 2:
11410 default:
11411 trans_type = blas_conj_trans;
11412 m_i = n;
11413 n_i = m;
11414 break;
11415 }
11416
11417 /* lda=n, n+1, or 2n */
11418 for (lda_val = 0; lda_val < 3; lda_val++) {
11419 switch (lda_val) {
11420 case 0:
11421 lda = m_i;
11422 break;
11423 case 1:
11424 lda = m_i + 1;
11425 break;
11426 case 2:
11427 default:
11428 lda = 2 * m_i;
11429 break;
11430 }
11431 if ((order_type == blas_rowmajor && lda < n) ||
11432 (order_type == blas_colmajor && lda < m))
11433 continue;
11434
11435 /* For the sake of speed, we throw out this case at random */
11436 if (xrand(seed) >= test_prob)
11437 continue;
11438
11439 /* in the trivial cases, no need to run testgen */
11440 if (m > 0 && n > 0)
11441 BLAS_zgemv2_c_z_testgen(norm, order_type, trans_type, m,
11442 n, &alpha, alpha_flag, A, lda,
11443 head_x_gen, tail_x_gen, &beta,
11444 beta_flag, y_gen, seed,
11445 head_r_true, tail_r_true);
11446
11447 count++;
11448
11449 /* varying incx */
11450 for (incx_val = -2; incx_val <= 2; incx_val++) {
11451 if (incx_val == 0)
11452 continue;
11453
11454 /* setting incx */
11455 incx = incx_val;
11456 incx *= 2;
11457
11458 zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
11459 zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
11460
11461 /* varying incy */
11462 for (incy_val = -2; incy_val <= 2; incy_val++) {
11463 if (incy_val == 0)
11464 continue;
11465
11466 /* setting incy */
11467 incy = incy_val;
11468 incy *= 2;
11469
11470 zcopy_vector(y_gen, m_i, 1, y, incy_val);
11471
11472 /* call BLAS_zgemv2_c_z_x */
11473 FPU_FIX_STOP;
11474 BLAS_zgemv2_c_z_x(order_type, trans_type, m, n, alpha,
11475 A, lda, head_x, tail_x, incx_val,
11476 beta, y, incy_val, prec);
11477 FPU_FIX_START;
11478
11479 /* set y starting index */
11480 iy = 0;
11481 if (incy < 0)
11482 iy = -(m_i - 1) * incy;
11483
11484 /* computing the ratio */
11485 if (m > 0 && n > 0)
11486 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
11487 /* copy row j of A to temp */
11488 cge_copy_row(order_type, trans_type, m_i, n_i, A,
11489 lda, temp, j);
11490
11491 test_BLAS_zdot2_c_z(n_i, blas_no_conj, alpha,
11492 beta, &y_gen[k], &y[iy],
11493 &head_r_true[k],
11494 &tail_r_true[k], temp, 1,
11495 head_x, tail_x, incx_val,
11496 eps_int, un_int, &ratios[j]);
11497
11498 /* take the max ratio */
11499 if (j == 0) {
11500 ratio = ratios[0];
11501 /* The !<= below causes NaN error to be detected.
11502 Note that (NaN > thresh) is always false. */
11503 } else if (!(ratios[j] <= ratio)) {
11504 ratio = ratios[j];
11505 }
11506 iy += incy;
11507 }
11508
11509 /* Increase the number of bad ratio, if the ratio
11510 is bigger than the threshold.
11511 The !<= below causes NaN error to be detected.
11512 Note that (NaN > thresh) is always false. */
11513 if (!(ratio <= thresh)) {
11514 bad_ratios++;
11515
11516 if ((debug == 3) && /* print only when debug is on */
11517 (count != old_count) && /* print if old vector is different
11518 from the current one */
11519 (d_count == find_max_ratio) &&
11520 (p_count <= max_print) &&
11521 (ratio > 0.5 * ratio_max)) {
11522 old_count = count;
11523
11524 printf
11525 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
11526 fname, m, n, ntests, thresh);
11527
11528 /* Print test info */
11529 switch (prec) {
11530 case blas_prec_single:
11531 printf("single ");
11532 break;
11533 case blas_prec_double:
11534 printf("double ");
11535 break;
11536 case blas_prec_indigenous:
11537 printf("indigenous ");
11538 break;
11539 case blas_prec_extra:
11540 printf("extra ");
11541 break;
11542 }
11543 switch (norm) {
11544 case -1:
11545 printf("near_underflow ");
11546 break;
11547 case 0:
11548 printf("near_one ");
11549 break;
11550 case 1:
11551 printf("near_overflow ");
11552 break;
11553 }
11554 switch (order_type) {
11555 case blas_rowmajor:
11556 printf("row_major ");
11557 break;
11558 case blas_colmajor:
11559 printf("col_major ");
11560 break;
11561 }
11562 switch (trans_type) {
11563 case blas_no_trans:
11564 printf("no_trans ");
11565 break;
11566 case blas_trans:
11567 printf("trans ");
11568 break;
11569 case blas_conj_trans:
11570 printf("conj_trans ");
11571 break;
11572 }
11573
11574 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
11575 incy);
11576
11577 cge_print_matrix(A, m_i, n_i, lda, order_type,
11578 "A");
11579
11580 zprint_vector(head_x, n_i, incx_val, "head_x");
11581 zprint_vector(tail_x, n_i, incx_val, "tail_x");
11582 zprint_vector(y_gen, m_i, 1, "y_gen");
11583 zprint_vector(y, m_i, incy_val, "y_final");
11584
11585 printf(" ");
11586 printf("alpha = ");
11587 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
11588 printf("\n ");
11589 printf("beta = ");
11590 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
11591 printf("\n");
11592 for (j = 0, k = 0; j < m_i * incy_gen;
11593 j += incy_gen, k++) {
11594 printf(" ");
11595 printf
11596 ("([%24.16e %24.16e], [%24.16e %24.16e])",
11597 head_r_true[j], tail_r_true[j],
11598 head_r_true[j + 1], tail_r_true[j + 1]);
11599 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
11600 }
11601
11602 printf(" ratio=%.4e\n", ratio);
11603 p_count++;
11604 }
11605 if (bad_ratios >= MAX_BAD_TESTS) {
11606 printf("\ntoo many failures, exiting....");
11607 printf("\nTesting and compilation");
11608 printf(" are incomplete\n\n");
11609 goto end;
11610 }
11611 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
11612 printf("\nFlagrant ratio error, exiting...");
11613 printf("\nTesting and compilation");
11614 printf(" are incomplete\n\n");
11615 goto end;
11616 }
11617 }
11618 if (d_count == 0) {
11619 if (ratio > ratio_max)
11620 ratio_max = ratio;
11621
11622 if (ratio != 0.0 && ratio < ratio_min)
11623 ratio_min = ratio;
11624
11625 tot_tests++;
11626 }
11627 } /* incy */
11628 } /* incx */
11629 } /* lda */
11630 } /* trans */
11631 } /* order */
11632 } /* tests */
11633 } /* norm */
11634 } /* prec */
11635 } /* beta */
11636 } /* alpha */
11637 } /* debug */
11638
11639 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
11640 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
11641 fname, m, n, ntests, thresh);
11642 printf
11643 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
11644 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
11645 ratio_min, ratio_max);
11646 }
11647
11648 end:
11649 FPU_FIX_STOP;
11650
11651 blas_free(head_x);
11652 blas_free(tail_x);
11653 blas_free(y);
11654 blas_free(head_x_gen);
11655 blas_free(tail_x_gen);
11656 blas_free(y_gen);
11657 blas_free(temp);
11658 blas_free(A);
11659 blas_free(head_r_true);
11660 blas_free(tail_r_true);
11661 blas_free(ratios);
11662
11663 *min_ratio = ratio_min;
11664 *num_bad_ratio = bad_ratios;
11665 *num_tests = tot_tests;
11666 return ratio_max;
11667 }
do_test_zgemv2_c_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)11668 double do_test_zgemv2_c_c_x(int m, int n, int ntests, int *seed,
11669 double thresh, int debug, float test_prob,
11670 double *min_ratio, int *num_bad_ratio,
11671 int *num_tests)
11672
11673 /*
11674 * Purpose
11675 * =======
11676 *
11677 * Runs a series of tests on GEMV2.
11678 *
11679 * Arguments
11680 * =========
11681 *
11682 * m (input) int
11683 * The number of rows
11684 *
11685 * n (input) int
11686 * The number of columns
11687 *
11688 * ntests (input) int
11689 * The number of tests to run for each set of attributes.
11690 *
11691 * seed (input/output) int
11692 * The seed for the random number generator used in testgen().
11693 *
11694 * thresh (input) double
11695 * When the ratio returned from test() exceeds the specified
11696 * threshold, the current size, r_true, r_comp, and ratio will be
11697 * printed. (Since ratio is supposed to be O(1), we can set thresh
11698 * to ~10.)
11699 *
11700 * debug (input) int
11701 * If debug=3, print summary
11702 * If debug=2, print summary only if the number of bad ratios > 0
11703 * If debug=1, print complete info if tests fail
11704 * If debug=0, return max ratio
11705 *
11706 * test_prob (input) float
11707 * The specified test will be performed only if the generated
11708 * random exceeds this threshold.
11709 *
11710 * min_ratio (output) double
11711 * The minimum ratio
11712 *
11713 * num_bad_ratio (output) int
11714 * The number of tests fail; they are above the threshold.
11715 *
11716 * num_tests (output) int
11717 * The number of tests is being performed.
11718 *
11719 * Return value
11720 * ============
11721 *
11722 * The maximum ratio if run successfully, otherwise return -1
11723 *
11724 * Code structure
11725 * ==============
11726 *
11727 * debug loop -- if debug is one, the first loop computes the max ratio
11728 * -- and the last(second) loop outputs debugging information,
11729 * -- if the test fail and its ratio > 0.5 * max ratio.
11730 * -- if debug is zero, the loop is executed once
11731 * alpha loop -- varying alpha: 0, 1, or random
11732 * beta loop -- varying beta: 0, 1, or random
11733 * prec loop -- varying internal prec: single, double, or extra
11734 * norm loop -- varying norm: near undeflow, near one, or
11735 * -- near overflow
11736 * numtest loop -- how many times the test is perform with
11737 * -- above set of attributes
11738 * order loop -- varying order type: rowmajor or colmajor
11739 * trans loop -- varying uplo type: upper or lower
11740 * lda loop -- varying lda: m, m+1, 2m
11741 * incx loop -- varying incx: -2, -1, 1, 2
11742 * incy loop -- varying incy: -2, -1, 1, 2
11743 */
11744 {
11745 /* function name */
11746 const char fname[] = "BLAS_zgemv2_c_c_x";
11747
11748 /* max number of debug lines to print */
11749 const int max_print = 8;
11750
11751 /* Variables in the "x_val" form are loop vars for corresponding
11752 variables */
11753 int i; /* iterate through the repeating tests */
11754 int j, k; /* multipurpose counters or variables */
11755 int iy; /* use to index y */
11756 int incx_val, incy_val, /* for testing different inc values */
11757 incx, incy;
11758 int incy_gen; /* for complex case inc=2, for real case inc=1 */
11759 int d_count; /* counter for debug */
11760 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
11761 int p_count; /* counter for the number of debug lines printed */
11762 int tot_tests; /* total number of tests to be done */
11763 int norm; /* input values of near underflow/one/overflow */
11764 double ratio_max; /* the current maximum ratio */
11765 double ratio_min; /* the current minimum ratio */
11766 double *ratios; /* a temporary variable for calculating ratio */
11767 double ratio; /* the per-use test ratio from test() */
11768 int bad_ratios; /* the number of ratios over the threshold */
11769 double eps_int; /* the internal epsilon expected--2^(-24) for float */
11770 double un_int; /* the internal underflow threshold */
11771 double alpha[2];
11772 double beta[2];
11773 float *A;
11774 float *head_x;
11775 float *tail_x;
11776 double *y;
11777 float *temp; /* use for calculating ratio */
11778
11779 /* x_gen and y_gen are used to store vectors generated by testgen.
11780 they eventually are copied back to x and y */
11781 float *head_x_gen;
11782 float *tail_x_gen;
11783 double *y_gen;
11784
11785 /* the true r calculated by testgen(), in double-double */
11786 double *head_r_true, *tail_r_true;
11787
11788 int alpha_val;
11789 int alpha_flag; /* input flag for BLAS_zgemv2_c_c_testgen */
11790 int beta_val;
11791 int beta_flag; /* input flag for BLAS_zgemv2_c_c_testgen */
11792 int order_val;
11793 enum blas_order_type order_type;
11794 int prec_val;
11795 enum blas_prec_type prec;
11796 int trans_val;
11797 enum blas_trans_type trans_type;
11798 int m_i;
11799 int n_i;
11800 int max_mn; /* the max of m and n */
11801 int lda_val;
11802 int lda;
11803 int saved_seed; /* for saving the original seed */
11804 int count, old_count; /* use for counting the number of testgen calls * 2 */
11805
11806 FPU_FIX_DECL;
11807
11808 /* test for bad arguments */
11809 if (n < 0 || m < 0 || ntests < 0)
11810 BLAS_error(fname, 0, 0, NULL);
11811
11812 /* initialization */
11813 *num_bad_ratio = 0;
11814 *num_tests = 0;
11815 *min_ratio = 0.0;
11816
11817 saved_seed = *seed;
11818 ratio_min = 1e308;
11819 ratio_max = 0.0;
11820 ratio = 0.0;
11821 tot_tests = 0;
11822 p_count = 0;
11823 count = 0;
11824 find_max_ratio = 0;
11825 bad_ratios = 0;
11826 old_count = 0;
11827
11828 if (debug == 3)
11829 find_max_ratio = 1;
11830 max_mn = MAX(m, n);
11831 if (m == 0 || n == 0) {
11832 return 0.0;
11833 }
11834
11835 FPU_FIX_START;
11836
11837 incy_gen = 1;
11838 incy_gen *= 2;
11839
11840 /* get space for calculation */
11841 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
11842 if (max_mn * 2 > 0 && head_x == NULL) {
11843 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11844 }
11845 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
11846 if (max_mn * 2 > 0 && tail_x == NULL) {
11847 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11848 }
11849 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
11850 if (max_mn * 2 > 0 && y == NULL) {
11851 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11852 }
11853 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
11854 if (max_mn > 0 && head_x_gen == NULL) {
11855 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11856 }
11857 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
11858 if (max_mn > 0 && tail_x_gen == NULL) {
11859 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11860 }
11861 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11862 if (max_mn > 0 && y_gen == NULL) {
11863 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11864 }
11865 temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
11866 if (max_mn > 0 && temp == NULL) {
11867 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11868 }
11869 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11870 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11871 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
11872 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11873 }
11874 ratios = (double *) blas_malloc(max_mn * sizeof(double));
11875 if (max_mn > 0 && ratios == NULL) {
11876 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11877 }
11878 A =
11879 (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
11880 2);
11881 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
11882 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11883 }
11884
11885 /* The debug iteration:
11886 If debug=1, then will execute the iteration twice. First, compute the
11887 max ratio. Second, print info if ratio > (50% * ratio_max). */
11888 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
11889 bad_ratios = 0; /* set to zero */
11890
11891 if ((debug == 3) && (d_count == find_max_ratio))
11892 *seed = saved_seed; /* restore the original seed */
11893
11894 /* varying alpha */
11895 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
11896 alpha_flag = 0;
11897 switch (alpha_val) {
11898 case 0:
11899 alpha[0] = alpha[1] = 0.0;
11900 alpha_flag = 1;
11901 break;
11902 case 1:
11903 alpha[0] = 1.0;
11904 alpha[1] = 0.0;
11905 alpha_flag = 1;
11906 break;
11907 }
11908
11909 /* varying beta */
11910 for (beta_val = 0; beta_val < 3; beta_val++) {
11911 beta_flag = 0;
11912 switch (beta_val) {
11913 case 0:
11914 beta[0] = beta[1] = 0.0;
11915 beta_flag = 1;
11916 break;
11917 case 1:
11918 beta[0] = 1.0;
11919 beta[1] = 0.0;
11920 beta_flag = 1;
11921 break;
11922 }
11923
11924
11925 /* varying extra precs */
11926 for (prec_val = 0; prec_val <= 2; prec_val++) {
11927 switch (prec_val) {
11928 case 0:
11929 eps_int = power(2, -BITS_D);
11930 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11931 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11932 prec = blas_prec_double;
11933 break;
11934 case 1:
11935 eps_int = power(2, -BITS_D);
11936 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11937 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11938 prec = blas_prec_double;
11939 break;
11940 case 2:
11941 default:
11942 eps_int = power(2, -BITS_E);
11943 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
11944 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
11945 prec = blas_prec_extra;
11946 break;
11947 }
11948
11949 /* values near underflow, 1, or overflow */
11950 for (norm = -1; norm <= 1; norm++) {
11951
11952 /* number of tests */
11953 for (i = 0; i < ntests; i++) {
11954
11955 /* row or col major */
11956 for (order_val = 0; order_val < 2; order_val++) {
11957 switch (order_val) {
11958 case 0:
11959 order_type = blas_rowmajor;
11960 break;
11961 case 1:
11962 default:
11963 order_type = blas_colmajor;
11964 break;
11965 }
11966
11967 /* no_trans, trans, or conj_trans */
11968 for (trans_val = 0; trans_val < 3; trans_val++) {
11969 switch (trans_val) {
11970 case 0:
11971 trans_type = blas_no_trans;
11972 m_i = m;
11973 n_i = n;
11974 break;
11975 case 1:
11976 trans_type = blas_trans;
11977 m_i = n;
11978 n_i = m;
11979 break;
11980 case 2:
11981 default:
11982 trans_type = blas_conj_trans;
11983 m_i = n;
11984 n_i = m;
11985 break;
11986 }
11987
11988 /* lda=n, n+1, or 2n */
11989 for (lda_val = 0; lda_val < 3; lda_val++) {
11990 switch (lda_val) {
11991 case 0:
11992 lda = m_i;
11993 break;
11994 case 1:
11995 lda = m_i + 1;
11996 break;
11997 case 2:
11998 default:
11999 lda = 2 * m_i;
12000 break;
12001 }
12002 if ((order_type == blas_rowmajor && lda < n) ||
12003 (order_type == blas_colmajor && lda < m))
12004 continue;
12005
12006 /* For the sake of speed, we throw out this case at random */
12007 if (xrand(seed) >= test_prob)
12008 continue;
12009
12010 /* in the trivial cases, no need to run testgen */
12011 if (m > 0 && n > 0)
12012 BLAS_zgemv2_c_c_testgen(norm, order_type, trans_type, m,
12013 n, &alpha, alpha_flag, A, lda,
12014 head_x_gen, tail_x_gen, &beta,
12015 beta_flag, y_gen, seed,
12016 head_r_true, tail_r_true);
12017
12018 count++;
12019
12020 /* varying incx */
12021 for (incx_val = -2; incx_val <= 2; incx_val++) {
12022 if (incx_val == 0)
12023 continue;
12024
12025 /* setting incx */
12026 incx = incx_val;
12027 incx *= 2;
12028
12029 ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
12030 ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
12031
12032 /* varying incy */
12033 for (incy_val = -2; incy_val <= 2; incy_val++) {
12034 if (incy_val == 0)
12035 continue;
12036
12037 /* setting incy */
12038 incy = incy_val;
12039 incy *= 2;
12040
12041 zcopy_vector(y_gen, m_i, 1, y, incy_val);
12042
12043 /* call BLAS_zgemv2_c_c_x */
12044 FPU_FIX_STOP;
12045 BLAS_zgemv2_c_c_x(order_type, trans_type, m, n, alpha,
12046 A, lda, head_x, tail_x, incx_val,
12047 beta, y, incy_val, prec);
12048 FPU_FIX_START;
12049
12050 /* set y starting index */
12051 iy = 0;
12052 if (incy < 0)
12053 iy = -(m_i - 1) * incy;
12054
12055 /* computing the ratio */
12056 if (m > 0 && n > 0)
12057 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
12058 /* copy row j of A to temp */
12059 cge_copy_row(order_type, trans_type, m_i, n_i, A,
12060 lda, temp, j);
12061
12062 test_BLAS_zdot2_c_c(n_i, blas_no_conj, alpha,
12063 beta, &y_gen[k], &y[iy],
12064 &head_r_true[k],
12065 &tail_r_true[k], temp, 1,
12066 head_x, tail_x, incx_val,
12067 eps_int, un_int, &ratios[j]);
12068
12069 /* take the max ratio */
12070 if (j == 0) {
12071 ratio = ratios[0];
12072 /* The !<= below causes NaN error to be detected.
12073 Note that (NaN > thresh) is always false. */
12074 } else if (!(ratios[j] <= ratio)) {
12075 ratio = ratios[j];
12076 }
12077 iy += incy;
12078 }
12079
12080 /* Increase the number of bad ratio, if the ratio
12081 is bigger than the threshold.
12082 The !<= below causes NaN error to be detected.
12083 Note that (NaN > thresh) is always false. */
12084 if (!(ratio <= thresh)) {
12085 bad_ratios++;
12086
12087 if ((debug == 3) && /* print only when debug is on */
12088 (count != old_count) && /* print if old vector is different
12089 from the current one */
12090 (d_count == find_max_ratio) &&
12091 (p_count <= max_print) &&
12092 (ratio > 0.5 * ratio_max)) {
12093 old_count = count;
12094
12095 printf
12096 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
12097 fname, m, n, ntests, thresh);
12098
12099 /* Print test info */
12100 switch (prec) {
12101 case blas_prec_single:
12102 printf("single ");
12103 break;
12104 case blas_prec_double:
12105 printf("double ");
12106 break;
12107 case blas_prec_indigenous:
12108 printf("indigenous ");
12109 break;
12110 case blas_prec_extra:
12111 printf("extra ");
12112 break;
12113 }
12114 switch (norm) {
12115 case -1:
12116 printf("near_underflow ");
12117 break;
12118 case 0:
12119 printf("near_one ");
12120 break;
12121 case 1:
12122 printf("near_overflow ");
12123 break;
12124 }
12125 switch (order_type) {
12126 case blas_rowmajor:
12127 printf("row_major ");
12128 break;
12129 case blas_colmajor:
12130 printf("col_major ");
12131 break;
12132 }
12133 switch (trans_type) {
12134 case blas_no_trans:
12135 printf("no_trans ");
12136 break;
12137 case blas_trans:
12138 printf("trans ");
12139 break;
12140 case blas_conj_trans:
12141 printf("conj_trans ");
12142 break;
12143 }
12144
12145 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
12146 incy);
12147
12148 cge_print_matrix(A, m_i, n_i, lda, order_type,
12149 "A");
12150
12151 cprint_vector(head_x, n_i, incx_val, "head_x");
12152 cprint_vector(tail_x, n_i, incx_val, "tail_x");
12153 zprint_vector(y_gen, m_i, 1, "y_gen");
12154 zprint_vector(y, m_i, incy_val, "y_final");
12155
12156 printf(" ");
12157 printf("alpha = ");
12158 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
12159 printf("\n ");
12160 printf("beta = ");
12161 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
12162 printf("\n");
12163 for (j = 0, k = 0; j < m_i * incy_gen;
12164 j += incy_gen, k++) {
12165 printf(" ");
12166 printf
12167 ("([%24.16e %24.16e], [%24.16e %24.16e])",
12168 head_r_true[j], tail_r_true[j],
12169 head_r_true[j + 1], tail_r_true[j + 1]);
12170 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
12171 }
12172
12173 printf(" ratio=%.4e\n", ratio);
12174 p_count++;
12175 }
12176 if (bad_ratios >= MAX_BAD_TESTS) {
12177 printf("\ntoo many failures, exiting....");
12178 printf("\nTesting and compilation");
12179 printf(" are incomplete\n\n");
12180 goto end;
12181 }
12182 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
12183 printf("\nFlagrant ratio error, exiting...");
12184 printf("\nTesting and compilation");
12185 printf(" are incomplete\n\n");
12186 goto end;
12187 }
12188 }
12189 if (d_count == 0) {
12190 if (ratio > ratio_max)
12191 ratio_max = ratio;
12192
12193 if (ratio != 0.0 && ratio < ratio_min)
12194 ratio_min = ratio;
12195
12196 tot_tests++;
12197 }
12198 } /* incy */
12199 } /* incx */
12200 } /* lda */
12201 } /* trans */
12202 } /* order */
12203 } /* tests */
12204 } /* norm */
12205 } /* prec */
12206 } /* beta */
12207 } /* alpha */
12208 } /* debug */
12209
12210 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
12211 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
12212 fname, m, n, ntests, thresh);
12213 printf
12214 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
12215 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
12216 ratio_min, ratio_max);
12217 }
12218
12219 end:
12220 FPU_FIX_STOP;
12221
12222 blas_free(head_x);
12223 blas_free(tail_x);
12224 blas_free(y);
12225 blas_free(head_x_gen);
12226 blas_free(tail_x_gen);
12227 blas_free(y_gen);
12228 blas_free(temp);
12229 blas_free(A);
12230 blas_free(head_r_true);
12231 blas_free(tail_r_true);
12232 blas_free(ratios);
12233
12234 *min_ratio = ratio_min;
12235 *num_bad_ratio = bad_ratios;
12236 *num_tests = tot_tests;
12237 return ratio_max;
12238 }
do_test_cgemv2_c_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)12239 double do_test_cgemv2_c_s_x(int m, int n, int ntests, int *seed,
12240 double thresh, int debug, float test_prob,
12241 double *min_ratio, int *num_bad_ratio,
12242 int *num_tests)
12243
12244 /*
12245 * Purpose
12246 * =======
12247 *
12248 * Runs a series of tests on GEMV2.
12249 *
12250 * Arguments
12251 * =========
12252 *
12253 * m (input) int
12254 * The number of rows
12255 *
12256 * n (input) int
12257 * The number of columns
12258 *
12259 * ntests (input) int
12260 * The number of tests to run for each set of attributes.
12261 *
12262 * seed (input/output) int
12263 * The seed for the random number generator used in testgen().
12264 *
12265 * thresh (input) double
12266 * When the ratio returned from test() exceeds the specified
12267 * threshold, the current size, r_true, r_comp, and ratio will be
12268 * printed. (Since ratio is supposed to be O(1), we can set thresh
12269 * to ~10.)
12270 *
12271 * debug (input) int
12272 * If debug=3, print summary
12273 * If debug=2, print summary only if the number of bad ratios > 0
12274 * If debug=1, print complete info if tests fail
12275 * If debug=0, return max ratio
12276 *
12277 * test_prob (input) float
12278 * The specified test will be performed only if the generated
12279 * random exceeds this threshold.
12280 *
12281 * min_ratio (output) double
12282 * The minimum ratio
12283 *
12284 * num_bad_ratio (output) int
12285 * The number of tests fail; they are above the threshold.
12286 *
12287 * num_tests (output) int
12288 * The number of tests is being performed.
12289 *
12290 * Return value
12291 * ============
12292 *
12293 * The maximum ratio if run successfully, otherwise return -1
12294 *
12295 * Code structure
12296 * ==============
12297 *
12298 * debug loop -- if debug is one, the first loop computes the max ratio
12299 * -- and the last(second) loop outputs debugging information,
12300 * -- if the test fail and its ratio > 0.5 * max ratio.
12301 * -- if debug is zero, the loop is executed once
12302 * alpha loop -- varying alpha: 0, 1, or random
12303 * beta loop -- varying beta: 0, 1, or random
12304 * prec loop -- varying internal prec: single, double, or extra
12305 * norm loop -- varying norm: near undeflow, near one, or
12306 * -- near overflow
12307 * numtest loop -- how many times the test is perform with
12308 * -- above set of attributes
12309 * order loop -- varying order type: rowmajor or colmajor
12310 * trans loop -- varying uplo type: upper or lower
12311 * lda loop -- varying lda: m, m+1, 2m
12312 * incx loop -- varying incx: -2, -1, 1, 2
12313 * incy loop -- varying incy: -2, -1, 1, 2
12314 */
12315 {
12316 /* function name */
12317 const char fname[] = "BLAS_cgemv2_c_s_x";
12318
12319 /* max number of debug lines to print */
12320 const int max_print = 8;
12321
12322 /* Variables in the "x_val" form are loop vars for corresponding
12323 variables */
12324 int i; /* iterate through the repeating tests */
12325 int j, k; /* multipurpose counters or variables */
12326 int iy; /* use to index y */
12327 int incx_val, incy_val, /* for testing different inc values */
12328 incx, incy;
12329 int incy_gen; /* for complex case inc=2, for real case inc=1 */
12330 int d_count; /* counter for debug */
12331 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
12332 int p_count; /* counter for the number of debug lines printed */
12333 int tot_tests; /* total number of tests to be done */
12334 int norm; /* input values of near underflow/one/overflow */
12335 double ratio_max; /* the current maximum ratio */
12336 double ratio_min; /* the current minimum ratio */
12337 double *ratios; /* a temporary variable for calculating ratio */
12338 double ratio; /* the per-use test ratio from test() */
12339 int bad_ratios; /* the number of ratios over the threshold */
12340 double eps_int; /* the internal epsilon expected--2^(-24) for float */
12341 double un_int; /* the internal underflow threshold */
12342 float alpha[2];
12343 float beta[2];
12344 float *A;
12345 float *head_x;
12346 float *tail_x;
12347 float *y;
12348 float *temp; /* use for calculating ratio */
12349
12350 /* x_gen and y_gen are used to store vectors generated by testgen.
12351 they eventually are copied back to x and y */
12352 float *head_x_gen;
12353 float *tail_x_gen;
12354 float *y_gen;
12355
12356 /* the true r calculated by testgen(), in double-double */
12357 double *head_r_true, *tail_r_true;
12358
12359 int alpha_val;
12360 int alpha_flag; /* input flag for BLAS_cgemv2_c_s_testgen */
12361 int beta_val;
12362 int beta_flag; /* input flag for BLAS_cgemv2_c_s_testgen */
12363 int order_val;
12364 enum blas_order_type order_type;
12365 int prec_val;
12366 enum blas_prec_type prec;
12367 int trans_val;
12368 enum blas_trans_type trans_type;
12369 int m_i;
12370 int n_i;
12371 int max_mn; /* the max of m and n */
12372 int lda_val;
12373 int lda;
12374 int saved_seed; /* for saving the original seed */
12375 int count, old_count; /* use for counting the number of testgen calls * 2 */
12376
12377 FPU_FIX_DECL;
12378
12379 /* test for bad arguments */
12380 if (n < 0 || m < 0 || ntests < 0)
12381 BLAS_error(fname, 0, 0, NULL);
12382
12383 /* initialization */
12384 *num_bad_ratio = 0;
12385 *num_tests = 0;
12386 *min_ratio = 0.0;
12387
12388 saved_seed = *seed;
12389 ratio_min = 1e308;
12390 ratio_max = 0.0;
12391 ratio = 0.0;
12392 tot_tests = 0;
12393 p_count = 0;
12394 count = 0;
12395 find_max_ratio = 0;
12396 bad_ratios = 0;
12397 old_count = 0;
12398
12399 if (debug == 3)
12400 find_max_ratio = 1;
12401 max_mn = MAX(m, n);
12402 if (m == 0 || n == 0) {
12403 return 0.0;
12404 }
12405
12406 FPU_FIX_START;
12407
12408 incy_gen = 1;
12409 incy_gen *= 2;
12410
12411 /* get space for calculation */
12412 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
12413 if (max_mn * 2 > 0 && head_x == NULL) {
12414 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12415 }
12416 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
12417 if (max_mn * 2 > 0 && tail_x == NULL) {
12418 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12419 }
12420 y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
12421 if (max_mn * 2 > 0 && y == NULL) {
12422 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12423 }
12424 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
12425 if (max_mn > 0 && head_x_gen == NULL) {
12426 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12427 }
12428 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
12429 if (max_mn > 0 && tail_x_gen == NULL) {
12430 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12431 }
12432 y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
12433 if (max_mn > 0 && y_gen == NULL) {
12434 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12435 }
12436 temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
12437 if (max_mn > 0 && temp == NULL) {
12438 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12439 }
12440 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
12441 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
12442 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
12443 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12444 }
12445 ratios = (double *) blas_malloc(max_mn * sizeof(double));
12446 if (max_mn > 0 && ratios == NULL) {
12447 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12448 }
12449 A =
12450 (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
12451 2);
12452 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
12453 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12454 }
12455
12456 /* The debug iteration:
12457 If debug=1, then will execute the iteration twice. First, compute the
12458 max ratio. Second, print info if ratio > (50% * ratio_max). */
12459 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
12460 bad_ratios = 0; /* set to zero */
12461
12462 if ((debug == 3) && (d_count == find_max_ratio))
12463 *seed = saved_seed; /* restore the original seed */
12464
12465 /* varying alpha */
12466 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
12467 alpha_flag = 0;
12468 switch (alpha_val) {
12469 case 0:
12470 alpha[0] = alpha[1] = 0.0;
12471 alpha_flag = 1;
12472 break;
12473 case 1:
12474 alpha[0] = 1.0;
12475 alpha[1] = 0.0;
12476 alpha_flag = 1;
12477 break;
12478 }
12479
12480 /* varying beta */
12481 for (beta_val = 0; beta_val < 3; beta_val++) {
12482 beta_flag = 0;
12483 switch (beta_val) {
12484 case 0:
12485 beta[0] = beta[1] = 0.0;
12486 beta_flag = 1;
12487 break;
12488 case 1:
12489 beta[0] = 1.0;
12490 beta[1] = 0.0;
12491 beta_flag = 1;
12492 break;
12493 }
12494
12495
12496 /* varying extra precs */
12497 for (prec_val = 0; prec_val <= 2; prec_val++) {
12498 switch (prec_val) {
12499 case 0:
12500 eps_int = power(2, -BITS_S);
12501 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
12502 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
12503 prec = blas_prec_single;
12504 break;
12505 case 1:
12506 eps_int = power(2, -BITS_D);
12507 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
12508 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
12509 prec = blas_prec_double;
12510 break;
12511 case 2:
12512 default:
12513 eps_int = power(2, -BITS_E);
12514 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
12515 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
12516 prec = blas_prec_extra;
12517 break;
12518 }
12519
12520 /* values near underflow, 1, or overflow */
12521 for (norm = -1; norm <= 1; norm++) {
12522
12523 /* number of tests */
12524 for (i = 0; i < ntests; i++) {
12525
12526 /* row or col major */
12527 for (order_val = 0; order_val < 2; order_val++) {
12528 switch (order_val) {
12529 case 0:
12530 order_type = blas_rowmajor;
12531 break;
12532 case 1:
12533 default:
12534 order_type = blas_colmajor;
12535 break;
12536 }
12537
12538 /* no_trans, trans, or conj_trans */
12539 for (trans_val = 0; trans_val < 3; trans_val++) {
12540 switch (trans_val) {
12541 case 0:
12542 trans_type = blas_no_trans;
12543 m_i = m;
12544 n_i = n;
12545 break;
12546 case 1:
12547 trans_type = blas_trans;
12548 m_i = n;
12549 n_i = m;
12550 break;
12551 case 2:
12552 default:
12553 trans_type = blas_conj_trans;
12554 m_i = n;
12555 n_i = m;
12556 break;
12557 }
12558
12559 /* lda=n, n+1, or 2n */
12560 for (lda_val = 0; lda_val < 3; lda_val++) {
12561 switch (lda_val) {
12562 case 0:
12563 lda = m_i;
12564 break;
12565 case 1:
12566 lda = m_i + 1;
12567 break;
12568 case 2:
12569 default:
12570 lda = 2 * m_i;
12571 break;
12572 }
12573 if ((order_type == blas_rowmajor && lda < n) ||
12574 (order_type == blas_colmajor && lda < m))
12575 continue;
12576
12577 /* For the sake of speed, we throw out this case at random */
12578 if (xrand(seed) >= test_prob)
12579 continue;
12580
12581 /* in the trivial cases, no need to run testgen */
12582 if (m > 0 && n > 0)
12583 BLAS_cgemv2_c_s_testgen(norm, order_type, trans_type, m,
12584 n, &alpha, alpha_flag, A, lda,
12585 head_x_gen, tail_x_gen, &beta,
12586 beta_flag, y_gen, seed,
12587 head_r_true, tail_r_true);
12588
12589 count++;
12590
12591 /* varying incx */
12592 for (incx_val = -2; incx_val <= 2; incx_val++) {
12593 if (incx_val == 0)
12594 continue;
12595
12596 /* setting incx */
12597 incx = incx_val;
12598
12599
12600 scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
12601 scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
12602
12603 /* varying incy */
12604 for (incy_val = -2; incy_val <= 2; incy_val++) {
12605 if (incy_val == 0)
12606 continue;
12607
12608 /* setting incy */
12609 incy = incy_val;
12610 incy *= 2;
12611
12612 ccopy_vector(y_gen, m_i, 1, y, incy_val);
12613
12614 /* call BLAS_cgemv2_c_s_x */
12615 FPU_FIX_STOP;
12616 BLAS_cgemv2_c_s_x(order_type, trans_type, m, n, alpha,
12617 A, lda, head_x, tail_x, incx_val,
12618 beta, y, incy_val, prec);
12619 FPU_FIX_START;
12620
12621 /* set y starting index */
12622 iy = 0;
12623 if (incy < 0)
12624 iy = -(m_i - 1) * incy;
12625
12626 /* computing the ratio */
12627 if (m > 0 && n > 0)
12628 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
12629 /* copy row j of A to temp */
12630 cge_copy_row(order_type, trans_type, m_i, n_i, A,
12631 lda, temp, j);
12632
12633 test_BLAS_cdot2_c_s(n_i, blas_no_conj, alpha,
12634 beta, &y_gen[k], &y[iy],
12635 &head_r_true[k],
12636 &tail_r_true[k], temp, 1,
12637 head_x, tail_x, incx_val,
12638 eps_int, un_int, &ratios[j]);
12639
12640 /* take the max ratio */
12641 if (j == 0) {
12642 ratio = ratios[0];
12643 /* The !<= below causes NaN error to be detected.
12644 Note that (NaN > thresh) is always false. */
12645 } else if (!(ratios[j] <= ratio)) {
12646 ratio = ratios[j];
12647 }
12648 iy += incy;
12649 }
12650
12651 /* Increase the number of bad ratio, if the ratio
12652 is bigger than the threshold.
12653 The !<= below causes NaN error to be detected.
12654 Note that (NaN > thresh) is always false. */
12655 if (!(ratio <= thresh)) {
12656 bad_ratios++;
12657
12658 if ((debug == 3) && /* print only when debug is on */
12659 (count != old_count) && /* print if old vector is different
12660 from the current one */
12661 (d_count == find_max_ratio) &&
12662 (p_count <= max_print) &&
12663 (ratio > 0.5 * ratio_max)) {
12664 old_count = count;
12665
12666 printf
12667 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
12668 fname, m, n, ntests, thresh);
12669
12670 /* Print test info */
12671 switch (prec) {
12672 case blas_prec_single:
12673 printf("single ");
12674 break;
12675 case blas_prec_double:
12676 printf("double ");
12677 break;
12678 case blas_prec_indigenous:
12679 printf("indigenous ");
12680 break;
12681 case blas_prec_extra:
12682 printf("extra ");
12683 break;
12684 }
12685 switch (norm) {
12686 case -1:
12687 printf("near_underflow ");
12688 break;
12689 case 0:
12690 printf("near_one ");
12691 break;
12692 case 1:
12693 printf("near_overflow ");
12694 break;
12695 }
12696 switch (order_type) {
12697 case blas_rowmajor:
12698 printf("row_major ");
12699 break;
12700 case blas_colmajor:
12701 printf("col_major ");
12702 break;
12703 }
12704 switch (trans_type) {
12705 case blas_no_trans:
12706 printf("no_trans ");
12707 break;
12708 case blas_trans:
12709 printf("trans ");
12710 break;
12711 case blas_conj_trans:
12712 printf("conj_trans ");
12713 break;
12714 }
12715
12716 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
12717 incy);
12718
12719 cge_print_matrix(A, m_i, n_i, lda, order_type,
12720 "A");
12721
12722 sprint_vector(head_x, n_i, incx_val, "head_x");
12723 sprint_vector(tail_x, n_i, incx_val, "tail_x");
12724 cprint_vector(y_gen, m_i, 1, "y_gen");
12725 cprint_vector(y, m_i, incy_val, "y_final");
12726
12727 printf(" ");
12728 printf("alpha = ");
12729 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
12730 printf("\n ");
12731 printf("beta = ");
12732 printf("(%16.8e, %16.8e)", beta[0], beta[1]);
12733 printf("\n");
12734 for (j = 0, k = 0; j < m_i * incy_gen;
12735 j += incy_gen, k++) {
12736 printf(" ");
12737 printf
12738 ("([%24.16e %24.16e], [%24.16e %24.16e])",
12739 head_r_true[j], tail_r_true[j],
12740 head_r_true[j + 1], tail_r_true[j + 1]);
12741 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
12742 }
12743
12744 printf(" ratio=%.4e\n", ratio);
12745 p_count++;
12746 }
12747 if (bad_ratios >= MAX_BAD_TESTS) {
12748 printf("\ntoo many failures, exiting....");
12749 printf("\nTesting and compilation");
12750 printf(" are incomplete\n\n");
12751 goto end;
12752 }
12753 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
12754 printf("\nFlagrant ratio error, exiting...");
12755 printf("\nTesting and compilation");
12756 printf(" are incomplete\n\n");
12757 goto end;
12758 }
12759 }
12760 if (d_count == 0) {
12761 if (ratio > ratio_max)
12762 ratio_max = ratio;
12763
12764 if (ratio != 0.0 && ratio < ratio_min)
12765 ratio_min = ratio;
12766
12767 tot_tests++;
12768 }
12769 } /* incy */
12770 } /* incx */
12771 } /* lda */
12772 } /* trans */
12773 } /* order */
12774 } /* tests */
12775 } /* norm */
12776 } /* prec */
12777 } /* beta */
12778 } /* alpha */
12779 } /* debug */
12780
12781 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
12782 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
12783 fname, m, n, ntests, thresh);
12784 printf
12785 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
12786 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
12787 ratio_min, ratio_max);
12788 }
12789
12790 end:
12791 FPU_FIX_STOP;
12792
12793 blas_free(head_x);
12794 blas_free(tail_x);
12795 blas_free(y);
12796 blas_free(head_x_gen);
12797 blas_free(tail_x_gen);
12798 blas_free(y_gen);
12799 blas_free(temp);
12800 blas_free(A);
12801 blas_free(head_r_true);
12802 blas_free(tail_r_true);
12803 blas_free(ratios);
12804
12805 *min_ratio = ratio_min;
12806 *num_bad_ratio = bad_ratios;
12807 *num_tests = tot_tests;
12808 return ratio_max;
12809 }
do_test_cgemv2_s_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)12810 double do_test_cgemv2_s_c_x(int m, int n, int ntests, int *seed,
12811 double thresh, int debug, float test_prob,
12812 double *min_ratio, int *num_bad_ratio,
12813 int *num_tests)
12814
12815 /*
12816 * Purpose
12817 * =======
12818 *
12819 * Runs a series of tests on GEMV2.
12820 *
12821 * Arguments
12822 * =========
12823 *
12824 * m (input) int
12825 * The number of rows
12826 *
12827 * n (input) int
12828 * The number of columns
12829 *
12830 * ntests (input) int
12831 * The number of tests to run for each set of attributes.
12832 *
12833 * seed (input/output) int
12834 * The seed for the random number generator used in testgen().
12835 *
12836 * thresh (input) double
12837 * When the ratio returned from test() exceeds the specified
12838 * threshold, the current size, r_true, r_comp, and ratio will be
12839 * printed. (Since ratio is supposed to be O(1), we can set thresh
12840 * to ~10.)
12841 *
12842 * debug (input) int
12843 * If debug=3, print summary
12844 * If debug=2, print summary only if the number of bad ratios > 0
12845 * If debug=1, print complete info if tests fail
12846 * If debug=0, return max ratio
12847 *
12848 * test_prob (input) float
12849 * The specified test will be performed only if the generated
12850 * random exceeds this threshold.
12851 *
12852 * min_ratio (output) double
12853 * The minimum ratio
12854 *
12855 * num_bad_ratio (output) int
12856 * The number of tests fail; they are above the threshold.
12857 *
12858 * num_tests (output) int
12859 * The number of tests is being performed.
12860 *
12861 * Return value
12862 * ============
12863 *
12864 * The maximum ratio if run successfully, otherwise return -1
12865 *
12866 * Code structure
12867 * ==============
12868 *
12869 * debug loop -- if debug is one, the first loop computes the max ratio
12870 * -- and the last(second) loop outputs debugging information,
12871 * -- if the test fail and its ratio > 0.5 * max ratio.
12872 * -- if debug is zero, the loop is executed once
12873 * alpha loop -- varying alpha: 0, 1, or random
12874 * beta loop -- varying beta: 0, 1, or random
12875 * prec loop -- varying internal prec: single, double, or extra
12876 * norm loop -- varying norm: near undeflow, near one, or
12877 * -- near overflow
12878 * numtest loop -- how many times the test is perform with
12879 * -- above set of attributes
12880 * order loop -- varying order type: rowmajor or colmajor
12881 * trans loop -- varying uplo type: upper or lower
12882 * lda loop -- varying lda: m, m+1, 2m
12883 * incx loop -- varying incx: -2, -1, 1, 2
12884 * incy loop -- varying incy: -2, -1, 1, 2
12885 */
12886 {
12887 /* function name */
12888 const char fname[] = "BLAS_cgemv2_s_c_x";
12889
12890 /* max number of debug lines to print */
12891 const int max_print = 8;
12892
12893 /* Variables in the "x_val" form are loop vars for corresponding
12894 variables */
12895 int i; /* iterate through the repeating tests */
12896 int j, k; /* multipurpose counters or variables */
12897 int iy; /* use to index y */
12898 int incx_val, incy_val, /* for testing different inc values */
12899 incx, incy;
12900 int incy_gen; /* for complex case inc=2, for real case inc=1 */
12901 int d_count; /* counter for debug */
12902 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
12903 int p_count; /* counter for the number of debug lines printed */
12904 int tot_tests; /* total number of tests to be done */
12905 int norm; /* input values of near underflow/one/overflow */
12906 double ratio_max; /* the current maximum ratio */
12907 double ratio_min; /* the current minimum ratio */
12908 double *ratios; /* a temporary variable for calculating ratio */
12909 double ratio; /* the per-use test ratio from test() */
12910 int bad_ratios; /* the number of ratios over the threshold */
12911 double eps_int; /* the internal epsilon expected--2^(-24) for float */
12912 double un_int; /* the internal underflow threshold */
12913 float alpha[2];
12914 float beta[2];
12915 float *A;
12916 float *head_x;
12917 float *tail_x;
12918 float *y;
12919 float *temp; /* use for calculating ratio */
12920
12921 /* x_gen and y_gen are used to store vectors generated by testgen.
12922 they eventually are copied back to x and y */
12923 float *head_x_gen;
12924 float *tail_x_gen;
12925 float *y_gen;
12926
12927 /* the true r calculated by testgen(), in double-double */
12928 double *head_r_true, *tail_r_true;
12929
12930 int alpha_val;
12931 int alpha_flag; /* input flag for BLAS_cgemv2_s_c_testgen */
12932 int beta_val;
12933 int beta_flag; /* input flag for BLAS_cgemv2_s_c_testgen */
12934 int order_val;
12935 enum blas_order_type order_type;
12936 int prec_val;
12937 enum blas_prec_type prec;
12938 int trans_val;
12939 enum blas_trans_type trans_type;
12940 int m_i;
12941 int n_i;
12942 int max_mn; /* the max of m and n */
12943 int lda_val;
12944 int lda;
12945 int saved_seed; /* for saving the original seed */
12946 int count, old_count; /* use for counting the number of testgen calls * 2 */
12947
12948 FPU_FIX_DECL;
12949
12950 /* test for bad arguments */
12951 if (n < 0 || m < 0 || ntests < 0)
12952 BLAS_error(fname, 0, 0, NULL);
12953
12954 /* initialization */
12955 *num_bad_ratio = 0;
12956 *num_tests = 0;
12957 *min_ratio = 0.0;
12958
12959 saved_seed = *seed;
12960 ratio_min = 1e308;
12961 ratio_max = 0.0;
12962 ratio = 0.0;
12963 tot_tests = 0;
12964 p_count = 0;
12965 count = 0;
12966 find_max_ratio = 0;
12967 bad_ratios = 0;
12968 old_count = 0;
12969
12970 if (debug == 3)
12971 find_max_ratio = 1;
12972 max_mn = MAX(m, n);
12973 if (m == 0 || n == 0) {
12974 return 0.0;
12975 }
12976
12977 FPU_FIX_START;
12978
12979 incy_gen = 1;
12980 incy_gen *= 2;
12981
12982 /* get space for calculation */
12983 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
12984 if (max_mn * 2 > 0 && head_x == NULL) {
12985 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12986 }
12987 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
12988 if (max_mn * 2 > 0 && tail_x == NULL) {
12989 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12990 }
12991 y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
12992 if (max_mn * 2 > 0 && y == NULL) {
12993 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12994 }
12995 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
12996 if (max_mn > 0 && head_x_gen == NULL) {
12997 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12998 }
12999 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
13000 if (max_mn > 0 && tail_x_gen == NULL) {
13001 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13002 }
13003 y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
13004 if (max_mn > 0 && y_gen == NULL) {
13005 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13006 }
13007 temp = (float *) blas_malloc(max_mn * sizeof(float));
13008 if (max_mn > 0 && temp == NULL) {
13009 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13010 }
13011 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
13012 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
13013 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
13014 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13015 }
13016 ratios = (double *) blas_malloc(max_mn * sizeof(double));
13017 if (max_mn > 0 && ratios == NULL) {
13018 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13019 }
13020 A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
13021 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
13022 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13023 }
13024
13025 /* The debug iteration:
13026 If debug=1, then will execute the iteration twice. First, compute the
13027 max ratio. Second, print info if ratio > (50% * ratio_max). */
13028 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
13029 bad_ratios = 0; /* set to zero */
13030
13031 if ((debug == 3) && (d_count == find_max_ratio))
13032 *seed = saved_seed; /* restore the original seed */
13033
13034 /* varying alpha */
13035 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
13036 alpha_flag = 0;
13037 switch (alpha_val) {
13038 case 0:
13039 alpha[0] = alpha[1] = 0.0;
13040 alpha_flag = 1;
13041 break;
13042 case 1:
13043 alpha[0] = 1.0;
13044 alpha[1] = 0.0;
13045 alpha_flag = 1;
13046 break;
13047 }
13048
13049 /* varying beta */
13050 for (beta_val = 0; beta_val < 3; beta_val++) {
13051 beta_flag = 0;
13052 switch (beta_val) {
13053 case 0:
13054 beta[0] = beta[1] = 0.0;
13055 beta_flag = 1;
13056 break;
13057 case 1:
13058 beta[0] = 1.0;
13059 beta[1] = 0.0;
13060 beta_flag = 1;
13061 break;
13062 }
13063
13064
13065 /* varying extra precs */
13066 for (prec_val = 0; prec_val <= 2; prec_val++) {
13067 switch (prec_val) {
13068 case 0:
13069 eps_int = power(2, -BITS_S);
13070 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
13071 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
13072 prec = blas_prec_single;
13073 break;
13074 case 1:
13075 eps_int = power(2, -BITS_D);
13076 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
13077 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
13078 prec = blas_prec_double;
13079 break;
13080 case 2:
13081 default:
13082 eps_int = power(2, -BITS_E);
13083 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
13084 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
13085 prec = blas_prec_extra;
13086 break;
13087 }
13088
13089 /* values near underflow, 1, or overflow */
13090 for (norm = -1; norm <= 1; norm++) {
13091
13092 /* number of tests */
13093 for (i = 0; i < ntests; i++) {
13094
13095 /* row or col major */
13096 for (order_val = 0; order_val < 2; order_val++) {
13097 switch (order_val) {
13098 case 0:
13099 order_type = blas_rowmajor;
13100 break;
13101 case 1:
13102 default:
13103 order_type = blas_colmajor;
13104 break;
13105 }
13106
13107 /* no_trans, trans, or conj_trans */
13108 for (trans_val = 0; trans_val < 3; trans_val++) {
13109 switch (trans_val) {
13110 case 0:
13111 trans_type = blas_no_trans;
13112 m_i = m;
13113 n_i = n;
13114 break;
13115 case 1:
13116 trans_type = blas_trans;
13117 m_i = n;
13118 n_i = m;
13119 break;
13120 case 2:
13121 default:
13122 trans_type = blas_conj_trans;
13123 m_i = n;
13124 n_i = m;
13125 break;
13126 }
13127
13128 /* lda=n, n+1, or 2n */
13129 for (lda_val = 0; lda_val < 3; lda_val++) {
13130 switch (lda_val) {
13131 case 0:
13132 lda = m_i;
13133 break;
13134 case 1:
13135 lda = m_i + 1;
13136 break;
13137 case 2:
13138 default:
13139 lda = 2 * m_i;
13140 break;
13141 }
13142 if ((order_type == blas_rowmajor && lda < n) ||
13143 (order_type == blas_colmajor && lda < m))
13144 continue;
13145
13146 /* For the sake of speed, we throw out this case at random */
13147 if (xrand(seed) >= test_prob)
13148 continue;
13149
13150 /* in the trivial cases, no need to run testgen */
13151 if (m > 0 && n > 0)
13152 BLAS_cgemv2_s_c_testgen(norm, order_type, trans_type, m,
13153 n, &alpha, alpha_flag, A, lda,
13154 head_x_gen, tail_x_gen, &beta,
13155 beta_flag, y_gen, seed,
13156 head_r_true, tail_r_true);
13157
13158 count++;
13159
13160 /* varying incx */
13161 for (incx_val = -2; incx_val <= 2; incx_val++) {
13162 if (incx_val == 0)
13163 continue;
13164
13165 /* setting incx */
13166 incx = incx_val;
13167 incx *= 2;
13168
13169 ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
13170 ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
13171
13172 /* varying incy */
13173 for (incy_val = -2; incy_val <= 2; incy_val++) {
13174 if (incy_val == 0)
13175 continue;
13176
13177 /* setting incy */
13178 incy = incy_val;
13179 incy *= 2;
13180
13181 ccopy_vector(y_gen, m_i, 1, y, incy_val);
13182
13183 /* call BLAS_cgemv2_s_c_x */
13184 FPU_FIX_STOP;
13185 BLAS_cgemv2_s_c_x(order_type, trans_type, m, n, alpha,
13186 A, lda, head_x, tail_x, incx_val,
13187 beta, y, incy_val, prec);
13188 FPU_FIX_START;
13189
13190 /* set y starting index */
13191 iy = 0;
13192 if (incy < 0)
13193 iy = -(m_i - 1) * incy;
13194
13195 /* computing the ratio */
13196 if (m > 0 && n > 0)
13197 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
13198 /* copy row j of A to temp */
13199 sge_copy_row(order_type, trans_type, m_i, n_i, A,
13200 lda, temp, j);
13201
13202 test_BLAS_cdot2_s_c(n_i, blas_no_conj, alpha,
13203 beta, &y_gen[k], &y[iy],
13204 &head_r_true[k],
13205 &tail_r_true[k], temp, 1,
13206 head_x, tail_x, incx_val,
13207 eps_int, un_int, &ratios[j]);
13208
13209 /* take the max ratio */
13210 if (j == 0) {
13211 ratio = ratios[0];
13212 /* The !<= below causes NaN error to be detected.
13213 Note that (NaN > thresh) is always false. */
13214 } else if (!(ratios[j] <= ratio)) {
13215 ratio = ratios[j];
13216 }
13217 iy += incy;
13218 }
13219
13220 /* Increase the number of bad ratio, if the ratio
13221 is bigger than the threshold.
13222 The !<= below causes NaN error to be detected.
13223 Note that (NaN > thresh) is always false. */
13224 if (!(ratio <= thresh)) {
13225 bad_ratios++;
13226
13227 if ((debug == 3) && /* print only when debug is on */
13228 (count != old_count) && /* print if old vector is different
13229 from the current one */
13230 (d_count == find_max_ratio) &&
13231 (p_count <= max_print) &&
13232 (ratio > 0.5 * ratio_max)) {
13233 old_count = count;
13234
13235 printf
13236 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
13237 fname, m, n, ntests, thresh);
13238
13239 /* Print test info */
13240 switch (prec) {
13241 case blas_prec_single:
13242 printf("single ");
13243 break;
13244 case blas_prec_double:
13245 printf("double ");
13246 break;
13247 case blas_prec_indigenous:
13248 printf("indigenous ");
13249 break;
13250 case blas_prec_extra:
13251 printf("extra ");
13252 break;
13253 }
13254 switch (norm) {
13255 case -1:
13256 printf("near_underflow ");
13257 break;
13258 case 0:
13259 printf("near_one ");
13260 break;
13261 case 1:
13262 printf("near_overflow ");
13263 break;
13264 }
13265 switch (order_type) {
13266 case blas_rowmajor:
13267 printf("row_major ");
13268 break;
13269 case blas_colmajor:
13270 printf("col_major ");
13271 break;
13272 }
13273 switch (trans_type) {
13274 case blas_no_trans:
13275 printf("no_trans ");
13276 break;
13277 case blas_trans:
13278 printf("trans ");
13279 break;
13280 case blas_conj_trans:
13281 printf("conj_trans ");
13282 break;
13283 }
13284
13285 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
13286 incy);
13287
13288 sge_print_matrix(A, m_i, n_i, lda, order_type,
13289 "A");
13290
13291 cprint_vector(head_x, n_i, incx_val, "head_x");
13292 cprint_vector(tail_x, n_i, incx_val, "tail_x");
13293 cprint_vector(y_gen, m_i, 1, "y_gen");
13294 cprint_vector(y, m_i, incy_val, "y_final");
13295
13296 printf(" ");
13297 printf("alpha = ");
13298 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
13299 printf("\n ");
13300 printf("beta = ");
13301 printf("(%16.8e, %16.8e)", beta[0], beta[1]);
13302 printf("\n");
13303 for (j = 0, k = 0; j < m_i * incy_gen;
13304 j += incy_gen, k++) {
13305 printf(" ");
13306 printf
13307 ("([%24.16e %24.16e], [%24.16e %24.16e])",
13308 head_r_true[j], tail_r_true[j],
13309 head_r_true[j + 1], tail_r_true[j + 1]);
13310 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
13311 }
13312
13313 printf(" ratio=%.4e\n", ratio);
13314 p_count++;
13315 }
13316 if (bad_ratios >= MAX_BAD_TESTS) {
13317 printf("\ntoo many failures, exiting....");
13318 printf("\nTesting and compilation");
13319 printf(" are incomplete\n\n");
13320 goto end;
13321 }
13322 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
13323 printf("\nFlagrant ratio error, exiting...");
13324 printf("\nTesting and compilation");
13325 printf(" are incomplete\n\n");
13326 goto end;
13327 }
13328 }
13329 if (d_count == 0) {
13330 if (ratio > ratio_max)
13331 ratio_max = ratio;
13332
13333 if (ratio != 0.0 && ratio < ratio_min)
13334 ratio_min = ratio;
13335
13336 tot_tests++;
13337 }
13338 } /* incy */
13339 } /* incx */
13340 } /* lda */
13341 } /* trans */
13342 } /* order */
13343 } /* tests */
13344 } /* norm */
13345 } /* prec */
13346 } /* beta */
13347 } /* alpha */
13348 } /* debug */
13349
13350 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
13351 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
13352 fname, m, n, ntests, thresh);
13353 printf
13354 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
13355 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
13356 ratio_min, ratio_max);
13357 }
13358
13359 end:
13360 FPU_FIX_STOP;
13361
13362 blas_free(head_x);
13363 blas_free(tail_x);
13364 blas_free(y);
13365 blas_free(head_x_gen);
13366 blas_free(tail_x_gen);
13367 blas_free(y_gen);
13368 blas_free(temp);
13369 blas_free(A);
13370 blas_free(head_r_true);
13371 blas_free(tail_r_true);
13372 blas_free(ratios);
13373
13374 *min_ratio = ratio_min;
13375 *num_bad_ratio = bad_ratios;
13376 *num_tests = tot_tests;
13377 return ratio_max;
13378 }
do_test_cgemv2_s_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)13379 double do_test_cgemv2_s_s_x(int m, int n, int ntests, int *seed,
13380 double thresh, int debug, float test_prob,
13381 double *min_ratio, int *num_bad_ratio,
13382 int *num_tests)
13383
13384 /*
13385 * Purpose
13386 * =======
13387 *
13388 * Runs a series of tests on GEMV2.
13389 *
13390 * Arguments
13391 * =========
13392 *
13393 * m (input) int
13394 * The number of rows
13395 *
13396 * n (input) int
13397 * The number of columns
13398 *
13399 * ntests (input) int
13400 * The number of tests to run for each set of attributes.
13401 *
13402 * seed (input/output) int
13403 * The seed for the random number generator used in testgen().
13404 *
13405 * thresh (input) double
13406 * When the ratio returned from test() exceeds the specified
13407 * threshold, the current size, r_true, r_comp, and ratio will be
13408 * printed. (Since ratio is supposed to be O(1), we can set thresh
13409 * to ~10.)
13410 *
13411 * debug (input) int
13412 * If debug=3, print summary
13413 * If debug=2, print summary only if the number of bad ratios > 0
13414 * If debug=1, print complete info if tests fail
13415 * If debug=0, return max ratio
13416 *
13417 * test_prob (input) float
13418 * The specified test will be performed only if the generated
13419 * random exceeds this threshold.
13420 *
13421 * min_ratio (output) double
13422 * The minimum ratio
13423 *
13424 * num_bad_ratio (output) int
13425 * The number of tests fail; they are above the threshold.
13426 *
13427 * num_tests (output) int
13428 * The number of tests is being performed.
13429 *
13430 * Return value
13431 * ============
13432 *
13433 * The maximum ratio if run successfully, otherwise return -1
13434 *
13435 * Code structure
13436 * ==============
13437 *
13438 * debug loop -- if debug is one, the first loop computes the max ratio
13439 * -- and the last(second) loop outputs debugging information,
13440 * -- if the test fail and its ratio > 0.5 * max ratio.
13441 * -- if debug is zero, the loop is executed once
13442 * alpha loop -- varying alpha: 0, 1, or random
13443 * beta loop -- varying beta: 0, 1, or random
13444 * prec loop -- varying internal prec: single, double, or extra
13445 * norm loop -- varying norm: near undeflow, near one, or
13446 * -- near overflow
13447 * numtest loop -- how many times the test is perform with
13448 * -- above set of attributes
13449 * order loop -- varying order type: rowmajor or colmajor
13450 * trans loop -- varying uplo type: upper or lower
13451 * lda loop -- varying lda: m, m+1, 2m
13452 * incx loop -- varying incx: -2, -1, 1, 2
13453 * incy loop -- varying incy: -2, -1, 1, 2
13454 */
13455 {
13456 /* function name */
13457 const char fname[] = "BLAS_cgemv2_s_s_x";
13458
13459 /* max number of debug lines to print */
13460 const int max_print = 8;
13461
13462 /* Variables in the "x_val" form are loop vars for corresponding
13463 variables */
13464 int i; /* iterate through the repeating tests */
13465 int j, k; /* multipurpose counters or variables */
13466 int iy; /* use to index y */
13467 int incx_val, incy_val, /* for testing different inc values */
13468 incx, incy;
13469 int incy_gen; /* for complex case inc=2, for real case inc=1 */
13470 int d_count; /* counter for debug */
13471 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
13472 int p_count; /* counter for the number of debug lines printed */
13473 int tot_tests; /* total number of tests to be done */
13474 int norm; /* input values of near underflow/one/overflow */
13475 double ratio_max; /* the current maximum ratio */
13476 double ratio_min; /* the current minimum ratio */
13477 double *ratios; /* a temporary variable for calculating ratio */
13478 double ratio; /* the per-use test ratio from test() */
13479 int bad_ratios; /* the number of ratios over the threshold */
13480 double eps_int; /* the internal epsilon expected--2^(-24) for float */
13481 double un_int; /* the internal underflow threshold */
13482 float alpha[2];
13483 float beta[2];
13484 float *A;
13485 float *head_x;
13486 float *tail_x;
13487 float *y;
13488 float *temp; /* use for calculating ratio */
13489
13490 /* x_gen and y_gen are used to store vectors generated by testgen.
13491 they eventually are copied back to x and y */
13492 float *head_x_gen;
13493 float *tail_x_gen;
13494 float *y_gen;
13495
13496 /* the true r calculated by testgen(), in double-double */
13497 double *head_r_true, *tail_r_true;
13498
13499 int alpha_val;
13500 int alpha_flag; /* input flag for BLAS_cgemv2_s_s_testgen */
13501 int beta_val;
13502 int beta_flag; /* input flag for BLAS_cgemv2_s_s_testgen */
13503 int order_val;
13504 enum blas_order_type order_type;
13505 int prec_val;
13506 enum blas_prec_type prec;
13507 int trans_val;
13508 enum blas_trans_type trans_type;
13509 int m_i;
13510 int n_i;
13511 int max_mn; /* the max of m and n */
13512 int lda_val;
13513 int lda;
13514 int saved_seed; /* for saving the original seed */
13515 int count, old_count; /* use for counting the number of testgen calls * 2 */
13516
13517 FPU_FIX_DECL;
13518
13519 /* test for bad arguments */
13520 if (n < 0 || m < 0 || ntests < 0)
13521 BLAS_error(fname, 0, 0, NULL);
13522
13523 /* initialization */
13524 *num_bad_ratio = 0;
13525 *num_tests = 0;
13526 *min_ratio = 0.0;
13527
13528 saved_seed = *seed;
13529 ratio_min = 1e308;
13530 ratio_max = 0.0;
13531 ratio = 0.0;
13532 tot_tests = 0;
13533 p_count = 0;
13534 count = 0;
13535 find_max_ratio = 0;
13536 bad_ratios = 0;
13537 old_count = 0;
13538
13539 if (debug == 3)
13540 find_max_ratio = 1;
13541 max_mn = MAX(m, n);
13542 if (m == 0 || n == 0) {
13543 return 0.0;
13544 }
13545
13546 FPU_FIX_START;
13547
13548 incy_gen = 1;
13549 incy_gen *= 2;
13550
13551 /* get space for calculation */
13552 head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
13553 if (max_mn * 2 > 0 && head_x == NULL) {
13554 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13555 }
13556 tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
13557 if (max_mn * 2 > 0 && tail_x == NULL) {
13558 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13559 }
13560 y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
13561 if (max_mn * 2 > 0 && y == NULL) {
13562 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13563 }
13564 head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
13565 if (max_mn > 0 && head_x_gen == NULL) {
13566 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13567 }
13568 tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
13569 if (max_mn > 0 && tail_x_gen == NULL) {
13570 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13571 }
13572 y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
13573 if (max_mn > 0 && y_gen == NULL) {
13574 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13575 }
13576 temp = (float *) blas_malloc(max_mn * sizeof(float));
13577 if (max_mn > 0 && temp == NULL) {
13578 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13579 }
13580 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
13581 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
13582 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
13583 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13584 }
13585 ratios = (double *) blas_malloc(max_mn * sizeof(double));
13586 if (max_mn > 0 && ratios == NULL) {
13587 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13588 }
13589 A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
13590 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
13591 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13592 }
13593
13594 /* The debug iteration:
13595 If debug=1, then will execute the iteration twice. First, compute the
13596 max ratio. Second, print info if ratio > (50% * ratio_max). */
13597 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
13598 bad_ratios = 0; /* set to zero */
13599
13600 if ((debug == 3) && (d_count == find_max_ratio))
13601 *seed = saved_seed; /* restore the original seed */
13602
13603 /* varying alpha */
13604 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
13605 alpha_flag = 0;
13606 switch (alpha_val) {
13607 case 0:
13608 alpha[0] = alpha[1] = 0.0;
13609 alpha_flag = 1;
13610 break;
13611 case 1:
13612 alpha[0] = 1.0;
13613 alpha[1] = 0.0;
13614 alpha_flag = 1;
13615 break;
13616 }
13617
13618 /* varying beta */
13619 for (beta_val = 0; beta_val < 3; beta_val++) {
13620 beta_flag = 0;
13621 switch (beta_val) {
13622 case 0:
13623 beta[0] = beta[1] = 0.0;
13624 beta_flag = 1;
13625 break;
13626 case 1:
13627 beta[0] = 1.0;
13628 beta[1] = 0.0;
13629 beta_flag = 1;
13630 break;
13631 }
13632
13633
13634 /* varying extra precs */
13635 for (prec_val = 0; prec_val <= 2; prec_val++) {
13636 switch (prec_val) {
13637 case 0:
13638 eps_int = power(2, -BITS_S);
13639 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
13640 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
13641 prec = blas_prec_single;
13642 break;
13643 case 1:
13644 eps_int = power(2, -BITS_D);
13645 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
13646 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
13647 prec = blas_prec_double;
13648 break;
13649 case 2:
13650 default:
13651 eps_int = power(2, -BITS_E);
13652 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
13653 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
13654 prec = blas_prec_extra;
13655 break;
13656 }
13657
13658 /* values near underflow, 1, or overflow */
13659 for (norm = -1; norm <= 1; norm++) {
13660
13661 /* number of tests */
13662 for (i = 0; i < ntests; i++) {
13663
13664 /* row or col major */
13665 for (order_val = 0; order_val < 2; order_val++) {
13666 switch (order_val) {
13667 case 0:
13668 order_type = blas_rowmajor;
13669 break;
13670 case 1:
13671 default:
13672 order_type = blas_colmajor;
13673 break;
13674 }
13675
13676 /* no_trans, trans, or conj_trans */
13677 for (trans_val = 0; trans_val < 3; trans_val++) {
13678 switch (trans_val) {
13679 case 0:
13680 trans_type = blas_no_trans;
13681 m_i = m;
13682 n_i = n;
13683 break;
13684 case 1:
13685 trans_type = blas_trans;
13686 m_i = n;
13687 n_i = m;
13688 break;
13689 case 2:
13690 default:
13691 trans_type = blas_conj_trans;
13692 m_i = n;
13693 n_i = m;
13694 break;
13695 }
13696
13697 /* lda=n, n+1, or 2n */
13698 for (lda_val = 0; lda_val < 3; lda_val++) {
13699 switch (lda_val) {
13700 case 0:
13701 lda = m_i;
13702 break;
13703 case 1:
13704 lda = m_i + 1;
13705 break;
13706 case 2:
13707 default:
13708 lda = 2 * m_i;
13709 break;
13710 }
13711 if ((order_type == blas_rowmajor && lda < n) ||
13712 (order_type == blas_colmajor && lda < m))
13713 continue;
13714
13715 /* For the sake of speed, we throw out this case at random */
13716 if (xrand(seed) >= test_prob)
13717 continue;
13718
13719 /* in the trivial cases, no need to run testgen */
13720 if (m > 0 && n > 0)
13721 BLAS_cgemv2_s_s_testgen(norm, order_type, trans_type, m,
13722 n, &alpha, alpha_flag, A, lda,
13723 head_x_gen, tail_x_gen, &beta,
13724 beta_flag, y_gen, seed,
13725 head_r_true, tail_r_true);
13726
13727 count++;
13728
13729 /* varying incx */
13730 for (incx_val = -2; incx_val <= 2; incx_val++) {
13731 if (incx_val == 0)
13732 continue;
13733
13734 /* setting incx */
13735 incx = incx_val;
13736
13737
13738 scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
13739 scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
13740
13741 /* varying incy */
13742 for (incy_val = -2; incy_val <= 2; incy_val++) {
13743 if (incy_val == 0)
13744 continue;
13745
13746 /* setting incy */
13747 incy = incy_val;
13748 incy *= 2;
13749
13750 ccopy_vector(y_gen, m_i, 1, y, incy_val);
13751
13752 /* call BLAS_cgemv2_s_s_x */
13753 FPU_FIX_STOP;
13754 BLAS_cgemv2_s_s_x(order_type, trans_type, m, n, alpha,
13755 A, lda, head_x, tail_x, incx_val,
13756 beta, y, incy_val, prec);
13757 FPU_FIX_START;
13758
13759 /* set y starting index */
13760 iy = 0;
13761 if (incy < 0)
13762 iy = -(m_i - 1) * incy;
13763
13764 /* computing the ratio */
13765 if (m > 0 && n > 0)
13766 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
13767 /* copy row j of A to temp */
13768 sge_copy_row(order_type, trans_type, m_i, n_i, A,
13769 lda, temp, j);
13770
13771 test_BLAS_cdot2_s_s(n_i, blas_no_conj, alpha,
13772 beta, &y_gen[k], &y[iy],
13773 &head_r_true[k],
13774 &tail_r_true[k], temp, 1,
13775 head_x, tail_x, incx_val,
13776 eps_int, un_int, &ratios[j]);
13777
13778 /* take the max ratio */
13779 if (j == 0) {
13780 ratio = ratios[0];
13781 /* The !<= below causes NaN error to be detected.
13782 Note that (NaN > thresh) is always false. */
13783 } else if (!(ratios[j] <= ratio)) {
13784 ratio = ratios[j];
13785 }
13786 iy += incy;
13787 }
13788
13789 /* Increase the number of bad ratio, if the ratio
13790 is bigger than the threshold.
13791 The !<= below causes NaN error to be detected.
13792 Note that (NaN > thresh) is always false. */
13793 if (!(ratio <= thresh)) {
13794 bad_ratios++;
13795
13796 if ((debug == 3) && /* print only when debug is on */
13797 (count != old_count) && /* print if old vector is different
13798 from the current one */
13799 (d_count == find_max_ratio) &&
13800 (p_count <= max_print) &&
13801 (ratio > 0.5 * ratio_max)) {
13802 old_count = count;
13803
13804 printf
13805 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
13806 fname, m, n, ntests, thresh);
13807
13808 /* Print test info */
13809 switch (prec) {
13810 case blas_prec_single:
13811 printf("single ");
13812 break;
13813 case blas_prec_double:
13814 printf("double ");
13815 break;
13816 case blas_prec_indigenous:
13817 printf("indigenous ");
13818 break;
13819 case blas_prec_extra:
13820 printf("extra ");
13821 break;
13822 }
13823 switch (norm) {
13824 case -1:
13825 printf("near_underflow ");
13826 break;
13827 case 0:
13828 printf("near_one ");
13829 break;
13830 case 1:
13831 printf("near_overflow ");
13832 break;
13833 }
13834 switch (order_type) {
13835 case blas_rowmajor:
13836 printf("row_major ");
13837 break;
13838 case blas_colmajor:
13839 printf("col_major ");
13840 break;
13841 }
13842 switch (trans_type) {
13843 case blas_no_trans:
13844 printf("no_trans ");
13845 break;
13846 case blas_trans:
13847 printf("trans ");
13848 break;
13849 case blas_conj_trans:
13850 printf("conj_trans ");
13851 break;
13852 }
13853
13854 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
13855 incy);
13856
13857 sge_print_matrix(A, m_i, n_i, lda, order_type,
13858 "A");
13859
13860 sprint_vector(head_x, n_i, incx_val, "head_x");
13861 sprint_vector(tail_x, n_i, incx_val, "tail_x");
13862 cprint_vector(y_gen, m_i, 1, "y_gen");
13863 cprint_vector(y, m_i, incy_val, "y_final");
13864
13865 printf(" ");
13866 printf("alpha = ");
13867 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
13868 printf("\n ");
13869 printf("beta = ");
13870 printf("(%16.8e, %16.8e)", beta[0], beta[1]);
13871 printf("\n");
13872 for (j = 0, k = 0; j < m_i * incy_gen;
13873 j += incy_gen, k++) {
13874 printf(" ");
13875 printf
13876 ("([%24.16e %24.16e], [%24.16e %24.16e])",
13877 head_r_true[j], tail_r_true[j],
13878 head_r_true[j + 1], tail_r_true[j + 1]);
13879 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
13880 }
13881
13882 printf(" ratio=%.4e\n", ratio);
13883 p_count++;
13884 }
13885 if (bad_ratios >= MAX_BAD_TESTS) {
13886 printf("\ntoo many failures, exiting....");
13887 printf("\nTesting and compilation");
13888 printf(" are incomplete\n\n");
13889 goto end;
13890 }
13891 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
13892 printf("\nFlagrant ratio error, exiting...");
13893 printf("\nTesting and compilation");
13894 printf(" are incomplete\n\n");
13895 goto end;
13896 }
13897 }
13898 if (d_count == 0) {
13899 if (ratio > ratio_max)
13900 ratio_max = ratio;
13901
13902 if (ratio != 0.0 && ratio < ratio_min)
13903 ratio_min = ratio;
13904
13905 tot_tests++;
13906 }
13907 } /* incy */
13908 } /* incx */
13909 } /* lda */
13910 } /* trans */
13911 } /* order */
13912 } /* tests */
13913 } /* norm */
13914 } /* prec */
13915 } /* beta */
13916 } /* alpha */
13917 } /* debug */
13918
13919 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
13920 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
13921 fname, m, n, ntests, thresh);
13922 printf
13923 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
13924 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
13925 ratio_min, ratio_max);
13926 }
13927
13928 end:
13929 FPU_FIX_STOP;
13930
13931 blas_free(head_x);
13932 blas_free(tail_x);
13933 blas_free(y);
13934 blas_free(head_x_gen);
13935 blas_free(tail_x_gen);
13936 blas_free(y_gen);
13937 blas_free(temp);
13938 blas_free(A);
13939 blas_free(head_r_true);
13940 blas_free(tail_r_true);
13941 blas_free(ratios);
13942
13943 *min_ratio = ratio_min;
13944 *num_bad_ratio = bad_ratios;
13945 *num_tests = tot_tests;
13946 return ratio_max;
13947 }
do_test_zgemv2_z_d_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)13948 double do_test_zgemv2_z_d_x(int m, int n, int ntests, int *seed,
13949 double thresh, int debug, float test_prob,
13950 double *min_ratio, int *num_bad_ratio,
13951 int *num_tests)
13952
13953 /*
13954 * Purpose
13955 * =======
13956 *
13957 * Runs a series of tests on GEMV2.
13958 *
13959 * Arguments
13960 * =========
13961 *
13962 * m (input) int
13963 * The number of rows
13964 *
13965 * n (input) int
13966 * The number of columns
13967 *
13968 * ntests (input) int
13969 * The number of tests to run for each set of attributes.
13970 *
13971 * seed (input/output) int
13972 * The seed for the random number generator used in testgen().
13973 *
13974 * thresh (input) double
13975 * When the ratio returned from test() exceeds the specified
13976 * threshold, the current size, r_true, r_comp, and ratio will be
13977 * printed. (Since ratio is supposed to be O(1), we can set thresh
13978 * to ~10.)
13979 *
13980 * debug (input) int
13981 * If debug=3, print summary
13982 * If debug=2, print summary only if the number of bad ratios > 0
13983 * If debug=1, print complete info if tests fail
13984 * If debug=0, return max ratio
13985 *
13986 * test_prob (input) float
13987 * The specified test will be performed only if the generated
13988 * random exceeds this threshold.
13989 *
13990 * min_ratio (output) double
13991 * The minimum ratio
13992 *
13993 * num_bad_ratio (output) int
13994 * The number of tests fail; they are above the threshold.
13995 *
13996 * num_tests (output) int
13997 * The number of tests is being performed.
13998 *
13999 * Return value
14000 * ============
14001 *
14002 * The maximum ratio if run successfully, otherwise return -1
14003 *
14004 * Code structure
14005 * ==============
14006 *
14007 * debug loop -- if debug is one, the first loop computes the max ratio
14008 * -- and the last(second) loop outputs debugging information,
14009 * -- if the test fail and its ratio > 0.5 * max ratio.
14010 * -- if debug is zero, the loop is executed once
14011 * alpha loop -- varying alpha: 0, 1, or random
14012 * beta loop -- varying beta: 0, 1, or random
14013 * prec loop -- varying internal prec: single, double, or extra
14014 * norm loop -- varying norm: near undeflow, near one, or
14015 * -- near overflow
14016 * numtest loop -- how many times the test is perform with
14017 * -- above set of attributes
14018 * order loop -- varying order type: rowmajor or colmajor
14019 * trans loop -- varying uplo type: upper or lower
14020 * lda loop -- varying lda: m, m+1, 2m
14021 * incx loop -- varying incx: -2, -1, 1, 2
14022 * incy loop -- varying incy: -2, -1, 1, 2
14023 */
14024 {
14025 /* function name */
14026 const char fname[] = "BLAS_zgemv2_z_d_x";
14027
14028 /* max number of debug lines to print */
14029 const int max_print = 8;
14030
14031 /* Variables in the "x_val" form are loop vars for corresponding
14032 variables */
14033 int i; /* iterate through the repeating tests */
14034 int j, k; /* multipurpose counters or variables */
14035 int iy; /* use to index y */
14036 int incx_val, incy_val, /* for testing different inc values */
14037 incx, incy;
14038 int incy_gen; /* for complex case inc=2, for real case inc=1 */
14039 int d_count; /* counter for debug */
14040 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
14041 int p_count; /* counter for the number of debug lines printed */
14042 int tot_tests; /* total number of tests to be done */
14043 int norm; /* input values of near underflow/one/overflow */
14044 double ratio_max; /* the current maximum ratio */
14045 double ratio_min; /* the current minimum ratio */
14046 double *ratios; /* a temporary variable for calculating ratio */
14047 double ratio; /* the per-use test ratio from test() */
14048 int bad_ratios; /* the number of ratios over the threshold */
14049 double eps_int; /* the internal epsilon expected--2^(-24) for float */
14050 double un_int; /* the internal underflow threshold */
14051 double alpha[2];
14052 double beta[2];
14053 double *A;
14054 double *head_x;
14055 double *tail_x;
14056 double *y;
14057 double *temp; /* use for calculating ratio */
14058
14059 /* x_gen and y_gen are used to store vectors generated by testgen.
14060 they eventually are copied back to x and y */
14061 double *head_x_gen;
14062 double *tail_x_gen;
14063 double *y_gen;
14064
14065 /* the true r calculated by testgen(), in double-double */
14066 double *head_r_true, *tail_r_true;
14067
14068 int alpha_val;
14069 int alpha_flag; /* input flag for BLAS_zgemv2_z_d_testgen */
14070 int beta_val;
14071 int beta_flag; /* input flag for BLAS_zgemv2_z_d_testgen */
14072 int order_val;
14073 enum blas_order_type order_type;
14074 int prec_val;
14075 enum blas_prec_type prec;
14076 int trans_val;
14077 enum blas_trans_type trans_type;
14078 int m_i;
14079 int n_i;
14080 int max_mn; /* the max of m and n */
14081 int lda_val;
14082 int lda;
14083 int saved_seed; /* for saving the original seed */
14084 int count, old_count; /* use for counting the number of testgen calls * 2 */
14085
14086 FPU_FIX_DECL;
14087
14088 /* test for bad arguments */
14089 if (n < 0 || m < 0 || ntests < 0)
14090 BLAS_error(fname, 0, 0, NULL);
14091
14092 /* initialization */
14093 *num_bad_ratio = 0;
14094 *num_tests = 0;
14095 *min_ratio = 0.0;
14096
14097 saved_seed = *seed;
14098 ratio_min = 1e308;
14099 ratio_max = 0.0;
14100 ratio = 0.0;
14101 tot_tests = 0;
14102 p_count = 0;
14103 count = 0;
14104 find_max_ratio = 0;
14105 bad_ratios = 0;
14106 old_count = 0;
14107
14108 if (debug == 3)
14109 find_max_ratio = 1;
14110 max_mn = MAX(m, n);
14111 if (m == 0 || n == 0) {
14112 return 0.0;
14113 }
14114
14115 FPU_FIX_START;
14116
14117 incy_gen = 1;
14118 incy_gen *= 2;
14119
14120 /* get space for calculation */
14121 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
14122 if (max_mn * 2 > 0 && head_x == NULL) {
14123 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14124 }
14125 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
14126 if (max_mn * 2 > 0 && tail_x == NULL) {
14127 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14128 }
14129 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
14130 if (max_mn * 2 > 0 && y == NULL) {
14131 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14132 }
14133 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
14134 if (max_mn > 0 && head_x_gen == NULL) {
14135 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14136 }
14137 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
14138 if (max_mn > 0 && tail_x_gen == NULL) {
14139 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14140 }
14141 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14142 if (max_mn > 0 && y_gen == NULL) {
14143 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14144 }
14145 temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14146 if (max_mn > 0 && temp == NULL) {
14147 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14148 }
14149 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14150 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14151 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
14152 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14153 }
14154 ratios = (double *) blas_malloc(max_mn * sizeof(double));
14155 if (max_mn > 0 && ratios == NULL) {
14156 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14157 }
14158 A =
14159 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
14160 2);
14161 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
14162 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14163 }
14164
14165 /* The debug iteration:
14166 If debug=1, then will execute the iteration twice. First, compute the
14167 max ratio. Second, print info if ratio > (50% * ratio_max). */
14168 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
14169 bad_ratios = 0; /* set to zero */
14170
14171 if ((debug == 3) && (d_count == find_max_ratio))
14172 *seed = saved_seed; /* restore the original seed */
14173
14174 /* varying alpha */
14175 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
14176 alpha_flag = 0;
14177 switch (alpha_val) {
14178 case 0:
14179 alpha[0] = alpha[1] = 0.0;
14180 alpha_flag = 1;
14181 break;
14182 case 1:
14183 alpha[0] = 1.0;
14184 alpha[1] = 0.0;
14185 alpha_flag = 1;
14186 break;
14187 }
14188
14189 /* varying beta */
14190 for (beta_val = 0; beta_val < 3; beta_val++) {
14191 beta_flag = 0;
14192 switch (beta_val) {
14193 case 0:
14194 beta[0] = beta[1] = 0.0;
14195 beta_flag = 1;
14196 break;
14197 case 1:
14198 beta[0] = 1.0;
14199 beta[1] = 0.0;
14200 beta_flag = 1;
14201 break;
14202 }
14203
14204
14205 /* varying extra precs */
14206 for (prec_val = 0; prec_val <= 2; prec_val++) {
14207 switch (prec_val) {
14208 case 0:
14209 eps_int = power(2, -BITS_D);
14210 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
14211 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
14212 prec = blas_prec_double;
14213 break;
14214 case 1:
14215 eps_int = power(2, -BITS_D);
14216 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
14217 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
14218 prec = blas_prec_double;
14219 break;
14220 case 2:
14221 default:
14222 eps_int = power(2, -BITS_E);
14223 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
14224 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
14225 prec = blas_prec_extra;
14226 break;
14227 }
14228
14229 /* values near underflow, 1, or overflow */
14230 for (norm = -1; norm <= 1; norm++) {
14231
14232 /* number of tests */
14233 for (i = 0; i < ntests; i++) {
14234
14235 /* row or col major */
14236 for (order_val = 0; order_val < 2; order_val++) {
14237 switch (order_val) {
14238 case 0:
14239 order_type = blas_rowmajor;
14240 break;
14241 case 1:
14242 default:
14243 order_type = blas_colmajor;
14244 break;
14245 }
14246
14247 /* no_trans, trans, or conj_trans */
14248 for (trans_val = 0; trans_val < 3; trans_val++) {
14249 switch (trans_val) {
14250 case 0:
14251 trans_type = blas_no_trans;
14252 m_i = m;
14253 n_i = n;
14254 break;
14255 case 1:
14256 trans_type = blas_trans;
14257 m_i = n;
14258 n_i = m;
14259 break;
14260 case 2:
14261 default:
14262 trans_type = blas_conj_trans;
14263 m_i = n;
14264 n_i = m;
14265 break;
14266 }
14267
14268 /* lda=n, n+1, or 2n */
14269 for (lda_val = 0; lda_val < 3; lda_val++) {
14270 switch (lda_val) {
14271 case 0:
14272 lda = m_i;
14273 break;
14274 case 1:
14275 lda = m_i + 1;
14276 break;
14277 case 2:
14278 default:
14279 lda = 2 * m_i;
14280 break;
14281 }
14282 if ((order_type == blas_rowmajor && lda < n) ||
14283 (order_type == blas_colmajor && lda < m))
14284 continue;
14285
14286 /* For the sake of speed, we throw out this case at random */
14287 if (xrand(seed) >= test_prob)
14288 continue;
14289
14290 /* in the trivial cases, no need to run testgen */
14291 if (m > 0 && n > 0)
14292 BLAS_zgemv2_z_d_testgen(norm, order_type, trans_type, m,
14293 n, &alpha, alpha_flag, A, lda,
14294 head_x_gen, tail_x_gen, &beta,
14295 beta_flag, y_gen, seed,
14296 head_r_true, tail_r_true);
14297
14298 count++;
14299
14300 /* varying incx */
14301 for (incx_val = -2; incx_val <= 2; incx_val++) {
14302 if (incx_val == 0)
14303 continue;
14304
14305 /* setting incx */
14306 incx = incx_val;
14307
14308
14309 dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
14310 dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
14311
14312 /* varying incy */
14313 for (incy_val = -2; incy_val <= 2; incy_val++) {
14314 if (incy_val == 0)
14315 continue;
14316
14317 /* setting incy */
14318 incy = incy_val;
14319 incy *= 2;
14320
14321 zcopy_vector(y_gen, m_i, 1, y, incy_val);
14322
14323 /* call BLAS_zgemv2_z_d_x */
14324 FPU_FIX_STOP;
14325 BLAS_zgemv2_z_d_x(order_type, trans_type, m, n, alpha,
14326 A, lda, head_x, tail_x, incx_val,
14327 beta, y, incy_val, prec);
14328 FPU_FIX_START;
14329
14330 /* set y starting index */
14331 iy = 0;
14332 if (incy < 0)
14333 iy = -(m_i - 1) * incy;
14334
14335 /* computing the ratio */
14336 if (m > 0 && n > 0)
14337 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
14338 /* copy row j of A to temp */
14339 zge_copy_row(order_type, trans_type, m_i, n_i, A,
14340 lda, temp, j);
14341
14342 test_BLAS_zdot2_z_d(n_i, blas_no_conj, alpha,
14343 beta, &y_gen[k], &y[iy],
14344 &head_r_true[k],
14345 &tail_r_true[k], temp, 1,
14346 head_x, tail_x, incx_val,
14347 eps_int, un_int, &ratios[j]);
14348
14349 /* take the max ratio */
14350 if (j == 0) {
14351 ratio = ratios[0];
14352 /* The !<= below causes NaN error to be detected.
14353 Note that (NaN > thresh) is always false. */
14354 } else if (!(ratios[j] <= ratio)) {
14355 ratio = ratios[j];
14356 }
14357 iy += incy;
14358 }
14359
14360 /* Increase the number of bad ratio, if the ratio
14361 is bigger than the threshold.
14362 The !<= below causes NaN error to be detected.
14363 Note that (NaN > thresh) is always false. */
14364 if (!(ratio <= thresh)) {
14365 bad_ratios++;
14366
14367 if ((debug == 3) && /* print only when debug is on */
14368 (count != old_count) && /* print if old vector is different
14369 from the current one */
14370 (d_count == find_max_ratio) &&
14371 (p_count <= max_print) &&
14372 (ratio > 0.5 * ratio_max)) {
14373 old_count = count;
14374
14375 printf
14376 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
14377 fname, m, n, ntests, thresh);
14378
14379 /* Print test info */
14380 switch (prec) {
14381 case blas_prec_single:
14382 printf("single ");
14383 break;
14384 case blas_prec_double:
14385 printf("double ");
14386 break;
14387 case blas_prec_indigenous:
14388 printf("indigenous ");
14389 break;
14390 case blas_prec_extra:
14391 printf("extra ");
14392 break;
14393 }
14394 switch (norm) {
14395 case -1:
14396 printf("near_underflow ");
14397 break;
14398 case 0:
14399 printf("near_one ");
14400 break;
14401 case 1:
14402 printf("near_overflow ");
14403 break;
14404 }
14405 switch (order_type) {
14406 case blas_rowmajor:
14407 printf("row_major ");
14408 break;
14409 case blas_colmajor:
14410 printf("col_major ");
14411 break;
14412 }
14413 switch (trans_type) {
14414 case blas_no_trans:
14415 printf("no_trans ");
14416 break;
14417 case blas_trans:
14418 printf("trans ");
14419 break;
14420 case blas_conj_trans:
14421 printf("conj_trans ");
14422 break;
14423 }
14424
14425 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
14426 incy);
14427
14428 zge_print_matrix(A, m_i, n_i, lda, order_type,
14429 "A");
14430
14431 dprint_vector(head_x, n_i, incx_val, "head_x");
14432 dprint_vector(tail_x, n_i, incx_val, "tail_x");
14433 zprint_vector(y_gen, m_i, 1, "y_gen");
14434 zprint_vector(y, m_i, incy_val, "y_final");
14435
14436 printf(" ");
14437 printf("alpha = ");
14438 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
14439 printf("\n ");
14440 printf("beta = ");
14441 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
14442 printf("\n");
14443 for (j = 0, k = 0; j < m_i * incy_gen;
14444 j += incy_gen, k++) {
14445 printf(" ");
14446 printf
14447 ("([%24.16e %24.16e], [%24.16e %24.16e])",
14448 head_r_true[j], tail_r_true[j],
14449 head_r_true[j + 1], tail_r_true[j + 1]);
14450 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
14451 }
14452
14453 printf(" ratio=%.4e\n", ratio);
14454 p_count++;
14455 }
14456 if (bad_ratios >= MAX_BAD_TESTS) {
14457 printf("\ntoo many failures, exiting....");
14458 printf("\nTesting and compilation");
14459 printf(" are incomplete\n\n");
14460 goto end;
14461 }
14462 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
14463 printf("\nFlagrant ratio error, exiting...");
14464 printf("\nTesting and compilation");
14465 printf(" are incomplete\n\n");
14466 goto end;
14467 }
14468 }
14469 if (d_count == 0) {
14470 if (ratio > ratio_max)
14471 ratio_max = ratio;
14472
14473 if (ratio != 0.0 && ratio < ratio_min)
14474 ratio_min = ratio;
14475
14476 tot_tests++;
14477 }
14478 } /* incy */
14479 } /* incx */
14480 } /* lda */
14481 } /* trans */
14482 } /* order */
14483 } /* tests */
14484 } /* norm */
14485 } /* prec */
14486 } /* beta */
14487 } /* alpha */
14488 } /* debug */
14489
14490 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
14491 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
14492 fname, m, n, ntests, thresh);
14493 printf
14494 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
14495 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
14496 ratio_min, ratio_max);
14497 }
14498
14499 end:
14500 FPU_FIX_STOP;
14501
14502 blas_free(head_x);
14503 blas_free(tail_x);
14504 blas_free(y);
14505 blas_free(head_x_gen);
14506 blas_free(tail_x_gen);
14507 blas_free(y_gen);
14508 blas_free(temp);
14509 blas_free(A);
14510 blas_free(head_r_true);
14511 blas_free(tail_r_true);
14512 blas_free(ratios);
14513
14514 *min_ratio = ratio_min;
14515 *num_bad_ratio = bad_ratios;
14516 *num_tests = tot_tests;
14517 return ratio_max;
14518 }
do_test_zgemv2_d_z_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)14519 double do_test_zgemv2_d_z_x(int m, int n, int ntests, int *seed,
14520 double thresh, int debug, float test_prob,
14521 double *min_ratio, int *num_bad_ratio,
14522 int *num_tests)
14523
14524 /*
14525 * Purpose
14526 * =======
14527 *
14528 * Runs a series of tests on GEMV2.
14529 *
14530 * Arguments
14531 * =========
14532 *
14533 * m (input) int
14534 * The number of rows
14535 *
14536 * n (input) int
14537 * The number of columns
14538 *
14539 * ntests (input) int
14540 * The number of tests to run for each set of attributes.
14541 *
14542 * seed (input/output) int
14543 * The seed for the random number generator used in testgen().
14544 *
14545 * thresh (input) double
14546 * When the ratio returned from test() exceeds the specified
14547 * threshold, the current size, r_true, r_comp, and ratio will be
14548 * printed. (Since ratio is supposed to be O(1), we can set thresh
14549 * to ~10.)
14550 *
14551 * debug (input) int
14552 * If debug=3, print summary
14553 * If debug=2, print summary only if the number of bad ratios > 0
14554 * If debug=1, print complete info if tests fail
14555 * If debug=0, return max ratio
14556 *
14557 * test_prob (input) float
14558 * The specified test will be performed only if the generated
14559 * random exceeds this threshold.
14560 *
14561 * min_ratio (output) double
14562 * The minimum ratio
14563 *
14564 * num_bad_ratio (output) int
14565 * The number of tests fail; they are above the threshold.
14566 *
14567 * num_tests (output) int
14568 * The number of tests is being performed.
14569 *
14570 * Return value
14571 * ============
14572 *
14573 * The maximum ratio if run successfully, otherwise return -1
14574 *
14575 * Code structure
14576 * ==============
14577 *
14578 * debug loop -- if debug is one, the first loop computes the max ratio
14579 * -- and the last(second) loop outputs debugging information,
14580 * -- if the test fail and its ratio > 0.5 * max ratio.
14581 * -- if debug is zero, the loop is executed once
14582 * alpha loop -- varying alpha: 0, 1, or random
14583 * beta loop -- varying beta: 0, 1, or random
14584 * prec loop -- varying internal prec: single, double, or extra
14585 * norm loop -- varying norm: near undeflow, near one, or
14586 * -- near overflow
14587 * numtest loop -- how many times the test is perform with
14588 * -- above set of attributes
14589 * order loop -- varying order type: rowmajor or colmajor
14590 * trans loop -- varying uplo type: upper or lower
14591 * lda loop -- varying lda: m, m+1, 2m
14592 * incx loop -- varying incx: -2, -1, 1, 2
14593 * incy loop -- varying incy: -2, -1, 1, 2
14594 */
14595 {
14596 /* function name */
14597 const char fname[] = "BLAS_zgemv2_d_z_x";
14598
14599 /* max number of debug lines to print */
14600 const int max_print = 8;
14601
14602 /* Variables in the "x_val" form are loop vars for corresponding
14603 variables */
14604 int i; /* iterate through the repeating tests */
14605 int j, k; /* multipurpose counters or variables */
14606 int iy; /* use to index y */
14607 int incx_val, incy_val, /* for testing different inc values */
14608 incx, incy;
14609 int incy_gen; /* for complex case inc=2, for real case inc=1 */
14610 int d_count; /* counter for debug */
14611 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
14612 int p_count; /* counter for the number of debug lines printed */
14613 int tot_tests; /* total number of tests to be done */
14614 int norm; /* input values of near underflow/one/overflow */
14615 double ratio_max; /* the current maximum ratio */
14616 double ratio_min; /* the current minimum ratio */
14617 double *ratios; /* a temporary variable for calculating ratio */
14618 double ratio; /* the per-use test ratio from test() */
14619 int bad_ratios; /* the number of ratios over the threshold */
14620 double eps_int; /* the internal epsilon expected--2^(-24) for float */
14621 double un_int; /* the internal underflow threshold */
14622 double alpha[2];
14623 double beta[2];
14624 double *A;
14625 double *head_x;
14626 double *tail_x;
14627 double *y;
14628 double *temp; /* use for calculating ratio */
14629
14630 /* x_gen and y_gen are used to store vectors generated by testgen.
14631 they eventually are copied back to x and y */
14632 double *head_x_gen;
14633 double *tail_x_gen;
14634 double *y_gen;
14635
14636 /* the true r calculated by testgen(), in double-double */
14637 double *head_r_true, *tail_r_true;
14638
14639 int alpha_val;
14640 int alpha_flag; /* input flag for BLAS_zgemv2_d_z_testgen */
14641 int beta_val;
14642 int beta_flag; /* input flag for BLAS_zgemv2_d_z_testgen */
14643 int order_val;
14644 enum blas_order_type order_type;
14645 int prec_val;
14646 enum blas_prec_type prec;
14647 int trans_val;
14648 enum blas_trans_type trans_type;
14649 int m_i;
14650 int n_i;
14651 int max_mn; /* the max of m and n */
14652 int lda_val;
14653 int lda;
14654 int saved_seed; /* for saving the original seed */
14655 int count, old_count; /* use for counting the number of testgen calls * 2 */
14656
14657 FPU_FIX_DECL;
14658
14659 /* test for bad arguments */
14660 if (n < 0 || m < 0 || ntests < 0)
14661 BLAS_error(fname, 0, 0, NULL);
14662
14663 /* initialization */
14664 *num_bad_ratio = 0;
14665 *num_tests = 0;
14666 *min_ratio = 0.0;
14667
14668 saved_seed = *seed;
14669 ratio_min = 1e308;
14670 ratio_max = 0.0;
14671 ratio = 0.0;
14672 tot_tests = 0;
14673 p_count = 0;
14674 count = 0;
14675 find_max_ratio = 0;
14676 bad_ratios = 0;
14677 old_count = 0;
14678
14679 if (debug == 3)
14680 find_max_ratio = 1;
14681 max_mn = MAX(m, n);
14682 if (m == 0 || n == 0) {
14683 return 0.0;
14684 }
14685
14686 FPU_FIX_START;
14687
14688 incy_gen = 1;
14689 incy_gen *= 2;
14690
14691 /* get space for calculation */
14692 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
14693 if (max_mn * 2 > 0 && head_x == NULL) {
14694 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14695 }
14696 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
14697 if (max_mn * 2 > 0 && tail_x == NULL) {
14698 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14699 }
14700 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
14701 if (max_mn * 2 > 0 && y == NULL) {
14702 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14703 }
14704 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14705 if (max_mn > 0 && head_x_gen == NULL) {
14706 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14707 }
14708 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14709 if (max_mn > 0 && tail_x_gen == NULL) {
14710 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14711 }
14712 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14713 if (max_mn > 0 && y_gen == NULL) {
14714 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14715 }
14716 temp = (double *) blas_malloc(max_mn * sizeof(double));
14717 if (max_mn > 0 && temp == NULL) {
14718 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14719 }
14720 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14721 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14722 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
14723 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14724 }
14725 ratios = (double *) blas_malloc(max_mn * sizeof(double));
14726 if (max_mn > 0 && ratios == NULL) {
14727 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14728 }
14729 A =
14730 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
14731 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
14732 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14733 }
14734
14735 /* The debug iteration:
14736 If debug=1, then will execute the iteration twice. First, compute the
14737 max ratio. Second, print info if ratio > (50% * ratio_max). */
14738 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
14739 bad_ratios = 0; /* set to zero */
14740
14741 if ((debug == 3) && (d_count == find_max_ratio))
14742 *seed = saved_seed; /* restore the original seed */
14743
14744 /* varying alpha */
14745 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
14746 alpha_flag = 0;
14747 switch (alpha_val) {
14748 case 0:
14749 alpha[0] = alpha[1] = 0.0;
14750 alpha_flag = 1;
14751 break;
14752 case 1:
14753 alpha[0] = 1.0;
14754 alpha[1] = 0.0;
14755 alpha_flag = 1;
14756 break;
14757 }
14758
14759 /* varying beta */
14760 for (beta_val = 0; beta_val < 3; beta_val++) {
14761 beta_flag = 0;
14762 switch (beta_val) {
14763 case 0:
14764 beta[0] = beta[1] = 0.0;
14765 beta_flag = 1;
14766 break;
14767 case 1:
14768 beta[0] = 1.0;
14769 beta[1] = 0.0;
14770 beta_flag = 1;
14771 break;
14772 }
14773
14774
14775 /* varying extra precs */
14776 for (prec_val = 0; prec_val <= 2; prec_val++) {
14777 switch (prec_val) {
14778 case 0:
14779 eps_int = power(2, -BITS_D);
14780 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
14781 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
14782 prec = blas_prec_double;
14783 break;
14784 case 1:
14785 eps_int = power(2, -BITS_D);
14786 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
14787 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
14788 prec = blas_prec_double;
14789 break;
14790 case 2:
14791 default:
14792 eps_int = power(2, -BITS_E);
14793 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
14794 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
14795 prec = blas_prec_extra;
14796 break;
14797 }
14798
14799 /* values near underflow, 1, or overflow */
14800 for (norm = -1; norm <= 1; norm++) {
14801
14802 /* number of tests */
14803 for (i = 0; i < ntests; i++) {
14804
14805 /* row or col major */
14806 for (order_val = 0; order_val < 2; order_val++) {
14807 switch (order_val) {
14808 case 0:
14809 order_type = blas_rowmajor;
14810 break;
14811 case 1:
14812 default:
14813 order_type = blas_colmajor;
14814 break;
14815 }
14816
14817 /* no_trans, trans, or conj_trans */
14818 for (trans_val = 0; trans_val < 3; trans_val++) {
14819 switch (trans_val) {
14820 case 0:
14821 trans_type = blas_no_trans;
14822 m_i = m;
14823 n_i = n;
14824 break;
14825 case 1:
14826 trans_type = blas_trans;
14827 m_i = n;
14828 n_i = m;
14829 break;
14830 case 2:
14831 default:
14832 trans_type = blas_conj_trans;
14833 m_i = n;
14834 n_i = m;
14835 break;
14836 }
14837
14838 /* lda=n, n+1, or 2n */
14839 for (lda_val = 0; lda_val < 3; lda_val++) {
14840 switch (lda_val) {
14841 case 0:
14842 lda = m_i;
14843 break;
14844 case 1:
14845 lda = m_i + 1;
14846 break;
14847 case 2:
14848 default:
14849 lda = 2 * m_i;
14850 break;
14851 }
14852 if ((order_type == blas_rowmajor && lda < n) ||
14853 (order_type == blas_colmajor && lda < m))
14854 continue;
14855
14856 /* For the sake of speed, we throw out this case at random */
14857 if (xrand(seed) >= test_prob)
14858 continue;
14859
14860 /* in the trivial cases, no need to run testgen */
14861 if (m > 0 && n > 0)
14862 BLAS_zgemv2_d_z_testgen(norm, order_type, trans_type, m,
14863 n, &alpha, alpha_flag, A, lda,
14864 head_x_gen, tail_x_gen, &beta,
14865 beta_flag, y_gen, seed,
14866 head_r_true, tail_r_true);
14867
14868 count++;
14869
14870 /* varying incx */
14871 for (incx_val = -2; incx_val <= 2; incx_val++) {
14872 if (incx_val == 0)
14873 continue;
14874
14875 /* setting incx */
14876 incx = incx_val;
14877 incx *= 2;
14878
14879 zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
14880 zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
14881
14882 /* varying incy */
14883 for (incy_val = -2; incy_val <= 2; incy_val++) {
14884 if (incy_val == 0)
14885 continue;
14886
14887 /* setting incy */
14888 incy = incy_val;
14889 incy *= 2;
14890
14891 zcopy_vector(y_gen, m_i, 1, y, incy_val);
14892
14893 /* call BLAS_zgemv2_d_z_x */
14894 FPU_FIX_STOP;
14895 BLAS_zgemv2_d_z_x(order_type, trans_type, m, n, alpha,
14896 A, lda, head_x, tail_x, incx_val,
14897 beta, y, incy_val, prec);
14898 FPU_FIX_START;
14899
14900 /* set y starting index */
14901 iy = 0;
14902 if (incy < 0)
14903 iy = -(m_i - 1) * incy;
14904
14905 /* computing the ratio */
14906 if (m > 0 && n > 0)
14907 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
14908 /* copy row j of A to temp */
14909 dge_copy_row(order_type, trans_type, m_i, n_i, A,
14910 lda, temp, j);
14911
14912 test_BLAS_zdot2_d_z(n_i, blas_no_conj, alpha,
14913 beta, &y_gen[k], &y[iy],
14914 &head_r_true[k],
14915 &tail_r_true[k], temp, 1,
14916 head_x, tail_x, incx_val,
14917 eps_int, un_int, &ratios[j]);
14918
14919 /* take the max ratio */
14920 if (j == 0) {
14921 ratio = ratios[0];
14922 /* The !<= below causes NaN error to be detected.
14923 Note that (NaN > thresh) is always false. */
14924 } else if (!(ratios[j] <= ratio)) {
14925 ratio = ratios[j];
14926 }
14927 iy += incy;
14928 }
14929
14930 /* Increase the number of bad ratio, if the ratio
14931 is bigger than the threshold.
14932 The !<= below causes NaN error to be detected.
14933 Note that (NaN > thresh) is always false. */
14934 if (!(ratio <= thresh)) {
14935 bad_ratios++;
14936
14937 if ((debug == 3) && /* print only when debug is on */
14938 (count != old_count) && /* print if old vector is different
14939 from the current one */
14940 (d_count == find_max_ratio) &&
14941 (p_count <= max_print) &&
14942 (ratio > 0.5 * ratio_max)) {
14943 old_count = count;
14944
14945 printf
14946 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
14947 fname, m, n, ntests, thresh);
14948
14949 /* Print test info */
14950 switch (prec) {
14951 case blas_prec_single:
14952 printf("single ");
14953 break;
14954 case blas_prec_double:
14955 printf("double ");
14956 break;
14957 case blas_prec_indigenous:
14958 printf("indigenous ");
14959 break;
14960 case blas_prec_extra:
14961 printf("extra ");
14962 break;
14963 }
14964 switch (norm) {
14965 case -1:
14966 printf("near_underflow ");
14967 break;
14968 case 0:
14969 printf("near_one ");
14970 break;
14971 case 1:
14972 printf("near_overflow ");
14973 break;
14974 }
14975 switch (order_type) {
14976 case blas_rowmajor:
14977 printf("row_major ");
14978 break;
14979 case blas_colmajor:
14980 printf("col_major ");
14981 break;
14982 }
14983 switch (trans_type) {
14984 case blas_no_trans:
14985 printf("no_trans ");
14986 break;
14987 case blas_trans:
14988 printf("trans ");
14989 break;
14990 case blas_conj_trans:
14991 printf("conj_trans ");
14992 break;
14993 }
14994
14995 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
14996 incy);
14997
14998 dge_print_matrix(A, m_i, n_i, lda, order_type,
14999 "A");
15000
15001 zprint_vector(head_x, n_i, incx_val, "head_x");
15002 zprint_vector(tail_x, n_i, incx_val, "tail_x");
15003 zprint_vector(y_gen, m_i, 1, "y_gen");
15004 zprint_vector(y, m_i, incy_val, "y_final");
15005
15006 printf(" ");
15007 printf("alpha = ");
15008 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
15009 printf("\n ");
15010 printf("beta = ");
15011 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
15012 printf("\n");
15013 for (j = 0, k = 0; j < m_i * incy_gen;
15014 j += incy_gen, k++) {
15015 printf(" ");
15016 printf
15017 ("([%24.16e %24.16e], [%24.16e %24.16e])",
15018 head_r_true[j], tail_r_true[j],
15019 head_r_true[j + 1], tail_r_true[j + 1]);
15020 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
15021 }
15022
15023 printf(" ratio=%.4e\n", ratio);
15024 p_count++;
15025 }
15026 if (bad_ratios >= MAX_BAD_TESTS) {
15027 printf("\ntoo many failures, exiting....");
15028 printf("\nTesting and compilation");
15029 printf(" are incomplete\n\n");
15030 goto end;
15031 }
15032 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
15033 printf("\nFlagrant ratio error, exiting...");
15034 printf("\nTesting and compilation");
15035 printf(" are incomplete\n\n");
15036 goto end;
15037 }
15038 }
15039 if (d_count == 0) {
15040 if (ratio > ratio_max)
15041 ratio_max = ratio;
15042
15043 if (ratio != 0.0 && ratio < ratio_min)
15044 ratio_min = ratio;
15045
15046 tot_tests++;
15047 }
15048 } /* incy */
15049 } /* incx */
15050 } /* lda */
15051 } /* trans */
15052 } /* order */
15053 } /* tests */
15054 } /* norm */
15055 } /* prec */
15056 } /* beta */
15057 } /* alpha */
15058 } /* debug */
15059
15060 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
15061 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
15062 fname, m, n, ntests, thresh);
15063 printf
15064 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
15065 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
15066 ratio_min, ratio_max);
15067 }
15068
15069 end:
15070 FPU_FIX_STOP;
15071
15072 blas_free(head_x);
15073 blas_free(tail_x);
15074 blas_free(y);
15075 blas_free(head_x_gen);
15076 blas_free(tail_x_gen);
15077 blas_free(y_gen);
15078 blas_free(temp);
15079 blas_free(A);
15080 blas_free(head_r_true);
15081 blas_free(tail_r_true);
15082 blas_free(ratios);
15083
15084 *min_ratio = ratio_min;
15085 *num_bad_ratio = bad_ratios;
15086 *num_tests = tot_tests;
15087 return ratio_max;
15088 }
do_test_zgemv2_d_d_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)15089 double do_test_zgemv2_d_d_x(int m, int n, int ntests, int *seed,
15090 double thresh, int debug, float test_prob,
15091 double *min_ratio, int *num_bad_ratio,
15092 int *num_tests)
15093
15094 /*
15095 * Purpose
15096 * =======
15097 *
15098 * Runs a series of tests on GEMV2.
15099 *
15100 * Arguments
15101 * =========
15102 *
15103 * m (input) int
15104 * The number of rows
15105 *
15106 * n (input) int
15107 * The number of columns
15108 *
15109 * ntests (input) int
15110 * The number of tests to run for each set of attributes.
15111 *
15112 * seed (input/output) int
15113 * The seed for the random number generator used in testgen().
15114 *
15115 * thresh (input) double
15116 * When the ratio returned from test() exceeds the specified
15117 * threshold, the current size, r_true, r_comp, and ratio will be
15118 * printed. (Since ratio is supposed to be O(1), we can set thresh
15119 * to ~10.)
15120 *
15121 * debug (input) int
15122 * If debug=3, print summary
15123 * If debug=2, print summary only if the number of bad ratios > 0
15124 * If debug=1, print complete info if tests fail
15125 * If debug=0, return max ratio
15126 *
15127 * test_prob (input) float
15128 * The specified test will be performed only if the generated
15129 * random exceeds this threshold.
15130 *
15131 * min_ratio (output) double
15132 * The minimum ratio
15133 *
15134 * num_bad_ratio (output) int
15135 * The number of tests fail; they are above the threshold.
15136 *
15137 * num_tests (output) int
15138 * The number of tests is being performed.
15139 *
15140 * Return value
15141 * ============
15142 *
15143 * The maximum ratio if run successfully, otherwise return -1
15144 *
15145 * Code structure
15146 * ==============
15147 *
15148 * debug loop -- if debug is one, the first loop computes the max ratio
15149 * -- and the last(second) loop outputs debugging information,
15150 * -- if the test fail and its ratio > 0.5 * max ratio.
15151 * -- if debug is zero, the loop is executed once
15152 * alpha loop -- varying alpha: 0, 1, or random
15153 * beta loop -- varying beta: 0, 1, or random
15154 * prec loop -- varying internal prec: single, double, or extra
15155 * norm loop -- varying norm: near undeflow, near one, or
15156 * -- near overflow
15157 * numtest loop -- how many times the test is perform with
15158 * -- above set of attributes
15159 * order loop -- varying order type: rowmajor or colmajor
15160 * trans loop -- varying uplo type: upper or lower
15161 * lda loop -- varying lda: m, m+1, 2m
15162 * incx loop -- varying incx: -2, -1, 1, 2
15163 * incy loop -- varying incy: -2, -1, 1, 2
15164 */
15165 {
15166 /* function name */
15167 const char fname[] = "BLAS_zgemv2_d_d_x";
15168
15169 /* max number of debug lines to print */
15170 const int max_print = 8;
15171
15172 /* Variables in the "x_val" form are loop vars for corresponding
15173 variables */
15174 int i; /* iterate through the repeating tests */
15175 int j, k; /* multipurpose counters or variables */
15176 int iy; /* use to index y */
15177 int incx_val, incy_val, /* for testing different inc values */
15178 incx, incy;
15179 int incy_gen; /* for complex case inc=2, for real case inc=1 */
15180 int d_count; /* counter for debug */
15181 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
15182 int p_count; /* counter for the number of debug lines printed */
15183 int tot_tests; /* total number of tests to be done */
15184 int norm; /* input values of near underflow/one/overflow */
15185 double ratio_max; /* the current maximum ratio */
15186 double ratio_min; /* the current minimum ratio */
15187 double *ratios; /* a temporary variable for calculating ratio */
15188 double ratio; /* the per-use test ratio from test() */
15189 int bad_ratios; /* the number of ratios over the threshold */
15190 double eps_int; /* the internal epsilon expected--2^(-24) for float */
15191 double un_int; /* the internal underflow threshold */
15192 double alpha[2];
15193 double beta[2];
15194 double *A;
15195 double *head_x;
15196 double *tail_x;
15197 double *y;
15198 double *temp; /* use for calculating ratio */
15199
15200 /* x_gen and y_gen are used to store vectors generated by testgen.
15201 they eventually are copied back to x and y */
15202 double *head_x_gen;
15203 double *tail_x_gen;
15204 double *y_gen;
15205
15206 /* the true r calculated by testgen(), in double-double */
15207 double *head_r_true, *tail_r_true;
15208
15209 int alpha_val;
15210 int alpha_flag; /* input flag for BLAS_zgemv2_d_d_testgen */
15211 int beta_val;
15212 int beta_flag; /* input flag for BLAS_zgemv2_d_d_testgen */
15213 int order_val;
15214 enum blas_order_type order_type;
15215 int prec_val;
15216 enum blas_prec_type prec;
15217 int trans_val;
15218 enum blas_trans_type trans_type;
15219 int m_i;
15220 int n_i;
15221 int max_mn; /* the max of m and n */
15222 int lda_val;
15223 int lda;
15224 int saved_seed; /* for saving the original seed */
15225 int count, old_count; /* use for counting the number of testgen calls * 2 */
15226
15227 FPU_FIX_DECL;
15228
15229 /* test for bad arguments */
15230 if (n < 0 || m < 0 || ntests < 0)
15231 BLAS_error(fname, 0, 0, NULL);
15232
15233 /* initialization */
15234 *num_bad_ratio = 0;
15235 *num_tests = 0;
15236 *min_ratio = 0.0;
15237
15238 saved_seed = *seed;
15239 ratio_min = 1e308;
15240 ratio_max = 0.0;
15241 ratio = 0.0;
15242 tot_tests = 0;
15243 p_count = 0;
15244 count = 0;
15245 find_max_ratio = 0;
15246 bad_ratios = 0;
15247 old_count = 0;
15248
15249 if (debug == 3)
15250 find_max_ratio = 1;
15251 max_mn = MAX(m, n);
15252 if (m == 0 || n == 0) {
15253 return 0.0;
15254 }
15255
15256 FPU_FIX_START;
15257
15258 incy_gen = 1;
15259 incy_gen *= 2;
15260
15261 /* get space for calculation */
15262 head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
15263 if (max_mn * 2 > 0 && head_x == NULL) {
15264 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15265 }
15266 tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
15267 if (max_mn * 2 > 0 && tail_x == NULL) {
15268 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15269 }
15270 y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
15271 if (max_mn * 2 > 0 && y == NULL) {
15272 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15273 }
15274 head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
15275 if (max_mn > 0 && head_x_gen == NULL) {
15276 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15277 }
15278 tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
15279 if (max_mn > 0 && tail_x_gen == NULL) {
15280 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15281 }
15282 y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
15283 if (max_mn > 0 && y_gen == NULL) {
15284 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15285 }
15286 temp = (double *) blas_malloc(max_mn * sizeof(double));
15287 if (max_mn > 0 && temp == NULL) {
15288 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15289 }
15290 head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
15291 tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
15292 if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
15293 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15294 }
15295 ratios = (double *) blas_malloc(max_mn * sizeof(double));
15296 if (max_mn > 0 && ratios == NULL) {
15297 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15298 }
15299 A =
15300 (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
15301 if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
15302 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15303 }
15304
15305 /* The debug iteration:
15306 If debug=1, then will execute the iteration twice. First, compute the
15307 max ratio. Second, print info if ratio > (50% * ratio_max). */
15308 for (d_count = 0; d_count <= find_max_ratio; d_count++) {
15309 bad_ratios = 0; /* set to zero */
15310
15311 if ((debug == 3) && (d_count == find_max_ratio))
15312 *seed = saved_seed; /* restore the original seed */
15313
15314 /* varying alpha */
15315 for (alpha_val = 0; alpha_val < 3; alpha_val++) {
15316 alpha_flag = 0;
15317 switch (alpha_val) {
15318 case 0:
15319 alpha[0] = alpha[1] = 0.0;
15320 alpha_flag = 1;
15321 break;
15322 case 1:
15323 alpha[0] = 1.0;
15324 alpha[1] = 0.0;
15325 alpha_flag = 1;
15326 break;
15327 }
15328
15329 /* varying beta */
15330 for (beta_val = 0; beta_val < 3; beta_val++) {
15331 beta_flag = 0;
15332 switch (beta_val) {
15333 case 0:
15334 beta[0] = beta[1] = 0.0;
15335 beta_flag = 1;
15336 break;
15337 case 1:
15338 beta[0] = 1.0;
15339 beta[1] = 0.0;
15340 beta_flag = 1;
15341 break;
15342 }
15343
15344
15345 /* varying extra precs */
15346 for (prec_val = 0; prec_val <= 2; prec_val++) {
15347 switch (prec_val) {
15348 case 0:
15349 eps_int = power(2, -BITS_D);
15350 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
15351 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
15352 prec = blas_prec_double;
15353 break;
15354 case 1:
15355 eps_int = power(2, -BITS_D);
15356 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
15357 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
15358 prec = blas_prec_double;
15359 break;
15360 case 2:
15361 default:
15362 eps_int = power(2, -BITS_E);
15363 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
15364 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
15365 prec = blas_prec_extra;
15366 break;
15367 }
15368
15369 /* values near underflow, 1, or overflow */
15370 for (norm = -1; norm <= 1; norm++) {
15371
15372 /* number of tests */
15373 for (i = 0; i < ntests; i++) {
15374
15375 /* row or col major */
15376 for (order_val = 0; order_val < 2; order_val++) {
15377 switch (order_val) {
15378 case 0:
15379 order_type = blas_rowmajor;
15380 break;
15381 case 1:
15382 default:
15383 order_type = blas_colmajor;
15384 break;
15385 }
15386
15387 /* no_trans, trans, or conj_trans */
15388 for (trans_val = 0; trans_val < 3; trans_val++) {
15389 switch (trans_val) {
15390 case 0:
15391 trans_type = blas_no_trans;
15392 m_i = m;
15393 n_i = n;
15394 break;
15395 case 1:
15396 trans_type = blas_trans;
15397 m_i = n;
15398 n_i = m;
15399 break;
15400 case 2:
15401 default:
15402 trans_type = blas_conj_trans;
15403 m_i = n;
15404 n_i = m;
15405 break;
15406 }
15407
15408 /* lda=n, n+1, or 2n */
15409 for (lda_val = 0; lda_val < 3; lda_val++) {
15410 switch (lda_val) {
15411 case 0:
15412 lda = m_i;
15413 break;
15414 case 1:
15415 lda = m_i + 1;
15416 break;
15417 case 2:
15418 default:
15419 lda = 2 * m_i;
15420 break;
15421 }
15422 if ((order_type == blas_rowmajor && lda < n) ||
15423 (order_type == blas_colmajor && lda < m))
15424 continue;
15425
15426 /* For the sake of speed, we throw out this case at random */
15427 if (xrand(seed) >= test_prob)
15428 continue;
15429
15430 /* in the trivial cases, no need to run testgen */
15431 if (m > 0 && n > 0)
15432 BLAS_zgemv2_d_d_testgen(norm, order_type, trans_type, m,
15433 n, &alpha, alpha_flag, A, lda,
15434 head_x_gen, tail_x_gen, &beta,
15435 beta_flag, y_gen, seed,
15436 head_r_true, tail_r_true);
15437
15438 count++;
15439
15440 /* varying incx */
15441 for (incx_val = -2; incx_val <= 2; incx_val++) {
15442 if (incx_val == 0)
15443 continue;
15444
15445 /* setting incx */
15446 incx = incx_val;
15447
15448
15449 dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
15450 dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
15451
15452 /* varying incy */
15453 for (incy_val = -2; incy_val <= 2; incy_val++) {
15454 if (incy_val == 0)
15455 continue;
15456
15457 /* setting incy */
15458 incy = incy_val;
15459 incy *= 2;
15460
15461 zcopy_vector(y_gen, m_i, 1, y, incy_val);
15462
15463 /* call BLAS_zgemv2_d_d_x */
15464 FPU_FIX_STOP;
15465 BLAS_zgemv2_d_d_x(order_type, trans_type, m, n, alpha,
15466 A, lda, head_x, tail_x, incx_val,
15467 beta, y, incy_val, prec);
15468 FPU_FIX_START;
15469
15470 /* set y starting index */
15471 iy = 0;
15472 if (incy < 0)
15473 iy = -(m_i - 1) * incy;
15474
15475 /* computing the ratio */
15476 if (m > 0 && n > 0)
15477 for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
15478 /* copy row j of A to temp */
15479 dge_copy_row(order_type, trans_type, m_i, n_i, A,
15480 lda, temp, j);
15481
15482 test_BLAS_zdot2_d_d(n_i, blas_no_conj, alpha,
15483 beta, &y_gen[k], &y[iy],
15484 &head_r_true[k],
15485 &tail_r_true[k], temp, 1,
15486 head_x, tail_x, incx_val,
15487 eps_int, un_int, &ratios[j]);
15488
15489 /* take the max ratio */
15490 if (j == 0) {
15491 ratio = ratios[0];
15492 /* The !<= below causes NaN error to be detected.
15493 Note that (NaN > thresh) is always false. */
15494 } else if (!(ratios[j] <= ratio)) {
15495 ratio = ratios[j];
15496 }
15497 iy += incy;
15498 }
15499
15500 /* Increase the number of bad ratio, if the ratio
15501 is bigger than the threshold.
15502 The !<= below causes NaN error to be detected.
15503 Note that (NaN > thresh) is always false. */
15504 if (!(ratio <= thresh)) {
15505 bad_ratios++;
15506
15507 if ((debug == 3) && /* print only when debug is on */
15508 (count != old_count) && /* print if old vector is different
15509 from the current one */
15510 (d_count == find_max_ratio) &&
15511 (p_count <= max_print) &&
15512 (ratio > 0.5 * ratio_max)) {
15513 old_count = count;
15514
15515 printf
15516 ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
15517 fname, m, n, ntests, thresh);
15518
15519 /* Print test info */
15520 switch (prec) {
15521 case blas_prec_single:
15522 printf("single ");
15523 break;
15524 case blas_prec_double:
15525 printf("double ");
15526 break;
15527 case blas_prec_indigenous:
15528 printf("indigenous ");
15529 break;
15530 case blas_prec_extra:
15531 printf("extra ");
15532 break;
15533 }
15534 switch (norm) {
15535 case -1:
15536 printf("near_underflow ");
15537 break;
15538 case 0:
15539 printf("near_one ");
15540 break;
15541 case 1:
15542 printf("near_overflow ");
15543 break;
15544 }
15545 switch (order_type) {
15546 case blas_rowmajor:
15547 printf("row_major ");
15548 break;
15549 case blas_colmajor:
15550 printf("col_major ");
15551 break;
15552 }
15553 switch (trans_type) {
15554 case blas_no_trans:
15555 printf("no_trans ");
15556 break;
15557 case blas_trans:
15558 printf("trans ");
15559 break;
15560 case blas_conj_trans:
15561 printf("conj_trans ");
15562 break;
15563 }
15564
15565 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
15566 incy);
15567
15568 dge_print_matrix(A, m_i, n_i, lda, order_type,
15569 "A");
15570
15571 dprint_vector(head_x, n_i, incx_val, "head_x");
15572 dprint_vector(tail_x, n_i, incx_val, "tail_x");
15573 zprint_vector(y_gen, m_i, 1, "y_gen");
15574 zprint_vector(y, m_i, incy_val, "y_final");
15575
15576 printf(" ");
15577 printf("alpha = ");
15578 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
15579 printf("\n ");
15580 printf("beta = ");
15581 printf("(%24.16e, %24.16e)", beta[0], beta[1]);
15582 printf("\n");
15583 for (j = 0, k = 0; j < m_i * incy_gen;
15584 j += incy_gen, k++) {
15585 printf(" ");
15586 printf
15587 ("([%24.16e %24.16e], [%24.16e %24.16e])",
15588 head_r_true[j], tail_r_true[j],
15589 head_r_true[j + 1], tail_r_true[j + 1]);
15590 printf(", ratio[%d]=%.4e\n", k, ratios[k]);
15591 }
15592
15593 printf(" ratio=%.4e\n", ratio);
15594 p_count++;
15595 }
15596 if (bad_ratios >= MAX_BAD_TESTS) {
15597 printf("\ntoo many failures, exiting....");
15598 printf("\nTesting and compilation");
15599 printf(" are incomplete\n\n");
15600 goto end;
15601 }
15602 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
15603 printf("\nFlagrant ratio error, exiting...");
15604 printf("\nTesting and compilation");
15605 printf(" are incomplete\n\n");
15606 goto end;
15607 }
15608 }
15609 if (d_count == 0) {
15610 if (ratio > ratio_max)
15611 ratio_max = ratio;
15612
15613 if (ratio != 0.0 && ratio < ratio_min)
15614 ratio_min = ratio;
15615
15616 tot_tests++;
15617 }
15618 } /* incy */
15619 } /* incx */
15620 } /* lda */
15621 } /* trans */
15622 } /* order */
15623 } /* tests */
15624 } /* norm */
15625 } /* prec */
15626 } /* beta */
15627 } /* alpha */
15628 } /* debug */
15629
15630 if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
15631 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
15632 fname, m, n, ntests, thresh);
15633 printf
15634 (" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
15635 bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
15636 ratio_min, ratio_max);
15637 }
15638
15639 end:
15640 FPU_FIX_STOP;
15641
15642 blas_free(head_x);
15643 blas_free(tail_x);
15644 blas_free(y);
15645 blas_free(head_x_gen);
15646 blas_free(tail_x_gen);
15647 blas_free(y_gen);
15648 blas_free(temp);
15649 blas_free(A);
15650 blas_free(head_r_true);
15651 blas_free(tail_r_true);
15652 blas_free(ratios);
15653
15654 *min_ratio = ratio_min;
15655 *num_bad_ratio = bad_ratios;
15656 *num_tests = tot_tests;
15657 return ratio_max;
15658 }
15659
15660 #define NUMPAIRS 12
15661
main(int argc,char ** argv)15662 int main(int argc, char **argv)
15663 {
15664 int nsizes, ntests, debug;
15665 double thresh, test_prob;
15666 double total_min_ratio, total_max_ratio;
15667 int total_bad_ratios;
15668 int seed, num_bad_ratio, num_tests;
15669 int total_tests, nr_failed_routines = 0, nr_routines = 0;
15670 double min_ratio, max_ratio;
15671 const char *base_routine = "gemv2";
15672 char *fname;
15673 int n;
15674
15675 int m, i;
15676 int mn_pairs[NUMPAIRS][2] =
15677 { {0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {2, 1},
15678 {3, 1}, {2, 3}, {3, 3}, {2, 4}, {6, 6}, {10, 8}
15679 };
15680
15681 if (argc != 6) {
15682 printf("Usage:\n");
15683 printf("do_test_gemv2 <nsizes> <ntests> <thresh> <debug> <test_prob>\n");
15684 printf(" <nsizes>: number of sizes to be run.\n");
15685 printf
15686 (" <ntests>: the number of tests performed for each set of attributes\n");
15687 printf
15688 (" <thresh>: to catch bad ratios if it is greater than <thresh>\n");
15689 printf(" <debug>: 0, 1, 2, or 3; \n");
15690 printf(" if 0, no printing \n");
15691 printf(" if 1, print error summary only if tests fail\n");
15692 printf(" if 2, print error summary for each n\n");
15693 printf(" if 3, print complete info each test fails \n");
15694 printf("<test_prob>: probability of preforming a given \n");
15695 printf(" test case: 0.0 does no tests, 1.0 does all tests\n");
15696 return -1;
15697 } else {
15698 nsizes = atoi(argv[1]);
15699 ntests = atoi(argv[2]);
15700 thresh = atof(argv[3]);
15701 debug = atoi(argv[4]);
15702 test_prob = atof(argv[5]);
15703 }
15704
15705 seed = 1999;
15706
15707 if (nsizes < 0 || ntests < 0 || debug < 0 || debug > 3)
15708 BLAS_error("Testing gemv2", 0, 0, NULL);
15709
15710 printf("Testing %s...\n", base_routine);
15711 printf("INPUT: nsizes = %d, ntests = %d, thresh = %4.2f, debug = %d\n\n",
15712 nsizes, ntests, thresh, debug);
15713
15714
15715
15716
15717 min_ratio = 1e308;
15718 max_ratio = 0.0;
15719 total_bad_ratios = 0;
15720 total_tests = 0;
15721 fname = "BLAS_dgemv2_d_s";
15722 printf("Testing %s...\n", fname);
15723 for (i = 0; i < nsizes; i++) {
15724 m = mn_pairs[i][0];
15725 n = mn_pairs[i][1];
15726 total_max_ratio =
15727 do_test_dgemv2_d_s(m, n, 1, &seed, thresh, debug, test_prob,
15728 &total_min_ratio, &num_bad_ratio, &num_tests);
15729 if (total_max_ratio > max_ratio)
15730 max_ratio = total_max_ratio;
15731
15732 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15733 min_ratio = total_min_ratio;
15734
15735 total_bad_ratios += num_bad_ratio;
15736 total_tests += num_tests;
15737 }
15738
15739 if (min_ratio == 1e308)
15740 min_ratio = 0.0;
15741
15742 nr_routines++;
15743 if (total_bad_ratios == 0)
15744 printf("PASS> ");
15745 else {
15746 nr_failed_routines++;
15747 printf("FAIL> ");
15748 }
15749
15750 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15751 fname, total_bad_ratios, total_tests, max_ratio);
15752
15753 min_ratio = 1e308;
15754 max_ratio = 0.0;
15755 total_bad_ratios = 0;
15756 total_tests = 0;
15757 fname = "BLAS_dgemv2_s_d";
15758 printf("Testing %s...\n", fname);
15759 for (i = 0; i < nsizes; i++) {
15760 m = mn_pairs[i][0];
15761 n = mn_pairs[i][1];
15762 total_max_ratio =
15763 do_test_dgemv2_s_d(m, n, 1, &seed, thresh, debug, test_prob,
15764 &total_min_ratio, &num_bad_ratio, &num_tests);
15765 if (total_max_ratio > max_ratio)
15766 max_ratio = total_max_ratio;
15767
15768 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15769 min_ratio = total_min_ratio;
15770
15771 total_bad_ratios += num_bad_ratio;
15772 total_tests += num_tests;
15773 }
15774
15775 if (min_ratio == 1e308)
15776 min_ratio = 0.0;
15777
15778 nr_routines++;
15779 if (total_bad_ratios == 0)
15780 printf("PASS> ");
15781 else {
15782 nr_failed_routines++;
15783 printf("FAIL> ");
15784 }
15785
15786 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15787 fname, total_bad_ratios, total_tests, max_ratio);
15788
15789 min_ratio = 1e308;
15790 max_ratio = 0.0;
15791 total_bad_ratios = 0;
15792 total_tests = 0;
15793 fname = "BLAS_dgemv2_s_s";
15794 printf("Testing %s...\n", fname);
15795 for (i = 0; i < nsizes; i++) {
15796 m = mn_pairs[i][0];
15797 n = mn_pairs[i][1];
15798 total_max_ratio =
15799 do_test_dgemv2_s_s(m, n, 1, &seed, thresh, debug, test_prob,
15800 &total_min_ratio, &num_bad_ratio, &num_tests);
15801 if (total_max_ratio > max_ratio)
15802 max_ratio = total_max_ratio;
15803
15804 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15805 min_ratio = total_min_ratio;
15806
15807 total_bad_ratios += num_bad_ratio;
15808 total_tests += num_tests;
15809 }
15810
15811 if (min_ratio == 1e308)
15812 min_ratio = 0.0;
15813
15814 nr_routines++;
15815 if (total_bad_ratios == 0)
15816 printf("PASS> ");
15817 else {
15818 nr_failed_routines++;
15819 printf("FAIL> ");
15820 }
15821
15822 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15823 fname, total_bad_ratios, total_tests, max_ratio);
15824
15825 min_ratio = 1e308;
15826 max_ratio = 0.0;
15827 total_bad_ratios = 0;
15828 total_tests = 0;
15829 fname = "BLAS_zgemv2_z_c";
15830 printf("Testing %s...\n", fname);
15831 for (i = 0; i < nsizes; i++) {
15832 m = mn_pairs[i][0];
15833 n = mn_pairs[i][1];
15834 total_max_ratio =
15835 do_test_zgemv2_z_c(m, n, 1, &seed, thresh, debug, test_prob,
15836 &total_min_ratio, &num_bad_ratio, &num_tests);
15837 if (total_max_ratio > max_ratio)
15838 max_ratio = total_max_ratio;
15839
15840 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15841 min_ratio = total_min_ratio;
15842
15843 total_bad_ratios += num_bad_ratio;
15844 total_tests += num_tests;
15845 }
15846
15847 if (min_ratio == 1e308)
15848 min_ratio = 0.0;
15849
15850 nr_routines++;
15851 if (total_bad_ratios == 0)
15852 printf("PASS> ");
15853 else {
15854 nr_failed_routines++;
15855 printf("FAIL> ");
15856 }
15857
15858 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15859 fname, total_bad_ratios, total_tests, max_ratio);
15860
15861 min_ratio = 1e308;
15862 max_ratio = 0.0;
15863 total_bad_ratios = 0;
15864 total_tests = 0;
15865 fname = "BLAS_zgemv2_c_z";
15866 printf("Testing %s...\n", fname);
15867 for (i = 0; i < nsizes; i++) {
15868 m = mn_pairs[i][0];
15869 n = mn_pairs[i][1];
15870 total_max_ratio =
15871 do_test_zgemv2_c_z(m, n, 1, &seed, thresh, debug, test_prob,
15872 &total_min_ratio, &num_bad_ratio, &num_tests);
15873 if (total_max_ratio > max_ratio)
15874 max_ratio = total_max_ratio;
15875
15876 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15877 min_ratio = total_min_ratio;
15878
15879 total_bad_ratios += num_bad_ratio;
15880 total_tests += num_tests;
15881 }
15882
15883 if (min_ratio == 1e308)
15884 min_ratio = 0.0;
15885
15886 nr_routines++;
15887 if (total_bad_ratios == 0)
15888 printf("PASS> ");
15889 else {
15890 nr_failed_routines++;
15891 printf("FAIL> ");
15892 }
15893
15894 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15895 fname, total_bad_ratios, total_tests, max_ratio);
15896
15897 min_ratio = 1e308;
15898 max_ratio = 0.0;
15899 total_bad_ratios = 0;
15900 total_tests = 0;
15901 fname = "BLAS_zgemv2_c_c";
15902 printf("Testing %s...\n", fname);
15903 for (i = 0; i < nsizes; i++) {
15904 m = mn_pairs[i][0];
15905 n = mn_pairs[i][1];
15906 total_max_ratio =
15907 do_test_zgemv2_c_c(m, n, 1, &seed, thresh, debug, test_prob,
15908 &total_min_ratio, &num_bad_ratio, &num_tests);
15909 if (total_max_ratio > max_ratio)
15910 max_ratio = total_max_ratio;
15911
15912 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15913 min_ratio = total_min_ratio;
15914
15915 total_bad_ratios += num_bad_ratio;
15916 total_tests += num_tests;
15917 }
15918
15919 if (min_ratio == 1e308)
15920 min_ratio = 0.0;
15921
15922 nr_routines++;
15923 if (total_bad_ratios == 0)
15924 printf("PASS> ");
15925 else {
15926 nr_failed_routines++;
15927 printf("FAIL> ");
15928 }
15929
15930 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15931 fname, total_bad_ratios, total_tests, max_ratio);
15932
15933 min_ratio = 1e308;
15934 max_ratio = 0.0;
15935 total_bad_ratios = 0;
15936 total_tests = 0;
15937 fname = "BLAS_cgemv2_c_s";
15938 printf("Testing %s...\n", fname);
15939 for (i = 0; i < nsizes; i++) {
15940 m = mn_pairs[i][0];
15941 n = mn_pairs[i][1];
15942 total_max_ratio =
15943 do_test_cgemv2_c_s(m, n, 1, &seed, thresh, debug, test_prob,
15944 &total_min_ratio, &num_bad_ratio, &num_tests);
15945 if (total_max_ratio > max_ratio)
15946 max_ratio = total_max_ratio;
15947
15948 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15949 min_ratio = total_min_ratio;
15950
15951 total_bad_ratios += num_bad_ratio;
15952 total_tests += num_tests;
15953 }
15954
15955 if (min_ratio == 1e308)
15956 min_ratio = 0.0;
15957
15958 nr_routines++;
15959 if (total_bad_ratios == 0)
15960 printf("PASS> ");
15961 else {
15962 nr_failed_routines++;
15963 printf("FAIL> ");
15964 }
15965
15966 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15967 fname, total_bad_ratios, total_tests, max_ratio);
15968
15969 min_ratio = 1e308;
15970 max_ratio = 0.0;
15971 total_bad_ratios = 0;
15972 total_tests = 0;
15973 fname = "BLAS_cgemv2_s_c";
15974 printf("Testing %s...\n", fname);
15975 for (i = 0; i < nsizes; i++) {
15976 m = mn_pairs[i][0];
15977 n = mn_pairs[i][1];
15978 total_max_ratio =
15979 do_test_cgemv2_s_c(m, n, 1, &seed, thresh, debug, test_prob,
15980 &total_min_ratio, &num_bad_ratio, &num_tests);
15981 if (total_max_ratio > max_ratio)
15982 max_ratio = total_max_ratio;
15983
15984 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15985 min_ratio = total_min_ratio;
15986
15987 total_bad_ratios += num_bad_ratio;
15988 total_tests += num_tests;
15989 }
15990
15991 if (min_ratio == 1e308)
15992 min_ratio = 0.0;
15993
15994 nr_routines++;
15995 if (total_bad_ratios == 0)
15996 printf("PASS> ");
15997 else {
15998 nr_failed_routines++;
15999 printf("FAIL> ");
16000 }
16001
16002 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16003 fname, total_bad_ratios, total_tests, max_ratio);
16004
16005 min_ratio = 1e308;
16006 max_ratio = 0.0;
16007 total_bad_ratios = 0;
16008 total_tests = 0;
16009 fname = "BLAS_cgemv2_s_s";
16010 printf("Testing %s...\n", fname);
16011 for (i = 0; i < nsizes; i++) {
16012 m = mn_pairs[i][0];
16013 n = mn_pairs[i][1];
16014 total_max_ratio =
16015 do_test_cgemv2_s_s(m, n, 1, &seed, thresh, debug, test_prob,
16016 &total_min_ratio, &num_bad_ratio, &num_tests);
16017 if (total_max_ratio > max_ratio)
16018 max_ratio = total_max_ratio;
16019
16020 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16021 min_ratio = total_min_ratio;
16022
16023 total_bad_ratios += num_bad_ratio;
16024 total_tests += num_tests;
16025 }
16026
16027 if (min_ratio == 1e308)
16028 min_ratio = 0.0;
16029
16030 nr_routines++;
16031 if (total_bad_ratios == 0)
16032 printf("PASS> ");
16033 else {
16034 nr_failed_routines++;
16035 printf("FAIL> ");
16036 }
16037
16038 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16039 fname, total_bad_ratios, total_tests, max_ratio);
16040
16041 min_ratio = 1e308;
16042 max_ratio = 0.0;
16043 total_bad_ratios = 0;
16044 total_tests = 0;
16045 fname = "BLAS_zgemv2_z_d";
16046 printf("Testing %s...\n", fname);
16047 for (i = 0; i < nsizes; i++) {
16048 m = mn_pairs[i][0];
16049 n = mn_pairs[i][1];
16050 total_max_ratio =
16051 do_test_zgemv2_z_d(m, n, 1, &seed, thresh, debug, test_prob,
16052 &total_min_ratio, &num_bad_ratio, &num_tests);
16053 if (total_max_ratio > max_ratio)
16054 max_ratio = total_max_ratio;
16055
16056 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16057 min_ratio = total_min_ratio;
16058
16059 total_bad_ratios += num_bad_ratio;
16060 total_tests += num_tests;
16061 }
16062
16063 if (min_ratio == 1e308)
16064 min_ratio = 0.0;
16065
16066 nr_routines++;
16067 if (total_bad_ratios == 0)
16068 printf("PASS> ");
16069 else {
16070 nr_failed_routines++;
16071 printf("FAIL> ");
16072 }
16073
16074 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16075 fname, total_bad_ratios, total_tests, max_ratio);
16076
16077 min_ratio = 1e308;
16078 max_ratio = 0.0;
16079 total_bad_ratios = 0;
16080 total_tests = 0;
16081 fname = "BLAS_zgemv2_d_z";
16082 printf("Testing %s...\n", fname);
16083 for (i = 0; i < nsizes; i++) {
16084 m = mn_pairs[i][0];
16085 n = mn_pairs[i][1];
16086 total_max_ratio =
16087 do_test_zgemv2_d_z(m, n, 1, &seed, thresh, debug, test_prob,
16088 &total_min_ratio, &num_bad_ratio, &num_tests);
16089 if (total_max_ratio > max_ratio)
16090 max_ratio = total_max_ratio;
16091
16092 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16093 min_ratio = total_min_ratio;
16094
16095 total_bad_ratios += num_bad_ratio;
16096 total_tests += num_tests;
16097 }
16098
16099 if (min_ratio == 1e308)
16100 min_ratio = 0.0;
16101
16102 nr_routines++;
16103 if (total_bad_ratios == 0)
16104 printf("PASS> ");
16105 else {
16106 nr_failed_routines++;
16107 printf("FAIL> ");
16108 }
16109
16110 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16111 fname, total_bad_ratios, total_tests, max_ratio);
16112
16113 min_ratio = 1e308;
16114 max_ratio = 0.0;
16115 total_bad_ratios = 0;
16116 total_tests = 0;
16117 fname = "BLAS_zgemv2_d_d";
16118 printf("Testing %s...\n", fname);
16119 for (i = 0; i < nsizes; i++) {
16120 m = mn_pairs[i][0];
16121 n = mn_pairs[i][1];
16122 total_max_ratio =
16123 do_test_zgemv2_d_d(m, n, 1, &seed, thresh, debug, test_prob,
16124 &total_min_ratio, &num_bad_ratio, &num_tests);
16125 if (total_max_ratio > max_ratio)
16126 max_ratio = total_max_ratio;
16127
16128 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16129 min_ratio = total_min_ratio;
16130
16131 total_bad_ratios += num_bad_ratio;
16132 total_tests += num_tests;
16133 }
16134
16135 if (min_ratio == 1e308)
16136 min_ratio = 0.0;
16137
16138 nr_routines++;
16139 if (total_bad_ratios == 0)
16140 printf("PASS> ");
16141 else {
16142 nr_failed_routines++;
16143 printf("FAIL> ");
16144 }
16145
16146 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16147 fname, total_bad_ratios, total_tests, max_ratio);
16148
16149 min_ratio = 1e308;
16150 max_ratio = 0.0;
16151 total_bad_ratios = 0;
16152 total_tests = 0;
16153 fname = "BLAS_sgemv2_x";
16154 printf("Testing %s...\n", fname);
16155 for (i = 0; i < nsizes; i++) {
16156 m = mn_pairs[i][0];
16157 n = mn_pairs[i][1];
16158 total_max_ratio =
16159 do_test_sgemv2_x(m, n, 1, &seed, thresh, debug, test_prob,
16160 &total_min_ratio, &num_bad_ratio, &num_tests);
16161 if (total_max_ratio > max_ratio)
16162 max_ratio = total_max_ratio;
16163
16164 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16165 min_ratio = total_min_ratio;
16166
16167 total_bad_ratios += num_bad_ratio;
16168 total_tests += num_tests;
16169 }
16170
16171 if (min_ratio == 1e308)
16172 min_ratio = 0.0;
16173
16174 nr_routines++;
16175 if (total_bad_ratios == 0)
16176 printf("PASS> ");
16177 else {
16178 nr_failed_routines++;
16179 printf("FAIL> ");
16180 }
16181
16182 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16183 fname, total_bad_ratios, total_tests, max_ratio);
16184
16185 min_ratio = 1e308;
16186 max_ratio = 0.0;
16187 total_bad_ratios = 0;
16188 total_tests = 0;
16189 fname = "BLAS_dgemv2_x";
16190 printf("Testing %s...\n", fname);
16191 for (i = 0; i < nsizes; i++) {
16192 m = mn_pairs[i][0];
16193 n = mn_pairs[i][1];
16194 total_max_ratio =
16195 do_test_dgemv2_x(m, n, 1, &seed, thresh, debug, test_prob,
16196 &total_min_ratio, &num_bad_ratio, &num_tests);
16197 if (total_max_ratio > max_ratio)
16198 max_ratio = total_max_ratio;
16199
16200 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16201 min_ratio = total_min_ratio;
16202
16203 total_bad_ratios += num_bad_ratio;
16204 total_tests += num_tests;
16205 }
16206
16207 if (min_ratio == 1e308)
16208 min_ratio = 0.0;
16209
16210 nr_routines++;
16211 if (total_bad_ratios == 0)
16212 printf("PASS> ");
16213 else {
16214 nr_failed_routines++;
16215 printf("FAIL> ");
16216 }
16217
16218 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16219 fname, total_bad_ratios, total_tests, max_ratio);
16220
16221 min_ratio = 1e308;
16222 max_ratio = 0.0;
16223 total_bad_ratios = 0;
16224 total_tests = 0;
16225 fname = "BLAS_cgemv2_x";
16226 printf("Testing %s...\n", fname);
16227 for (i = 0; i < nsizes; i++) {
16228 m = mn_pairs[i][0];
16229 n = mn_pairs[i][1];
16230 total_max_ratio =
16231 do_test_cgemv2_x(m, n, 1, &seed, thresh, debug, test_prob,
16232 &total_min_ratio, &num_bad_ratio, &num_tests);
16233 if (total_max_ratio > max_ratio)
16234 max_ratio = total_max_ratio;
16235
16236 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16237 min_ratio = total_min_ratio;
16238
16239 total_bad_ratios += num_bad_ratio;
16240 total_tests += num_tests;
16241 }
16242
16243 if (min_ratio == 1e308)
16244 min_ratio = 0.0;
16245
16246 nr_routines++;
16247 if (total_bad_ratios == 0)
16248 printf("PASS> ");
16249 else {
16250 nr_failed_routines++;
16251 printf("FAIL> ");
16252 }
16253
16254 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16255 fname, total_bad_ratios, total_tests, max_ratio);
16256
16257 min_ratio = 1e308;
16258 max_ratio = 0.0;
16259 total_bad_ratios = 0;
16260 total_tests = 0;
16261 fname = "BLAS_zgemv2_x";
16262 printf("Testing %s...\n", fname);
16263 for (i = 0; i < nsizes; i++) {
16264 m = mn_pairs[i][0];
16265 n = mn_pairs[i][1];
16266 total_max_ratio =
16267 do_test_zgemv2_x(m, n, 1, &seed, thresh, debug, test_prob,
16268 &total_min_ratio, &num_bad_ratio, &num_tests);
16269 if (total_max_ratio > max_ratio)
16270 max_ratio = total_max_ratio;
16271
16272 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16273 min_ratio = total_min_ratio;
16274
16275 total_bad_ratios += num_bad_ratio;
16276 total_tests += num_tests;
16277 }
16278
16279 if (min_ratio == 1e308)
16280 min_ratio = 0.0;
16281
16282 nr_routines++;
16283 if (total_bad_ratios == 0)
16284 printf("PASS> ");
16285 else {
16286 nr_failed_routines++;
16287 printf("FAIL> ");
16288 }
16289
16290 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16291 fname, total_bad_ratios, total_tests, max_ratio);
16292
16293 min_ratio = 1e308;
16294 max_ratio = 0.0;
16295 total_bad_ratios = 0;
16296 total_tests = 0;
16297 fname = "BLAS_dgemv2_d_s_x";
16298 printf("Testing %s...\n", fname);
16299 for (i = 0; i < nsizes; i++) {
16300 m = mn_pairs[i][0];
16301 n = mn_pairs[i][1];
16302 total_max_ratio =
16303 do_test_dgemv2_d_s_x(m, n, 1, &seed, thresh, debug, test_prob,
16304 &total_min_ratio, &num_bad_ratio, &num_tests);
16305 if (total_max_ratio > max_ratio)
16306 max_ratio = total_max_ratio;
16307
16308 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16309 min_ratio = total_min_ratio;
16310
16311 total_bad_ratios += num_bad_ratio;
16312 total_tests += num_tests;
16313 }
16314
16315 if (min_ratio == 1e308)
16316 min_ratio = 0.0;
16317
16318 nr_routines++;
16319 if (total_bad_ratios == 0)
16320 printf("PASS> ");
16321 else {
16322 nr_failed_routines++;
16323 printf("FAIL> ");
16324 }
16325
16326 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16327 fname, total_bad_ratios, total_tests, max_ratio);
16328
16329 min_ratio = 1e308;
16330 max_ratio = 0.0;
16331 total_bad_ratios = 0;
16332 total_tests = 0;
16333 fname = "BLAS_dgemv2_s_d_x";
16334 printf("Testing %s...\n", fname);
16335 for (i = 0; i < nsizes; i++) {
16336 m = mn_pairs[i][0];
16337 n = mn_pairs[i][1];
16338 total_max_ratio =
16339 do_test_dgemv2_s_d_x(m, n, 1, &seed, thresh, debug, test_prob,
16340 &total_min_ratio, &num_bad_ratio, &num_tests);
16341 if (total_max_ratio > max_ratio)
16342 max_ratio = total_max_ratio;
16343
16344 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16345 min_ratio = total_min_ratio;
16346
16347 total_bad_ratios += num_bad_ratio;
16348 total_tests += num_tests;
16349 }
16350
16351 if (min_ratio == 1e308)
16352 min_ratio = 0.0;
16353
16354 nr_routines++;
16355 if (total_bad_ratios == 0)
16356 printf("PASS> ");
16357 else {
16358 nr_failed_routines++;
16359 printf("FAIL> ");
16360 }
16361
16362 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16363 fname, total_bad_ratios, total_tests, max_ratio);
16364
16365 min_ratio = 1e308;
16366 max_ratio = 0.0;
16367 total_bad_ratios = 0;
16368 total_tests = 0;
16369 fname = "BLAS_dgemv2_s_s_x";
16370 printf("Testing %s...\n", fname);
16371 for (i = 0; i < nsizes; i++) {
16372 m = mn_pairs[i][0];
16373 n = mn_pairs[i][1];
16374 total_max_ratio =
16375 do_test_dgemv2_s_s_x(m, n, 1, &seed, thresh, debug, test_prob,
16376 &total_min_ratio, &num_bad_ratio, &num_tests);
16377 if (total_max_ratio > max_ratio)
16378 max_ratio = total_max_ratio;
16379
16380 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16381 min_ratio = total_min_ratio;
16382
16383 total_bad_ratios += num_bad_ratio;
16384 total_tests += num_tests;
16385 }
16386
16387 if (min_ratio == 1e308)
16388 min_ratio = 0.0;
16389
16390 nr_routines++;
16391 if (total_bad_ratios == 0)
16392 printf("PASS> ");
16393 else {
16394 nr_failed_routines++;
16395 printf("FAIL> ");
16396 }
16397
16398 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16399 fname, total_bad_ratios, total_tests, max_ratio);
16400
16401 min_ratio = 1e308;
16402 max_ratio = 0.0;
16403 total_bad_ratios = 0;
16404 total_tests = 0;
16405 fname = "BLAS_zgemv2_z_c_x";
16406 printf("Testing %s...\n", fname);
16407 for (i = 0; i < nsizes; i++) {
16408 m = mn_pairs[i][0];
16409 n = mn_pairs[i][1];
16410 total_max_ratio =
16411 do_test_zgemv2_z_c_x(m, n, 1, &seed, thresh, debug, test_prob,
16412 &total_min_ratio, &num_bad_ratio, &num_tests);
16413 if (total_max_ratio > max_ratio)
16414 max_ratio = total_max_ratio;
16415
16416 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16417 min_ratio = total_min_ratio;
16418
16419 total_bad_ratios += num_bad_ratio;
16420 total_tests += num_tests;
16421 }
16422
16423 if (min_ratio == 1e308)
16424 min_ratio = 0.0;
16425
16426 nr_routines++;
16427 if (total_bad_ratios == 0)
16428 printf("PASS> ");
16429 else {
16430 nr_failed_routines++;
16431 printf("FAIL> ");
16432 }
16433
16434 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16435 fname, total_bad_ratios, total_tests, max_ratio);
16436
16437 min_ratio = 1e308;
16438 max_ratio = 0.0;
16439 total_bad_ratios = 0;
16440 total_tests = 0;
16441 fname = "BLAS_zgemv2_c_z_x";
16442 printf("Testing %s...\n", fname);
16443 for (i = 0; i < nsizes; i++) {
16444 m = mn_pairs[i][0];
16445 n = mn_pairs[i][1];
16446 total_max_ratio =
16447 do_test_zgemv2_c_z_x(m, n, 1, &seed, thresh, debug, test_prob,
16448 &total_min_ratio, &num_bad_ratio, &num_tests);
16449 if (total_max_ratio > max_ratio)
16450 max_ratio = total_max_ratio;
16451
16452 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16453 min_ratio = total_min_ratio;
16454
16455 total_bad_ratios += num_bad_ratio;
16456 total_tests += num_tests;
16457 }
16458
16459 if (min_ratio == 1e308)
16460 min_ratio = 0.0;
16461
16462 nr_routines++;
16463 if (total_bad_ratios == 0)
16464 printf("PASS> ");
16465 else {
16466 nr_failed_routines++;
16467 printf("FAIL> ");
16468 }
16469
16470 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16471 fname, total_bad_ratios, total_tests, max_ratio);
16472
16473 min_ratio = 1e308;
16474 max_ratio = 0.0;
16475 total_bad_ratios = 0;
16476 total_tests = 0;
16477 fname = "BLAS_zgemv2_c_c_x";
16478 printf("Testing %s...\n", fname);
16479 for (i = 0; i < nsizes; i++) {
16480 m = mn_pairs[i][0];
16481 n = mn_pairs[i][1];
16482 total_max_ratio =
16483 do_test_zgemv2_c_c_x(m, n, 1, &seed, thresh, debug, test_prob,
16484 &total_min_ratio, &num_bad_ratio, &num_tests);
16485 if (total_max_ratio > max_ratio)
16486 max_ratio = total_max_ratio;
16487
16488 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16489 min_ratio = total_min_ratio;
16490
16491 total_bad_ratios += num_bad_ratio;
16492 total_tests += num_tests;
16493 }
16494
16495 if (min_ratio == 1e308)
16496 min_ratio = 0.0;
16497
16498 nr_routines++;
16499 if (total_bad_ratios == 0)
16500 printf("PASS> ");
16501 else {
16502 nr_failed_routines++;
16503 printf("FAIL> ");
16504 }
16505
16506 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16507 fname, total_bad_ratios, total_tests, max_ratio);
16508
16509 min_ratio = 1e308;
16510 max_ratio = 0.0;
16511 total_bad_ratios = 0;
16512 total_tests = 0;
16513 fname = "BLAS_cgemv2_c_s_x";
16514 printf("Testing %s...\n", fname);
16515 for (i = 0; i < nsizes; i++) {
16516 m = mn_pairs[i][0];
16517 n = mn_pairs[i][1];
16518 total_max_ratio =
16519 do_test_cgemv2_c_s_x(m, n, 1, &seed, thresh, debug, test_prob,
16520 &total_min_ratio, &num_bad_ratio, &num_tests);
16521 if (total_max_ratio > max_ratio)
16522 max_ratio = total_max_ratio;
16523
16524 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16525 min_ratio = total_min_ratio;
16526
16527 total_bad_ratios += num_bad_ratio;
16528 total_tests += num_tests;
16529 }
16530
16531 if (min_ratio == 1e308)
16532 min_ratio = 0.0;
16533
16534 nr_routines++;
16535 if (total_bad_ratios == 0)
16536 printf("PASS> ");
16537 else {
16538 nr_failed_routines++;
16539 printf("FAIL> ");
16540 }
16541
16542 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16543 fname, total_bad_ratios, total_tests, max_ratio);
16544
16545 min_ratio = 1e308;
16546 max_ratio = 0.0;
16547 total_bad_ratios = 0;
16548 total_tests = 0;
16549 fname = "BLAS_cgemv2_s_c_x";
16550 printf("Testing %s...\n", fname);
16551 for (i = 0; i < nsizes; i++) {
16552 m = mn_pairs[i][0];
16553 n = mn_pairs[i][1];
16554 total_max_ratio =
16555 do_test_cgemv2_s_c_x(m, n, 1, &seed, thresh, debug, test_prob,
16556 &total_min_ratio, &num_bad_ratio, &num_tests);
16557 if (total_max_ratio > max_ratio)
16558 max_ratio = total_max_ratio;
16559
16560 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16561 min_ratio = total_min_ratio;
16562
16563 total_bad_ratios += num_bad_ratio;
16564 total_tests += num_tests;
16565 }
16566
16567 if (min_ratio == 1e308)
16568 min_ratio = 0.0;
16569
16570 nr_routines++;
16571 if (total_bad_ratios == 0)
16572 printf("PASS> ");
16573 else {
16574 nr_failed_routines++;
16575 printf("FAIL> ");
16576 }
16577
16578 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16579 fname, total_bad_ratios, total_tests, max_ratio);
16580
16581 min_ratio = 1e308;
16582 max_ratio = 0.0;
16583 total_bad_ratios = 0;
16584 total_tests = 0;
16585 fname = "BLAS_cgemv2_s_s_x";
16586 printf("Testing %s...\n", fname);
16587 for (i = 0; i < nsizes; i++) {
16588 m = mn_pairs[i][0];
16589 n = mn_pairs[i][1];
16590 total_max_ratio =
16591 do_test_cgemv2_s_s_x(m, n, 1, &seed, thresh, debug, test_prob,
16592 &total_min_ratio, &num_bad_ratio, &num_tests);
16593 if (total_max_ratio > max_ratio)
16594 max_ratio = total_max_ratio;
16595
16596 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16597 min_ratio = total_min_ratio;
16598
16599 total_bad_ratios += num_bad_ratio;
16600 total_tests += num_tests;
16601 }
16602
16603 if (min_ratio == 1e308)
16604 min_ratio = 0.0;
16605
16606 nr_routines++;
16607 if (total_bad_ratios == 0)
16608 printf("PASS> ");
16609 else {
16610 nr_failed_routines++;
16611 printf("FAIL> ");
16612 }
16613
16614 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16615 fname, total_bad_ratios, total_tests, max_ratio);
16616
16617 min_ratio = 1e308;
16618 max_ratio = 0.0;
16619 total_bad_ratios = 0;
16620 total_tests = 0;
16621 fname = "BLAS_zgemv2_z_d_x";
16622 printf("Testing %s...\n", fname);
16623 for (i = 0; i < nsizes; i++) {
16624 m = mn_pairs[i][0];
16625 n = mn_pairs[i][1];
16626 total_max_ratio =
16627 do_test_zgemv2_z_d_x(m, n, 1, &seed, thresh, debug, test_prob,
16628 &total_min_ratio, &num_bad_ratio, &num_tests);
16629 if (total_max_ratio > max_ratio)
16630 max_ratio = total_max_ratio;
16631
16632 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16633 min_ratio = total_min_ratio;
16634
16635 total_bad_ratios += num_bad_ratio;
16636 total_tests += num_tests;
16637 }
16638
16639 if (min_ratio == 1e308)
16640 min_ratio = 0.0;
16641
16642 nr_routines++;
16643 if (total_bad_ratios == 0)
16644 printf("PASS> ");
16645 else {
16646 nr_failed_routines++;
16647 printf("FAIL> ");
16648 }
16649
16650 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16651 fname, total_bad_ratios, total_tests, max_ratio);
16652
16653 min_ratio = 1e308;
16654 max_ratio = 0.0;
16655 total_bad_ratios = 0;
16656 total_tests = 0;
16657 fname = "BLAS_zgemv2_d_z_x";
16658 printf("Testing %s...\n", fname);
16659 for (i = 0; i < nsizes; i++) {
16660 m = mn_pairs[i][0];
16661 n = mn_pairs[i][1];
16662 total_max_ratio =
16663 do_test_zgemv2_d_z_x(m, n, 1, &seed, thresh, debug, test_prob,
16664 &total_min_ratio, &num_bad_ratio, &num_tests);
16665 if (total_max_ratio > max_ratio)
16666 max_ratio = total_max_ratio;
16667
16668 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16669 min_ratio = total_min_ratio;
16670
16671 total_bad_ratios += num_bad_ratio;
16672 total_tests += num_tests;
16673 }
16674
16675 if (min_ratio == 1e308)
16676 min_ratio = 0.0;
16677
16678 nr_routines++;
16679 if (total_bad_ratios == 0)
16680 printf("PASS> ");
16681 else {
16682 nr_failed_routines++;
16683 printf("FAIL> ");
16684 }
16685
16686 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16687 fname, total_bad_ratios, total_tests, max_ratio);
16688
16689 min_ratio = 1e308;
16690 max_ratio = 0.0;
16691 total_bad_ratios = 0;
16692 total_tests = 0;
16693 fname = "BLAS_zgemv2_d_d_x";
16694 printf("Testing %s...\n", fname);
16695 for (i = 0; i < nsizes; i++) {
16696 m = mn_pairs[i][0];
16697 n = mn_pairs[i][1];
16698 total_max_ratio =
16699 do_test_zgemv2_d_d_x(m, n, 1, &seed, thresh, debug, test_prob,
16700 &total_min_ratio, &num_bad_ratio, &num_tests);
16701 if (total_max_ratio > max_ratio)
16702 max_ratio = total_max_ratio;
16703
16704 if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16705 min_ratio = total_min_ratio;
16706
16707 total_bad_ratios += num_bad_ratio;
16708 total_tests += num_tests;
16709 }
16710
16711 if (min_ratio == 1e308)
16712 min_ratio = 0.0;
16713
16714 nr_routines++;
16715 if (total_bad_ratios == 0)
16716 printf("PASS> ");
16717 else {
16718 nr_failed_routines++;
16719 printf("FAIL> ");
16720 }
16721
16722 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16723 fname, total_bad_ratios, total_tests, max_ratio);
16724
16725
16726
16727 printf("\n");
16728 if (nr_failed_routines)
16729 printf("FAILED ");
16730 else
16731 printf("PASSED ");
16732 printf("%-10s: FAIL/TOTAL = %d/%d\n",
16733 base_routine, nr_failed_routines, nr_routines);
16734
16735 return 0;
16736 }
16737