1dnl Generates test code for gemv2 2#include <stdlib.h> 3#include <stdio.h> 4#include <math.h> 5#include "blas_extended.h" 6#include "blas_extended_private.h" 7#include "blas_extended_test.h" 8include(cblas.m4)dnl 9include(test-common.m4)dnl 10include(gemv2/gemv2-common.m4)dnl 11dnl 12dnl 13dnl ----------------------------------------------------------------------- 14dnl Usage: DO_TEST_GEMV2_COMMENT(extended) 15dnl if extended, then print info about prec loop in the code 16dnl structure 17dnl ----------------------------------------------------------------------- 18define(`DO_TEST_GEMV2_COMMENT',` 19/* 20 * Purpose 21 * ======= 22 * 23 * Runs a series of tests on GEMV2. 24 * 25 * Arguments 26 * ========= 27 * 28 * m (input) int 29 * The number of rows 30 * 31 * n (input) int 32 * The number of columns 33 * 34 * ntests (input) int 35 * The number of tests to run for each set of attributes. 36 * 37 * seed (input/output) int 38 * The seed for the random number generator used in testgen(). 39 * 40 * thresh (input) double 41 * When the ratio returned from test() exceeds the specified 42 * threshold, the current size, r_true, r_comp, and ratio will be 43 * printed. (Since ratio is supposed to be O(1), we can set thresh 44 * to ~10.) 45 * 46 * debug (input) int 47 * If debug=3, print summary 48 * If debug=2, print summary only if the number of bad ratios > 0 49 * If debug=1, print complete info if tests fail 50 * If debug=0, return max ratio 51 * 52 * test_prob (input) float 53 * The specified test will be performed only if the generated 54 * random exceeds this threshold. 55 * 56 * min_ratio (output) double 57 * The minimum ratio 58 * 59 * num_bad_ratio (output) int 60 * The number of tests fail; they are above the threshold. 61 * 62 * num_tests (output) int 63 * The number of tests is being performed. 64 * 65 * Return value 66 * ============ 67 * 68 * The maximum ratio if run successfully, otherwise return -1 69 * 70 * Code structure 71 * ============== 72 * 73 * debug loop -- if debug is one, the first loop computes the max ratio 74 * -- and the last(second) loop outputs debugging information, 75 * -- if the test fail and its ratio > 0.5 * max ratio. 76 * -- if debug is zero, the loop is executed once 77 * alpha loop -- varying alpha: 0, 1, or random 78 * beta loop -- varying beta: 0, 1, or random 79ifelse(`$1', `_x',` * prec loop -- varying internal prec: single, double, or extra', `') 80 * norm loop -- varying norm: near undeflow, near one, or 81 * -- near overflow 82 * numtest loop -- how many times the test is perform with 83 * -- above set of attributes 84 * order loop -- varying order type: rowmajor or colmajor 85 * trans loop -- varying uplo type: upper or lower 86 * lda loop -- varying lda: m, m+1, 2m 87 * incx loop -- varying incx: -2, -1, 1, 2 88 * incy loop -- varying incy: -2, -1, 1, 2 89 */')dnl 90dnl 91dnl 92dnl 93dnl ----------------------------------------------------------------------- 94dnl Usage: DO_TEST_GEMV2(abr_typeltr, x_typeltr, y_typeltr, extended) 95dnl 96dnl aby_typeltr : the type and precision of alpha, beta and y 97dnl A_typeltr : the type and precision of A 98dnl x_typeltr : the type and precision of x 99dnl extended : `' if no extended, or `_x' if extended 100dnl Each type and precision specifier can be one of 101dnl s ... real and single 102dnl d ... real and double 103dnl c ... complex and single 104dnl z ... complex and double 105dnl ---------------------------------------------------------------------- 106define(`DO_TEST_GEMV2', 107 `double DO_TEST_GEMV2_NAME($1, $2, $3, $4)(int m, int n, int ntests, dnl 108 int *seed, double thresh, int debug, float test_prob, dnl 109 double *min_ratio, int *num_bad_ratio, int *num_tests) 110DO_TEST_GEMV2_COMMENT($4) 111DO_TEST_GEMV2_BODY($1, $2, $3, $4)')dnl 112dnl 113dnl 114dnl -------------------------------------------------------------------- 115dnl Usage: DO_TEST_GEMV2_BODY(aby_typeltr, A_typeltr, x_typeltr, extended) 116dnl 117dnl aby_typeltr : the type and precision of alpha, beta, y 118dnl A_typeltr : the type and precision of A 119dnl x_typeltr : the type and precision of x 120dnl extended : `' if no extended, or `_x' if extended 121dnl Each type and precision specifier can be one of 122dnl s ... real and single 123dnl d ... real and double 124dnl c ... complex and single 125dnl z ... complex and double 126dnl --------------------------------------------------------------------- 127define(`DO_TEST_GEMV2_BODY', 128`{ 129 /* function name */ 130 const char fname[] = "GEMV2_NAME($1, $2, $3, $4)"; 131 132 /* max number of debug lines to print */ 133 const int max_print = 8; 134 135 /* Variables in the "x_val" form are loop vars for corresponding 136 variables */ 137 int i; /* iterate through the repeating tests */ 138 int j, k; /* multipurpose counters or variables */ 139 int iy; /* use to index y */ 140 int incx_val, incy_val, /* for testing different inc values */ 141 incx, incy; 142 int incy_gen; /* for complex case inc=2, for real case inc=1 */ 143 int d_count; /* counter for debug */ 144 int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */ 145 int p_count; /* counter for the number of debug lines printed*/ 146 int tot_tests; /* total number of tests to be done */ 147 int norm; /* input values of near underflow/one/overflow */ 148 double ratio_max; /* the current maximum ratio */ 149 double ratio_min; /* the current minimum ratio */ 150 double *ratios; /* a temporary variable for calculating ratio */ 151 double ratio; /* the per-use test ratio from test() */ 152 int bad_ratios; /* the number of ratios over the threshold */ 153 double eps_int; /* the internal epsilon expected--2^(-24) for float */ 154 double un_int; /* the internal underflow threshold */ 155 DECLARE(alpha, $1_type) 156 DECLARE(beta, $1_type) 157 DECLARE_VECTOR(A, $2_type) 158 DECLARE_VECTOR(head_x, $3_type) 159 DECLARE_VECTOR(tail_x, $3_type) 160 DECLARE_VECTOR(y, $1_type) 161 DECLARE_VECTOR(temp, $2_type) /* use for calculating ratio */ 162 163 /* x_gen and y_gen are used to store vectors generated by testgen. 164 they eventually are copied back to x and y */ 165 DECLARE_VECTOR(head_x_gen, $3_type) 166 DECLARE_VECTOR(tail_x_gen, $3_type) 167 DECLARE_VECTOR(y_gen, $1_type) 168 169 /* the true r calculated by testgen(), in double-double */ 170 DECLARE_VECTOR(r_true, EXTRA_TYPE($1_type)) 171 int alpha_val; 172 int alpha_flag; /* input flag for TESTGEN_GEMV2_NAME($1, $2, $3, $4) */ 173 int beta_val; 174 int beta_flag; /* input flag for TESTGEN_GEMV2_NAME($1, $2, $3, $4) */ 175 int order_val; 176 enum blas_order_type order_type; 177 ifelse(`$4', `_x', `int prec_val;') 178 enum blas_prec_type prec; 179 int trans_val; 180 enum blas_trans_type trans_type; 181 int m_i; 182 int n_i; 183 int max_mn; /* the max of m and n */ 184 int lda_val; 185 int lda; 186 int saved_seed; /* for saving the original seed */ 187 int count, old_count; /* use for counting the number of testgen calls * 2 */ 188 189 FPU_FIX_DECL; 190 191 /* test for bad arguments */ 192 if (n < 0 || m < 0 || ntests < 0) BLAS_error(fname, 0, 0, NULL); 193 194 /* initialization */ 195 *num_bad_ratio = 0; 196 *num_tests = 0; 197 *min_ratio = 0.0; 198 199 saved_seed = *seed; 200 ratio_min = 1e308; 201 ratio_max = 0.0; 202 ratio = 0.0; 203 tot_tests = 0; 204 p_count = 0; 205 count = 0; 206 find_max_ratio = 0; 207 bad_ratios = 0; 208 old_count = 0; 209 210 if (debug == 3) 211 find_max_ratio = 1; 212 max_mn = MAX(m, n); 213 if (m == 0 || n == 0) { 214 return 0.0; 215 } 216 217 FPU_FIX_START; 218 219 incy_gen = 1; 220 INC_ADJUST(incy_gen, $1_type) 221 222 /* get space for calculation */ 223 MALLOC_VECTOR(head_x, $3_type, max_mn*2) 224 MALLOC_VECTOR(tail_x, $3_type, max_mn*2) 225 MALLOC_VECTOR(y, $1_type, max_mn*2) 226 MALLOC_VECTOR(head_x_gen, $3_type, max_mn) 227 MALLOC_VECTOR(tail_x_gen, $3_type, max_mn) 228 MALLOC_VECTOR(y_gen, $1_type, max_mn) 229 MALLOC_VECTOR(temp, $2_type, max_mn) 230 MALLOC_VECTOR(r_true, EXTRA_TYPE($1_type), max_mn) 231 MALLOC_VECTOR(ratios, real_D, max_mn) 232 MALLOC_VECTOR(A, $2_type, (m-1+n-1+1)*max_mn*2) 233 234 /* The debug iteration: 235 If debug=1, then will execute the iteration twice. First, compute the 236 max ratio. Second, print info if ratio > (50% * ratio_max). */ 237 for (d_count=0; d_count<= find_max_ratio; d_count++) { 238 bad_ratios = 0; /* set to zero */ 239 240 if ((debug == 3) && (d_count == find_max_ratio)) 241 *seed = saved_seed; /* restore the original seed */ 242 243 /* varying alpha */ 244 for (alpha_val=0; alpha_val<3; alpha_val++) { 245 SET_ALPHA($1_type) 246 247 /* varying beta */ 248 for (beta_val=0; beta_val<3; beta_val++) { 249 SET_BETA($1_type) 250 251 ifelse($4, _x, ` 252 /* varying extra precs */ 253 for (prec_val = 0; prec_val <= 2; prec_val++) {') 254 SET_INTERNAL_PARAMS($1_type, $4) 255 256 /* values near underflow, 1, or overflow */ 257 for (norm = -1; norm <= 1; norm++) { 258 259 /* number of tests */ 260 for (i=0; i<ntests; i++){ 261 262 /* row or col major */ 263 for (order_val=0; order_val<2; order_val++) { 264 switch(order_val){ 265 case 0: order_type = blas_rowmajor; break; 266 case 1: default: order_type = blas_colmajor; break; 267 } 268 269 /* no_trans, trans, or conj_trans */ 270 for (trans_val=0; trans_val<3; trans_val++) { 271 switch(trans_val){ 272 case 0: trans_type = blas_no_trans; m_i=m; n_i=n; break; 273 case 1: trans_type = blas_trans; m_i=n; n_i=m; break; 274 case 2: default: trans_type = blas_conj_trans; m_i=n; n_i=m; break; 275 } 276 277 /* lda=n, n+1, or 2n*/ 278 for (lda_val=0; lda_val<3; lda_val++){ 279 switch(lda_val){ 280 case 0: lda=m_i; break; 281 case 1: lda=m_i+1; break; 282 case 2: default: lda=2*m_i; break; 283 } 284 if ((order_type == blas_rowmajor && lda < n) || 285 (order_type == blas_colmajor && lda < m)) 286 continue; 287 288 /* For the sake of speed, we throw out this case at random */ 289 if ( xrand(seed) >= test_prob ) continue; 290 291 /* in the trivial cases, no need to run testgen */ 292 if (m > 0 && n > 0) 293 TESTGEN_GEMV2_NAME($1, $2, $3)(norm, order_type, dnl 294 trans_type, m, n, &alpha, alpha_flag, A, dnl 295 lda, head_x_gen, tail_x_gen, &beta, dnl 296 beta_flag, y_gen, seed, HEAD(r_true), TAIL(r_true)); 297 298 count++; 299 300 /* varying incx */ 301 for (incx_val = -2; incx_val <= 2; incx_val++){ 302 if (incx_val == 0) continue; 303 304 /* setting incx */ 305 incx = incx_val; 306 INC_ADJUST(incx, $3_type) 307 308 $3copy_vector(head_x_gen, n_i, 1, head_x, incx_val); 309 $3copy_vector(tail_x_gen, n_i, 1, tail_x, incx_val); 310 311 /* varying incy */ 312 for (incy_val = -2; incy_val <= 2; incy_val++){ 313 if (incy_val == 0) continue; 314 315 /* setting incy */ 316 incy = incy_val; 317 INC_ADJUST(incy, $1_type) 318 319 $1copy_vector(y_gen, m_i, 1, y, incy_val); 320 321 /* call GEMV2_NAME($1, $2, $3, $4) */ 322 FPU_FIX_STOP; 323 GEMV2_NAME($1, $2, $3, $4)(order_type, trans_type, dnl 324 m, n, alpha, A, lda, head_x, tail_x, incx_val, dnl 325 beta, y, incy_val ifelse(`$4', `_x', `, prec')); 326 FPU_FIX_START; 327 328 /* set y starting index */ 329 iy=0; 330 if (incy < 0) iy = -(m_i-1)*incy; 331 332 /* computing the ratio */ 333 if (m>0 && n>0) 334 for(j=0, k=0; j<m_i; j++, k += incy_gen){ 335 /* copy row j of A to temp */ 336 $2ge_copy_row(order_type, trans_type, m_i, n_i, A, lda, temp, j); 337 338 TEST_DOT2_NAME($1, $2, $3, $4)(n_i, blas_no_conj, dnl 339 alpha, beta, VECTOR_ELEMENT(y_gen, k, $1_type), dnl 340 VECTOR_ELEMENT(y, iy, $1_type), dnl 341 VECTOR_ELEMENT(r_true, k, EXTRA_TYPE($1_type)), dnl 342 temp, 1, head_x, tail_x, incx_val, eps_int, dnl 343 un_int, &ratios[j]); 344 345 /* take the max ratio */ 346 if (j==0) { 347 ratio = ratios[0]; 348 /* The !<= below causes NaN error to be detected. 349 Note that (NaN > thresh) is always false. */ 350 } else if ( !(ratios[j] <= ratio) ) { 351 ratio = ratios[j]; 352 } 353 iy += incy; 354 } 355 356 /* Increase the number of bad ratio, if the ratio 357 is bigger than the threshold. 358 The !<= below causes NaN error to be detected. 359 Note that (NaN > thresh) is always false. */ 360 if ( !(ratio <= thresh) ) { 361 bad_ratios++; 362 363 if ((debug==3) && /* print only when debug is on */ 364 (count != old_count) && /* print if old vector is different 365 from the current one */ 366 (d_count == find_max_ratio) && 367 (p_count <= max_print) && 368 (ratio > 0.5*ratio_max)) 369 { 370 old_count = count; 371 372 printf("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n", 373 fname, m, n, ntests, thresh); 374 375 /* Print test info */ 376 PRINT_PREC(prec) 377 PRINT_NORM(norm) 378 PRINT_ORDER(order_type) 379 PRINT_TRANS(trans_type) 380 381 printf("lda=%d, incx=%d, incy=%d:\n", lda, incx, incy); 382 383 $2ge_print_matrix(A, m_i, n_i, lda, order_type, "A"); 384 385 $3print_vector(head_x, n_i, incx_val, "head_x"); 386 $3print_vector(tail_x, n_i, incx_val, "tail_x"); 387 $1print_vector(y_gen, m_i, 1, "y_gen"); 388 $1print_vector(y, m_i, incy_val, "y_final"); 389 390 printf(" "); PRINT_VAR(alpha, $1_type) 391 printf("\n "); PRINT_VAR(beta, $1_type) 392 printf("\n"); 393 for (j=0, k=0; j<m_i*incy_gen; j += incy_gen, k++){ 394 printf(" "); 395 PRINT_ARRAY_ELEM(r_true, j, EXTRA_TYPE($1_type)) 396 printf(", ratio[%d]=%.4e\n", k, ratios[k]); 397 } 398 399 printf(" ratio=%.4e\n", ratio); 400 p_count++; 401 } 402 if (bad_ratios >= MAX_BAD_TESTS) { 403 printf("\ntoo many failures, exiting...."); 404 printf("\nTesting and compilation"); 405 printf(" are incomplete\n\n"); 406 goto end; 407 } 408 if ( !(ratio <= TOTAL_FAILURE_THRESHOLD) ) { 409 printf("\nFlagrant ratio error, exiting..."); 410 printf("\nTesting and compilation"); 411 printf(" are incomplete\n\n"); 412 goto end; 413 } 414 } 415 if (d_count==0) { 416 if (ratio > ratio_max) 417 ratio_max = ratio; 418 419 if (ratio != 0.0 && ratio < ratio_min) 420 ratio_min = ratio; 421 422 tot_tests++; 423 } 424 }/* incy */ 425 }/* incx */ 426 }/* lda */ 427 } /* trans */ 428 } /* order */ 429 } /* tests */ 430 } /* norm */ 431ifelse(`$4', `_x', ` } /* prec */') 432 } /* beta */ 433 } /* alpha */ 434 } /* debug */ 435 436 if ((debug == 2) || 437 ((debug == 1) && bad_ratios > 0)){ 438 printf(" %s: m = %d, n = %d, ntests = %d, thresh = %4.2f\n", 439 fname, m, n, ntests, thresh); 440 printf(" bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n", 441 bad_ratios, tot_tests, 442 ((double)bad_ratios)/((double)tot_tests), ratio_min, ratio_max); 443 } 444 445end: 446 FPU_FIX_STOP; 447 448 FREE_VECTOR(head_x, $3_type) 449 FREE_VECTOR(tail_x, $3_type) 450 FREE_VECTOR(y, $1_type) 451 FREE_VECTOR(head_x_gen, $3_type) 452 FREE_VECTOR(tail_x_gen, $3_type) 453 FREE_VECTOR(y_gen, $1_type) 454 FREE_VECTOR(temp, $2_type) 455 FREE_VECTOR(A, $2_type) 456 FREE_VECTOR(r_true, EXTRA_TYPE($1_type)) 457 FREE_VECTOR(ratios, real_D) 458 459 *min_ratio = ratio_min; 460 *num_bad_ratio = bad_ratios; 461 *num_tests = tot_tests; 462 return ratio_max; 463}' )dnl 464dnl 465dnl 466dnl 467dnl -------------------------------------------------------------------- 468dnl Usage: TESTGEN_GEMV2_NAME(aby_typeltr, A_typeltr, x_typeltr) 469dnl create a testgen_gemv2 name 470dnl -------------------------------------------------------------------- 471define(`TESTGEN_GEMV2_NAME', `ifelse( 472 `$2&&$3', `$1&&$1', `BLAS_$1gemv2_testgen', 473 `BLAS_$1gemv2_$2_$3_testgen')')dnl 474dnl 475dnl 476dnl ------------------------------------------------------------------- 477dnl Usage: DO_TEST_GEMV2_NAME(abr_typeltr, x_typeltr, y_typeltr, extended) 478dnl create do_test_gemv2 name 479dnl ------------------------------------------------------------------- 480define(`DO_TEST_GEMV2_NAME', `ifelse( 481 `$2&&$3', `$1&&$1', 482 `do_test_$1gemv2$4', 483 `do_test_$1gemv2_$2_$3$4')') dnl 484dnl 485dnl 486dnl 487dnl ------------------------------------------------------------------- 488dnl Usage: CALL_DO_TEST_GEMV2(abr_typeltr, x_typeltr, y_typeltr, extended) 489dnl 490dnl abr_type : the type and precision of alpha, beta and r 491dnl x_type : the type and precision of x 492dnl y_type : the type and precision of y 493dnl extended : `' if no extended, or `_x' if extended 494dnl Each type and precision specifier can be one of 495dnl s ... real and single 496dnl d ... real and double 497dnl c ... complex and single 498dnl z ... complex and double 499dnl ------------------------------------------------------------------- 500define(`CALL_DO_TEST_GEMV2', 501 ` min_ratio = 1e308; max_ratio = 0.0; 502 total_bad_ratios = 0; total_tests = 0; 503 fname = "GEMV2_NAME($1, $2, $3, $4)"; 504 printf("Testing %s...\n", fname); 505 for(i=0; i<nsizes; i++){ 506 m = mn_pairs[i][0]; 507 n = mn_pairs[i][1]; 508 total_max_ratio = DO_TEST_GEMV2_NAME($1, $2, $3, $4)(m, n, 1, dnl 509 &seed, thresh, debug, test_prob, &total_min_ratio, dnl 510 &num_bad_ratio, &num_tests); 511 if (total_max_ratio > max_ratio) 512 max_ratio = total_max_ratio; 513 514 if (total_min_ratio != 0 && total_min_ratio < min_ratio) 515 min_ratio = total_min_ratio; 516 517 total_bad_ratios += num_bad_ratio; 518 total_tests += num_tests; 519 } 520 521 if (min_ratio == 1e308) min_ratio = 0.0; 522 523 nr_routines++; 524 if (total_bad_ratios == 0) 525 printf("PASS> "); 526 else{ 527 nr_failed_routines++; 528 printf("FAIL> "); 529 } 530 531 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n", 532 fname, total_bad_ratios, total_tests, max_ratio); 533')dnl 534dnl 535dnl 536dnl 537FOREACH(`GEMV2_ARGS', ` 538DO_TEST_GEMV2(arg)')dnl 539 540#define NUMPAIRS 12 541MAIN(` 542 int m, i; 543 int mn_pairs[NUMPAIRS][2]={{0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {2, 1}, 544 {3, 1}, {2, 3}, {3, 3}, {2, 4}, {6, 6}, {10, 8}};', ` 545FOREACH(`GEMV2_ARGS', ` 546CALL_DO_TEST_GEMV2(arg)')')dnl 547 548