1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <math.h>
4 #include "blas_extended.h"
5 #include "blas_extended_private.h"
6 #include "blas_extended_test.h"
7
8 #define UPLO_START 0
9 #define UPLO_END 1
10
11 #define SIDE_START 0
12 #define SIDE_END 1
13
14 #define ORDER_START 0
15 #define ORDER_END 1
16
17 #define ALPHA_START 0
18 #define ALPHA_END 2
19
20 #define BETA_START 0
21 #define BETA_END 2
22
23 #define NORM_START -1
24 #define NORM_END 1
25
26 #define LDA_START 0
27 #define LDA_END 2
28
29 #define LDB_START 0
30 #define LDB_END 2
31
32 #define LDC_START 0
33 #define LDC_END 2
34
35 #define PREC_START 0
36 #define PREC_END 2
37
38 #define RANDOMIZE_START 0
39 #define RANDOMIZE_END 1
40
41 #define NUM_DATA 7
42
do_test_zhemm_z_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)43 void do_test_zhemm_z_c(int m, int n,
44 int ntests, int *seed, double thresh, int debug,
45 float test_prob, double *min_ratio, double *max_ratio,
46 int *num_bad_ratio, int *num_tests)
47 {
48
49 /* Function name */
50 const char fname[] = "BLAS_zhemm_z_c";
51
52 int i, j;
53 int ci, cij;
54 int incci, inccij;
55 int test_count;
56 int bad_ratio_count;
57
58 int ri, rij;
59 int incri, incrij;
60 int inca, incb, incc;
61
62 double ratio;
63
64 double ratio_min, ratio_max;
65
66 double eps_int; /* internal machine epsilon */
67 double un_int; /* internal underflow threshold */
68
69 double rin[2];
70 double rout[2];
71 double head_r_true_elem[2], tail_r_true_elem[2];
72
73 enum blas_order_type order_type;
74 enum blas_side_type side_type;
75 enum blas_uplo_type uplo_type;
76 enum blas_prec_type prec;
77
78 int order_val, uplo_val, side_val;
79 int lda_val, ldb_val, ldc_val;
80 int alpha_val, beta_val;
81 int randomize_val;
82
83
84
85 int lda, ldb, ldc;
86 int alpha_flag, beta_flag;
87 int saved_seed;
88 int norm;
89 int test_no;
90 int max_mn;
91 int m_i, n_i;
92
93 double alpha[2];
94 double beta[2];
95 double *a;
96 float *b;
97 double *c;
98 double *a_vec;
99 float *b_vec;
100
101 /* generated test values for c */
102 double *c_gen;
103
104 double *ratios;
105
106 /* true result calculated by testgen, in double-double */
107 double *head_r_true, *tail_r_true;
108
109
110 FPU_FIX_DECL;
111
112 if (n < 0 || m < 0 || ntests < 0)
113 BLAS_error(fname, 0, 0, NULL);
114
115 /* initialization */
116 saved_seed = *seed;
117 ratio = 0.0;
118 ratio_min = 1e308;
119 ratio_max = 0.0;
120
121 *num_tests = 0;
122 *num_bad_ratio = 0;
123 *min_ratio = 0.0;
124 *max_ratio = 0.0;
125
126 if (m == 0 || n == 0)
127 return;
128
129 FPU_FIX_START;
130
131 max_mn = (m > n) ? m : n;
132
133 inca = incb = incc = 1;
134 inca *= 2;
135 incb *= 2;
136 incc *= 2;
137
138 /* allocate memory for arrays */
139 c = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
140 if (2 * m * n > 0 && c == NULL) {
141 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
142 }
143 c_gen = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
144 if (2 * m * n > 0 && c_gen == NULL) {
145 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
146 }
147 a = (double *) blas_malloc(2 * max_mn * max_mn * sizeof(double) * 2);
148 if (2 * max_mn * max_mn > 0 && a == NULL) {
149 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
150 }
151 b = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
152 if (2 * m * n > 0 && b == NULL) {
153 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
154 }
155 a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
156 if (max_mn > 0 && a_vec == NULL) {
157 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
158 }
159 b_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
160 if (max_mn > 0 && b_vec == NULL) {
161 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
162 }
163 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
164 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
165 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
166 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
167 }
168 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
169 if (2 * m * n > 0 && ratios == NULL) {
170 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
171 }
172
173 test_count = 0;
174 bad_ratio_count = 0;
175
176 /* vary alpha */
177 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
178
179 alpha_flag = 0;
180 switch (alpha_val) {
181 case 0:
182 alpha[0] = alpha[1] = 0.0;
183 alpha_flag = 1;
184 break;
185 case 1:
186 alpha[0] = 1.0;
187 alpha[1] = 0.0;
188 alpha_flag = 1;
189 break;
190 }
191
192 /* vary beta */
193 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
194 beta_flag = 0;
195 switch (beta_val) {
196 case 0:
197 beta[0] = beta[1] = 0.0;
198 beta_flag = 1;
199 break;
200 case 1:
201 beta[0] = 1.0;
202 beta[1] = 0.0;
203 beta_flag = 1;
204 break;
205 }
206
207
208 eps_int = power(2, -BITS_D);
209 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
210 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
211 prec = blas_prec_double;
212
213 /* vary norm -- underflow, approx 1, overflow */
214 for (norm = NORM_START; norm <= NORM_END; norm++) {
215
216 /* number of tests */
217 for (test_no = 0; test_no < ntests; test_no++) {
218
219 /* vary storage format */
220 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
221
222 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
223
224 /* vary upper / lower variation */
225 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
226
227 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
228
229 /* vary transpositions of B */
230 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
231
232 side_type = (side_val == 0) ?
233 blas_left_side : blas_right_side;
234
235 if (side_type == blas_left_side) {
236 m_i = m;
237 n_i = n;
238 } else {
239 m_i = n;
240 n_i = m;
241 }
242
243 /* vary lda = k, k+1, 2*k */
244 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
245
246 lda = (lda_val == 0) ? m_i :
247 (lda_val == 1) ? m_i + 1 : 2 * m_i;
248
249 /* vary ldb = n, n+1, 2*n */
250 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
251
252 if (order_type == blas_colmajor)
253 ldb = (ldb_val == 0) ? m :
254 (ldb_val == 1) ? m + 1 : 2 * m;
255 else
256 ldb = (ldb_val == 0) ? n :
257 (ldb_val == 1) ? n + 1 : 2 * n;
258
259 /* vary ldc = k, k+1, 2*k */
260 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
261
262 if (order_type == blas_colmajor)
263 ldc = (ldc_val == 0) ? m :
264 (ldc_val == 1) ? m + 1 : 2 * m;
265 else
266 ldc = (ldc_val == 0) ? n :
267 (ldc_val == 1) ? n + 1 : 2 * n;
268
269 /* vary randomize = 0, 1 */
270 for (randomize_val = RANDOMIZE_START;
271 randomize_val <= RANDOMIZE_END; randomize_val++) {
272
273 /* For the sake of speed, we throw out this case at random */
274 if (xrand(seed) >= test_prob)
275 continue;
276
277 saved_seed = *seed;
278
279 /* finally we are here to generate the test case */
280 BLAS_zhemm_z_c_testgen(norm, order_type,
281 uplo_type, side_type, m, n,
282 randomize_val, &alpha,
283 alpha_flag, &beta, beta_flag,
284 a, lda, b, ldb, c, ldc, seed,
285 head_r_true, tail_r_true);
286 test_count++;
287
288 /* copy generated C matrix since this will be
289 over written */
290 zge_copy_matrix(order_type, m, n, c_gen, ldc, c, ldc);
291
292 /* call hemm routines to be tested */
293 FPU_FIX_STOP;
294 BLAS_zhemm_z_c(order_type, side_type,
295 uplo_type, m, n, alpha, a, lda, b, ldb,
296 beta, c, ldc);
297 FPU_FIX_START;
298
299 /* now compute the ratio using test_BLAS_xdot */
300 /* copy a row from A, a column from B, run
301 dot test */
302
303 if ((order_type == blas_colmajor &&
304 side_type == blas_left_side) ||
305 (order_type == blas_rowmajor &&
306 side_type == blas_right_side)) {
307 incci = 1;
308 inccij = ldc;
309 } else {
310 incci = ldc;
311 inccij = 1;
312 }
313
314 incri = incci;
315 incrij = inccij;
316 incci *= 2;
317 inccij *= 2;
318
319 for (i = 0, ci = 0, ri = 0;
320 i < m_i; i++, ci += incci, ri += incri) {
321 zhe_copy_row(order_type, uplo_type, side_type,
322 m_i, a, lda, a_vec, i);
323 for (j = 0, cij = ci, rij = ri;
324 j < n_i; j++, cij += inccij, rij += incrij) {
325 /* copy i-th row of A and j-th col of B */
326 if (side_type == blas_left_side)
327 cge_copy_col(order_type, blas_no_trans,
328 m, n, b, ldb, b_vec, j);
329 else
330 cge_copy_row(order_type, blas_no_trans,
331 m, n, b, ldb, b_vec, j);
332 rin[0] = c_gen[cij];
333 rin[1] = c_gen[cij + 1];
334 rout[0] = c[cij];
335 rout[1] = c[cij + 1];
336 head_r_true_elem[0] = head_r_true[cij];
337 head_r_true_elem[1] = head_r_true[cij + 1];
338 tail_r_true_elem[0] = tail_r_true[cij];
339 tail_r_true_elem[1] = tail_r_true[cij + 1];
340
341 test_BLAS_zdot_z_c(m_i,
342 blas_no_conj,
343 alpha, beta, rin, rout,
344 head_r_true_elem,
345 tail_r_true_elem, a_vec, 1,
346 b_vec, 1, eps_int, un_int,
347 &ratios[rij]);
348
349 /* take the max ratio */
350 if (rij == 0) {
351 ratio = ratios[0];
352 /* The !<= below causes NaN error to be detected.
353 Note that (NaN > thresh) is always false. */
354 } else if (!(ratios[rij] <= ratio)) {
355 ratio = ratios[rij];
356 }
357
358 }
359 } /* end of dot-test loop */
360
361 /* Increase the number of bad ratio, if the ratio
362 is bigger than the threshold.
363 The !<= below causes NaN error to be detected.
364 Note that (NaN > thresh) is always false. */
365 if (!(ratio <= thresh)) {
366
367 if (debug == 3) {
368 printf("Seed = %d\n", saved_seed);
369 printf("m %d n %d\n", m, n);
370 printf("LDA %d LDB %d LDC %d\n", lda, ldb, ldc);
371
372 if (order_type == blas_rowmajor)
373 printf("row ");
374 else
375 printf("col ");
376
377 if (uplo_type == blas_upper)
378 printf("upper ");
379 else
380 printf("lower");
381
382 if (side_type == blas_left_side)
383 printf(" left\n");
384 else
385 printf(" right\n");
386
387 if (randomize_val == 1)
388 printf("Randomized\n");
389 else
390 printf("Not Randomized\n");
391
392 printf("NORM %d, ALPHA %d, BETA %d\n",
393 norm, alpha_val, beta_val);
394
395 /* print out info */
396 printf("alpha = ");
397 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
398 printf(" ");
399 printf("beta = ");
400 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
401 printf("\n");
402
403 printf("a\n");
404 zhe_print_matrix(a, m_i, lda, order_type,
405 uplo_type);
406 cge_print_matrix(b, m, n, ldb, order_type, "B");
407 zge_print_matrix(c_gen, m, n, ldc, order_type,
408 "C_gen");
409 zge_print_matrix(c, m, n, ldc, order_type, "C");
410 zge_print_matrix(head_r_true, m, n, ldc,
411 order_type, "head_r_true");
412
413 printf("ratio = %g\n", ratio);
414 }
415 bad_ratio_count++;
416 if (bad_ratio_count >= MAX_BAD_TESTS) {
417 printf("\ntoo many failures, exiting....");
418 printf("\nTesting and compilation");
419 printf(" are incomplete\n\n");
420 goto end;
421 }
422 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
423 printf("\nFlagrant ratio %e, exiting...", ratio);
424 printf("\nTesting and compilation");
425 printf(" are incomplete\n\n");
426 goto end;
427 }
428 }
429
430 if (ratio > ratio_max)
431 ratio_max = ratio;
432
433 if (ratio != 0.0 && ratio < ratio_min)
434 ratio_min = ratio;
435
436 } /* end of randomize loop */
437
438 } /* end of ldc loop */
439
440 } /* end of ldb loop */
441
442 } /* end of lda loop */
443
444 } /* end of side loop */
445
446 } /* end of uplo loop */
447
448 } /* end of order loop */
449
450 } /* end of nr test loop */
451
452 } /* end of norm loop */
453
454
455
456 } /* end of beta loop */
457
458 } /* end of alpha loop */
459
460 end:
461 FPU_FIX_STOP;
462
463 blas_free(c);
464 blas_free(a);
465 blas_free(b);
466 blas_free(c_gen);
467 blas_free(head_r_true);
468 blas_free(tail_r_true);
469 blas_free(ratios);
470 blas_free(a_vec);
471 blas_free(b_vec);
472
473 *max_ratio = ratio_max;
474 *min_ratio = ratio_min;
475 *num_tests = test_count;
476 *num_bad_ratio = bad_ratio_count;
477
478 }
do_test_zhemm_c_z(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)479 void do_test_zhemm_c_z(int m, int n,
480 int ntests, int *seed, double thresh, int debug,
481 float test_prob, double *min_ratio, double *max_ratio,
482 int *num_bad_ratio, int *num_tests)
483 {
484
485 /* Function name */
486 const char fname[] = "BLAS_zhemm_c_z";
487
488 int i, j;
489 int ci, cij;
490 int incci, inccij;
491 int test_count;
492 int bad_ratio_count;
493
494 int ri, rij;
495 int incri, incrij;
496 int inca, incb, incc;
497
498 double ratio;
499
500 double ratio_min, ratio_max;
501
502 double eps_int; /* internal machine epsilon */
503 double un_int; /* internal underflow threshold */
504
505 double rin[2];
506 double rout[2];
507 double head_r_true_elem[2], tail_r_true_elem[2];
508
509 enum blas_order_type order_type;
510 enum blas_side_type side_type;
511 enum blas_uplo_type uplo_type;
512 enum blas_prec_type prec;
513
514 int order_val, uplo_val, side_val;
515 int lda_val, ldb_val, ldc_val;
516 int alpha_val, beta_val;
517 int randomize_val;
518
519
520
521 int lda, ldb, ldc;
522 int alpha_flag, beta_flag;
523 int saved_seed;
524 int norm;
525 int test_no;
526 int max_mn;
527 int m_i, n_i;
528
529 double alpha[2];
530 double beta[2];
531 float *a;
532 double *b;
533 double *c;
534 float *a_vec;
535 double *b_vec;
536
537 /* generated test values for c */
538 double *c_gen;
539
540 double *ratios;
541
542 /* true result calculated by testgen, in double-double */
543 double *head_r_true, *tail_r_true;
544
545
546 FPU_FIX_DECL;
547
548 if (n < 0 || m < 0 || ntests < 0)
549 BLAS_error(fname, 0, 0, NULL);
550
551 /* initialization */
552 saved_seed = *seed;
553 ratio = 0.0;
554 ratio_min = 1e308;
555 ratio_max = 0.0;
556
557 *num_tests = 0;
558 *num_bad_ratio = 0;
559 *min_ratio = 0.0;
560 *max_ratio = 0.0;
561
562 if (m == 0 || n == 0)
563 return;
564
565 FPU_FIX_START;
566
567 max_mn = (m > n) ? m : n;
568
569 inca = incb = incc = 1;
570 inca *= 2;
571 incb *= 2;
572 incc *= 2;
573
574 /* allocate memory for arrays */
575 c = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
576 if (2 * m * n > 0 && c == NULL) {
577 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
578 }
579 c_gen = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
580 if (2 * m * n > 0 && c_gen == NULL) {
581 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
582 }
583 a = (float *) blas_malloc(2 * max_mn * max_mn * sizeof(float) * 2);
584 if (2 * max_mn * max_mn > 0 && a == NULL) {
585 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
586 }
587 b = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
588 if (2 * m * n > 0 && b == NULL) {
589 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
590 }
591 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
592 if (max_mn > 0 && a_vec == NULL) {
593 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
594 }
595 b_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
596 if (max_mn > 0 && b_vec == NULL) {
597 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
598 }
599 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
600 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
601 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
602 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
603 }
604 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
605 if (2 * m * n > 0 && ratios == NULL) {
606 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
607 }
608
609 test_count = 0;
610 bad_ratio_count = 0;
611
612 /* vary alpha */
613 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
614
615 alpha_flag = 0;
616 switch (alpha_val) {
617 case 0:
618 alpha[0] = alpha[1] = 0.0;
619 alpha_flag = 1;
620 break;
621 case 1:
622 alpha[0] = 1.0;
623 alpha[1] = 0.0;
624 alpha_flag = 1;
625 break;
626 }
627
628 /* vary beta */
629 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
630 beta_flag = 0;
631 switch (beta_val) {
632 case 0:
633 beta[0] = beta[1] = 0.0;
634 beta_flag = 1;
635 break;
636 case 1:
637 beta[0] = 1.0;
638 beta[1] = 0.0;
639 beta_flag = 1;
640 break;
641 }
642
643
644 eps_int = power(2, -BITS_D);
645 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
646 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
647 prec = blas_prec_double;
648
649 /* vary norm -- underflow, approx 1, overflow */
650 for (norm = NORM_START; norm <= NORM_END; norm++) {
651
652 /* number of tests */
653 for (test_no = 0; test_no < ntests; test_no++) {
654
655 /* vary storage format */
656 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
657
658 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
659
660 /* vary upper / lower variation */
661 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
662
663 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
664
665 /* vary transpositions of B */
666 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
667
668 side_type = (side_val == 0) ?
669 blas_left_side : blas_right_side;
670
671 if (side_type == blas_left_side) {
672 m_i = m;
673 n_i = n;
674 } else {
675 m_i = n;
676 n_i = m;
677 }
678
679 /* vary lda = k, k+1, 2*k */
680 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
681
682 lda = (lda_val == 0) ? m_i :
683 (lda_val == 1) ? m_i + 1 : 2 * m_i;
684
685 /* vary ldb = n, n+1, 2*n */
686 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
687
688 if (order_type == blas_colmajor)
689 ldb = (ldb_val == 0) ? m :
690 (ldb_val == 1) ? m + 1 : 2 * m;
691 else
692 ldb = (ldb_val == 0) ? n :
693 (ldb_val == 1) ? n + 1 : 2 * n;
694
695 /* vary ldc = k, k+1, 2*k */
696 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
697
698 if (order_type == blas_colmajor)
699 ldc = (ldc_val == 0) ? m :
700 (ldc_val == 1) ? m + 1 : 2 * m;
701 else
702 ldc = (ldc_val == 0) ? n :
703 (ldc_val == 1) ? n + 1 : 2 * n;
704
705 /* vary randomize = 0, 1 */
706 for (randomize_val = RANDOMIZE_START;
707 randomize_val <= RANDOMIZE_END; randomize_val++) {
708
709 /* For the sake of speed, we throw out this case at random */
710 if (xrand(seed) >= test_prob)
711 continue;
712
713 saved_seed = *seed;
714
715 /* finally we are here to generate the test case */
716 BLAS_zhemm_c_z_testgen(norm, order_type,
717 uplo_type, side_type, m, n,
718 randomize_val, &alpha,
719 alpha_flag, &beta, beta_flag,
720 a, lda, b, ldb, c, ldc, seed,
721 head_r_true, tail_r_true);
722 test_count++;
723
724 /* copy generated C matrix since this will be
725 over written */
726 zge_copy_matrix(order_type, m, n, c_gen, ldc, c, ldc);
727
728 /* call hemm routines to be tested */
729 FPU_FIX_STOP;
730 BLAS_zhemm_c_z(order_type, side_type,
731 uplo_type, m, n, alpha, a, lda, b, ldb,
732 beta, c, ldc);
733 FPU_FIX_START;
734
735 /* now compute the ratio using test_BLAS_xdot */
736 /* copy a row from A, a column from B, run
737 dot test */
738
739 if ((order_type == blas_colmajor &&
740 side_type == blas_left_side) ||
741 (order_type == blas_rowmajor &&
742 side_type == blas_right_side)) {
743 incci = 1;
744 inccij = ldc;
745 } else {
746 incci = ldc;
747 inccij = 1;
748 }
749
750 incri = incci;
751 incrij = inccij;
752 incci *= 2;
753 inccij *= 2;
754
755 for (i = 0, ci = 0, ri = 0;
756 i < m_i; i++, ci += incci, ri += incri) {
757 che_copy_row(order_type, uplo_type, side_type,
758 m_i, a, lda, a_vec, i);
759 for (j = 0, cij = ci, rij = ri;
760 j < n_i; j++, cij += inccij, rij += incrij) {
761 /* copy i-th row of A and j-th col of B */
762 if (side_type == blas_left_side)
763 zge_copy_col(order_type, blas_no_trans,
764 m, n, b, ldb, b_vec, j);
765 else
766 zge_copy_row(order_type, blas_no_trans,
767 m, n, b, ldb, b_vec, j);
768 rin[0] = c_gen[cij];
769 rin[1] = c_gen[cij + 1];
770 rout[0] = c[cij];
771 rout[1] = c[cij + 1];
772 head_r_true_elem[0] = head_r_true[cij];
773 head_r_true_elem[1] = head_r_true[cij + 1];
774 tail_r_true_elem[0] = tail_r_true[cij];
775 tail_r_true_elem[1] = tail_r_true[cij + 1];
776
777 test_BLAS_zdot_c_z(m_i,
778 blas_no_conj,
779 alpha, beta, rin, rout,
780 head_r_true_elem,
781 tail_r_true_elem, a_vec, 1,
782 b_vec, 1, eps_int, un_int,
783 &ratios[rij]);
784
785 /* take the max ratio */
786 if (rij == 0) {
787 ratio = ratios[0];
788 /* The !<= below causes NaN error to be detected.
789 Note that (NaN > thresh) is always false. */
790 } else if (!(ratios[rij] <= ratio)) {
791 ratio = ratios[rij];
792 }
793
794 }
795 } /* end of dot-test loop */
796
797 /* Increase the number of bad ratio, if the ratio
798 is bigger than the threshold.
799 The !<= below causes NaN error to be detected.
800 Note that (NaN > thresh) is always false. */
801 if (!(ratio <= thresh)) {
802
803 if (debug == 3) {
804 printf("Seed = %d\n", saved_seed);
805 printf("m %d n %d\n", m, n);
806 printf("LDA %d LDB %d LDC %d\n", lda, ldb, ldc);
807
808 if (order_type == blas_rowmajor)
809 printf("row ");
810 else
811 printf("col ");
812
813 if (uplo_type == blas_upper)
814 printf("upper ");
815 else
816 printf("lower");
817
818 if (side_type == blas_left_side)
819 printf(" left\n");
820 else
821 printf(" right\n");
822
823 if (randomize_val == 1)
824 printf("Randomized\n");
825 else
826 printf("Not Randomized\n");
827
828 printf("NORM %d, ALPHA %d, BETA %d\n",
829 norm, alpha_val, beta_val);
830
831 /* print out info */
832 printf("alpha = ");
833 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
834 printf(" ");
835 printf("beta = ");
836 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
837 printf("\n");
838
839 printf("a\n");
840 che_print_matrix(a, m_i, lda, order_type,
841 uplo_type);
842 zge_print_matrix(b, m, n, ldb, order_type, "B");
843 zge_print_matrix(c_gen, m, n, ldc, order_type,
844 "C_gen");
845 zge_print_matrix(c, m, n, ldc, order_type, "C");
846 zge_print_matrix(head_r_true, m, n, ldc,
847 order_type, "head_r_true");
848
849 printf("ratio = %g\n", ratio);
850 }
851 bad_ratio_count++;
852 if (bad_ratio_count >= MAX_BAD_TESTS) {
853 printf("\ntoo many failures, exiting....");
854 printf("\nTesting and compilation");
855 printf(" are incomplete\n\n");
856 goto end;
857 }
858 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
859 printf("\nFlagrant ratio %e, exiting...", ratio);
860 printf("\nTesting and compilation");
861 printf(" are incomplete\n\n");
862 goto end;
863 }
864 }
865
866 if (ratio > ratio_max)
867 ratio_max = ratio;
868
869 if (ratio != 0.0 && ratio < ratio_min)
870 ratio_min = ratio;
871
872 } /* end of randomize loop */
873
874 } /* end of ldc loop */
875
876 } /* end of ldb loop */
877
878 } /* end of lda loop */
879
880 } /* end of side loop */
881
882 } /* end of uplo loop */
883
884 } /* end of order loop */
885
886 } /* end of nr test loop */
887
888 } /* end of norm loop */
889
890
891
892 } /* end of beta loop */
893
894 } /* end of alpha loop */
895
896 end:
897 FPU_FIX_STOP;
898
899 blas_free(c);
900 blas_free(a);
901 blas_free(b);
902 blas_free(c_gen);
903 blas_free(head_r_true);
904 blas_free(tail_r_true);
905 blas_free(ratios);
906 blas_free(a_vec);
907 blas_free(b_vec);
908
909 *max_ratio = ratio_max;
910 *min_ratio = ratio_min;
911 *num_tests = test_count;
912 *num_bad_ratio = bad_ratio_count;
913
914 }
do_test_zhemm_c_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)915 void do_test_zhemm_c_c(int m, int n,
916 int ntests, int *seed, double thresh, int debug,
917 float test_prob, double *min_ratio, double *max_ratio,
918 int *num_bad_ratio, int *num_tests)
919 {
920
921 /* Function name */
922 const char fname[] = "BLAS_zhemm_c_c";
923
924 int i, j;
925 int ci, cij;
926 int incci, inccij;
927 int test_count;
928 int bad_ratio_count;
929
930 int ri, rij;
931 int incri, incrij;
932 int inca, incb, incc;
933
934 double ratio;
935
936 double ratio_min, ratio_max;
937
938 double eps_int; /* internal machine epsilon */
939 double un_int; /* internal underflow threshold */
940
941 double rin[2];
942 double rout[2];
943 double head_r_true_elem[2], tail_r_true_elem[2];
944
945 enum blas_order_type order_type;
946 enum blas_side_type side_type;
947 enum blas_uplo_type uplo_type;
948 enum blas_prec_type prec;
949
950 int order_val, uplo_val, side_val;
951 int lda_val, ldb_val, ldc_val;
952 int alpha_val, beta_val;
953 int randomize_val;
954
955
956
957 int lda, ldb, ldc;
958 int alpha_flag, beta_flag;
959 int saved_seed;
960 int norm;
961 int test_no;
962 int max_mn;
963 int m_i, n_i;
964
965 double alpha[2];
966 double beta[2];
967 float *a;
968 float *b;
969 double *c;
970 float *a_vec;
971 float *b_vec;
972
973 /* generated test values for c */
974 double *c_gen;
975
976 double *ratios;
977
978 /* true result calculated by testgen, in double-double */
979 double *head_r_true, *tail_r_true;
980
981
982 FPU_FIX_DECL;
983
984 if (n < 0 || m < 0 || ntests < 0)
985 BLAS_error(fname, 0, 0, NULL);
986
987 /* initialization */
988 saved_seed = *seed;
989 ratio = 0.0;
990 ratio_min = 1e308;
991 ratio_max = 0.0;
992
993 *num_tests = 0;
994 *num_bad_ratio = 0;
995 *min_ratio = 0.0;
996 *max_ratio = 0.0;
997
998 if (m == 0 || n == 0)
999 return;
1000
1001 FPU_FIX_START;
1002
1003 max_mn = (m > n) ? m : n;
1004
1005 inca = incb = incc = 1;
1006 inca *= 2;
1007 incb *= 2;
1008 incc *= 2;
1009
1010 /* allocate memory for arrays */
1011 c = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
1012 if (2 * m * n > 0 && c == NULL) {
1013 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1014 }
1015 c_gen = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
1016 if (2 * m * n > 0 && c_gen == NULL) {
1017 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1018 }
1019 a = (float *) blas_malloc(2 * max_mn * max_mn * sizeof(float) * 2);
1020 if (2 * max_mn * max_mn > 0 && a == NULL) {
1021 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1022 }
1023 b = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
1024 if (2 * m * n > 0 && b == NULL) {
1025 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1026 }
1027 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
1028 if (max_mn > 0 && a_vec == NULL) {
1029 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1030 }
1031 b_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
1032 if (max_mn > 0 && b_vec == NULL) {
1033 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1034 }
1035 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
1036 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
1037 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1038 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1039 }
1040 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
1041 if (2 * m * n > 0 && ratios == NULL) {
1042 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1043 }
1044
1045 test_count = 0;
1046 bad_ratio_count = 0;
1047
1048 /* vary alpha */
1049 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
1050
1051 alpha_flag = 0;
1052 switch (alpha_val) {
1053 case 0:
1054 alpha[0] = alpha[1] = 0.0;
1055 alpha_flag = 1;
1056 break;
1057 case 1:
1058 alpha[0] = 1.0;
1059 alpha[1] = 0.0;
1060 alpha_flag = 1;
1061 break;
1062 }
1063
1064 /* vary beta */
1065 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
1066 beta_flag = 0;
1067 switch (beta_val) {
1068 case 0:
1069 beta[0] = beta[1] = 0.0;
1070 beta_flag = 1;
1071 break;
1072 case 1:
1073 beta[0] = 1.0;
1074 beta[1] = 0.0;
1075 beta_flag = 1;
1076 break;
1077 }
1078
1079
1080 eps_int = power(2, -BITS_D);
1081 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1082 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1083 prec = blas_prec_double;
1084
1085 /* vary norm -- underflow, approx 1, overflow */
1086 for (norm = NORM_START; norm <= NORM_END; norm++) {
1087
1088 /* number of tests */
1089 for (test_no = 0; test_no < ntests; test_no++) {
1090
1091 /* vary storage format */
1092 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
1093
1094 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
1095
1096 /* vary upper / lower variation */
1097 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
1098
1099 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
1100
1101 /* vary transpositions of B */
1102 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
1103
1104 side_type = (side_val == 0) ?
1105 blas_left_side : blas_right_side;
1106
1107 if (side_type == blas_left_side) {
1108 m_i = m;
1109 n_i = n;
1110 } else {
1111 m_i = n;
1112 n_i = m;
1113 }
1114
1115 /* vary lda = k, k+1, 2*k */
1116 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
1117
1118 lda = (lda_val == 0) ? m_i :
1119 (lda_val == 1) ? m_i + 1 : 2 * m_i;
1120
1121 /* vary ldb = n, n+1, 2*n */
1122 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
1123
1124 if (order_type == blas_colmajor)
1125 ldb = (ldb_val == 0) ? m :
1126 (ldb_val == 1) ? m + 1 : 2 * m;
1127 else
1128 ldb = (ldb_val == 0) ? n :
1129 (ldb_val == 1) ? n + 1 : 2 * n;
1130
1131 /* vary ldc = k, k+1, 2*k */
1132 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
1133
1134 if (order_type == blas_colmajor)
1135 ldc = (ldc_val == 0) ? m :
1136 (ldc_val == 1) ? m + 1 : 2 * m;
1137 else
1138 ldc = (ldc_val == 0) ? n :
1139 (ldc_val == 1) ? n + 1 : 2 * n;
1140
1141 /* vary randomize = 0, 1 */
1142 for (randomize_val = RANDOMIZE_START;
1143 randomize_val <= RANDOMIZE_END; randomize_val++) {
1144
1145 /* For the sake of speed, we throw out this case at random */
1146 if (xrand(seed) >= test_prob)
1147 continue;
1148
1149 saved_seed = *seed;
1150
1151 /* finally we are here to generate the test case */
1152 BLAS_zhemm_c_c_testgen(norm, order_type,
1153 uplo_type, side_type, m, n,
1154 randomize_val, &alpha,
1155 alpha_flag, &beta, beta_flag,
1156 a, lda, b, ldb, c, ldc, seed,
1157 head_r_true, tail_r_true);
1158 test_count++;
1159
1160 /* copy generated C matrix since this will be
1161 over written */
1162 zge_copy_matrix(order_type, m, n, c_gen, ldc, c, ldc);
1163
1164 /* call hemm routines to be tested */
1165 FPU_FIX_STOP;
1166 BLAS_zhemm_c_c(order_type, side_type,
1167 uplo_type, m, n, alpha, a, lda, b, ldb,
1168 beta, c, ldc);
1169 FPU_FIX_START;
1170
1171 /* now compute the ratio using test_BLAS_xdot */
1172 /* copy a row from A, a column from B, run
1173 dot test */
1174
1175 if ((order_type == blas_colmajor &&
1176 side_type == blas_left_side) ||
1177 (order_type == blas_rowmajor &&
1178 side_type == blas_right_side)) {
1179 incci = 1;
1180 inccij = ldc;
1181 } else {
1182 incci = ldc;
1183 inccij = 1;
1184 }
1185
1186 incri = incci;
1187 incrij = inccij;
1188 incci *= 2;
1189 inccij *= 2;
1190
1191 for (i = 0, ci = 0, ri = 0;
1192 i < m_i; i++, ci += incci, ri += incri) {
1193 che_copy_row(order_type, uplo_type, side_type,
1194 m_i, a, lda, a_vec, i);
1195 for (j = 0, cij = ci, rij = ri;
1196 j < n_i; j++, cij += inccij, rij += incrij) {
1197 /* copy i-th row of A and j-th col of B */
1198 if (side_type == blas_left_side)
1199 cge_copy_col(order_type, blas_no_trans,
1200 m, n, b, ldb, b_vec, j);
1201 else
1202 cge_copy_row(order_type, blas_no_trans,
1203 m, n, b, ldb, b_vec, j);
1204 rin[0] = c_gen[cij];
1205 rin[1] = c_gen[cij + 1];
1206 rout[0] = c[cij];
1207 rout[1] = c[cij + 1];
1208 head_r_true_elem[0] = head_r_true[cij];
1209 head_r_true_elem[1] = head_r_true[cij + 1];
1210 tail_r_true_elem[0] = tail_r_true[cij];
1211 tail_r_true_elem[1] = tail_r_true[cij + 1];
1212
1213 test_BLAS_zdot_c_c(m_i,
1214 blas_no_conj,
1215 alpha, beta, rin, rout,
1216 head_r_true_elem,
1217 tail_r_true_elem, a_vec, 1,
1218 b_vec, 1, eps_int, un_int,
1219 &ratios[rij]);
1220
1221 /* take the max ratio */
1222 if (rij == 0) {
1223 ratio = ratios[0];
1224 /* The !<= below causes NaN error to be detected.
1225 Note that (NaN > thresh) is always false. */
1226 } else if (!(ratios[rij] <= ratio)) {
1227 ratio = ratios[rij];
1228 }
1229
1230 }
1231 } /* end of dot-test loop */
1232
1233 /* Increase the number of bad ratio, if the ratio
1234 is bigger than the threshold.
1235 The !<= below causes NaN error to be detected.
1236 Note that (NaN > thresh) is always false. */
1237 if (!(ratio <= thresh)) {
1238
1239 if (debug == 3) {
1240 printf("Seed = %d\n", saved_seed);
1241 printf("m %d n %d\n", m, n);
1242 printf("LDA %d LDB %d LDC %d\n", lda, ldb, ldc);
1243
1244 if (order_type == blas_rowmajor)
1245 printf("row ");
1246 else
1247 printf("col ");
1248
1249 if (uplo_type == blas_upper)
1250 printf("upper ");
1251 else
1252 printf("lower");
1253
1254 if (side_type == blas_left_side)
1255 printf(" left\n");
1256 else
1257 printf(" right\n");
1258
1259 if (randomize_val == 1)
1260 printf("Randomized\n");
1261 else
1262 printf("Not Randomized\n");
1263
1264 printf("NORM %d, ALPHA %d, BETA %d\n",
1265 norm, alpha_val, beta_val);
1266
1267 /* print out info */
1268 printf("alpha = ");
1269 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
1270 printf(" ");
1271 printf("beta = ");
1272 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
1273 printf("\n");
1274
1275 printf("a\n");
1276 che_print_matrix(a, m_i, lda, order_type,
1277 uplo_type);
1278 cge_print_matrix(b, m, n, ldb, order_type, "B");
1279 zge_print_matrix(c_gen, m, n, ldc, order_type,
1280 "C_gen");
1281 zge_print_matrix(c, m, n, ldc, order_type, "C");
1282 zge_print_matrix(head_r_true, m, n, ldc,
1283 order_type, "head_r_true");
1284
1285 printf("ratio = %g\n", ratio);
1286 }
1287 bad_ratio_count++;
1288 if (bad_ratio_count >= MAX_BAD_TESTS) {
1289 printf("\ntoo many failures, exiting....");
1290 printf("\nTesting and compilation");
1291 printf(" are incomplete\n\n");
1292 goto end;
1293 }
1294 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
1295 printf("\nFlagrant ratio %e, exiting...", ratio);
1296 printf("\nTesting and compilation");
1297 printf(" are incomplete\n\n");
1298 goto end;
1299 }
1300 }
1301
1302 if (ratio > ratio_max)
1303 ratio_max = ratio;
1304
1305 if (ratio != 0.0 && ratio < ratio_min)
1306 ratio_min = ratio;
1307
1308 } /* end of randomize loop */
1309
1310 } /* end of ldc loop */
1311
1312 } /* end of ldb loop */
1313
1314 } /* end of lda loop */
1315
1316 } /* end of side loop */
1317
1318 } /* end of uplo loop */
1319
1320 } /* end of order loop */
1321
1322 } /* end of nr test loop */
1323
1324 } /* end of norm loop */
1325
1326
1327
1328 } /* end of beta loop */
1329
1330 } /* end of alpha loop */
1331
1332 end:
1333 FPU_FIX_STOP;
1334
1335 blas_free(c);
1336 blas_free(a);
1337 blas_free(b);
1338 blas_free(c_gen);
1339 blas_free(head_r_true);
1340 blas_free(tail_r_true);
1341 blas_free(ratios);
1342 blas_free(a_vec);
1343 blas_free(b_vec);
1344
1345 *max_ratio = ratio_max;
1346 *min_ratio = ratio_min;
1347 *num_tests = test_count;
1348 *num_bad_ratio = bad_ratio_count;
1349
1350 }
do_test_chemm_c_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)1351 void do_test_chemm_c_s(int m, int n,
1352 int ntests, int *seed, double thresh, int debug,
1353 float test_prob, double *min_ratio, double *max_ratio,
1354 int *num_bad_ratio, int *num_tests)
1355 {
1356
1357 /* Function name */
1358 const char fname[] = "BLAS_chemm_c_s";
1359
1360 int i, j;
1361 int ci, cij;
1362 int incci, inccij;
1363 int test_count;
1364 int bad_ratio_count;
1365
1366 int ri, rij;
1367 int incri, incrij;
1368 int inca, incb, incc;
1369
1370 double ratio;
1371
1372 double ratio_min, ratio_max;
1373
1374 double eps_int; /* internal machine epsilon */
1375 double un_int; /* internal underflow threshold */
1376
1377 float rin[2];
1378 float rout[2];
1379 double head_r_true_elem[2], tail_r_true_elem[2];
1380
1381 enum blas_order_type order_type;
1382 enum blas_side_type side_type;
1383 enum blas_uplo_type uplo_type;
1384 enum blas_prec_type prec;
1385
1386 int order_val, uplo_val, side_val;
1387 int lda_val, ldb_val, ldc_val;
1388 int alpha_val, beta_val;
1389 int randomize_val;
1390
1391
1392
1393 int lda, ldb, ldc;
1394 int alpha_flag, beta_flag;
1395 int saved_seed;
1396 int norm;
1397 int test_no;
1398 int max_mn;
1399 int m_i, n_i;
1400
1401 float alpha[2];
1402 float beta[2];
1403 float *a;
1404 float *b;
1405 float *c;
1406 float *a_vec;
1407 float *b_vec;
1408
1409 /* generated test values for c */
1410 float *c_gen;
1411
1412 double *ratios;
1413
1414 /* true result calculated by testgen, in double-double */
1415 double *head_r_true, *tail_r_true;
1416
1417
1418 FPU_FIX_DECL;
1419
1420 if (n < 0 || m < 0 || ntests < 0)
1421 BLAS_error(fname, 0, 0, NULL);
1422
1423 /* initialization */
1424 saved_seed = *seed;
1425 ratio = 0.0;
1426 ratio_min = 1e308;
1427 ratio_max = 0.0;
1428
1429 *num_tests = 0;
1430 *num_bad_ratio = 0;
1431 *min_ratio = 0.0;
1432 *max_ratio = 0.0;
1433
1434 if (m == 0 || n == 0)
1435 return;
1436
1437 FPU_FIX_START;
1438
1439 max_mn = (m > n) ? m : n;
1440
1441 inca = incb = incc = 1;
1442 inca *= 2;
1443
1444 incc *= 2;
1445
1446 /* allocate memory for arrays */
1447 c = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
1448 if (2 * m * n > 0 && c == NULL) {
1449 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1450 }
1451 c_gen = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
1452 if (2 * m * n > 0 && c_gen == NULL) {
1453 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1454 }
1455 a = (float *) blas_malloc(2 * max_mn * max_mn * sizeof(float) * 2);
1456 if (2 * max_mn * max_mn > 0 && a == NULL) {
1457 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1458 }
1459 b = (float *) blas_malloc(2 * m * n * sizeof(float));
1460 if (2 * m * n > 0 && b == NULL) {
1461 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1462 }
1463 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
1464 if (max_mn > 0 && a_vec == NULL) {
1465 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1466 }
1467 b_vec = (float *) blas_malloc(max_mn * sizeof(float));
1468 if (max_mn > 0 && b_vec == NULL) {
1469 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1470 }
1471 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
1472 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
1473 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1474 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1475 }
1476 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
1477 if (2 * m * n > 0 && ratios == NULL) {
1478 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1479 }
1480
1481 test_count = 0;
1482 bad_ratio_count = 0;
1483
1484 /* vary alpha */
1485 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
1486
1487 alpha_flag = 0;
1488 switch (alpha_val) {
1489 case 0:
1490 alpha[0] = alpha[1] = 0.0;
1491 alpha_flag = 1;
1492 break;
1493 case 1:
1494 alpha[0] = 1.0;
1495 alpha[1] = 0.0;
1496 alpha_flag = 1;
1497 break;
1498 }
1499
1500 /* vary beta */
1501 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
1502 beta_flag = 0;
1503 switch (beta_val) {
1504 case 0:
1505 beta[0] = beta[1] = 0.0;
1506 beta_flag = 1;
1507 break;
1508 case 1:
1509 beta[0] = 1.0;
1510 beta[1] = 0.0;
1511 beta_flag = 1;
1512 break;
1513 }
1514
1515
1516 eps_int = power(2, -BITS_S);
1517 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
1518 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
1519 prec = blas_prec_single;
1520
1521 /* vary norm -- underflow, approx 1, overflow */
1522 for (norm = NORM_START; norm <= NORM_END; norm++) {
1523
1524 /* number of tests */
1525 for (test_no = 0; test_no < ntests; test_no++) {
1526
1527 /* vary storage format */
1528 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
1529
1530 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
1531
1532 /* vary upper / lower variation */
1533 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
1534
1535 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
1536
1537 /* vary transpositions of B */
1538 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
1539
1540 side_type = (side_val == 0) ?
1541 blas_left_side : blas_right_side;
1542
1543 if (side_type == blas_left_side) {
1544 m_i = m;
1545 n_i = n;
1546 } else {
1547 m_i = n;
1548 n_i = m;
1549 }
1550
1551 /* vary lda = k, k+1, 2*k */
1552 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
1553
1554 lda = (lda_val == 0) ? m_i :
1555 (lda_val == 1) ? m_i + 1 : 2 * m_i;
1556
1557 /* vary ldb = n, n+1, 2*n */
1558 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
1559
1560 if (order_type == blas_colmajor)
1561 ldb = (ldb_val == 0) ? m :
1562 (ldb_val == 1) ? m + 1 : 2 * m;
1563 else
1564 ldb = (ldb_val == 0) ? n :
1565 (ldb_val == 1) ? n + 1 : 2 * n;
1566
1567 /* vary ldc = k, k+1, 2*k */
1568 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
1569
1570 if (order_type == blas_colmajor)
1571 ldc = (ldc_val == 0) ? m :
1572 (ldc_val == 1) ? m + 1 : 2 * m;
1573 else
1574 ldc = (ldc_val == 0) ? n :
1575 (ldc_val == 1) ? n + 1 : 2 * n;
1576
1577 /* vary randomize = 0, 1 */
1578 for (randomize_val = RANDOMIZE_START;
1579 randomize_val <= RANDOMIZE_END; randomize_val++) {
1580
1581 /* For the sake of speed, we throw out this case at random */
1582 if (xrand(seed) >= test_prob)
1583 continue;
1584
1585 saved_seed = *seed;
1586
1587 /* finally we are here to generate the test case */
1588 BLAS_chemm_c_s_testgen(norm, order_type,
1589 uplo_type, side_type, m, n,
1590 randomize_val, &alpha,
1591 alpha_flag, &beta, beta_flag,
1592 a, lda, b, ldb, c, ldc, seed,
1593 head_r_true, tail_r_true);
1594 test_count++;
1595
1596 /* copy generated C matrix since this will be
1597 over written */
1598 cge_copy_matrix(order_type, m, n, c_gen, ldc, c, ldc);
1599
1600 /* call hemm routines to be tested */
1601 FPU_FIX_STOP;
1602 BLAS_chemm_c_s(order_type, side_type,
1603 uplo_type, m, n, alpha, a, lda, b, ldb,
1604 beta, c, ldc);
1605 FPU_FIX_START;
1606
1607 /* now compute the ratio using test_BLAS_xdot */
1608 /* copy a row from A, a column from B, run
1609 dot test */
1610
1611 if ((order_type == blas_colmajor &&
1612 side_type == blas_left_side) ||
1613 (order_type == blas_rowmajor &&
1614 side_type == blas_right_side)) {
1615 incci = 1;
1616 inccij = ldc;
1617 } else {
1618 incci = ldc;
1619 inccij = 1;
1620 }
1621
1622 incri = incci;
1623 incrij = inccij;
1624 incci *= 2;
1625 inccij *= 2;
1626
1627 for (i = 0, ci = 0, ri = 0;
1628 i < m_i; i++, ci += incci, ri += incri) {
1629 che_copy_row(order_type, uplo_type, side_type,
1630 m_i, a, lda, a_vec, i);
1631 for (j = 0, cij = ci, rij = ri;
1632 j < n_i; j++, cij += inccij, rij += incrij) {
1633 /* copy i-th row of A and j-th col of B */
1634 if (side_type == blas_left_side)
1635 sge_copy_col(order_type, blas_no_trans,
1636 m, n, b, ldb, b_vec, j);
1637 else
1638 sge_copy_row(order_type, blas_no_trans,
1639 m, n, b, ldb, b_vec, j);
1640 rin[0] = c_gen[cij];
1641 rin[1] = c_gen[cij + 1];
1642 rout[0] = c[cij];
1643 rout[1] = c[cij + 1];
1644 head_r_true_elem[0] = head_r_true[cij];
1645 head_r_true_elem[1] = head_r_true[cij + 1];
1646 tail_r_true_elem[0] = tail_r_true[cij];
1647 tail_r_true_elem[1] = tail_r_true[cij + 1];
1648
1649 test_BLAS_cdot_c_s(m_i,
1650 blas_no_conj,
1651 alpha, beta, rin, rout,
1652 head_r_true_elem,
1653 tail_r_true_elem, a_vec, 1,
1654 b_vec, 1, eps_int, un_int,
1655 &ratios[rij]);
1656
1657 /* take the max ratio */
1658 if (rij == 0) {
1659 ratio = ratios[0];
1660 /* The !<= below causes NaN error to be detected.
1661 Note that (NaN > thresh) is always false. */
1662 } else if (!(ratios[rij] <= ratio)) {
1663 ratio = ratios[rij];
1664 }
1665
1666 }
1667 } /* end of dot-test loop */
1668
1669 /* Increase the number of bad ratio, if the ratio
1670 is bigger than the threshold.
1671 The !<= below causes NaN error to be detected.
1672 Note that (NaN > thresh) is always false. */
1673 if (!(ratio <= thresh)) {
1674
1675 if (debug == 3) {
1676 printf("Seed = %d\n", saved_seed);
1677 printf("m %d n %d\n", m, n);
1678 printf("LDA %d LDB %d LDC %d\n", lda, ldb, ldc);
1679
1680 if (order_type == blas_rowmajor)
1681 printf("row ");
1682 else
1683 printf("col ");
1684
1685 if (uplo_type == blas_upper)
1686 printf("upper ");
1687 else
1688 printf("lower");
1689
1690 if (side_type == blas_left_side)
1691 printf(" left\n");
1692 else
1693 printf(" right\n");
1694
1695 if (randomize_val == 1)
1696 printf("Randomized\n");
1697 else
1698 printf("Not Randomized\n");
1699
1700 printf("NORM %d, ALPHA %d, BETA %d\n",
1701 norm, alpha_val, beta_val);
1702
1703 /* print out info */
1704 printf("alpha = ");
1705 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);;
1706 printf(" ");
1707 printf("beta = ");
1708 printf("(%16.8e, %16.8e)", beta[0], beta[1]);;
1709 printf("\n");
1710
1711 printf("a\n");
1712 che_print_matrix(a, m_i, lda, order_type,
1713 uplo_type);
1714 sge_print_matrix(b, m, n, ldb, order_type, "B");
1715 cge_print_matrix(c_gen, m, n, ldc, order_type,
1716 "C_gen");
1717 cge_print_matrix(c, m, n, ldc, order_type, "C");
1718 zge_print_matrix(head_r_true, m, n, ldc,
1719 order_type, "head_r_true");
1720
1721 printf("ratio = %g\n", ratio);
1722 }
1723 bad_ratio_count++;
1724 if (bad_ratio_count >= MAX_BAD_TESTS) {
1725 printf("\ntoo many failures, exiting....");
1726 printf("\nTesting and compilation");
1727 printf(" are incomplete\n\n");
1728 goto end;
1729 }
1730 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
1731 printf("\nFlagrant ratio %e, exiting...", ratio);
1732 printf("\nTesting and compilation");
1733 printf(" are incomplete\n\n");
1734 goto end;
1735 }
1736 }
1737
1738 if (ratio > ratio_max)
1739 ratio_max = ratio;
1740
1741 if (ratio != 0.0 && ratio < ratio_min)
1742 ratio_min = ratio;
1743
1744 } /* end of randomize loop */
1745
1746 } /* end of ldc loop */
1747
1748 } /* end of ldb loop */
1749
1750 } /* end of lda loop */
1751
1752 } /* end of side loop */
1753
1754 } /* end of uplo loop */
1755
1756 } /* end of order loop */
1757
1758 } /* end of nr test loop */
1759
1760 } /* end of norm loop */
1761
1762
1763
1764 } /* end of beta loop */
1765
1766 } /* end of alpha loop */
1767
1768 end:
1769 FPU_FIX_STOP;
1770
1771 blas_free(c);
1772 blas_free(a);
1773 blas_free(b);
1774 blas_free(c_gen);
1775 blas_free(head_r_true);
1776 blas_free(tail_r_true);
1777 blas_free(ratios);
1778 blas_free(a_vec);
1779 blas_free(b_vec);
1780
1781 *max_ratio = ratio_max;
1782 *min_ratio = ratio_min;
1783 *num_tests = test_count;
1784 *num_bad_ratio = bad_ratio_count;
1785
1786 }
do_test_zhemm_z_d(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)1787 void do_test_zhemm_z_d(int m, int n,
1788 int ntests, int *seed, double thresh, int debug,
1789 float test_prob, double *min_ratio, double *max_ratio,
1790 int *num_bad_ratio, int *num_tests)
1791 {
1792
1793 /* Function name */
1794 const char fname[] = "BLAS_zhemm_z_d";
1795
1796 int i, j;
1797 int ci, cij;
1798 int incci, inccij;
1799 int test_count;
1800 int bad_ratio_count;
1801
1802 int ri, rij;
1803 int incri, incrij;
1804 int inca, incb, incc;
1805
1806 double ratio;
1807
1808 double ratio_min, ratio_max;
1809
1810 double eps_int; /* internal machine epsilon */
1811 double un_int; /* internal underflow threshold */
1812
1813 double rin[2];
1814 double rout[2];
1815 double head_r_true_elem[2], tail_r_true_elem[2];
1816
1817 enum blas_order_type order_type;
1818 enum blas_side_type side_type;
1819 enum blas_uplo_type uplo_type;
1820 enum blas_prec_type prec;
1821
1822 int order_val, uplo_val, side_val;
1823 int lda_val, ldb_val, ldc_val;
1824 int alpha_val, beta_val;
1825 int randomize_val;
1826
1827
1828
1829 int lda, ldb, ldc;
1830 int alpha_flag, beta_flag;
1831 int saved_seed;
1832 int norm;
1833 int test_no;
1834 int max_mn;
1835 int m_i, n_i;
1836
1837 double alpha[2];
1838 double beta[2];
1839 double *a;
1840 double *b;
1841 double *c;
1842 double *a_vec;
1843 double *b_vec;
1844
1845 /* generated test values for c */
1846 double *c_gen;
1847
1848 double *ratios;
1849
1850 /* true result calculated by testgen, in double-double */
1851 double *head_r_true, *tail_r_true;
1852
1853
1854 FPU_FIX_DECL;
1855
1856 if (n < 0 || m < 0 || ntests < 0)
1857 BLAS_error(fname, 0, 0, NULL);
1858
1859 /* initialization */
1860 saved_seed = *seed;
1861 ratio = 0.0;
1862 ratio_min = 1e308;
1863 ratio_max = 0.0;
1864
1865 *num_tests = 0;
1866 *num_bad_ratio = 0;
1867 *min_ratio = 0.0;
1868 *max_ratio = 0.0;
1869
1870 if (m == 0 || n == 0)
1871 return;
1872
1873 FPU_FIX_START;
1874
1875 max_mn = (m > n) ? m : n;
1876
1877 inca = incb = incc = 1;
1878 inca *= 2;
1879
1880 incc *= 2;
1881
1882 /* allocate memory for arrays */
1883 c = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
1884 if (2 * m * n > 0 && c == NULL) {
1885 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1886 }
1887 c_gen = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
1888 if (2 * m * n > 0 && c_gen == NULL) {
1889 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1890 }
1891 a = (double *) blas_malloc(2 * max_mn * max_mn * sizeof(double) * 2);
1892 if (2 * max_mn * max_mn > 0 && a == NULL) {
1893 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1894 }
1895 b = (double *) blas_malloc(2 * m * n * sizeof(double));
1896 if (2 * m * n > 0 && b == NULL) {
1897 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1898 }
1899 a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
1900 if (max_mn > 0 && a_vec == NULL) {
1901 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1902 }
1903 b_vec = (double *) blas_malloc(max_mn * sizeof(double));
1904 if (max_mn > 0 && b_vec == NULL) {
1905 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1906 }
1907 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
1908 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
1909 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1910 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1911 }
1912 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
1913 if (2 * m * n > 0 && ratios == NULL) {
1914 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1915 }
1916
1917 test_count = 0;
1918 bad_ratio_count = 0;
1919
1920 /* vary alpha */
1921 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
1922
1923 alpha_flag = 0;
1924 switch (alpha_val) {
1925 case 0:
1926 alpha[0] = alpha[1] = 0.0;
1927 alpha_flag = 1;
1928 break;
1929 case 1:
1930 alpha[0] = 1.0;
1931 alpha[1] = 0.0;
1932 alpha_flag = 1;
1933 break;
1934 }
1935
1936 /* vary beta */
1937 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
1938 beta_flag = 0;
1939 switch (beta_val) {
1940 case 0:
1941 beta[0] = beta[1] = 0.0;
1942 beta_flag = 1;
1943 break;
1944 case 1:
1945 beta[0] = 1.0;
1946 beta[1] = 0.0;
1947 beta_flag = 1;
1948 break;
1949 }
1950
1951
1952 eps_int = power(2, -BITS_D);
1953 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1954 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1955 prec = blas_prec_double;
1956
1957 /* vary norm -- underflow, approx 1, overflow */
1958 for (norm = NORM_START; norm <= NORM_END; norm++) {
1959
1960 /* number of tests */
1961 for (test_no = 0; test_no < ntests; test_no++) {
1962
1963 /* vary storage format */
1964 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
1965
1966 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
1967
1968 /* vary upper / lower variation */
1969 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
1970
1971 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
1972
1973 /* vary transpositions of B */
1974 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
1975
1976 side_type = (side_val == 0) ?
1977 blas_left_side : blas_right_side;
1978
1979 if (side_type == blas_left_side) {
1980 m_i = m;
1981 n_i = n;
1982 } else {
1983 m_i = n;
1984 n_i = m;
1985 }
1986
1987 /* vary lda = k, k+1, 2*k */
1988 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
1989
1990 lda = (lda_val == 0) ? m_i :
1991 (lda_val == 1) ? m_i + 1 : 2 * m_i;
1992
1993 /* vary ldb = n, n+1, 2*n */
1994 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
1995
1996 if (order_type == blas_colmajor)
1997 ldb = (ldb_val == 0) ? m :
1998 (ldb_val == 1) ? m + 1 : 2 * m;
1999 else
2000 ldb = (ldb_val == 0) ? n :
2001 (ldb_val == 1) ? n + 1 : 2 * n;
2002
2003 /* vary ldc = k, k+1, 2*k */
2004 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
2005
2006 if (order_type == blas_colmajor)
2007 ldc = (ldc_val == 0) ? m :
2008 (ldc_val == 1) ? m + 1 : 2 * m;
2009 else
2010 ldc = (ldc_val == 0) ? n :
2011 (ldc_val == 1) ? n + 1 : 2 * n;
2012
2013 /* vary randomize = 0, 1 */
2014 for (randomize_val = RANDOMIZE_START;
2015 randomize_val <= RANDOMIZE_END; randomize_val++) {
2016
2017 /* For the sake of speed, we throw out this case at random */
2018 if (xrand(seed) >= test_prob)
2019 continue;
2020
2021 saved_seed = *seed;
2022
2023 /* finally we are here to generate the test case */
2024 BLAS_zhemm_z_d_testgen(norm, order_type,
2025 uplo_type, side_type, m, n,
2026 randomize_val, &alpha,
2027 alpha_flag, &beta, beta_flag,
2028 a, lda, b, ldb, c, ldc, seed,
2029 head_r_true, tail_r_true);
2030 test_count++;
2031
2032 /* copy generated C matrix since this will be
2033 over written */
2034 zge_copy_matrix(order_type, m, n, c_gen, ldc, c, ldc);
2035
2036 /* call hemm routines to be tested */
2037 FPU_FIX_STOP;
2038 BLAS_zhemm_z_d(order_type, side_type,
2039 uplo_type, m, n, alpha, a, lda, b, ldb,
2040 beta, c, ldc);
2041 FPU_FIX_START;
2042
2043 /* now compute the ratio using test_BLAS_xdot */
2044 /* copy a row from A, a column from B, run
2045 dot test */
2046
2047 if ((order_type == blas_colmajor &&
2048 side_type == blas_left_side) ||
2049 (order_type == blas_rowmajor &&
2050 side_type == blas_right_side)) {
2051 incci = 1;
2052 inccij = ldc;
2053 } else {
2054 incci = ldc;
2055 inccij = 1;
2056 }
2057
2058 incri = incci;
2059 incrij = inccij;
2060 incci *= 2;
2061 inccij *= 2;
2062
2063 for (i = 0, ci = 0, ri = 0;
2064 i < m_i; i++, ci += incci, ri += incri) {
2065 zhe_copy_row(order_type, uplo_type, side_type,
2066 m_i, a, lda, a_vec, i);
2067 for (j = 0, cij = ci, rij = ri;
2068 j < n_i; j++, cij += inccij, rij += incrij) {
2069 /* copy i-th row of A and j-th col of B */
2070 if (side_type == blas_left_side)
2071 dge_copy_col(order_type, blas_no_trans,
2072 m, n, b, ldb, b_vec, j);
2073 else
2074 dge_copy_row(order_type, blas_no_trans,
2075 m, n, b, ldb, b_vec, j);
2076 rin[0] = c_gen[cij];
2077 rin[1] = c_gen[cij + 1];
2078 rout[0] = c[cij];
2079 rout[1] = c[cij + 1];
2080 head_r_true_elem[0] = head_r_true[cij];
2081 head_r_true_elem[1] = head_r_true[cij + 1];
2082 tail_r_true_elem[0] = tail_r_true[cij];
2083 tail_r_true_elem[1] = tail_r_true[cij + 1];
2084
2085 test_BLAS_zdot_z_d(m_i,
2086 blas_no_conj,
2087 alpha, beta, rin, rout,
2088 head_r_true_elem,
2089 tail_r_true_elem, a_vec, 1,
2090 b_vec, 1, eps_int, un_int,
2091 &ratios[rij]);
2092
2093 /* take the max ratio */
2094 if (rij == 0) {
2095 ratio = ratios[0];
2096 /* The !<= below causes NaN error to be detected.
2097 Note that (NaN > thresh) is always false. */
2098 } else if (!(ratios[rij] <= ratio)) {
2099 ratio = ratios[rij];
2100 }
2101
2102 }
2103 } /* end of dot-test loop */
2104
2105 /* Increase the number of bad ratio, if the ratio
2106 is bigger than the threshold.
2107 The !<= below causes NaN error to be detected.
2108 Note that (NaN > thresh) is always false. */
2109 if (!(ratio <= thresh)) {
2110
2111 if (debug == 3) {
2112 printf("Seed = %d\n", saved_seed);
2113 printf("m %d n %d\n", m, n);
2114 printf("LDA %d LDB %d LDC %d\n", lda, ldb, ldc);
2115
2116 if (order_type == blas_rowmajor)
2117 printf("row ");
2118 else
2119 printf("col ");
2120
2121 if (uplo_type == blas_upper)
2122 printf("upper ");
2123 else
2124 printf("lower");
2125
2126 if (side_type == blas_left_side)
2127 printf(" left\n");
2128 else
2129 printf(" right\n");
2130
2131 if (randomize_val == 1)
2132 printf("Randomized\n");
2133 else
2134 printf("Not Randomized\n");
2135
2136 printf("NORM %d, ALPHA %d, BETA %d\n",
2137 norm, alpha_val, beta_val);
2138
2139 /* print out info */
2140 printf("alpha = ");
2141 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
2142 printf(" ");
2143 printf("beta = ");
2144 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
2145 printf("\n");
2146
2147 printf("a\n");
2148 zhe_print_matrix(a, m_i, lda, order_type,
2149 uplo_type);
2150 dge_print_matrix(b, m, n, ldb, order_type, "B");
2151 zge_print_matrix(c_gen, m, n, ldc, order_type,
2152 "C_gen");
2153 zge_print_matrix(c, m, n, ldc, order_type, "C");
2154 zge_print_matrix(head_r_true, m, n, ldc,
2155 order_type, "head_r_true");
2156
2157 printf("ratio = %g\n", ratio);
2158 }
2159 bad_ratio_count++;
2160 if (bad_ratio_count >= MAX_BAD_TESTS) {
2161 printf("\ntoo many failures, exiting....");
2162 printf("\nTesting and compilation");
2163 printf(" are incomplete\n\n");
2164 goto end;
2165 }
2166 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
2167 printf("\nFlagrant ratio %e, exiting...", ratio);
2168 printf("\nTesting and compilation");
2169 printf(" are incomplete\n\n");
2170 goto end;
2171 }
2172 }
2173
2174 if (ratio > ratio_max)
2175 ratio_max = ratio;
2176
2177 if (ratio != 0.0 && ratio < ratio_min)
2178 ratio_min = ratio;
2179
2180 } /* end of randomize loop */
2181
2182 } /* end of ldc loop */
2183
2184 } /* end of ldb loop */
2185
2186 } /* end of lda loop */
2187
2188 } /* end of side loop */
2189
2190 } /* end of uplo loop */
2191
2192 } /* end of order loop */
2193
2194 } /* end of nr test loop */
2195
2196 } /* end of norm loop */
2197
2198
2199
2200 } /* end of beta loop */
2201
2202 } /* end of alpha loop */
2203
2204 end:
2205 FPU_FIX_STOP;
2206
2207 blas_free(c);
2208 blas_free(a);
2209 blas_free(b);
2210 blas_free(c_gen);
2211 blas_free(head_r_true);
2212 blas_free(tail_r_true);
2213 blas_free(ratios);
2214 blas_free(a_vec);
2215 blas_free(b_vec);
2216
2217 *max_ratio = ratio_max;
2218 *min_ratio = ratio_min;
2219 *num_tests = test_count;
2220 *num_bad_ratio = bad_ratio_count;
2221
2222 }
do_test_chemm_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)2223 void do_test_chemm_x(int m, int n,
2224 int ntests, int *seed, double thresh, int debug,
2225 float test_prob, double *min_ratio, double *max_ratio,
2226 int *num_bad_ratio, int *num_tests)
2227 {
2228
2229 /* Function name */
2230 const char fname[] = "BLAS_chemm_x";
2231
2232 int i, j;
2233 int ci, cij;
2234 int incci, inccij;
2235 int test_count;
2236 int bad_ratio_count;
2237
2238 int ri, rij;
2239 int incri, incrij;
2240 int inca, incb, incc;
2241
2242 double ratio;
2243
2244 double ratio_min, ratio_max;
2245
2246 double eps_int; /* internal machine epsilon */
2247 double un_int; /* internal underflow threshold */
2248
2249 float rin[2];
2250 float rout[2];
2251 double head_r_true_elem[2], tail_r_true_elem[2];
2252
2253 enum blas_order_type order_type;
2254 enum blas_side_type side_type;
2255 enum blas_uplo_type uplo_type;
2256 enum blas_prec_type prec;
2257
2258 int order_val, uplo_val, side_val;
2259 int lda_val, ldb_val, ldc_val;
2260 int alpha_val, beta_val;
2261 int randomize_val;
2262
2263 int prec_val;
2264
2265 int lda, ldb, ldc;
2266 int alpha_flag, beta_flag;
2267 int saved_seed;
2268 int norm;
2269 int test_no;
2270 int max_mn;
2271 int m_i, n_i;
2272
2273 float alpha[2];
2274 float beta[2];
2275 float *a;
2276 float *b;
2277 float *c;
2278 float *a_vec;
2279 float *b_vec;
2280
2281 /* generated test values for c */
2282 float *c_gen;
2283
2284 double *ratios;
2285
2286 /* true result calculated by testgen, in double-double */
2287 double *head_r_true, *tail_r_true;
2288
2289
2290 FPU_FIX_DECL;
2291
2292 if (n < 0 || m < 0 || ntests < 0)
2293 BLAS_error(fname, 0, 0, NULL);
2294
2295 /* initialization */
2296 saved_seed = *seed;
2297 ratio = 0.0;
2298 ratio_min = 1e308;
2299 ratio_max = 0.0;
2300
2301 *num_tests = 0;
2302 *num_bad_ratio = 0;
2303 *min_ratio = 0.0;
2304 *max_ratio = 0.0;
2305
2306 if (m == 0 || n == 0)
2307 return;
2308
2309 FPU_FIX_START;
2310
2311 max_mn = (m > n) ? m : n;
2312
2313 inca = incb = incc = 1;
2314 inca *= 2;
2315 incb *= 2;
2316 incc *= 2;
2317
2318 /* allocate memory for arrays */
2319 c = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
2320 if (2 * m * n > 0 && c == NULL) {
2321 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2322 }
2323 c_gen = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
2324 if (2 * m * n > 0 && c_gen == NULL) {
2325 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2326 }
2327 a = (float *) blas_malloc(2 * max_mn * max_mn * sizeof(float) * 2);
2328 if (2 * max_mn * max_mn > 0 && a == NULL) {
2329 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2330 }
2331 b = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
2332 if (2 * m * n > 0 && b == NULL) {
2333 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2334 }
2335 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2336 if (max_mn > 0 && a_vec == NULL) {
2337 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2338 }
2339 b_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2340 if (max_mn > 0 && b_vec == NULL) {
2341 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2342 }
2343 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
2344 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
2345 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2346 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2347 }
2348 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
2349 if (2 * m * n > 0 && ratios == NULL) {
2350 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2351 }
2352
2353 test_count = 0;
2354 bad_ratio_count = 0;
2355
2356 /* vary alpha */
2357 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
2358
2359 alpha_flag = 0;
2360 switch (alpha_val) {
2361 case 0:
2362 alpha[0] = alpha[1] = 0.0;
2363 alpha_flag = 1;
2364 break;
2365 case 1:
2366 alpha[0] = 1.0;
2367 alpha[1] = 0.0;
2368 alpha_flag = 1;
2369 break;
2370 }
2371
2372 /* vary beta */
2373 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
2374 beta_flag = 0;
2375 switch (beta_val) {
2376 case 0:
2377 beta[0] = beta[1] = 0.0;
2378 beta_flag = 1;
2379 break;
2380 case 1:
2381 beta[0] = 1.0;
2382 beta[1] = 0.0;
2383 beta_flag = 1;
2384 break;
2385 }
2386
2387
2388 /* varying extra precs */
2389 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
2390 switch (prec_val) {
2391 case 0:
2392 eps_int = power(2, -BITS_S);
2393 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
2394 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
2395 prec = blas_prec_single;
2396 break;
2397 case 1:
2398 eps_int = power(2, -BITS_D);
2399 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2400 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2401 prec = blas_prec_double;
2402 break;
2403 case 2:
2404 default:
2405 eps_int = power(2, -BITS_E);
2406 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
2407 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
2408 prec = blas_prec_extra;
2409 break;
2410 }
2411
2412 /* vary norm -- underflow, approx 1, overflow */
2413 for (norm = NORM_START; norm <= NORM_END; norm++) {
2414
2415 /* number of tests */
2416 for (test_no = 0; test_no < ntests; test_no++) {
2417
2418 /* vary storage format */
2419 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
2420
2421 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
2422
2423 /* vary upper / lower variation */
2424 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
2425
2426 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
2427
2428 /* vary transpositions of B */
2429 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
2430
2431 side_type = (side_val == 0) ?
2432 blas_left_side : blas_right_side;
2433
2434 if (side_type == blas_left_side) {
2435 m_i = m;
2436 n_i = n;
2437 } else {
2438 m_i = n;
2439 n_i = m;
2440 }
2441
2442 /* vary lda = k, k+1, 2*k */
2443 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
2444
2445 lda = (lda_val == 0) ? m_i :
2446 (lda_val == 1) ? m_i + 1 : 2 * m_i;
2447
2448 /* vary ldb = n, n+1, 2*n */
2449 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
2450
2451 if (order_type == blas_colmajor)
2452 ldb = (ldb_val == 0) ? m :
2453 (ldb_val == 1) ? m + 1 : 2 * m;
2454 else
2455 ldb = (ldb_val == 0) ? n :
2456 (ldb_val == 1) ? n + 1 : 2 * n;
2457
2458 /* vary ldc = k, k+1, 2*k */
2459 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
2460
2461 if (order_type == blas_colmajor)
2462 ldc = (ldc_val == 0) ? m :
2463 (ldc_val == 1) ? m + 1 : 2 * m;
2464 else
2465 ldc = (ldc_val == 0) ? n :
2466 (ldc_val == 1) ? n + 1 : 2 * n;
2467
2468 /* vary randomize = 0, 1 */
2469 for (randomize_val = RANDOMIZE_START;
2470 randomize_val <= RANDOMIZE_END;
2471 randomize_val++) {
2472
2473 /* For the sake of speed, we throw out this case at random */
2474 if (xrand(seed) >= test_prob)
2475 continue;
2476
2477 saved_seed = *seed;
2478
2479 /* finally we are here to generate the test case */
2480 BLAS_chemm_testgen(norm, order_type,
2481 uplo_type, side_type, m, n,
2482 randomize_val, &alpha,
2483 alpha_flag, &beta, beta_flag, a,
2484 lda, b, ldb, c, ldc, seed,
2485 head_r_true, tail_r_true);
2486 test_count++;
2487
2488 /* copy generated C matrix since this will be
2489 over written */
2490 cge_copy_matrix(order_type, m, n, c_gen, ldc, c,
2491 ldc);
2492
2493 /* call hemm routines to be tested */
2494 FPU_FIX_STOP;
2495 BLAS_chemm_x(order_type, side_type,
2496 uplo_type, m, n, alpha, a, lda, b, ldb,
2497 beta, c, ldc, prec);
2498 FPU_FIX_START;
2499
2500 /* now compute the ratio using test_BLAS_xdot */
2501 /* copy a row from A, a column from B, run
2502 dot test */
2503
2504 if ((order_type == blas_colmajor &&
2505 side_type == blas_left_side) ||
2506 (order_type == blas_rowmajor &&
2507 side_type == blas_right_side)) {
2508 incci = 1;
2509 inccij = ldc;
2510 } else {
2511 incci = ldc;
2512 inccij = 1;
2513 }
2514
2515 incri = incci;
2516 incrij = inccij;
2517 incci *= 2;
2518 inccij *= 2;
2519
2520 for (i = 0, ci = 0, ri = 0;
2521 i < m_i; i++, ci += incci, ri += incri) {
2522 che_copy_row(order_type, uplo_type, side_type,
2523 m_i, a, lda, a_vec, i);
2524 for (j = 0, cij = ci, rij = ri;
2525 j < n_i; j++, cij += inccij, rij += incrij) {
2526 /* copy i-th row of A and j-th col of B */
2527 if (side_type == blas_left_side)
2528 cge_copy_col(order_type, blas_no_trans,
2529 m, n, b, ldb, b_vec, j);
2530 else
2531 cge_copy_row(order_type, blas_no_trans,
2532 m, n, b, ldb, b_vec, j);
2533 rin[0] = c_gen[cij];
2534 rin[1] = c_gen[cij + 1];
2535 rout[0] = c[cij];
2536 rout[1] = c[cij + 1];
2537 head_r_true_elem[0] = head_r_true[cij];
2538 head_r_true_elem[1] = head_r_true[cij + 1];
2539 tail_r_true_elem[0] = tail_r_true[cij];
2540 tail_r_true_elem[1] = tail_r_true[cij + 1];
2541
2542 test_BLAS_cdot(m_i,
2543 blas_no_conj,
2544 alpha, beta, rin, rout,
2545 head_r_true_elem,
2546 tail_r_true_elem, a_vec, 1,
2547 b_vec, 1, eps_int, un_int,
2548 &ratios[rij]);
2549
2550 /* take the max ratio */
2551 if (rij == 0) {
2552 ratio = ratios[0];
2553 /* The !<= below causes NaN error to be detected.
2554 Note that (NaN > thresh) is always false. */
2555 } else if (!(ratios[rij] <= ratio)) {
2556 ratio = ratios[rij];
2557 }
2558
2559 }
2560 } /* end of dot-test loop */
2561
2562 /* Increase the number of bad ratio, if the ratio
2563 is bigger than the threshold.
2564 The !<= below causes NaN error to be detected.
2565 Note that (NaN > thresh) is always false. */
2566 if (!(ratio <= thresh)) {
2567
2568 if (debug == 3) {
2569 printf("Seed = %d\n", saved_seed);
2570 printf("m %d n %d\n", m, n);
2571 printf("LDA %d LDB %d LDC %d\n", lda, ldb,
2572 ldc);
2573
2574 if (order_type == blas_rowmajor)
2575 printf("row ");
2576 else
2577 printf("col ");
2578
2579 if (uplo_type == blas_upper)
2580 printf("upper ");
2581 else
2582 printf("lower");
2583
2584 if (side_type == blas_left_side)
2585 printf(" left\n");
2586 else
2587 printf(" right\n");
2588
2589 if (randomize_val == 1)
2590 printf("Randomized\n");
2591 else
2592 printf("Not Randomized\n");
2593
2594 printf("NORM %d, ALPHA %d, BETA %d\n",
2595 norm, alpha_val, beta_val);
2596
2597 /* print out info */
2598 printf("alpha = ");
2599 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);;
2600 printf(" ");
2601 printf("beta = ");
2602 printf("(%16.8e, %16.8e)", beta[0], beta[1]);;
2603 printf("\n");
2604
2605 printf("a\n");
2606 che_print_matrix(a, m_i, lda, order_type,
2607 uplo_type);
2608 cge_print_matrix(b, m, n, ldb, order_type, "B");
2609 cge_print_matrix(c_gen, m, n, ldc, order_type,
2610 "C_gen");
2611 cge_print_matrix(c, m, n, ldc, order_type, "C");
2612 zge_print_matrix(head_r_true, m, n, ldc,
2613 order_type, "head_r_true");
2614
2615 printf("ratio = %g\n", ratio);
2616 }
2617 bad_ratio_count++;
2618 if (bad_ratio_count >= MAX_BAD_TESTS) {
2619 printf("\ntoo many failures, exiting....");
2620 printf("\nTesting and compilation");
2621 printf(" are incomplete\n\n");
2622 goto end;
2623 }
2624 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
2625 printf("\nFlagrant ratio %e, exiting...",
2626 ratio);
2627 printf("\nTesting and compilation");
2628 printf(" are incomplete\n\n");
2629 goto end;
2630 }
2631 }
2632
2633 if (ratio > ratio_max)
2634 ratio_max = ratio;
2635
2636 if (ratio != 0.0 && ratio < ratio_min)
2637 ratio_min = ratio;
2638
2639 } /* end of randomize loop */
2640
2641 } /* end of ldc loop */
2642
2643 } /* end of ldb loop */
2644
2645 } /* end of lda loop */
2646
2647 } /* end of side loop */
2648
2649 } /* end of uplo loop */
2650
2651 } /* end of order loop */
2652
2653 } /* end of nr test loop */
2654
2655 } /* end of norm loop */
2656
2657
2658 } /* end of prec loop */
2659
2660 } /* end of beta loop */
2661
2662 } /* end of alpha loop */
2663
2664 end:
2665 FPU_FIX_STOP;
2666
2667 blas_free(c);
2668 blas_free(a);
2669 blas_free(b);
2670 blas_free(c_gen);
2671 blas_free(head_r_true);
2672 blas_free(tail_r_true);
2673 blas_free(ratios);
2674 blas_free(a_vec);
2675 blas_free(b_vec);
2676
2677 *max_ratio = ratio_max;
2678 *min_ratio = ratio_min;
2679 *num_tests = test_count;
2680 *num_bad_ratio = bad_ratio_count;
2681
2682 }
do_test_zhemm_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)2683 void do_test_zhemm_x(int m, int n,
2684 int ntests, int *seed, double thresh, int debug,
2685 float test_prob, double *min_ratio, double *max_ratio,
2686 int *num_bad_ratio, int *num_tests)
2687 {
2688
2689 /* Function name */
2690 const char fname[] = "BLAS_zhemm_x";
2691
2692 int i, j;
2693 int ci, cij;
2694 int incci, inccij;
2695 int test_count;
2696 int bad_ratio_count;
2697
2698 int ri, rij;
2699 int incri, incrij;
2700 int inca, incb, incc;
2701
2702 double ratio;
2703
2704 double ratio_min, ratio_max;
2705
2706 double eps_int; /* internal machine epsilon */
2707 double un_int; /* internal underflow threshold */
2708
2709 double rin[2];
2710 double rout[2];
2711 double head_r_true_elem[2], tail_r_true_elem[2];
2712
2713 enum blas_order_type order_type;
2714 enum blas_side_type side_type;
2715 enum blas_uplo_type uplo_type;
2716 enum blas_prec_type prec;
2717
2718 int order_val, uplo_val, side_val;
2719 int lda_val, ldb_val, ldc_val;
2720 int alpha_val, beta_val;
2721 int randomize_val;
2722
2723 int prec_val;
2724
2725 int lda, ldb, ldc;
2726 int alpha_flag, beta_flag;
2727 int saved_seed;
2728 int norm;
2729 int test_no;
2730 int max_mn;
2731 int m_i, n_i;
2732
2733 double alpha[2];
2734 double beta[2];
2735 double *a;
2736 double *b;
2737 double *c;
2738 double *a_vec;
2739 double *b_vec;
2740
2741 /* generated test values for c */
2742 double *c_gen;
2743
2744 double *ratios;
2745
2746 /* true result calculated by testgen, in double-double */
2747 double *head_r_true, *tail_r_true;
2748
2749
2750 FPU_FIX_DECL;
2751
2752 if (n < 0 || m < 0 || ntests < 0)
2753 BLAS_error(fname, 0, 0, NULL);
2754
2755 /* initialization */
2756 saved_seed = *seed;
2757 ratio = 0.0;
2758 ratio_min = 1e308;
2759 ratio_max = 0.0;
2760
2761 *num_tests = 0;
2762 *num_bad_ratio = 0;
2763 *min_ratio = 0.0;
2764 *max_ratio = 0.0;
2765
2766 if (m == 0 || n == 0)
2767 return;
2768
2769 FPU_FIX_START;
2770
2771 max_mn = (m > n) ? m : n;
2772
2773 inca = incb = incc = 1;
2774 inca *= 2;
2775 incb *= 2;
2776 incc *= 2;
2777
2778 /* allocate memory for arrays */
2779 c = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
2780 if (2 * m * n > 0 && c == NULL) {
2781 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2782 }
2783 c_gen = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
2784 if (2 * m * n > 0 && c_gen == NULL) {
2785 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2786 }
2787 a = (double *) blas_malloc(2 * max_mn * max_mn * sizeof(double) * 2);
2788 if (2 * max_mn * max_mn > 0 && a == NULL) {
2789 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2790 }
2791 b = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
2792 if (2 * m * n > 0 && b == NULL) {
2793 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2794 }
2795 a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2796 if (max_mn > 0 && a_vec == NULL) {
2797 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2798 }
2799 b_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2800 if (max_mn > 0 && b_vec == NULL) {
2801 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2802 }
2803 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
2804 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
2805 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2806 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2807 }
2808 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
2809 if (2 * m * n > 0 && ratios == NULL) {
2810 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2811 }
2812
2813 test_count = 0;
2814 bad_ratio_count = 0;
2815
2816 /* vary alpha */
2817 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
2818
2819 alpha_flag = 0;
2820 switch (alpha_val) {
2821 case 0:
2822 alpha[0] = alpha[1] = 0.0;
2823 alpha_flag = 1;
2824 break;
2825 case 1:
2826 alpha[0] = 1.0;
2827 alpha[1] = 0.0;
2828 alpha_flag = 1;
2829 break;
2830 }
2831
2832 /* vary beta */
2833 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
2834 beta_flag = 0;
2835 switch (beta_val) {
2836 case 0:
2837 beta[0] = beta[1] = 0.0;
2838 beta_flag = 1;
2839 break;
2840 case 1:
2841 beta[0] = 1.0;
2842 beta[1] = 0.0;
2843 beta_flag = 1;
2844 break;
2845 }
2846
2847
2848 /* varying extra precs */
2849 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
2850 switch (prec_val) {
2851 case 0:
2852 eps_int = power(2, -BITS_D);
2853 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2854 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2855 prec = blas_prec_double;
2856 break;
2857 case 1:
2858 eps_int = power(2, -BITS_D);
2859 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2860 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2861 prec = blas_prec_double;
2862 break;
2863 case 2:
2864 default:
2865 eps_int = power(2, -BITS_E);
2866 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
2867 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
2868 prec = blas_prec_extra;
2869 break;
2870 }
2871
2872 /* vary norm -- underflow, approx 1, overflow */
2873 for (norm = NORM_START; norm <= NORM_END; norm++) {
2874
2875 /* number of tests */
2876 for (test_no = 0; test_no < ntests; test_no++) {
2877
2878 /* vary storage format */
2879 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
2880
2881 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
2882
2883 /* vary upper / lower variation */
2884 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
2885
2886 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
2887
2888 /* vary transpositions of B */
2889 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
2890
2891 side_type = (side_val == 0) ?
2892 blas_left_side : blas_right_side;
2893
2894 if (side_type == blas_left_side) {
2895 m_i = m;
2896 n_i = n;
2897 } else {
2898 m_i = n;
2899 n_i = m;
2900 }
2901
2902 /* vary lda = k, k+1, 2*k */
2903 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
2904
2905 lda = (lda_val == 0) ? m_i :
2906 (lda_val == 1) ? m_i + 1 : 2 * m_i;
2907
2908 /* vary ldb = n, n+1, 2*n */
2909 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
2910
2911 if (order_type == blas_colmajor)
2912 ldb = (ldb_val == 0) ? m :
2913 (ldb_val == 1) ? m + 1 : 2 * m;
2914 else
2915 ldb = (ldb_val == 0) ? n :
2916 (ldb_val == 1) ? n + 1 : 2 * n;
2917
2918 /* vary ldc = k, k+1, 2*k */
2919 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
2920
2921 if (order_type == blas_colmajor)
2922 ldc = (ldc_val == 0) ? m :
2923 (ldc_val == 1) ? m + 1 : 2 * m;
2924 else
2925 ldc = (ldc_val == 0) ? n :
2926 (ldc_val == 1) ? n + 1 : 2 * n;
2927
2928 /* vary randomize = 0, 1 */
2929 for (randomize_val = RANDOMIZE_START;
2930 randomize_val <= RANDOMIZE_END;
2931 randomize_val++) {
2932
2933 /* For the sake of speed, we throw out this case at random */
2934 if (xrand(seed) >= test_prob)
2935 continue;
2936
2937 saved_seed = *seed;
2938
2939 /* finally we are here to generate the test case */
2940 BLAS_zhemm_testgen(norm, order_type,
2941 uplo_type, side_type, m, n,
2942 randomize_val, &alpha,
2943 alpha_flag, &beta, beta_flag, a,
2944 lda, b, ldb, c, ldc, seed,
2945 head_r_true, tail_r_true);
2946 test_count++;
2947
2948 /* copy generated C matrix since this will be
2949 over written */
2950 zge_copy_matrix(order_type, m, n, c_gen, ldc, c,
2951 ldc);
2952
2953 /* call hemm routines to be tested */
2954 FPU_FIX_STOP;
2955 BLAS_zhemm_x(order_type, side_type,
2956 uplo_type, m, n, alpha, a, lda, b, ldb,
2957 beta, c, ldc, prec);
2958 FPU_FIX_START;
2959
2960 /* now compute the ratio using test_BLAS_xdot */
2961 /* copy a row from A, a column from B, run
2962 dot test */
2963
2964 if ((order_type == blas_colmajor &&
2965 side_type == blas_left_side) ||
2966 (order_type == blas_rowmajor &&
2967 side_type == blas_right_side)) {
2968 incci = 1;
2969 inccij = ldc;
2970 } else {
2971 incci = ldc;
2972 inccij = 1;
2973 }
2974
2975 incri = incci;
2976 incrij = inccij;
2977 incci *= 2;
2978 inccij *= 2;
2979
2980 for (i = 0, ci = 0, ri = 0;
2981 i < m_i; i++, ci += incci, ri += incri) {
2982 zhe_copy_row(order_type, uplo_type, side_type,
2983 m_i, a, lda, a_vec, i);
2984 for (j = 0, cij = ci, rij = ri;
2985 j < n_i; j++, cij += inccij, rij += incrij) {
2986 /* copy i-th row of A and j-th col of B */
2987 if (side_type == blas_left_side)
2988 zge_copy_col(order_type, blas_no_trans,
2989 m, n, b, ldb, b_vec, j);
2990 else
2991 zge_copy_row(order_type, blas_no_trans,
2992 m, n, b, ldb, b_vec, j);
2993 rin[0] = c_gen[cij];
2994 rin[1] = c_gen[cij + 1];
2995 rout[0] = c[cij];
2996 rout[1] = c[cij + 1];
2997 head_r_true_elem[0] = head_r_true[cij];
2998 head_r_true_elem[1] = head_r_true[cij + 1];
2999 tail_r_true_elem[0] = tail_r_true[cij];
3000 tail_r_true_elem[1] = tail_r_true[cij + 1];
3001
3002 test_BLAS_zdot(m_i,
3003 blas_no_conj,
3004 alpha, beta, rin, rout,
3005 head_r_true_elem,
3006 tail_r_true_elem, a_vec, 1,
3007 b_vec, 1, eps_int, un_int,
3008 &ratios[rij]);
3009
3010 /* take the max ratio */
3011 if (rij == 0) {
3012 ratio = ratios[0];
3013 /* The !<= below causes NaN error to be detected.
3014 Note that (NaN > thresh) is always false. */
3015 } else if (!(ratios[rij] <= ratio)) {
3016 ratio = ratios[rij];
3017 }
3018
3019 }
3020 } /* end of dot-test loop */
3021
3022 /* Increase the number of bad ratio, if the ratio
3023 is bigger than the threshold.
3024 The !<= below causes NaN error to be detected.
3025 Note that (NaN > thresh) is always false. */
3026 if (!(ratio <= thresh)) {
3027
3028 if (debug == 3) {
3029 printf("Seed = %d\n", saved_seed);
3030 printf("m %d n %d\n", m, n);
3031 printf("LDA %d LDB %d LDC %d\n", lda, ldb,
3032 ldc);
3033
3034 if (order_type == blas_rowmajor)
3035 printf("row ");
3036 else
3037 printf("col ");
3038
3039 if (uplo_type == blas_upper)
3040 printf("upper ");
3041 else
3042 printf("lower");
3043
3044 if (side_type == blas_left_side)
3045 printf(" left\n");
3046 else
3047 printf(" right\n");
3048
3049 if (randomize_val == 1)
3050 printf("Randomized\n");
3051 else
3052 printf("Not Randomized\n");
3053
3054 printf("NORM %d, ALPHA %d, BETA %d\n",
3055 norm, alpha_val, beta_val);
3056
3057 /* print out info */
3058 printf("alpha = ");
3059 printf("(%24.16e, %24.16e)", alpha[0],
3060 alpha[1]);;
3061 printf(" ");
3062 printf("beta = ");
3063 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
3064 printf("\n");
3065
3066 printf("a\n");
3067 zhe_print_matrix(a, m_i, lda, order_type,
3068 uplo_type);
3069 zge_print_matrix(b, m, n, ldb, order_type, "B");
3070 zge_print_matrix(c_gen, m, n, ldc, order_type,
3071 "C_gen");
3072 zge_print_matrix(c, m, n, ldc, order_type, "C");
3073 zge_print_matrix(head_r_true, m, n, ldc,
3074 order_type, "head_r_true");
3075
3076 printf("ratio = %g\n", ratio);
3077 }
3078 bad_ratio_count++;
3079 if (bad_ratio_count >= MAX_BAD_TESTS) {
3080 printf("\ntoo many failures, exiting....");
3081 printf("\nTesting and compilation");
3082 printf(" are incomplete\n\n");
3083 goto end;
3084 }
3085 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
3086 printf("\nFlagrant ratio %e, exiting...",
3087 ratio);
3088 printf("\nTesting and compilation");
3089 printf(" are incomplete\n\n");
3090 goto end;
3091 }
3092 }
3093
3094 if (ratio > ratio_max)
3095 ratio_max = ratio;
3096
3097 if (ratio != 0.0 && ratio < ratio_min)
3098 ratio_min = ratio;
3099
3100 } /* end of randomize loop */
3101
3102 } /* end of ldc loop */
3103
3104 } /* end of ldb loop */
3105
3106 } /* end of lda loop */
3107
3108 } /* end of side loop */
3109
3110 } /* end of uplo loop */
3111
3112 } /* end of order loop */
3113
3114 } /* end of nr test loop */
3115
3116 } /* end of norm loop */
3117
3118
3119 } /* end of prec loop */
3120
3121 } /* end of beta loop */
3122
3123 } /* end of alpha loop */
3124
3125 end:
3126 FPU_FIX_STOP;
3127
3128 blas_free(c);
3129 blas_free(a);
3130 blas_free(b);
3131 blas_free(c_gen);
3132 blas_free(head_r_true);
3133 blas_free(tail_r_true);
3134 blas_free(ratios);
3135 blas_free(a_vec);
3136 blas_free(b_vec);
3137
3138 *max_ratio = ratio_max;
3139 *min_ratio = ratio_min;
3140 *num_tests = test_count;
3141 *num_bad_ratio = bad_ratio_count;
3142
3143 }
do_test_zhemm_z_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)3144 void do_test_zhemm_z_c_x(int m, int n,
3145 int ntests, int *seed, double thresh, int debug,
3146 float test_prob, double *min_ratio,
3147 double *max_ratio, int *num_bad_ratio,
3148 int *num_tests)
3149 {
3150
3151 /* Function name */
3152 const char fname[] = "BLAS_zhemm_z_c_x";
3153
3154 int i, j;
3155 int ci, cij;
3156 int incci, inccij;
3157 int test_count;
3158 int bad_ratio_count;
3159
3160 int ri, rij;
3161 int incri, incrij;
3162 int inca, incb, incc;
3163
3164 double ratio;
3165
3166 double ratio_min, ratio_max;
3167
3168 double eps_int; /* internal machine epsilon */
3169 double un_int; /* internal underflow threshold */
3170
3171 double rin[2];
3172 double rout[2];
3173 double head_r_true_elem[2], tail_r_true_elem[2];
3174
3175 enum blas_order_type order_type;
3176 enum blas_side_type side_type;
3177 enum blas_uplo_type uplo_type;
3178 enum blas_prec_type prec;
3179
3180 int order_val, uplo_val, side_val;
3181 int lda_val, ldb_val, ldc_val;
3182 int alpha_val, beta_val;
3183 int randomize_val;
3184
3185 int prec_val;
3186
3187 int lda, ldb, ldc;
3188 int alpha_flag, beta_flag;
3189 int saved_seed;
3190 int norm;
3191 int test_no;
3192 int max_mn;
3193 int m_i, n_i;
3194
3195 double alpha[2];
3196 double beta[2];
3197 double *a;
3198 float *b;
3199 double *c;
3200 double *a_vec;
3201 float *b_vec;
3202
3203 /* generated test values for c */
3204 double *c_gen;
3205
3206 double *ratios;
3207
3208 /* true result calculated by testgen, in double-double */
3209 double *head_r_true, *tail_r_true;
3210
3211
3212 FPU_FIX_DECL;
3213
3214 if (n < 0 || m < 0 || ntests < 0)
3215 BLAS_error(fname, 0, 0, NULL);
3216
3217 /* initialization */
3218 saved_seed = *seed;
3219 ratio = 0.0;
3220 ratio_min = 1e308;
3221 ratio_max = 0.0;
3222
3223 *num_tests = 0;
3224 *num_bad_ratio = 0;
3225 *min_ratio = 0.0;
3226 *max_ratio = 0.0;
3227
3228 if (m == 0 || n == 0)
3229 return;
3230
3231 FPU_FIX_START;
3232
3233 max_mn = (m > n) ? m : n;
3234
3235 inca = incb = incc = 1;
3236 inca *= 2;
3237 incb *= 2;
3238 incc *= 2;
3239
3240 /* allocate memory for arrays */
3241 c = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
3242 if (2 * m * n > 0 && c == NULL) {
3243 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3244 }
3245 c_gen = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
3246 if (2 * m * n > 0 && c_gen == NULL) {
3247 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3248 }
3249 a = (double *) blas_malloc(2 * max_mn * max_mn * sizeof(double) * 2);
3250 if (2 * max_mn * max_mn > 0 && a == NULL) {
3251 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3252 }
3253 b = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
3254 if (2 * m * n > 0 && b == NULL) {
3255 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3256 }
3257 a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
3258 if (max_mn > 0 && a_vec == NULL) {
3259 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3260 }
3261 b_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
3262 if (max_mn > 0 && b_vec == NULL) {
3263 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3264 }
3265 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
3266 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
3267 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
3268 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3269 }
3270 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
3271 if (2 * m * n > 0 && ratios == NULL) {
3272 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3273 }
3274
3275 test_count = 0;
3276 bad_ratio_count = 0;
3277
3278 /* vary alpha */
3279 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
3280
3281 alpha_flag = 0;
3282 switch (alpha_val) {
3283 case 0:
3284 alpha[0] = alpha[1] = 0.0;
3285 alpha_flag = 1;
3286 break;
3287 case 1:
3288 alpha[0] = 1.0;
3289 alpha[1] = 0.0;
3290 alpha_flag = 1;
3291 break;
3292 }
3293
3294 /* vary beta */
3295 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
3296 beta_flag = 0;
3297 switch (beta_val) {
3298 case 0:
3299 beta[0] = beta[1] = 0.0;
3300 beta_flag = 1;
3301 break;
3302 case 1:
3303 beta[0] = 1.0;
3304 beta[1] = 0.0;
3305 beta_flag = 1;
3306 break;
3307 }
3308
3309
3310 /* varying extra precs */
3311 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
3312 switch (prec_val) {
3313 case 0:
3314 eps_int = power(2, -BITS_D);
3315 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3316 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3317 prec = blas_prec_double;
3318 break;
3319 case 1:
3320 eps_int = power(2, -BITS_D);
3321 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3322 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3323 prec = blas_prec_double;
3324 break;
3325 case 2:
3326 default:
3327 eps_int = power(2, -BITS_E);
3328 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
3329 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
3330 prec = blas_prec_extra;
3331 break;
3332 }
3333
3334 /* vary norm -- underflow, approx 1, overflow */
3335 for (norm = NORM_START; norm <= NORM_END; norm++) {
3336
3337 /* number of tests */
3338 for (test_no = 0; test_no < ntests; test_no++) {
3339
3340 /* vary storage format */
3341 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
3342
3343 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
3344
3345 /* vary upper / lower variation */
3346 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
3347
3348 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
3349
3350 /* vary transpositions of B */
3351 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
3352
3353 side_type = (side_val == 0) ?
3354 blas_left_side : blas_right_side;
3355
3356 if (side_type == blas_left_side) {
3357 m_i = m;
3358 n_i = n;
3359 } else {
3360 m_i = n;
3361 n_i = m;
3362 }
3363
3364 /* vary lda = k, k+1, 2*k */
3365 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
3366
3367 lda = (lda_val == 0) ? m_i :
3368 (lda_val == 1) ? m_i + 1 : 2 * m_i;
3369
3370 /* vary ldb = n, n+1, 2*n */
3371 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
3372
3373 if (order_type == blas_colmajor)
3374 ldb = (ldb_val == 0) ? m :
3375 (ldb_val == 1) ? m + 1 : 2 * m;
3376 else
3377 ldb = (ldb_val == 0) ? n :
3378 (ldb_val == 1) ? n + 1 : 2 * n;
3379
3380 /* vary ldc = k, k+1, 2*k */
3381 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
3382
3383 if (order_type == blas_colmajor)
3384 ldc = (ldc_val == 0) ? m :
3385 (ldc_val == 1) ? m + 1 : 2 * m;
3386 else
3387 ldc = (ldc_val == 0) ? n :
3388 (ldc_val == 1) ? n + 1 : 2 * n;
3389
3390 /* vary randomize = 0, 1 */
3391 for (randomize_val = RANDOMIZE_START;
3392 randomize_val <= RANDOMIZE_END;
3393 randomize_val++) {
3394
3395 /* For the sake of speed, we throw out this case at random */
3396 if (xrand(seed) >= test_prob)
3397 continue;
3398
3399 saved_seed = *seed;
3400
3401 /* finally we are here to generate the test case */
3402 BLAS_zhemm_z_c_testgen(norm, order_type,
3403 uplo_type, side_type, m, n,
3404 randomize_val, &alpha,
3405 alpha_flag, &beta, beta_flag,
3406 a, lda, b, ldb, c, ldc, seed,
3407 head_r_true, tail_r_true);
3408 test_count++;
3409
3410 /* copy generated C matrix since this will be
3411 over written */
3412 zge_copy_matrix(order_type, m, n, c_gen, ldc, c,
3413 ldc);
3414
3415 /* call hemm routines to be tested */
3416 FPU_FIX_STOP;
3417 BLAS_zhemm_z_c_x(order_type, side_type,
3418 uplo_type, m, n, alpha, a, lda, b,
3419 ldb, beta, c, ldc, prec);
3420 FPU_FIX_START;
3421
3422 /* now compute the ratio using test_BLAS_xdot */
3423 /* copy a row from A, a column from B, run
3424 dot test */
3425
3426 if ((order_type == blas_colmajor &&
3427 side_type == blas_left_side) ||
3428 (order_type == blas_rowmajor &&
3429 side_type == blas_right_side)) {
3430 incci = 1;
3431 inccij = ldc;
3432 } else {
3433 incci = ldc;
3434 inccij = 1;
3435 }
3436
3437 incri = incci;
3438 incrij = inccij;
3439 incci *= 2;
3440 inccij *= 2;
3441
3442 for (i = 0, ci = 0, ri = 0;
3443 i < m_i; i++, ci += incci, ri += incri) {
3444 zhe_copy_row(order_type, uplo_type, side_type,
3445 m_i, a, lda, a_vec, i);
3446 for (j = 0, cij = ci, rij = ri;
3447 j < n_i; j++, cij += inccij, rij += incrij) {
3448 /* copy i-th row of A and j-th col of B */
3449 if (side_type == blas_left_side)
3450 cge_copy_col(order_type, blas_no_trans,
3451 m, n, b, ldb, b_vec, j);
3452 else
3453 cge_copy_row(order_type, blas_no_trans,
3454 m, n, b, ldb, b_vec, j);
3455 rin[0] = c_gen[cij];
3456 rin[1] = c_gen[cij + 1];
3457 rout[0] = c[cij];
3458 rout[1] = c[cij + 1];
3459 head_r_true_elem[0] = head_r_true[cij];
3460 head_r_true_elem[1] = head_r_true[cij + 1];
3461 tail_r_true_elem[0] = tail_r_true[cij];
3462 tail_r_true_elem[1] = tail_r_true[cij + 1];
3463
3464 test_BLAS_zdot_z_c(m_i,
3465 blas_no_conj,
3466 alpha, beta, rin, rout,
3467 head_r_true_elem,
3468 tail_r_true_elem, a_vec, 1,
3469 b_vec, 1, eps_int, un_int,
3470 &ratios[rij]);
3471
3472 /* take the max ratio */
3473 if (rij == 0) {
3474 ratio = ratios[0];
3475 /* The !<= below causes NaN error to be detected.
3476 Note that (NaN > thresh) is always false. */
3477 } else if (!(ratios[rij] <= ratio)) {
3478 ratio = ratios[rij];
3479 }
3480
3481 }
3482 } /* end of dot-test loop */
3483
3484 /* Increase the number of bad ratio, if the ratio
3485 is bigger than the threshold.
3486 The !<= below causes NaN error to be detected.
3487 Note that (NaN > thresh) is always false. */
3488 if (!(ratio <= thresh)) {
3489
3490 if (debug == 3) {
3491 printf("Seed = %d\n", saved_seed);
3492 printf("m %d n %d\n", m, n);
3493 printf("LDA %d LDB %d LDC %d\n", lda, ldb,
3494 ldc);
3495
3496 if (order_type == blas_rowmajor)
3497 printf("row ");
3498 else
3499 printf("col ");
3500
3501 if (uplo_type == blas_upper)
3502 printf("upper ");
3503 else
3504 printf("lower");
3505
3506 if (side_type == blas_left_side)
3507 printf(" left\n");
3508 else
3509 printf(" right\n");
3510
3511 if (randomize_val == 1)
3512 printf("Randomized\n");
3513 else
3514 printf("Not Randomized\n");
3515
3516 printf("NORM %d, ALPHA %d, BETA %d\n",
3517 norm, alpha_val, beta_val);
3518
3519 /* print out info */
3520 printf("alpha = ");
3521 printf("(%24.16e, %24.16e)", alpha[0],
3522 alpha[1]);;
3523 printf(" ");
3524 printf("beta = ");
3525 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
3526 printf("\n");
3527
3528 printf("a\n");
3529 zhe_print_matrix(a, m_i, lda, order_type,
3530 uplo_type);
3531 cge_print_matrix(b, m, n, ldb, order_type, "B");
3532 zge_print_matrix(c_gen, m, n, ldc, order_type,
3533 "C_gen");
3534 zge_print_matrix(c, m, n, ldc, order_type, "C");
3535 zge_print_matrix(head_r_true, m, n, ldc,
3536 order_type, "head_r_true");
3537
3538 printf("ratio = %g\n", ratio);
3539 }
3540 bad_ratio_count++;
3541 if (bad_ratio_count >= MAX_BAD_TESTS) {
3542 printf("\ntoo many failures, exiting....");
3543 printf("\nTesting and compilation");
3544 printf(" are incomplete\n\n");
3545 goto end;
3546 }
3547 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
3548 printf("\nFlagrant ratio %e, exiting...",
3549 ratio);
3550 printf("\nTesting and compilation");
3551 printf(" are incomplete\n\n");
3552 goto end;
3553 }
3554 }
3555
3556 if (ratio > ratio_max)
3557 ratio_max = ratio;
3558
3559 if (ratio != 0.0 && ratio < ratio_min)
3560 ratio_min = ratio;
3561
3562 } /* end of randomize loop */
3563
3564 } /* end of ldc loop */
3565
3566 } /* end of ldb loop */
3567
3568 } /* end of lda loop */
3569
3570 } /* end of side loop */
3571
3572 } /* end of uplo loop */
3573
3574 } /* end of order loop */
3575
3576 } /* end of nr test loop */
3577
3578 } /* end of norm loop */
3579
3580
3581 } /* end of prec loop */
3582
3583 } /* end of beta loop */
3584
3585 } /* end of alpha loop */
3586
3587 end:
3588 FPU_FIX_STOP;
3589
3590 blas_free(c);
3591 blas_free(a);
3592 blas_free(b);
3593 blas_free(c_gen);
3594 blas_free(head_r_true);
3595 blas_free(tail_r_true);
3596 blas_free(ratios);
3597 blas_free(a_vec);
3598 blas_free(b_vec);
3599
3600 *max_ratio = ratio_max;
3601 *min_ratio = ratio_min;
3602 *num_tests = test_count;
3603 *num_bad_ratio = bad_ratio_count;
3604
3605 }
do_test_zhemm_c_z_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)3606 void do_test_zhemm_c_z_x(int m, int n,
3607 int ntests, int *seed, double thresh, int debug,
3608 float test_prob, double *min_ratio,
3609 double *max_ratio, int *num_bad_ratio,
3610 int *num_tests)
3611 {
3612
3613 /* Function name */
3614 const char fname[] = "BLAS_zhemm_c_z_x";
3615
3616 int i, j;
3617 int ci, cij;
3618 int incci, inccij;
3619 int test_count;
3620 int bad_ratio_count;
3621
3622 int ri, rij;
3623 int incri, incrij;
3624 int inca, incb, incc;
3625
3626 double ratio;
3627
3628 double ratio_min, ratio_max;
3629
3630 double eps_int; /* internal machine epsilon */
3631 double un_int; /* internal underflow threshold */
3632
3633 double rin[2];
3634 double rout[2];
3635 double head_r_true_elem[2], tail_r_true_elem[2];
3636
3637 enum blas_order_type order_type;
3638 enum blas_side_type side_type;
3639 enum blas_uplo_type uplo_type;
3640 enum blas_prec_type prec;
3641
3642 int order_val, uplo_val, side_val;
3643 int lda_val, ldb_val, ldc_val;
3644 int alpha_val, beta_val;
3645 int randomize_val;
3646
3647 int prec_val;
3648
3649 int lda, ldb, ldc;
3650 int alpha_flag, beta_flag;
3651 int saved_seed;
3652 int norm;
3653 int test_no;
3654 int max_mn;
3655 int m_i, n_i;
3656
3657 double alpha[2];
3658 double beta[2];
3659 float *a;
3660 double *b;
3661 double *c;
3662 float *a_vec;
3663 double *b_vec;
3664
3665 /* generated test values for c */
3666 double *c_gen;
3667
3668 double *ratios;
3669
3670 /* true result calculated by testgen, in double-double */
3671 double *head_r_true, *tail_r_true;
3672
3673
3674 FPU_FIX_DECL;
3675
3676 if (n < 0 || m < 0 || ntests < 0)
3677 BLAS_error(fname, 0, 0, NULL);
3678
3679 /* initialization */
3680 saved_seed = *seed;
3681 ratio = 0.0;
3682 ratio_min = 1e308;
3683 ratio_max = 0.0;
3684
3685 *num_tests = 0;
3686 *num_bad_ratio = 0;
3687 *min_ratio = 0.0;
3688 *max_ratio = 0.0;
3689
3690 if (m == 0 || n == 0)
3691 return;
3692
3693 FPU_FIX_START;
3694
3695 max_mn = (m > n) ? m : n;
3696
3697 inca = incb = incc = 1;
3698 inca *= 2;
3699 incb *= 2;
3700 incc *= 2;
3701
3702 /* allocate memory for arrays */
3703 c = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
3704 if (2 * m * n > 0 && c == NULL) {
3705 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3706 }
3707 c_gen = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
3708 if (2 * m * n > 0 && c_gen == NULL) {
3709 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3710 }
3711 a = (float *) blas_malloc(2 * max_mn * max_mn * sizeof(float) * 2);
3712 if (2 * max_mn * max_mn > 0 && a == NULL) {
3713 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3714 }
3715 b = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
3716 if (2 * m * n > 0 && b == NULL) {
3717 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3718 }
3719 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
3720 if (max_mn > 0 && a_vec == NULL) {
3721 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3722 }
3723 b_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
3724 if (max_mn > 0 && b_vec == NULL) {
3725 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3726 }
3727 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
3728 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
3729 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
3730 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3731 }
3732 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
3733 if (2 * m * n > 0 && ratios == NULL) {
3734 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3735 }
3736
3737 test_count = 0;
3738 bad_ratio_count = 0;
3739
3740 /* vary alpha */
3741 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
3742
3743 alpha_flag = 0;
3744 switch (alpha_val) {
3745 case 0:
3746 alpha[0] = alpha[1] = 0.0;
3747 alpha_flag = 1;
3748 break;
3749 case 1:
3750 alpha[0] = 1.0;
3751 alpha[1] = 0.0;
3752 alpha_flag = 1;
3753 break;
3754 }
3755
3756 /* vary beta */
3757 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
3758 beta_flag = 0;
3759 switch (beta_val) {
3760 case 0:
3761 beta[0] = beta[1] = 0.0;
3762 beta_flag = 1;
3763 break;
3764 case 1:
3765 beta[0] = 1.0;
3766 beta[1] = 0.0;
3767 beta_flag = 1;
3768 break;
3769 }
3770
3771
3772 /* varying extra precs */
3773 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
3774 switch (prec_val) {
3775 case 0:
3776 eps_int = power(2, -BITS_D);
3777 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3778 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3779 prec = blas_prec_double;
3780 break;
3781 case 1:
3782 eps_int = power(2, -BITS_D);
3783 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
3784 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
3785 prec = blas_prec_double;
3786 break;
3787 case 2:
3788 default:
3789 eps_int = power(2, -BITS_E);
3790 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
3791 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
3792 prec = blas_prec_extra;
3793 break;
3794 }
3795
3796 /* vary norm -- underflow, approx 1, overflow */
3797 for (norm = NORM_START; norm <= NORM_END; norm++) {
3798
3799 /* number of tests */
3800 for (test_no = 0; test_no < ntests; test_no++) {
3801
3802 /* vary storage format */
3803 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
3804
3805 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
3806
3807 /* vary upper / lower variation */
3808 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
3809
3810 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
3811
3812 /* vary transpositions of B */
3813 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
3814
3815 side_type = (side_val == 0) ?
3816 blas_left_side : blas_right_side;
3817
3818 if (side_type == blas_left_side) {
3819 m_i = m;
3820 n_i = n;
3821 } else {
3822 m_i = n;
3823 n_i = m;
3824 }
3825
3826 /* vary lda = k, k+1, 2*k */
3827 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
3828
3829 lda = (lda_val == 0) ? m_i :
3830 (lda_val == 1) ? m_i + 1 : 2 * m_i;
3831
3832 /* vary ldb = n, n+1, 2*n */
3833 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
3834
3835 if (order_type == blas_colmajor)
3836 ldb = (ldb_val == 0) ? m :
3837 (ldb_val == 1) ? m + 1 : 2 * m;
3838 else
3839 ldb = (ldb_val == 0) ? n :
3840 (ldb_val == 1) ? n + 1 : 2 * n;
3841
3842 /* vary ldc = k, k+1, 2*k */
3843 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
3844
3845 if (order_type == blas_colmajor)
3846 ldc = (ldc_val == 0) ? m :
3847 (ldc_val == 1) ? m + 1 : 2 * m;
3848 else
3849 ldc = (ldc_val == 0) ? n :
3850 (ldc_val == 1) ? n + 1 : 2 * n;
3851
3852 /* vary randomize = 0, 1 */
3853 for (randomize_val = RANDOMIZE_START;
3854 randomize_val <= RANDOMIZE_END;
3855 randomize_val++) {
3856
3857 /* For the sake of speed, we throw out this case at random */
3858 if (xrand(seed) >= test_prob)
3859 continue;
3860
3861 saved_seed = *seed;
3862
3863 /* finally we are here to generate the test case */
3864 BLAS_zhemm_c_z_testgen(norm, order_type,
3865 uplo_type, side_type, m, n,
3866 randomize_val, &alpha,
3867 alpha_flag, &beta, beta_flag,
3868 a, lda, b, ldb, c, ldc, seed,
3869 head_r_true, tail_r_true);
3870 test_count++;
3871
3872 /* copy generated C matrix since this will be
3873 over written */
3874 zge_copy_matrix(order_type, m, n, c_gen, ldc, c,
3875 ldc);
3876
3877 /* call hemm routines to be tested */
3878 FPU_FIX_STOP;
3879 BLAS_zhemm_c_z_x(order_type, side_type,
3880 uplo_type, m, n, alpha, a, lda, b,
3881 ldb, beta, c, ldc, prec);
3882 FPU_FIX_START;
3883
3884 /* now compute the ratio using test_BLAS_xdot */
3885 /* copy a row from A, a column from B, run
3886 dot test */
3887
3888 if ((order_type == blas_colmajor &&
3889 side_type == blas_left_side) ||
3890 (order_type == blas_rowmajor &&
3891 side_type == blas_right_side)) {
3892 incci = 1;
3893 inccij = ldc;
3894 } else {
3895 incci = ldc;
3896 inccij = 1;
3897 }
3898
3899 incri = incci;
3900 incrij = inccij;
3901 incci *= 2;
3902 inccij *= 2;
3903
3904 for (i = 0, ci = 0, ri = 0;
3905 i < m_i; i++, ci += incci, ri += incri) {
3906 che_copy_row(order_type, uplo_type, side_type,
3907 m_i, a, lda, a_vec, i);
3908 for (j = 0, cij = ci, rij = ri;
3909 j < n_i; j++, cij += inccij, rij += incrij) {
3910 /* copy i-th row of A and j-th col of B */
3911 if (side_type == blas_left_side)
3912 zge_copy_col(order_type, blas_no_trans,
3913 m, n, b, ldb, b_vec, j);
3914 else
3915 zge_copy_row(order_type, blas_no_trans,
3916 m, n, b, ldb, b_vec, j);
3917 rin[0] = c_gen[cij];
3918 rin[1] = c_gen[cij + 1];
3919 rout[0] = c[cij];
3920 rout[1] = c[cij + 1];
3921 head_r_true_elem[0] = head_r_true[cij];
3922 head_r_true_elem[1] = head_r_true[cij + 1];
3923 tail_r_true_elem[0] = tail_r_true[cij];
3924 tail_r_true_elem[1] = tail_r_true[cij + 1];
3925
3926 test_BLAS_zdot_c_z(m_i,
3927 blas_no_conj,
3928 alpha, beta, rin, rout,
3929 head_r_true_elem,
3930 tail_r_true_elem, a_vec, 1,
3931 b_vec, 1, eps_int, un_int,
3932 &ratios[rij]);
3933
3934 /* take the max ratio */
3935 if (rij == 0) {
3936 ratio = ratios[0];
3937 /* The !<= below causes NaN error to be detected.
3938 Note that (NaN > thresh) is always false. */
3939 } else if (!(ratios[rij] <= ratio)) {
3940 ratio = ratios[rij];
3941 }
3942
3943 }
3944 } /* end of dot-test loop */
3945
3946 /* Increase the number of bad ratio, if the ratio
3947 is bigger than the threshold.
3948 The !<= below causes NaN error to be detected.
3949 Note that (NaN > thresh) is always false. */
3950 if (!(ratio <= thresh)) {
3951
3952 if (debug == 3) {
3953 printf("Seed = %d\n", saved_seed);
3954 printf("m %d n %d\n", m, n);
3955 printf("LDA %d LDB %d LDC %d\n", lda, ldb,
3956 ldc);
3957
3958 if (order_type == blas_rowmajor)
3959 printf("row ");
3960 else
3961 printf("col ");
3962
3963 if (uplo_type == blas_upper)
3964 printf("upper ");
3965 else
3966 printf("lower");
3967
3968 if (side_type == blas_left_side)
3969 printf(" left\n");
3970 else
3971 printf(" right\n");
3972
3973 if (randomize_val == 1)
3974 printf("Randomized\n");
3975 else
3976 printf("Not Randomized\n");
3977
3978 printf("NORM %d, ALPHA %d, BETA %d\n",
3979 norm, alpha_val, beta_val);
3980
3981 /* print out info */
3982 printf("alpha = ");
3983 printf("(%24.16e, %24.16e)", alpha[0],
3984 alpha[1]);;
3985 printf(" ");
3986 printf("beta = ");
3987 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
3988 printf("\n");
3989
3990 printf("a\n");
3991 che_print_matrix(a, m_i, lda, order_type,
3992 uplo_type);
3993 zge_print_matrix(b, m, n, ldb, order_type, "B");
3994 zge_print_matrix(c_gen, m, n, ldc, order_type,
3995 "C_gen");
3996 zge_print_matrix(c, m, n, ldc, order_type, "C");
3997 zge_print_matrix(head_r_true, m, n, ldc,
3998 order_type, "head_r_true");
3999
4000 printf("ratio = %g\n", ratio);
4001 }
4002 bad_ratio_count++;
4003 if (bad_ratio_count >= MAX_BAD_TESTS) {
4004 printf("\ntoo many failures, exiting....");
4005 printf("\nTesting and compilation");
4006 printf(" are incomplete\n\n");
4007 goto end;
4008 }
4009 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4010 printf("\nFlagrant ratio %e, exiting...",
4011 ratio);
4012 printf("\nTesting and compilation");
4013 printf(" are incomplete\n\n");
4014 goto end;
4015 }
4016 }
4017
4018 if (ratio > ratio_max)
4019 ratio_max = ratio;
4020
4021 if (ratio != 0.0 && ratio < ratio_min)
4022 ratio_min = ratio;
4023
4024 } /* end of randomize loop */
4025
4026 } /* end of ldc loop */
4027
4028 } /* end of ldb loop */
4029
4030 } /* end of lda loop */
4031
4032 } /* end of side loop */
4033
4034 } /* end of uplo loop */
4035
4036 } /* end of order loop */
4037
4038 } /* end of nr test loop */
4039
4040 } /* end of norm loop */
4041
4042
4043 } /* end of prec loop */
4044
4045 } /* end of beta loop */
4046
4047 } /* end of alpha loop */
4048
4049 end:
4050 FPU_FIX_STOP;
4051
4052 blas_free(c);
4053 blas_free(a);
4054 blas_free(b);
4055 blas_free(c_gen);
4056 blas_free(head_r_true);
4057 blas_free(tail_r_true);
4058 blas_free(ratios);
4059 blas_free(a_vec);
4060 blas_free(b_vec);
4061
4062 *max_ratio = ratio_max;
4063 *min_ratio = ratio_min;
4064 *num_tests = test_count;
4065 *num_bad_ratio = bad_ratio_count;
4066
4067 }
do_test_zhemm_c_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)4068 void do_test_zhemm_c_c_x(int m, int n,
4069 int ntests, int *seed, double thresh, int debug,
4070 float test_prob, double *min_ratio,
4071 double *max_ratio, int *num_bad_ratio,
4072 int *num_tests)
4073 {
4074
4075 /* Function name */
4076 const char fname[] = "BLAS_zhemm_c_c_x";
4077
4078 int i, j;
4079 int ci, cij;
4080 int incci, inccij;
4081 int test_count;
4082 int bad_ratio_count;
4083
4084 int ri, rij;
4085 int incri, incrij;
4086 int inca, incb, incc;
4087
4088 double ratio;
4089
4090 double ratio_min, ratio_max;
4091
4092 double eps_int; /* internal machine epsilon */
4093 double un_int; /* internal underflow threshold */
4094
4095 double rin[2];
4096 double rout[2];
4097 double head_r_true_elem[2], tail_r_true_elem[2];
4098
4099 enum blas_order_type order_type;
4100 enum blas_side_type side_type;
4101 enum blas_uplo_type uplo_type;
4102 enum blas_prec_type prec;
4103
4104 int order_val, uplo_val, side_val;
4105 int lda_val, ldb_val, ldc_val;
4106 int alpha_val, beta_val;
4107 int randomize_val;
4108
4109 int prec_val;
4110
4111 int lda, ldb, ldc;
4112 int alpha_flag, beta_flag;
4113 int saved_seed;
4114 int norm;
4115 int test_no;
4116 int max_mn;
4117 int m_i, n_i;
4118
4119 double alpha[2];
4120 double beta[2];
4121 float *a;
4122 float *b;
4123 double *c;
4124 float *a_vec;
4125 float *b_vec;
4126
4127 /* generated test values for c */
4128 double *c_gen;
4129
4130 double *ratios;
4131
4132 /* true result calculated by testgen, in double-double */
4133 double *head_r_true, *tail_r_true;
4134
4135
4136 FPU_FIX_DECL;
4137
4138 if (n < 0 || m < 0 || ntests < 0)
4139 BLAS_error(fname, 0, 0, NULL);
4140
4141 /* initialization */
4142 saved_seed = *seed;
4143 ratio = 0.0;
4144 ratio_min = 1e308;
4145 ratio_max = 0.0;
4146
4147 *num_tests = 0;
4148 *num_bad_ratio = 0;
4149 *min_ratio = 0.0;
4150 *max_ratio = 0.0;
4151
4152 if (m == 0 || n == 0)
4153 return;
4154
4155 FPU_FIX_START;
4156
4157 max_mn = (m > n) ? m : n;
4158
4159 inca = incb = incc = 1;
4160 inca *= 2;
4161 incb *= 2;
4162 incc *= 2;
4163
4164 /* allocate memory for arrays */
4165 c = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
4166 if (2 * m * n > 0 && c == NULL) {
4167 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4168 }
4169 c_gen = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
4170 if (2 * m * n > 0 && c_gen == NULL) {
4171 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4172 }
4173 a = (float *) blas_malloc(2 * max_mn * max_mn * sizeof(float) * 2);
4174 if (2 * max_mn * max_mn > 0 && a == NULL) {
4175 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4176 }
4177 b = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
4178 if (2 * m * n > 0 && b == NULL) {
4179 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4180 }
4181 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4182 if (max_mn > 0 && a_vec == NULL) {
4183 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4184 }
4185 b_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4186 if (max_mn > 0 && b_vec == NULL) {
4187 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4188 }
4189 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
4190 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
4191 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4192 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4193 }
4194 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
4195 if (2 * m * n > 0 && ratios == NULL) {
4196 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4197 }
4198
4199 test_count = 0;
4200 bad_ratio_count = 0;
4201
4202 /* vary alpha */
4203 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
4204
4205 alpha_flag = 0;
4206 switch (alpha_val) {
4207 case 0:
4208 alpha[0] = alpha[1] = 0.0;
4209 alpha_flag = 1;
4210 break;
4211 case 1:
4212 alpha[0] = 1.0;
4213 alpha[1] = 0.0;
4214 alpha_flag = 1;
4215 break;
4216 }
4217
4218 /* vary beta */
4219 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
4220 beta_flag = 0;
4221 switch (beta_val) {
4222 case 0:
4223 beta[0] = beta[1] = 0.0;
4224 beta_flag = 1;
4225 break;
4226 case 1:
4227 beta[0] = 1.0;
4228 beta[1] = 0.0;
4229 beta_flag = 1;
4230 break;
4231 }
4232
4233
4234 /* varying extra precs */
4235 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
4236 switch (prec_val) {
4237 case 0:
4238 eps_int = power(2, -BITS_D);
4239 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4240 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4241 prec = blas_prec_double;
4242 break;
4243 case 1:
4244 eps_int = power(2, -BITS_D);
4245 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4246 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4247 prec = blas_prec_double;
4248 break;
4249 case 2:
4250 default:
4251 eps_int = power(2, -BITS_E);
4252 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
4253 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
4254 prec = blas_prec_extra;
4255 break;
4256 }
4257
4258 /* vary norm -- underflow, approx 1, overflow */
4259 for (norm = NORM_START; norm <= NORM_END; norm++) {
4260
4261 /* number of tests */
4262 for (test_no = 0; test_no < ntests; test_no++) {
4263
4264 /* vary storage format */
4265 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
4266
4267 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
4268
4269 /* vary upper / lower variation */
4270 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
4271
4272 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
4273
4274 /* vary transpositions of B */
4275 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
4276
4277 side_type = (side_val == 0) ?
4278 blas_left_side : blas_right_side;
4279
4280 if (side_type == blas_left_side) {
4281 m_i = m;
4282 n_i = n;
4283 } else {
4284 m_i = n;
4285 n_i = m;
4286 }
4287
4288 /* vary lda = k, k+1, 2*k */
4289 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
4290
4291 lda = (lda_val == 0) ? m_i :
4292 (lda_val == 1) ? m_i + 1 : 2 * m_i;
4293
4294 /* vary ldb = n, n+1, 2*n */
4295 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
4296
4297 if (order_type == blas_colmajor)
4298 ldb = (ldb_val == 0) ? m :
4299 (ldb_val == 1) ? m + 1 : 2 * m;
4300 else
4301 ldb = (ldb_val == 0) ? n :
4302 (ldb_val == 1) ? n + 1 : 2 * n;
4303
4304 /* vary ldc = k, k+1, 2*k */
4305 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
4306
4307 if (order_type == blas_colmajor)
4308 ldc = (ldc_val == 0) ? m :
4309 (ldc_val == 1) ? m + 1 : 2 * m;
4310 else
4311 ldc = (ldc_val == 0) ? n :
4312 (ldc_val == 1) ? n + 1 : 2 * n;
4313
4314 /* vary randomize = 0, 1 */
4315 for (randomize_val = RANDOMIZE_START;
4316 randomize_val <= RANDOMIZE_END;
4317 randomize_val++) {
4318
4319 /* For the sake of speed, we throw out this case at random */
4320 if (xrand(seed) >= test_prob)
4321 continue;
4322
4323 saved_seed = *seed;
4324
4325 /* finally we are here to generate the test case */
4326 BLAS_zhemm_c_c_testgen(norm, order_type,
4327 uplo_type, side_type, m, n,
4328 randomize_val, &alpha,
4329 alpha_flag, &beta, beta_flag,
4330 a, lda, b, ldb, c, ldc, seed,
4331 head_r_true, tail_r_true);
4332 test_count++;
4333
4334 /* copy generated C matrix since this will be
4335 over written */
4336 zge_copy_matrix(order_type, m, n, c_gen, ldc, c,
4337 ldc);
4338
4339 /* call hemm routines to be tested */
4340 FPU_FIX_STOP;
4341 BLAS_zhemm_c_c_x(order_type, side_type,
4342 uplo_type, m, n, alpha, a, lda, b,
4343 ldb, beta, c, ldc, prec);
4344 FPU_FIX_START;
4345
4346 /* now compute the ratio using test_BLAS_xdot */
4347 /* copy a row from A, a column from B, run
4348 dot test */
4349
4350 if ((order_type == blas_colmajor &&
4351 side_type == blas_left_side) ||
4352 (order_type == blas_rowmajor &&
4353 side_type == blas_right_side)) {
4354 incci = 1;
4355 inccij = ldc;
4356 } else {
4357 incci = ldc;
4358 inccij = 1;
4359 }
4360
4361 incri = incci;
4362 incrij = inccij;
4363 incci *= 2;
4364 inccij *= 2;
4365
4366 for (i = 0, ci = 0, ri = 0;
4367 i < m_i; i++, ci += incci, ri += incri) {
4368 che_copy_row(order_type, uplo_type, side_type,
4369 m_i, a, lda, a_vec, i);
4370 for (j = 0, cij = ci, rij = ri;
4371 j < n_i; j++, cij += inccij, rij += incrij) {
4372 /* copy i-th row of A and j-th col of B */
4373 if (side_type == blas_left_side)
4374 cge_copy_col(order_type, blas_no_trans,
4375 m, n, b, ldb, b_vec, j);
4376 else
4377 cge_copy_row(order_type, blas_no_trans,
4378 m, n, b, ldb, b_vec, j);
4379 rin[0] = c_gen[cij];
4380 rin[1] = c_gen[cij + 1];
4381 rout[0] = c[cij];
4382 rout[1] = c[cij + 1];
4383 head_r_true_elem[0] = head_r_true[cij];
4384 head_r_true_elem[1] = head_r_true[cij + 1];
4385 tail_r_true_elem[0] = tail_r_true[cij];
4386 tail_r_true_elem[1] = tail_r_true[cij + 1];
4387
4388 test_BLAS_zdot_c_c(m_i,
4389 blas_no_conj,
4390 alpha, beta, rin, rout,
4391 head_r_true_elem,
4392 tail_r_true_elem, a_vec, 1,
4393 b_vec, 1, eps_int, un_int,
4394 &ratios[rij]);
4395
4396 /* take the max ratio */
4397 if (rij == 0) {
4398 ratio = ratios[0];
4399 /* The !<= below causes NaN error to be detected.
4400 Note that (NaN > thresh) is always false. */
4401 } else if (!(ratios[rij] <= ratio)) {
4402 ratio = ratios[rij];
4403 }
4404
4405 }
4406 } /* end of dot-test loop */
4407
4408 /* Increase the number of bad ratio, if the ratio
4409 is bigger than the threshold.
4410 The !<= below causes NaN error to be detected.
4411 Note that (NaN > thresh) is always false. */
4412 if (!(ratio <= thresh)) {
4413
4414 if (debug == 3) {
4415 printf("Seed = %d\n", saved_seed);
4416 printf("m %d n %d\n", m, n);
4417 printf("LDA %d LDB %d LDC %d\n", lda, ldb,
4418 ldc);
4419
4420 if (order_type == blas_rowmajor)
4421 printf("row ");
4422 else
4423 printf("col ");
4424
4425 if (uplo_type == blas_upper)
4426 printf("upper ");
4427 else
4428 printf("lower");
4429
4430 if (side_type == blas_left_side)
4431 printf(" left\n");
4432 else
4433 printf(" right\n");
4434
4435 if (randomize_val == 1)
4436 printf("Randomized\n");
4437 else
4438 printf("Not Randomized\n");
4439
4440 printf("NORM %d, ALPHA %d, BETA %d\n",
4441 norm, alpha_val, beta_val);
4442
4443 /* print out info */
4444 printf("alpha = ");
4445 printf("(%24.16e, %24.16e)", alpha[0],
4446 alpha[1]);;
4447 printf(" ");
4448 printf("beta = ");
4449 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
4450 printf("\n");
4451
4452 printf("a\n");
4453 che_print_matrix(a, m_i, lda, order_type,
4454 uplo_type);
4455 cge_print_matrix(b, m, n, ldb, order_type, "B");
4456 zge_print_matrix(c_gen, m, n, ldc, order_type,
4457 "C_gen");
4458 zge_print_matrix(c, m, n, ldc, order_type, "C");
4459 zge_print_matrix(head_r_true, m, n, ldc,
4460 order_type, "head_r_true");
4461
4462 printf("ratio = %g\n", ratio);
4463 }
4464 bad_ratio_count++;
4465 if (bad_ratio_count >= MAX_BAD_TESTS) {
4466 printf("\ntoo many failures, exiting....");
4467 printf("\nTesting and compilation");
4468 printf(" are incomplete\n\n");
4469 goto end;
4470 }
4471 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4472 printf("\nFlagrant ratio %e, exiting...",
4473 ratio);
4474 printf("\nTesting and compilation");
4475 printf(" are incomplete\n\n");
4476 goto end;
4477 }
4478 }
4479
4480 if (ratio > ratio_max)
4481 ratio_max = ratio;
4482
4483 if (ratio != 0.0 && ratio < ratio_min)
4484 ratio_min = ratio;
4485
4486 } /* end of randomize loop */
4487
4488 } /* end of ldc loop */
4489
4490 } /* end of ldb loop */
4491
4492 } /* end of lda loop */
4493
4494 } /* end of side loop */
4495
4496 } /* end of uplo loop */
4497
4498 } /* end of order loop */
4499
4500 } /* end of nr test loop */
4501
4502 } /* end of norm loop */
4503
4504
4505 } /* end of prec loop */
4506
4507 } /* end of beta loop */
4508
4509 } /* end of alpha loop */
4510
4511 end:
4512 FPU_FIX_STOP;
4513
4514 blas_free(c);
4515 blas_free(a);
4516 blas_free(b);
4517 blas_free(c_gen);
4518 blas_free(head_r_true);
4519 blas_free(tail_r_true);
4520 blas_free(ratios);
4521 blas_free(a_vec);
4522 blas_free(b_vec);
4523
4524 *max_ratio = ratio_max;
4525 *min_ratio = ratio_min;
4526 *num_tests = test_count;
4527 *num_bad_ratio = bad_ratio_count;
4528
4529 }
do_test_chemm_c_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)4530 void do_test_chemm_c_s_x(int m, int n,
4531 int ntests, int *seed, double thresh, int debug,
4532 float test_prob, double *min_ratio,
4533 double *max_ratio, int *num_bad_ratio,
4534 int *num_tests)
4535 {
4536
4537 /* Function name */
4538 const char fname[] = "BLAS_chemm_c_s_x";
4539
4540 int i, j;
4541 int ci, cij;
4542 int incci, inccij;
4543 int test_count;
4544 int bad_ratio_count;
4545
4546 int ri, rij;
4547 int incri, incrij;
4548 int inca, incb, incc;
4549
4550 double ratio;
4551
4552 double ratio_min, ratio_max;
4553
4554 double eps_int; /* internal machine epsilon */
4555 double un_int; /* internal underflow threshold */
4556
4557 float rin[2];
4558 float rout[2];
4559 double head_r_true_elem[2], tail_r_true_elem[2];
4560
4561 enum blas_order_type order_type;
4562 enum blas_side_type side_type;
4563 enum blas_uplo_type uplo_type;
4564 enum blas_prec_type prec;
4565
4566 int order_val, uplo_val, side_val;
4567 int lda_val, ldb_val, ldc_val;
4568 int alpha_val, beta_val;
4569 int randomize_val;
4570
4571 int prec_val;
4572
4573 int lda, ldb, ldc;
4574 int alpha_flag, beta_flag;
4575 int saved_seed;
4576 int norm;
4577 int test_no;
4578 int max_mn;
4579 int m_i, n_i;
4580
4581 float alpha[2];
4582 float beta[2];
4583 float *a;
4584 float *b;
4585 float *c;
4586 float *a_vec;
4587 float *b_vec;
4588
4589 /* generated test values for c */
4590 float *c_gen;
4591
4592 double *ratios;
4593
4594 /* true result calculated by testgen, in double-double */
4595 double *head_r_true, *tail_r_true;
4596
4597
4598 FPU_FIX_DECL;
4599
4600 if (n < 0 || m < 0 || ntests < 0)
4601 BLAS_error(fname, 0, 0, NULL);
4602
4603 /* initialization */
4604 saved_seed = *seed;
4605 ratio = 0.0;
4606 ratio_min = 1e308;
4607 ratio_max = 0.0;
4608
4609 *num_tests = 0;
4610 *num_bad_ratio = 0;
4611 *min_ratio = 0.0;
4612 *max_ratio = 0.0;
4613
4614 if (m == 0 || n == 0)
4615 return;
4616
4617 FPU_FIX_START;
4618
4619 max_mn = (m > n) ? m : n;
4620
4621 inca = incb = incc = 1;
4622 inca *= 2;
4623
4624 incc *= 2;
4625
4626 /* allocate memory for arrays */
4627 c = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
4628 if (2 * m * n > 0 && c == NULL) {
4629 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4630 }
4631 c_gen = (float *) blas_malloc(2 * m * n * sizeof(float) * 2);
4632 if (2 * m * n > 0 && c_gen == NULL) {
4633 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4634 }
4635 a = (float *) blas_malloc(2 * max_mn * max_mn * sizeof(float) * 2);
4636 if (2 * max_mn * max_mn > 0 && a == NULL) {
4637 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4638 }
4639 b = (float *) blas_malloc(2 * m * n * sizeof(float));
4640 if (2 * m * n > 0 && b == NULL) {
4641 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4642 }
4643 a_vec = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4644 if (max_mn > 0 && a_vec == NULL) {
4645 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4646 }
4647 b_vec = (float *) blas_malloc(max_mn * sizeof(float));
4648 if (max_mn > 0 && b_vec == NULL) {
4649 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4650 }
4651 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
4652 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
4653 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4654 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4655 }
4656 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
4657 if (2 * m * n > 0 && ratios == NULL) {
4658 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4659 }
4660
4661 test_count = 0;
4662 bad_ratio_count = 0;
4663
4664 /* vary alpha */
4665 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
4666
4667 alpha_flag = 0;
4668 switch (alpha_val) {
4669 case 0:
4670 alpha[0] = alpha[1] = 0.0;
4671 alpha_flag = 1;
4672 break;
4673 case 1:
4674 alpha[0] = 1.0;
4675 alpha[1] = 0.0;
4676 alpha_flag = 1;
4677 break;
4678 }
4679
4680 /* vary beta */
4681 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
4682 beta_flag = 0;
4683 switch (beta_val) {
4684 case 0:
4685 beta[0] = beta[1] = 0.0;
4686 beta_flag = 1;
4687 break;
4688 case 1:
4689 beta[0] = 1.0;
4690 beta[1] = 0.0;
4691 beta_flag = 1;
4692 break;
4693 }
4694
4695
4696 /* varying extra precs */
4697 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
4698 switch (prec_val) {
4699 case 0:
4700 eps_int = power(2, -BITS_S);
4701 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
4702 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
4703 prec = blas_prec_single;
4704 break;
4705 case 1:
4706 eps_int = power(2, -BITS_D);
4707 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4708 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4709 prec = blas_prec_double;
4710 break;
4711 case 2:
4712 default:
4713 eps_int = power(2, -BITS_E);
4714 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
4715 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
4716 prec = blas_prec_extra;
4717 break;
4718 }
4719
4720 /* vary norm -- underflow, approx 1, overflow */
4721 for (norm = NORM_START; norm <= NORM_END; norm++) {
4722
4723 /* number of tests */
4724 for (test_no = 0; test_no < ntests; test_no++) {
4725
4726 /* vary storage format */
4727 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
4728
4729 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
4730
4731 /* vary upper / lower variation */
4732 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
4733
4734 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
4735
4736 /* vary transpositions of B */
4737 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
4738
4739 side_type = (side_val == 0) ?
4740 blas_left_side : blas_right_side;
4741
4742 if (side_type == blas_left_side) {
4743 m_i = m;
4744 n_i = n;
4745 } else {
4746 m_i = n;
4747 n_i = m;
4748 }
4749
4750 /* vary lda = k, k+1, 2*k */
4751 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
4752
4753 lda = (lda_val == 0) ? m_i :
4754 (lda_val == 1) ? m_i + 1 : 2 * m_i;
4755
4756 /* vary ldb = n, n+1, 2*n */
4757 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
4758
4759 if (order_type == blas_colmajor)
4760 ldb = (ldb_val == 0) ? m :
4761 (ldb_val == 1) ? m + 1 : 2 * m;
4762 else
4763 ldb = (ldb_val == 0) ? n :
4764 (ldb_val == 1) ? n + 1 : 2 * n;
4765
4766 /* vary ldc = k, k+1, 2*k */
4767 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
4768
4769 if (order_type == blas_colmajor)
4770 ldc = (ldc_val == 0) ? m :
4771 (ldc_val == 1) ? m + 1 : 2 * m;
4772 else
4773 ldc = (ldc_val == 0) ? n :
4774 (ldc_val == 1) ? n + 1 : 2 * n;
4775
4776 /* vary randomize = 0, 1 */
4777 for (randomize_val = RANDOMIZE_START;
4778 randomize_val <= RANDOMIZE_END;
4779 randomize_val++) {
4780
4781 /* For the sake of speed, we throw out this case at random */
4782 if (xrand(seed) >= test_prob)
4783 continue;
4784
4785 saved_seed = *seed;
4786
4787 /* finally we are here to generate the test case */
4788 BLAS_chemm_c_s_testgen(norm, order_type,
4789 uplo_type, side_type, m, n,
4790 randomize_val, &alpha,
4791 alpha_flag, &beta, beta_flag,
4792 a, lda, b, ldb, c, ldc, seed,
4793 head_r_true, tail_r_true);
4794 test_count++;
4795
4796 /* copy generated C matrix since this will be
4797 over written */
4798 cge_copy_matrix(order_type, m, n, c_gen, ldc, c,
4799 ldc);
4800
4801 /* call hemm routines to be tested */
4802 FPU_FIX_STOP;
4803 BLAS_chemm_c_s_x(order_type, side_type,
4804 uplo_type, m, n, alpha, a, lda, b,
4805 ldb, beta, c, ldc, prec);
4806 FPU_FIX_START;
4807
4808 /* now compute the ratio using test_BLAS_xdot */
4809 /* copy a row from A, a column from B, run
4810 dot test */
4811
4812 if ((order_type == blas_colmajor &&
4813 side_type == blas_left_side) ||
4814 (order_type == blas_rowmajor &&
4815 side_type == blas_right_side)) {
4816 incci = 1;
4817 inccij = ldc;
4818 } else {
4819 incci = ldc;
4820 inccij = 1;
4821 }
4822
4823 incri = incci;
4824 incrij = inccij;
4825 incci *= 2;
4826 inccij *= 2;
4827
4828 for (i = 0, ci = 0, ri = 0;
4829 i < m_i; i++, ci += incci, ri += incri) {
4830 che_copy_row(order_type, uplo_type, side_type,
4831 m_i, a, lda, a_vec, i);
4832 for (j = 0, cij = ci, rij = ri;
4833 j < n_i; j++, cij += inccij, rij += incrij) {
4834 /* copy i-th row of A and j-th col of B */
4835 if (side_type == blas_left_side)
4836 sge_copy_col(order_type, blas_no_trans,
4837 m, n, b, ldb, b_vec, j);
4838 else
4839 sge_copy_row(order_type, blas_no_trans,
4840 m, n, b, ldb, b_vec, j);
4841 rin[0] = c_gen[cij];
4842 rin[1] = c_gen[cij + 1];
4843 rout[0] = c[cij];
4844 rout[1] = c[cij + 1];
4845 head_r_true_elem[0] = head_r_true[cij];
4846 head_r_true_elem[1] = head_r_true[cij + 1];
4847 tail_r_true_elem[0] = tail_r_true[cij];
4848 tail_r_true_elem[1] = tail_r_true[cij + 1];
4849
4850 test_BLAS_cdot_c_s(m_i,
4851 blas_no_conj,
4852 alpha, beta, rin, rout,
4853 head_r_true_elem,
4854 tail_r_true_elem, a_vec, 1,
4855 b_vec, 1, eps_int, un_int,
4856 &ratios[rij]);
4857
4858 /* take the max ratio */
4859 if (rij == 0) {
4860 ratio = ratios[0];
4861 /* The !<= below causes NaN error to be detected.
4862 Note that (NaN > thresh) is always false. */
4863 } else if (!(ratios[rij] <= ratio)) {
4864 ratio = ratios[rij];
4865 }
4866
4867 }
4868 } /* end of dot-test loop */
4869
4870 /* Increase the number of bad ratio, if the ratio
4871 is bigger than the threshold.
4872 The !<= below causes NaN error to be detected.
4873 Note that (NaN > thresh) is always false. */
4874 if (!(ratio <= thresh)) {
4875
4876 if (debug == 3) {
4877 printf("Seed = %d\n", saved_seed);
4878 printf("m %d n %d\n", m, n);
4879 printf("LDA %d LDB %d LDC %d\n", lda, ldb,
4880 ldc);
4881
4882 if (order_type == blas_rowmajor)
4883 printf("row ");
4884 else
4885 printf("col ");
4886
4887 if (uplo_type == blas_upper)
4888 printf("upper ");
4889 else
4890 printf("lower");
4891
4892 if (side_type == blas_left_side)
4893 printf(" left\n");
4894 else
4895 printf(" right\n");
4896
4897 if (randomize_val == 1)
4898 printf("Randomized\n");
4899 else
4900 printf("Not Randomized\n");
4901
4902 printf("NORM %d, ALPHA %d, BETA %d\n",
4903 norm, alpha_val, beta_val);
4904
4905 /* print out info */
4906 printf("alpha = ");
4907 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);;
4908 printf(" ");
4909 printf("beta = ");
4910 printf("(%16.8e, %16.8e)", beta[0], beta[1]);;
4911 printf("\n");
4912
4913 printf("a\n");
4914 che_print_matrix(a, m_i, lda, order_type,
4915 uplo_type);
4916 sge_print_matrix(b, m, n, ldb, order_type, "B");
4917 cge_print_matrix(c_gen, m, n, ldc, order_type,
4918 "C_gen");
4919 cge_print_matrix(c, m, n, ldc, order_type, "C");
4920 zge_print_matrix(head_r_true, m, n, ldc,
4921 order_type, "head_r_true");
4922
4923 printf("ratio = %g\n", ratio);
4924 }
4925 bad_ratio_count++;
4926 if (bad_ratio_count >= MAX_BAD_TESTS) {
4927 printf("\ntoo many failures, exiting....");
4928 printf("\nTesting and compilation");
4929 printf(" are incomplete\n\n");
4930 goto end;
4931 }
4932 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4933 printf("\nFlagrant ratio %e, exiting...",
4934 ratio);
4935 printf("\nTesting and compilation");
4936 printf(" are incomplete\n\n");
4937 goto end;
4938 }
4939 }
4940
4941 if (ratio > ratio_max)
4942 ratio_max = ratio;
4943
4944 if (ratio != 0.0 && ratio < ratio_min)
4945 ratio_min = ratio;
4946
4947 } /* end of randomize loop */
4948
4949 } /* end of ldc loop */
4950
4951 } /* end of ldb loop */
4952
4953 } /* end of lda loop */
4954
4955 } /* end of side loop */
4956
4957 } /* end of uplo loop */
4958
4959 } /* end of order loop */
4960
4961 } /* end of nr test loop */
4962
4963 } /* end of norm loop */
4964
4965
4966 } /* end of prec loop */
4967
4968 } /* end of beta loop */
4969
4970 } /* end of alpha loop */
4971
4972 end:
4973 FPU_FIX_STOP;
4974
4975 blas_free(c);
4976 blas_free(a);
4977 blas_free(b);
4978 blas_free(c_gen);
4979 blas_free(head_r_true);
4980 blas_free(tail_r_true);
4981 blas_free(ratios);
4982 blas_free(a_vec);
4983 blas_free(b_vec);
4984
4985 *max_ratio = ratio_max;
4986 *min_ratio = ratio_min;
4987 *num_tests = test_count;
4988 *num_bad_ratio = bad_ratio_count;
4989
4990 }
do_test_zhemm_z_d_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)4991 void do_test_zhemm_z_d_x(int m, int n,
4992 int ntests, int *seed, double thresh, int debug,
4993 float test_prob, double *min_ratio,
4994 double *max_ratio, int *num_bad_ratio,
4995 int *num_tests)
4996 {
4997
4998 /* Function name */
4999 const char fname[] = "BLAS_zhemm_z_d_x";
5000
5001 int i, j;
5002 int ci, cij;
5003 int incci, inccij;
5004 int test_count;
5005 int bad_ratio_count;
5006
5007 int ri, rij;
5008 int incri, incrij;
5009 int inca, incb, incc;
5010
5011 double ratio;
5012
5013 double ratio_min, ratio_max;
5014
5015 double eps_int; /* internal machine epsilon */
5016 double un_int; /* internal underflow threshold */
5017
5018 double rin[2];
5019 double rout[2];
5020 double head_r_true_elem[2], tail_r_true_elem[2];
5021
5022 enum blas_order_type order_type;
5023 enum blas_side_type side_type;
5024 enum blas_uplo_type uplo_type;
5025 enum blas_prec_type prec;
5026
5027 int order_val, uplo_val, side_val;
5028 int lda_val, ldb_val, ldc_val;
5029 int alpha_val, beta_val;
5030 int randomize_val;
5031
5032 int prec_val;
5033
5034 int lda, ldb, ldc;
5035 int alpha_flag, beta_flag;
5036 int saved_seed;
5037 int norm;
5038 int test_no;
5039 int max_mn;
5040 int m_i, n_i;
5041
5042 double alpha[2];
5043 double beta[2];
5044 double *a;
5045 double *b;
5046 double *c;
5047 double *a_vec;
5048 double *b_vec;
5049
5050 /* generated test values for c */
5051 double *c_gen;
5052
5053 double *ratios;
5054
5055 /* true result calculated by testgen, in double-double */
5056 double *head_r_true, *tail_r_true;
5057
5058
5059 FPU_FIX_DECL;
5060
5061 if (n < 0 || m < 0 || ntests < 0)
5062 BLAS_error(fname, 0, 0, NULL);
5063
5064 /* initialization */
5065 saved_seed = *seed;
5066 ratio = 0.0;
5067 ratio_min = 1e308;
5068 ratio_max = 0.0;
5069
5070 *num_tests = 0;
5071 *num_bad_ratio = 0;
5072 *min_ratio = 0.0;
5073 *max_ratio = 0.0;
5074
5075 if (m == 0 || n == 0)
5076 return;
5077
5078 FPU_FIX_START;
5079
5080 max_mn = (m > n) ? m : n;
5081
5082 inca = incb = incc = 1;
5083 inca *= 2;
5084
5085 incc *= 2;
5086
5087 /* allocate memory for arrays */
5088 c = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
5089 if (2 * m * n > 0 && c == NULL) {
5090 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5091 }
5092 c_gen = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
5093 if (2 * m * n > 0 && c_gen == NULL) {
5094 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5095 }
5096 a = (double *) blas_malloc(2 * max_mn * max_mn * sizeof(double) * 2);
5097 if (2 * max_mn * max_mn > 0 && a == NULL) {
5098 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5099 }
5100 b = (double *) blas_malloc(2 * m * n * sizeof(double));
5101 if (2 * m * n > 0 && b == NULL) {
5102 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5103 }
5104 a_vec = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5105 if (max_mn > 0 && a_vec == NULL) {
5106 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5107 }
5108 b_vec = (double *) blas_malloc(max_mn * sizeof(double));
5109 if (max_mn > 0 && b_vec == NULL) {
5110 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5111 }
5112 head_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
5113 tail_r_true = (double *) blas_malloc(2 * m * n * sizeof(double) * 2);
5114 if (2 * m * n > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
5115 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5116 }
5117 ratios = (double *) blas_malloc(2 * m * n * sizeof(double));
5118 if (2 * m * n > 0 && ratios == NULL) {
5119 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5120 }
5121
5122 test_count = 0;
5123 bad_ratio_count = 0;
5124
5125 /* vary alpha */
5126 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
5127
5128 alpha_flag = 0;
5129 switch (alpha_val) {
5130 case 0:
5131 alpha[0] = alpha[1] = 0.0;
5132 alpha_flag = 1;
5133 break;
5134 case 1:
5135 alpha[0] = 1.0;
5136 alpha[1] = 0.0;
5137 alpha_flag = 1;
5138 break;
5139 }
5140
5141 /* vary beta */
5142 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
5143 beta_flag = 0;
5144 switch (beta_val) {
5145 case 0:
5146 beta[0] = beta[1] = 0.0;
5147 beta_flag = 1;
5148 break;
5149 case 1:
5150 beta[0] = 1.0;
5151 beta[1] = 0.0;
5152 beta_flag = 1;
5153 break;
5154 }
5155
5156
5157 /* varying extra precs */
5158 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
5159 switch (prec_val) {
5160 case 0:
5161 eps_int = power(2, -BITS_D);
5162 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5163 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5164 prec = blas_prec_double;
5165 break;
5166 case 1:
5167 eps_int = power(2, -BITS_D);
5168 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5169 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5170 prec = blas_prec_double;
5171 break;
5172 case 2:
5173 default:
5174 eps_int = power(2, -BITS_E);
5175 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
5176 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
5177 prec = blas_prec_extra;
5178 break;
5179 }
5180
5181 /* vary norm -- underflow, approx 1, overflow */
5182 for (norm = NORM_START; norm <= NORM_END; norm++) {
5183
5184 /* number of tests */
5185 for (test_no = 0; test_no < ntests; test_no++) {
5186
5187 /* vary storage format */
5188 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
5189
5190 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
5191
5192 /* vary upper / lower variation */
5193 for (uplo_val = UPLO_START; uplo_val <= UPLO_END; uplo_val++) {
5194
5195 uplo_type = (uplo_val == 0) ? blas_upper : blas_lower;
5196
5197 /* vary transpositions of B */
5198 for (side_val = SIDE_START; side_val <= SIDE_END; side_val++) {
5199
5200 side_type = (side_val == 0) ?
5201 blas_left_side : blas_right_side;
5202
5203 if (side_type == blas_left_side) {
5204 m_i = m;
5205 n_i = n;
5206 } else {
5207 m_i = n;
5208 n_i = m;
5209 }
5210
5211 /* vary lda = k, k+1, 2*k */
5212 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
5213
5214 lda = (lda_val == 0) ? m_i :
5215 (lda_val == 1) ? m_i + 1 : 2 * m_i;
5216
5217 /* vary ldb = n, n+1, 2*n */
5218 for (ldb_val = LDB_START; ldb_val <= LDB_END; ldb_val++) {
5219
5220 if (order_type == blas_colmajor)
5221 ldb = (ldb_val == 0) ? m :
5222 (ldb_val == 1) ? m + 1 : 2 * m;
5223 else
5224 ldb = (ldb_val == 0) ? n :
5225 (ldb_val == 1) ? n + 1 : 2 * n;
5226
5227 /* vary ldc = k, k+1, 2*k */
5228 for (ldc_val = LDC_START; ldc_val <= LDC_END; ldc_val++) {
5229
5230 if (order_type == blas_colmajor)
5231 ldc = (ldc_val == 0) ? m :
5232 (ldc_val == 1) ? m + 1 : 2 * m;
5233 else
5234 ldc = (ldc_val == 0) ? n :
5235 (ldc_val == 1) ? n + 1 : 2 * n;
5236
5237 /* vary randomize = 0, 1 */
5238 for (randomize_val = RANDOMIZE_START;
5239 randomize_val <= RANDOMIZE_END;
5240 randomize_val++) {
5241
5242 /* For the sake of speed, we throw out this case at random */
5243 if (xrand(seed) >= test_prob)
5244 continue;
5245
5246 saved_seed = *seed;
5247
5248 /* finally we are here to generate the test case */
5249 BLAS_zhemm_z_d_testgen(norm, order_type,
5250 uplo_type, side_type, m, n,
5251 randomize_val, &alpha,
5252 alpha_flag, &beta, beta_flag,
5253 a, lda, b, ldb, c, ldc, seed,
5254 head_r_true, tail_r_true);
5255 test_count++;
5256
5257 /* copy generated C matrix since this will be
5258 over written */
5259 zge_copy_matrix(order_type, m, n, c_gen, ldc, c,
5260 ldc);
5261
5262 /* call hemm routines to be tested */
5263 FPU_FIX_STOP;
5264 BLAS_zhemm_z_d_x(order_type, side_type,
5265 uplo_type, m, n, alpha, a, lda, b,
5266 ldb, beta, c, ldc, prec);
5267 FPU_FIX_START;
5268
5269 /* now compute the ratio using test_BLAS_xdot */
5270 /* copy a row from A, a column from B, run
5271 dot test */
5272
5273 if ((order_type == blas_colmajor &&
5274 side_type == blas_left_side) ||
5275 (order_type == blas_rowmajor &&
5276 side_type == blas_right_side)) {
5277 incci = 1;
5278 inccij = ldc;
5279 } else {
5280 incci = ldc;
5281 inccij = 1;
5282 }
5283
5284 incri = incci;
5285 incrij = inccij;
5286 incci *= 2;
5287 inccij *= 2;
5288
5289 for (i = 0, ci = 0, ri = 0;
5290 i < m_i; i++, ci += incci, ri += incri) {
5291 zhe_copy_row(order_type, uplo_type, side_type,
5292 m_i, a, lda, a_vec, i);
5293 for (j = 0, cij = ci, rij = ri;
5294 j < n_i; j++, cij += inccij, rij += incrij) {
5295 /* copy i-th row of A and j-th col of B */
5296 if (side_type == blas_left_side)
5297 dge_copy_col(order_type, blas_no_trans,
5298 m, n, b, ldb, b_vec, j);
5299 else
5300 dge_copy_row(order_type, blas_no_trans,
5301 m, n, b, ldb, b_vec, j);
5302 rin[0] = c_gen[cij];
5303 rin[1] = c_gen[cij + 1];
5304 rout[0] = c[cij];
5305 rout[1] = c[cij + 1];
5306 head_r_true_elem[0] = head_r_true[cij];
5307 head_r_true_elem[1] = head_r_true[cij + 1];
5308 tail_r_true_elem[0] = tail_r_true[cij];
5309 tail_r_true_elem[1] = tail_r_true[cij + 1];
5310
5311 test_BLAS_zdot_z_d(m_i,
5312 blas_no_conj,
5313 alpha, beta, rin, rout,
5314 head_r_true_elem,
5315 tail_r_true_elem, a_vec, 1,
5316 b_vec, 1, eps_int, un_int,
5317 &ratios[rij]);
5318
5319 /* take the max ratio */
5320 if (rij == 0) {
5321 ratio = ratios[0];
5322 /* The !<= below causes NaN error to be detected.
5323 Note that (NaN > thresh) is always false. */
5324 } else if (!(ratios[rij] <= ratio)) {
5325 ratio = ratios[rij];
5326 }
5327
5328 }
5329 } /* end of dot-test loop */
5330
5331 /* Increase the number of bad ratio, if the ratio
5332 is bigger than the threshold.
5333 The !<= below causes NaN error to be detected.
5334 Note that (NaN > thresh) is always false. */
5335 if (!(ratio <= thresh)) {
5336
5337 if (debug == 3) {
5338 printf("Seed = %d\n", saved_seed);
5339 printf("m %d n %d\n", m, n);
5340 printf("LDA %d LDB %d LDC %d\n", lda, ldb,
5341 ldc);
5342
5343 if (order_type == blas_rowmajor)
5344 printf("row ");
5345 else
5346 printf("col ");
5347
5348 if (uplo_type == blas_upper)
5349 printf("upper ");
5350 else
5351 printf("lower");
5352
5353 if (side_type == blas_left_side)
5354 printf(" left\n");
5355 else
5356 printf(" right\n");
5357
5358 if (randomize_val == 1)
5359 printf("Randomized\n");
5360 else
5361 printf("Not Randomized\n");
5362
5363 printf("NORM %d, ALPHA %d, BETA %d\n",
5364 norm, alpha_val, beta_val);
5365
5366 /* print out info */
5367 printf("alpha = ");
5368 printf("(%24.16e, %24.16e)", alpha[0],
5369 alpha[1]);;
5370 printf(" ");
5371 printf("beta = ");
5372 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
5373 printf("\n");
5374
5375 printf("a\n");
5376 zhe_print_matrix(a, m_i, lda, order_type,
5377 uplo_type);
5378 dge_print_matrix(b, m, n, ldb, order_type, "B");
5379 zge_print_matrix(c_gen, m, n, ldc, order_type,
5380 "C_gen");
5381 zge_print_matrix(c, m, n, ldc, order_type, "C");
5382 zge_print_matrix(head_r_true, m, n, ldc,
5383 order_type, "head_r_true");
5384
5385 printf("ratio = %g\n", ratio);
5386 }
5387 bad_ratio_count++;
5388 if (bad_ratio_count >= MAX_BAD_TESTS) {
5389 printf("\ntoo many failures, exiting....");
5390 printf("\nTesting and compilation");
5391 printf(" are incomplete\n\n");
5392 goto end;
5393 }
5394 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
5395 printf("\nFlagrant ratio %e, exiting...",
5396 ratio);
5397 printf("\nTesting and compilation");
5398 printf(" are incomplete\n\n");
5399 goto end;
5400 }
5401 }
5402
5403 if (ratio > ratio_max)
5404 ratio_max = ratio;
5405
5406 if (ratio != 0.0 && ratio < ratio_min)
5407 ratio_min = ratio;
5408
5409 } /* end of randomize loop */
5410
5411 } /* end of ldc loop */
5412
5413 } /* end of ldb loop */
5414
5415 } /* end of lda loop */
5416
5417 } /* end of side loop */
5418
5419 } /* end of uplo loop */
5420
5421 } /* end of order loop */
5422
5423 } /* end of nr test loop */
5424
5425 } /* end of norm loop */
5426
5427
5428 } /* end of prec loop */
5429
5430 } /* end of beta loop */
5431
5432 } /* end of alpha loop */
5433
5434 end:
5435 FPU_FIX_STOP;
5436
5437 blas_free(c);
5438 blas_free(a);
5439 blas_free(b);
5440 blas_free(c_gen);
5441 blas_free(head_r_true);
5442 blas_free(tail_r_true);
5443 blas_free(ratios);
5444 blas_free(a_vec);
5445 blas_free(b_vec);
5446
5447 *max_ratio = ratio_max;
5448 *min_ratio = ratio_min;
5449 *num_tests = test_count;
5450 *num_bad_ratio = bad_ratio_count;
5451
5452 }
5453
main(int argc,char ** argv)5454 int main(int argc, char **argv)
5455 {
5456 int nsizes, ntests, debug;
5457 double thresh, test_prob;
5458 double total_min_ratio, total_max_ratio;
5459 int total_bad_ratios;
5460 int seed, num_bad_ratio, num_tests;
5461 int total_tests, nr_failed_routines = 0, nr_routines = 0;
5462 double min_ratio, max_ratio;
5463 const char *base_routine = "hemm";
5464 char *fname;
5465 int n;
5466
5467 int m, i;
5468 int mn_data[NUM_DATA][2] = { {4, 4}, {2, 3}, {4, 2}, {8, 8},
5469 {10, 1}, {1, 1}, {1, 7}
5470 };
5471
5472 if (argc != 6) {
5473 printf("Usage:\n");
5474 printf("do_test_hemm <nsizes> <ntests> <thresh> <debug> <test_prob>\n");
5475 printf(" <nsizes>: number of sizes to be run.\n");
5476 printf
5477 (" <ntests>: the number of tests performed for each set of attributes\n");
5478 printf
5479 (" <thresh>: to catch bad ratios if it is greater than <thresh>\n");
5480 printf(" <debug>: 0, 1, 2, or 3; \n");
5481 printf(" if 0, no printing \n");
5482 printf(" if 1, print error summary only if tests fail\n");
5483 printf(" if 2, print error summary for each n\n");
5484 printf(" if 3, print complete info each test fails \n");
5485 printf("<test_prob>: probability of preforming a given \n");
5486 printf(" test case: 0.0 does no tests, 1.0 does all tests\n");
5487 return -1;
5488 } else {
5489 nsizes = atoi(argv[1]);
5490 ntests = atoi(argv[2]);
5491 thresh = atof(argv[3]);
5492 debug = atoi(argv[4]);
5493 test_prob = atof(argv[5]);
5494 }
5495
5496 seed = 1999;
5497
5498 if (nsizes < 0 || ntests < 0 || debug < 0 || debug > 3)
5499 BLAS_error("Testing hemm", 0, 0, NULL);
5500
5501 printf("Testing %s...\n", base_routine);
5502 printf("INPUT: nsizes = %d, ntests = %d, thresh = %4.2f, debug = %d\n\n",
5503 nsizes, ntests, thresh, debug);
5504
5505
5506
5507
5508
5509 fname = "BLAS_zhemm_z_c";
5510 printf("Testing %s...\n", fname);
5511 total_tests = 0;
5512 total_bad_ratios = 0;
5513 total_min_ratio = 1e308;
5514 total_max_ratio = 0.0;
5515 for (i = 0; i < nsizes; i++) {
5516 m = mn_data[i][0];
5517 n = mn_data[i][1];
5518
5519 do_test_zhemm_z_c(m, n, ntests, &seed, thresh, debug,
5520 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5521 &num_tests);
5522
5523 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5524 printf(" [%d %d]: ", m, n);
5525 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5526 num_bad_ratio, num_tests, min_ratio, max_ratio);
5527 }
5528
5529 total_tests += num_tests;
5530 total_bad_ratios += num_bad_ratio;
5531 if (total_min_ratio > min_ratio)
5532 total_min_ratio = min_ratio;
5533 if (total_max_ratio < max_ratio)
5534 total_max_ratio = max_ratio;
5535 }
5536
5537 nr_routines++;
5538 if (total_bad_ratios == 0)
5539 printf("PASS> ");
5540 else {
5541 printf("FAIL> ");
5542 nr_failed_routines++;
5543 }
5544 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5545 fname, total_bad_ratios, total_tests, max_ratio);
5546
5547 fname = "BLAS_zhemm_c_z";
5548 printf("Testing %s...\n", fname);
5549 total_tests = 0;
5550 total_bad_ratios = 0;
5551 total_min_ratio = 1e308;
5552 total_max_ratio = 0.0;
5553 for (i = 0; i < nsizes; i++) {
5554 m = mn_data[i][0];
5555 n = mn_data[i][1];
5556
5557 do_test_zhemm_c_z(m, n, ntests, &seed, thresh, debug,
5558 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5559 &num_tests);
5560
5561 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5562 printf(" [%d %d]: ", m, n);
5563 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5564 num_bad_ratio, num_tests, min_ratio, max_ratio);
5565 }
5566
5567 total_tests += num_tests;
5568 total_bad_ratios += num_bad_ratio;
5569 if (total_min_ratio > min_ratio)
5570 total_min_ratio = min_ratio;
5571 if (total_max_ratio < max_ratio)
5572 total_max_ratio = max_ratio;
5573 }
5574
5575 nr_routines++;
5576 if (total_bad_ratios == 0)
5577 printf("PASS> ");
5578 else {
5579 printf("FAIL> ");
5580 nr_failed_routines++;
5581 }
5582 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5583 fname, total_bad_ratios, total_tests, max_ratio);
5584
5585 fname = "BLAS_zhemm_c_c";
5586 printf("Testing %s...\n", fname);
5587 total_tests = 0;
5588 total_bad_ratios = 0;
5589 total_min_ratio = 1e308;
5590 total_max_ratio = 0.0;
5591 for (i = 0; i < nsizes; i++) {
5592 m = mn_data[i][0];
5593 n = mn_data[i][1];
5594
5595 do_test_zhemm_c_c(m, n, ntests, &seed, thresh, debug,
5596 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5597 &num_tests);
5598
5599 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5600 printf(" [%d %d]: ", m, n);
5601 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5602 num_bad_ratio, num_tests, min_ratio, max_ratio);
5603 }
5604
5605 total_tests += num_tests;
5606 total_bad_ratios += num_bad_ratio;
5607 if (total_min_ratio > min_ratio)
5608 total_min_ratio = min_ratio;
5609 if (total_max_ratio < max_ratio)
5610 total_max_ratio = max_ratio;
5611 }
5612
5613 nr_routines++;
5614 if (total_bad_ratios == 0)
5615 printf("PASS> ");
5616 else {
5617 printf("FAIL> ");
5618 nr_failed_routines++;
5619 }
5620 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5621 fname, total_bad_ratios, total_tests, max_ratio);
5622
5623 fname = "BLAS_chemm_c_s";
5624 printf("Testing %s...\n", fname);
5625 total_tests = 0;
5626 total_bad_ratios = 0;
5627 total_min_ratio = 1e308;
5628 total_max_ratio = 0.0;
5629 for (i = 0; i < nsizes; i++) {
5630 m = mn_data[i][0];
5631 n = mn_data[i][1];
5632
5633 do_test_chemm_c_s(m, n, ntests, &seed, thresh, debug,
5634 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5635 &num_tests);
5636
5637 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5638 printf(" [%d %d]: ", m, n);
5639 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5640 num_bad_ratio, num_tests, min_ratio, max_ratio);
5641 }
5642
5643 total_tests += num_tests;
5644 total_bad_ratios += num_bad_ratio;
5645 if (total_min_ratio > min_ratio)
5646 total_min_ratio = min_ratio;
5647 if (total_max_ratio < max_ratio)
5648 total_max_ratio = max_ratio;
5649 }
5650
5651 nr_routines++;
5652 if (total_bad_ratios == 0)
5653 printf("PASS> ");
5654 else {
5655 printf("FAIL> ");
5656 nr_failed_routines++;
5657 }
5658 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5659 fname, total_bad_ratios, total_tests, max_ratio);
5660
5661 fname = "BLAS_zhemm_z_d";
5662 printf("Testing %s...\n", fname);
5663 total_tests = 0;
5664 total_bad_ratios = 0;
5665 total_min_ratio = 1e308;
5666 total_max_ratio = 0.0;
5667 for (i = 0; i < nsizes; i++) {
5668 m = mn_data[i][0];
5669 n = mn_data[i][1];
5670
5671 do_test_zhemm_z_d(m, n, ntests, &seed, thresh, debug,
5672 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5673 &num_tests);
5674
5675 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5676 printf(" [%d %d]: ", m, n);
5677 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5678 num_bad_ratio, num_tests, min_ratio, max_ratio);
5679 }
5680
5681 total_tests += num_tests;
5682 total_bad_ratios += num_bad_ratio;
5683 if (total_min_ratio > min_ratio)
5684 total_min_ratio = min_ratio;
5685 if (total_max_ratio < max_ratio)
5686 total_max_ratio = max_ratio;
5687 }
5688
5689 nr_routines++;
5690 if (total_bad_ratios == 0)
5691 printf("PASS> ");
5692 else {
5693 printf("FAIL> ");
5694 nr_failed_routines++;
5695 }
5696 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5697 fname, total_bad_ratios, total_tests, max_ratio);
5698
5699 fname = "BLAS_chemm_x";
5700 printf("Testing %s...\n", fname);
5701 total_tests = 0;
5702 total_bad_ratios = 0;
5703 total_min_ratio = 1e308;
5704 total_max_ratio = 0.0;
5705 for (i = 0; i < nsizes; i++) {
5706 m = mn_data[i][0];
5707 n = mn_data[i][1];
5708
5709 do_test_chemm_x(m, n, ntests, &seed, thresh, debug,
5710 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5711 &num_tests);
5712
5713 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5714 printf(" [%d %d]: ", m, n);
5715 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5716 num_bad_ratio, num_tests, min_ratio, max_ratio);
5717 }
5718
5719 total_tests += num_tests;
5720 total_bad_ratios += num_bad_ratio;
5721 if (total_min_ratio > min_ratio)
5722 total_min_ratio = min_ratio;
5723 if (total_max_ratio < max_ratio)
5724 total_max_ratio = max_ratio;
5725 }
5726
5727 nr_routines++;
5728 if (total_bad_ratios == 0)
5729 printf("PASS> ");
5730 else {
5731 printf("FAIL> ");
5732 nr_failed_routines++;
5733 }
5734 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5735 fname, total_bad_ratios, total_tests, max_ratio);
5736
5737 fname = "BLAS_zhemm_x";
5738 printf("Testing %s...\n", fname);
5739 total_tests = 0;
5740 total_bad_ratios = 0;
5741 total_min_ratio = 1e308;
5742 total_max_ratio = 0.0;
5743 for (i = 0; i < nsizes; i++) {
5744 m = mn_data[i][0];
5745 n = mn_data[i][1];
5746
5747 do_test_zhemm_x(m, n, ntests, &seed, thresh, debug,
5748 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5749 &num_tests);
5750
5751 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5752 printf(" [%d %d]: ", m, n);
5753 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5754 num_bad_ratio, num_tests, min_ratio, max_ratio);
5755 }
5756
5757 total_tests += num_tests;
5758 total_bad_ratios += num_bad_ratio;
5759 if (total_min_ratio > min_ratio)
5760 total_min_ratio = min_ratio;
5761 if (total_max_ratio < max_ratio)
5762 total_max_ratio = max_ratio;
5763 }
5764
5765 nr_routines++;
5766 if (total_bad_ratios == 0)
5767 printf("PASS> ");
5768 else {
5769 printf("FAIL> ");
5770 nr_failed_routines++;
5771 }
5772 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5773 fname, total_bad_ratios, total_tests, max_ratio);
5774
5775 fname = "BLAS_zhemm_z_c_x";
5776 printf("Testing %s...\n", fname);
5777 total_tests = 0;
5778 total_bad_ratios = 0;
5779 total_min_ratio = 1e308;
5780 total_max_ratio = 0.0;
5781 for (i = 0; i < nsizes; i++) {
5782 m = mn_data[i][0];
5783 n = mn_data[i][1];
5784
5785 do_test_zhemm_z_c_x(m, n, ntests, &seed, thresh, debug,
5786 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5787 &num_tests);
5788
5789 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5790 printf(" [%d %d]: ", m, n);
5791 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5792 num_bad_ratio, num_tests, min_ratio, max_ratio);
5793 }
5794
5795 total_tests += num_tests;
5796 total_bad_ratios += num_bad_ratio;
5797 if (total_min_ratio > min_ratio)
5798 total_min_ratio = min_ratio;
5799 if (total_max_ratio < max_ratio)
5800 total_max_ratio = max_ratio;
5801 }
5802
5803 nr_routines++;
5804 if (total_bad_ratios == 0)
5805 printf("PASS> ");
5806 else {
5807 printf("FAIL> ");
5808 nr_failed_routines++;
5809 }
5810 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5811 fname, total_bad_ratios, total_tests, max_ratio);
5812
5813 fname = "BLAS_zhemm_c_z_x";
5814 printf("Testing %s...\n", fname);
5815 total_tests = 0;
5816 total_bad_ratios = 0;
5817 total_min_ratio = 1e308;
5818 total_max_ratio = 0.0;
5819 for (i = 0; i < nsizes; i++) {
5820 m = mn_data[i][0];
5821 n = mn_data[i][1];
5822
5823 do_test_zhemm_c_z_x(m, n, ntests, &seed, thresh, debug,
5824 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5825 &num_tests);
5826
5827 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5828 printf(" [%d %d]: ", m, n);
5829 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5830 num_bad_ratio, num_tests, min_ratio, max_ratio);
5831 }
5832
5833 total_tests += num_tests;
5834 total_bad_ratios += num_bad_ratio;
5835 if (total_min_ratio > min_ratio)
5836 total_min_ratio = min_ratio;
5837 if (total_max_ratio < max_ratio)
5838 total_max_ratio = max_ratio;
5839 }
5840
5841 nr_routines++;
5842 if (total_bad_ratios == 0)
5843 printf("PASS> ");
5844 else {
5845 printf("FAIL> ");
5846 nr_failed_routines++;
5847 }
5848 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5849 fname, total_bad_ratios, total_tests, max_ratio);
5850
5851 fname = "BLAS_zhemm_c_c_x";
5852 printf("Testing %s...\n", fname);
5853 total_tests = 0;
5854 total_bad_ratios = 0;
5855 total_min_ratio = 1e308;
5856 total_max_ratio = 0.0;
5857 for (i = 0; i < nsizes; i++) {
5858 m = mn_data[i][0];
5859 n = mn_data[i][1];
5860
5861 do_test_zhemm_c_c_x(m, n, ntests, &seed, thresh, debug,
5862 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5863 &num_tests);
5864
5865 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5866 printf(" [%d %d]: ", m, n);
5867 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5868 num_bad_ratio, num_tests, min_ratio, max_ratio);
5869 }
5870
5871 total_tests += num_tests;
5872 total_bad_ratios += num_bad_ratio;
5873 if (total_min_ratio > min_ratio)
5874 total_min_ratio = min_ratio;
5875 if (total_max_ratio < max_ratio)
5876 total_max_ratio = max_ratio;
5877 }
5878
5879 nr_routines++;
5880 if (total_bad_ratios == 0)
5881 printf("PASS> ");
5882 else {
5883 printf("FAIL> ");
5884 nr_failed_routines++;
5885 }
5886 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5887 fname, total_bad_ratios, total_tests, max_ratio);
5888
5889 fname = "BLAS_chemm_c_s_x";
5890 printf("Testing %s...\n", fname);
5891 total_tests = 0;
5892 total_bad_ratios = 0;
5893 total_min_ratio = 1e308;
5894 total_max_ratio = 0.0;
5895 for (i = 0; i < nsizes; i++) {
5896 m = mn_data[i][0];
5897 n = mn_data[i][1];
5898
5899 do_test_chemm_c_s_x(m, n, ntests, &seed, thresh, debug,
5900 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5901 &num_tests);
5902
5903 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5904 printf(" [%d %d]: ", m, n);
5905 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5906 num_bad_ratio, num_tests, min_ratio, max_ratio);
5907 }
5908
5909 total_tests += num_tests;
5910 total_bad_ratios += num_bad_ratio;
5911 if (total_min_ratio > min_ratio)
5912 total_min_ratio = min_ratio;
5913 if (total_max_ratio < max_ratio)
5914 total_max_ratio = max_ratio;
5915 }
5916
5917 nr_routines++;
5918 if (total_bad_ratios == 0)
5919 printf("PASS> ");
5920 else {
5921 printf("FAIL> ");
5922 nr_failed_routines++;
5923 }
5924 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5925 fname, total_bad_ratios, total_tests, max_ratio);
5926
5927 fname = "BLAS_zhemm_z_d_x";
5928 printf("Testing %s...\n", fname);
5929 total_tests = 0;
5930 total_bad_ratios = 0;
5931 total_min_ratio = 1e308;
5932 total_max_ratio = 0.0;
5933 for (i = 0; i < nsizes; i++) {
5934 m = mn_data[i][0];
5935 n = mn_data[i][1];
5936
5937 do_test_zhemm_z_d_x(m, n, ntests, &seed, thresh, debug,
5938 test_prob, &min_ratio, &max_ratio, &num_bad_ratio,
5939 &num_tests);
5940
5941 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
5942 printf(" [%d %d]: ", m, n);
5943 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
5944 num_bad_ratio, num_tests, min_ratio, max_ratio);
5945 }
5946
5947 total_tests += num_tests;
5948 total_bad_ratios += num_bad_ratio;
5949 if (total_min_ratio > min_ratio)
5950 total_min_ratio = min_ratio;
5951 if (total_max_ratio < max_ratio)
5952 total_max_ratio = max_ratio;
5953 }
5954
5955 nr_routines++;
5956 if (total_bad_ratios == 0)
5957 printf("PASS> ");
5958 else {
5959 printf("FAIL> ");
5960 nr_failed_routines++;
5961 }
5962 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
5963 fname, total_bad_ratios, total_tests, max_ratio);
5964
5965
5966
5967 printf("\n");
5968 if (nr_failed_routines)
5969 printf("FAILED ");
5970 else
5971 printf("PASSED ");
5972 printf("%-10s: FAIL/TOTAL = %d/%d\n",
5973 base_routine, nr_failed_routines, nr_routines);
5974
5975 return 0;
5976 }
5977