1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <math.h>
4 #include "blas_extended.h"
5 #include "blas_extended_private.h"
6 #include "blas_extended_test.h"
7
8 /* 0 -- 1 */
9 #define ORDER_START 0
10 #define ORDER_END 1
11
12 /* 0 -- 2 */
13 #define ALPHA_START 0
14 #define ALPHA_END 2
15
16 /* 0 -- 2 */
17 #define BETA_START 0
18 #define BETA_END 2
19
20 /* -1 -- 1 */
21 #define NORM_START -1
22 #define NORM_END 1
23
24 /* 0 -- 2 */
25 #define LDA_START 0
26 #define LDA_END 2
27
28 /* 0 -- 2 */
29 #define PREC_START 0
30 #define PREC_END 2
31
32 /* 0 -- 1 */
33 #define RANDOMIZE_START 0
34 #define RANDOMIZE_END 1
35
36 /* -2 -- 2 (Stride) */
37 #define INCX_START -2
38 #define INCX_END 2
39
40 /* -2 -- 2 (Stride) */
41 #define INCY_START -2
42 #define INCY_END 2
43
44 #define NUM_DATA 7
45
46
47
48
49
50
51
52
53
54
do_test_dge_sum_mv_d_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)55 void do_test_dge_sum_mv_d_s
56 (int m, int n,
57 int ntests, int *seed, double thresh, int debug, float test_prob,
58 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
59
60 /* Function name */
61 const char fname[] = "BLAS_dge_sum_mv_d_s";
62
63 int i;
64 int yi;
65 int incyi, y_starti, incx_veci;
66 int test_count;
67 int bad_ratio_count;
68
69 int ri;
70 int incri;
71 int inca, incx, incy;
72
73 double ratio;
74
75 double ratio_min, ratio_max;
76
77 double eps_int; /* internal machine epsilon */
78 double un_int; /* internal underflow threshold */
79
80 double rin;
81 double rout;
82 double head_r_true_elem, tail_r_true_elem;
83
84 enum blas_order_type order_type;
85 enum blas_prec_type prec;
86
87 int order_val;
88 int lda_val, incx_val, incy_val;
89 int ldb_val;
90 int alpha_val, beta_val;
91 int randomize_val;
92
93
94
95 int lda, ldb;
96 int alpha_flag, beta_flag;
97 int saved_seed;
98 int norm;
99 int test_no;
100
101 int n_i, m_i;
102 int inca_veci;
103
104 double alpha;
105 double beta;
106 double beta_zero_fake;
107 double alpha_use;
108 double *a;
109 double *a_use;
110 double *B;
111 double *B_use;
112 float *x;
113 double *y;
114 double *a_vec;
115 float *x_vec;
116
117
118 double *ratios;
119
120 /* true result calculated by testgen, in double-double */
121 double *head_r_true, *tail_r_true;
122
123 FPU_FIX_DECL;
124
125 beta_zero_fake = 0.0;
126
127 if (n < 0 || ntests < 0)
128 BLAS_error(fname, -3, n, NULL);
129
130 /* initialization */
131 saved_seed = *seed;
132 ratio = 0.0;
133 ratio_min = 1e308;
134 ratio_max = 0.0;
135
136 *num_tests = 0;
137 *num_bad_ratio = 0;
138 *min_ratio = 0.0;
139 *max_ratio = 0.0;
140
141 if (n == 0)
142 return;
143
144 FPU_FIX_START;
145
146 n_i = n;
147 m_i = m;
148
149 inca = incx = incy = 1;
150
151
152
153
154 /* allocate memory for arrays */
155 y = (double *) blas_malloc(4 * m_i * sizeof(double));
156 if (4 * m_i > 0 && y == NULL) {
157 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
158 }
159 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double));
160 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
161 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
162 }
163 a_use = (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double));
164 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
165 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
166 }
167 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
168 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
169 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
170 }
171 B_use = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
172 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
173 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
174 }
175 x = (float *) blas_malloc(4 * n_i * sizeof(float));
176 if (4 * n_i > 0 && x == NULL) {
177 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
178 }
179
180 inca_veci = 1;
181
182 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
183 if (2 * n_i > 0 && a_vec == NULL) {
184 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
185 }
186 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
187 if (2 * n_i > 0 && x_vec == NULL) {
188 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
189 }
190 head_r_true = (double *) blas_malloc(m_i * sizeof(double));
191 tail_r_true = (double *) blas_malloc(m_i * sizeof(double));
192 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
193 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
194 }
195 ratios = (double *) blas_malloc(m_i * sizeof(double));
196 if (m_i > 0 && ratios == NULL) {
197 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
198 }
199
200 test_count = 0;
201 bad_ratio_count = 0;
202
203 /* vary alpha */
204 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
205
206 alpha_flag = 0;
207 switch (alpha_val) {
208 case 0:
209 alpha = 0.0;
210 alpha_flag = 1;
211 break;
212 case 1:
213 alpha = 1.0;
214 alpha_flag = 1;
215 break;
216 }
217
218 /* vary beta */
219 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
220 beta_flag = 0;
221 switch (beta_val) {
222 case 0:
223 beta = 0.0;
224 beta_flag = 1;
225 break;
226 case 1:
227 beta = 1.0;
228 beta_flag = 1;
229 break;
230 }
231
232
233 eps_int = power(2, -BITS_D);
234 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
235 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
236 prec = blas_prec_double;
237
238 /* vary norm -- underflow, approx 1, overflow */
239 for (norm = NORM_START; norm <= NORM_END; norm++) {
240
241 /* number of tests */
242 for (test_no = 0; test_no < ntests; test_no++) {
243
244
245 /* vary storage format */
246 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
247
248 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
249
250 /* vary lda = n_i, n_i+1, 2*n_i */
251 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
252
253 if (order_type == blas_rowmajor) {
254 lda = (lda_val == 0) ? n_i :
255 (lda_val == 1) ? n_i + 1 : n_i * n_i;
256 } else {
257 lda = (lda_val == 0) ? m_i :
258 (lda_val == 1) ? m_i + 1 : m_i * m_i;
259 }
260
261 /* vary ldb = n_i, n_i+1, 2*n_i */
262 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
263
264 if (order_type == blas_rowmajor) {
265 ldb = (ldb_val == 0) ? n_i :
266 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
267 } else {
268 ldb = (ldb_val == 0) ? m_i :
269 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
270 }
271
272 for (randomize_val = RANDOMIZE_START;
273 randomize_val <= RANDOMIZE_END; randomize_val++) {
274
275 /* For the sake of speed, we throw out this case at random */
276 if (xrand(seed) >= test_prob)
277 continue;
278
279 /* finally we are here to generate the test case */
280 /* alpha_use, a_use, B_use are the generated alpha, a, B
281 * before any scaling.
282 * That is, in the generator, alpha == beta == alpha_use
283 * before scaling. */
284
285 saved_seed = *seed;
286 BLAS_dge_sum_mv_d_s_testgen(norm, order_type,
287 m, n, randomize_val, &alpha,
288 alpha_flag, &beta, beta_flag, a,
289 lda, B, ldb, x_vec, 1,
290 &alpha_use, a_use, B_use, seed,
291 head_r_true, tail_r_true);
292
293 /* vary incx = 1, 2 */
294 for (incx_val = INCX_START; incx_val <= INCX_END;
295 incx_val++) {
296
297 incx = incx_val;
298 if (0 == incx)
299 continue;
300
301 scopy_vector(x_vec, n_i, 1, x, incx);
302
303 /* vary incy = 1, 2 */
304 for (incy_val = INCY_START; incy_val <= INCY_END;
305 incy_val++) {
306
307 incy = incy_val;
308 if (0 == incy)
309 continue;
310
311 test_count++;
312
313 /* call ge_sum_mv routines to be tested */
314 FPU_FIX_STOP;
315 BLAS_dge_sum_mv_d_s(order_type,
316 m, n, alpha, a, lda, x, incx, beta,
317 B, ldb, y, incy);
318 FPU_FIX_START;
319
320 /* now compute the ratio using test_BLAS_xdot */
321 /* copy a row from A, use x, run
322 dot test */
323
324 incyi = incy;
325
326 incri = 1;
327 incx_veci = 1;
328
329
330
331 if (incy < 0) {
332 y_starti = (-m_i + 1) * incyi;
333 } else {
334 y_starti = 0;
335 }
336 /* make two copies of x into x_vec. redundant */
337 scopy_vector(x, n_i, incx, x_vec, 1);
338 scopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
339 1);
340 for (i = 0, yi = y_starti, ri = 0; i < m_i;
341 i++, yi += incyi, ri += incri) {
342 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
343 a_use, lda, a_vec, i);
344 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
345 B_use, ldb, (a_vec + inca_veci * n_i),
346 i);
347
348 rin = 0.0;
349 rout = y[yi];
350 head_r_true_elem = head_r_true[ri];
351 tail_r_true_elem = tail_r_true[ri];
352
353 test_BLAS_ddot_d_s(2 * n_i,
354 blas_no_conj,
355 alpha_use, beta_zero_fake, rin,
356 rout, head_r_true_elem,
357 tail_r_true_elem, a_vec, 1, x_vec,
358 1, eps_int, un_int, &ratios[i]);
359
360 /* take the max ratio */
361 if (i == 0) {
362 ratio = ratios[0];
363 /* The !<= below causes NaN errors
364 * to be included.
365 * Note that (NaN > 0) is false */
366 } else if (!(ratios[i] <= ratio)) {
367 ratio = ratios[i];
368 }
369 } /* end of dot-test loop */
370
371 /* The !<= below causes NaN errors
372 * to be included.
373 * Note that (NaN > 0) is false */
374 if (!(ratio <= thresh)) {
375
376 if (debug == 3) {
377 printf("\n\t\tTest # %d\n", test_count);
378 printf("y type : d, a type : d, x type : s\n");
379 printf("Seed = %d\t", saved_seed);
380 printf("n %d, m %d\n", n, m);
381 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
382 ldb, incx, incx);
383
384 if (order_type == blas_rowmajor)
385 printf("row ");
386 else
387 printf("col ");
388
389 printf("NORM %d, ALPHA %d, BETA %d\n",
390 norm, alpha_val, beta_val);
391 printf("randomize %d\n", randomize_val);
392
393 /* print out info */
394 printf("alpha = ");
395 printf("%24.16e", alpha);;
396 printf(" ");
397 printf("beta = ");
398 printf("%24.16e", beta);;
399 printf("\n");
400 printf("alpha_use = ");
401 printf("%24.16e", alpha_use);;
402 printf("\n");
403
404 dge_print_matrix(a, m_i, n_i, lda, order_type, "A");
405 dge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
406 sprint_vector(x, n_i, incx, "x");
407
408 dprint_vector(y, m_i, incy, "y");
409
410 dprint_vector(head_r_true, m_i, 1, "head_r_true");
411
412 dge_print_matrix(a_use, m_i, n_i, lda, order_type,
413 "A_use");
414 dge_print_matrix(B_use, m_i, n_i, ldb, order_type,
415 "B_use");
416
417 dprint_vector(ratios, m_i, 1, "ratios");
418 printf("ratio = %g\n", ratio);
419 fflush(stdout);
420 }
421 bad_ratio_count++;
422 if (bad_ratio_count >= MAX_BAD_TESTS) {
423 printf("\ntoo many failures, exiting....");
424 printf("\nTesting and compilation");
425 printf(" are incomplete\n\n");
426 goto end;
427 }
428 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
429 printf("\nFlagrant ratio error, exiting...");
430 printf("\nTesting and compilation");
431 printf(" are incomplete\n\n");
432 goto end;
433 }
434 }
435
436 if (!(ratio <= ratio_max))
437 ratio_max = ratio;
438
439 if (ratio != 0.0 && !(ratio >= ratio_min))
440 ratio_min = ratio;
441
442 } /* end of incy loop */
443
444 } /* end of incx loop */
445
446 } /* end of randmize loop */
447
448 } /* end of ldb loop */
449
450 } /* end of lda loop */
451
452 } /* end of order loop */
453
454 } /* end of nr test loop */
455
456 } /* end of norm loop */
457
458
459
460 } /* end of beta loop */
461
462 } /* end of alpha loop */
463
464 FPU_FIX_STOP;
465
466 end:
467 blas_free(y);
468 blas_free(a);
469 blas_free(a_use);
470 blas_free(B);
471 blas_free(B_use);
472 blas_free(x);
473 blas_free(head_r_true);
474 blas_free(tail_r_true);
475 blas_free(ratios);
476 blas_free(a_vec);
477 blas_free(x_vec);
478
479 *max_ratio = ratio_max;
480 *min_ratio = ratio_min;
481 *num_tests = test_count;
482 *num_bad_ratio = bad_ratio_count;
483
484 }
do_test_dge_sum_mv_s_d(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)485 void do_test_dge_sum_mv_s_d
486 (int m, int n,
487 int ntests, int *seed, double thresh, int debug, float test_prob,
488 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
489
490 /* Function name */
491 const char fname[] = "BLAS_dge_sum_mv_s_d";
492
493 int i;
494 int yi;
495 int incyi, y_starti, incx_veci;
496 int test_count;
497 int bad_ratio_count;
498
499 int ri;
500 int incri;
501 int inca, incx, incy;
502
503 double ratio;
504
505 double ratio_min, ratio_max;
506
507 double eps_int; /* internal machine epsilon */
508 double un_int; /* internal underflow threshold */
509
510 double rin;
511 double rout;
512 double head_r_true_elem, tail_r_true_elem;
513
514 enum blas_order_type order_type;
515 enum blas_prec_type prec;
516
517 int order_val;
518 int lda_val, incx_val, incy_val;
519 int ldb_val;
520 int alpha_val, beta_val;
521 int randomize_val;
522
523
524
525 int lda, ldb;
526 int alpha_flag, beta_flag;
527 int saved_seed;
528 int norm;
529 int test_no;
530
531 int n_i, m_i;
532 int inca_veci;
533
534 double alpha;
535 double beta;
536 double beta_zero_fake;
537 double alpha_use;
538 float *a;
539 float *a_use;
540 float *B;
541 float *B_use;
542 double *x;
543 double *y;
544 float *a_vec;
545 double *x_vec;
546
547
548 double *ratios;
549
550 /* true result calculated by testgen, in double-double */
551 double *head_r_true, *tail_r_true;
552
553 FPU_FIX_DECL;
554
555 beta_zero_fake = 0.0;
556
557 if (n < 0 || ntests < 0)
558 BLAS_error(fname, -3, n, NULL);
559
560 /* initialization */
561 saved_seed = *seed;
562 ratio = 0.0;
563 ratio_min = 1e308;
564 ratio_max = 0.0;
565
566 *num_tests = 0;
567 *num_bad_ratio = 0;
568 *min_ratio = 0.0;
569 *max_ratio = 0.0;
570
571 if (n == 0)
572 return;
573
574 FPU_FIX_START;
575
576 n_i = n;
577 m_i = m;
578
579 inca = incx = incy = 1;
580
581
582
583
584 /* allocate memory for arrays */
585 y = (double *) blas_malloc(4 * m_i * sizeof(double));
586 if (4 * m_i > 0 && y == NULL) {
587 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
588 }
589 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float));
590 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
591 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
592 }
593 a_use = (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float));
594 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
595 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
596 }
597 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
598 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
599 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
600 }
601 B_use = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
602 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
603 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
604 }
605 x = (double *) blas_malloc(4 * n_i * sizeof(double));
606 if (4 * n_i > 0 && x == NULL) {
607 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
608 }
609
610 inca_veci = 1;
611
612 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
613 if (2 * n_i > 0 && a_vec == NULL) {
614 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
615 }
616 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
617 if (2 * n_i > 0 && x_vec == NULL) {
618 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
619 }
620 head_r_true = (double *) blas_malloc(m_i * sizeof(double));
621 tail_r_true = (double *) blas_malloc(m_i * sizeof(double));
622 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
623 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
624 }
625 ratios = (double *) blas_malloc(m_i * sizeof(double));
626 if (m_i > 0 && ratios == NULL) {
627 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
628 }
629
630 test_count = 0;
631 bad_ratio_count = 0;
632
633 /* vary alpha */
634 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
635
636 alpha_flag = 0;
637 switch (alpha_val) {
638 case 0:
639 alpha = 0.0;
640 alpha_flag = 1;
641 break;
642 case 1:
643 alpha = 1.0;
644 alpha_flag = 1;
645 break;
646 }
647
648 /* vary beta */
649 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
650 beta_flag = 0;
651 switch (beta_val) {
652 case 0:
653 beta = 0.0;
654 beta_flag = 1;
655 break;
656 case 1:
657 beta = 1.0;
658 beta_flag = 1;
659 break;
660 }
661
662
663 eps_int = power(2, -BITS_D);
664 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
665 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
666 prec = blas_prec_double;
667
668 /* vary norm -- underflow, approx 1, overflow */
669 for (norm = NORM_START; norm <= NORM_END; norm++) {
670
671 /* number of tests */
672 for (test_no = 0; test_no < ntests; test_no++) {
673
674
675 /* vary storage format */
676 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
677
678 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
679
680 /* vary lda = n_i, n_i+1, 2*n_i */
681 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
682
683 if (order_type == blas_rowmajor) {
684 lda = (lda_val == 0) ? n_i :
685 (lda_val == 1) ? n_i + 1 : n_i * n_i;
686 } else {
687 lda = (lda_val == 0) ? m_i :
688 (lda_val == 1) ? m_i + 1 : m_i * m_i;
689 }
690
691 /* vary ldb = n_i, n_i+1, 2*n_i */
692 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
693
694 if (order_type == blas_rowmajor) {
695 ldb = (ldb_val == 0) ? n_i :
696 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
697 } else {
698 ldb = (ldb_val == 0) ? m_i :
699 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
700 }
701
702 for (randomize_val = RANDOMIZE_START;
703 randomize_val <= RANDOMIZE_END; randomize_val++) {
704
705 /* For the sake of speed, we throw out this case at random */
706 if (xrand(seed) >= test_prob)
707 continue;
708
709 /* finally we are here to generate the test case */
710 /* alpha_use, a_use, B_use are the generated alpha, a, B
711 * before any scaling.
712 * That is, in the generator, alpha == beta == alpha_use
713 * before scaling. */
714
715 saved_seed = *seed;
716 BLAS_dge_sum_mv_s_d_testgen(norm, order_type,
717 m, n, randomize_val, &alpha,
718 alpha_flag, &beta, beta_flag, a,
719 lda, B, ldb, x_vec, 1,
720 &alpha_use, a_use, B_use, seed,
721 head_r_true, tail_r_true);
722
723 /* vary incx = 1, 2 */
724 for (incx_val = INCX_START; incx_val <= INCX_END;
725 incx_val++) {
726
727 incx = incx_val;
728 if (0 == incx)
729 continue;
730
731 dcopy_vector(x_vec, n_i, 1, x, incx);
732
733 /* vary incy = 1, 2 */
734 for (incy_val = INCY_START; incy_val <= INCY_END;
735 incy_val++) {
736
737 incy = incy_val;
738 if (0 == incy)
739 continue;
740
741 test_count++;
742
743 /* call ge_sum_mv routines to be tested */
744 FPU_FIX_STOP;
745 BLAS_dge_sum_mv_s_d(order_type,
746 m, n, alpha, a, lda, x, incx, beta,
747 B, ldb, y, incy);
748 FPU_FIX_START;
749
750 /* now compute the ratio using test_BLAS_xdot */
751 /* copy a row from A, use x, run
752 dot test */
753
754 incyi = incy;
755
756 incri = 1;
757 incx_veci = 1;
758
759
760
761 if (incy < 0) {
762 y_starti = (-m_i + 1) * incyi;
763 } else {
764 y_starti = 0;
765 }
766 /* make two copies of x into x_vec. redundant */
767 dcopy_vector(x, n_i, incx, x_vec, 1);
768 dcopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
769 1);
770 for (i = 0, yi = y_starti, ri = 0; i < m_i;
771 i++, yi += incyi, ri += incri) {
772 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
773 a_use, lda, a_vec, i);
774 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
775 B_use, ldb, (a_vec + inca_veci * n_i),
776 i);
777
778 rin = 0.0;
779 rout = y[yi];
780 head_r_true_elem = head_r_true[ri];
781 tail_r_true_elem = tail_r_true[ri];
782
783 test_BLAS_ddot_s_d(2 * n_i,
784 blas_no_conj,
785 alpha_use, beta_zero_fake, rin,
786 rout, head_r_true_elem,
787 tail_r_true_elem, a_vec, 1, x_vec,
788 1, eps_int, un_int, &ratios[i]);
789
790 /* take the max ratio */
791 if (i == 0) {
792 ratio = ratios[0];
793 /* The !<= below causes NaN errors
794 * to be included.
795 * Note that (NaN > 0) is false */
796 } else if (!(ratios[i] <= ratio)) {
797 ratio = ratios[i];
798 }
799 } /* end of dot-test loop */
800
801 /* The !<= below causes NaN errors
802 * to be included.
803 * Note that (NaN > 0) is false */
804 if (!(ratio <= thresh)) {
805
806 if (debug == 3) {
807 printf("\n\t\tTest # %d\n", test_count);
808 printf("y type : d, a type : s, x type : d\n");
809 printf("Seed = %d\t", saved_seed);
810 printf("n %d, m %d\n", n, m);
811 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
812 ldb, incx, incx);
813
814 if (order_type == blas_rowmajor)
815 printf("row ");
816 else
817 printf("col ");
818
819 printf("NORM %d, ALPHA %d, BETA %d\n",
820 norm, alpha_val, beta_val);
821 printf("randomize %d\n", randomize_val);
822
823 /* print out info */
824 printf("alpha = ");
825 printf("%24.16e", alpha);;
826 printf(" ");
827 printf("beta = ");
828 printf("%24.16e", beta);;
829 printf("\n");
830 printf("alpha_use = ");
831 printf("%24.16e", alpha_use);;
832 printf("\n");
833
834 sge_print_matrix(a, m_i, n_i, lda, order_type, "A");
835 sge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
836 dprint_vector(x, n_i, incx, "x");
837
838 dprint_vector(y, m_i, incy, "y");
839
840 dprint_vector(head_r_true, m_i, 1, "head_r_true");
841
842 sge_print_matrix(a_use, m_i, n_i, lda, order_type,
843 "A_use");
844 sge_print_matrix(B_use, m_i, n_i, ldb, order_type,
845 "B_use");
846
847 dprint_vector(ratios, m_i, 1, "ratios");
848 printf("ratio = %g\n", ratio);
849 fflush(stdout);
850 }
851 bad_ratio_count++;
852 if (bad_ratio_count >= MAX_BAD_TESTS) {
853 printf("\ntoo many failures, exiting....");
854 printf("\nTesting and compilation");
855 printf(" are incomplete\n\n");
856 goto end;
857 }
858 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
859 printf("\nFlagrant ratio error, exiting...");
860 printf("\nTesting and compilation");
861 printf(" are incomplete\n\n");
862 goto end;
863 }
864 }
865
866 if (!(ratio <= ratio_max))
867 ratio_max = ratio;
868
869 if (ratio != 0.0 && !(ratio >= ratio_min))
870 ratio_min = ratio;
871
872 } /* end of incy loop */
873
874 } /* end of incx loop */
875
876 } /* end of randmize loop */
877
878 } /* end of ldb loop */
879
880 } /* end of lda loop */
881
882 } /* end of order loop */
883
884 } /* end of nr test loop */
885
886 } /* end of norm loop */
887
888
889
890 } /* end of beta loop */
891
892 } /* end of alpha loop */
893
894 FPU_FIX_STOP;
895
896 end:
897 blas_free(y);
898 blas_free(a);
899 blas_free(a_use);
900 blas_free(B);
901 blas_free(B_use);
902 blas_free(x);
903 blas_free(head_r_true);
904 blas_free(tail_r_true);
905 blas_free(ratios);
906 blas_free(a_vec);
907 blas_free(x_vec);
908
909 *max_ratio = ratio_max;
910 *min_ratio = ratio_min;
911 *num_tests = test_count;
912 *num_bad_ratio = bad_ratio_count;
913
914 }
do_test_dge_sum_mv_s_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)915 void do_test_dge_sum_mv_s_s
916 (int m, int n,
917 int ntests, int *seed, double thresh, int debug, float test_prob,
918 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
919
920 /* Function name */
921 const char fname[] = "BLAS_dge_sum_mv_s_s";
922
923 int i;
924 int yi;
925 int incyi, y_starti, incx_veci;
926 int test_count;
927 int bad_ratio_count;
928
929 int ri;
930 int incri;
931 int inca, incx, incy;
932
933 double ratio;
934
935 double ratio_min, ratio_max;
936
937 double eps_int; /* internal machine epsilon */
938 double un_int; /* internal underflow threshold */
939
940 double rin;
941 double rout;
942 double head_r_true_elem, tail_r_true_elem;
943
944 enum blas_order_type order_type;
945 enum blas_prec_type prec;
946
947 int order_val;
948 int lda_val, incx_val, incy_val;
949 int ldb_val;
950 int alpha_val, beta_val;
951 int randomize_val;
952
953
954
955 int lda, ldb;
956 int alpha_flag, beta_flag;
957 int saved_seed;
958 int norm;
959 int test_no;
960
961 int n_i, m_i;
962 int inca_veci;
963
964 double alpha;
965 double beta;
966 double beta_zero_fake;
967 double alpha_use;
968 float *a;
969 float *a_use;
970 float *B;
971 float *B_use;
972 float *x;
973 double *y;
974 float *a_vec;
975 float *x_vec;
976
977
978 double *ratios;
979
980 /* true result calculated by testgen, in double-double */
981 double *head_r_true, *tail_r_true;
982
983 FPU_FIX_DECL;
984
985 beta_zero_fake = 0.0;
986
987 if (n < 0 || ntests < 0)
988 BLAS_error(fname, -3, n, NULL);
989
990 /* initialization */
991 saved_seed = *seed;
992 ratio = 0.0;
993 ratio_min = 1e308;
994 ratio_max = 0.0;
995
996 *num_tests = 0;
997 *num_bad_ratio = 0;
998 *min_ratio = 0.0;
999 *max_ratio = 0.0;
1000
1001 if (n == 0)
1002 return;
1003
1004 FPU_FIX_START;
1005
1006 n_i = n;
1007 m_i = m;
1008
1009 inca = incx = incy = 1;
1010
1011
1012
1013
1014 /* allocate memory for arrays */
1015 y = (double *) blas_malloc(4 * m_i * sizeof(double));
1016 if (4 * m_i > 0 && y == NULL) {
1017 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1018 }
1019 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float));
1020 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
1021 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1022 }
1023 a_use = (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float));
1024 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
1025 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1026 }
1027 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
1028 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
1029 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1030 }
1031 B_use = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
1032 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
1033 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1034 }
1035 x = (float *) blas_malloc(4 * n_i * sizeof(float));
1036 if (4 * n_i > 0 && x == NULL) {
1037 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1038 }
1039
1040 inca_veci = 1;
1041
1042 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
1043 if (2 * n_i > 0 && a_vec == NULL) {
1044 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1045 }
1046 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
1047 if (2 * n_i > 0 && x_vec == NULL) {
1048 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1049 }
1050 head_r_true = (double *) blas_malloc(m_i * sizeof(double));
1051 tail_r_true = (double *) blas_malloc(m_i * sizeof(double));
1052 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1053 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1054 }
1055 ratios = (double *) blas_malloc(m_i * sizeof(double));
1056 if (m_i > 0 && ratios == NULL) {
1057 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1058 }
1059
1060 test_count = 0;
1061 bad_ratio_count = 0;
1062
1063 /* vary alpha */
1064 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
1065
1066 alpha_flag = 0;
1067 switch (alpha_val) {
1068 case 0:
1069 alpha = 0.0;
1070 alpha_flag = 1;
1071 break;
1072 case 1:
1073 alpha = 1.0;
1074 alpha_flag = 1;
1075 break;
1076 }
1077
1078 /* vary beta */
1079 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
1080 beta_flag = 0;
1081 switch (beta_val) {
1082 case 0:
1083 beta = 0.0;
1084 beta_flag = 1;
1085 break;
1086 case 1:
1087 beta = 1.0;
1088 beta_flag = 1;
1089 break;
1090 }
1091
1092
1093 eps_int = power(2, -BITS_D);
1094 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1095 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1096 prec = blas_prec_double;
1097
1098 /* vary norm -- underflow, approx 1, overflow */
1099 for (norm = NORM_START; norm <= NORM_END; norm++) {
1100
1101 /* number of tests */
1102 for (test_no = 0; test_no < ntests; test_no++) {
1103
1104
1105 /* vary storage format */
1106 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
1107
1108 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
1109
1110 /* vary lda = n_i, n_i+1, 2*n_i */
1111 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
1112
1113 if (order_type == blas_rowmajor) {
1114 lda = (lda_val == 0) ? n_i :
1115 (lda_val == 1) ? n_i + 1 : n_i * n_i;
1116 } else {
1117 lda = (lda_val == 0) ? m_i :
1118 (lda_val == 1) ? m_i + 1 : m_i * m_i;
1119 }
1120
1121 /* vary ldb = n_i, n_i+1, 2*n_i */
1122 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
1123
1124 if (order_type == blas_rowmajor) {
1125 ldb = (ldb_val == 0) ? n_i :
1126 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
1127 } else {
1128 ldb = (ldb_val == 0) ? m_i :
1129 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
1130 }
1131
1132 for (randomize_val = RANDOMIZE_START;
1133 randomize_val <= RANDOMIZE_END; randomize_val++) {
1134
1135 /* For the sake of speed, we throw out this case at random */
1136 if (xrand(seed) >= test_prob)
1137 continue;
1138
1139 /* finally we are here to generate the test case */
1140 /* alpha_use, a_use, B_use are the generated alpha, a, B
1141 * before any scaling.
1142 * That is, in the generator, alpha == beta == alpha_use
1143 * before scaling. */
1144
1145 saved_seed = *seed;
1146 BLAS_dge_sum_mv_s_s_testgen(norm, order_type,
1147 m, n, randomize_val, &alpha,
1148 alpha_flag, &beta, beta_flag, a,
1149 lda, B, ldb, x_vec, 1,
1150 &alpha_use, a_use, B_use, seed,
1151 head_r_true, tail_r_true);
1152
1153 /* vary incx = 1, 2 */
1154 for (incx_val = INCX_START; incx_val <= INCX_END;
1155 incx_val++) {
1156
1157 incx = incx_val;
1158 if (0 == incx)
1159 continue;
1160
1161 scopy_vector(x_vec, n_i, 1, x, incx);
1162
1163 /* vary incy = 1, 2 */
1164 for (incy_val = INCY_START; incy_val <= INCY_END;
1165 incy_val++) {
1166
1167 incy = incy_val;
1168 if (0 == incy)
1169 continue;
1170
1171 test_count++;
1172
1173 /* call ge_sum_mv routines to be tested */
1174 FPU_FIX_STOP;
1175 BLAS_dge_sum_mv_s_s(order_type,
1176 m, n, alpha, a, lda, x, incx, beta,
1177 B, ldb, y, incy);
1178 FPU_FIX_START;
1179
1180 /* now compute the ratio using test_BLAS_xdot */
1181 /* copy a row from A, use x, run
1182 dot test */
1183
1184 incyi = incy;
1185
1186 incri = 1;
1187 incx_veci = 1;
1188
1189
1190
1191 if (incy < 0) {
1192 y_starti = (-m_i + 1) * incyi;
1193 } else {
1194 y_starti = 0;
1195 }
1196 /* make two copies of x into x_vec. redundant */
1197 scopy_vector(x, n_i, incx, x_vec, 1);
1198 scopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
1199 1);
1200 for (i = 0, yi = y_starti, ri = 0; i < m_i;
1201 i++, yi += incyi, ri += incri) {
1202 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
1203 a_use, lda, a_vec, i);
1204 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
1205 B_use, ldb, (a_vec + inca_veci * n_i),
1206 i);
1207
1208 rin = 0.0;
1209 rout = y[yi];
1210 head_r_true_elem = head_r_true[ri];
1211 tail_r_true_elem = tail_r_true[ri];
1212
1213 test_BLAS_ddot_s_s(2 * n_i,
1214 blas_no_conj,
1215 alpha_use, beta_zero_fake, rin,
1216 rout, head_r_true_elem,
1217 tail_r_true_elem, a_vec, 1, x_vec,
1218 1, eps_int, un_int, &ratios[i]);
1219
1220 /* take the max ratio */
1221 if (i == 0) {
1222 ratio = ratios[0];
1223 /* The !<= below causes NaN errors
1224 * to be included.
1225 * Note that (NaN > 0) is false */
1226 } else if (!(ratios[i] <= ratio)) {
1227 ratio = ratios[i];
1228 }
1229 } /* end of dot-test loop */
1230
1231 /* The !<= below causes NaN errors
1232 * to be included.
1233 * Note that (NaN > 0) is false */
1234 if (!(ratio <= thresh)) {
1235
1236 if (debug == 3) {
1237 printf("\n\t\tTest # %d\n", test_count);
1238 printf("y type : d, a type : s, x type : s\n");
1239 printf("Seed = %d\t", saved_seed);
1240 printf("n %d, m %d\n", n, m);
1241 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
1242 ldb, incx, incx);
1243
1244 if (order_type == blas_rowmajor)
1245 printf("row ");
1246 else
1247 printf("col ");
1248
1249 printf("NORM %d, ALPHA %d, BETA %d\n",
1250 norm, alpha_val, beta_val);
1251 printf("randomize %d\n", randomize_val);
1252
1253 /* print out info */
1254 printf("alpha = ");
1255 printf("%24.16e", alpha);;
1256 printf(" ");
1257 printf("beta = ");
1258 printf("%24.16e", beta);;
1259 printf("\n");
1260 printf("alpha_use = ");
1261 printf("%24.16e", alpha_use);;
1262 printf("\n");
1263
1264 sge_print_matrix(a, m_i, n_i, lda, order_type, "A");
1265 sge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
1266 sprint_vector(x, n_i, incx, "x");
1267
1268 dprint_vector(y, m_i, incy, "y");
1269
1270 dprint_vector(head_r_true, m_i, 1, "head_r_true");
1271
1272 sge_print_matrix(a_use, m_i, n_i, lda, order_type,
1273 "A_use");
1274 sge_print_matrix(B_use, m_i, n_i, ldb, order_type,
1275 "B_use");
1276
1277 dprint_vector(ratios, m_i, 1, "ratios");
1278 printf("ratio = %g\n", ratio);
1279 fflush(stdout);
1280 }
1281 bad_ratio_count++;
1282 if (bad_ratio_count >= MAX_BAD_TESTS) {
1283 printf("\ntoo many failures, exiting....");
1284 printf("\nTesting and compilation");
1285 printf(" are incomplete\n\n");
1286 goto end;
1287 }
1288 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
1289 printf("\nFlagrant ratio error, exiting...");
1290 printf("\nTesting and compilation");
1291 printf(" are incomplete\n\n");
1292 goto end;
1293 }
1294 }
1295
1296 if (!(ratio <= ratio_max))
1297 ratio_max = ratio;
1298
1299 if (ratio != 0.0 && !(ratio >= ratio_min))
1300 ratio_min = ratio;
1301
1302 } /* end of incy loop */
1303
1304 } /* end of incx loop */
1305
1306 } /* end of randmize loop */
1307
1308 } /* end of ldb loop */
1309
1310 } /* end of lda loop */
1311
1312 } /* end of order loop */
1313
1314 } /* end of nr test loop */
1315
1316 } /* end of norm loop */
1317
1318
1319
1320 } /* end of beta loop */
1321
1322 } /* end of alpha loop */
1323
1324 FPU_FIX_STOP;
1325
1326 end:
1327 blas_free(y);
1328 blas_free(a);
1329 blas_free(a_use);
1330 blas_free(B);
1331 blas_free(B_use);
1332 blas_free(x);
1333 blas_free(head_r_true);
1334 blas_free(tail_r_true);
1335 blas_free(ratios);
1336 blas_free(a_vec);
1337 blas_free(x_vec);
1338
1339 *max_ratio = ratio_max;
1340 *min_ratio = ratio_min;
1341 *num_tests = test_count;
1342 *num_bad_ratio = bad_ratio_count;
1343
1344 }
do_test_zge_sum_mv_z_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)1345 void do_test_zge_sum_mv_z_c
1346 (int m, int n,
1347 int ntests, int *seed, double thresh, int debug, float test_prob,
1348 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
1349
1350 /* Function name */
1351 const char fname[] = "BLAS_zge_sum_mv_z_c";
1352
1353 int i;
1354 int yi;
1355 int incyi, y_starti, incx_veci;
1356 int test_count;
1357 int bad_ratio_count;
1358
1359 int ri;
1360 int incri;
1361 int inca, incx, incy;
1362
1363 double ratio;
1364
1365 double ratio_min, ratio_max;
1366
1367 double eps_int; /* internal machine epsilon */
1368 double un_int; /* internal underflow threshold */
1369
1370 double rin[2];
1371 double rout[2];
1372 double head_r_true_elem[2], tail_r_true_elem[2];
1373
1374 enum blas_order_type order_type;
1375 enum blas_prec_type prec;
1376
1377 int order_val;
1378 int lda_val, incx_val, incy_val;
1379 int ldb_val;
1380 int alpha_val, beta_val;
1381 int randomize_val;
1382
1383
1384
1385 int lda, ldb;
1386 int alpha_flag, beta_flag;
1387 int saved_seed;
1388 int norm;
1389 int test_no;
1390
1391 int n_i, m_i;
1392 int inca_veci;
1393
1394 double alpha[2];
1395 double beta[2];
1396 double beta_zero_fake[2];
1397 double alpha_use[2];
1398 double *a;
1399 double *a_use;
1400 double *B;
1401 double *B_use;
1402 float *x;
1403 double *y;
1404 double *a_vec;
1405 float *x_vec;
1406
1407
1408 double *ratios;
1409
1410 /* true result calculated by testgen, in double-double */
1411 double *head_r_true, *tail_r_true;
1412
1413
1414 FPU_FIX_DECL;
1415
1416 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
1417
1418 if (n < 0 || ntests < 0)
1419 BLAS_error(fname, -3, n, NULL);
1420
1421 /* initialization */
1422 saved_seed = *seed;
1423 ratio = 0.0;
1424 ratio_min = 1e308;
1425 ratio_max = 0.0;
1426
1427 *num_tests = 0;
1428 *num_bad_ratio = 0;
1429 *min_ratio = 0.0;
1430 *max_ratio = 0.0;
1431
1432 if (n == 0)
1433 return;
1434
1435 FPU_FIX_START;
1436
1437 n_i = n;
1438 m_i = m;
1439
1440 inca = incx = incy = 1;
1441 inca *= 2;
1442 incx *= 2;
1443 incy *= 2;
1444
1445 /* allocate memory for arrays */
1446 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
1447 if (4 * m_i > 0 && y == NULL) {
1448 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1449 }
1450 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double) * 2);
1451 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
1452 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1453 }
1454 a_use =
1455 (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double) * 2);
1456 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
1457 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1458 }
1459 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double) * 2);
1460 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
1461 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1462 }
1463 B_use =
1464 (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double) * 2);
1465 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
1466 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1467 }
1468 x = (float *) blas_malloc(4 * n_i * sizeof(float) * 2);
1469 if (4 * n_i > 0 && x == NULL) {
1470 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1471 }
1472
1473 inca_veci = 1;
1474 inca_veci *= 2;
1475 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double) * 2);
1476 if (2 * n_i > 0 && a_vec == NULL) {
1477 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1478 }
1479 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
1480 if (2 * n_i > 0 && x_vec == NULL) {
1481 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1482 }
1483 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
1484 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
1485 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1486 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1487 }
1488 ratios = (double *) blas_malloc(m_i * sizeof(double));
1489 if (m_i > 0 && ratios == NULL) {
1490 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1491 }
1492
1493 test_count = 0;
1494 bad_ratio_count = 0;
1495
1496 /* vary alpha */
1497 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
1498
1499 alpha_flag = 0;
1500 switch (alpha_val) {
1501 case 0:
1502 alpha[0] = alpha[1] = 0.0;
1503 alpha_flag = 1;
1504 break;
1505 case 1:
1506 alpha[0] = 1.0;
1507 alpha[1] = 0.0;
1508 alpha_flag = 1;
1509 break;
1510 }
1511
1512 /* vary beta */
1513 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
1514 beta_flag = 0;
1515 switch (beta_val) {
1516 case 0:
1517 beta[0] = beta[1] = 0.0;
1518 beta_flag = 1;
1519 break;
1520 case 1:
1521 beta[0] = 1.0;
1522 beta[1] = 0.0;
1523 beta_flag = 1;
1524 break;
1525 }
1526
1527
1528 eps_int = power(2, -BITS_D);
1529 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1530 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1531 prec = blas_prec_double;
1532
1533 /* vary norm -- underflow, approx 1, overflow */
1534 for (norm = NORM_START; norm <= NORM_END; norm++) {
1535
1536 /* number of tests */
1537 for (test_no = 0; test_no < ntests; test_no++) {
1538
1539
1540 /* vary storage format */
1541 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
1542
1543 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
1544
1545 /* vary lda = n_i, n_i+1, 2*n_i */
1546 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
1547
1548 if (order_type == blas_rowmajor) {
1549 lda = (lda_val == 0) ? n_i :
1550 (lda_val == 1) ? n_i + 1 : n_i * n_i;
1551 } else {
1552 lda = (lda_val == 0) ? m_i :
1553 (lda_val == 1) ? m_i + 1 : m_i * m_i;
1554 }
1555
1556 /* vary ldb = n_i, n_i+1, 2*n_i */
1557 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
1558
1559 if (order_type == blas_rowmajor) {
1560 ldb = (ldb_val == 0) ? n_i :
1561 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
1562 } else {
1563 ldb = (ldb_val == 0) ? m_i :
1564 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
1565 }
1566
1567 for (randomize_val = RANDOMIZE_START;
1568 randomize_val <= RANDOMIZE_END; randomize_val++) {
1569
1570 /* For the sake of speed, we throw out this case at random */
1571 if (xrand(seed) >= test_prob)
1572 continue;
1573
1574 /* finally we are here to generate the test case */
1575 /* alpha_use, a_use, B_use are the generated alpha, a, B
1576 * before any scaling.
1577 * That is, in the generator, alpha == beta == alpha_use
1578 * before scaling. */
1579
1580 saved_seed = *seed;
1581 BLAS_zge_sum_mv_z_c_testgen(norm, order_type,
1582 m, n, randomize_val, &alpha,
1583 alpha_flag, &beta, beta_flag, a,
1584 lda, B, ldb, x_vec, 1,
1585 &alpha_use, a_use, B_use, seed,
1586 head_r_true, tail_r_true);
1587
1588 /* vary incx = 1, 2 */
1589 for (incx_val = INCX_START; incx_val <= INCX_END;
1590 incx_val++) {
1591
1592 incx = incx_val;
1593 if (0 == incx)
1594 continue;
1595
1596 ccopy_vector(x_vec, n_i, 1, x, incx);
1597
1598 /* vary incy = 1, 2 */
1599 for (incy_val = INCY_START; incy_val <= INCY_END;
1600 incy_val++) {
1601
1602 incy = incy_val;
1603 if (0 == incy)
1604 continue;
1605
1606 test_count++;
1607
1608 /* call ge_sum_mv routines to be tested */
1609 FPU_FIX_STOP;
1610 BLAS_zge_sum_mv_z_c(order_type,
1611 m, n, alpha, a, lda, x, incx, beta,
1612 B, ldb, y, incy);
1613 FPU_FIX_START;
1614
1615 /* now compute the ratio using test_BLAS_xdot */
1616 /* copy a row from A, use x, run
1617 dot test */
1618
1619 incyi = incy;
1620
1621 incri = 1;
1622 incx_veci = 1;
1623 incx_veci *= 2;
1624 incyi *= 2;
1625 incri *= 2;
1626 if (incy < 0) {
1627 y_starti = (-m_i + 1) * incyi;
1628 } else {
1629 y_starti = 0;
1630 }
1631 /* make two copies of x into x_vec. redundant */
1632 ccopy_vector(x, n_i, incx, x_vec, 1);
1633 ccopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
1634 1);
1635 for (i = 0, yi = y_starti, ri = 0; i < m_i;
1636 i++, yi += incyi, ri += incri) {
1637 zge_copy_row(order_type, blas_no_trans, m_i, n_i,
1638 a_use, lda, a_vec, i);
1639 zge_copy_row(order_type, blas_no_trans, m_i, n_i,
1640 B_use, ldb, (a_vec + inca_veci * n_i),
1641 i);
1642
1643 rin[0] = rin[1] = 0.0;
1644 rout[0] = y[yi];
1645 rout[1] = y[yi + 1];
1646 head_r_true_elem[0] = head_r_true[ri];
1647 head_r_true_elem[1] = head_r_true[ri + 1];
1648 tail_r_true_elem[0] = tail_r_true[ri];
1649 tail_r_true_elem[1] = tail_r_true[ri + 1];
1650
1651 test_BLAS_zdot_z_c(2 * n_i,
1652 blas_no_conj,
1653 alpha_use, beta_zero_fake, rin,
1654 rout, head_r_true_elem,
1655 tail_r_true_elem, a_vec, 1, x_vec,
1656 1, eps_int, un_int, &ratios[i]);
1657
1658 /* take the max ratio */
1659 if (i == 0) {
1660 ratio = ratios[0];
1661 /* The !<= below causes NaN errors
1662 * to be included.
1663 * Note that (NaN > 0) is false */
1664 } else if (!(ratios[i] <= ratio)) {
1665 ratio = ratios[i];
1666 }
1667 } /* end of dot-test loop */
1668
1669 /* The !<= below causes NaN errors
1670 * to be included.
1671 * Note that (NaN > 0) is false */
1672 if (!(ratio <= thresh)) {
1673
1674 if (debug == 3) {
1675 printf("\n\t\tTest # %d\n", test_count);
1676 printf("y type : z, a type : z, x type : c\n");
1677 printf("Seed = %d\t", saved_seed);
1678 printf("n %d, m %d\n", n, m);
1679 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
1680 ldb, incx, incx);
1681
1682 if (order_type == blas_rowmajor)
1683 printf("row ");
1684 else
1685 printf("col ");
1686
1687 printf("NORM %d, ALPHA %d, BETA %d\n",
1688 norm, alpha_val, beta_val);
1689 printf("randomize %d\n", randomize_val);
1690
1691 /* print out info */
1692 printf("alpha = ");
1693 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
1694 printf(" ");
1695 printf("beta = ");
1696 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
1697 printf("\n");
1698 printf("alpha_use = ");
1699 printf("(%24.16e, %24.16e)", alpha_use[0],
1700 alpha_use[1]);;
1701 printf("\n");
1702
1703 zge_print_matrix(a, m_i, n_i, lda, order_type, "A");
1704 zge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
1705 cprint_vector(x, n_i, incx, "x");
1706
1707 zprint_vector(y, m_i, incy, "y");
1708
1709 zprint_vector(head_r_true, m_i, 1, "head_r_true");
1710
1711 zge_print_matrix(a_use, m_i, n_i, lda, order_type,
1712 "A_use");
1713 zge_print_matrix(B_use, m_i, n_i, ldb, order_type,
1714 "B_use");
1715
1716 dprint_vector(ratios, m_i, 1, "ratios");
1717 printf("ratio = %g\n", ratio);
1718 fflush(stdout);
1719 }
1720 bad_ratio_count++;
1721 if (bad_ratio_count >= MAX_BAD_TESTS) {
1722 printf("\ntoo many failures, exiting....");
1723 printf("\nTesting and compilation");
1724 printf(" are incomplete\n\n");
1725 goto end;
1726 }
1727 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
1728 printf("\nFlagrant ratio error, exiting...");
1729 printf("\nTesting and compilation");
1730 printf(" are incomplete\n\n");
1731 goto end;
1732 }
1733 }
1734
1735 if (!(ratio <= ratio_max))
1736 ratio_max = ratio;
1737
1738 if (ratio != 0.0 && !(ratio >= ratio_min))
1739 ratio_min = ratio;
1740
1741 } /* end of incy loop */
1742
1743 } /* end of incx loop */
1744
1745 } /* end of randmize loop */
1746
1747 } /* end of ldb loop */
1748
1749 } /* end of lda loop */
1750
1751 } /* end of order loop */
1752
1753 } /* end of nr test loop */
1754
1755 } /* end of norm loop */
1756
1757
1758
1759 } /* end of beta loop */
1760
1761 } /* end of alpha loop */
1762
1763 FPU_FIX_STOP;
1764
1765 end:
1766 blas_free(y);
1767 blas_free(a);
1768 blas_free(a_use);
1769 blas_free(B);
1770 blas_free(B_use);
1771 blas_free(x);
1772 blas_free(head_r_true);
1773 blas_free(tail_r_true);
1774 blas_free(ratios);
1775 blas_free(a_vec);
1776 blas_free(x_vec);
1777
1778 *max_ratio = ratio_max;
1779 *min_ratio = ratio_min;
1780 *num_tests = test_count;
1781 *num_bad_ratio = bad_ratio_count;
1782
1783 }
do_test_zge_sum_mv_c_z(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)1784 void do_test_zge_sum_mv_c_z
1785 (int m, int n,
1786 int ntests, int *seed, double thresh, int debug, float test_prob,
1787 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
1788
1789 /* Function name */
1790 const char fname[] = "BLAS_zge_sum_mv_c_z";
1791
1792 int i;
1793 int yi;
1794 int incyi, y_starti, incx_veci;
1795 int test_count;
1796 int bad_ratio_count;
1797
1798 int ri;
1799 int incri;
1800 int inca, incx, incy;
1801
1802 double ratio;
1803
1804 double ratio_min, ratio_max;
1805
1806 double eps_int; /* internal machine epsilon */
1807 double un_int; /* internal underflow threshold */
1808
1809 double rin[2];
1810 double rout[2];
1811 double head_r_true_elem[2], tail_r_true_elem[2];
1812
1813 enum blas_order_type order_type;
1814 enum blas_prec_type prec;
1815
1816 int order_val;
1817 int lda_val, incx_val, incy_val;
1818 int ldb_val;
1819 int alpha_val, beta_val;
1820 int randomize_val;
1821
1822
1823
1824 int lda, ldb;
1825 int alpha_flag, beta_flag;
1826 int saved_seed;
1827 int norm;
1828 int test_no;
1829
1830 int n_i, m_i;
1831 int inca_veci;
1832
1833 double alpha[2];
1834 double beta[2];
1835 double beta_zero_fake[2];
1836 double alpha_use[2];
1837 float *a;
1838 float *a_use;
1839 float *B;
1840 float *B_use;
1841 double *x;
1842 double *y;
1843 float *a_vec;
1844 double *x_vec;
1845
1846
1847 double *ratios;
1848
1849 /* true result calculated by testgen, in double-double */
1850 double *head_r_true, *tail_r_true;
1851
1852
1853 FPU_FIX_DECL;
1854
1855 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
1856
1857 if (n < 0 || ntests < 0)
1858 BLAS_error(fname, -3, n, NULL);
1859
1860 /* initialization */
1861 saved_seed = *seed;
1862 ratio = 0.0;
1863 ratio_min = 1e308;
1864 ratio_max = 0.0;
1865
1866 *num_tests = 0;
1867 *num_bad_ratio = 0;
1868 *min_ratio = 0.0;
1869 *max_ratio = 0.0;
1870
1871 if (n == 0)
1872 return;
1873
1874 FPU_FIX_START;
1875
1876 n_i = n;
1877 m_i = m;
1878
1879 inca = incx = incy = 1;
1880 inca *= 2;
1881 incx *= 2;
1882 incy *= 2;
1883
1884 /* allocate memory for arrays */
1885 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
1886 if (4 * m_i > 0 && y == NULL) {
1887 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1888 }
1889 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float) * 2);
1890 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
1891 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1892 }
1893 a_use =
1894 (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float) * 2);
1895 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
1896 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1897 }
1898 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
1899 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
1900 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1901 }
1902 B_use =
1903 (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
1904 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
1905 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1906 }
1907 x = (double *) blas_malloc(4 * n_i * sizeof(double) * 2);
1908 if (4 * n_i > 0 && x == NULL) {
1909 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1910 }
1911
1912 inca_veci = 1;
1913 inca_veci *= 2;
1914 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
1915 if (2 * n_i > 0 && a_vec == NULL) {
1916 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1917 }
1918 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double) * 2);
1919 if (2 * n_i > 0 && x_vec == NULL) {
1920 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1921 }
1922 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
1923 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
1924 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1925 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1926 }
1927 ratios = (double *) blas_malloc(m_i * sizeof(double));
1928 if (m_i > 0 && ratios == NULL) {
1929 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1930 }
1931
1932 test_count = 0;
1933 bad_ratio_count = 0;
1934
1935 /* vary alpha */
1936 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
1937
1938 alpha_flag = 0;
1939 switch (alpha_val) {
1940 case 0:
1941 alpha[0] = alpha[1] = 0.0;
1942 alpha_flag = 1;
1943 break;
1944 case 1:
1945 alpha[0] = 1.0;
1946 alpha[1] = 0.0;
1947 alpha_flag = 1;
1948 break;
1949 }
1950
1951 /* vary beta */
1952 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
1953 beta_flag = 0;
1954 switch (beta_val) {
1955 case 0:
1956 beta[0] = beta[1] = 0.0;
1957 beta_flag = 1;
1958 break;
1959 case 1:
1960 beta[0] = 1.0;
1961 beta[1] = 0.0;
1962 beta_flag = 1;
1963 break;
1964 }
1965
1966
1967 eps_int = power(2, -BITS_D);
1968 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1969 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1970 prec = blas_prec_double;
1971
1972 /* vary norm -- underflow, approx 1, overflow */
1973 for (norm = NORM_START; norm <= NORM_END; norm++) {
1974
1975 /* number of tests */
1976 for (test_no = 0; test_no < ntests; test_no++) {
1977
1978
1979 /* vary storage format */
1980 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
1981
1982 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
1983
1984 /* vary lda = n_i, n_i+1, 2*n_i */
1985 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
1986
1987 if (order_type == blas_rowmajor) {
1988 lda = (lda_val == 0) ? n_i :
1989 (lda_val == 1) ? n_i + 1 : n_i * n_i;
1990 } else {
1991 lda = (lda_val == 0) ? m_i :
1992 (lda_val == 1) ? m_i + 1 : m_i * m_i;
1993 }
1994
1995 /* vary ldb = n_i, n_i+1, 2*n_i */
1996 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
1997
1998 if (order_type == blas_rowmajor) {
1999 ldb = (ldb_val == 0) ? n_i :
2000 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
2001 } else {
2002 ldb = (ldb_val == 0) ? m_i :
2003 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
2004 }
2005
2006 for (randomize_val = RANDOMIZE_START;
2007 randomize_val <= RANDOMIZE_END; randomize_val++) {
2008
2009 /* For the sake of speed, we throw out this case at random */
2010 if (xrand(seed) >= test_prob)
2011 continue;
2012
2013 /* finally we are here to generate the test case */
2014 /* alpha_use, a_use, B_use are the generated alpha, a, B
2015 * before any scaling.
2016 * That is, in the generator, alpha == beta == alpha_use
2017 * before scaling. */
2018
2019 saved_seed = *seed;
2020 BLAS_zge_sum_mv_c_z_testgen(norm, order_type,
2021 m, n, randomize_val, &alpha,
2022 alpha_flag, &beta, beta_flag, a,
2023 lda, B, ldb, x_vec, 1,
2024 &alpha_use, a_use, B_use, seed,
2025 head_r_true, tail_r_true);
2026
2027 /* vary incx = 1, 2 */
2028 for (incx_val = INCX_START; incx_val <= INCX_END;
2029 incx_val++) {
2030
2031 incx = incx_val;
2032 if (0 == incx)
2033 continue;
2034
2035 zcopy_vector(x_vec, n_i, 1, x, incx);
2036
2037 /* vary incy = 1, 2 */
2038 for (incy_val = INCY_START; incy_val <= INCY_END;
2039 incy_val++) {
2040
2041 incy = incy_val;
2042 if (0 == incy)
2043 continue;
2044
2045 test_count++;
2046
2047 /* call ge_sum_mv routines to be tested */
2048 FPU_FIX_STOP;
2049 BLAS_zge_sum_mv_c_z(order_type,
2050 m, n, alpha, a, lda, x, incx, beta,
2051 B, ldb, y, incy);
2052 FPU_FIX_START;
2053
2054 /* now compute the ratio using test_BLAS_xdot */
2055 /* copy a row from A, use x, run
2056 dot test */
2057
2058 incyi = incy;
2059
2060 incri = 1;
2061 incx_veci = 1;
2062 incx_veci *= 2;
2063 incyi *= 2;
2064 incri *= 2;
2065 if (incy < 0) {
2066 y_starti = (-m_i + 1) * incyi;
2067 } else {
2068 y_starti = 0;
2069 }
2070 /* make two copies of x into x_vec. redundant */
2071 zcopy_vector(x, n_i, incx, x_vec, 1);
2072 zcopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
2073 1);
2074 for (i = 0, yi = y_starti, ri = 0; i < m_i;
2075 i++, yi += incyi, ri += incri) {
2076 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
2077 a_use, lda, a_vec, i);
2078 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
2079 B_use, ldb, (a_vec + inca_veci * n_i),
2080 i);
2081
2082 rin[0] = rin[1] = 0.0;
2083 rout[0] = y[yi];
2084 rout[1] = y[yi + 1];
2085 head_r_true_elem[0] = head_r_true[ri];
2086 head_r_true_elem[1] = head_r_true[ri + 1];
2087 tail_r_true_elem[0] = tail_r_true[ri];
2088 tail_r_true_elem[1] = tail_r_true[ri + 1];
2089
2090 test_BLAS_zdot_c_z(2 * n_i,
2091 blas_no_conj,
2092 alpha_use, beta_zero_fake, rin,
2093 rout, head_r_true_elem,
2094 tail_r_true_elem, a_vec, 1, x_vec,
2095 1, eps_int, un_int, &ratios[i]);
2096
2097 /* take the max ratio */
2098 if (i == 0) {
2099 ratio = ratios[0];
2100 /* The !<= below causes NaN errors
2101 * to be included.
2102 * Note that (NaN > 0) is false */
2103 } else if (!(ratios[i] <= ratio)) {
2104 ratio = ratios[i];
2105 }
2106 } /* end of dot-test loop */
2107
2108 /* The !<= below causes NaN errors
2109 * to be included.
2110 * Note that (NaN > 0) is false */
2111 if (!(ratio <= thresh)) {
2112
2113 if (debug == 3) {
2114 printf("\n\t\tTest # %d\n", test_count);
2115 printf("y type : z, a type : c, x type : z\n");
2116 printf("Seed = %d\t", saved_seed);
2117 printf("n %d, m %d\n", n, m);
2118 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
2119 ldb, incx, incx);
2120
2121 if (order_type == blas_rowmajor)
2122 printf("row ");
2123 else
2124 printf("col ");
2125
2126 printf("NORM %d, ALPHA %d, BETA %d\n",
2127 norm, alpha_val, beta_val);
2128 printf("randomize %d\n", randomize_val);
2129
2130 /* print out info */
2131 printf("alpha = ");
2132 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
2133 printf(" ");
2134 printf("beta = ");
2135 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
2136 printf("\n");
2137 printf("alpha_use = ");
2138 printf("(%24.16e, %24.16e)", alpha_use[0],
2139 alpha_use[1]);;
2140 printf("\n");
2141
2142 cge_print_matrix(a, m_i, n_i, lda, order_type, "A");
2143 cge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
2144 zprint_vector(x, n_i, incx, "x");
2145
2146 zprint_vector(y, m_i, incy, "y");
2147
2148 zprint_vector(head_r_true, m_i, 1, "head_r_true");
2149
2150 cge_print_matrix(a_use, m_i, n_i, lda, order_type,
2151 "A_use");
2152 cge_print_matrix(B_use, m_i, n_i, ldb, order_type,
2153 "B_use");
2154
2155 dprint_vector(ratios, m_i, 1, "ratios");
2156 printf("ratio = %g\n", ratio);
2157 fflush(stdout);
2158 }
2159 bad_ratio_count++;
2160 if (bad_ratio_count >= MAX_BAD_TESTS) {
2161 printf("\ntoo many failures, exiting....");
2162 printf("\nTesting and compilation");
2163 printf(" are incomplete\n\n");
2164 goto end;
2165 }
2166 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
2167 printf("\nFlagrant ratio error, exiting...");
2168 printf("\nTesting and compilation");
2169 printf(" are incomplete\n\n");
2170 goto end;
2171 }
2172 }
2173
2174 if (!(ratio <= ratio_max))
2175 ratio_max = ratio;
2176
2177 if (ratio != 0.0 && !(ratio >= ratio_min))
2178 ratio_min = ratio;
2179
2180 } /* end of incy loop */
2181
2182 } /* end of incx loop */
2183
2184 } /* end of randmize loop */
2185
2186 } /* end of ldb loop */
2187
2188 } /* end of lda loop */
2189
2190 } /* end of order loop */
2191
2192 } /* end of nr test loop */
2193
2194 } /* end of norm loop */
2195
2196
2197
2198 } /* end of beta loop */
2199
2200 } /* end of alpha loop */
2201
2202 FPU_FIX_STOP;
2203
2204 end:
2205 blas_free(y);
2206 blas_free(a);
2207 blas_free(a_use);
2208 blas_free(B);
2209 blas_free(B_use);
2210 blas_free(x);
2211 blas_free(head_r_true);
2212 blas_free(tail_r_true);
2213 blas_free(ratios);
2214 blas_free(a_vec);
2215 blas_free(x_vec);
2216
2217 *max_ratio = ratio_max;
2218 *min_ratio = ratio_min;
2219 *num_tests = test_count;
2220 *num_bad_ratio = bad_ratio_count;
2221
2222 }
do_test_zge_sum_mv_c_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)2223 void do_test_zge_sum_mv_c_c
2224 (int m, int n,
2225 int ntests, int *seed, double thresh, int debug, float test_prob,
2226 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
2227
2228 /* Function name */
2229 const char fname[] = "BLAS_zge_sum_mv_c_c";
2230
2231 int i;
2232 int yi;
2233 int incyi, y_starti, incx_veci;
2234 int test_count;
2235 int bad_ratio_count;
2236
2237 int ri;
2238 int incri;
2239 int inca, incx, incy;
2240
2241 double ratio;
2242
2243 double ratio_min, ratio_max;
2244
2245 double eps_int; /* internal machine epsilon */
2246 double un_int; /* internal underflow threshold */
2247
2248 double rin[2];
2249 double rout[2];
2250 double head_r_true_elem[2], tail_r_true_elem[2];
2251
2252 enum blas_order_type order_type;
2253 enum blas_prec_type prec;
2254
2255 int order_val;
2256 int lda_val, incx_val, incy_val;
2257 int ldb_val;
2258 int alpha_val, beta_val;
2259 int randomize_val;
2260
2261
2262
2263 int lda, ldb;
2264 int alpha_flag, beta_flag;
2265 int saved_seed;
2266 int norm;
2267 int test_no;
2268
2269 int n_i, m_i;
2270 int inca_veci;
2271
2272 double alpha[2];
2273 double beta[2];
2274 double beta_zero_fake[2];
2275 double alpha_use[2];
2276 float *a;
2277 float *a_use;
2278 float *B;
2279 float *B_use;
2280 float *x;
2281 double *y;
2282 float *a_vec;
2283 float *x_vec;
2284
2285
2286 double *ratios;
2287
2288 /* true result calculated by testgen, in double-double */
2289 double *head_r_true, *tail_r_true;
2290
2291
2292 FPU_FIX_DECL;
2293
2294 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
2295
2296 if (n < 0 || ntests < 0)
2297 BLAS_error(fname, -3, n, NULL);
2298
2299 /* initialization */
2300 saved_seed = *seed;
2301 ratio = 0.0;
2302 ratio_min = 1e308;
2303 ratio_max = 0.0;
2304
2305 *num_tests = 0;
2306 *num_bad_ratio = 0;
2307 *min_ratio = 0.0;
2308 *max_ratio = 0.0;
2309
2310 if (n == 0)
2311 return;
2312
2313 FPU_FIX_START;
2314
2315 n_i = n;
2316 m_i = m;
2317
2318 inca = incx = incy = 1;
2319 inca *= 2;
2320 incx *= 2;
2321 incy *= 2;
2322
2323 /* allocate memory for arrays */
2324 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
2325 if (4 * m_i > 0 && y == NULL) {
2326 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2327 }
2328 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float) * 2);
2329 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
2330 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2331 }
2332 a_use =
2333 (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float) * 2);
2334 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
2335 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2336 }
2337 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
2338 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
2339 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2340 }
2341 B_use =
2342 (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
2343 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
2344 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2345 }
2346 x = (float *) blas_malloc(4 * n_i * sizeof(float) * 2);
2347 if (4 * n_i > 0 && x == NULL) {
2348 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2349 }
2350
2351 inca_veci = 1;
2352 inca_veci *= 2;
2353 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
2354 if (2 * n_i > 0 && a_vec == NULL) {
2355 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2356 }
2357 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
2358 if (2 * n_i > 0 && x_vec == NULL) {
2359 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2360 }
2361 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
2362 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
2363 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2364 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2365 }
2366 ratios = (double *) blas_malloc(m_i * sizeof(double));
2367 if (m_i > 0 && ratios == NULL) {
2368 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2369 }
2370
2371 test_count = 0;
2372 bad_ratio_count = 0;
2373
2374 /* vary alpha */
2375 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
2376
2377 alpha_flag = 0;
2378 switch (alpha_val) {
2379 case 0:
2380 alpha[0] = alpha[1] = 0.0;
2381 alpha_flag = 1;
2382 break;
2383 case 1:
2384 alpha[0] = 1.0;
2385 alpha[1] = 0.0;
2386 alpha_flag = 1;
2387 break;
2388 }
2389
2390 /* vary beta */
2391 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
2392 beta_flag = 0;
2393 switch (beta_val) {
2394 case 0:
2395 beta[0] = beta[1] = 0.0;
2396 beta_flag = 1;
2397 break;
2398 case 1:
2399 beta[0] = 1.0;
2400 beta[1] = 0.0;
2401 beta_flag = 1;
2402 break;
2403 }
2404
2405
2406 eps_int = power(2, -BITS_D);
2407 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2408 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2409 prec = blas_prec_double;
2410
2411 /* vary norm -- underflow, approx 1, overflow */
2412 for (norm = NORM_START; norm <= NORM_END; norm++) {
2413
2414 /* number of tests */
2415 for (test_no = 0; test_no < ntests; test_no++) {
2416
2417
2418 /* vary storage format */
2419 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
2420
2421 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
2422
2423 /* vary lda = n_i, n_i+1, 2*n_i */
2424 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
2425
2426 if (order_type == blas_rowmajor) {
2427 lda = (lda_val == 0) ? n_i :
2428 (lda_val == 1) ? n_i + 1 : n_i * n_i;
2429 } else {
2430 lda = (lda_val == 0) ? m_i :
2431 (lda_val == 1) ? m_i + 1 : m_i * m_i;
2432 }
2433
2434 /* vary ldb = n_i, n_i+1, 2*n_i */
2435 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
2436
2437 if (order_type == blas_rowmajor) {
2438 ldb = (ldb_val == 0) ? n_i :
2439 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
2440 } else {
2441 ldb = (ldb_val == 0) ? m_i :
2442 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
2443 }
2444
2445 for (randomize_val = RANDOMIZE_START;
2446 randomize_val <= RANDOMIZE_END; randomize_val++) {
2447
2448 /* For the sake of speed, we throw out this case at random */
2449 if (xrand(seed) >= test_prob)
2450 continue;
2451
2452 /* finally we are here to generate the test case */
2453 /* alpha_use, a_use, B_use are the generated alpha, a, B
2454 * before any scaling.
2455 * That is, in the generator, alpha == beta == alpha_use
2456 * before scaling. */
2457
2458 saved_seed = *seed;
2459 BLAS_zge_sum_mv_c_c_testgen(norm, order_type,
2460 m, n, randomize_val, &alpha,
2461 alpha_flag, &beta, beta_flag, a,
2462 lda, B, ldb, x_vec, 1,
2463 &alpha_use, a_use, B_use, seed,
2464 head_r_true, tail_r_true);
2465
2466 /* vary incx = 1, 2 */
2467 for (incx_val = INCX_START; incx_val <= INCX_END;
2468 incx_val++) {
2469
2470 incx = incx_val;
2471 if (0 == incx)
2472 continue;
2473
2474 ccopy_vector(x_vec, n_i, 1, x, incx);
2475
2476 /* vary incy = 1, 2 */
2477 for (incy_val = INCY_START; incy_val <= INCY_END;
2478 incy_val++) {
2479
2480 incy = incy_val;
2481 if (0 == incy)
2482 continue;
2483
2484 test_count++;
2485
2486 /* call ge_sum_mv routines to be tested */
2487 FPU_FIX_STOP;
2488 BLAS_zge_sum_mv_c_c(order_type,
2489 m, n, alpha, a, lda, x, incx, beta,
2490 B, ldb, y, incy);
2491 FPU_FIX_START;
2492
2493 /* now compute the ratio using test_BLAS_xdot */
2494 /* copy a row from A, use x, run
2495 dot test */
2496
2497 incyi = incy;
2498
2499 incri = 1;
2500 incx_veci = 1;
2501 incx_veci *= 2;
2502 incyi *= 2;
2503 incri *= 2;
2504 if (incy < 0) {
2505 y_starti = (-m_i + 1) * incyi;
2506 } else {
2507 y_starti = 0;
2508 }
2509 /* make two copies of x into x_vec. redundant */
2510 ccopy_vector(x, n_i, incx, x_vec, 1);
2511 ccopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
2512 1);
2513 for (i = 0, yi = y_starti, ri = 0; i < m_i;
2514 i++, yi += incyi, ri += incri) {
2515 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
2516 a_use, lda, a_vec, i);
2517 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
2518 B_use, ldb, (a_vec + inca_veci * n_i),
2519 i);
2520
2521 rin[0] = rin[1] = 0.0;
2522 rout[0] = y[yi];
2523 rout[1] = y[yi + 1];
2524 head_r_true_elem[0] = head_r_true[ri];
2525 head_r_true_elem[1] = head_r_true[ri + 1];
2526 tail_r_true_elem[0] = tail_r_true[ri];
2527 tail_r_true_elem[1] = tail_r_true[ri + 1];
2528
2529 test_BLAS_zdot_c_c(2 * n_i,
2530 blas_no_conj,
2531 alpha_use, beta_zero_fake, rin,
2532 rout, head_r_true_elem,
2533 tail_r_true_elem, a_vec, 1, x_vec,
2534 1, eps_int, un_int, &ratios[i]);
2535
2536 /* take the max ratio */
2537 if (i == 0) {
2538 ratio = ratios[0];
2539 /* The !<= below causes NaN errors
2540 * to be included.
2541 * Note that (NaN > 0) is false */
2542 } else if (!(ratios[i] <= ratio)) {
2543 ratio = ratios[i];
2544 }
2545 } /* end of dot-test loop */
2546
2547 /* The !<= below causes NaN errors
2548 * to be included.
2549 * Note that (NaN > 0) is false */
2550 if (!(ratio <= thresh)) {
2551
2552 if (debug == 3) {
2553 printf("\n\t\tTest # %d\n", test_count);
2554 printf("y type : z, a type : c, x type : c\n");
2555 printf("Seed = %d\t", saved_seed);
2556 printf("n %d, m %d\n", n, m);
2557 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
2558 ldb, incx, incx);
2559
2560 if (order_type == blas_rowmajor)
2561 printf("row ");
2562 else
2563 printf("col ");
2564
2565 printf("NORM %d, ALPHA %d, BETA %d\n",
2566 norm, alpha_val, beta_val);
2567 printf("randomize %d\n", randomize_val);
2568
2569 /* print out info */
2570 printf("alpha = ");
2571 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
2572 printf(" ");
2573 printf("beta = ");
2574 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
2575 printf("\n");
2576 printf("alpha_use = ");
2577 printf("(%24.16e, %24.16e)", alpha_use[0],
2578 alpha_use[1]);;
2579 printf("\n");
2580
2581 cge_print_matrix(a, m_i, n_i, lda, order_type, "A");
2582 cge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
2583 cprint_vector(x, n_i, incx, "x");
2584
2585 zprint_vector(y, m_i, incy, "y");
2586
2587 zprint_vector(head_r_true, m_i, 1, "head_r_true");
2588
2589 cge_print_matrix(a_use, m_i, n_i, lda, order_type,
2590 "A_use");
2591 cge_print_matrix(B_use, m_i, n_i, ldb, order_type,
2592 "B_use");
2593
2594 dprint_vector(ratios, m_i, 1, "ratios");
2595 printf("ratio = %g\n", ratio);
2596 fflush(stdout);
2597 }
2598 bad_ratio_count++;
2599 if (bad_ratio_count >= MAX_BAD_TESTS) {
2600 printf("\ntoo many failures, exiting....");
2601 printf("\nTesting and compilation");
2602 printf(" are incomplete\n\n");
2603 goto end;
2604 }
2605 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
2606 printf("\nFlagrant ratio error, exiting...");
2607 printf("\nTesting and compilation");
2608 printf(" are incomplete\n\n");
2609 goto end;
2610 }
2611 }
2612
2613 if (!(ratio <= ratio_max))
2614 ratio_max = ratio;
2615
2616 if (ratio != 0.0 && !(ratio >= ratio_min))
2617 ratio_min = ratio;
2618
2619 } /* end of incy loop */
2620
2621 } /* end of incx loop */
2622
2623 } /* end of randmize loop */
2624
2625 } /* end of ldb loop */
2626
2627 } /* end of lda loop */
2628
2629 } /* end of order loop */
2630
2631 } /* end of nr test loop */
2632
2633 } /* end of norm loop */
2634
2635
2636
2637 } /* end of beta loop */
2638
2639 } /* end of alpha loop */
2640
2641 FPU_FIX_STOP;
2642
2643 end:
2644 blas_free(y);
2645 blas_free(a);
2646 blas_free(a_use);
2647 blas_free(B);
2648 blas_free(B_use);
2649 blas_free(x);
2650 blas_free(head_r_true);
2651 blas_free(tail_r_true);
2652 blas_free(ratios);
2653 blas_free(a_vec);
2654 blas_free(x_vec);
2655
2656 *max_ratio = ratio_max;
2657 *min_ratio = ratio_min;
2658 *num_tests = test_count;
2659 *num_bad_ratio = bad_ratio_count;
2660
2661 }
do_test_cge_sum_mv_c_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)2662 void do_test_cge_sum_mv_c_s
2663 (int m, int n,
2664 int ntests, int *seed, double thresh, int debug, float test_prob,
2665 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
2666
2667 /* Function name */
2668 const char fname[] = "BLAS_cge_sum_mv_c_s";
2669
2670 int i;
2671 int yi;
2672 int incyi, y_starti, incx_veci;
2673 int test_count;
2674 int bad_ratio_count;
2675
2676 int ri;
2677 int incri;
2678 int inca, incx, incy;
2679
2680 double ratio;
2681
2682 double ratio_min, ratio_max;
2683
2684 double eps_int; /* internal machine epsilon */
2685 double un_int; /* internal underflow threshold */
2686
2687 float rin[2];
2688 float rout[2];
2689 double head_r_true_elem[2], tail_r_true_elem[2];
2690
2691 enum blas_order_type order_type;
2692 enum blas_prec_type prec;
2693
2694 int order_val;
2695 int lda_val, incx_val, incy_val;
2696 int ldb_val;
2697 int alpha_val, beta_val;
2698 int randomize_val;
2699
2700
2701
2702 int lda, ldb;
2703 int alpha_flag, beta_flag;
2704 int saved_seed;
2705 int norm;
2706 int test_no;
2707
2708 int n_i, m_i;
2709 int inca_veci;
2710
2711 float alpha[2];
2712 float beta[2];
2713 float beta_zero_fake[2];
2714 float alpha_use[2];
2715 float *a;
2716 float *a_use;
2717 float *B;
2718 float *B_use;
2719 float *x;
2720 float *y;
2721 float *a_vec;
2722 float *x_vec;
2723
2724
2725 double *ratios;
2726
2727 /* true result calculated by testgen, in double-double */
2728 double *head_r_true, *tail_r_true;
2729
2730
2731 FPU_FIX_DECL;
2732
2733 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
2734
2735 if (n < 0 || ntests < 0)
2736 BLAS_error(fname, -3, n, NULL);
2737
2738 /* initialization */
2739 saved_seed = *seed;
2740 ratio = 0.0;
2741 ratio_min = 1e308;
2742 ratio_max = 0.0;
2743
2744 *num_tests = 0;
2745 *num_bad_ratio = 0;
2746 *min_ratio = 0.0;
2747 *max_ratio = 0.0;
2748
2749 if (n == 0)
2750 return;
2751
2752 FPU_FIX_START;
2753
2754 n_i = n;
2755 m_i = m;
2756
2757 inca = incx = incy = 1;
2758 inca *= 2;
2759
2760 incy *= 2;
2761
2762 /* allocate memory for arrays */
2763 y = (float *) blas_malloc(4 * m_i * sizeof(float) * 2);
2764 if (4 * m_i > 0 && y == NULL) {
2765 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2766 }
2767 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float) * 2);
2768 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
2769 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2770 }
2771 a_use =
2772 (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float) * 2);
2773 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
2774 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2775 }
2776 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
2777 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
2778 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2779 }
2780 B_use =
2781 (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
2782 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
2783 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2784 }
2785 x = (float *) blas_malloc(4 * n_i * sizeof(float));
2786 if (4 * n_i > 0 && x == NULL) {
2787 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2788 }
2789
2790 inca_veci = 1;
2791 inca_veci *= 2;
2792 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
2793 if (2 * n_i > 0 && a_vec == NULL) {
2794 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2795 }
2796 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
2797 if (2 * n_i > 0 && x_vec == NULL) {
2798 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2799 }
2800 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
2801 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
2802 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2803 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2804 }
2805 ratios = (double *) blas_malloc(m_i * sizeof(double));
2806 if (m_i > 0 && ratios == NULL) {
2807 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2808 }
2809
2810 test_count = 0;
2811 bad_ratio_count = 0;
2812
2813 /* vary alpha */
2814 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
2815
2816 alpha_flag = 0;
2817 switch (alpha_val) {
2818 case 0:
2819 alpha[0] = alpha[1] = 0.0;
2820 alpha_flag = 1;
2821 break;
2822 case 1:
2823 alpha[0] = 1.0;
2824 alpha[1] = 0.0;
2825 alpha_flag = 1;
2826 break;
2827 }
2828
2829 /* vary beta */
2830 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
2831 beta_flag = 0;
2832 switch (beta_val) {
2833 case 0:
2834 beta[0] = beta[1] = 0.0;
2835 beta_flag = 1;
2836 break;
2837 case 1:
2838 beta[0] = 1.0;
2839 beta[1] = 0.0;
2840 beta_flag = 1;
2841 break;
2842 }
2843
2844
2845 eps_int = power(2, -BITS_S);
2846 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
2847 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
2848 prec = blas_prec_single;
2849
2850 /* vary norm -- underflow, approx 1, overflow */
2851 for (norm = NORM_START; norm <= NORM_END; norm++) {
2852
2853 /* number of tests */
2854 for (test_no = 0; test_no < ntests; test_no++) {
2855
2856
2857 /* vary storage format */
2858 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
2859
2860 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
2861
2862 /* vary lda = n_i, n_i+1, 2*n_i */
2863 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
2864
2865 if (order_type == blas_rowmajor) {
2866 lda = (lda_val == 0) ? n_i :
2867 (lda_val == 1) ? n_i + 1 : n_i * n_i;
2868 } else {
2869 lda = (lda_val == 0) ? m_i :
2870 (lda_val == 1) ? m_i + 1 : m_i * m_i;
2871 }
2872
2873 /* vary ldb = n_i, n_i+1, 2*n_i */
2874 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
2875
2876 if (order_type == blas_rowmajor) {
2877 ldb = (ldb_val == 0) ? n_i :
2878 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
2879 } else {
2880 ldb = (ldb_val == 0) ? m_i :
2881 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
2882 }
2883
2884 for (randomize_val = RANDOMIZE_START;
2885 randomize_val <= RANDOMIZE_END; randomize_val++) {
2886
2887 /* For the sake of speed, we throw out this case at random */
2888 if (xrand(seed) >= test_prob)
2889 continue;
2890
2891 /* finally we are here to generate the test case */
2892 /* alpha_use, a_use, B_use are the generated alpha, a, B
2893 * before any scaling.
2894 * That is, in the generator, alpha == beta == alpha_use
2895 * before scaling. */
2896
2897 saved_seed = *seed;
2898 BLAS_cge_sum_mv_c_s_testgen(norm, order_type,
2899 m, n, randomize_val, &alpha,
2900 alpha_flag, &beta, beta_flag, a,
2901 lda, B, ldb, x_vec, 1,
2902 &alpha_use, a_use, B_use, seed,
2903 head_r_true, tail_r_true);
2904
2905 /* vary incx = 1, 2 */
2906 for (incx_val = INCX_START; incx_val <= INCX_END;
2907 incx_val++) {
2908
2909 incx = incx_val;
2910 if (0 == incx)
2911 continue;
2912
2913 scopy_vector(x_vec, n_i, 1, x, incx);
2914
2915 /* vary incy = 1, 2 */
2916 for (incy_val = INCY_START; incy_val <= INCY_END;
2917 incy_val++) {
2918
2919 incy = incy_val;
2920 if (0 == incy)
2921 continue;
2922
2923 test_count++;
2924
2925 /* call ge_sum_mv routines to be tested */
2926 FPU_FIX_STOP;
2927 BLAS_cge_sum_mv_c_s(order_type,
2928 m, n, alpha, a, lda, x, incx, beta,
2929 B, ldb, y, incy);
2930 FPU_FIX_START;
2931
2932 /* now compute the ratio using test_BLAS_xdot */
2933 /* copy a row from A, use x, run
2934 dot test */
2935
2936 incyi = incy;
2937
2938 incri = 1;
2939 incx_veci = 1;
2940
2941 incyi *= 2;
2942 incri *= 2;
2943 if (incy < 0) {
2944 y_starti = (-m_i + 1) * incyi;
2945 } else {
2946 y_starti = 0;
2947 }
2948 /* make two copies of x into x_vec. redundant */
2949 scopy_vector(x, n_i, incx, x_vec, 1);
2950 scopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
2951 1);
2952 for (i = 0, yi = y_starti, ri = 0; i < m_i;
2953 i++, yi += incyi, ri += incri) {
2954 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
2955 a_use, lda, a_vec, i);
2956 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
2957 B_use, ldb, (a_vec + inca_veci * n_i),
2958 i);
2959
2960 rin[0] = rin[1] = 0.0;
2961 rout[0] = y[yi];
2962 rout[1] = y[yi + 1];
2963 head_r_true_elem[0] = head_r_true[ri];
2964 head_r_true_elem[1] = head_r_true[ri + 1];
2965 tail_r_true_elem[0] = tail_r_true[ri];
2966 tail_r_true_elem[1] = tail_r_true[ri + 1];
2967
2968 test_BLAS_cdot_c_s(2 * n_i,
2969 blas_no_conj,
2970 alpha_use, beta_zero_fake, rin,
2971 rout, head_r_true_elem,
2972 tail_r_true_elem, a_vec, 1, x_vec,
2973 1, eps_int, un_int, &ratios[i]);
2974
2975 /* take the max ratio */
2976 if (i == 0) {
2977 ratio = ratios[0];
2978 /* The !<= below causes NaN errors
2979 * to be included.
2980 * Note that (NaN > 0) is false */
2981 } else if (!(ratios[i] <= ratio)) {
2982 ratio = ratios[i];
2983 }
2984 } /* end of dot-test loop */
2985
2986 /* The !<= below causes NaN errors
2987 * to be included.
2988 * Note that (NaN > 0) is false */
2989 if (!(ratio <= thresh)) {
2990
2991 if (debug == 3) {
2992 printf("\n\t\tTest # %d\n", test_count);
2993 printf("y type : c, a type : c, x type : s\n");
2994 printf("Seed = %d\t", saved_seed);
2995 printf("n %d, m %d\n", n, m);
2996 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
2997 ldb, incx, incx);
2998
2999 if (order_type == blas_rowmajor)
3000 printf("row ");
3001 else
3002 printf("col ");
3003
3004 printf("NORM %d, ALPHA %d, BETA %d\n",
3005 norm, alpha_val, beta_val);
3006 printf("randomize %d\n", randomize_val);
3007
3008 /* print out info */
3009 printf("alpha = ");
3010 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);;
3011 printf(" ");
3012 printf("beta = ");
3013 printf("(%16.8e, %16.8e)", beta[0], beta[1]);;
3014 printf("\n");
3015 printf("alpha_use = ");
3016 printf("(%16.8e, %16.8e)", alpha_use[0],
3017 alpha_use[1]);;
3018 printf("\n");
3019
3020 cge_print_matrix(a, m_i, n_i, lda, order_type, "A");
3021 cge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
3022 sprint_vector(x, n_i, incx, "x");
3023
3024 cprint_vector(y, m_i, incy, "y");
3025
3026 zprint_vector(head_r_true, m_i, 1, "head_r_true");
3027
3028 cge_print_matrix(a_use, m_i, n_i, lda, order_type,
3029 "A_use");
3030 cge_print_matrix(B_use, m_i, n_i, ldb, order_type,
3031 "B_use");
3032
3033 dprint_vector(ratios, m_i, 1, "ratios");
3034 printf("ratio = %g\n", ratio);
3035 fflush(stdout);
3036 }
3037 bad_ratio_count++;
3038 if (bad_ratio_count >= MAX_BAD_TESTS) {
3039 printf("\ntoo many failures, exiting....");
3040 printf("\nTesting and compilation");
3041 printf(" are incomplete\n\n");
3042 goto end;
3043 }
3044 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
3045 printf("\nFlagrant ratio error, exiting...");
3046 printf("\nTesting and compilation");
3047 printf(" are incomplete\n\n");
3048 goto end;
3049 }
3050 }
3051
3052 if (!(ratio <= ratio_max))
3053 ratio_max = ratio;
3054
3055 if (ratio != 0.0 && !(ratio >= ratio_min))
3056 ratio_min = ratio;
3057
3058 } /* end of incy loop */
3059
3060 } /* end of incx loop */
3061
3062 } /* end of randmize loop */
3063
3064 } /* end of ldb loop */
3065
3066 } /* end of lda loop */
3067
3068 } /* end of order loop */
3069
3070 } /* end of nr test loop */
3071
3072 } /* end of norm loop */
3073
3074
3075
3076 } /* end of beta loop */
3077
3078 } /* end of alpha loop */
3079
3080 FPU_FIX_STOP;
3081
3082 end:
3083 blas_free(y);
3084 blas_free(a);
3085 blas_free(a_use);
3086 blas_free(B);
3087 blas_free(B_use);
3088 blas_free(x);
3089 blas_free(head_r_true);
3090 blas_free(tail_r_true);
3091 blas_free(ratios);
3092 blas_free(a_vec);
3093 blas_free(x_vec);
3094
3095 *max_ratio = ratio_max;
3096 *min_ratio = ratio_min;
3097 *num_tests = test_count;
3098 *num_bad_ratio = bad_ratio_count;
3099
3100 }
do_test_cge_sum_mv_s_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)3101 void do_test_cge_sum_mv_s_c
3102 (int m, int n,
3103 int ntests, int *seed, double thresh, int debug, float test_prob,
3104 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
3105
3106 /* Function name */
3107 const char fname[] = "BLAS_cge_sum_mv_s_c";
3108
3109 int i;
3110 int yi;
3111 int incyi, y_starti, incx_veci;
3112 int test_count;
3113 int bad_ratio_count;
3114
3115 int ri;
3116 int incri;
3117 int inca, incx, incy;
3118
3119 double ratio;
3120
3121 double ratio_min, ratio_max;
3122
3123 double eps_int; /* internal machine epsilon */
3124 double un_int; /* internal underflow threshold */
3125
3126 float rin[2];
3127 float rout[2];
3128 double head_r_true_elem[2], tail_r_true_elem[2];
3129
3130 enum blas_order_type order_type;
3131 enum blas_prec_type prec;
3132
3133 int order_val;
3134 int lda_val, incx_val, incy_val;
3135 int ldb_val;
3136 int alpha_val, beta_val;
3137 int randomize_val;
3138
3139
3140
3141 int lda, ldb;
3142 int alpha_flag, beta_flag;
3143 int saved_seed;
3144 int norm;
3145 int test_no;
3146
3147 int n_i, m_i;
3148 int inca_veci;
3149
3150 float alpha[2];
3151 float beta[2];
3152 float beta_zero_fake[2];
3153 float alpha_use[2];
3154 float *a;
3155 float *a_use;
3156 float *B;
3157 float *B_use;
3158 float *x;
3159 float *y;
3160 float *a_vec;
3161 float *x_vec;
3162
3163
3164 double *ratios;
3165
3166 /* true result calculated by testgen, in double-double */
3167 double *head_r_true, *tail_r_true;
3168
3169
3170 FPU_FIX_DECL;
3171
3172 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
3173
3174 if (n < 0 || ntests < 0)
3175 BLAS_error(fname, -3, n, NULL);
3176
3177 /* initialization */
3178 saved_seed = *seed;
3179 ratio = 0.0;
3180 ratio_min = 1e308;
3181 ratio_max = 0.0;
3182
3183 *num_tests = 0;
3184 *num_bad_ratio = 0;
3185 *min_ratio = 0.0;
3186 *max_ratio = 0.0;
3187
3188 if (n == 0)
3189 return;
3190
3191 FPU_FIX_START;
3192
3193 n_i = n;
3194 m_i = m;
3195
3196 inca = incx = incy = 1;
3197
3198 incx *= 2;
3199 incy *= 2;
3200
3201 /* allocate memory for arrays */
3202 y = (float *) blas_malloc(4 * m_i * sizeof(float) * 2);
3203 if (4 * m_i > 0 && y == NULL) {
3204 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3205 }
3206 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float));
3207 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
3208 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3209 }
3210 a_use = (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float));
3211 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
3212 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3213 }
3214 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
3215 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
3216 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3217 }
3218 B_use = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
3219 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
3220 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3221 }
3222 x = (float *) blas_malloc(4 * n_i * sizeof(float) * 2);
3223 if (4 * n_i > 0 && x == NULL) {
3224 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3225 }
3226
3227 inca_veci = 1;
3228
3229 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
3230 if (2 * n_i > 0 && a_vec == NULL) {
3231 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3232 }
3233 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
3234 if (2 * n_i > 0 && x_vec == NULL) {
3235 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3236 }
3237 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
3238 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
3239 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
3240 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3241 }
3242 ratios = (double *) blas_malloc(m_i * sizeof(double));
3243 if (m_i > 0 && ratios == NULL) {
3244 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3245 }
3246
3247 test_count = 0;
3248 bad_ratio_count = 0;
3249
3250 /* vary alpha */
3251 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
3252
3253 alpha_flag = 0;
3254 switch (alpha_val) {
3255 case 0:
3256 alpha[0] = alpha[1] = 0.0;
3257 alpha_flag = 1;
3258 break;
3259 case 1:
3260 alpha[0] = 1.0;
3261 alpha[1] = 0.0;
3262 alpha_flag = 1;
3263 break;
3264 }
3265
3266 /* vary beta */
3267 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
3268 beta_flag = 0;
3269 switch (beta_val) {
3270 case 0:
3271 beta[0] = beta[1] = 0.0;
3272 beta_flag = 1;
3273 break;
3274 case 1:
3275 beta[0] = 1.0;
3276 beta[1] = 0.0;
3277 beta_flag = 1;
3278 break;
3279 }
3280
3281
3282 eps_int = power(2, -BITS_S);
3283 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
3284 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
3285 prec = blas_prec_single;
3286
3287 /* vary norm -- underflow, approx 1, overflow */
3288 for (norm = NORM_START; norm <= NORM_END; norm++) {
3289
3290 /* number of tests */
3291 for (test_no = 0; test_no < ntests; test_no++) {
3292
3293
3294 /* vary storage format */
3295 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
3296
3297 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
3298
3299 /* vary lda = n_i, n_i+1, 2*n_i */
3300 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
3301
3302 if (order_type == blas_rowmajor) {
3303 lda = (lda_val == 0) ? n_i :
3304 (lda_val == 1) ? n_i + 1 : n_i * n_i;
3305 } else {
3306 lda = (lda_val == 0) ? m_i :
3307 (lda_val == 1) ? m_i + 1 : m_i * m_i;
3308 }
3309
3310 /* vary ldb = n_i, n_i+1, 2*n_i */
3311 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
3312
3313 if (order_type == blas_rowmajor) {
3314 ldb = (ldb_val == 0) ? n_i :
3315 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
3316 } else {
3317 ldb = (ldb_val == 0) ? m_i :
3318 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
3319 }
3320
3321 for (randomize_val = RANDOMIZE_START;
3322 randomize_val <= RANDOMIZE_END; randomize_val++) {
3323
3324 /* For the sake of speed, we throw out this case at random */
3325 if (xrand(seed) >= test_prob)
3326 continue;
3327
3328 /* finally we are here to generate the test case */
3329 /* alpha_use, a_use, B_use are the generated alpha, a, B
3330 * before any scaling.
3331 * That is, in the generator, alpha == beta == alpha_use
3332 * before scaling. */
3333
3334 saved_seed = *seed;
3335 BLAS_cge_sum_mv_s_c_testgen(norm, order_type,
3336 m, n, randomize_val, &alpha,
3337 alpha_flag, &beta, beta_flag, a,
3338 lda, B, ldb, x_vec, 1,
3339 &alpha_use, a_use, B_use, seed,
3340 head_r_true, tail_r_true);
3341
3342 /* vary incx = 1, 2 */
3343 for (incx_val = INCX_START; incx_val <= INCX_END;
3344 incx_val++) {
3345
3346 incx = incx_val;
3347 if (0 == incx)
3348 continue;
3349
3350 ccopy_vector(x_vec, n_i, 1, x, incx);
3351
3352 /* vary incy = 1, 2 */
3353 for (incy_val = INCY_START; incy_val <= INCY_END;
3354 incy_val++) {
3355
3356 incy = incy_val;
3357 if (0 == incy)
3358 continue;
3359
3360 test_count++;
3361
3362 /* call ge_sum_mv routines to be tested */
3363 FPU_FIX_STOP;
3364 BLAS_cge_sum_mv_s_c(order_type,
3365 m, n, alpha, a, lda, x, incx, beta,
3366 B, ldb, y, incy);
3367 FPU_FIX_START;
3368
3369 /* now compute the ratio using test_BLAS_xdot */
3370 /* copy a row from A, use x, run
3371 dot test */
3372
3373 incyi = incy;
3374
3375 incri = 1;
3376 incx_veci = 1;
3377 incx_veci *= 2;
3378 incyi *= 2;
3379 incri *= 2;
3380 if (incy < 0) {
3381 y_starti = (-m_i + 1) * incyi;
3382 } else {
3383 y_starti = 0;
3384 }
3385 /* make two copies of x into x_vec. redundant */
3386 ccopy_vector(x, n_i, incx, x_vec, 1);
3387 ccopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
3388 1);
3389 for (i = 0, yi = y_starti, ri = 0; i < m_i;
3390 i++, yi += incyi, ri += incri) {
3391 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
3392 a_use, lda, a_vec, i);
3393 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
3394 B_use, ldb, (a_vec + inca_veci * n_i),
3395 i);
3396
3397 rin[0] = rin[1] = 0.0;
3398 rout[0] = y[yi];
3399 rout[1] = y[yi + 1];
3400 head_r_true_elem[0] = head_r_true[ri];
3401 head_r_true_elem[1] = head_r_true[ri + 1];
3402 tail_r_true_elem[0] = tail_r_true[ri];
3403 tail_r_true_elem[1] = tail_r_true[ri + 1];
3404
3405 test_BLAS_cdot_s_c(2 * n_i,
3406 blas_no_conj,
3407 alpha_use, beta_zero_fake, rin,
3408 rout, head_r_true_elem,
3409 tail_r_true_elem, a_vec, 1, x_vec,
3410 1, eps_int, un_int, &ratios[i]);
3411
3412 /* take the max ratio */
3413 if (i == 0) {
3414 ratio = ratios[0];
3415 /* The !<= below causes NaN errors
3416 * to be included.
3417 * Note that (NaN > 0) is false */
3418 } else if (!(ratios[i] <= ratio)) {
3419 ratio = ratios[i];
3420 }
3421 } /* end of dot-test loop */
3422
3423 /* The !<= below causes NaN errors
3424 * to be included.
3425 * Note that (NaN > 0) is false */
3426 if (!(ratio <= thresh)) {
3427
3428 if (debug == 3) {
3429 printf("\n\t\tTest # %d\n", test_count);
3430 printf("y type : c, a type : s, x type : c\n");
3431 printf("Seed = %d\t", saved_seed);
3432 printf("n %d, m %d\n", n, m);
3433 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
3434 ldb, incx, incx);
3435
3436 if (order_type == blas_rowmajor)
3437 printf("row ");
3438 else
3439 printf("col ");
3440
3441 printf("NORM %d, ALPHA %d, BETA %d\n",
3442 norm, alpha_val, beta_val);
3443 printf("randomize %d\n", randomize_val);
3444
3445 /* print out info */
3446 printf("alpha = ");
3447 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);;
3448 printf(" ");
3449 printf("beta = ");
3450 printf("(%16.8e, %16.8e)", beta[0], beta[1]);;
3451 printf("\n");
3452 printf("alpha_use = ");
3453 printf("(%16.8e, %16.8e)", alpha_use[0],
3454 alpha_use[1]);;
3455 printf("\n");
3456
3457 sge_print_matrix(a, m_i, n_i, lda, order_type, "A");
3458 sge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
3459 cprint_vector(x, n_i, incx, "x");
3460
3461 cprint_vector(y, m_i, incy, "y");
3462
3463 zprint_vector(head_r_true, m_i, 1, "head_r_true");
3464
3465 sge_print_matrix(a_use, m_i, n_i, lda, order_type,
3466 "A_use");
3467 sge_print_matrix(B_use, m_i, n_i, ldb, order_type,
3468 "B_use");
3469
3470 dprint_vector(ratios, m_i, 1, "ratios");
3471 printf("ratio = %g\n", ratio);
3472 fflush(stdout);
3473 }
3474 bad_ratio_count++;
3475 if (bad_ratio_count >= MAX_BAD_TESTS) {
3476 printf("\ntoo many failures, exiting....");
3477 printf("\nTesting and compilation");
3478 printf(" are incomplete\n\n");
3479 goto end;
3480 }
3481 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
3482 printf("\nFlagrant ratio error, exiting...");
3483 printf("\nTesting and compilation");
3484 printf(" are incomplete\n\n");
3485 goto end;
3486 }
3487 }
3488
3489 if (!(ratio <= ratio_max))
3490 ratio_max = ratio;
3491
3492 if (ratio != 0.0 && !(ratio >= ratio_min))
3493 ratio_min = ratio;
3494
3495 } /* end of incy loop */
3496
3497 } /* end of incx loop */
3498
3499 } /* end of randmize loop */
3500
3501 } /* end of ldb loop */
3502
3503 } /* end of lda loop */
3504
3505 } /* end of order loop */
3506
3507 } /* end of nr test loop */
3508
3509 } /* end of norm loop */
3510
3511
3512
3513 } /* end of beta loop */
3514
3515 } /* end of alpha loop */
3516
3517 FPU_FIX_STOP;
3518
3519 end:
3520 blas_free(y);
3521 blas_free(a);
3522 blas_free(a_use);
3523 blas_free(B);
3524 blas_free(B_use);
3525 blas_free(x);
3526 blas_free(head_r_true);
3527 blas_free(tail_r_true);
3528 blas_free(ratios);
3529 blas_free(a_vec);
3530 blas_free(x_vec);
3531
3532 *max_ratio = ratio_max;
3533 *min_ratio = ratio_min;
3534 *num_tests = test_count;
3535 *num_bad_ratio = bad_ratio_count;
3536
3537 }
do_test_cge_sum_mv_s_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)3538 void do_test_cge_sum_mv_s_s
3539 (int m, int n,
3540 int ntests, int *seed, double thresh, int debug, float test_prob,
3541 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
3542
3543 /* Function name */
3544 const char fname[] = "BLAS_cge_sum_mv_s_s";
3545
3546 int i;
3547 int yi;
3548 int incyi, y_starti, incx_veci;
3549 int test_count;
3550 int bad_ratio_count;
3551
3552 int ri;
3553 int incri;
3554 int inca, incx, incy;
3555
3556 double ratio;
3557
3558 double ratio_min, ratio_max;
3559
3560 double eps_int; /* internal machine epsilon */
3561 double un_int; /* internal underflow threshold */
3562
3563 float rin[2];
3564 float rout[2];
3565 double head_r_true_elem[2], tail_r_true_elem[2];
3566
3567 enum blas_order_type order_type;
3568 enum blas_prec_type prec;
3569
3570 int order_val;
3571 int lda_val, incx_val, incy_val;
3572 int ldb_val;
3573 int alpha_val, beta_val;
3574 int randomize_val;
3575
3576
3577
3578 int lda, ldb;
3579 int alpha_flag, beta_flag;
3580 int saved_seed;
3581 int norm;
3582 int test_no;
3583
3584 int n_i, m_i;
3585 int inca_veci;
3586
3587 float alpha[2];
3588 float beta[2];
3589 float beta_zero_fake[2];
3590 float alpha_use[2];
3591 float *a;
3592 float *a_use;
3593 float *B;
3594 float *B_use;
3595 float *x;
3596 float *y;
3597 float *a_vec;
3598 float *x_vec;
3599
3600
3601 double *ratios;
3602
3603 /* true result calculated by testgen, in double-double */
3604 double *head_r_true, *tail_r_true;
3605
3606
3607 FPU_FIX_DECL;
3608
3609 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
3610
3611 if (n < 0 || ntests < 0)
3612 BLAS_error(fname, -3, n, NULL);
3613
3614 /* initialization */
3615 saved_seed = *seed;
3616 ratio = 0.0;
3617 ratio_min = 1e308;
3618 ratio_max = 0.0;
3619
3620 *num_tests = 0;
3621 *num_bad_ratio = 0;
3622 *min_ratio = 0.0;
3623 *max_ratio = 0.0;
3624
3625 if (n == 0)
3626 return;
3627
3628 FPU_FIX_START;
3629
3630 n_i = n;
3631 m_i = m;
3632
3633 inca = incx = incy = 1;
3634
3635
3636 incy *= 2;
3637
3638 /* allocate memory for arrays */
3639 y = (float *) blas_malloc(4 * m_i * sizeof(float) * 2);
3640 if (4 * m_i > 0 && y == NULL) {
3641 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3642 }
3643 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float));
3644 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
3645 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3646 }
3647 a_use = (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float));
3648 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
3649 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3650 }
3651 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
3652 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
3653 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3654 }
3655 B_use = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
3656 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
3657 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3658 }
3659 x = (float *) blas_malloc(4 * n_i * sizeof(float));
3660 if (4 * n_i > 0 && x == NULL) {
3661 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3662 }
3663
3664 inca_veci = 1;
3665
3666 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
3667 if (2 * n_i > 0 && a_vec == NULL) {
3668 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3669 }
3670 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
3671 if (2 * n_i > 0 && x_vec == NULL) {
3672 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3673 }
3674 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
3675 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
3676 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
3677 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3678 }
3679 ratios = (double *) blas_malloc(m_i * sizeof(double));
3680 if (m_i > 0 && ratios == NULL) {
3681 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3682 }
3683
3684 test_count = 0;
3685 bad_ratio_count = 0;
3686
3687 /* vary alpha */
3688 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
3689
3690 alpha_flag = 0;
3691 switch (alpha_val) {
3692 case 0:
3693 alpha[0] = alpha[1] = 0.0;
3694 alpha_flag = 1;
3695 break;
3696 case 1:
3697 alpha[0] = 1.0;
3698 alpha[1] = 0.0;
3699 alpha_flag = 1;
3700 break;
3701 }
3702
3703 /* vary beta */
3704 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
3705 beta_flag = 0;
3706 switch (beta_val) {
3707 case 0:
3708 beta[0] = beta[1] = 0.0;
3709 beta_flag = 1;
3710 break;
3711 case 1:
3712 beta[0] = 1.0;
3713 beta[1] = 0.0;
3714 beta_flag = 1;
3715 break;
3716 }
3717
3718
3719 eps_int = power(2, -BITS_S);
3720 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
3721 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
3722 prec = blas_prec_single;
3723
3724 /* vary norm -- underflow, approx 1, overflow */
3725 for (norm = NORM_START; norm <= NORM_END; norm++) {
3726
3727 /* number of tests */
3728 for (test_no = 0; test_no < ntests; test_no++) {
3729
3730
3731 /* vary storage format */
3732 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
3733
3734 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
3735
3736 /* vary lda = n_i, n_i+1, 2*n_i */
3737 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
3738
3739 if (order_type == blas_rowmajor) {
3740 lda = (lda_val == 0) ? n_i :
3741 (lda_val == 1) ? n_i + 1 : n_i * n_i;
3742 } else {
3743 lda = (lda_val == 0) ? m_i :
3744 (lda_val == 1) ? m_i + 1 : m_i * m_i;
3745 }
3746
3747 /* vary ldb = n_i, n_i+1, 2*n_i */
3748 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
3749
3750 if (order_type == blas_rowmajor) {
3751 ldb = (ldb_val == 0) ? n_i :
3752 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
3753 } else {
3754 ldb = (ldb_val == 0) ? m_i :
3755 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
3756 }
3757
3758 for (randomize_val = RANDOMIZE_START;
3759 randomize_val <= RANDOMIZE_END; randomize_val++) {
3760
3761 /* For the sake of speed, we throw out this case at random */
3762 if (xrand(seed) >= test_prob)
3763 continue;
3764
3765 /* finally we are here to generate the test case */
3766 /* alpha_use, a_use, B_use are the generated alpha, a, B
3767 * before any scaling.
3768 * That is, in the generator, alpha == beta == alpha_use
3769 * before scaling. */
3770
3771 saved_seed = *seed;
3772 BLAS_cge_sum_mv_s_s_testgen(norm, order_type,
3773 m, n, randomize_val, &alpha,
3774 alpha_flag, &beta, beta_flag, a,
3775 lda, B, ldb, x_vec, 1,
3776 &alpha_use, a_use, B_use, seed,
3777 head_r_true, tail_r_true);
3778
3779 /* vary incx = 1, 2 */
3780 for (incx_val = INCX_START; incx_val <= INCX_END;
3781 incx_val++) {
3782
3783 incx = incx_val;
3784 if (0 == incx)
3785 continue;
3786
3787 scopy_vector(x_vec, n_i, 1, x, incx);
3788
3789 /* vary incy = 1, 2 */
3790 for (incy_val = INCY_START; incy_val <= INCY_END;
3791 incy_val++) {
3792
3793 incy = incy_val;
3794 if (0 == incy)
3795 continue;
3796
3797 test_count++;
3798
3799 /* call ge_sum_mv routines to be tested */
3800 FPU_FIX_STOP;
3801 BLAS_cge_sum_mv_s_s(order_type,
3802 m, n, alpha, a, lda, x, incx, beta,
3803 B, ldb, y, incy);
3804 FPU_FIX_START;
3805
3806 /* now compute the ratio using test_BLAS_xdot */
3807 /* copy a row from A, use x, run
3808 dot test */
3809
3810 incyi = incy;
3811
3812 incri = 1;
3813 incx_veci = 1;
3814
3815 incyi *= 2;
3816 incri *= 2;
3817 if (incy < 0) {
3818 y_starti = (-m_i + 1) * incyi;
3819 } else {
3820 y_starti = 0;
3821 }
3822 /* make two copies of x into x_vec. redundant */
3823 scopy_vector(x, n_i, incx, x_vec, 1);
3824 scopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
3825 1);
3826 for (i = 0, yi = y_starti, ri = 0; i < m_i;
3827 i++, yi += incyi, ri += incri) {
3828 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
3829 a_use, lda, a_vec, i);
3830 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
3831 B_use, ldb, (a_vec + inca_veci * n_i),
3832 i);
3833
3834 rin[0] = rin[1] = 0.0;
3835 rout[0] = y[yi];
3836 rout[1] = y[yi + 1];
3837 head_r_true_elem[0] = head_r_true[ri];
3838 head_r_true_elem[1] = head_r_true[ri + 1];
3839 tail_r_true_elem[0] = tail_r_true[ri];
3840 tail_r_true_elem[1] = tail_r_true[ri + 1];
3841
3842 test_BLAS_cdot_s_s(2 * n_i,
3843 blas_no_conj,
3844 alpha_use, beta_zero_fake, rin,
3845 rout, head_r_true_elem,
3846 tail_r_true_elem, a_vec, 1, x_vec,
3847 1, eps_int, un_int, &ratios[i]);
3848
3849 /* take the max ratio */
3850 if (i == 0) {
3851 ratio = ratios[0];
3852 /* The !<= below causes NaN errors
3853 * to be included.
3854 * Note that (NaN > 0) is false */
3855 } else if (!(ratios[i] <= ratio)) {
3856 ratio = ratios[i];
3857 }
3858 } /* end of dot-test loop */
3859
3860 /* The !<= below causes NaN errors
3861 * to be included.
3862 * Note that (NaN > 0) is false */
3863 if (!(ratio <= thresh)) {
3864
3865 if (debug == 3) {
3866 printf("\n\t\tTest # %d\n", test_count);
3867 printf("y type : c, a type : s, x type : s\n");
3868 printf("Seed = %d\t", saved_seed);
3869 printf("n %d, m %d\n", n, m);
3870 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
3871 ldb, incx, incx);
3872
3873 if (order_type == blas_rowmajor)
3874 printf("row ");
3875 else
3876 printf("col ");
3877
3878 printf("NORM %d, ALPHA %d, BETA %d\n",
3879 norm, alpha_val, beta_val);
3880 printf("randomize %d\n", randomize_val);
3881
3882 /* print out info */
3883 printf("alpha = ");
3884 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);;
3885 printf(" ");
3886 printf("beta = ");
3887 printf("(%16.8e, %16.8e)", beta[0], beta[1]);;
3888 printf("\n");
3889 printf("alpha_use = ");
3890 printf("(%16.8e, %16.8e)", alpha_use[0],
3891 alpha_use[1]);;
3892 printf("\n");
3893
3894 sge_print_matrix(a, m_i, n_i, lda, order_type, "A");
3895 sge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
3896 sprint_vector(x, n_i, incx, "x");
3897
3898 cprint_vector(y, m_i, incy, "y");
3899
3900 zprint_vector(head_r_true, m_i, 1, "head_r_true");
3901
3902 sge_print_matrix(a_use, m_i, n_i, lda, order_type,
3903 "A_use");
3904 sge_print_matrix(B_use, m_i, n_i, ldb, order_type,
3905 "B_use");
3906
3907 dprint_vector(ratios, m_i, 1, "ratios");
3908 printf("ratio = %g\n", ratio);
3909 fflush(stdout);
3910 }
3911 bad_ratio_count++;
3912 if (bad_ratio_count >= MAX_BAD_TESTS) {
3913 printf("\ntoo many failures, exiting....");
3914 printf("\nTesting and compilation");
3915 printf(" are incomplete\n\n");
3916 goto end;
3917 }
3918 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
3919 printf("\nFlagrant ratio error, exiting...");
3920 printf("\nTesting and compilation");
3921 printf(" are incomplete\n\n");
3922 goto end;
3923 }
3924 }
3925
3926 if (!(ratio <= ratio_max))
3927 ratio_max = ratio;
3928
3929 if (ratio != 0.0 && !(ratio >= ratio_min))
3930 ratio_min = ratio;
3931
3932 } /* end of incy loop */
3933
3934 } /* end of incx loop */
3935
3936 } /* end of randmize loop */
3937
3938 } /* end of ldb loop */
3939
3940 } /* end of lda loop */
3941
3942 } /* end of order loop */
3943
3944 } /* end of nr test loop */
3945
3946 } /* end of norm loop */
3947
3948
3949
3950 } /* end of beta loop */
3951
3952 } /* end of alpha loop */
3953
3954 FPU_FIX_STOP;
3955
3956 end:
3957 blas_free(y);
3958 blas_free(a);
3959 blas_free(a_use);
3960 blas_free(B);
3961 blas_free(B_use);
3962 blas_free(x);
3963 blas_free(head_r_true);
3964 blas_free(tail_r_true);
3965 blas_free(ratios);
3966 blas_free(a_vec);
3967 blas_free(x_vec);
3968
3969 *max_ratio = ratio_max;
3970 *min_ratio = ratio_min;
3971 *num_tests = test_count;
3972 *num_bad_ratio = bad_ratio_count;
3973
3974 }
do_test_zge_sum_mv_z_d(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)3975 void do_test_zge_sum_mv_z_d
3976 (int m, int n,
3977 int ntests, int *seed, double thresh, int debug, float test_prob,
3978 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
3979
3980 /* Function name */
3981 const char fname[] = "BLAS_zge_sum_mv_z_d";
3982
3983 int i;
3984 int yi;
3985 int incyi, y_starti, incx_veci;
3986 int test_count;
3987 int bad_ratio_count;
3988
3989 int ri;
3990 int incri;
3991 int inca, incx, incy;
3992
3993 double ratio;
3994
3995 double ratio_min, ratio_max;
3996
3997 double eps_int; /* internal machine epsilon */
3998 double un_int; /* internal underflow threshold */
3999
4000 double rin[2];
4001 double rout[2];
4002 double head_r_true_elem[2], tail_r_true_elem[2];
4003
4004 enum blas_order_type order_type;
4005 enum blas_prec_type prec;
4006
4007 int order_val;
4008 int lda_val, incx_val, incy_val;
4009 int ldb_val;
4010 int alpha_val, beta_val;
4011 int randomize_val;
4012
4013
4014
4015 int lda, ldb;
4016 int alpha_flag, beta_flag;
4017 int saved_seed;
4018 int norm;
4019 int test_no;
4020
4021 int n_i, m_i;
4022 int inca_veci;
4023
4024 double alpha[2];
4025 double beta[2];
4026 double beta_zero_fake[2];
4027 double alpha_use[2];
4028 double *a;
4029 double *a_use;
4030 double *B;
4031 double *B_use;
4032 double *x;
4033 double *y;
4034 double *a_vec;
4035 double *x_vec;
4036
4037
4038 double *ratios;
4039
4040 /* true result calculated by testgen, in double-double */
4041 double *head_r_true, *tail_r_true;
4042
4043
4044 FPU_FIX_DECL;
4045
4046 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
4047
4048 if (n < 0 || ntests < 0)
4049 BLAS_error(fname, -3, n, NULL);
4050
4051 /* initialization */
4052 saved_seed = *seed;
4053 ratio = 0.0;
4054 ratio_min = 1e308;
4055 ratio_max = 0.0;
4056
4057 *num_tests = 0;
4058 *num_bad_ratio = 0;
4059 *min_ratio = 0.0;
4060 *max_ratio = 0.0;
4061
4062 if (n == 0)
4063 return;
4064
4065 FPU_FIX_START;
4066
4067 n_i = n;
4068 m_i = m;
4069
4070 inca = incx = incy = 1;
4071 inca *= 2;
4072
4073 incy *= 2;
4074
4075 /* allocate memory for arrays */
4076 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
4077 if (4 * m_i > 0 && y == NULL) {
4078 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4079 }
4080 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double) * 2);
4081 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
4082 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4083 }
4084 a_use =
4085 (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double) * 2);
4086 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
4087 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4088 }
4089 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double) * 2);
4090 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
4091 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4092 }
4093 B_use =
4094 (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double) * 2);
4095 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
4096 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4097 }
4098 x = (double *) blas_malloc(4 * n_i * sizeof(double));
4099 if (4 * n_i > 0 && x == NULL) {
4100 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4101 }
4102
4103 inca_veci = 1;
4104 inca_veci *= 2;
4105 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double) * 2);
4106 if (2 * n_i > 0 && a_vec == NULL) {
4107 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4108 }
4109 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
4110 if (2 * n_i > 0 && x_vec == NULL) {
4111 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4112 }
4113 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
4114 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
4115 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4116 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4117 }
4118 ratios = (double *) blas_malloc(m_i * sizeof(double));
4119 if (m_i > 0 && ratios == NULL) {
4120 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4121 }
4122
4123 test_count = 0;
4124 bad_ratio_count = 0;
4125
4126 /* vary alpha */
4127 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
4128
4129 alpha_flag = 0;
4130 switch (alpha_val) {
4131 case 0:
4132 alpha[0] = alpha[1] = 0.0;
4133 alpha_flag = 1;
4134 break;
4135 case 1:
4136 alpha[0] = 1.0;
4137 alpha[1] = 0.0;
4138 alpha_flag = 1;
4139 break;
4140 }
4141
4142 /* vary beta */
4143 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
4144 beta_flag = 0;
4145 switch (beta_val) {
4146 case 0:
4147 beta[0] = beta[1] = 0.0;
4148 beta_flag = 1;
4149 break;
4150 case 1:
4151 beta[0] = 1.0;
4152 beta[1] = 0.0;
4153 beta_flag = 1;
4154 break;
4155 }
4156
4157
4158 eps_int = power(2, -BITS_D);
4159 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4160 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4161 prec = blas_prec_double;
4162
4163 /* vary norm -- underflow, approx 1, overflow */
4164 for (norm = NORM_START; norm <= NORM_END; norm++) {
4165
4166 /* number of tests */
4167 for (test_no = 0; test_no < ntests; test_no++) {
4168
4169
4170 /* vary storage format */
4171 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
4172
4173 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
4174
4175 /* vary lda = n_i, n_i+1, 2*n_i */
4176 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
4177
4178 if (order_type == blas_rowmajor) {
4179 lda = (lda_val == 0) ? n_i :
4180 (lda_val == 1) ? n_i + 1 : n_i * n_i;
4181 } else {
4182 lda = (lda_val == 0) ? m_i :
4183 (lda_val == 1) ? m_i + 1 : m_i * m_i;
4184 }
4185
4186 /* vary ldb = n_i, n_i+1, 2*n_i */
4187 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
4188
4189 if (order_type == blas_rowmajor) {
4190 ldb = (ldb_val == 0) ? n_i :
4191 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
4192 } else {
4193 ldb = (ldb_val == 0) ? m_i :
4194 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
4195 }
4196
4197 for (randomize_val = RANDOMIZE_START;
4198 randomize_val <= RANDOMIZE_END; randomize_val++) {
4199
4200 /* For the sake of speed, we throw out this case at random */
4201 if (xrand(seed) >= test_prob)
4202 continue;
4203
4204 /* finally we are here to generate the test case */
4205 /* alpha_use, a_use, B_use are the generated alpha, a, B
4206 * before any scaling.
4207 * That is, in the generator, alpha == beta == alpha_use
4208 * before scaling. */
4209
4210 saved_seed = *seed;
4211 BLAS_zge_sum_mv_z_d_testgen(norm, order_type,
4212 m, n, randomize_val, &alpha,
4213 alpha_flag, &beta, beta_flag, a,
4214 lda, B, ldb, x_vec, 1,
4215 &alpha_use, a_use, B_use, seed,
4216 head_r_true, tail_r_true);
4217
4218 /* vary incx = 1, 2 */
4219 for (incx_val = INCX_START; incx_val <= INCX_END;
4220 incx_val++) {
4221
4222 incx = incx_val;
4223 if (0 == incx)
4224 continue;
4225
4226 dcopy_vector(x_vec, n_i, 1, x, incx);
4227
4228 /* vary incy = 1, 2 */
4229 for (incy_val = INCY_START; incy_val <= INCY_END;
4230 incy_val++) {
4231
4232 incy = incy_val;
4233 if (0 == incy)
4234 continue;
4235
4236 test_count++;
4237
4238 /* call ge_sum_mv routines to be tested */
4239 FPU_FIX_STOP;
4240 BLAS_zge_sum_mv_z_d(order_type,
4241 m, n, alpha, a, lda, x, incx, beta,
4242 B, ldb, y, incy);
4243 FPU_FIX_START;
4244
4245 /* now compute the ratio using test_BLAS_xdot */
4246 /* copy a row from A, use x, run
4247 dot test */
4248
4249 incyi = incy;
4250
4251 incri = 1;
4252 incx_veci = 1;
4253
4254 incyi *= 2;
4255 incri *= 2;
4256 if (incy < 0) {
4257 y_starti = (-m_i + 1) * incyi;
4258 } else {
4259 y_starti = 0;
4260 }
4261 /* make two copies of x into x_vec. redundant */
4262 dcopy_vector(x, n_i, incx, x_vec, 1);
4263 dcopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
4264 1);
4265 for (i = 0, yi = y_starti, ri = 0; i < m_i;
4266 i++, yi += incyi, ri += incri) {
4267 zge_copy_row(order_type, blas_no_trans, m_i, n_i,
4268 a_use, lda, a_vec, i);
4269 zge_copy_row(order_type, blas_no_trans, m_i, n_i,
4270 B_use, ldb, (a_vec + inca_veci * n_i),
4271 i);
4272
4273 rin[0] = rin[1] = 0.0;
4274 rout[0] = y[yi];
4275 rout[1] = y[yi + 1];
4276 head_r_true_elem[0] = head_r_true[ri];
4277 head_r_true_elem[1] = head_r_true[ri + 1];
4278 tail_r_true_elem[0] = tail_r_true[ri];
4279 tail_r_true_elem[1] = tail_r_true[ri + 1];
4280
4281 test_BLAS_zdot_z_d(2 * n_i,
4282 blas_no_conj,
4283 alpha_use, beta_zero_fake, rin,
4284 rout, head_r_true_elem,
4285 tail_r_true_elem, a_vec, 1, x_vec,
4286 1, eps_int, un_int, &ratios[i]);
4287
4288 /* take the max ratio */
4289 if (i == 0) {
4290 ratio = ratios[0];
4291 /* The !<= below causes NaN errors
4292 * to be included.
4293 * Note that (NaN > 0) is false */
4294 } else if (!(ratios[i] <= ratio)) {
4295 ratio = ratios[i];
4296 }
4297 } /* end of dot-test loop */
4298
4299 /* The !<= below causes NaN errors
4300 * to be included.
4301 * Note that (NaN > 0) is false */
4302 if (!(ratio <= thresh)) {
4303
4304 if (debug == 3) {
4305 printf("\n\t\tTest # %d\n", test_count);
4306 printf("y type : z, a type : z, x type : d\n");
4307 printf("Seed = %d\t", saved_seed);
4308 printf("n %d, m %d\n", n, m);
4309 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
4310 ldb, incx, incx);
4311
4312 if (order_type == blas_rowmajor)
4313 printf("row ");
4314 else
4315 printf("col ");
4316
4317 printf("NORM %d, ALPHA %d, BETA %d\n",
4318 norm, alpha_val, beta_val);
4319 printf("randomize %d\n", randomize_val);
4320
4321 /* print out info */
4322 printf("alpha = ");
4323 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
4324 printf(" ");
4325 printf("beta = ");
4326 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
4327 printf("\n");
4328 printf("alpha_use = ");
4329 printf("(%24.16e, %24.16e)", alpha_use[0],
4330 alpha_use[1]);;
4331 printf("\n");
4332
4333 zge_print_matrix(a, m_i, n_i, lda, order_type, "A");
4334 zge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
4335 dprint_vector(x, n_i, incx, "x");
4336
4337 zprint_vector(y, m_i, incy, "y");
4338
4339 zprint_vector(head_r_true, m_i, 1, "head_r_true");
4340
4341 zge_print_matrix(a_use, m_i, n_i, lda, order_type,
4342 "A_use");
4343 zge_print_matrix(B_use, m_i, n_i, ldb, order_type,
4344 "B_use");
4345
4346 dprint_vector(ratios, m_i, 1, "ratios");
4347 printf("ratio = %g\n", ratio);
4348 fflush(stdout);
4349 }
4350 bad_ratio_count++;
4351 if (bad_ratio_count >= MAX_BAD_TESTS) {
4352 printf("\ntoo many failures, exiting....");
4353 printf("\nTesting and compilation");
4354 printf(" are incomplete\n\n");
4355 goto end;
4356 }
4357 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4358 printf("\nFlagrant ratio error, exiting...");
4359 printf("\nTesting and compilation");
4360 printf(" are incomplete\n\n");
4361 goto end;
4362 }
4363 }
4364
4365 if (!(ratio <= ratio_max))
4366 ratio_max = ratio;
4367
4368 if (ratio != 0.0 && !(ratio >= ratio_min))
4369 ratio_min = ratio;
4370
4371 } /* end of incy loop */
4372
4373 } /* end of incx loop */
4374
4375 } /* end of randmize loop */
4376
4377 } /* end of ldb loop */
4378
4379 } /* end of lda loop */
4380
4381 } /* end of order loop */
4382
4383 } /* end of nr test loop */
4384
4385 } /* end of norm loop */
4386
4387
4388
4389 } /* end of beta loop */
4390
4391 } /* end of alpha loop */
4392
4393 FPU_FIX_STOP;
4394
4395 end:
4396 blas_free(y);
4397 blas_free(a);
4398 blas_free(a_use);
4399 blas_free(B);
4400 blas_free(B_use);
4401 blas_free(x);
4402 blas_free(head_r_true);
4403 blas_free(tail_r_true);
4404 blas_free(ratios);
4405 blas_free(a_vec);
4406 blas_free(x_vec);
4407
4408 *max_ratio = ratio_max;
4409 *min_ratio = ratio_min;
4410 *num_tests = test_count;
4411 *num_bad_ratio = bad_ratio_count;
4412
4413 }
do_test_zge_sum_mv_d_z(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)4414 void do_test_zge_sum_mv_d_z
4415 (int m, int n,
4416 int ntests, int *seed, double thresh, int debug, float test_prob,
4417 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
4418
4419 /* Function name */
4420 const char fname[] = "BLAS_zge_sum_mv_d_z";
4421
4422 int i;
4423 int yi;
4424 int incyi, y_starti, incx_veci;
4425 int test_count;
4426 int bad_ratio_count;
4427
4428 int ri;
4429 int incri;
4430 int inca, incx, incy;
4431
4432 double ratio;
4433
4434 double ratio_min, ratio_max;
4435
4436 double eps_int; /* internal machine epsilon */
4437 double un_int; /* internal underflow threshold */
4438
4439 double rin[2];
4440 double rout[2];
4441 double head_r_true_elem[2], tail_r_true_elem[2];
4442
4443 enum blas_order_type order_type;
4444 enum blas_prec_type prec;
4445
4446 int order_val;
4447 int lda_val, incx_val, incy_val;
4448 int ldb_val;
4449 int alpha_val, beta_val;
4450 int randomize_val;
4451
4452
4453
4454 int lda, ldb;
4455 int alpha_flag, beta_flag;
4456 int saved_seed;
4457 int norm;
4458 int test_no;
4459
4460 int n_i, m_i;
4461 int inca_veci;
4462
4463 double alpha[2];
4464 double beta[2];
4465 double beta_zero_fake[2];
4466 double alpha_use[2];
4467 double *a;
4468 double *a_use;
4469 double *B;
4470 double *B_use;
4471 double *x;
4472 double *y;
4473 double *a_vec;
4474 double *x_vec;
4475
4476
4477 double *ratios;
4478
4479 /* true result calculated by testgen, in double-double */
4480 double *head_r_true, *tail_r_true;
4481
4482
4483 FPU_FIX_DECL;
4484
4485 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
4486
4487 if (n < 0 || ntests < 0)
4488 BLAS_error(fname, -3, n, NULL);
4489
4490 /* initialization */
4491 saved_seed = *seed;
4492 ratio = 0.0;
4493 ratio_min = 1e308;
4494 ratio_max = 0.0;
4495
4496 *num_tests = 0;
4497 *num_bad_ratio = 0;
4498 *min_ratio = 0.0;
4499 *max_ratio = 0.0;
4500
4501 if (n == 0)
4502 return;
4503
4504 FPU_FIX_START;
4505
4506 n_i = n;
4507 m_i = m;
4508
4509 inca = incx = incy = 1;
4510
4511 incx *= 2;
4512 incy *= 2;
4513
4514 /* allocate memory for arrays */
4515 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
4516 if (4 * m_i > 0 && y == NULL) {
4517 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4518 }
4519 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double));
4520 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
4521 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4522 }
4523 a_use = (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double));
4524 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
4525 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4526 }
4527 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
4528 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
4529 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4530 }
4531 B_use = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
4532 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
4533 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4534 }
4535 x = (double *) blas_malloc(4 * n_i * sizeof(double) * 2);
4536 if (4 * n_i > 0 && x == NULL) {
4537 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4538 }
4539
4540 inca_veci = 1;
4541
4542 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
4543 if (2 * n_i > 0 && a_vec == NULL) {
4544 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4545 }
4546 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double) * 2);
4547 if (2 * n_i > 0 && x_vec == NULL) {
4548 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4549 }
4550 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
4551 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
4552 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4553 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4554 }
4555 ratios = (double *) blas_malloc(m_i * sizeof(double));
4556 if (m_i > 0 && ratios == NULL) {
4557 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4558 }
4559
4560 test_count = 0;
4561 bad_ratio_count = 0;
4562
4563 /* vary alpha */
4564 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
4565
4566 alpha_flag = 0;
4567 switch (alpha_val) {
4568 case 0:
4569 alpha[0] = alpha[1] = 0.0;
4570 alpha_flag = 1;
4571 break;
4572 case 1:
4573 alpha[0] = 1.0;
4574 alpha[1] = 0.0;
4575 alpha_flag = 1;
4576 break;
4577 }
4578
4579 /* vary beta */
4580 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
4581 beta_flag = 0;
4582 switch (beta_val) {
4583 case 0:
4584 beta[0] = beta[1] = 0.0;
4585 beta_flag = 1;
4586 break;
4587 case 1:
4588 beta[0] = 1.0;
4589 beta[1] = 0.0;
4590 beta_flag = 1;
4591 break;
4592 }
4593
4594
4595 eps_int = power(2, -BITS_D);
4596 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
4597 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
4598 prec = blas_prec_double;
4599
4600 /* vary norm -- underflow, approx 1, overflow */
4601 for (norm = NORM_START; norm <= NORM_END; norm++) {
4602
4603 /* number of tests */
4604 for (test_no = 0; test_no < ntests; test_no++) {
4605
4606
4607 /* vary storage format */
4608 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
4609
4610 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
4611
4612 /* vary lda = n_i, n_i+1, 2*n_i */
4613 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
4614
4615 if (order_type == blas_rowmajor) {
4616 lda = (lda_val == 0) ? n_i :
4617 (lda_val == 1) ? n_i + 1 : n_i * n_i;
4618 } else {
4619 lda = (lda_val == 0) ? m_i :
4620 (lda_val == 1) ? m_i + 1 : m_i * m_i;
4621 }
4622
4623 /* vary ldb = n_i, n_i+1, 2*n_i */
4624 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
4625
4626 if (order_type == blas_rowmajor) {
4627 ldb = (ldb_val == 0) ? n_i :
4628 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
4629 } else {
4630 ldb = (ldb_val == 0) ? m_i :
4631 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
4632 }
4633
4634 for (randomize_val = RANDOMIZE_START;
4635 randomize_val <= RANDOMIZE_END; randomize_val++) {
4636
4637 /* For the sake of speed, we throw out this case at random */
4638 if (xrand(seed) >= test_prob)
4639 continue;
4640
4641 /* finally we are here to generate the test case */
4642 /* alpha_use, a_use, B_use are the generated alpha, a, B
4643 * before any scaling.
4644 * That is, in the generator, alpha == beta == alpha_use
4645 * before scaling. */
4646
4647 saved_seed = *seed;
4648 BLAS_zge_sum_mv_d_z_testgen(norm, order_type,
4649 m, n, randomize_val, &alpha,
4650 alpha_flag, &beta, beta_flag, a,
4651 lda, B, ldb, x_vec, 1,
4652 &alpha_use, a_use, B_use, seed,
4653 head_r_true, tail_r_true);
4654
4655 /* vary incx = 1, 2 */
4656 for (incx_val = INCX_START; incx_val <= INCX_END;
4657 incx_val++) {
4658
4659 incx = incx_val;
4660 if (0 == incx)
4661 continue;
4662
4663 zcopy_vector(x_vec, n_i, 1, x, incx);
4664
4665 /* vary incy = 1, 2 */
4666 for (incy_val = INCY_START; incy_val <= INCY_END;
4667 incy_val++) {
4668
4669 incy = incy_val;
4670 if (0 == incy)
4671 continue;
4672
4673 test_count++;
4674
4675 /* call ge_sum_mv routines to be tested */
4676 FPU_FIX_STOP;
4677 BLAS_zge_sum_mv_d_z(order_type,
4678 m, n, alpha, a, lda, x, incx, beta,
4679 B, ldb, y, incy);
4680 FPU_FIX_START;
4681
4682 /* now compute the ratio using test_BLAS_xdot */
4683 /* copy a row from A, use x, run
4684 dot test */
4685
4686 incyi = incy;
4687
4688 incri = 1;
4689 incx_veci = 1;
4690 incx_veci *= 2;
4691 incyi *= 2;
4692 incri *= 2;
4693 if (incy < 0) {
4694 y_starti = (-m_i + 1) * incyi;
4695 } else {
4696 y_starti = 0;
4697 }
4698 /* make two copies of x into x_vec. redundant */
4699 zcopy_vector(x, n_i, incx, x_vec, 1);
4700 zcopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
4701 1);
4702 for (i = 0, yi = y_starti, ri = 0; i < m_i;
4703 i++, yi += incyi, ri += incri) {
4704 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
4705 a_use, lda, a_vec, i);
4706 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
4707 B_use, ldb, (a_vec + inca_veci * n_i),
4708 i);
4709
4710 rin[0] = rin[1] = 0.0;
4711 rout[0] = y[yi];
4712 rout[1] = y[yi + 1];
4713 head_r_true_elem[0] = head_r_true[ri];
4714 head_r_true_elem[1] = head_r_true[ri + 1];
4715 tail_r_true_elem[0] = tail_r_true[ri];
4716 tail_r_true_elem[1] = tail_r_true[ri + 1];
4717
4718 test_BLAS_zdot_d_z(2 * n_i,
4719 blas_no_conj,
4720 alpha_use, beta_zero_fake, rin,
4721 rout, head_r_true_elem,
4722 tail_r_true_elem, a_vec, 1, x_vec,
4723 1, eps_int, un_int, &ratios[i]);
4724
4725 /* take the max ratio */
4726 if (i == 0) {
4727 ratio = ratios[0];
4728 /* The !<= below causes NaN errors
4729 * to be included.
4730 * Note that (NaN > 0) is false */
4731 } else if (!(ratios[i] <= ratio)) {
4732 ratio = ratios[i];
4733 }
4734 } /* end of dot-test loop */
4735
4736 /* The !<= below causes NaN errors
4737 * to be included.
4738 * Note that (NaN > 0) is false */
4739 if (!(ratio <= thresh)) {
4740
4741 if (debug == 3) {
4742 printf("\n\t\tTest # %d\n", test_count);
4743 printf("y type : z, a type : d, x type : z\n");
4744 printf("Seed = %d\t", saved_seed);
4745 printf("n %d, m %d\n", n, m);
4746 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
4747 ldb, incx, incx);
4748
4749 if (order_type == blas_rowmajor)
4750 printf("row ");
4751 else
4752 printf("col ");
4753
4754 printf("NORM %d, ALPHA %d, BETA %d\n",
4755 norm, alpha_val, beta_val);
4756 printf("randomize %d\n", randomize_val);
4757
4758 /* print out info */
4759 printf("alpha = ");
4760 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
4761 printf(" ");
4762 printf("beta = ");
4763 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
4764 printf("\n");
4765 printf("alpha_use = ");
4766 printf("(%24.16e, %24.16e)", alpha_use[0],
4767 alpha_use[1]);;
4768 printf("\n");
4769
4770 dge_print_matrix(a, m_i, n_i, lda, order_type, "A");
4771 dge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
4772 zprint_vector(x, n_i, incx, "x");
4773
4774 zprint_vector(y, m_i, incy, "y");
4775
4776 zprint_vector(head_r_true, m_i, 1, "head_r_true");
4777
4778 dge_print_matrix(a_use, m_i, n_i, lda, order_type,
4779 "A_use");
4780 dge_print_matrix(B_use, m_i, n_i, ldb, order_type,
4781 "B_use");
4782
4783 dprint_vector(ratios, m_i, 1, "ratios");
4784 printf("ratio = %g\n", ratio);
4785 fflush(stdout);
4786 }
4787 bad_ratio_count++;
4788 if (bad_ratio_count >= MAX_BAD_TESTS) {
4789 printf("\ntoo many failures, exiting....");
4790 printf("\nTesting and compilation");
4791 printf(" are incomplete\n\n");
4792 goto end;
4793 }
4794 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4795 printf("\nFlagrant ratio error, exiting...");
4796 printf("\nTesting and compilation");
4797 printf(" are incomplete\n\n");
4798 goto end;
4799 }
4800 }
4801
4802 if (!(ratio <= ratio_max))
4803 ratio_max = ratio;
4804
4805 if (ratio != 0.0 && !(ratio >= ratio_min))
4806 ratio_min = ratio;
4807
4808 } /* end of incy loop */
4809
4810 } /* end of incx loop */
4811
4812 } /* end of randmize loop */
4813
4814 } /* end of ldb loop */
4815
4816 } /* end of lda loop */
4817
4818 } /* end of order loop */
4819
4820 } /* end of nr test loop */
4821
4822 } /* end of norm loop */
4823
4824
4825
4826 } /* end of beta loop */
4827
4828 } /* end of alpha loop */
4829
4830 FPU_FIX_STOP;
4831
4832 end:
4833 blas_free(y);
4834 blas_free(a);
4835 blas_free(a_use);
4836 blas_free(B);
4837 blas_free(B_use);
4838 blas_free(x);
4839 blas_free(head_r_true);
4840 blas_free(tail_r_true);
4841 blas_free(ratios);
4842 blas_free(a_vec);
4843 blas_free(x_vec);
4844
4845 *max_ratio = ratio_max;
4846 *min_ratio = ratio_min;
4847 *num_tests = test_count;
4848 *num_bad_ratio = bad_ratio_count;
4849
4850 }
do_test_zge_sum_mv_d_d(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)4851 void do_test_zge_sum_mv_d_d
4852 (int m, int n,
4853 int ntests, int *seed, double thresh, int debug, float test_prob,
4854 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
4855
4856 /* Function name */
4857 const char fname[] = "BLAS_zge_sum_mv_d_d";
4858
4859 int i;
4860 int yi;
4861 int incyi, y_starti, incx_veci;
4862 int test_count;
4863 int bad_ratio_count;
4864
4865 int ri;
4866 int incri;
4867 int inca, incx, incy;
4868
4869 double ratio;
4870
4871 double ratio_min, ratio_max;
4872
4873 double eps_int; /* internal machine epsilon */
4874 double un_int; /* internal underflow threshold */
4875
4876 double rin[2];
4877 double rout[2];
4878 double head_r_true_elem[2], tail_r_true_elem[2];
4879
4880 enum blas_order_type order_type;
4881 enum blas_prec_type prec;
4882
4883 int order_val;
4884 int lda_val, incx_val, incy_val;
4885 int ldb_val;
4886 int alpha_val, beta_val;
4887 int randomize_val;
4888
4889
4890
4891 int lda, ldb;
4892 int alpha_flag, beta_flag;
4893 int saved_seed;
4894 int norm;
4895 int test_no;
4896
4897 int n_i, m_i;
4898 int inca_veci;
4899
4900 double alpha[2];
4901 double beta[2];
4902 double beta_zero_fake[2];
4903 double alpha_use[2];
4904 double *a;
4905 double *a_use;
4906 double *B;
4907 double *B_use;
4908 double *x;
4909 double *y;
4910 double *a_vec;
4911 double *x_vec;
4912
4913
4914 double *ratios;
4915
4916 /* true result calculated by testgen, in double-double */
4917 double *head_r_true, *tail_r_true;
4918
4919
4920 FPU_FIX_DECL;
4921
4922 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
4923
4924 if (n < 0 || ntests < 0)
4925 BLAS_error(fname, -3, n, NULL);
4926
4927 /* initialization */
4928 saved_seed = *seed;
4929 ratio = 0.0;
4930 ratio_min = 1e308;
4931 ratio_max = 0.0;
4932
4933 *num_tests = 0;
4934 *num_bad_ratio = 0;
4935 *min_ratio = 0.0;
4936 *max_ratio = 0.0;
4937
4938 if (n == 0)
4939 return;
4940
4941 FPU_FIX_START;
4942
4943 n_i = n;
4944 m_i = m;
4945
4946 inca = incx = incy = 1;
4947
4948
4949 incy *= 2;
4950
4951 /* allocate memory for arrays */
4952 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
4953 if (4 * m_i > 0 && y == NULL) {
4954 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4955 }
4956 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double));
4957 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
4958 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4959 }
4960 a_use = (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double));
4961 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
4962 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4963 }
4964 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
4965 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
4966 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4967 }
4968 B_use = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
4969 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
4970 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4971 }
4972 x = (double *) blas_malloc(4 * n_i * sizeof(double));
4973 if (4 * n_i > 0 && x == NULL) {
4974 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4975 }
4976
4977 inca_veci = 1;
4978
4979 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
4980 if (2 * n_i > 0 && a_vec == NULL) {
4981 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4982 }
4983 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
4984 if (2 * n_i > 0 && x_vec == NULL) {
4985 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4986 }
4987 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
4988 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
4989 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4990 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4991 }
4992 ratios = (double *) blas_malloc(m_i * sizeof(double));
4993 if (m_i > 0 && ratios == NULL) {
4994 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4995 }
4996
4997 test_count = 0;
4998 bad_ratio_count = 0;
4999
5000 /* vary alpha */
5001 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
5002
5003 alpha_flag = 0;
5004 switch (alpha_val) {
5005 case 0:
5006 alpha[0] = alpha[1] = 0.0;
5007 alpha_flag = 1;
5008 break;
5009 case 1:
5010 alpha[0] = 1.0;
5011 alpha[1] = 0.0;
5012 alpha_flag = 1;
5013 break;
5014 }
5015
5016 /* vary beta */
5017 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
5018 beta_flag = 0;
5019 switch (beta_val) {
5020 case 0:
5021 beta[0] = beta[1] = 0.0;
5022 beta_flag = 1;
5023 break;
5024 case 1:
5025 beta[0] = 1.0;
5026 beta[1] = 0.0;
5027 beta_flag = 1;
5028 break;
5029 }
5030
5031
5032 eps_int = power(2, -BITS_D);
5033 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5034 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5035 prec = blas_prec_double;
5036
5037 /* vary norm -- underflow, approx 1, overflow */
5038 for (norm = NORM_START; norm <= NORM_END; norm++) {
5039
5040 /* number of tests */
5041 for (test_no = 0; test_no < ntests; test_no++) {
5042
5043
5044 /* vary storage format */
5045 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
5046
5047 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
5048
5049 /* vary lda = n_i, n_i+1, 2*n_i */
5050 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
5051
5052 if (order_type == blas_rowmajor) {
5053 lda = (lda_val == 0) ? n_i :
5054 (lda_val == 1) ? n_i + 1 : n_i * n_i;
5055 } else {
5056 lda = (lda_val == 0) ? m_i :
5057 (lda_val == 1) ? m_i + 1 : m_i * m_i;
5058 }
5059
5060 /* vary ldb = n_i, n_i+1, 2*n_i */
5061 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
5062
5063 if (order_type == blas_rowmajor) {
5064 ldb = (ldb_val == 0) ? n_i :
5065 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
5066 } else {
5067 ldb = (ldb_val == 0) ? m_i :
5068 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
5069 }
5070
5071 for (randomize_val = RANDOMIZE_START;
5072 randomize_val <= RANDOMIZE_END; randomize_val++) {
5073
5074 /* For the sake of speed, we throw out this case at random */
5075 if (xrand(seed) >= test_prob)
5076 continue;
5077
5078 /* finally we are here to generate the test case */
5079 /* alpha_use, a_use, B_use are the generated alpha, a, B
5080 * before any scaling.
5081 * That is, in the generator, alpha == beta == alpha_use
5082 * before scaling. */
5083
5084 saved_seed = *seed;
5085 BLAS_zge_sum_mv_d_d_testgen(norm, order_type,
5086 m, n, randomize_val, &alpha,
5087 alpha_flag, &beta, beta_flag, a,
5088 lda, B, ldb, x_vec, 1,
5089 &alpha_use, a_use, B_use, seed,
5090 head_r_true, tail_r_true);
5091
5092 /* vary incx = 1, 2 */
5093 for (incx_val = INCX_START; incx_val <= INCX_END;
5094 incx_val++) {
5095
5096 incx = incx_val;
5097 if (0 == incx)
5098 continue;
5099
5100 dcopy_vector(x_vec, n_i, 1, x, incx);
5101
5102 /* vary incy = 1, 2 */
5103 for (incy_val = INCY_START; incy_val <= INCY_END;
5104 incy_val++) {
5105
5106 incy = incy_val;
5107 if (0 == incy)
5108 continue;
5109
5110 test_count++;
5111
5112 /* call ge_sum_mv routines to be tested */
5113 FPU_FIX_STOP;
5114 BLAS_zge_sum_mv_d_d(order_type,
5115 m, n, alpha, a, lda, x, incx, beta,
5116 B, ldb, y, incy);
5117 FPU_FIX_START;
5118
5119 /* now compute the ratio using test_BLAS_xdot */
5120 /* copy a row from A, use x, run
5121 dot test */
5122
5123 incyi = incy;
5124
5125 incri = 1;
5126 incx_veci = 1;
5127
5128 incyi *= 2;
5129 incri *= 2;
5130 if (incy < 0) {
5131 y_starti = (-m_i + 1) * incyi;
5132 } else {
5133 y_starti = 0;
5134 }
5135 /* make two copies of x into x_vec. redundant */
5136 dcopy_vector(x, n_i, incx, x_vec, 1);
5137 dcopy_vector(x, n_i, incx, (x_vec + (n_i * incx_veci)),
5138 1);
5139 for (i = 0, yi = y_starti, ri = 0; i < m_i;
5140 i++, yi += incyi, ri += incri) {
5141 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
5142 a_use, lda, a_vec, i);
5143 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
5144 B_use, ldb, (a_vec + inca_veci * n_i),
5145 i);
5146
5147 rin[0] = rin[1] = 0.0;
5148 rout[0] = y[yi];
5149 rout[1] = y[yi + 1];
5150 head_r_true_elem[0] = head_r_true[ri];
5151 head_r_true_elem[1] = head_r_true[ri + 1];
5152 tail_r_true_elem[0] = tail_r_true[ri];
5153 tail_r_true_elem[1] = tail_r_true[ri + 1];
5154
5155 test_BLAS_zdot_d_d(2 * n_i,
5156 blas_no_conj,
5157 alpha_use, beta_zero_fake, rin,
5158 rout, head_r_true_elem,
5159 tail_r_true_elem, a_vec, 1, x_vec,
5160 1, eps_int, un_int, &ratios[i]);
5161
5162 /* take the max ratio */
5163 if (i == 0) {
5164 ratio = ratios[0];
5165 /* The !<= below causes NaN errors
5166 * to be included.
5167 * Note that (NaN > 0) is false */
5168 } else if (!(ratios[i] <= ratio)) {
5169 ratio = ratios[i];
5170 }
5171 } /* end of dot-test loop */
5172
5173 /* The !<= below causes NaN errors
5174 * to be included.
5175 * Note that (NaN > 0) is false */
5176 if (!(ratio <= thresh)) {
5177
5178 if (debug == 3) {
5179 printf("\n\t\tTest # %d\n", test_count);
5180 printf("y type : z, a type : d, x type : d\n");
5181 printf("Seed = %d\t", saved_seed);
5182 printf("n %d, m %d\n", n, m);
5183 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
5184 ldb, incx, incx);
5185
5186 if (order_type == blas_rowmajor)
5187 printf("row ");
5188 else
5189 printf("col ");
5190
5191 printf("NORM %d, ALPHA %d, BETA %d\n",
5192 norm, alpha_val, beta_val);
5193 printf("randomize %d\n", randomize_val);
5194
5195 /* print out info */
5196 printf("alpha = ");
5197 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
5198 printf(" ");
5199 printf("beta = ");
5200 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
5201 printf("\n");
5202 printf("alpha_use = ");
5203 printf("(%24.16e, %24.16e)", alpha_use[0],
5204 alpha_use[1]);;
5205 printf("\n");
5206
5207 dge_print_matrix(a, m_i, n_i, lda, order_type, "A");
5208 dge_print_matrix(B, m_i, n_i, ldb, order_type, "B");
5209 dprint_vector(x, n_i, incx, "x");
5210
5211 zprint_vector(y, m_i, incy, "y");
5212
5213 zprint_vector(head_r_true, m_i, 1, "head_r_true");
5214
5215 dge_print_matrix(a_use, m_i, n_i, lda, order_type,
5216 "A_use");
5217 dge_print_matrix(B_use, m_i, n_i, ldb, order_type,
5218 "B_use");
5219
5220 dprint_vector(ratios, m_i, 1, "ratios");
5221 printf("ratio = %g\n", ratio);
5222 fflush(stdout);
5223 }
5224 bad_ratio_count++;
5225 if (bad_ratio_count >= MAX_BAD_TESTS) {
5226 printf("\ntoo many failures, exiting....");
5227 printf("\nTesting and compilation");
5228 printf(" are incomplete\n\n");
5229 goto end;
5230 }
5231 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
5232 printf("\nFlagrant ratio error, exiting...");
5233 printf("\nTesting and compilation");
5234 printf(" are incomplete\n\n");
5235 goto end;
5236 }
5237 }
5238
5239 if (!(ratio <= ratio_max))
5240 ratio_max = ratio;
5241
5242 if (ratio != 0.0 && !(ratio >= ratio_min))
5243 ratio_min = ratio;
5244
5245 } /* end of incy loop */
5246
5247 } /* end of incx loop */
5248
5249 } /* end of randmize loop */
5250
5251 } /* end of ldb loop */
5252
5253 } /* end of lda loop */
5254
5255 } /* end of order loop */
5256
5257 } /* end of nr test loop */
5258
5259 } /* end of norm loop */
5260
5261
5262
5263 } /* end of beta loop */
5264
5265 } /* end of alpha loop */
5266
5267 FPU_FIX_STOP;
5268
5269 end:
5270 blas_free(y);
5271 blas_free(a);
5272 blas_free(a_use);
5273 blas_free(B);
5274 blas_free(B_use);
5275 blas_free(x);
5276 blas_free(head_r_true);
5277 blas_free(tail_r_true);
5278 blas_free(ratios);
5279 blas_free(a_vec);
5280 blas_free(x_vec);
5281
5282 *max_ratio = ratio_max;
5283 *min_ratio = ratio_min;
5284 *num_tests = test_count;
5285 *num_bad_ratio = bad_ratio_count;
5286
5287 }
do_test_sge_sum_mv_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)5288 void do_test_sge_sum_mv_x
5289 (int m, int n,
5290 int ntests, int *seed, double thresh, int debug, float test_prob,
5291 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
5292
5293 /* Function name */
5294 const char fname[] = "BLAS_sge_sum_mv_x";
5295
5296 int i;
5297 int yi;
5298 int incyi, y_starti, incx_veci;
5299 int test_count;
5300 int bad_ratio_count;
5301
5302 int ri;
5303 int incri;
5304 int inca, incx, incy;
5305
5306 double ratio;
5307
5308 double ratio_min, ratio_max;
5309
5310 double eps_int; /* internal machine epsilon */
5311 double un_int; /* internal underflow threshold */
5312
5313 float rin;
5314 float rout;
5315 double head_r_true_elem, tail_r_true_elem;
5316
5317 enum blas_order_type order_type;
5318 enum blas_prec_type prec;
5319
5320 int order_val;
5321 int lda_val, incx_val, incy_val;
5322 int ldb_val;
5323 int alpha_val, beta_val;
5324 int randomize_val;
5325
5326 int prec_val;
5327
5328 int lda, ldb;
5329 int alpha_flag, beta_flag;
5330 int saved_seed;
5331 int norm;
5332 int test_no;
5333
5334 int n_i, m_i;
5335 int inca_veci;
5336
5337 float alpha;
5338 float beta;
5339 float beta_zero_fake;
5340 float alpha_use;
5341 float *a;
5342 float *a_use;
5343 float *B;
5344 float *B_use;
5345 float *x;
5346 float *y;
5347 float *a_vec;
5348 float *x_vec;
5349
5350
5351 double *ratios;
5352
5353 /* true result calculated by testgen, in double-double */
5354 double *head_r_true, *tail_r_true;
5355
5356 FPU_FIX_DECL;
5357
5358 beta_zero_fake = 0.0;
5359
5360 if (n < 0 || ntests < 0)
5361 BLAS_error(fname, -3, n, NULL);
5362
5363 /* initialization */
5364 saved_seed = *seed;
5365 ratio = 0.0;
5366 ratio_min = 1e308;
5367 ratio_max = 0.0;
5368
5369 *num_tests = 0;
5370 *num_bad_ratio = 0;
5371 *min_ratio = 0.0;
5372 *max_ratio = 0.0;
5373
5374 if (n == 0)
5375 return;
5376
5377 FPU_FIX_START;
5378
5379 n_i = n;
5380 m_i = m;
5381
5382 inca = incx = incy = 1;
5383
5384
5385
5386
5387 /* allocate memory for arrays */
5388 y = (float *) blas_malloc(4 * m_i * sizeof(float));
5389 if (4 * m_i > 0 && y == NULL) {
5390 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5391 }
5392 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float));
5393 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
5394 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5395 }
5396 a_use = (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float));
5397 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
5398 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5399 }
5400 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
5401 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
5402 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5403 }
5404 B_use = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
5405 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
5406 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5407 }
5408 x = (float *) blas_malloc(4 * n_i * sizeof(float));
5409 if (4 * n_i > 0 && x == NULL) {
5410 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5411 }
5412
5413 inca_veci = 1;
5414
5415 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
5416 if (2 * n_i > 0 && a_vec == NULL) {
5417 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5418 }
5419 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
5420 if (2 * n_i > 0 && x_vec == NULL) {
5421 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5422 }
5423 head_r_true = (double *) blas_malloc(m_i * sizeof(double));
5424 tail_r_true = (double *) blas_malloc(m_i * sizeof(double));
5425 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
5426 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5427 }
5428 ratios = (double *) blas_malloc(m_i * sizeof(double));
5429 if (m_i > 0 && ratios == NULL) {
5430 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5431 }
5432
5433 test_count = 0;
5434 bad_ratio_count = 0;
5435
5436 /* vary alpha */
5437 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
5438
5439 alpha_flag = 0;
5440 switch (alpha_val) {
5441 case 0:
5442 alpha = 0.0;
5443 alpha_flag = 1;
5444 break;
5445 case 1:
5446 alpha = 1.0;
5447 alpha_flag = 1;
5448 break;
5449 }
5450
5451 /* vary beta */
5452 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
5453 beta_flag = 0;
5454 switch (beta_val) {
5455 case 0:
5456 beta = 0.0;
5457 beta_flag = 1;
5458 break;
5459 case 1:
5460 beta = 1.0;
5461 beta_flag = 1;
5462 break;
5463 }
5464
5465
5466 /* varying extra precs */
5467 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
5468 switch (prec_val) {
5469 case 0:
5470 eps_int = power(2, -BITS_S);
5471 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
5472 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
5473 prec = blas_prec_single;
5474 break;
5475 case 1:
5476 eps_int = power(2, -BITS_D);
5477 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5478 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5479 prec = blas_prec_double;
5480 break;
5481 case 2:
5482 default:
5483 eps_int = power(2, -BITS_E);
5484 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
5485 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
5486 prec = blas_prec_extra;
5487 break;
5488 }
5489
5490 /* vary norm -- underflow, approx 1, overflow */
5491 for (norm = NORM_START; norm <= NORM_END; norm++) {
5492
5493 /* number of tests */
5494 for (test_no = 0; test_no < ntests; test_no++) {
5495
5496
5497 /* vary storage format */
5498 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
5499
5500 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
5501
5502 /* vary lda = n_i, n_i+1, 2*n_i */
5503 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
5504
5505 if (order_type == blas_rowmajor) {
5506 lda = (lda_val == 0) ? n_i :
5507 (lda_val == 1) ? n_i + 1 : n_i * n_i;
5508 } else {
5509 lda = (lda_val == 0) ? m_i :
5510 (lda_val == 1) ? m_i + 1 : m_i * m_i;
5511 }
5512
5513 /* vary ldb = n_i, n_i+1, 2*n_i */
5514 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
5515
5516 if (order_type == blas_rowmajor) {
5517 ldb = (ldb_val == 0) ? n_i :
5518 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
5519 } else {
5520 ldb = (ldb_val == 0) ? m_i :
5521 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
5522 }
5523
5524 for (randomize_val = RANDOMIZE_START;
5525 randomize_val <= RANDOMIZE_END; randomize_val++) {
5526
5527 /* For the sake of speed, we throw out this case at random */
5528 if (xrand(seed) >= test_prob)
5529 continue;
5530
5531 /* finally we are here to generate the test case */
5532 /* alpha_use, a_use, B_use are the generated alpha, a, B
5533 * before any scaling.
5534 * That is, in the generator, alpha == beta == alpha_use
5535 * before scaling. */
5536
5537 saved_seed = *seed;
5538 BLAS_sge_sum_mv_testgen(norm, order_type,
5539 m, n, randomize_val, &alpha,
5540 alpha_flag, &beta, beta_flag, a,
5541 lda, B, ldb, x_vec, 1, &alpha_use,
5542 a_use, B_use, seed, head_r_true,
5543 tail_r_true);
5544
5545 /* vary incx = 1, 2 */
5546 for (incx_val = INCX_START; incx_val <= INCX_END;
5547 incx_val++) {
5548
5549 incx = incx_val;
5550 if (0 == incx)
5551 continue;
5552
5553 scopy_vector(x_vec, n_i, 1, x, incx);
5554
5555 /* vary incy = 1, 2 */
5556 for (incy_val = INCY_START; incy_val <= INCY_END;
5557 incy_val++) {
5558
5559 incy = incy_val;
5560 if (0 == incy)
5561 continue;
5562
5563 test_count++;
5564
5565 /* call ge_sum_mv routines to be tested */
5566 FPU_FIX_STOP;
5567 BLAS_sge_sum_mv_x(order_type,
5568 m, n, alpha, a, lda, x, incx, beta,
5569 B, ldb, y, incy, prec);
5570 FPU_FIX_START;
5571
5572 /* now compute the ratio using test_BLAS_xdot */
5573 /* copy a row from A, use x, run
5574 dot test */
5575
5576 incyi = incy;
5577
5578 incri = 1;
5579 incx_veci = 1;
5580
5581
5582
5583 if (incy < 0) {
5584 y_starti = (-m_i + 1) * incyi;
5585 } else {
5586 y_starti = 0;
5587 }
5588 /* make two copies of x into x_vec. redundant */
5589 scopy_vector(x, n_i, incx, x_vec, 1);
5590 scopy_vector(x, n_i, incx,
5591 (x_vec + (n_i * incx_veci)), 1);
5592 for (i = 0, yi = y_starti, ri = 0; i < m_i;
5593 i++, yi += incyi, ri += incri) {
5594 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
5595 a_use, lda, a_vec, i);
5596 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
5597 B_use, ldb, (a_vec + inca_veci * n_i),
5598 i);
5599
5600 rin = 0.0;
5601 rout = y[yi];
5602 head_r_true_elem = head_r_true[ri];
5603 tail_r_true_elem = tail_r_true[ri];
5604
5605 test_BLAS_sdot(2 * n_i,
5606 blas_no_conj,
5607 alpha_use, beta_zero_fake, rin, rout,
5608 head_r_true_elem, tail_r_true_elem,
5609 a_vec, 1, x_vec, 1, eps_int, un_int,
5610 &ratios[i]);
5611
5612 /* take the max ratio */
5613 if (i == 0) {
5614 ratio = ratios[0];
5615 /* The !<= below causes NaN errors
5616 * to be included.
5617 * Note that (NaN > 0) is false */
5618 } else if (!(ratios[i] <= ratio)) {
5619 ratio = ratios[i];
5620 }
5621 } /* end of dot-test loop */
5622
5623 /* The !<= below causes NaN errors
5624 * to be included.
5625 * Note that (NaN > 0) is false */
5626 if (!(ratio <= thresh)) {
5627
5628 if (debug == 3) {
5629 printf("\n\t\tTest # %d\n", test_count);
5630 printf("y type : s, a type : s, x type : s\n");
5631 printf("Seed = %d\t", saved_seed);
5632 printf("n %d, m %d\n", n, m);
5633 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
5634 ldb, incx, incx);
5635
5636 if (order_type == blas_rowmajor)
5637 printf("row ");
5638 else
5639 printf("col ");
5640
5641 printf("NORM %d, ALPHA %d, BETA %d\n",
5642 norm, alpha_val, beta_val);
5643 printf("randomize %d\n", randomize_val);
5644
5645 /* print out info */
5646 printf("alpha = ");
5647 printf("%16.8e", alpha);;
5648 printf(" ");
5649 printf("beta = ");
5650 printf("%16.8e", beta);;
5651 printf("\n");
5652 printf("alpha_use = ");
5653 printf("%16.8e", alpha_use);;
5654 printf("\n");
5655
5656 sge_print_matrix(a, m_i, n_i, lda, order_type,
5657 "A");
5658 sge_print_matrix(B, m_i, n_i, ldb, order_type,
5659 "B");
5660 sprint_vector(x, n_i, incx, "x");
5661
5662 sprint_vector(y, m_i, incy, "y");
5663
5664 dprint_vector(head_r_true, m_i, 1, "head_r_true");
5665
5666 sge_print_matrix(a_use, m_i, n_i, lda, order_type,
5667 "A_use");
5668 sge_print_matrix(B_use, m_i, n_i, ldb, order_type,
5669 "B_use");
5670
5671 dprint_vector(ratios, m_i, 1, "ratios");
5672 printf("ratio = %g\n", ratio);
5673 fflush(stdout);
5674 }
5675 bad_ratio_count++;
5676 if (bad_ratio_count >= MAX_BAD_TESTS) {
5677 printf("\ntoo many failures, exiting....");
5678 printf("\nTesting and compilation");
5679 printf(" are incomplete\n\n");
5680 goto end;
5681 }
5682 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
5683 printf("\nFlagrant ratio error, exiting...");
5684 printf("\nTesting and compilation");
5685 printf(" are incomplete\n\n");
5686 goto end;
5687 }
5688 }
5689
5690 if (!(ratio <= ratio_max))
5691 ratio_max = ratio;
5692
5693 if (ratio != 0.0 && !(ratio >= ratio_min))
5694 ratio_min = ratio;
5695
5696 } /* end of incy loop */
5697
5698 } /* end of incx loop */
5699
5700 } /* end of randmize loop */
5701
5702 } /* end of ldb loop */
5703
5704 } /* end of lda loop */
5705
5706 } /* end of order loop */
5707
5708 } /* end of nr test loop */
5709
5710 } /* end of norm loop */
5711
5712
5713 } /* end of prec loop */
5714
5715 } /* end of beta loop */
5716
5717 } /* end of alpha loop */
5718
5719 FPU_FIX_STOP;
5720
5721 end:
5722 blas_free(y);
5723 blas_free(a);
5724 blas_free(a_use);
5725 blas_free(B);
5726 blas_free(B_use);
5727 blas_free(x);
5728 blas_free(head_r_true);
5729 blas_free(tail_r_true);
5730 blas_free(ratios);
5731 blas_free(a_vec);
5732 blas_free(x_vec);
5733
5734 *max_ratio = ratio_max;
5735 *min_ratio = ratio_min;
5736 *num_tests = test_count;
5737 *num_bad_ratio = bad_ratio_count;
5738
5739 }
do_test_dge_sum_mv_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)5740 void do_test_dge_sum_mv_x
5741 (int m, int n,
5742 int ntests, int *seed, double thresh, int debug, float test_prob,
5743 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
5744
5745 /* Function name */
5746 const char fname[] = "BLAS_dge_sum_mv_x";
5747
5748 int i;
5749 int yi;
5750 int incyi, y_starti, incx_veci;
5751 int test_count;
5752 int bad_ratio_count;
5753
5754 int ri;
5755 int incri;
5756 int inca, incx, incy;
5757
5758 double ratio;
5759
5760 double ratio_min, ratio_max;
5761
5762 double eps_int; /* internal machine epsilon */
5763 double un_int; /* internal underflow threshold */
5764
5765 double rin;
5766 double rout;
5767 double head_r_true_elem, tail_r_true_elem;
5768
5769 enum blas_order_type order_type;
5770 enum blas_prec_type prec;
5771
5772 int order_val;
5773 int lda_val, incx_val, incy_val;
5774 int ldb_val;
5775 int alpha_val, beta_val;
5776 int randomize_val;
5777
5778 int prec_val;
5779
5780 int lda, ldb;
5781 int alpha_flag, beta_flag;
5782 int saved_seed;
5783 int norm;
5784 int test_no;
5785
5786 int n_i, m_i;
5787 int inca_veci;
5788
5789 double alpha;
5790 double beta;
5791 double beta_zero_fake;
5792 double alpha_use;
5793 double *a;
5794 double *a_use;
5795 double *B;
5796 double *B_use;
5797 double *x;
5798 double *y;
5799 double *a_vec;
5800 double *x_vec;
5801
5802
5803 double *ratios;
5804
5805 /* true result calculated by testgen, in double-double */
5806 double *head_r_true, *tail_r_true;
5807
5808 FPU_FIX_DECL;
5809
5810 beta_zero_fake = 0.0;
5811
5812 if (n < 0 || ntests < 0)
5813 BLAS_error(fname, -3, n, NULL);
5814
5815 /* initialization */
5816 saved_seed = *seed;
5817 ratio = 0.0;
5818 ratio_min = 1e308;
5819 ratio_max = 0.0;
5820
5821 *num_tests = 0;
5822 *num_bad_ratio = 0;
5823 *min_ratio = 0.0;
5824 *max_ratio = 0.0;
5825
5826 if (n == 0)
5827 return;
5828
5829 FPU_FIX_START;
5830
5831 n_i = n;
5832 m_i = m;
5833
5834 inca = incx = incy = 1;
5835
5836
5837
5838
5839 /* allocate memory for arrays */
5840 y = (double *) blas_malloc(4 * m_i * sizeof(double));
5841 if (4 * m_i > 0 && y == NULL) {
5842 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5843 }
5844 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double));
5845 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
5846 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5847 }
5848 a_use = (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double));
5849 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
5850 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5851 }
5852 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
5853 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
5854 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5855 }
5856 B_use = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
5857 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
5858 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5859 }
5860 x = (double *) blas_malloc(4 * n_i * sizeof(double));
5861 if (4 * n_i > 0 && x == NULL) {
5862 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5863 }
5864
5865 inca_veci = 1;
5866
5867 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
5868 if (2 * n_i > 0 && a_vec == NULL) {
5869 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5870 }
5871 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
5872 if (2 * n_i > 0 && x_vec == NULL) {
5873 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5874 }
5875 head_r_true = (double *) blas_malloc(m_i * sizeof(double));
5876 tail_r_true = (double *) blas_malloc(m_i * sizeof(double));
5877 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
5878 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5879 }
5880 ratios = (double *) blas_malloc(m_i * sizeof(double));
5881 if (m_i > 0 && ratios == NULL) {
5882 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5883 }
5884
5885 test_count = 0;
5886 bad_ratio_count = 0;
5887
5888 /* vary alpha */
5889 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
5890
5891 alpha_flag = 0;
5892 switch (alpha_val) {
5893 case 0:
5894 alpha = 0.0;
5895 alpha_flag = 1;
5896 break;
5897 case 1:
5898 alpha = 1.0;
5899 alpha_flag = 1;
5900 break;
5901 }
5902
5903 /* vary beta */
5904 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
5905 beta_flag = 0;
5906 switch (beta_val) {
5907 case 0:
5908 beta = 0.0;
5909 beta_flag = 1;
5910 break;
5911 case 1:
5912 beta = 1.0;
5913 beta_flag = 1;
5914 break;
5915 }
5916
5917
5918 /* varying extra precs */
5919 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
5920 switch (prec_val) {
5921 case 0:
5922 eps_int = power(2, -BITS_D);
5923 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5924 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5925 prec = blas_prec_double;
5926 break;
5927 case 1:
5928 eps_int = power(2, -BITS_D);
5929 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5930 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5931 prec = blas_prec_double;
5932 break;
5933 case 2:
5934 default:
5935 eps_int = power(2, -BITS_E);
5936 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
5937 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
5938 prec = blas_prec_extra;
5939 break;
5940 }
5941
5942 /* vary norm -- underflow, approx 1, overflow */
5943 for (norm = NORM_START; norm <= NORM_END; norm++) {
5944
5945 /* number of tests */
5946 for (test_no = 0; test_no < ntests; test_no++) {
5947
5948
5949 /* vary storage format */
5950 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
5951
5952 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
5953
5954 /* vary lda = n_i, n_i+1, 2*n_i */
5955 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
5956
5957 if (order_type == blas_rowmajor) {
5958 lda = (lda_val == 0) ? n_i :
5959 (lda_val == 1) ? n_i + 1 : n_i * n_i;
5960 } else {
5961 lda = (lda_val == 0) ? m_i :
5962 (lda_val == 1) ? m_i + 1 : m_i * m_i;
5963 }
5964
5965 /* vary ldb = n_i, n_i+1, 2*n_i */
5966 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
5967
5968 if (order_type == blas_rowmajor) {
5969 ldb = (ldb_val == 0) ? n_i :
5970 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
5971 } else {
5972 ldb = (ldb_val == 0) ? m_i :
5973 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
5974 }
5975
5976 for (randomize_val = RANDOMIZE_START;
5977 randomize_val <= RANDOMIZE_END; randomize_val++) {
5978
5979 /* For the sake of speed, we throw out this case at random */
5980 if (xrand(seed) >= test_prob)
5981 continue;
5982
5983 /* finally we are here to generate the test case */
5984 /* alpha_use, a_use, B_use are the generated alpha, a, B
5985 * before any scaling.
5986 * That is, in the generator, alpha == beta == alpha_use
5987 * before scaling. */
5988
5989 saved_seed = *seed;
5990 BLAS_dge_sum_mv_testgen(norm, order_type,
5991 m, n, randomize_val, &alpha,
5992 alpha_flag, &beta, beta_flag, a,
5993 lda, B, ldb, x_vec, 1, &alpha_use,
5994 a_use, B_use, seed, head_r_true,
5995 tail_r_true);
5996
5997 /* vary incx = 1, 2 */
5998 for (incx_val = INCX_START; incx_val <= INCX_END;
5999 incx_val++) {
6000
6001 incx = incx_val;
6002 if (0 == incx)
6003 continue;
6004
6005 dcopy_vector(x_vec, n_i, 1, x, incx);
6006
6007 /* vary incy = 1, 2 */
6008 for (incy_val = INCY_START; incy_val <= INCY_END;
6009 incy_val++) {
6010
6011 incy = incy_val;
6012 if (0 == incy)
6013 continue;
6014
6015 test_count++;
6016
6017 /* call ge_sum_mv routines to be tested */
6018 FPU_FIX_STOP;
6019 BLAS_dge_sum_mv_x(order_type,
6020 m, n, alpha, a, lda, x, incx, beta,
6021 B, ldb, y, incy, prec);
6022 FPU_FIX_START;
6023
6024 /* now compute the ratio using test_BLAS_xdot */
6025 /* copy a row from A, use x, run
6026 dot test */
6027
6028 incyi = incy;
6029
6030 incri = 1;
6031 incx_veci = 1;
6032
6033
6034
6035 if (incy < 0) {
6036 y_starti = (-m_i + 1) * incyi;
6037 } else {
6038 y_starti = 0;
6039 }
6040 /* make two copies of x into x_vec. redundant */
6041 dcopy_vector(x, n_i, incx, x_vec, 1);
6042 dcopy_vector(x, n_i, incx,
6043 (x_vec + (n_i * incx_veci)), 1);
6044 for (i = 0, yi = y_starti, ri = 0; i < m_i;
6045 i++, yi += incyi, ri += incri) {
6046 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
6047 a_use, lda, a_vec, i);
6048 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
6049 B_use, ldb, (a_vec + inca_veci * n_i),
6050 i);
6051
6052 rin = 0.0;
6053 rout = y[yi];
6054 head_r_true_elem = head_r_true[ri];
6055 tail_r_true_elem = tail_r_true[ri];
6056
6057 test_BLAS_ddot(2 * n_i,
6058 blas_no_conj,
6059 alpha_use, beta_zero_fake, rin, rout,
6060 head_r_true_elem, tail_r_true_elem,
6061 a_vec, 1, x_vec, 1, eps_int, un_int,
6062 &ratios[i]);
6063
6064 /* take the max ratio */
6065 if (i == 0) {
6066 ratio = ratios[0];
6067 /* The !<= below causes NaN errors
6068 * to be included.
6069 * Note that (NaN > 0) is false */
6070 } else if (!(ratios[i] <= ratio)) {
6071 ratio = ratios[i];
6072 }
6073 } /* end of dot-test loop */
6074
6075 /* The !<= below causes NaN errors
6076 * to be included.
6077 * Note that (NaN > 0) is false */
6078 if (!(ratio <= thresh)) {
6079
6080 if (debug == 3) {
6081 printf("\n\t\tTest # %d\n", test_count);
6082 printf("y type : d, a type : d, x type : d\n");
6083 printf("Seed = %d\t", saved_seed);
6084 printf("n %d, m %d\n", n, m);
6085 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
6086 ldb, incx, incx);
6087
6088 if (order_type == blas_rowmajor)
6089 printf("row ");
6090 else
6091 printf("col ");
6092
6093 printf("NORM %d, ALPHA %d, BETA %d\n",
6094 norm, alpha_val, beta_val);
6095 printf("randomize %d\n", randomize_val);
6096
6097 /* print out info */
6098 printf("alpha = ");
6099 printf("%24.16e", alpha);;
6100 printf(" ");
6101 printf("beta = ");
6102 printf("%24.16e", beta);;
6103 printf("\n");
6104 printf("alpha_use = ");
6105 printf("%24.16e", alpha_use);;
6106 printf("\n");
6107
6108 dge_print_matrix(a, m_i, n_i, lda, order_type,
6109 "A");
6110 dge_print_matrix(B, m_i, n_i, ldb, order_type,
6111 "B");
6112 dprint_vector(x, n_i, incx, "x");
6113
6114 dprint_vector(y, m_i, incy, "y");
6115
6116 dprint_vector(head_r_true, m_i, 1, "head_r_true");
6117
6118 dge_print_matrix(a_use, m_i, n_i, lda, order_type,
6119 "A_use");
6120 dge_print_matrix(B_use, m_i, n_i, ldb, order_type,
6121 "B_use");
6122
6123 dprint_vector(ratios, m_i, 1, "ratios");
6124 printf("ratio = %g\n", ratio);
6125 fflush(stdout);
6126 }
6127 bad_ratio_count++;
6128 if (bad_ratio_count >= MAX_BAD_TESTS) {
6129 printf("\ntoo many failures, exiting....");
6130 printf("\nTesting and compilation");
6131 printf(" are incomplete\n\n");
6132 goto end;
6133 }
6134 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
6135 printf("\nFlagrant ratio error, exiting...");
6136 printf("\nTesting and compilation");
6137 printf(" are incomplete\n\n");
6138 goto end;
6139 }
6140 }
6141
6142 if (!(ratio <= ratio_max))
6143 ratio_max = ratio;
6144
6145 if (ratio != 0.0 && !(ratio >= ratio_min))
6146 ratio_min = ratio;
6147
6148 } /* end of incy loop */
6149
6150 } /* end of incx loop */
6151
6152 } /* end of randmize loop */
6153
6154 } /* end of ldb loop */
6155
6156 } /* end of lda loop */
6157
6158 } /* end of order loop */
6159
6160 } /* end of nr test loop */
6161
6162 } /* end of norm loop */
6163
6164
6165 } /* end of prec loop */
6166
6167 } /* end of beta loop */
6168
6169 } /* end of alpha loop */
6170
6171 FPU_FIX_STOP;
6172
6173 end:
6174 blas_free(y);
6175 blas_free(a);
6176 blas_free(a_use);
6177 blas_free(B);
6178 blas_free(B_use);
6179 blas_free(x);
6180 blas_free(head_r_true);
6181 blas_free(tail_r_true);
6182 blas_free(ratios);
6183 blas_free(a_vec);
6184 blas_free(x_vec);
6185
6186 *max_ratio = ratio_max;
6187 *min_ratio = ratio_min;
6188 *num_tests = test_count;
6189 *num_bad_ratio = bad_ratio_count;
6190
6191 }
do_test_cge_sum_mv_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)6192 void do_test_cge_sum_mv_x
6193 (int m, int n,
6194 int ntests, int *seed, double thresh, int debug, float test_prob,
6195 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
6196
6197 /* Function name */
6198 const char fname[] = "BLAS_cge_sum_mv_x";
6199
6200 int i;
6201 int yi;
6202 int incyi, y_starti, incx_veci;
6203 int test_count;
6204 int bad_ratio_count;
6205
6206 int ri;
6207 int incri;
6208 int inca, incx, incy;
6209
6210 double ratio;
6211
6212 double ratio_min, ratio_max;
6213
6214 double eps_int; /* internal machine epsilon */
6215 double un_int; /* internal underflow threshold */
6216
6217 float rin[2];
6218 float rout[2];
6219 double head_r_true_elem[2], tail_r_true_elem[2];
6220
6221 enum blas_order_type order_type;
6222 enum blas_prec_type prec;
6223
6224 int order_val;
6225 int lda_val, incx_val, incy_val;
6226 int ldb_val;
6227 int alpha_val, beta_val;
6228 int randomize_val;
6229
6230 int prec_val;
6231
6232 int lda, ldb;
6233 int alpha_flag, beta_flag;
6234 int saved_seed;
6235 int norm;
6236 int test_no;
6237
6238 int n_i, m_i;
6239 int inca_veci;
6240
6241 float alpha[2];
6242 float beta[2];
6243 float beta_zero_fake[2];
6244 float alpha_use[2];
6245 float *a;
6246 float *a_use;
6247 float *B;
6248 float *B_use;
6249 float *x;
6250 float *y;
6251 float *a_vec;
6252 float *x_vec;
6253
6254
6255 double *ratios;
6256
6257 /* true result calculated by testgen, in double-double */
6258 double *head_r_true, *tail_r_true;
6259
6260
6261 FPU_FIX_DECL;
6262
6263 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
6264
6265 if (n < 0 || ntests < 0)
6266 BLAS_error(fname, -3, n, NULL);
6267
6268 /* initialization */
6269 saved_seed = *seed;
6270 ratio = 0.0;
6271 ratio_min = 1e308;
6272 ratio_max = 0.0;
6273
6274 *num_tests = 0;
6275 *num_bad_ratio = 0;
6276 *min_ratio = 0.0;
6277 *max_ratio = 0.0;
6278
6279 if (n == 0)
6280 return;
6281
6282 FPU_FIX_START;
6283
6284 n_i = n;
6285 m_i = m;
6286
6287 inca = incx = incy = 1;
6288 inca *= 2;
6289 incx *= 2;
6290 incy *= 2;
6291
6292 /* allocate memory for arrays */
6293 y = (float *) blas_malloc(4 * m_i * sizeof(float) * 2);
6294 if (4 * m_i > 0 && y == NULL) {
6295 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6296 }
6297 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float) * 2);
6298 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
6299 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6300 }
6301 a_use =
6302 (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float) * 2);
6303 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
6304 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6305 }
6306 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
6307 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
6308 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6309 }
6310 B_use =
6311 (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
6312 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
6313 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6314 }
6315 x = (float *) blas_malloc(4 * n_i * sizeof(float) * 2);
6316 if (4 * n_i > 0 && x == NULL) {
6317 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6318 }
6319
6320 inca_veci = 1;
6321 inca_veci *= 2;
6322 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
6323 if (2 * n_i > 0 && a_vec == NULL) {
6324 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6325 }
6326 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
6327 if (2 * n_i > 0 && x_vec == NULL) {
6328 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6329 }
6330 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
6331 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
6332 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
6333 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6334 }
6335 ratios = (double *) blas_malloc(m_i * sizeof(double));
6336 if (m_i > 0 && ratios == NULL) {
6337 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6338 }
6339
6340 test_count = 0;
6341 bad_ratio_count = 0;
6342
6343 /* vary alpha */
6344 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
6345
6346 alpha_flag = 0;
6347 switch (alpha_val) {
6348 case 0:
6349 alpha[0] = alpha[1] = 0.0;
6350 alpha_flag = 1;
6351 break;
6352 case 1:
6353 alpha[0] = 1.0;
6354 alpha[1] = 0.0;
6355 alpha_flag = 1;
6356 break;
6357 }
6358
6359 /* vary beta */
6360 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
6361 beta_flag = 0;
6362 switch (beta_val) {
6363 case 0:
6364 beta[0] = beta[1] = 0.0;
6365 beta_flag = 1;
6366 break;
6367 case 1:
6368 beta[0] = 1.0;
6369 beta[1] = 0.0;
6370 beta_flag = 1;
6371 break;
6372 }
6373
6374
6375 /* varying extra precs */
6376 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
6377 switch (prec_val) {
6378 case 0:
6379 eps_int = power(2, -BITS_S);
6380 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
6381 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
6382 prec = blas_prec_single;
6383 break;
6384 case 1:
6385 eps_int = power(2, -BITS_D);
6386 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6387 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6388 prec = blas_prec_double;
6389 break;
6390 case 2:
6391 default:
6392 eps_int = power(2, -BITS_E);
6393 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
6394 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
6395 prec = blas_prec_extra;
6396 break;
6397 }
6398
6399 /* vary norm -- underflow, approx 1, overflow */
6400 for (norm = NORM_START; norm <= NORM_END; norm++) {
6401
6402 /* number of tests */
6403 for (test_no = 0; test_no < ntests; test_no++) {
6404
6405
6406 /* vary storage format */
6407 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
6408
6409 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
6410
6411 /* vary lda = n_i, n_i+1, 2*n_i */
6412 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
6413
6414 if (order_type == blas_rowmajor) {
6415 lda = (lda_val == 0) ? n_i :
6416 (lda_val == 1) ? n_i + 1 : n_i * n_i;
6417 } else {
6418 lda = (lda_val == 0) ? m_i :
6419 (lda_val == 1) ? m_i + 1 : m_i * m_i;
6420 }
6421
6422 /* vary ldb = n_i, n_i+1, 2*n_i */
6423 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
6424
6425 if (order_type == blas_rowmajor) {
6426 ldb = (ldb_val == 0) ? n_i :
6427 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
6428 } else {
6429 ldb = (ldb_val == 0) ? m_i :
6430 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
6431 }
6432
6433 for (randomize_val = RANDOMIZE_START;
6434 randomize_val <= RANDOMIZE_END; randomize_val++) {
6435
6436 /* For the sake of speed, we throw out this case at random */
6437 if (xrand(seed) >= test_prob)
6438 continue;
6439
6440 /* finally we are here to generate the test case */
6441 /* alpha_use, a_use, B_use are the generated alpha, a, B
6442 * before any scaling.
6443 * That is, in the generator, alpha == beta == alpha_use
6444 * before scaling. */
6445
6446 saved_seed = *seed;
6447 BLAS_cge_sum_mv_testgen(norm, order_type,
6448 m, n, randomize_val, &alpha,
6449 alpha_flag, &beta, beta_flag, a,
6450 lda, B, ldb, x_vec, 1, &alpha_use,
6451 a_use, B_use, seed, head_r_true,
6452 tail_r_true);
6453
6454 /* vary incx = 1, 2 */
6455 for (incx_val = INCX_START; incx_val <= INCX_END;
6456 incx_val++) {
6457
6458 incx = incx_val;
6459 if (0 == incx)
6460 continue;
6461
6462 ccopy_vector(x_vec, n_i, 1, x, incx);
6463
6464 /* vary incy = 1, 2 */
6465 for (incy_val = INCY_START; incy_val <= INCY_END;
6466 incy_val++) {
6467
6468 incy = incy_val;
6469 if (0 == incy)
6470 continue;
6471
6472 test_count++;
6473
6474 /* call ge_sum_mv routines to be tested */
6475 FPU_FIX_STOP;
6476 BLAS_cge_sum_mv_x(order_type,
6477 m, n, alpha, a, lda, x, incx, beta,
6478 B, ldb, y, incy, prec);
6479 FPU_FIX_START;
6480
6481 /* now compute the ratio using test_BLAS_xdot */
6482 /* copy a row from A, use x, run
6483 dot test */
6484
6485 incyi = incy;
6486
6487 incri = 1;
6488 incx_veci = 1;
6489 incx_veci *= 2;
6490 incyi *= 2;
6491 incri *= 2;
6492 if (incy < 0) {
6493 y_starti = (-m_i + 1) * incyi;
6494 } else {
6495 y_starti = 0;
6496 }
6497 /* make two copies of x into x_vec. redundant */
6498 ccopy_vector(x, n_i, incx, x_vec, 1);
6499 ccopy_vector(x, n_i, incx,
6500 (x_vec + (n_i * incx_veci)), 1);
6501 for (i = 0, yi = y_starti, ri = 0; i < m_i;
6502 i++, yi += incyi, ri += incri) {
6503 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
6504 a_use, lda, a_vec, i);
6505 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
6506 B_use, ldb, (a_vec + inca_veci * n_i),
6507 i);
6508
6509 rin[0] = rin[1] = 0.0;
6510 rout[0] = y[yi];
6511 rout[1] = y[yi + 1];
6512 head_r_true_elem[0] = head_r_true[ri];
6513 head_r_true_elem[1] = head_r_true[ri + 1];
6514 tail_r_true_elem[0] = tail_r_true[ri];
6515 tail_r_true_elem[1] = tail_r_true[ri + 1];
6516
6517 test_BLAS_cdot(2 * n_i,
6518 blas_no_conj,
6519 alpha_use, beta_zero_fake, rin, rout,
6520 head_r_true_elem, tail_r_true_elem,
6521 a_vec, 1, x_vec, 1, eps_int, un_int,
6522 &ratios[i]);
6523
6524 /* take the max ratio */
6525 if (i == 0) {
6526 ratio = ratios[0];
6527 /* The !<= below causes NaN errors
6528 * to be included.
6529 * Note that (NaN > 0) is false */
6530 } else if (!(ratios[i] <= ratio)) {
6531 ratio = ratios[i];
6532 }
6533 } /* end of dot-test loop */
6534
6535 /* The !<= below causes NaN errors
6536 * to be included.
6537 * Note that (NaN > 0) is false */
6538 if (!(ratio <= thresh)) {
6539
6540 if (debug == 3) {
6541 printf("\n\t\tTest # %d\n", test_count);
6542 printf("y type : c, a type : c, x type : c\n");
6543 printf("Seed = %d\t", saved_seed);
6544 printf("n %d, m %d\n", n, m);
6545 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
6546 ldb, incx, incx);
6547
6548 if (order_type == blas_rowmajor)
6549 printf("row ");
6550 else
6551 printf("col ");
6552
6553 printf("NORM %d, ALPHA %d, BETA %d\n",
6554 norm, alpha_val, beta_val);
6555 printf("randomize %d\n", randomize_val);
6556
6557 /* print out info */
6558 printf("alpha = ");
6559 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);;
6560 printf(" ");
6561 printf("beta = ");
6562 printf("(%16.8e, %16.8e)", beta[0], beta[1]);;
6563 printf("\n");
6564 printf("alpha_use = ");
6565 printf("(%16.8e, %16.8e)", alpha_use[0],
6566 alpha_use[1]);;
6567 printf("\n");
6568
6569 cge_print_matrix(a, m_i, n_i, lda, order_type,
6570 "A");
6571 cge_print_matrix(B, m_i, n_i, ldb, order_type,
6572 "B");
6573 cprint_vector(x, n_i, incx, "x");
6574
6575 cprint_vector(y, m_i, incy, "y");
6576
6577 zprint_vector(head_r_true, m_i, 1, "head_r_true");
6578
6579 cge_print_matrix(a_use, m_i, n_i, lda, order_type,
6580 "A_use");
6581 cge_print_matrix(B_use, m_i, n_i, ldb, order_type,
6582 "B_use");
6583
6584 dprint_vector(ratios, m_i, 1, "ratios");
6585 printf("ratio = %g\n", ratio);
6586 fflush(stdout);
6587 }
6588 bad_ratio_count++;
6589 if (bad_ratio_count >= MAX_BAD_TESTS) {
6590 printf("\ntoo many failures, exiting....");
6591 printf("\nTesting and compilation");
6592 printf(" are incomplete\n\n");
6593 goto end;
6594 }
6595 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
6596 printf("\nFlagrant ratio error, exiting...");
6597 printf("\nTesting and compilation");
6598 printf(" are incomplete\n\n");
6599 goto end;
6600 }
6601 }
6602
6603 if (!(ratio <= ratio_max))
6604 ratio_max = ratio;
6605
6606 if (ratio != 0.0 && !(ratio >= ratio_min))
6607 ratio_min = ratio;
6608
6609 } /* end of incy loop */
6610
6611 } /* end of incx loop */
6612
6613 } /* end of randmize loop */
6614
6615 } /* end of ldb loop */
6616
6617 } /* end of lda loop */
6618
6619 } /* end of order loop */
6620
6621 } /* end of nr test loop */
6622
6623 } /* end of norm loop */
6624
6625
6626 } /* end of prec loop */
6627
6628 } /* end of beta loop */
6629
6630 } /* end of alpha loop */
6631
6632 FPU_FIX_STOP;
6633
6634 end:
6635 blas_free(y);
6636 blas_free(a);
6637 blas_free(a_use);
6638 blas_free(B);
6639 blas_free(B_use);
6640 blas_free(x);
6641 blas_free(head_r_true);
6642 blas_free(tail_r_true);
6643 blas_free(ratios);
6644 blas_free(a_vec);
6645 blas_free(x_vec);
6646
6647 *max_ratio = ratio_max;
6648 *min_ratio = ratio_min;
6649 *num_tests = test_count;
6650 *num_bad_ratio = bad_ratio_count;
6651
6652 }
do_test_zge_sum_mv_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)6653 void do_test_zge_sum_mv_x
6654 (int m, int n,
6655 int ntests, int *seed, double thresh, int debug, float test_prob,
6656 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
6657
6658 /* Function name */
6659 const char fname[] = "BLAS_zge_sum_mv_x";
6660
6661 int i;
6662 int yi;
6663 int incyi, y_starti, incx_veci;
6664 int test_count;
6665 int bad_ratio_count;
6666
6667 int ri;
6668 int incri;
6669 int inca, incx, incy;
6670
6671 double ratio;
6672
6673 double ratio_min, ratio_max;
6674
6675 double eps_int; /* internal machine epsilon */
6676 double un_int; /* internal underflow threshold */
6677
6678 double rin[2];
6679 double rout[2];
6680 double head_r_true_elem[2], tail_r_true_elem[2];
6681
6682 enum blas_order_type order_type;
6683 enum blas_prec_type prec;
6684
6685 int order_val;
6686 int lda_val, incx_val, incy_val;
6687 int ldb_val;
6688 int alpha_val, beta_val;
6689 int randomize_val;
6690
6691 int prec_val;
6692
6693 int lda, ldb;
6694 int alpha_flag, beta_flag;
6695 int saved_seed;
6696 int norm;
6697 int test_no;
6698
6699 int n_i, m_i;
6700 int inca_veci;
6701
6702 double alpha[2];
6703 double beta[2];
6704 double beta_zero_fake[2];
6705 double alpha_use[2];
6706 double *a;
6707 double *a_use;
6708 double *B;
6709 double *B_use;
6710 double *x;
6711 double *y;
6712 double *a_vec;
6713 double *x_vec;
6714
6715
6716 double *ratios;
6717
6718 /* true result calculated by testgen, in double-double */
6719 double *head_r_true, *tail_r_true;
6720
6721
6722 FPU_FIX_DECL;
6723
6724 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
6725
6726 if (n < 0 || ntests < 0)
6727 BLAS_error(fname, -3, n, NULL);
6728
6729 /* initialization */
6730 saved_seed = *seed;
6731 ratio = 0.0;
6732 ratio_min = 1e308;
6733 ratio_max = 0.0;
6734
6735 *num_tests = 0;
6736 *num_bad_ratio = 0;
6737 *min_ratio = 0.0;
6738 *max_ratio = 0.0;
6739
6740 if (n == 0)
6741 return;
6742
6743 FPU_FIX_START;
6744
6745 n_i = n;
6746 m_i = m;
6747
6748 inca = incx = incy = 1;
6749 inca *= 2;
6750 incx *= 2;
6751 incy *= 2;
6752
6753 /* allocate memory for arrays */
6754 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
6755 if (4 * m_i > 0 && y == NULL) {
6756 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6757 }
6758 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double) * 2);
6759 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
6760 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6761 }
6762 a_use =
6763 (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double) * 2);
6764 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
6765 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6766 }
6767 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double) * 2);
6768 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
6769 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6770 }
6771 B_use =
6772 (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double) * 2);
6773 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
6774 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6775 }
6776 x = (double *) blas_malloc(4 * n_i * sizeof(double) * 2);
6777 if (4 * n_i > 0 && x == NULL) {
6778 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6779 }
6780
6781 inca_veci = 1;
6782 inca_veci *= 2;
6783 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double) * 2);
6784 if (2 * n_i > 0 && a_vec == NULL) {
6785 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6786 }
6787 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double) * 2);
6788 if (2 * n_i > 0 && x_vec == NULL) {
6789 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6790 }
6791 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
6792 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
6793 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
6794 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6795 }
6796 ratios = (double *) blas_malloc(m_i * sizeof(double));
6797 if (m_i > 0 && ratios == NULL) {
6798 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6799 }
6800
6801 test_count = 0;
6802 bad_ratio_count = 0;
6803
6804 /* vary alpha */
6805 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
6806
6807 alpha_flag = 0;
6808 switch (alpha_val) {
6809 case 0:
6810 alpha[0] = alpha[1] = 0.0;
6811 alpha_flag = 1;
6812 break;
6813 case 1:
6814 alpha[0] = 1.0;
6815 alpha[1] = 0.0;
6816 alpha_flag = 1;
6817 break;
6818 }
6819
6820 /* vary beta */
6821 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
6822 beta_flag = 0;
6823 switch (beta_val) {
6824 case 0:
6825 beta[0] = beta[1] = 0.0;
6826 beta_flag = 1;
6827 break;
6828 case 1:
6829 beta[0] = 1.0;
6830 beta[1] = 0.0;
6831 beta_flag = 1;
6832 break;
6833 }
6834
6835
6836 /* varying extra precs */
6837 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
6838 switch (prec_val) {
6839 case 0:
6840 eps_int = power(2, -BITS_D);
6841 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6842 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6843 prec = blas_prec_double;
6844 break;
6845 case 1:
6846 eps_int = power(2, -BITS_D);
6847 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6848 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6849 prec = blas_prec_double;
6850 break;
6851 case 2:
6852 default:
6853 eps_int = power(2, -BITS_E);
6854 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
6855 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
6856 prec = blas_prec_extra;
6857 break;
6858 }
6859
6860 /* vary norm -- underflow, approx 1, overflow */
6861 for (norm = NORM_START; norm <= NORM_END; norm++) {
6862
6863 /* number of tests */
6864 for (test_no = 0; test_no < ntests; test_no++) {
6865
6866
6867 /* vary storage format */
6868 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
6869
6870 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
6871
6872 /* vary lda = n_i, n_i+1, 2*n_i */
6873 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
6874
6875 if (order_type == blas_rowmajor) {
6876 lda = (lda_val == 0) ? n_i :
6877 (lda_val == 1) ? n_i + 1 : n_i * n_i;
6878 } else {
6879 lda = (lda_val == 0) ? m_i :
6880 (lda_val == 1) ? m_i + 1 : m_i * m_i;
6881 }
6882
6883 /* vary ldb = n_i, n_i+1, 2*n_i */
6884 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
6885
6886 if (order_type == blas_rowmajor) {
6887 ldb = (ldb_val == 0) ? n_i :
6888 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
6889 } else {
6890 ldb = (ldb_val == 0) ? m_i :
6891 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
6892 }
6893
6894 for (randomize_val = RANDOMIZE_START;
6895 randomize_val <= RANDOMIZE_END; randomize_val++) {
6896
6897 /* For the sake of speed, we throw out this case at random */
6898 if (xrand(seed) >= test_prob)
6899 continue;
6900
6901 /* finally we are here to generate the test case */
6902 /* alpha_use, a_use, B_use are the generated alpha, a, B
6903 * before any scaling.
6904 * That is, in the generator, alpha == beta == alpha_use
6905 * before scaling. */
6906
6907 saved_seed = *seed;
6908 BLAS_zge_sum_mv_testgen(norm, order_type,
6909 m, n, randomize_val, &alpha,
6910 alpha_flag, &beta, beta_flag, a,
6911 lda, B, ldb, x_vec, 1, &alpha_use,
6912 a_use, B_use, seed, head_r_true,
6913 tail_r_true);
6914
6915 /* vary incx = 1, 2 */
6916 for (incx_val = INCX_START; incx_val <= INCX_END;
6917 incx_val++) {
6918
6919 incx = incx_val;
6920 if (0 == incx)
6921 continue;
6922
6923 zcopy_vector(x_vec, n_i, 1, x, incx);
6924
6925 /* vary incy = 1, 2 */
6926 for (incy_val = INCY_START; incy_val <= INCY_END;
6927 incy_val++) {
6928
6929 incy = incy_val;
6930 if (0 == incy)
6931 continue;
6932
6933 test_count++;
6934
6935 /* call ge_sum_mv routines to be tested */
6936 FPU_FIX_STOP;
6937 BLAS_zge_sum_mv_x(order_type,
6938 m, n, alpha, a, lda, x, incx, beta,
6939 B, ldb, y, incy, prec);
6940 FPU_FIX_START;
6941
6942 /* now compute the ratio using test_BLAS_xdot */
6943 /* copy a row from A, use x, run
6944 dot test */
6945
6946 incyi = incy;
6947
6948 incri = 1;
6949 incx_veci = 1;
6950 incx_veci *= 2;
6951 incyi *= 2;
6952 incri *= 2;
6953 if (incy < 0) {
6954 y_starti = (-m_i + 1) * incyi;
6955 } else {
6956 y_starti = 0;
6957 }
6958 /* make two copies of x into x_vec. redundant */
6959 zcopy_vector(x, n_i, incx, x_vec, 1);
6960 zcopy_vector(x, n_i, incx,
6961 (x_vec + (n_i * incx_veci)), 1);
6962 for (i = 0, yi = y_starti, ri = 0; i < m_i;
6963 i++, yi += incyi, ri += incri) {
6964 zge_copy_row(order_type, blas_no_trans, m_i, n_i,
6965 a_use, lda, a_vec, i);
6966 zge_copy_row(order_type, blas_no_trans, m_i, n_i,
6967 B_use, ldb, (a_vec + inca_veci * n_i),
6968 i);
6969
6970 rin[0] = rin[1] = 0.0;
6971 rout[0] = y[yi];
6972 rout[1] = y[yi + 1];
6973 head_r_true_elem[0] = head_r_true[ri];
6974 head_r_true_elem[1] = head_r_true[ri + 1];
6975 tail_r_true_elem[0] = tail_r_true[ri];
6976 tail_r_true_elem[1] = tail_r_true[ri + 1];
6977
6978 test_BLAS_zdot(2 * n_i,
6979 blas_no_conj,
6980 alpha_use, beta_zero_fake, rin, rout,
6981 head_r_true_elem, tail_r_true_elem,
6982 a_vec, 1, x_vec, 1, eps_int, un_int,
6983 &ratios[i]);
6984
6985 /* take the max ratio */
6986 if (i == 0) {
6987 ratio = ratios[0];
6988 /* The !<= below causes NaN errors
6989 * to be included.
6990 * Note that (NaN > 0) is false */
6991 } else if (!(ratios[i] <= ratio)) {
6992 ratio = ratios[i];
6993 }
6994 } /* end of dot-test loop */
6995
6996 /* The !<= below causes NaN errors
6997 * to be included.
6998 * Note that (NaN > 0) is false */
6999 if (!(ratio <= thresh)) {
7000
7001 if (debug == 3) {
7002 printf("\n\t\tTest # %d\n", test_count);
7003 printf("y type : z, a type : z, x type : z\n");
7004 printf("Seed = %d\t", saved_seed);
7005 printf("n %d, m %d\n", n, m);
7006 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
7007 ldb, incx, incx);
7008
7009 if (order_type == blas_rowmajor)
7010 printf("row ");
7011 else
7012 printf("col ");
7013
7014 printf("NORM %d, ALPHA %d, BETA %d\n",
7015 norm, alpha_val, beta_val);
7016 printf("randomize %d\n", randomize_val);
7017
7018 /* print out info */
7019 printf("alpha = ");
7020 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
7021 printf(" ");
7022 printf("beta = ");
7023 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
7024 printf("\n");
7025 printf("alpha_use = ");
7026 printf("(%24.16e, %24.16e)", alpha_use[0],
7027 alpha_use[1]);;
7028 printf("\n");
7029
7030 zge_print_matrix(a, m_i, n_i, lda, order_type,
7031 "A");
7032 zge_print_matrix(B, m_i, n_i, ldb, order_type,
7033 "B");
7034 zprint_vector(x, n_i, incx, "x");
7035
7036 zprint_vector(y, m_i, incy, "y");
7037
7038 zprint_vector(head_r_true, m_i, 1, "head_r_true");
7039
7040 zge_print_matrix(a_use, m_i, n_i, lda, order_type,
7041 "A_use");
7042 zge_print_matrix(B_use, m_i, n_i, ldb, order_type,
7043 "B_use");
7044
7045 dprint_vector(ratios, m_i, 1, "ratios");
7046 printf("ratio = %g\n", ratio);
7047 fflush(stdout);
7048 }
7049 bad_ratio_count++;
7050 if (bad_ratio_count >= MAX_BAD_TESTS) {
7051 printf("\ntoo many failures, exiting....");
7052 printf("\nTesting and compilation");
7053 printf(" are incomplete\n\n");
7054 goto end;
7055 }
7056 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
7057 printf("\nFlagrant ratio error, exiting...");
7058 printf("\nTesting and compilation");
7059 printf(" are incomplete\n\n");
7060 goto end;
7061 }
7062 }
7063
7064 if (!(ratio <= ratio_max))
7065 ratio_max = ratio;
7066
7067 if (ratio != 0.0 && !(ratio >= ratio_min))
7068 ratio_min = ratio;
7069
7070 } /* end of incy loop */
7071
7072 } /* end of incx loop */
7073
7074 } /* end of randmize loop */
7075
7076 } /* end of ldb loop */
7077
7078 } /* end of lda loop */
7079
7080 } /* end of order loop */
7081
7082 } /* end of nr test loop */
7083
7084 } /* end of norm loop */
7085
7086
7087 } /* end of prec loop */
7088
7089 } /* end of beta loop */
7090
7091 } /* end of alpha loop */
7092
7093 FPU_FIX_STOP;
7094
7095 end:
7096 blas_free(y);
7097 blas_free(a);
7098 blas_free(a_use);
7099 blas_free(B);
7100 blas_free(B_use);
7101 blas_free(x);
7102 blas_free(head_r_true);
7103 blas_free(tail_r_true);
7104 blas_free(ratios);
7105 blas_free(a_vec);
7106 blas_free(x_vec);
7107
7108 *max_ratio = ratio_max;
7109 *min_ratio = ratio_min;
7110 *num_tests = test_count;
7111 *num_bad_ratio = bad_ratio_count;
7112
7113 }
do_test_dge_sum_mv_d_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)7114 void do_test_dge_sum_mv_d_s_x
7115 (int m, int n,
7116 int ntests, int *seed, double thresh, int debug, float test_prob,
7117 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
7118
7119 /* Function name */
7120 const char fname[] = "BLAS_dge_sum_mv_d_s_x";
7121
7122 int i;
7123 int yi;
7124 int incyi, y_starti, incx_veci;
7125 int test_count;
7126 int bad_ratio_count;
7127
7128 int ri;
7129 int incri;
7130 int inca, incx, incy;
7131
7132 double ratio;
7133
7134 double ratio_min, ratio_max;
7135
7136 double eps_int; /* internal machine epsilon */
7137 double un_int; /* internal underflow threshold */
7138
7139 double rin;
7140 double rout;
7141 double head_r_true_elem, tail_r_true_elem;
7142
7143 enum blas_order_type order_type;
7144 enum blas_prec_type prec;
7145
7146 int order_val;
7147 int lda_val, incx_val, incy_val;
7148 int ldb_val;
7149 int alpha_val, beta_val;
7150 int randomize_val;
7151
7152 int prec_val;
7153
7154 int lda, ldb;
7155 int alpha_flag, beta_flag;
7156 int saved_seed;
7157 int norm;
7158 int test_no;
7159
7160 int n_i, m_i;
7161 int inca_veci;
7162
7163 double alpha;
7164 double beta;
7165 double beta_zero_fake;
7166 double alpha_use;
7167 double *a;
7168 double *a_use;
7169 double *B;
7170 double *B_use;
7171 float *x;
7172 double *y;
7173 double *a_vec;
7174 float *x_vec;
7175
7176
7177 double *ratios;
7178
7179 /* true result calculated by testgen, in double-double */
7180 double *head_r_true, *tail_r_true;
7181
7182 FPU_FIX_DECL;
7183
7184 beta_zero_fake = 0.0;
7185
7186 if (n < 0 || ntests < 0)
7187 BLAS_error(fname, -3, n, NULL);
7188
7189 /* initialization */
7190 saved_seed = *seed;
7191 ratio = 0.0;
7192 ratio_min = 1e308;
7193 ratio_max = 0.0;
7194
7195 *num_tests = 0;
7196 *num_bad_ratio = 0;
7197 *min_ratio = 0.0;
7198 *max_ratio = 0.0;
7199
7200 if (n == 0)
7201 return;
7202
7203 FPU_FIX_START;
7204
7205 n_i = n;
7206 m_i = m;
7207
7208 inca = incx = incy = 1;
7209
7210
7211
7212
7213 /* allocate memory for arrays */
7214 y = (double *) blas_malloc(4 * m_i * sizeof(double));
7215 if (4 * m_i > 0 && y == NULL) {
7216 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7217 }
7218 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double));
7219 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
7220 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7221 }
7222 a_use = (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double));
7223 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
7224 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7225 }
7226 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
7227 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
7228 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7229 }
7230 B_use = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
7231 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
7232 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7233 }
7234 x = (float *) blas_malloc(4 * n_i * sizeof(float));
7235 if (4 * n_i > 0 && x == NULL) {
7236 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7237 }
7238
7239 inca_veci = 1;
7240
7241 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
7242 if (2 * n_i > 0 && a_vec == NULL) {
7243 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7244 }
7245 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
7246 if (2 * n_i > 0 && x_vec == NULL) {
7247 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7248 }
7249 head_r_true = (double *) blas_malloc(m_i * sizeof(double));
7250 tail_r_true = (double *) blas_malloc(m_i * sizeof(double));
7251 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
7252 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7253 }
7254 ratios = (double *) blas_malloc(m_i * sizeof(double));
7255 if (m_i > 0 && ratios == NULL) {
7256 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7257 }
7258
7259 test_count = 0;
7260 bad_ratio_count = 0;
7261
7262 /* vary alpha */
7263 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
7264
7265 alpha_flag = 0;
7266 switch (alpha_val) {
7267 case 0:
7268 alpha = 0.0;
7269 alpha_flag = 1;
7270 break;
7271 case 1:
7272 alpha = 1.0;
7273 alpha_flag = 1;
7274 break;
7275 }
7276
7277 /* vary beta */
7278 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
7279 beta_flag = 0;
7280 switch (beta_val) {
7281 case 0:
7282 beta = 0.0;
7283 beta_flag = 1;
7284 break;
7285 case 1:
7286 beta = 1.0;
7287 beta_flag = 1;
7288 break;
7289 }
7290
7291
7292 /* varying extra precs */
7293 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
7294 switch (prec_val) {
7295 case 0:
7296 eps_int = power(2, -BITS_D);
7297 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
7298 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
7299 prec = blas_prec_double;
7300 break;
7301 case 1:
7302 eps_int = power(2, -BITS_D);
7303 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
7304 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
7305 prec = blas_prec_double;
7306 break;
7307 case 2:
7308 default:
7309 eps_int = power(2, -BITS_E);
7310 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
7311 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
7312 prec = blas_prec_extra;
7313 break;
7314 }
7315
7316 /* vary norm -- underflow, approx 1, overflow */
7317 for (norm = NORM_START; norm <= NORM_END; norm++) {
7318
7319 /* number of tests */
7320 for (test_no = 0; test_no < ntests; test_no++) {
7321
7322
7323 /* vary storage format */
7324 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
7325
7326 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
7327
7328 /* vary lda = n_i, n_i+1, 2*n_i */
7329 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
7330
7331 if (order_type == blas_rowmajor) {
7332 lda = (lda_val == 0) ? n_i :
7333 (lda_val == 1) ? n_i + 1 : n_i * n_i;
7334 } else {
7335 lda = (lda_val == 0) ? m_i :
7336 (lda_val == 1) ? m_i + 1 : m_i * m_i;
7337 }
7338
7339 /* vary ldb = n_i, n_i+1, 2*n_i */
7340 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
7341
7342 if (order_type == blas_rowmajor) {
7343 ldb = (ldb_val == 0) ? n_i :
7344 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
7345 } else {
7346 ldb = (ldb_val == 0) ? m_i :
7347 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
7348 }
7349
7350 for (randomize_val = RANDOMIZE_START;
7351 randomize_val <= RANDOMIZE_END; randomize_val++) {
7352
7353 /* For the sake of speed, we throw out this case at random */
7354 if (xrand(seed) >= test_prob)
7355 continue;
7356
7357 /* finally we are here to generate the test case */
7358 /* alpha_use, a_use, B_use are the generated alpha, a, B
7359 * before any scaling.
7360 * That is, in the generator, alpha == beta == alpha_use
7361 * before scaling. */
7362
7363 saved_seed = *seed;
7364 BLAS_dge_sum_mv_d_s_testgen(norm, order_type,
7365 m, n, randomize_val, &alpha,
7366 alpha_flag, &beta, beta_flag,
7367 a, lda, B, ldb, x_vec, 1,
7368 &alpha_use, a_use, B_use,
7369 seed, head_r_true,
7370 tail_r_true);
7371
7372 /* vary incx = 1, 2 */
7373 for (incx_val = INCX_START; incx_val <= INCX_END;
7374 incx_val++) {
7375
7376 incx = incx_val;
7377 if (0 == incx)
7378 continue;
7379
7380 scopy_vector(x_vec, n_i, 1, x, incx);
7381
7382 /* vary incy = 1, 2 */
7383 for (incy_val = INCY_START; incy_val <= INCY_END;
7384 incy_val++) {
7385
7386 incy = incy_val;
7387 if (0 == incy)
7388 continue;
7389
7390 test_count++;
7391
7392 /* call ge_sum_mv routines to be tested */
7393 FPU_FIX_STOP;
7394 BLAS_dge_sum_mv_d_s_x(order_type,
7395 m, n, alpha, a, lda, x, incx,
7396 beta, B, ldb, y, incy, prec);
7397 FPU_FIX_START;
7398
7399 /* now compute the ratio using test_BLAS_xdot */
7400 /* copy a row from A, use x, run
7401 dot test */
7402
7403 incyi = incy;
7404
7405 incri = 1;
7406 incx_veci = 1;
7407
7408
7409
7410 if (incy < 0) {
7411 y_starti = (-m_i + 1) * incyi;
7412 } else {
7413 y_starti = 0;
7414 }
7415 /* make two copies of x into x_vec. redundant */
7416 scopy_vector(x, n_i, incx, x_vec, 1);
7417 scopy_vector(x, n_i, incx,
7418 (x_vec + (n_i * incx_veci)), 1);
7419 for (i = 0, yi = y_starti, ri = 0; i < m_i;
7420 i++, yi += incyi, ri += incri) {
7421 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
7422 a_use, lda, a_vec, i);
7423 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
7424 B_use, ldb, (a_vec + inca_veci * n_i),
7425 i);
7426
7427 rin = 0.0;
7428 rout = y[yi];
7429 head_r_true_elem = head_r_true[ri];
7430 tail_r_true_elem = tail_r_true[ri];
7431
7432 test_BLAS_ddot_d_s(2 * n_i,
7433 blas_no_conj,
7434 alpha_use, beta_zero_fake, rin,
7435 rout, head_r_true_elem,
7436 tail_r_true_elem, a_vec, 1,
7437 x_vec, 1, eps_int, un_int,
7438 &ratios[i]);
7439
7440 /* take the max ratio */
7441 if (i == 0) {
7442 ratio = ratios[0];
7443 /* The !<= below causes NaN errors
7444 * to be included.
7445 * Note that (NaN > 0) is false */
7446 } else if (!(ratios[i] <= ratio)) {
7447 ratio = ratios[i];
7448 }
7449 } /* end of dot-test loop */
7450
7451 /* The !<= below causes NaN errors
7452 * to be included.
7453 * Note that (NaN > 0) is false */
7454 if (!(ratio <= thresh)) {
7455
7456 if (debug == 3) {
7457 printf("\n\t\tTest # %d\n", test_count);
7458 printf("y type : d, a type : d, x type : s\n");
7459 printf("Seed = %d\t", saved_seed);
7460 printf("n %d, m %d\n", n, m);
7461 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
7462 ldb, incx, incx);
7463
7464 if (order_type == blas_rowmajor)
7465 printf("row ");
7466 else
7467 printf("col ");
7468
7469 printf("NORM %d, ALPHA %d, BETA %d\n",
7470 norm, alpha_val, beta_val);
7471 printf("randomize %d\n", randomize_val);
7472
7473 /* print out info */
7474 printf("alpha = ");
7475 printf("%24.16e", alpha);;
7476 printf(" ");
7477 printf("beta = ");
7478 printf("%24.16e", beta);;
7479 printf("\n");
7480 printf("alpha_use = ");
7481 printf("%24.16e", alpha_use);;
7482 printf("\n");
7483
7484 dge_print_matrix(a, m_i, n_i, lda, order_type,
7485 "A");
7486 dge_print_matrix(B, m_i, n_i, ldb, order_type,
7487 "B");
7488 sprint_vector(x, n_i, incx, "x");
7489
7490 dprint_vector(y, m_i, incy, "y");
7491
7492 dprint_vector(head_r_true, m_i, 1, "head_r_true");
7493
7494 dge_print_matrix(a_use, m_i, n_i, lda, order_type,
7495 "A_use");
7496 dge_print_matrix(B_use, m_i, n_i, ldb, order_type,
7497 "B_use");
7498
7499 dprint_vector(ratios, m_i, 1, "ratios");
7500 printf("ratio = %g\n", ratio);
7501 fflush(stdout);
7502 }
7503 bad_ratio_count++;
7504 if (bad_ratio_count >= MAX_BAD_TESTS) {
7505 printf("\ntoo many failures, exiting....");
7506 printf("\nTesting and compilation");
7507 printf(" are incomplete\n\n");
7508 goto end;
7509 }
7510 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
7511 printf("\nFlagrant ratio error, exiting...");
7512 printf("\nTesting and compilation");
7513 printf(" are incomplete\n\n");
7514 goto end;
7515 }
7516 }
7517
7518 if (!(ratio <= ratio_max))
7519 ratio_max = ratio;
7520
7521 if (ratio != 0.0 && !(ratio >= ratio_min))
7522 ratio_min = ratio;
7523
7524 } /* end of incy loop */
7525
7526 } /* end of incx loop */
7527
7528 } /* end of randmize loop */
7529
7530 } /* end of ldb loop */
7531
7532 } /* end of lda loop */
7533
7534 } /* end of order loop */
7535
7536 } /* end of nr test loop */
7537
7538 } /* end of norm loop */
7539
7540
7541 } /* end of prec loop */
7542
7543 } /* end of beta loop */
7544
7545 } /* end of alpha loop */
7546
7547 FPU_FIX_STOP;
7548
7549 end:
7550 blas_free(y);
7551 blas_free(a);
7552 blas_free(a_use);
7553 blas_free(B);
7554 blas_free(B_use);
7555 blas_free(x);
7556 blas_free(head_r_true);
7557 blas_free(tail_r_true);
7558 blas_free(ratios);
7559 blas_free(a_vec);
7560 blas_free(x_vec);
7561
7562 *max_ratio = ratio_max;
7563 *min_ratio = ratio_min;
7564 *num_tests = test_count;
7565 *num_bad_ratio = bad_ratio_count;
7566
7567 }
do_test_dge_sum_mv_s_d_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)7568 void do_test_dge_sum_mv_s_d_x
7569 (int m, int n,
7570 int ntests, int *seed, double thresh, int debug, float test_prob,
7571 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
7572
7573 /* Function name */
7574 const char fname[] = "BLAS_dge_sum_mv_s_d_x";
7575
7576 int i;
7577 int yi;
7578 int incyi, y_starti, incx_veci;
7579 int test_count;
7580 int bad_ratio_count;
7581
7582 int ri;
7583 int incri;
7584 int inca, incx, incy;
7585
7586 double ratio;
7587
7588 double ratio_min, ratio_max;
7589
7590 double eps_int; /* internal machine epsilon */
7591 double un_int; /* internal underflow threshold */
7592
7593 double rin;
7594 double rout;
7595 double head_r_true_elem, tail_r_true_elem;
7596
7597 enum blas_order_type order_type;
7598 enum blas_prec_type prec;
7599
7600 int order_val;
7601 int lda_val, incx_val, incy_val;
7602 int ldb_val;
7603 int alpha_val, beta_val;
7604 int randomize_val;
7605
7606 int prec_val;
7607
7608 int lda, ldb;
7609 int alpha_flag, beta_flag;
7610 int saved_seed;
7611 int norm;
7612 int test_no;
7613
7614 int n_i, m_i;
7615 int inca_veci;
7616
7617 double alpha;
7618 double beta;
7619 double beta_zero_fake;
7620 double alpha_use;
7621 float *a;
7622 float *a_use;
7623 float *B;
7624 float *B_use;
7625 double *x;
7626 double *y;
7627 float *a_vec;
7628 double *x_vec;
7629
7630
7631 double *ratios;
7632
7633 /* true result calculated by testgen, in double-double */
7634 double *head_r_true, *tail_r_true;
7635
7636 FPU_FIX_DECL;
7637
7638 beta_zero_fake = 0.0;
7639
7640 if (n < 0 || ntests < 0)
7641 BLAS_error(fname, -3, n, NULL);
7642
7643 /* initialization */
7644 saved_seed = *seed;
7645 ratio = 0.0;
7646 ratio_min = 1e308;
7647 ratio_max = 0.0;
7648
7649 *num_tests = 0;
7650 *num_bad_ratio = 0;
7651 *min_ratio = 0.0;
7652 *max_ratio = 0.0;
7653
7654 if (n == 0)
7655 return;
7656
7657 FPU_FIX_START;
7658
7659 n_i = n;
7660 m_i = m;
7661
7662 inca = incx = incy = 1;
7663
7664
7665
7666
7667 /* allocate memory for arrays */
7668 y = (double *) blas_malloc(4 * m_i * sizeof(double));
7669 if (4 * m_i > 0 && y == NULL) {
7670 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7671 }
7672 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float));
7673 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
7674 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7675 }
7676 a_use = (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float));
7677 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
7678 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7679 }
7680 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
7681 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
7682 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7683 }
7684 B_use = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
7685 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
7686 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7687 }
7688 x = (double *) blas_malloc(4 * n_i * sizeof(double));
7689 if (4 * n_i > 0 && x == NULL) {
7690 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7691 }
7692
7693 inca_veci = 1;
7694
7695 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
7696 if (2 * n_i > 0 && a_vec == NULL) {
7697 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7698 }
7699 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
7700 if (2 * n_i > 0 && x_vec == NULL) {
7701 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7702 }
7703 head_r_true = (double *) blas_malloc(m_i * sizeof(double));
7704 tail_r_true = (double *) blas_malloc(m_i * sizeof(double));
7705 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
7706 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7707 }
7708 ratios = (double *) blas_malloc(m_i * sizeof(double));
7709 if (m_i > 0 && ratios == NULL) {
7710 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7711 }
7712
7713 test_count = 0;
7714 bad_ratio_count = 0;
7715
7716 /* vary alpha */
7717 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
7718
7719 alpha_flag = 0;
7720 switch (alpha_val) {
7721 case 0:
7722 alpha = 0.0;
7723 alpha_flag = 1;
7724 break;
7725 case 1:
7726 alpha = 1.0;
7727 alpha_flag = 1;
7728 break;
7729 }
7730
7731 /* vary beta */
7732 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
7733 beta_flag = 0;
7734 switch (beta_val) {
7735 case 0:
7736 beta = 0.0;
7737 beta_flag = 1;
7738 break;
7739 case 1:
7740 beta = 1.0;
7741 beta_flag = 1;
7742 break;
7743 }
7744
7745
7746 /* varying extra precs */
7747 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
7748 switch (prec_val) {
7749 case 0:
7750 eps_int = power(2, -BITS_D);
7751 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
7752 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
7753 prec = blas_prec_double;
7754 break;
7755 case 1:
7756 eps_int = power(2, -BITS_D);
7757 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
7758 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
7759 prec = blas_prec_double;
7760 break;
7761 case 2:
7762 default:
7763 eps_int = power(2, -BITS_E);
7764 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
7765 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
7766 prec = blas_prec_extra;
7767 break;
7768 }
7769
7770 /* vary norm -- underflow, approx 1, overflow */
7771 for (norm = NORM_START; norm <= NORM_END; norm++) {
7772
7773 /* number of tests */
7774 for (test_no = 0; test_no < ntests; test_no++) {
7775
7776
7777 /* vary storage format */
7778 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
7779
7780 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
7781
7782 /* vary lda = n_i, n_i+1, 2*n_i */
7783 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
7784
7785 if (order_type == blas_rowmajor) {
7786 lda = (lda_val == 0) ? n_i :
7787 (lda_val == 1) ? n_i + 1 : n_i * n_i;
7788 } else {
7789 lda = (lda_val == 0) ? m_i :
7790 (lda_val == 1) ? m_i + 1 : m_i * m_i;
7791 }
7792
7793 /* vary ldb = n_i, n_i+1, 2*n_i */
7794 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
7795
7796 if (order_type == blas_rowmajor) {
7797 ldb = (ldb_val == 0) ? n_i :
7798 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
7799 } else {
7800 ldb = (ldb_val == 0) ? m_i :
7801 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
7802 }
7803
7804 for (randomize_val = RANDOMIZE_START;
7805 randomize_val <= RANDOMIZE_END; randomize_val++) {
7806
7807 /* For the sake of speed, we throw out this case at random */
7808 if (xrand(seed) >= test_prob)
7809 continue;
7810
7811 /* finally we are here to generate the test case */
7812 /* alpha_use, a_use, B_use are the generated alpha, a, B
7813 * before any scaling.
7814 * That is, in the generator, alpha == beta == alpha_use
7815 * before scaling. */
7816
7817 saved_seed = *seed;
7818 BLAS_dge_sum_mv_s_d_testgen(norm, order_type,
7819 m, n, randomize_val, &alpha,
7820 alpha_flag, &beta, beta_flag,
7821 a, lda, B, ldb, x_vec, 1,
7822 &alpha_use, a_use, B_use,
7823 seed, head_r_true,
7824 tail_r_true);
7825
7826 /* vary incx = 1, 2 */
7827 for (incx_val = INCX_START; incx_val <= INCX_END;
7828 incx_val++) {
7829
7830 incx = incx_val;
7831 if (0 == incx)
7832 continue;
7833
7834 dcopy_vector(x_vec, n_i, 1, x, incx);
7835
7836 /* vary incy = 1, 2 */
7837 for (incy_val = INCY_START; incy_val <= INCY_END;
7838 incy_val++) {
7839
7840 incy = incy_val;
7841 if (0 == incy)
7842 continue;
7843
7844 test_count++;
7845
7846 /* call ge_sum_mv routines to be tested */
7847 FPU_FIX_STOP;
7848 BLAS_dge_sum_mv_s_d_x(order_type,
7849 m, n, alpha, a, lda, x, incx,
7850 beta, B, ldb, y, incy, prec);
7851 FPU_FIX_START;
7852
7853 /* now compute the ratio using test_BLAS_xdot */
7854 /* copy a row from A, use x, run
7855 dot test */
7856
7857 incyi = incy;
7858
7859 incri = 1;
7860 incx_veci = 1;
7861
7862
7863
7864 if (incy < 0) {
7865 y_starti = (-m_i + 1) * incyi;
7866 } else {
7867 y_starti = 0;
7868 }
7869 /* make two copies of x into x_vec. redundant */
7870 dcopy_vector(x, n_i, incx, x_vec, 1);
7871 dcopy_vector(x, n_i, incx,
7872 (x_vec + (n_i * incx_veci)), 1);
7873 for (i = 0, yi = y_starti, ri = 0; i < m_i;
7874 i++, yi += incyi, ri += incri) {
7875 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
7876 a_use, lda, a_vec, i);
7877 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
7878 B_use, ldb, (a_vec + inca_veci * n_i),
7879 i);
7880
7881 rin = 0.0;
7882 rout = y[yi];
7883 head_r_true_elem = head_r_true[ri];
7884 tail_r_true_elem = tail_r_true[ri];
7885
7886 test_BLAS_ddot_s_d(2 * n_i,
7887 blas_no_conj,
7888 alpha_use, beta_zero_fake, rin,
7889 rout, head_r_true_elem,
7890 tail_r_true_elem, a_vec, 1,
7891 x_vec, 1, eps_int, un_int,
7892 &ratios[i]);
7893
7894 /* take the max ratio */
7895 if (i == 0) {
7896 ratio = ratios[0];
7897 /* The !<= below causes NaN errors
7898 * to be included.
7899 * Note that (NaN > 0) is false */
7900 } else if (!(ratios[i] <= ratio)) {
7901 ratio = ratios[i];
7902 }
7903 } /* end of dot-test loop */
7904
7905 /* The !<= below causes NaN errors
7906 * to be included.
7907 * Note that (NaN > 0) is false */
7908 if (!(ratio <= thresh)) {
7909
7910 if (debug == 3) {
7911 printf("\n\t\tTest # %d\n", test_count);
7912 printf("y type : d, a type : s, x type : d\n");
7913 printf("Seed = %d\t", saved_seed);
7914 printf("n %d, m %d\n", n, m);
7915 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
7916 ldb, incx, incx);
7917
7918 if (order_type == blas_rowmajor)
7919 printf("row ");
7920 else
7921 printf("col ");
7922
7923 printf("NORM %d, ALPHA %d, BETA %d\n",
7924 norm, alpha_val, beta_val);
7925 printf("randomize %d\n", randomize_val);
7926
7927 /* print out info */
7928 printf("alpha = ");
7929 printf("%24.16e", alpha);;
7930 printf(" ");
7931 printf("beta = ");
7932 printf("%24.16e", beta);;
7933 printf("\n");
7934 printf("alpha_use = ");
7935 printf("%24.16e", alpha_use);;
7936 printf("\n");
7937
7938 sge_print_matrix(a, m_i, n_i, lda, order_type,
7939 "A");
7940 sge_print_matrix(B, m_i, n_i, ldb, order_type,
7941 "B");
7942 dprint_vector(x, n_i, incx, "x");
7943
7944 dprint_vector(y, m_i, incy, "y");
7945
7946 dprint_vector(head_r_true, m_i, 1, "head_r_true");
7947
7948 sge_print_matrix(a_use, m_i, n_i, lda, order_type,
7949 "A_use");
7950 sge_print_matrix(B_use, m_i, n_i, ldb, order_type,
7951 "B_use");
7952
7953 dprint_vector(ratios, m_i, 1, "ratios");
7954 printf("ratio = %g\n", ratio);
7955 fflush(stdout);
7956 }
7957 bad_ratio_count++;
7958 if (bad_ratio_count >= MAX_BAD_TESTS) {
7959 printf("\ntoo many failures, exiting....");
7960 printf("\nTesting and compilation");
7961 printf(" are incomplete\n\n");
7962 goto end;
7963 }
7964 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
7965 printf("\nFlagrant ratio error, exiting...");
7966 printf("\nTesting and compilation");
7967 printf(" are incomplete\n\n");
7968 goto end;
7969 }
7970 }
7971
7972 if (!(ratio <= ratio_max))
7973 ratio_max = ratio;
7974
7975 if (ratio != 0.0 && !(ratio >= ratio_min))
7976 ratio_min = ratio;
7977
7978 } /* end of incy loop */
7979
7980 } /* end of incx loop */
7981
7982 } /* end of randmize loop */
7983
7984 } /* end of ldb loop */
7985
7986 } /* end of lda loop */
7987
7988 } /* end of order loop */
7989
7990 } /* end of nr test loop */
7991
7992 } /* end of norm loop */
7993
7994
7995 } /* end of prec loop */
7996
7997 } /* end of beta loop */
7998
7999 } /* end of alpha loop */
8000
8001 FPU_FIX_STOP;
8002
8003 end:
8004 blas_free(y);
8005 blas_free(a);
8006 blas_free(a_use);
8007 blas_free(B);
8008 blas_free(B_use);
8009 blas_free(x);
8010 blas_free(head_r_true);
8011 blas_free(tail_r_true);
8012 blas_free(ratios);
8013 blas_free(a_vec);
8014 blas_free(x_vec);
8015
8016 *max_ratio = ratio_max;
8017 *min_ratio = ratio_min;
8018 *num_tests = test_count;
8019 *num_bad_ratio = bad_ratio_count;
8020
8021 }
do_test_dge_sum_mv_s_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)8022 void do_test_dge_sum_mv_s_s_x
8023 (int m, int n,
8024 int ntests, int *seed, double thresh, int debug, float test_prob,
8025 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
8026
8027 /* Function name */
8028 const char fname[] = "BLAS_dge_sum_mv_s_s_x";
8029
8030 int i;
8031 int yi;
8032 int incyi, y_starti, incx_veci;
8033 int test_count;
8034 int bad_ratio_count;
8035
8036 int ri;
8037 int incri;
8038 int inca, incx, incy;
8039
8040 double ratio;
8041
8042 double ratio_min, ratio_max;
8043
8044 double eps_int; /* internal machine epsilon */
8045 double un_int; /* internal underflow threshold */
8046
8047 double rin;
8048 double rout;
8049 double head_r_true_elem, tail_r_true_elem;
8050
8051 enum blas_order_type order_type;
8052 enum blas_prec_type prec;
8053
8054 int order_val;
8055 int lda_val, incx_val, incy_val;
8056 int ldb_val;
8057 int alpha_val, beta_val;
8058 int randomize_val;
8059
8060 int prec_val;
8061
8062 int lda, ldb;
8063 int alpha_flag, beta_flag;
8064 int saved_seed;
8065 int norm;
8066 int test_no;
8067
8068 int n_i, m_i;
8069 int inca_veci;
8070
8071 double alpha;
8072 double beta;
8073 double beta_zero_fake;
8074 double alpha_use;
8075 float *a;
8076 float *a_use;
8077 float *B;
8078 float *B_use;
8079 float *x;
8080 double *y;
8081 float *a_vec;
8082 float *x_vec;
8083
8084
8085 double *ratios;
8086
8087 /* true result calculated by testgen, in double-double */
8088 double *head_r_true, *tail_r_true;
8089
8090 FPU_FIX_DECL;
8091
8092 beta_zero_fake = 0.0;
8093
8094 if (n < 0 || ntests < 0)
8095 BLAS_error(fname, -3, n, NULL);
8096
8097 /* initialization */
8098 saved_seed = *seed;
8099 ratio = 0.0;
8100 ratio_min = 1e308;
8101 ratio_max = 0.0;
8102
8103 *num_tests = 0;
8104 *num_bad_ratio = 0;
8105 *min_ratio = 0.0;
8106 *max_ratio = 0.0;
8107
8108 if (n == 0)
8109 return;
8110
8111 FPU_FIX_START;
8112
8113 n_i = n;
8114 m_i = m;
8115
8116 inca = incx = incy = 1;
8117
8118
8119
8120
8121 /* allocate memory for arrays */
8122 y = (double *) blas_malloc(4 * m_i * sizeof(double));
8123 if (4 * m_i > 0 && y == NULL) {
8124 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8125 }
8126 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float));
8127 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
8128 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8129 }
8130 a_use = (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float));
8131 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
8132 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8133 }
8134 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
8135 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
8136 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8137 }
8138 B_use = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
8139 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
8140 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8141 }
8142 x = (float *) blas_malloc(4 * n_i * sizeof(float));
8143 if (4 * n_i > 0 && x == NULL) {
8144 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8145 }
8146
8147 inca_veci = 1;
8148
8149 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
8150 if (2 * n_i > 0 && a_vec == NULL) {
8151 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8152 }
8153 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
8154 if (2 * n_i > 0 && x_vec == NULL) {
8155 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8156 }
8157 head_r_true = (double *) blas_malloc(m_i * sizeof(double));
8158 tail_r_true = (double *) blas_malloc(m_i * sizeof(double));
8159 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
8160 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8161 }
8162 ratios = (double *) blas_malloc(m_i * sizeof(double));
8163 if (m_i > 0 && ratios == NULL) {
8164 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8165 }
8166
8167 test_count = 0;
8168 bad_ratio_count = 0;
8169
8170 /* vary alpha */
8171 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
8172
8173 alpha_flag = 0;
8174 switch (alpha_val) {
8175 case 0:
8176 alpha = 0.0;
8177 alpha_flag = 1;
8178 break;
8179 case 1:
8180 alpha = 1.0;
8181 alpha_flag = 1;
8182 break;
8183 }
8184
8185 /* vary beta */
8186 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
8187 beta_flag = 0;
8188 switch (beta_val) {
8189 case 0:
8190 beta = 0.0;
8191 beta_flag = 1;
8192 break;
8193 case 1:
8194 beta = 1.0;
8195 beta_flag = 1;
8196 break;
8197 }
8198
8199
8200 /* varying extra precs */
8201 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
8202 switch (prec_val) {
8203 case 0:
8204 eps_int = power(2, -BITS_D);
8205 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
8206 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
8207 prec = blas_prec_double;
8208 break;
8209 case 1:
8210 eps_int = power(2, -BITS_D);
8211 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
8212 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
8213 prec = blas_prec_double;
8214 break;
8215 case 2:
8216 default:
8217 eps_int = power(2, -BITS_E);
8218 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
8219 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
8220 prec = blas_prec_extra;
8221 break;
8222 }
8223
8224 /* vary norm -- underflow, approx 1, overflow */
8225 for (norm = NORM_START; norm <= NORM_END; norm++) {
8226
8227 /* number of tests */
8228 for (test_no = 0; test_no < ntests; test_no++) {
8229
8230
8231 /* vary storage format */
8232 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
8233
8234 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
8235
8236 /* vary lda = n_i, n_i+1, 2*n_i */
8237 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
8238
8239 if (order_type == blas_rowmajor) {
8240 lda = (lda_val == 0) ? n_i :
8241 (lda_val == 1) ? n_i + 1 : n_i * n_i;
8242 } else {
8243 lda = (lda_val == 0) ? m_i :
8244 (lda_val == 1) ? m_i + 1 : m_i * m_i;
8245 }
8246
8247 /* vary ldb = n_i, n_i+1, 2*n_i */
8248 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
8249
8250 if (order_type == blas_rowmajor) {
8251 ldb = (ldb_val == 0) ? n_i :
8252 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
8253 } else {
8254 ldb = (ldb_val == 0) ? m_i :
8255 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
8256 }
8257
8258 for (randomize_val = RANDOMIZE_START;
8259 randomize_val <= RANDOMIZE_END; randomize_val++) {
8260
8261 /* For the sake of speed, we throw out this case at random */
8262 if (xrand(seed) >= test_prob)
8263 continue;
8264
8265 /* finally we are here to generate the test case */
8266 /* alpha_use, a_use, B_use are the generated alpha, a, B
8267 * before any scaling.
8268 * That is, in the generator, alpha == beta == alpha_use
8269 * before scaling. */
8270
8271 saved_seed = *seed;
8272 BLAS_dge_sum_mv_s_s_testgen(norm, order_type,
8273 m, n, randomize_val, &alpha,
8274 alpha_flag, &beta, beta_flag,
8275 a, lda, B, ldb, x_vec, 1,
8276 &alpha_use, a_use, B_use,
8277 seed, head_r_true,
8278 tail_r_true);
8279
8280 /* vary incx = 1, 2 */
8281 for (incx_val = INCX_START; incx_val <= INCX_END;
8282 incx_val++) {
8283
8284 incx = incx_val;
8285 if (0 == incx)
8286 continue;
8287
8288 scopy_vector(x_vec, n_i, 1, x, incx);
8289
8290 /* vary incy = 1, 2 */
8291 for (incy_val = INCY_START; incy_val <= INCY_END;
8292 incy_val++) {
8293
8294 incy = incy_val;
8295 if (0 == incy)
8296 continue;
8297
8298 test_count++;
8299
8300 /* call ge_sum_mv routines to be tested */
8301 FPU_FIX_STOP;
8302 BLAS_dge_sum_mv_s_s_x(order_type,
8303 m, n, alpha, a, lda, x, incx,
8304 beta, B, ldb, y, incy, prec);
8305 FPU_FIX_START;
8306
8307 /* now compute the ratio using test_BLAS_xdot */
8308 /* copy a row from A, use x, run
8309 dot test */
8310
8311 incyi = incy;
8312
8313 incri = 1;
8314 incx_veci = 1;
8315
8316
8317
8318 if (incy < 0) {
8319 y_starti = (-m_i + 1) * incyi;
8320 } else {
8321 y_starti = 0;
8322 }
8323 /* make two copies of x into x_vec. redundant */
8324 scopy_vector(x, n_i, incx, x_vec, 1);
8325 scopy_vector(x, n_i, incx,
8326 (x_vec + (n_i * incx_veci)), 1);
8327 for (i = 0, yi = y_starti, ri = 0; i < m_i;
8328 i++, yi += incyi, ri += incri) {
8329 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
8330 a_use, lda, a_vec, i);
8331 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
8332 B_use, ldb, (a_vec + inca_veci * n_i),
8333 i);
8334
8335 rin = 0.0;
8336 rout = y[yi];
8337 head_r_true_elem = head_r_true[ri];
8338 tail_r_true_elem = tail_r_true[ri];
8339
8340 test_BLAS_ddot_s_s(2 * n_i,
8341 blas_no_conj,
8342 alpha_use, beta_zero_fake, rin,
8343 rout, head_r_true_elem,
8344 tail_r_true_elem, a_vec, 1,
8345 x_vec, 1, eps_int, un_int,
8346 &ratios[i]);
8347
8348 /* take the max ratio */
8349 if (i == 0) {
8350 ratio = ratios[0];
8351 /* The !<= below causes NaN errors
8352 * to be included.
8353 * Note that (NaN > 0) is false */
8354 } else if (!(ratios[i] <= ratio)) {
8355 ratio = ratios[i];
8356 }
8357 } /* end of dot-test loop */
8358
8359 /* The !<= below causes NaN errors
8360 * to be included.
8361 * Note that (NaN > 0) is false */
8362 if (!(ratio <= thresh)) {
8363
8364 if (debug == 3) {
8365 printf("\n\t\tTest # %d\n", test_count);
8366 printf("y type : d, a type : s, x type : s\n");
8367 printf("Seed = %d\t", saved_seed);
8368 printf("n %d, m %d\n", n, m);
8369 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
8370 ldb, incx, incx);
8371
8372 if (order_type == blas_rowmajor)
8373 printf("row ");
8374 else
8375 printf("col ");
8376
8377 printf("NORM %d, ALPHA %d, BETA %d\n",
8378 norm, alpha_val, beta_val);
8379 printf("randomize %d\n", randomize_val);
8380
8381 /* print out info */
8382 printf("alpha = ");
8383 printf("%24.16e", alpha);;
8384 printf(" ");
8385 printf("beta = ");
8386 printf("%24.16e", beta);;
8387 printf("\n");
8388 printf("alpha_use = ");
8389 printf("%24.16e", alpha_use);;
8390 printf("\n");
8391
8392 sge_print_matrix(a, m_i, n_i, lda, order_type,
8393 "A");
8394 sge_print_matrix(B, m_i, n_i, ldb, order_type,
8395 "B");
8396 sprint_vector(x, n_i, incx, "x");
8397
8398 dprint_vector(y, m_i, incy, "y");
8399
8400 dprint_vector(head_r_true, m_i, 1, "head_r_true");
8401
8402 sge_print_matrix(a_use, m_i, n_i, lda, order_type,
8403 "A_use");
8404 sge_print_matrix(B_use, m_i, n_i, ldb, order_type,
8405 "B_use");
8406
8407 dprint_vector(ratios, m_i, 1, "ratios");
8408 printf("ratio = %g\n", ratio);
8409 fflush(stdout);
8410 }
8411 bad_ratio_count++;
8412 if (bad_ratio_count >= MAX_BAD_TESTS) {
8413 printf("\ntoo many failures, exiting....");
8414 printf("\nTesting and compilation");
8415 printf(" are incomplete\n\n");
8416 goto end;
8417 }
8418 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
8419 printf("\nFlagrant ratio error, exiting...");
8420 printf("\nTesting and compilation");
8421 printf(" are incomplete\n\n");
8422 goto end;
8423 }
8424 }
8425
8426 if (!(ratio <= ratio_max))
8427 ratio_max = ratio;
8428
8429 if (ratio != 0.0 && !(ratio >= ratio_min))
8430 ratio_min = ratio;
8431
8432 } /* end of incy loop */
8433
8434 } /* end of incx loop */
8435
8436 } /* end of randmize loop */
8437
8438 } /* end of ldb loop */
8439
8440 } /* end of lda loop */
8441
8442 } /* end of order loop */
8443
8444 } /* end of nr test loop */
8445
8446 } /* end of norm loop */
8447
8448
8449 } /* end of prec loop */
8450
8451 } /* end of beta loop */
8452
8453 } /* end of alpha loop */
8454
8455 FPU_FIX_STOP;
8456
8457 end:
8458 blas_free(y);
8459 blas_free(a);
8460 blas_free(a_use);
8461 blas_free(B);
8462 blas_free(B_use);
8463 blas_free(x);
8464 blas_free(head_r_true);
8465 blas_free(tail_r_true);
8466 blas_free(ratios);
8467 blas_free(a_vec);
8468 blas_free(x_vec);
8469
8470 *max_ratio = ratio_max;
8471 *min_ratio = ratio_min;
8472 *num_tests = test_count;
8473 *num_bad_ratio = bad_ratio_count;
8474
8475 }
do_test_zge_sum_mv_z_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)8476 void do_test_zge_sum_mv_z_c_x
8477 (int m, int n,
8478 int ntests, int *seed, double thresh, int debug, float test_prob,
8479 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
8480
8481 /* Function name */
8482 const char fname[] = "BLAS_zge_sum_mv_z_c_x";
8483
8484 int i;
8485 int yi;
8486 int incyi, y_starti, incx_veci;
8487 int test_count;
8488 int bad_ratio_count;
8489
8490 int ri;
8491 int incri;
8492 int inca, incx, incy;
8493
8494 double ratio;
8495
8496 double ratio_min, ratio_max;
8497
8498 double eps_int; /* internal machine epsilon */
8499 double un_int; /* internal underflow threshold */
8500
8501 double rin[2];
8502 double rout[2];
8503 double head_r_true_elem[2], tail_r_true_elem[2];
8504
8505 enum blas_order_type order_type;
8506 enum blas_prec_type prec;
8507
8508 int order_val;
8509 int lda_val, incx_val, incy_val;
8510 int ldb_val;
8511 int alpha_val, beta_val;
8512 int randomize_val;
8513
8514 int prec_val;
8515
8516 int lda, ldb;
8517 int alpha_flag, beta_flag;
8518 int saved_seed;
8519 int norm;
8520 int test_no;
8521
8522 int n_i, m_i;
8523 int inca_veci;
8524
8525 double alpha[2];
8526 double beta[2];
8527 double beta_zero_fake[2];
8528 double alpha_use[2];
8529 double *a;
8530 double *a_use;
8531 double *B;
8532 double *B_use;
8533 float *x;
8534 double *y;
8535 double *a_vec;
8536 float *x_vec;
8537
8538
8539 double *ratios;
8540
8541 /* true result calculated by testgen, in double-double */
8542 double *head_r_true, *tail_r_true;
8543
8544
8545 FPU_FIX_DECL;
8546
8547 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
8548
8549 if (n < 0 || ntests < 0)
8550 BLAS_error(fname, -3, n, NULL);
8551
8552 /* initialization */
8553 saved_seed = *seed;
8554 ratio = 0.0;
8555 ratio_min = 1e308;
8556 ratio_max = 0.0;
8557
8558 *num_tests = 0;
8559 *num_bad_ratio = 0;
8560 *min_ratio = 0.0;
8561 *max_ratio = 0.0;
8562
8563 if (n == 0)
8564 return;
8565
8566 FPU_FIX_START;
8567
8568 n_i = n;
8569 m_i = m;
8570
8571 inca = incx = incy = 1;
8572 inca *= 2;
8573 incx *= 2;
8574 incy *= 2;
8575
8576 /* allocate memory for arrays */
8577 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
8578 if (4 * m_i > 0 && y == NULL) {
8579 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8580 }
8581 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double) * 2);
8582 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
8583 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8584 }
8585 a_use =
8586 (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double) * 2);
8587 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
8588 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8589 }
8590 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double) * 2);
8591 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
8592 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8593 }
8594 B_use =
8595 (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double) * 2);
8596 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
8597 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8598 }
8599 x = (float *) blas_malloc(4 * n_i * sizeof(float) * 2);
8600 if (4 * n_i > 0 && x == NULL) {
8601 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8602 }
8603
8604 inca_veci = 1;
8605 inca_veci *= 2;
8606 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double) * 2);
8607 if (2 * n_i > 0 && a_vec == NULL) {
8608 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8609 }
8610 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
8611 if (2 * n_i > 0 && x_vec == NULL) {
8612 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8613 }
8614 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
8615 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
8616 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
8617 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8618 }
8619 ratios = (double *) blas_malloc(m_i * sizeof(double));
8620 if (m_i > 0 && ratios == NULL) {
8621 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8622 }
8623
8624 test_count = 0;
8625 bad_ratio_count = 0;
8626
8627 /* vary alpha */
8628 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
8629
8630 alpha_flag = 0;
8631 switch (alpha_val) {
8632 case 0:
8633 alpha[0] = alpha[1] = 0.0;
8634 alpha_flag = 1;
8635 break;
8636 case 1:
8637 alpha[0] = 1.0;
8638 alpha[1] = 0.0;
8639 alpha_flag = 1;
8640 break;
8641 }
8642
8643 /* vary beta */
8644 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
8645 beta_flag = 0;
8646 switch (beta_val) {
8647 case 0:
8648 beta[0] = beta[1] = 0.0;
8649 beta_flag = 1;
8650 break;
8651 case 1:
8652 beta[0] = 1.0;
8653 beta[1] = 0.0;
8654 beta_flag = 1;
8655 break;
8656 }
8657
8658
8659 /* varying extra precs */
8660 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
8661 switch (prec_val) {
8662 case 0:
8663 eps_int = power(2, -BITS_D);
8664 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
8665 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
8666 prec = blas_prec_double;
8667 break;
8668 case 1:
8669 eps_int = power(2, -BITS_D);
8670 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
8671 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
8672 prec = blas_prec_double;
8673 break;
8674 case 2:
8675 default:
8676 eps_int = power(2, -BITS_E);
8677 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
8678 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
8679 prec = blas_prec_extra;
8680 break;
8681 }
8682
8683 /* vary norm -- underflow, approx 1, overflow */
8684 for (norm = NORM_START; norm <= NORM_END; norm++) {
8685
8686 /* number of tests */
8687 for (test_no = 0; test_no < ntests; test_no++) {
8688
8689
8690 /* vary storage format */
8691 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
8692
8693 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
8694
8695 /* vary lda = n_i, n_i+1, 2*n_i */
8696 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
8697
8698 if (order_type == blas_rowmajor) {
8699 lda = (lda_val == 0) ? n_i :
8700 (lda_val == 1) ? n_i + 1 : n_i * n_i;
8701 } else {
8702 lda = (lda_val == 0) ? m_i :
8703 (lda_val == 1) ? m_i + 1 : m_i * m_i;
8704 }
8705
8706 /* vary ldb = n_i, n_i+1, 2*n_i */
8707 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
8708
8709 if (order_type == blas_rowmajor) {
8710 ldb = (ldb_val == 0) ? n_i :
8711 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
8712 } else {
8713 ldb = (ldb_val == 0) ? m_i :
8714 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
8715 }
8716
8717 for (randomize_val = RANDOMIZE_START;
8718 randomize_val <= RANDOMIZE_END; randomize_val++) {
8719
8720 /* For the sake of speed, we throw out this case at random */
8721 if (xrand(seed) >= test_prob)
8722 continue;
8723
8724 /* finally we are here to generate the test case */
8725 /* alpha_use, a_use, B_use are the generated alpha, a, B
8726 * before any scaling.
8727 * That is, in the generator, alpha == beta == alpha_use
8728 * before scaling. */
8729
8730 saved_seed = *seed;
8731 BLAS_zge_sum_mv_z_c_testgen(norm, order_type,
8732 m, n, randomize_val, &alpha,
8733 alpha_flag, &beta, beta_flag,
8734 a, lda, B, ldb, x_vec, 1,
8735 &alpha_use, a_use, B_use,
8736 seed, head_r_true,
8737 tail_r_true);
8738
8739 /* vary incx = 1, 2 */
8740 for (incx_val = INCX_START; incx_val <= INCX_END;
8741 incx_val++) {
8742
8743 incx = incx_val;
8744 if (0 == incx)
8745 continue;
8746
8747 ccopy_vector(x_vec, n_i, 1, x, incx);
8748
8749 /* vary incy = 1, 2 */
8750 for (incy_val = INCY_START; incy_val <= INCY_END;
8751 incy_val++) {
8752
8753 incy = incy_val;
8754 if (0 == incy)
8755 continue;
8756
8757 test_count++;
8758
8759 /* call ge_sum_mv routines to be tested */
8760 FPU_FIX_STOP;
8761 BLAS_zge_sum_mv_z_c_x(order_type,
8762 m, n, alpha, a, lda, x, incx,
8763 beta, B, ldb, y, incy, prec);
8764 FPU_FIX_START;
8765
8766 /* now compute the ratio using test_BLAS_xdot */
8767 /* copy a row from A, use x, run
8768 dot test */
8769
8770 incyi = incy;
8771
8772 incri = 1;
8773 incx_veci = 1;
8774 incx_veci *= 2;
8775 incyi *= 2;
8776 incri *= 2;
8777 if (incy < 0) {
8778 y_starti = (-m_i + 1) * incyi;
8779 } else {
8780 y_starti = 0;
8781 }
8782 /* make two copies of x into x_vec. redundant */
8783 ccopy_vector(x, n_i, incx, x_vec, 1);
8784 ccopy_vector(x, n_i, incx,
8785 (x_vec + (n_i * incx_veci)), 1);
8786 for (i = 0, yi = y_starti, ri = 0; i < m_i;
8787 i++, yi += incyi, ri += incri) {
8788 zge_copy_row(order_type, blas_no_trans, m_i, n_i,
8789 a_use, lda, a_vec, i);
8790 zge_copy_row(order_type, blas_no_trans, m_i, n_i,
8791 B_use, ldb, (a_vec + inca_veci * n_i),
8792 i);
8793
8794 rin[0] = rin[1] = 0.0;
8795 rout[0] = y[yi];
8796 rout[1] = y[yi + 1];
8797 head_r_true_elem[0] = head_r_true[ri];
8798 head_r_true_elem[1] = head_r_true[ri + 1];
8799 tail_r_true_elem[0] = tail_r_true[ri];
8800 tail_r_true_elem[1] = tail_r_true[ri + 1];
8801
8802 test_BLAS_zdot_z_c(2 * n_i,
8803 blas_no_conj,
8804 alpha_use, beta_zero_fake, rin,
8805 rout, head_r_true_elem,
8806 tail_r_true_elem, a_vec, 1,
8807 x_vec, 1, eps_int, un_int,
8808 &ratios[i]);
8809
8810 /* take the max ratio */
8811 if (i == 0) {
8812 ratio = ratios[0];
8813 /* The !<= below causes NaN errors
8814 * to be included.
8815 * Note that (NaN > 0) is false */
8816 } else if (!(ratios[i] <= ratio)) {
8817 ratio = ratios[i];
8818 }
8819 } /* end of dot-test loop */
8820
8821 /* The !<= below causes NaN errors
8822 * to be included.
8823 * Note that (NaN > 0) is false */
8824 if (!(ratio <= thresh)) {
8825
8826 if (debug == 3) {
8827 printf("\n\t\tTest # %d\n", test_count);
8828 printf("y type : z, a type : z, x type : c\n");
8829 printf("Seed = %d\t", saved_seed);
8830 printf("n %d, m %d\n", n, m);
8831 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
8832 ldb, incx, incx);
8833
8834 if (order_type == blas_rowmajor)
8835 printf("row ");
8836 else
8837 printf("col ");
8838
8839 printf("NORM %d, ALPHA %d, BETA %d\n",
8840 norm, alpha_val, beta_val);
8841 printf("randomize %d\n", randomize_val);
8842
8843 /* print out info */
8844 printf("alpha = ");
8845 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
8846 printf(" ");
8847 printf("beta = ");
8848 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
8849 printf("\n");
8850 printf("alpha_use = ");
8851 printf("(%24.16e, %24.16e)", alpha_use[0],
8852 alpha_use[1]);;
8853 printf("\n");
8854
8855 zge_print_matrix(a, m_i, n_i, lda, order_type,
8856 "A");
8857 zge_print_matrix(B, m_i, n_i, ldb, order_type,
8858 "B");
8859 cprint_vector(x, n_i, incx, "x");
8860
8861 zprint_vector(y, m_i, incy, "y");
8862
8863 zprint_vector(head_r_true, m_i, 1, "head_r_true");
8864
8865 zge_print_matrix(a_use, m_i, n_i, lda, order_type,
8866 "A_use");
8867 zge_print_matrix(B_use, m_i, n_i, ldb, order_type,
8868 "B_use");
8869
8870 dprint_vector(ratios, m_i, 1, "ratios");
8871 printf("ratio = %g\n", ratio);
8872 fflush(stdout);
8873 }
8874 bad_ratio_count++;
8875 if (bad_ratio_count >= MAX_BAD_TESTS) {
8876 printf("\ntoo many failures, exiting....");
8877 printf("\nTesting and compilation");
8878 printf(" are incomplete\n\n");
8879 goto end;
8880 }
8881 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
8882 printf("\nFlagrant ratio error, exiting...");
8883 printf("\nTesting and compilation");
8884 printf(" are incomplete\n\n");
8885 goto end;
8886 }
8887 }
8888
8889 if (!(ratio <= ratio_max))
8890 ratio_max = ratio;
8891
8892 if (ratio != 0.0 && !(ratio >= ratio_min))
8893 ratio_min = ratio;
8894
8895 } /* end of incy loop */
8896
8897 } /* end of incx loop */
8898
8899 } /* end of randmize loop */
8900
8901 } /* end of ldb loop */
8902
8903 } /* end of lda loop */
8904
8905 } /* end of order loop */
8906
8907 } /* end of nr test loop */
8908
8909 } /* end of norm loop */
8910
8911
8912 } /* end of prec loop */
8913
8914 } /* end of beta loop */
8915
8916 } /* end of alpha loop */
8917
8918 FPU_FIX_STOP;
8919
8920 end:
8921 blas_free(y);
8922 blas_free(a);
8923 blas_free(a_use);
8924 blas_free(B);
8925 blas_free(B_use);
8926 blas_free(x);
8927 blas_free(head_r_true);
8928 blas_free(tail_r_true);
8929 blas_free(ratios);
8930 blas_free(a_vec);
8931 blas_free(x_vec);
8932
8933 *max_ratio = ratio_max;
8934 *min_ratio = ratio_min;
8935 *num_tests = test_count;
8936 *num_bad_ratio = bad_ratio_count;
8937
8938 }
do_test_zge_sum_mv_c_z_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)8939 void do_test_zge_sum_mv_c_z_x
8940 (int m, int n,
8941 int ntests, int *seed, double thresh, int debug, float test_prob,
8942 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
8943
8944 /* Function name */
8945 const char fname[] = "BLAS_zge_sum_mv_c_z_x";
8946
8947 int i;
8948 int yi;
8949 int incyi, y_starti, incx_veci;
8950 int test_count;
8951 int bad_ratio_count;
8952
8953 int ri;
8954 int incri;
8955 int inca, incx, incy;
8956
8957 double ratio;
8958
8959 double ratio_min, ratio_max;
8960
8961 double eps_int; /* internal machine epsilon */
8962 double un_int; /* internal underflow threshold */
8963
8964 double rin[2];
8965 double rout[2];
8966 double head_r_true_elem[2], tail_r_true_elem[2];
8967
8968 enum blas_order_type order_type;
8969 enum blas_prec_type prec;
8970
8971 int order_val;
8972 int lda_val, incx_val, incy_val;
8973 int ldb_val;
8974 int alpha_val, beta_val;
8975 int randomize_val;
8976
8977 int prec_val;
8978
8979 int lda, ldb;
8980 int alpha_flag, beta_flag;
8981 int saved_seed;
8982 int norm;
8983 int test_no;
8984
8985 int n_i, m_i;
8986 int inca_veci;
8987
8988 double alpha[2];
8989 double beta[2];
8990 double beta_zero_fake[2];
8991 double alpha_use[2];
8992 float *a;
8993 float *a_use;
8994 float *B;
8995 float *B_use;
8996 double *x;
8997 double *y;
8998 float *a_vec;
8999 double *x_vec;
9000
9001
9002 double *ratios;
9003
9004 /* true result calculated by testgen, in double-double */
9005 double *head_r_true, *tail_r_true;
9006
9007
9008 FPU_FIX_DECL;
9009
9010 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
9011
9012 if (n < 0 || ntests < 0)
9013 BLAS_error(fname, -3, n, NULL);
9014
9015 /* initialization */
9016 saved_seed = *seed;
9017 ratio = 0.0;
9018 ratio_min = 1e308;
9019 ratio_max = 0.0;
9020
9021 *num_tests = 0;
9022 *num_bad_ratio = 0;
9023 *min_ratio = 0.0;
9024 *max_ratio = 0.0;
9025
9026 if (n == 0)
9027 return;
9028
9029 FPU_FIX_START;
9030
9031 n_i = n;
9032 m_i = m;
9033
9034 inca = incx = incy = 1;
9035 inca *= 2;
9036 incx *= 2;
9037 incy *= 2;
9038
9039 /* allocate memory for arrays */
9040 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
9041 if (4 * m_i > 0 && y == NULL) {
9042 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9043 }
9044 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float) * 2);
9045 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
9046 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9047 }
9048 a_use =
9049 (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float) * 2);
9050 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
9051 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9052 }
9053 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
9054 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
9055 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9056 }
9057 B_use =
9058 (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
9059 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
9060 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9061 }
9062 x = (double *) blas_malloc(4 * n_i * sizeof(double) * 2);
9063 if (4 * n_i > 0 && x == NULL) {
9064 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9065 }
9066
9067 inca_veci = 1;
9068 inca_veci *= 2;
9069 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
9070 if (2 * n_i > 0 && a_vec == NULL) {
9071 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9072 }
9073 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double) * 2);
9074 if (2 * n_i > 0 && x_vec == NULL) {
9075 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9076 }
9077 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
9078 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
9079 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
9080 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9081 }
9082 ratios = (double *) blas_malloc(m_i * sizeof(double));
9083 if (m_i > 0 && ratios == NULL) {
9084 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9085 }
9086
9087 test_count = 0;
9088 bad_ratio_count = 0;
9089
9090 /* vary alpha */
9091 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
9092
9093 alpha_flag = 0;
9094 switch (alpha_val) {
9095 case 0:
9096 alpha[0] = alpha[1] = 0.0;
9097 alpha_flag = 1;
9098 break;
9099 case 1:
9100 alpha[0] = 1.0;
9101 alpha[1] = 0.0;
9102 alpha_flag = 1;
9103 break;
9104 }
9105
9106 /* vary beta */
9107 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
9108 beta_flag = 0;
9109 switch (beta_val) {
9110 case 0:
9111 beta[0] = beta[1] = 0.0;
9112 beta_flag = 1;
9113 break;
9114 case 1:
9115 beta[0] = 1.0;
9116 beta[1] = 0.0;
9117 beta_flag = 1;
9118 break;
9119 }
9120
9121
9122 /* varying extra precs */
9123 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
9124 switch (prec_val) {
9125 case 0:
9126 eps_int = power(2, -BITS_D);
9127 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9128 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9129 prec = blas_prec_double;
9130 break;
9131 case 1:
9132 eps_int = power(2, -BITS_D);
9133 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9134 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9135 prec = blas_prec_double;
9136 break;
9137 case 2:
9138 default:
9139 eps_int = power(2, -BITS_E);
9140 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
9141 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
9142 prec = blas_prec_extra;
9143 break;
9144 }
9145
9146 /* vary norm -- underflow, approx 1, overflow */
9147 for (norm = NORM_START; norm <= NORM_END; norm++) {
9148
9149 /* number of tests */
9150 for (test_no = 0; test_no < ntests; test_no++) {
9151
9152
9153 /* vary storage format */
9154 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
9155
9156 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
9157
9158 /* vary lda = n_i, n_i+1, 2*n_i */
9159 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
9160
9161 if (order_type == blas_rowmajor) {
9162 lda = (lda_val == 0) ? n_i :
9163 (lda_val == 1) ? n_i + 1 : n_i * n_i;
9164 } else {
9165 lda = (lda_val == 0) ? m_i :
9166 (lda_val == 1) ? m_i + 1 : m_i * m_i;
9167 }
9168
9169 /* vary ldb = n_i, n_i+1, 2*n_i */
9170 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
9171
9172 if (order_type == blas_rowmajor) {
9173 ldb = (ldb_val == 0) ? n_i :
9174 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
9175 } else {
9176 ldb = (ldb_val == 0) ? m_i :
9177 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
9178 }
9179
9180 for (randomize_val = RANDOMIZE_START;
9181 randomize_val <= RANDOMIZE_END; randomize_val++) {
9182
9183 /* For the sake of speed, we throw out this case at random */
9184 if (xrand(seed) >= test_prob)
9185 continue;
9186
9187 /* finally we are here to generate the test case */
9188 /* alpha_use, a_use, B_use are the generated alpha, a, B
9189 * before any scaling.
9190 * That is, in the generator, alpha == beta == alpha_use
9191 * before scaling. */
9192
9193 saved_seed = *seed;
9194 BLAS_zge_sum_mv_c_z_testgen(norm, order_type,
9195 m, n, randomize_val, &alpha,
9196 alpha_flag, &beta, beta_flag,
9197 a, lda, B, ldb, x_vec, 1,
9198 &alpha_use, a_use, B_use,
9199 seed, head_r_true,
9200 tail_r_true);
9201
9202 /* vary incx = 1, 2 */
9203 for (incx_val = INCX_START; incx_val <= INCX_END;
9204 incx_val++) {
9205
9206 incx = incx_val;
9207 if (0 == incx)
9208 continue;
9209
9210 zcopy_vector(x_vec, n_i, 1, x, incx);
9211
9212 /* vary incy = 1, 2 */
9213 for (incy_val = INCY_START; incy_val <= INCY_END;
9214 incy_val++) {
9215
9216 incy = incy_val;
9217 if (0 == incy)
9218 continue;
9219
9220 test_count++;
9221
9222 /* call ge_sum_mv routines to be tested */
9223 FPU_FIX_STOP;
9224 BLAS_zge_sum_mv_c_z_x(order_type,
9225 m, n, alpha, a, lda, x, incx,
9226 beta, B, ldb, y, incy, prec);
9227 FPU_FIX_START;
9228
9229 /* now compute the ratio using test_BLAS_xdot */
9230 /* copy a row from A, use x, run
9231 dot test */
9232
9233 incyi = incy;
9234
9235 incri = 1;
9236 incx_veci = 1;
9237 incx_veci *= 2;
9238 incyi *= 2;
9239 incri *= 2;
9240 if (incy < 0) {
9241 y_starti = (-m_i + 1) * incyi;
9242 } else {
9243 y_starti = 0;
9244 }
9245 /* make two copies of x into x_vec. redundant */
9246 zcopy_vector(x, n_i, incx, x_vec, 1);
9247 zcopy_vector(x, n_i, incx,
9248 (x_vec + (n_i * incx_veci)), 1);
9249 for (i = 0, yi = y_starti, ri = 0; i < m_i;
9250 i++, yi += incyi, ri += incri) {
9251 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
9252 a_use, lda, a_vec, i);
9253 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
9254 B_use, ldb, (a_vec + inca_veci * n_i),
9255 i);
9256
9257 rin[0] = rin[1] = 0.0;
9258 rout[0] = y[yi];
9259 rout[1] = y[yi + 1];
9260 head_r_true_elem[0] = head_r_true[ri];
9261 head_r_true_elem[1] = head_r_true[ri + 1];
9262 tail_r_true_elem[0] = tail_r_true[ri];
9263 tail_r_true_elem[1] = tail_r_true[ri + 1];
9264
9265 test_BLAS_zdot_c_z(2 * n_i,
9266 blas_no_conj,
9267 alpha_use, beta_zero_fake, rin,
9268 rout, head_r_true_elem,
9269 tail_r_true_elem, a_vec, 1,
9270 x_vec, 1, eps_int, un_int,
9271 &ratios[i]);
9272
9273 /* take the max ratio */
9274 if (i == 0) {
9275 ratio = ratios[0];
9276 /* The !<= below causes NaN errors
9277 * to be included.
9278 * Note that (NaN > 0) is false */
9279 } else if (!(ratios[i] <= ratio)) {
9280 ratio = ratios[i];
9281 }
9282 } /* end of dot-test loop */
9283
9284 /* The !<= below causes NaN errors
9285 * to be included.
9286 * Note that (NaN > 0) is false */
9287 if (!(ratio <= thresh)) {
9288
9289 if (debug == 3) {
9290 printf("\n\t\tTest # %d\n", test_count);
9291 printf("y type : z, a type : c, x type : z\n");
9292 printf("Seed = %d\t", saved_seed);
9293 printf("n %d, m %d\n", n, m);
9294 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
9295 ldb, incx, incx);
9296
9297 if (order_type == blas_rowmajor)
9298 printf("row ");
9299 else
9300 printf("col ");
9301
9302 printf("NORM %d, ALPHA %d, BETA %d\n",
9303 norm, alpha_val, beta_val);
9304 printf("randomize %d\n", randomize_val);
9305
9306 /* print out info */
9307 printf("alpha = ");
9308 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
9309 printf(" ");
9310 printf("beta = ");
9311 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
9312 printf("\n");
9313 printf("alpha_use = ");
9314 printf("(%24.16e, %24.16e)", alpha_use[0],
9315 alpha_use[1]);;
9316 printf("\n");
9317
9318 cge_print_matrix(a, m_i, n_i, lda, order_type,
9319 "A");
9320 cge_print_matrix(B, m_i, n_i, ldb, order_type,
9321 "B");
9322 zprint_vector(x, n_i, incx, "x");
9323
9324 zprint_vector(y, m_i, incy, "y");
9325
9326 zprint_vector(head_r_true, m_i, 1, "head_r_true");
9327
9328 cge_print_matrix(a_use, m_i, n_i, lda, order_type,
9329 "A_use");
9330 cge_print_matrix(B_use, m_i, n_i, ldb, order_type,
9331 "B_use");
9332
9333 dprint_vector(ratios, m_i, 1, "ratios");
9334 printf("ratio = %g\n", ratio);
9335 fflush(stdout);
9336 }
9337 bad_ratio_count++;
9338 if (bad_ratio_count >= MAX_BAD_TESTS) {
9339 printf("\ntoo many failures, exiting....");
9340 printf("\nTesting and compilation");
9341 printf(" are incomplete\n\n");
9342 goto end;
9343 }
9344 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
9345 printf("\nFlagrant ratio error, exiting...");
9346 printf("\nTesting and compilation");
9347 printf(" are incomplete\n\n");
9348 goto end;
9349 }
9350 }
9351
9352 if (!(ratio <= ratio_max))
9353 ratio_max = ratio;
9354
9355 if (ratio != 0.0 && !(ratio >= ratio_min))
9356 ratio_min = ratio;
9357
9358 } /* end of incy loop */
9359
9360 } /* end of incx loop */
9361
9362 } /* end of randmize loop */
9363
9364 } /* end of ldb loop */
9365
9366 } /* end of lda loop */
9367
9368 } /* end of order loop */
9369
9370 } /* end of nr test loop */
9371
9372 } /* end of norm loop */
9373
9374
9375 } /* end of prec loop */
9376
9377 } /* end of beta loop */
9378
9379 } /* end of alpha loop */
9380
9381 FPU_FIX_STOP;
9382
9383 end:
9384 blas_free(y);
9385 blas_free(a);
9386 blas_free(a_use);
9387 blas_free(B);
9388 blas_free(B_use);
9389 blas_free(x);
9390 blas_free(head_r_true);
9391 blas_free(tail_r_true);
9392 blas_free(ratios);
9393 blas_free(a_vec);
9394 blas_free(x_vec);
9395
9396 *max_ratio = ratio_max;
9397 *min_ratio = ratio_min;
9398 *num_tests = test_count;
9399 *num_bad_ratio = bad_ratio_count;
9400
9401 }
do_test_zge_sum_mv_c_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)9402 void do_test_zge_sum_mv_c_c_x
9403 (int m, int n,
9404 int ntests, int *seed, double thresh, int debug, float test_prob,
9405 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
9406
9407 /* Function name */
9408 const char fname[] = "BLAS_zge_sum_mv_c_c_x";
9409
9410 int i;
9411 int yi;
9412 int incyi, y_starti, incx_veci;
9413 int test_count;
9414 int bad_ratio_count;
9415
9416 int ri;
9417 int incri;
9418 int inca, incx, incy;
9419
9420 double ratio;
9421
9422 double ratio_min, ratio_max;
9423
9424 double eps_int; /* internal machine epsilon */
9425 double un_int; /* internal underflow threshold */
9426
9427 double rin[2];
9428 double rout[2];
9429 double head_r_true_elem[2], tail_r_true_elem[2];
9430
9431 enum blas_order_type order_type;
9432 enum blas_prec_type prec;
9433
9434 int order_val;
9435 int lda_val, incx_val, incy_val;
9436 int ldb_val;
9437 int alpha_val, beta_val;
9438 int randomize_val;
9439
9440 int prec_val;
9441
9442 int lda, ldb;
9443 int alpha_flag, beta_flag;
9444 int saved_seed;
9445 int norm;
9446 int test_no;
9447
9448 int n_i, m_i;
9449 int inca_veci;
9450
9451 double alpha[2];
9452 double beta[2];
9453 double beta_zero_fake[2];
9454 double alpha_use[2];
9455 float *a;
9456 float *a_use;
9457 float *B;
9458 float *B_use;
9459 float *x;
9460 double *y;
9461 float *a_vec;
9462 float *x_vec;
9463
9464
9465 double *ratios;
9466
9467 /* true result calculated by testgen, in double-double */
9468 double *head_r_true, *tail_r_true;
9469
9470
9471 FPU_FIX_DECL;
9472
9473 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
9474
9475 if (n < 0 || ntests < 0)
9476 BLAS_error(fname, -3, n, NULL);
9477
9478 /* initialization */
9479 saved_seed = *seed;
9480 ratio = 0.0;
9481 ratio_min = 1e308;
9482 ratio_max = 0.0;
9483
9484 *num_tests = 0;
9485 *num_bad_ratio = 0;
9486 *min_ratio = 0.0;
9487 *max_ratio = 0.0;
9488
9489 if (n == 0)
9490 return;
9491
9492 FPU_FIX_START;
9493
9494 n_i = n;
9495 m_i = m;
9496
9497 inca = incx = incy = 1;
9498 inca *= 2;
9499 incx *= 2;
9500 incy *= 2;
9501
9502 /* allocate memory for arrays */
9503 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
9504 if (4 * m_i > 0 && y == NULL) {
9505 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9506 }
9507 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float) * 2);
9508 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
9509 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9510 }
9511 a_use =
9512 (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float) * 2);
9513 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
9514 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9515 }
9516 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
9517 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
9518 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9519 }
9520 B_use =
9521 (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
9522 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
9523 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9524 }
9525 x = (float *) blas_malloc(4 * n_i * sizeof(float) * 2);
9526 if (4 * n_i > 0 && x == NULL) {
9527 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9528 }
9529
9530 inca_veci = 1;
9531 inca_veci *= 2;
9532 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
9533 if (2 * n_i > 0 && a_vec == NULL) {
9534 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9535 }
9536 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
9537 if (2 * n_i > 0 && x_vec == NULL) {
9538 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9539 }
9540 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
9541 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
9542 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
9543 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9544 }
9545 ratios = (double *) blas_malloc(m_i * sizeof(double));
9546 if (m_i > 0 && ratios == NULL) {
9547 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9548 }
9549
9550 test_count = 0;
9551 bad_ratio_count = 0;
9552
9553 /* vary alpha */
9554 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
9555
9556 alpha_flag = 0;
9557 switch (alpha_val) {
9558 case 0:
9559 alpha[0] = alpha[1] = 0.0;
9560 alpha_flag = 1;
9561 break;
9562 case 1:
9563 alpha[0] = 1.0;
9564 alpha[1] = 0.0;
9565 alpha_flag = 1;
9566 break;
9567 }
9568
9569 /* vary beta */
9570 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
9571 beta_flag = 0;
9572 switch (beta_val) {
9573 case 0:
9574 beta[0] = beta[1] = 0.0;
9575 beta_flag = 1;
9576 break;
9577 case 1:
9578 beta[0] = 1.0;
9579 beta[1] = 0.0;
9580 beta_flag = 1;
9581 break;
9582 }
9583
9584
9585 /* varying extra precs */
9586 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
9587 switch (prec_val) {
9588 case 0:
9589 eps_int = power(2, -BITS_D);
9590 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9591 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9592 prec = blas_prec_double;
9593 break;
9594 case 1:
9595 eps_int = power(2, -BITS_D);
9596 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9597 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9598 prec = blas_prec_double;
9599 break;
9600 case 2:
9601 default:
9602 eps_int = power(2, -BITS_E);
9603 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
9604 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
9605 prec = blas_prec_extra;
9606 break;
9607 }
9608
9609 /* vary norm -- underflow, approx 1, overflow */
9610 for (norm = NORM_START; norm <= NORM_END; norm++) {
9611
9612 /* number of tests */
9613 for (test_no = 0; test_no < ntests; test_no++) {
9614
9615
9616 /* vary storage format */
9617 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
9618
9619 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
9620
9621 /* vary lda = n_i, n_i+1, 2*n_i */
9622 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
9623
9624 if (order_type == blas_rowmajor) {
9625 lda = (lda_val == 0) ? n_i :
9626 (lda_val == 1) ? n_i + 1 : n_i * n_i;
9627 } else {
9628 lda = (lda_val == 0) ? m_i :
9629 (lda_val == 1) ? m_i + 1 : m_i * m_i;
9630 }
9631
9632 /* vary ldb = n_i, n_i+1, 2*n_i */
9633 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
9634
9635 if (order_type == blas_rowmajor) {
9636 ldb = (ldb_val == 0) ? n_i :
9637 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
9638 } else {
9639 ldb = (ldb_val == 0) ? m_i :
9640 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
9641 }
9642
9643 for (randomize_val = RANDOMIZE_START;
9644 randomize_val <= RANDOMIZE_END; randomize_val++) {
9645
9646 /* For the sake of speed, we throw out this case at random */
9647 if (xrand(seed) >= test_prob)
9648 continue;
9649
9650 /* finally we are here to generate the test case */
9651 /* alpha_use, a_use, B_use are the generated alpha, a, B
9652 * before any scaling.
9653 * That is, in the generator, alpha == beta == alpha_use
9654 * before scaling. */
9655
9656 saved_seed = *seed;
9657 BLAS_zge_sum_mv_c_c_testgen(norm, order_type,
9658 m, n, randomize_val, &alpha,
9659 alpha_flag, &beta, beta_flag,
9660 a, lda, B, ldb, x_vec, 1,
9661 &alpha_use, a_use, B_use,
9662 seed, head_r_true,
9663 tail_r_true);
9664
9665 /* vary incx = 1, 2 */
9666 for (incx_val = INCX_START; incx_val <= INCX_END;
9667 incx_val++) {
9668
9669 incx = incx_val;
9670 if (0 == incx)
9671 continue;
9672
9673 ccopy_vector(x_vec, n_i, 1, x, incx);
9674
9675 /* vary incy = 1, 2 */
9676 for (incy_val = INCY_START; incy_val <= INCY_END;
9677 incy_val++) {
9678
9679 incy = incy_val;
9680 if (0 == incy)
9681 continue;
9682
9683 test_count++;
9684
9685 /* call ge_sum_mv routines to be tested */
9686 FPU_FIX_STOP;
9687 BLAS_zge_sum_mv_c_c_x(order_type,
9688 m, n, alpha, a, lda, x, incx,
9689 beta, B, ldb, y, incy, prec);
9690 FPU_FIX_START;
9691
9692 /* now compute the ratio using test_BLAS_xdot */
9693 /* copy a row from A, use x, run
9694 dot test */
9695
9696 incyi = incy;
9697
9698 incri = 1;
9699 incx_veci = 1;
9700 incx_veci *= 2;
9701 incyi *= 2;
9702 incri *= 2;
9703 if (incy < 0) {
9704 y_starti = (-m_i + 1) * incyi;
9705 } else {
9706 y_starti = 0;
9707 }
9708 /* make two copies of x into x_vec. redundant */
9709 ccopy_vector(x, n_i, incx, x_vec, 1);
9710 ccopy_vector(x, n_i, incx,
9711 (x_vec + (n_i * incx_veci)), 1);
9712 for (i = 0, yi = y_starti, ri = 0; i < m_i;
9713 i++, yi += incyi, ri += incri) {
9714 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
9715 a_use, lda, a_vec, i);
9716 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
9717 B_use, ldb, (a_vec + inca_veci * n_i),
9718 i);
9719
9720 rin[0] = rin[1] = 0.0;
9721 rout[0] = y[yi];
9722 rout[1] = y[yi + 1];
9723 head_r_true_elem[0] = head_r_true[ri];
9724 head_r_true_elem[1] = head_r_true[ri + 1];
9725 tail_r_true_elem[0] = tail_r_true[ri];
9726 tail_r_true_elem[1] = tail_r_true[ri + 1];
9727
9728 test_BLAS_zdot_c_c(2 * n_i,
9729 blas_no_conj,
9730 alpha_use, beta_zero_fake, rin,
9731 rout, head_r_true_elem,
9732 tail_r_true_elem, a_vec, 1,
9733 x_vec, 1, eps_int, un_int,
9734 &ratios[i]);
9735
9736 /* take the max ratio */
9737 if (i == 0) {
9738 ratio = ratios[0];
9739 /* The !<= below causes NaN errors
9740 * to be included.
9741 * Note that (NaN > 0) is false */
9742 } else if (!(ratios[i] <= ratio)) {
9743 ratio = ratios[i];
9744 }
9745 } /* end of dot-test loop */
9746
9747 /* The !<= below causes NaN errors
9748 * to be included.
9749 * Note that (NaN > 0) is false */
9750 if (!(ratio <= thresh)) {
9751
9752 if (debug == 3) {
9753 printf("\n\t\tTest # %d\n", test_count);
9754 printf("y type : z, a type : c, x type : c\n");
9755 printf("Seed = %d\t", saved_seed);
9756 printf("n %d, m %d\n", n, m);
9757 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
9758 ldb, incx, incx);
9759
9760 if (order_type == blas_rowmajor)
9761 printf("row ");
9762 else
9763 printf("col ");
9764
9765 printf("NORM %d, ALPHA %d, BETA %d\n",
9766 norm, alpha_val, beta_val);
9767 printf("randomize %d\n", randomize_val);
9768
9769 /* print out info */
9770 printf("alpha = ");
9771 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
9772 printf(" ");
9773 printf("beta = ");
9774 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
9775 printf("\n");
9776 printf("alpha_use = ");
9777 printf("(%24.16e, %24.16e)", alpha_use[0],
9778 alpha_use[1]);;
9779 printf("\n");
9780
9781 cge_print_matrix(a, m_i, n_i, lda, order_type,
9782 "A");
9783 cge_print_matrix(B, m_i, n_i, ldb, order_type,
9784 "B");
9785 cprint_vector(x, n_i, incx, "x");
9786
9787 zprint_vector(y, m_i, incy, "y");
9788
9789 zprint_vector(head_r_true, m_i, 1, "head_r_true");
9790
9791 cge_print_matrix(a_use, m_i, n_i, lda, order_type,
9792 "A_use");
9793 cge_print_matrix(B_use, m_i, n_i, ldb, order_type,
9794 "B_use");
9795
9796 dprint_vector(ratios, m_i, 1, "ratios");
9797 printf("ratio = %g\n", ratio);
9798 fflush(stdout);
9799 }
9800 bad_ratio_count++;
9801 if (bad_ratio_count >= MAX_BAD_TESTS) {
9802 printf("\ntoo many failures, exiting....");
9803 printf("\nTesting and compilation");
9804 printf(" are incomplete\n\n");
9805 goto end;
9806 }
9807 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
9808 printf("\nFlagrant ratio error, exiting...");
9809 printf("\nTesting and compilation");
9810 printf(" are incomplete\n\n");
9811 goto end;
9812 }
9813 }
9814
9815 if (!(ratio <= ratio_max))
9816 ratio_max = ratio;
9817
9818 if (ratio != 0.0 && !(ratio >= ratio_min))
9819 ratio_min = ratio;
9820
9821 } /* end of incy loop */
9822
9823 } /* end of incx loop */
9824
9825 } /* end of randmize loop */
9826
9827 } /* end of ldb loop */
9828
9829 } /* end of lda loop */
9830
9831 } /* end of order loop */
9832
9833 } /* end of nr test loop */
9834
9835 } /* end of norm loop */
9836
9837
9838 } /* end of prec loop */
9839
9840 } /* end of beta loop */
9841
9842 } /* end of alpha loop */
9843
9844 FPU_FIX_STOP;
9845
9846 end:
9847 blas_free(y);
9848 blas_free(a);
9849 blas_free(a_use);
9850 blas_free(B);
9851 blas_free(B_use);
9852 blas_free(x);
9853 blas_free(head_r_true);
9854 blas_free(tail_r_true);
9855 blas_free(ratios);
9856 blas_free(a_vec);
9857 blas_free(x_vec);
9858
9859 *max_ratio = ratio_max;
9860 *min_ratio = ratio_min;
9861 *num_tests = test_count;
9862 *num_bad_ratio = bad_ratio_count;
9863
9864 }
do_test_cge_sum_mv_c_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)9865 void do_test_cge_sum_mv_c_s_x
9866 (int m, int n,
9867 int ntests, int *seed, double thresh, int debug, float test_prob,
9868 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
9869
9870 /* Function name */
9871 const char fname[] = "BLAS_cge_sum_mv_c_s_x";
9872
9873 int i;
9874 int yi;
9875 int incyi, y_starti, incx_veci;
9876 int test_count;
9877 int bad_ratio_count;
9878
9879 int ri;
9880 int incri;
9881 int inca, incx, incy;
9882
9883 double ratio;
9884
9885 double ratio_min, ratio_max;
9886
9887 double eps_int; /* internal machine epsilon */
9888 double un_int; /* internal underflow threshold */
9889
9890 float rin[2];
9891 float rout[2];
9892 double head_r_true_elem[2], tail_r_true_elem[2];
9893
9894 enum blas_order_type order_type;
9895 enum blas_prec_type prec;
9896
9897 int order_val;
9898 int lda_val, incx_val, incy_val;
9899 int ldb_val;
9900 int alpha_val, beta_val;
9901 int randomize_val;
9902
9903 int prec_val;
9904
9905 int lda, ldb;
9906 int alpha_flag, beta_flag;
9907 int saved_seed;
9908 int norm;
9909 int test_no;
9910
9911 int n_i, m_i;
9912 int inca_veci;
9913
9914 float alpha[2];
9915 float beta[2];
9916 float beta_zero_fake[2];
9917 float alpha_use[2];
9918 float *a;
9919 float *a_use;
9920 float *B;
9921 float *B_use;
9922 float *x;
9923 float *y;
9924 float *a_vec;
9925 float *x_vec;
9926
9927
9928 double *ratios;
9929
9930 /* true result calculated by testgen, in double-double */
9931 double *head_r_true, *tail_r_true;
9932
9933
9934 FPU_FIX_DECL;
9935
9936 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
9937
9938 if (n < 0 || ntests < 0)
9939 BLAS_error(fname, -3, n, NULL);
9940
9941 /* initialization */
9942 saved_seed = *seed;
9943 ratio = 0.0;
9944 ratio_min = 1e308;
9945 ratio_max = 0.0;
9946
9947 *num_tests = 0;
9948 *num_bad_ratio = 0;
9949 *min_ratio = 0.0;
9950 *max_ratio = 0.0;
9951
9952 if (n == 0)
9953 return;
9954
9955 FPU_FIX_START;
9956
9957 n_i = n;
9958 m_i = m;
9959
9960 inca = incx = incy = 1;
9961 inca *= 2;
9962
9963 incy *= 2;
9964
9965 /* allocate memory for arrays */
9966 y = (float *) blas_malloc(4 * m_i * sizeof(float) * 2);
9967 if (4 * m_i > 0 && y == NULL) {
9968 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9969 }
9970 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float) * 2);
9971 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
9972 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9973 }
9974 a_use =
9975 (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float) * 2);
9976 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
9977 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9978 }
9979 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
9980 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
9981 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9982 }
9983 B_use =
9984 (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float) * 2);
9985 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
9986 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9987 }
9988 x = (float *) blas_malloc(4 * n_i * sizeof(float));
9989 if (4 * n_i > 0 && x == NULL) {
9990 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9991 }
9992
9993 inca_veci = 1;
9994 inca_veci *= 2;
9995 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
9996 if (2 * n_i > 0 && a_vec == NULL) {
9997 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9998 }
9999 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
10000 if (2 * n_i > 0 && x_vec == NULL) {
10001 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10002 }
10003 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
10004 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
10005 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
10006 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10007 }
10008 ratios = (double *) blas_malloc(m_i * sizeof(double));
10009 if (m_i > 0 && ratios == NULL) {
10010 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10011 }
10012
10013 test_count = 0;
10014 bad_ratio_count = 0;
10015
10016 /* vary alpha */
10017 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
10018
10019 alpha_flag = 0;
10020 switch (alpha_val) {
10021 case 0:
10022 alpha[0] = alpha[1] = 0.0;
10023 alpha_flag = 1;
10024 break;
10025 case 1:
10026 alpha[0] = 1.0;
10027 alpha[1] = 0.0;
10028 alpha_flag = 1;
10029 break;
10030 }
10031
10032 /* vary beta */
10033 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
10034 beta_flag = 0;
10035 switch (beta_val) {
10036 case 0:
10037 beta[0] = beta[1] = 0.0;
10038 beta_flag = 1;
10039 break;
10040 case 1:
10041 beta[0] = 1.0;
10042 beta[1] = 0.0;
10043 beta_flag = 1;
10044 break;
10045 }
10046
10047
10048 /* varying extra precs */
10049 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
10050 switch (prec_val) {
10051 case 0:
10052 eps_int = power(2, -BITS_S);
10053 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
10054 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
10055 prec = blas_prec_single;
10056 break;
10057 case 1:
10058 eps_int = power(2, -BITS_D);
10059 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10060 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10061 prec = blas_prec_double;
10062 break;
10063 case 2:
10064 default:
10065 eps_int = power(2, -BITS_E);
10066 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
10067 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
10068 prec = blas_prec_extra;
10069 break;
10070 }
10071
10072 /* vary norm -- underflow, approx 1, overflow */
10073 for (norm = NORM_START; norm <= NORM_END; norm++) {
10074
10075 /* number of tests */
10076 for (test_no = 0; test_no < ntests; test_no++) {
10077
10078
10079 /* vary storage format */
10080 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
10081
10082 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
10083
10084 /* vary lda = n_i, n_i+1, 2*n_i */
10085 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
10086
10087 if (order_type == blas_rowmajor) {
10088 lda = (lda_val == 0) ? n_i :
10089 (lda_val == 1) ? n_i + 1 : n_i * n_i;
10090 } else {
10091 lda = (lda_val == 0) ? m_i :
10092 (lda_val == 1) ? m_i + 1 : m_i * m_i;
10093 }
10094
10095 /* vary ldb = n_i, n_i+1, 2*n_i */
10096 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
10097
10098 if (order_type == blas_rowmajor) {
10099 ldb = (ldb_val == 0) ? n_i :
10100 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
10101 } else {
10102 ldb = (ldb_val == 0) ? m_i :
10103 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
10104 }
10105
10106 for (randomize_val = RANDOMIZE_START;
10107 randomize_val <= RANDOMIZE_END; randomize_val++) {
10108
10109 /* For the sake of speed, we throw out this case at random */
10110 if (xrand(seed) >= test_prob)
10111 continue;
10112
10113 /* finally we are here to generate the test case */
10114 /* alpha_use, a_use, B_use are the generated alpha, a, B
10115 * before any scaling.
10116 * That is, in the generator, alpha == beta == alpha_use
10117 * before scaling. */
10118
10119 saved_seed = *seed;
10120 BLAS_cge_sum_mv_c_s_testgen(norm, order_type,
10121 m, n, randomize_val, &alpha,
10122 alpha_flag, &beta, beta_flag,
10123 a, lda, B, ldb, x_vec, 1,
10124 &alpha_use, a_use, B_use,
10125 seed, head_r_true,
10126 tail_r_true);
10127
10128 /* vary incx = 1, 2 */
10129 for (incx_val = INCX_START; incx_val <= INCX_END;
10130 incx_val++) {
10131
10132 incx = incx_val;
10133 if (0 == incx)
10134 continue;
10135
10136 scopy_vector(x_vec, n_i, 1, x, incx);
10137
10138 /* vary incy = 1, 2 */
10139 for (incy_val = INCY_START; incy_val <= INCY_END;
10140 incy_val++) {
10141
10142 incy = incy_val;
10143 if (0 == incy)
10144 continue;
10145
10146 test_count++;
10147
10148 /* call ge_sum_mv routines to be tested */
10149 FPU_FIX_STOP;
10150 BLAS_cge_sum_mv_c_s_x(order_type,
10151 m, n, alpha, a, lda, x, incx,
10152 beta, B, ldb, y, incy, prec);
10153 FPU_FIX_START;
10154
10155 /* now compute the ratio using test_BLAS_xdot */
10156 /* copy a row from A, use x, run
10157 dot test */
10158
10159 incyi = incy;
10160
10161 incri = 1;
10162 incx_veci = 1;
10163
10164 incyi *= 2;
10165 incri *= 2;
10166 if (incy < 0) {
10167 y_starti = (-m_i + 1) * incyi;
10168 } else {
10169 y_starti = 0;
10170 }
10171 /* make two copies of x into x_vec. redundant */
10172 scopy_vector(x, n_i, incx, x_vec, 1);
10173 scopy_vector(x, n_i, incx,
10174 (x_vec + (n_i * incx_veci)), 1);
10175 for (i = 0, yi = y_starti, ri = 0; i < m_i;
10176 i++, yi += incyi, ri += incri) {
10177 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
10178 a_use, lda, a_vec, i);
10179 cge_copy_row(order_type, blas_no_trans, m_i, n_i,
10180 B_use, ldb, (a_vec + inca_veci * n_i),
10181 i);
10182
10183 rin[0] = rin[1] = 0.0;
10184 rout[0] = y[yi];
10185 rout[1] = y[yi + 1];
10186 head_r_true_elem[0] = head_r_true[ri];
10187 head_r_true_elem[1] = head_r_true[ri + 1];
10188 tail_r_true_elem[0] = tail_r_true[ri];
10189 tail_r_true_elem[1] = tail_r_true[ri + 1];
10190
10191 test_BLAS_cdot_c_s(2 * n_i,
10192 blas_no_conj,
10193 alpha_use, beta_zero_fake, rin,
10194 rout, head_r_true_elem,
10195 tail_r_true_elem, a_vec, 1,
10196 x_vec, 1, eps_int, un_int,
10197 &ratios[i]);
10198
10199 /* take the max ratio */
10200 if (i == 0) {
10201 ratio = ratios[0];
10202 /* The !<= below causes NaN errors
10203 * to be included.
10204 * Note that (NaN > 0) is false */
10205 } else if (!(ratios[i] <= ratio)) {
10206 ratio = ratios[i];
10207 }
10208 } /* end of dot-test loop */
10209
10210 /* The !<= below causes NaN errors
10211 * to be included.
10212 * Note that (NaN > 0) is false */
10213 if (!(ratio <= thresh)) {
10214
10215 if (debug == 3) {
10216 printf("\n\t\tTest # %d\n", test_count);
10217 printf("y type : c, a type : c, x type : s\n");
10218 printf("Seed = %d\t", saved_seed);
10219 printf("n %d, m %d\n", n, m);
10220 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
10221 ldb, incx, incx);
10222
10223 if (order_type == blas_rowmajor)
10224 printf("row ");
10225 else
10226 printf("col ");
10227
10228 printf("NORM %d, ALPHA %d, BETA %d\n",
10229 norm, alpha_val, beta_val);
10230 printf("randomize %d\n", randomize_val);
10231
10232 /* print out info */
10233 printf("alpha = ");
10234 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);;
10235 printf(" ");
10236 printf("beta = ");
10237 printf("(%16.8e, %16.8e)", beta[0], beta[1]);;
10238 printf("\n");
10239 printf("alpha_use = ");
10240 printf("(%16.8e, %16.8e)", alpha_use[0],
10241 alpha_use[1]);;
10242 printf("\n");
10243
10244 cge_print_matrix(a, m_i, n_i, lda, order_type,
10245 "A");
10246 cge_print_matrix(B, m_i, n_i, ldb, order_type,
10247 "B");
10248 sprint_vector(x, n_i, incx, "x");
10249
10250 cprint_vector(y, m_i, incy, "y");
10251
10252 zprint_vector(head_r_true, m_i, 1, "head_r_true");
10253
10254 cge_print_matrix(a_use, m_i, n_i, lda, order_type,
10255 "A_use");
10256 cge_print_matrix(B_use, m_i, n_i, ldb, order_type,
10257 "B_use");
10258
10259 dprint_vector(ratios, m_i, 1, "ratios");
10260 printf("ratio = %g\n", ratio);
10261 fflush(stdout);
10262 }
10263 bad_ratio_count++;
10264 if (bad_ratio_count >= MAX_BAD_TESTS) {
10265 printf("\ntoo many failures, exiting....");
10266 printf("\nTesting and compilation");
10267 printf(" are incomplete\n\n");
10268 goto end;
10269 }
10270 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
10271 printf("\nFlagrant ratio error, exiting...");
10272 printf("\nTesting and compilation");
10273 printf(" are incomplete\n\n");
10274 goto end;
10275 }
10276 }
10277
10278 if (!(ratio <= ratio_max))
10279 ratio_max = ratio;
10280
10281 if (ratio != 0.0 && !(ratio >= ratio_min))
10282 ratio_min = ratio;
10283
10284 } /* end of incy loop */
10285
10286 } /* end of incx loop */
10287
10288 } /* end of randmize loop */
10289
10290 } /* end of ldb loop */
10291
10292 } /* end of lda loop */
10293
10294 } /* end of order loop */
10295
10296 } /* end of nr test loop */
10297
10298 } /* end of norm loop */
10299
10300
10301 } /* end of prec loop */
10302
10303 } /* end of beta loop */
10304
10305 } /* end of alpha loop */
10306
10307 FPU_FIX_STOP;
10308
10309 end:
10310 blas_free(y);
10311 blas_free(a);
10312 blas_free(a_use);
10313 blas_free(B);
10314 blas_free(B_use);
10315 blas_free(x);
10316 blas_free(head_r_true);
10317 blas_free(tail_r_true);
10318 blas_free(ratios);
10319 blas_free(a_vec);
10320 blas_free(x_vec);
10321
10322 *max_ratio = ratio_max;
10323 *min_ratio = ratio_min;
10324 *num_tests = test_count;
10325 *num_bad_ratio = bad_ratio_count;
10326
10327 }
do_test_cge_sum_mv_s_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)10328 void do_test_cge_sum_mv_s_c_x
10329 (int m, int n,
10330 int ntests, int *seed, double thresh, int debug, float test_prob,
10331 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
10332
10333 /* Function name */
10334 const char fname[] = "BLAS_cge_sum_mv_s_c_x";
10335
10336 int i;
10337 int yi;
10338 int incyi, y_starti, incx_veci;
10339 int test_count;
10340 int bad_ratio_count;
10341
10342 int ri;
10343 int incri;
10344 int inca, incx, incy;
10345
10346 double ratio;
10347
10348 double ratio_min, ratio_max;
10349
10350 double eps_int; /* internal machine epsilon */
10351 double un_int; /* internal underflow threshold */
10352
10353 float rin[2];
10354 float rout[2];
10355 double head_r_true_elem[2], tail_r_true_elem[2];
10356
10357 enum blas_order_type order_type;
10358 enum blas_prec_type prec;
10359
10360 int order_val;
10361 int lda_val, incx_val, incy_val;
10362 int ldb_val;
10363 int alpha_val, beta_val;
10364 int randomize_val;
10365
10366 int prec_val;
10367
10368 int lda, ldb;
10369 int alpha_flag, beta_flag;
10370 int saved_seed;
10371 int norm;
10372 int test_no;
10373
10374 int n_i, m_i;
10375 int inca_veci;
10376
10377 float alpha[2];
10378 float beta[2];
10379 float beta_zero_fake[2];
10380 float alpha_use[2];
10381 float *a;
10382 float *a_use;
10383 float *B;
10384 float *B_use;
10385 float *x;
10386 float *y;
10387 float *a_vec;
10388 float *x_vec;
10389
10390
10391 double *ratios;
10392
10393 /* true result calculated by testgen, in double-double */
10394 double *head_r_true, *tail_r_true;
10395
10396
10397 FPU_FIX_DECL;
10398
10399 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
10400
10401 if (n < 0 || ntests < 0)
10402 BLAS_error(fname, -3, n, NULL);
10403
10404 /* initialization */
10405 saved_seed = *seed;
10406 ratio = 0.0;
10407 ratio_min = 1e308;
10408 ratio_max = 0.0;
10409
10410 *num_tests = 0;
10411 *num_bad_ratio = 0;
10412 *min_ratio = 0.0;
10413 *max_ratio = 0.0;
10414
10415 if (n == 0)
10416 return;
10417
10418 FPU_FIX_START;
10419
10420 n_i = n;
10421 m_i = m;
10422
10423 inca = incx = incy = 1;
10424
10425 incx *= 2;
10426 incy *= 2;
10427
10428 /* allocate memory for arrays */
10429 y = (float *) blas_malloc(4 * m_i * sizeof(float) * 2);
10430 if (4 * m_i > 0 && y == NULL) {
10431 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10432 }
10433 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float));
10434 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
10435 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10436 }
10437 a_use = (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float));
10438 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
10439 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10440 }
10441 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
10442 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
10443 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10444 }
10445 B_use = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
10446 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
10447 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10448 }
10449 x = (float *) blas_malloc(4 * n_i * sizeof(float) * 2);
10450 if (4 * n_i > 0 && x == NULL) {
10451 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10452 }
10453
10454 inca_veci = 1;
10455
10456 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
10457 if (2 * n_i > 0 && a_vec == NULL) {
10458 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10459 }
10460 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float) * 2);
10461 if (2 * n_i > 0 && x_vec == NULL) {
10462 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10463 }
10464 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
10465 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
10466 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
10467 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10468 }
10469 ratios = (double *) blas_malloc(m_i * sizeof(double));
10470 if (m_i > 0 && ratios == NULL) {
10471 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10472 }
10473
10474 test_count = 0;
10475 bad_ratio_count = 0;
10476
10477 /* vary alpha */
10478 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
10479
10480 alpha_flag = 0;
10481 switch (alpha_val) {
10482 case 0:
10483 alpha[0] = alpha[1] = 0.0;
10484 alpha_flag = 1;
10485 break;
10486 case 1:
10487 alpha[0] = 1.0;
10488 alpha[1] = 0.0;
10489 alpha_flag = 1;
10490 break;
10491 }
10492
10493 /* vary beta */
10494 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
10495 beta_flag = 0;
10496 switch (beta_val) {
10497 case 0:
10498 beta[0] = beta[1] = 0.0;
10499 beta_flag = 1;
10500 break;
10501 case 1:
10502 beta[0] = 1.0;
10503 beta[1] = 0.0;
10504 beta_flag = 1;
10505 break;
10506 }
10507
10508
10509 /* varying extra precs */
10510 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
10511 switch (prec_val) {
10512 case 0:
10513 eps_int = power(2, -BITS_S);
10514 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
10515 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
10516 prec = blas_prec_single;
10517 break;
10518 case 1:
10519 eps_int = power(2, -BITS_D);
10520 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10521 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10522 prec = blas_prec_double;
10523 break;
10524 case 2:
10525 default:
10526 eps_int = power(2, -BITS_E);
10527 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
10528 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
10529 prec = blas_prec_extra;
10530 break;
10531 }
10532
10533 /* vary norm -- underflow, approx 1, overflow */
10534 for (norm = NORM_START; norm <= NORM_END; norm++) {
10535
10536 /* number of tests */
10537 for (test_no = 0; test_no < ntests; test_no++) {
10538
10539
10540 /* vary storage format */
10541 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
10542
10543 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
10544
10545 /* vary lda = n_i, n_i+1, 2*n_i */
10546 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
10547
10548 if (order_type == blas_rowmajor) {
10549 lda = (lda_val == 0) ? n_i :
10550 (lda_val == 1) ? n_i + 1 : n_i * n_i;
10551 } else {
10552 lda = (lda_val == 0) ? m_i :
10553 (lda_val == 1) ? m_i + 1 : m_i * m_i;
10554 }
10555
10556 /* vary ldb = n_i, n_i+1, 2*n_i */
10557 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
10558
10559 if (order_type == blas_rowmajor) {
10560 ldb = (ldb_val == 0) ? n_i :
10561 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
10562 } else {
10563 ldb = (ldb_val == 0) ? m_i :
10564 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
10565 }
10566
10567 for (randomize_val = RANDOMIZE_START;
10568 randomize_val <= RANDOMIZE_END; randomize_val++) {
10569
10570 /* For the sake of speed, we throw out this case at random */
10571 if (xrand(seed) >= test_prob)
10572 continue;
10573
10574 /* finally we are here to generate the test case */
10575 /* alpha_use, a_use, B_use are the generated alpha, a, B
10576 * before any scaling.
10577 * That is, in the generator, alpha == beta == alpha_use
10578 * before scaling. */
10579
10580 saved_seed = *seed;
10581 BLAS_cge_sum_mv_s_c_testgen(norm, order_type,
10582 m, n, randomize_val, &alpha,
10583 alpha_flag, &beta, beta_flag,
10584 a, lda, B, ldb, x_vec, 1,
10585 &alpha_use, a_use, B_use,
10586 seed, head_r_true,
10587 tail_r_true);
10588
10589 /* vary incx = 1, 2 */
10590 for (incx_val = INCX_START; incx_val <= INCX_END;
10591 incx_val++) {
10592
10593 incx = incx_val;
10594 if (0 == incx)
10595 continue;
10596
10597 ccopy_vector(x_vec, n_i, 1, x, incx);
10598
10599 /* vary incy = 1, 2 */
10600 for (incy_val = INCY_START; incy_val <= INCY_END;
10601 incy_val++) {
10602
10603 incy = incy_val;
10604 if (0 == incy)
10605 continue;
10606
10607 test_count++;
10608
10609 /* call ge_sum_mv routines to be tested */
10610 FPU_FIX_STOP;
10611 BLAS_cge_sum_mv_s_c_x(order_type,
10612 m, n, alpha, a, lda, x, incx,
10613 beta, B, ldb, y, incy, prec);
10614 FPU_FIX_START;
10615
10616 /* now compute the ratio using test_BLAS_xdot */
10617 /* copy a row from A, use x, run
10618 dot test */
10619
10620 incyi = incy;
10621
10622 incri = 1;
10623 incx_veci = 1;
10624 incx_veci *= 2;
10625 incyi *= 2;
10626 incri *= 2;
10627 if (incy < 0) {
10628 y_starti = (-m_i + 1) * incyi;
10629 } else {
10630 y_starti = 0;
10631 }
10632 /* make two copies of x into x_vec. redundant */
10633 ccopy_vector(x, n_i, incx, x_vec, 1);
10634 ccopy_vector(x, n_i, incx,
10635 (x_vec + (n_i * incx_veci)), 1);
10636 for (i = 0, yi = y_starti, ri = 0; i < m_i;
10637 i++, yi += incyi, ri += incri) {
10638 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
10639 a_use, lda, a_vec, i);
10640 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
10641 B_use, ldb, (a_vec + inca_veci * n_i),
10642 i);
10643
10644 rin[0] = rin[1] = 0.0;
10645 rout[0] = y[yi];
10646 rout[1] = y[yi + 1];
10647 head_r_true_elem[0] = head_r_true[ri];
10648 head_r_true_elem[1] = head_r_true[ri + 1];
10649 tail_r_true_elem[0] = tail_r_true[ri];
10650 tail_r_true_elem[1] = tail_r_true[ri + 1];
10651
10652 test_BLAS_cdot_s_c(2 * n_i,
10653 blas_no_conj,
10654 alpha_use, beta_zero_fake, rin,
10655 rout, head_r_true_elem,
10656 tail_r_true_elem, a_vec, 1,
10657 x_vec, 1, eps_int, un_int,
10658 &ratios[i]);
10659
10660 /* take the max ratio */
10661 if (i == 0) {
10662 ratio = ratios[0];
10663 /* The !<= below causes NaN errors
10664 * to be included.
10665 * Note that (NaN > 0) is false */
10666 } else if (!(ratios[i] <= ratio)) {
10667 ratio = ratios[i];
10668 }
10669 } /* end of dot-test loop */
10670
10671 /* The !<= below causes NaN errors
10672 * to be included.
10673 * Note that (NaN > 0) is false */
10674 if (!(ratio <= thresh)) {
10675
10676 if (debug == 3) {
10677 printf("\n\t\tTest # %d\n", test_count);
10678 printf("y type : c, a type : s, x type : c\n");
10679 printf("Seed = %d\t", saved_seed);
10680 printf("n %d, m %d\n", n, m);
10681 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
10682 ldb, incx, incx);
10683
10684 if (order_type == blas_rowmajor)
10685 printf("row ");
10686 else
10687 printf("col ");
10688
10689 printf("NORM %d, ALPHA %d, BETA %d\n",
10690 norm, alpha_val, beta_val);
10691 printf("randomize %d\n", randomize_val);
10692
10693 /* print out info */
10694 printf("alpha = ");
10695 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);;
10696 printf(" ");
10697 printf("beta = ");
10698 printf("(%16.8e, %16.8e)", beta[0], beta[1]);;
10699 printf("\n");
10700 printf("alpha_use = ");
10701 printf("(%16.8e, %16.8e)", alpha_use[0],
10702 alpha_use[1]);;
10703 printf("\n");
10704
10705 sge_print_matrix(a, m_i, n_i, lda, order_type,
10706 "A");
10707 sge_print_matrix(B, m_i, n_i, ldb, order_type,
10708 "B");
10709 cprint_vector(x, n_i, incx, "x");
10710
10711 cprint_vector(y, m_i, incy, "y");
10712
10713 zprint_vector(head_r_true, m_i, 1, "head_r_true");
10714
10715 sge_print_matrix(a_use, m_i, n_i, lda, order_type,
10716 "A_use");
10717 sge_print_matrix(B_use, m_i, n_i, ldb, order_type,
10718 "B_use");
10719
10720 dprint_vector(ratios, m_i, 1, "ratios");
10721 printf("ratio = %g\n", ratio);
10722 fflush(stdout);
10723 }
10724 bad_ratio_count++;
10725 if (bad_ratio_count >= MAX_BAD_TESTS) {
10726 printf("\ntoo many failures, exiting....");
10727 printf("\nTesting and compilation");
10728 printf(" are incomplete\n\n");
10729 goto end;
10730 }
10731 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
10732 printf("\nFlagrant ratio error, exiting...");
10733 printf("\nTesting and compilation");
10734 printf(" are incomplete\n\n");
10735 goto end;
10736 }
10737 }
10738
10739 if (!(ratio <= ratio_max))
10740 ratio_max = ratio;
10741
10742 if (ratio != 0.0 && !(ratio >= ratio_min))
10743 ratio_min = ratio;
10744
10745 } /* end of incy loop */
10746
10747 } /* end of incx loop */
10748
10749 } /* end of randmize loop */
10750
10751 } /* end of ldb loop */
10752
10753 } /* end of lda loop */
10754
10755 } /* end of order loop */
10756
10757 } /* end of nr test loop */
10758
10759 } /* end of norm loop */
10760
10761
10762 } /* end of prec loop */
10763
10764 } /* end of beta loop */
10765
10766 } /* end of alpha loop */
10767
10768 FPU_FIX_STOP;
10769
10770 end:
10771 blas_free(y);
10772 blas_free(a);
10773 blas_free(a_use);
10774 blas_free(B);
10775 blas_free(B_use);
10776 blas_free(x);
10777 blas_free(head_r_true);
10778 blas_free(tail_r_true);
10779 blas_free(ratios);
10780 blas_free(a_vec);
10781 blas_free(x_vec);
10782
10783 *max_ratio = ratio_max;
10784 *min_ratio = ratio_min;
10785 *num_tests = test_count;
10786 *num_bad_ratio = bad_ratio_count;
10787
10788 }
do_test_cge_sum_mv_s_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)10789 void do_test_cge_sum_mv_s_s_x
10790 (int m, int n,
10791 int ntests, int *seed, double thresh, int debug, float test_prob,
10792 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
10793
10794 /* Function name */
10795 const char fname[] = "BLAS_cge_sum_mv_s_s_x";
10796
10797 int i;
10798 int yi;
10799 int incyi, y_starti, incx_veci;
10800 int test_count;
10801 int bad_ratio_count;
10802
10803 int ri;
10804 int incri;
10805 int inca, incx, incy;
10806
10807 double ratio;
10808
10809 double ratio_min, ratio_max;
10810
10811 double eps_int; /* internal machine epsilon */
10812 double un_int; /* internal underflow threshold */
10813
10814 float rin[2];
10815 float rout[2];
10816 double head_r_true_elem[2], tail_r_true_elem[2];
10817
10818 enum blas_order_type order_type;
10819 enum blas_prec_type prec;
10820
10821 int order_val;
10822 int lda_val, incx_val, incy_val;
10823 int ldb_val;
10824 int alpha_val, beta_val;
10825 int randomize_val;
10826
10827 int prec_val;
10828
10829 int lda, ldb;
10830 int alpha_flag, beta_flag;
10831 int saved_seed;
10832 int norm;
10833 int test_no;
10834
10835 int n_i, m_i;
10836 int inca_veci;
10837
10838 float alpha[2];
10839 float beta[2];
10840 float beta_zero_fake[2];
10841 float alpha_use[2];
10842 float *a;
10843 float *a_use;
10844 float *B;
10845 float *B_use;
10846 float *x;
10847 float *y;
10848 float *a_vec;
10849 float *x_vec;
10850
10851
10852 double *ratios;
10853
10854 /* true result calculated by testgen, in double-double */
10855 double *head_r_true, *tail_r_true;
10856
10857
10858 FPU_FIX_DECL;
10859
10860 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
10861
10862 if (n < 0 || ntests < 0)
10863 BLAS_error(fname, -3, n, NULL);
10864
10865 /* initialization */
10866 saved_seed = *seed;
10867 ratio = 0.0;
10868 ratio_min = 1e308;
10869 ratio_max = 0.0;
10870
10871 *num_tests = 0;
10872 *num_bad_ratio = 0;
10873 *min_ratio = 0.0;
10874 *max_ratio = 0.0;
10875
10876 if (n == 0)
10877 return;
10878
10879 FPU_FIX_START;
10880
10881 n_i = n;
10882 m_i = m;
10883
10884 inca = incx = incy = 1;
10885
10886
10887 incy *= 2;
10888
10889 /* allocate memory for arrays */
10890 y = (float *) blas_malloc(4 * m_i * sizeof(float) * 2);
10891 if (4 * m_i > 0 && y == NULL) {
10892 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10893 }
10894 a = (float *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(float));
10895 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
10896 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10897 }
10898 a_use = (float *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(float));
10899 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
10900 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10901 }
10902 B = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
10903 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
10904 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10905 }
10906 B_use = (float *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(float));
10907 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
10908 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10909 }
10910 x = (float *) blas_malloc(4 * n_i * sizeof(float));
10911 if (4 * n_i > 0 && x == NULL) {
10912 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10913 }
10914
10915 inca_veci = 1;
10916
10917 a_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
10918 if (2 * n_i > 0 && a_vec == NULL) {
10919 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10920 }
10921 x_vec = (float *) blas_malloc(2 * n_i * sizeof(float));
10922 if (2 * n_i > 0 && x_vec == NULL) {
10923 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10924 }
10925 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
10926 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
10927 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
10928 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10929 }
10930 ratios = (double *) blas_malloc(m_i * sizeof(double));
10931 if (m_i > 0 && ratios == NULL) {
10932 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10933 }
10934
10935 test_count = 0;
10936 bad_ratio_count = 0;
10937
10938 /* vary alpha */
10939 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
10940
10941 alpha_flag = 0;
10942 switch (alpha_val) {
10943 case 0:
10944 alpha[0] = alpha[1] = 0.0;
10945 alpha_flag = 1;
10946 break;
10947 case 1:
10948 alpha[0] = 1.0;
10949 alpha[1] = 0.0;
10950 alpha_flag = 1;
10951 break;
10952 }
10953
10954 /* vary beta */
10955 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
10956 beta_flag = 0;
10957 switch (beta_val) {
10958 case 0:
10959 beta[0] = beta[1] = 0.0;
10960 beta_flag = 1;
10961 break;
10962 case 1:
10963 beta[0] = 1.0;
10964 beta[1] = 0.0;
10965 beta_flag = 1;
10966 break;
10967 }
10968
10969
10970 /* varying extra precs */
10971 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
10972 switch (prec_val) {
10973 case 0:
10974 eps_int = power(2, -BITS_S);
10975 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
10976 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
10977 prec = blas_prec_single;
10978 break;
10979 case 1:
10980 eps_int = power(2, -BITS_D);
10981 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10982 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10983 prec = blas_prec_double;
10984 break;
10985 case 2:
10986 default:
10987 eps_int = power(2, -BITS_E);
10988 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
10989 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
10990 prec = blas_prec_extra;
10991 break;
10992 }
10993
10994 /* vary norm -- underflow, approx 1, overflow */
10995 for (norm = NORM_START; norm <= NORM_END; norm++) {
10996
10997 /* number of tests */
10998 for (test_no = 0; test_no < ntests; test_no++) {
10999
11000
11001 /* vary storage format */
11002 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
11003
11004 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
11005
11006 /* vary lda = n_i, n_i+1, 2*n_i */
11007 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
11008
11009 if (order_type == blas_rowmajor) {
11010 lda = (lda_val == 0) ? n_i :
11011 (lda_val == 1) ? n_i + 1 : n_i * n_i;
11012 } else {
11013 lda = (lda_val == 0) ? m_i :
11014 (lda_val == 1) ? m_i + 1 : m_i * m_i;
11015 }
11016
11017 /* vary ldb = n_i, n_i+1, 2*n_i */
11018 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
11019
11020 if (order_type == blas_rowmajor) {
11021 ldb = (ldb_val == 0) ? n_i :
11022 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
11023 } else {
11024 ldb = (ldb_val == 0) ? m_i :
11025 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
11026 }
11027
11028 for (randomize_val = RANDOMIZE_START;
11029 randomize_val <= RANDOMIZE_END; randomize_val++) {
11030
11031 /* For the sake of speed, we throw out this case at random */
11032 if (xrand(seed) >= test_prob)
11033 continue;
11034
11035 /* finally we are here to generate the test case */
11036 /* alpha_use, a_use, B_use are the generated alpha, a, B
11037 * before any scaling.
11038 * That is, in the generator, alpha == beta == alpha_use
11039 * before scaling. */
11040
11041 saved_seed = *seed;
11042 BLAS_cge_sum_mv_s_s_testgen(norm, order_type,
11043 m, n, randomize_val, &alpha,
11044 alpha_flag, &beta, beta_flag,
11045 a, lda, B, ldb, x_vec, 1,
11046 &alpha_use, a_use, B_use,
11047 seed, head_r_true,
11048 tail_r_true);
11049
11050 /* vary incx = 1, 2 */
11051 for (incx_val = INCX_START; incx_val <= INCX_END;
11052 incx_val++) {
11053
11054 incx = incx_val;
11055 if (0 == incx)
11056 continue;
11057
11058 scopy_vector(x_vec, n_i, 1, x, incx);
11059
11060 /* vary incy = 1, 2 */
11061 for (incy_val = INCY_START; incy_val <= INCY_END;
11062 incy_val++) {
11063
11064 incy = incy_val;
11065 if (0 == incy)
11066 continue;
11067
11068 test_count++;
11069
11070 /* call ge_sum_mv routines to be tested */
11071 FPU_FIX_STOP;
11072 BLAS_cge_sum_mv_s_s_x(order_type,
11073 m, n, alpha, a, lda, x, incx,
11074 beta, B, ldb, y, incy, prec);
11075 FPU_FIX_START;
11076
11077 /* now compute the ratio using test_BLAS_xdot */
11078 /* copy a row from A, use x, run
11079 dot test */
11080
11081 incyi = incy;
11082
11083 incri = 1;
11084 incx_veci = 1;
11085
11086 incyi *= 2;
11087 incri *= 2;
11088 if (incy < 0) {
11089 y_starti = (-m_i + 1) * incyi;
11090 } else {
11091 y_starti = 0;
11092 }
11093 /* make two copies of x into x_vec. redundant */
11094 scopy_vector(x, n_i, incx, x_vec, 1);
11095 scopy_vector(x, n_i, incx,
11096 (x_vec + (n_i * incx_veci)), 1);
11097 for (i = 0, yi = y_starti, ri = 0; i < m_i;
11098 i++, yi += incyi, ri += incri) {
11099 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
11100 a_use, lda, a_vec, i);
11101 sge_copy_row(order_type, blas_no_trans, m_i, n_i,
11102 B_use, ldb, (a_vec + inca_veci * n_i),
11103 i);
11104
11105 rin[0] = rin[1] = 0.0;
11106 rout[0] = y[yi];
11107 rout[1] = y[yi + 1];
11108 head_r_true_elem[0] = head_r_true[ri];
11109 head_r_true_elem[1] = head_r_true[ri + 1];
11110 tail_r_true_elem[0] = tail_r_true[ri];
11111 tail_r_true_elem[1] = tail_r_true[ri + 1];
11112
11113 test_BLAS_cdot_s_s(2 * n_i,
11114 blas_no_conj,
11115 alpha_use, beta_zero_fake, rin,
11116 rout, head_r_true_elem,
11117 tail_r_true_elem, a_vec, 1,
11118 x_vec, 1, eps_int, un_int,
11119 &ratios[i]);
11120
11121 /* take the max ratio */
11122 if (i == 0) {
11123 ratio = ratios[0];
11124 /* The !<= below causes NaN errors
11125 * to be included.
11126 * Note that (NaN > 0) is false */
11127 } else if (!(ratios[i] <= ratio)) {
11128 ratio = ratios[i];
11129 }
11130 } /* end of dot-test loop */
11131
11132 /* The !<= below causes NaN errors
11133 * to be included.
11134 * Note that (NaN > 0) is false */
11135 if (!(ratio <= thresh)) {
11136
11137 if (debug == 3) {
11138 printf("\n\t\tTest # %d\n", test_count);
11139 printf("y type : c, a type : s, x type : s\n");
11140 printf("Seed = %d\t", saved_seed);
11141 printf("n %d, m %d\n", n, m);
11142 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
11143 ldb, incx, incx);
11144
11145 if (order_type == blas_rowmajor)
11146 printf("row ");
11147 else
11148 printf("col ");
11149
11150 printf("NORM %d, ALPHA %d, BETA %d\n",
11151 norm, alpha_val, beta_val);
11152 printf("randomize %d\n", randomize_val);
11153
11154 /* print out info */
11155 printf("alpha = ");
11156 printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);;
11157 printf(" ");
11158 printf("beta = ");
11159 printf("(%16.8e, %16.8e)", beta[0], beta[1]);;
11160 printf("\n");
11161 printf("alpha_use = ");
11162 printf("(%16.8e, %16.8e)", alpha_use[0],
11163 alpha_use[1]);;
11164 printf("\n");
11165
11166 sge_print_matrix(a, m_i, n_i, lda, order_type,
11167 "A");
11168 sge_print_matrix(B, m_i, n_i, ldb, order_type,
11169 "B");
11170 sprint_vector(x, n_i, incx, "x");
11171
11172 cprint_vector(y, m_i, incy, "y");
11173
11174 zprint_vector(head_r_true, m_i, 1, "head_r_true");
11175
11176 sge_print_matrix(a_use, m_i, n_i, lda, order_type,
11177 "A_use");
11178 sge_print_matrix(B_use, m_i, n_i, ldb, order_type,
11179 "B_use");
11180
11181 dprint_vector(ratios, m_i, 1, "ratios");
11182 printf("ratio = %g\n", ratio);
11183 fflush(stdout);
11184 }
11185 bad_ratio_count++;
11186 if (bad_ratio_count >= MAX_BAD_TESTS) {
11187 printf("\ntoo many failures, exiting....");
11188 printf("\nTesting and compilation");
11189 printf(" are incomplete\n\n");
11190 goto end;
11191 }
11192 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
11193 printf("\nFlagrant ratio error, exiting...");
11194 printf("\nTesting and compilation");
11195 printf(" are incomplete\n\n");
11196 goto end;
11197 }
11198 }
11199
11200 if (!(ratio <= ratio_max))
11201 ratio_max = ratio;
11202
11203 if (ratio != 0.0 && !(ratio >= ratio_min))
11204 ratio_min = ratio;
11205
11206 } /* end of incy loop */
11207
11208 } /* end of incx loop */
11209
11210 } /* end of randmize loop */
11211
11212 } /* end of ldb loop */
11213
11214 } /* end of lda loop */
11215
11216 } /* end of order loop */
11217
11218 } /* end of nr test loop */
11219
11220 } /* end of norm loop */
11221
11222
11223 } /* end of prec loop */
11224
11225 } /* end of beta loop */
11226
11227 } /* end of alpha loop */
11228
11229 FPU_FIX_STOP;
11230
11231 end:
11232 blas_free(y);
11233 blas_free(a);
11234 blas_free(a_use);
11235 blas_free(B);
11236 blas_free(B_use);
11237 blas_free(x);
11238 blas_free(head_r_true);
11239 blas_free(tail_r_true);
11240 blas_free(ratios);
11241 blas_free(a_vec);
11242 blas_free(x_vec);
11243
11244 *max_ratio = ratio_max;
11245 *min_ratio = ratio_min;
11246 *num_tests = test_count;
11247 *num_bad_ratio = bad_ratio_count;
11248
11249 }
do_test_zge_sum_mv_z_d_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)11250 void do_test_zge_sum_mv_z_d_x
11251 (int m, int n,
11252 int ntests, int *seed, double thresh, int debug, float test_prob,
11253 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
11254
11255 /* Function name */
11256 const char fname[] = "BLAS_zge_sum_mv_z_d_x";
11257
11258 int i;
11259 int yi;
11260 int incyi, y_starti, incx_veci;
11261 int test_count;
11262 int bad_ratio_count;
11263
11264 int ri;
11265 int incri;
11266 int inca, incx, incy;
11267
11268 double ratio;
11269
11270 double ratio_min, ratio_max;
11271
11272 double eps_int; /* internal machine epsilon */
11273 double un_int; /* internal underflow threshold */
11274
11275 double rin[2];
11276 double rout[2];
11277 double head_r_true_elem[2], tail_r_true_elem[2];
11278
11279 enum blas_order_type order_type;
11280 enum blas_prec_type prec;
11281
11282 int order_val;
11283 int lda_val, incx_val, incy_val;
11284 int ldb_val;
11285 int alpha_val, beta_val;
11286 int randomize_val;
11287
11288 int prec_val;
11289
11290 int lda, ldb;
11291 int alpha_flag, beta_flag;
11292 int saved_seed;
11293 int norm;
11294 int test_no;
11295
11296 int n_i, m_i;
11297 int inca_veci;
11298
11299 double alpha[2];
11300 double beta[2];
11301 double beta_zero_fake[2];
11302 double alpha_use[2];
11303 double *a;
11304 double *a_use;
11305 double *B;
11306 double *B_use;
11307 double *x;
11308 double *y;
11309 double *a_vec;
11310 double *x_vec;
11311
11312
11313 double *ratios;
11314
11315 /* true result calculated by testgen, in double-double */
11316 double *head_r_true, *tail_r_true;
11317
11318
11319 FPU_FIX_DECL;
11320
11321 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
11322
11323 if (n < 0 || ntests < 0)
11324 BLAS_error(fname, -3, n, NULL);
11325
11326 /* initialization */
11327 saved_seed = *seed;
11328 ratio = 0.0;
11329 ratio_min = 1e308;
11330 ratio_max = 0.0;
11331
11332 *num_tests = 0;
11333 *num_bad_ratio = 0;
11334 *min_ratio = 0.0;
11335 *max_ratio = 0.0;
11336
11337 if (n == 0)
11338 return;
11339
11340 FPU_FIX_START;
11341
11342 n_i = n;
11343 m_i = m;
11344
11345 inca = incx = incy = 1;
11346 inca *= 2;
11347
11348 incy *= 2;
11349
11350 /* allocate memory for arrays */
11351 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
11352 if (4 * m_i > 0 && y == NULL) {
11353 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11354 }
11355 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double) * 2);
11356 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
11357 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11358 }
11359 a_use =
11360 (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double) * 2);
11361 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
11362 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11363 }
11364 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double) * 2);
11365 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
11366 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11367 }
11368 B_use =
11369 (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double) * 2);
11370 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
11371 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11372 }
11373 x = (double *) blas_malloc(4 * n_i * sizeof(double));
11374 if (4 * n_i > 0 && x == NULL) {
11375 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11376 }
11377
11378 inca_veci = 1;
11379 inca_veci *= 2;
11380 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double) * 2);
11381 if (2 * n_i > 0 && a_vec == NULL) {
11382 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11383 }
11384 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
11385 if (2 * n_i > 0 && x_vec == NULL) {
11386 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11387 }
11388 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
11389 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
11390 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
11391 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11392 }
11393 ratios = (double *) blas_malloc(m_i * sizeof(double));
11394 if (m_i > 0 && ratios == NULL) {
11395 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11396 }
11397
11398 test_count = 0;
11399 bad_ratio_count = 0;
11400
11401 /* vary alpha */
11402 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
11403
11404 alpha_flag = 0;
11405 switch (alpha_val) {
11406 case 0:
11407 alpha[0] = alpha[1] = 0.0;
11408 alpha_flag = 1;
11409 break;
11410 case 1:
11411 alpha[0] = 1.0;
11412 alpha[1] = 0.0;
11413 alpha_flag = 1;
11414 break;
11415 }
11416
11417 /* vary beta */
11418 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
11419 beta_flag = 0;
11420 switch (beta_val) {
11421 case 0:
11422 beta[0] = beta[1] = 0.0;
11423 beta_flag = 1;
11424 break;
11425 case 1:
11426 beta[0] = 1.0;
11427 beta[1] = 0.0;
11428 beta_flag = 1;
11429 break;
11430 }
11431
11432
11433 /* varying extra precs */
11434 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
11435 switch (prec_val) {
11436 case 0:
11437 eps_int = power(2, -BITS_D);
11438 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11439 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11440 prec = blas_prec_double;
11441 break;
11442 case 1:
11443 eps_int = power(2, -BITS_D);
11444 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11445 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11446 prec = blas_prec_double;
11447 break;
11448 case 2:
11449 default:
11450 eps_int = power(2, -BITS_E);
11451 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
11452 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
11453 prec = blas_prec_extra;
11454 break;
11455 }
11456
11457 /* vary norm -- underflow, approx 1, overflow */
11458 for (norm = NORM_START; norm <= NORM_END; norm++) {
11459
11460 /* number of tests */
11461 for (test_no = 0; test_no < ntests; test_no++) {
11462
11463
11464 /* vary storage format */
11465 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
11466
11467 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
11468
11469 /* vary lda = n_i, n_i+1, 2*n_i */
11470 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
11471
11472 if (order_type == blas_rowmajor) {
11473 lda = (lda_val == 0) ? n_i :
11474 (lda_val == 1) ? n_i + 1 : n_i * n_i;
11475 } else {
11476 lda = (lda_val == 0) ? m_i :
11477 (lda_val == 1) ? m_i + 1 : m_i * m_i;
11478 }
11479
11480 /* vary ldb = n_i, n_i+1, 2*n_i */
11481 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
11482
11483 if (order_type == blas_rowmajor) {
11484 ldb = (ldb_val == 0) ? n_i :
11485 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
11486 } else {
11487 ldb = (ldb_val == 0) ? m_i :
11488 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
11489 }
11490
11491 for (randomize_val = RANDOMIZE_START;
11492 randomize_val <= RANDOMIZE_END; randomize_val++) {
11493
11494 /* For the sake of speed, we throw out this case at random */
11495 if (xrand(seed) >= test_prob)
11496 continue;
11497
11498 /* finally we are here to generate the test case */
11499 /* alpha_use, a_use, B_use are the generated alpha, a, B
11500 * before any scaling.
11501 * That is, in the generator, alpha == beta == alpha_use
11502 * before scaling. */
11503
11504 saved_seed = *seed;
11505 BLAS_zge_sum_mv_z_d_testgen(norm, order_type,
11506 m, n, randomize_val, &alpha,
11507 alpha_flag, &beta, beta_flag,
11508 a, lda, B, ldb, x_vec, 1,
11509 &alpha_use, a_use, B_use,
11510 seed, head_r_true,
11511 tail_r_true);
11512
11513 /* vary incx = 1, 2 */
11514 for (incx_val = INCX_START; incx_val <= INCX_END;
11515 incx_val++) {
11516
11517 incx = incx_val;
11518 if (0 == incx)
11519 continue;
11520
11521 dcopy_vector(x_vec, n_i, 1, x, incx);
11522
11523 /* vary incy = 1, 2 */
11524 for (incy_val = INCY_START; incy_val <= INCY_END;
11525 incy_val++) {
11526
11527 incy = incy_val;
11528 if (0 == incy)
11529 continue;
11530
11531 test_count++;
11532
11533 /* call ge_sum_mv routines to be tested */
11534 FPU_FIX_STOP;
11535 BLAS_zge_sum_mv_z_d_x(order_type,
11536 m, n, alpha, a, lda, x, incx,
11537 beta, B, ldb, y, incy, prec);
11538 FPU_FIX_START;
11539
11540 /* now compute the ratio using test_BLAS_xdot */
11541 /* copy a row from A, use x, run
11542 dot test */
11543
11544 incyi = incy;
11545
11546 incri = 1;
11547 incx_veci = 1;
11548
11549 incyi *= 2;
11550 incri *= 2;
11551 if (incy < 0) {
11552 y_starti = (-m_i + 1) * incyi;
11553 } else {
11554 y_starti = 0;
11555 }
11556 /* make two copies of x into x_vec. redundant */
11557 dcopy_vector(x, n_i, incx, x_vec, 1);
11558 dcopy_vector(x, n_i, incx,
11559 (x_vec + (n_i * incx_veci)), 1);
11560 for (i = 0, yi = y_starti, ri = 0; i < m_i;
11561 i++, yi += incyi, ri += incri) {
11562 zge_copy_row(order_type, blas_no_trans, m_i, n_i,
11563 a_use, lda, a_vec, i);
11564 zge_copy_row(order_type, blas_no_trans, m_i, n_i,
11565 B_use, ldb, (a_vec + inca_veci * n_i),
11566 i);
11567
11568 rin[0] = rin[1] = 0.0;
11569 rout[0] = y[yi];
11570 rout[1] = y[yi + 1];
11571 head_r_true_elem[0] = head_r_true[ri];
11572 head_r_true_elem[1] = head_r_true[ri + 1];
11573 tail_r_true_elem[0] = tail_r_true[ri];
11574 tail_r_true_elem[1] = tail_r_true[ri + 1];
11575
11576 test_BLAS_zdot_z_d(2 * n_i,
11577 blas_no_conj,
11578 alpha_use, beta_zero_fake, rin,
11579 rout, head_r_true_elem,
11580 tail_r_true_elem, a_vec, 1,
11581 x_vec, 1, eps_int, un_int,
11582 &ratios[i]);
11583
11584 /* take the max ratio */
11585 if (i == 0) {
11586 ratio = ratios[0];
11587 /* The !<= below causes NaN errors
11588 * to be included.
11589 * Note that (NaN > 0) is false */
11590 } else if (!(ratios[i] <= ratio)) {
11591 ratio = ratios[i];
11592 }
11593 } /* end of dot-test loop */
11594
11595 /* The !<= below causes NaN errors
11596 * to be included.
11597 * Note that (NaN > 0) is false */
11598 if (!(ratio <= thresh)) {
11599
11600 if (debug == 3) {
11601 printf("\n\t\tTest # %d\n", test_count);
11602 printf("y type : z, a type : z, x type : d\n");
11603 printf("Seed = %d\t", saved_seed);
11604 printf("n %d, m %d\n", n, m);
11605 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
11606 ldb, incx, incx);
11607
11608 if (order_type == blas_rowmajor)
11609 printf("row ");
11610 else
11611 printf("col ");
11612
11613 printf("NORM %d, ALPHA %d, BETA %d\n",
11614 norm, alpha_val, beta_val);
11615 printf("randomize %d\n", randomize_val);
11616
11617 /* print out info */
11618 printf("alpha = ");
11619 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
11620 printf(" ");
11621 printf("beta = ");
11622 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
11623 printf("\n");
11624 printf("alpha_use = ");
11625 printf("(%24.16e, %24.16e)", alpha_use[0],
11626 alpha_use[1]);;
11627 printf("\n");
11628
11629 zge_print_matrix(a, m_i, n_i, lda, order_type,
11630 "A");
11631 zge_print_matrix(B, m_i, n_i, ldb, order_type,
11632 "B");
11633 dprint_vector(x, n_i, incx, "x");
11634
11635 zprint_vector(y, m_i, incy, "y");
11636
11637 zprint_vector(head_r_true, m_i, 1, "head_r_true");
11638
11639 zge_print_matrix(a_use, m_i, n_i, lda, order_type,
11640 "A_use");
11641 zge_print_matrix(B_use, m_i, n_i, ldb, order_type,
11642 "B_use");
11643
11644 dprint_vector(ratios, m_i, 1, "ratios");
11645 printf("ratio = %g\n", ratio);
11646 fflush(stdout);
11647 }
11648 bad_ratio_count++;
11649 if (bad_ratio_count >= MAX_BAD_TESTS) {
11650 printf("\ntoo many failures, exiting....");
11651 printf("\nTesting and compilation");
11652 printf(" are incomplete\n\n");
11653 goto end;
11654 }
11655 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
11656 printf("\nFlagrant ratio error, exiting...");
11657 printf("\nTesting and compilation");
11658 printf(" are incomplete\n\n");
11659 goto end;
11660 }
11661 }
11662
11663 if (!(ratio <= ratio_max))
11664 ratio_max = ratio;
11665
11666 if (ratio != 0.0 && !(ratio >= ratio_min))
11667 ratio_min = ratio;
11668
11669 } /* end of incy loop */
11670
11671 } /* end of incx loop */
11672
11673 } /* end of randmize loop */
11674
11675 } /* end of ldb loop */
11676
11677 } /* end of lda loop */
11678
11679 } /* end of order loop */
11680
11681 } /* end of nr test loop */
11682
11683 } /* end of norm loop */
11684
11685
11686 } /* end of prec loop */
11687
11688 } /* end of beta loop */
11689
11690 } /* end of alpha loop */
11691
11692 FPU_FIX_STOP;
11693
11694 end:
11695 blas_free(y);
11696 blas_free(a);
11697 blas_free(a_use);
11698 blas_free(B);
11699 blas_free(B_use);
11700 blas_free(x);
11701 blas_free(head_r_true);
11702 blas_free(tail_r_true);
11703 blas_free(ratios);
11704 blas_free(a_vec);
11705 blas_free(x_vec);
11706
11707 *max_ratio = ratio_max;
11708 *min_ratio = ratio_min;
11709 *num_tests = test_count;
11710 *num_bad_ratio = bad_ratio_count;
11711
11712 }
do_test_zge_sum_mv_d_z_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)11713 void do_test_zge_sum_mv_d_z_x
11714 (int m, int n,
11715 int ntests, int *seed, double thresh, int debug, float test_prob,
11716 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
11717
11718 /* Function name */
11719 const char fname[] = "BLAS_zge_sum_mv_d_z_x";
11720
11721 int i;
11722 int yi;
11723 int incyi, y_starti, incx_veci;
11724 int test_count;
11725 int bad_ratio_count;
11726
11727 int ri;
11728 int incri;
11729 int inca, incx, incy;
11730
11731 double ratio;
11732
11733 double ratio_min, ratio_max;
11734
11735 double eps_int; /* internal machine epsilon */
11736 double un_int; /* internal underflow threshold */
11737
11738 double rin[2];
11739 double rout[2];
11740 double head_r_true_elem[2], tail_r_true_elem[2];
11741
11742 enum blas_order_type order_type;
11743 enum blas_prec_type prec;
11744
11745 int order_val;
11746 int lda_val, incx_val, incy_val;
11747 int ldb_val;
11748 int alpha_val, beta_val;
11749 int randomize_val;
11750
11751 int prec_val;
11752
11753 int lda, ldb;
11754 int alpha_flag, beta_flag;
11755 int saved_seed;
11756 int norm;
11757 int test_no;
11758
11759 int n_i, m_i;
11760 int inca_veci;
11761
11762 double alpha[2];
11763 double beta[2];
11764 double beta_zero_fake[2];
11765 double alpha_use[2];
11766 double *a;
11767 double *a_use;
11768 double *B;
11769 double *B_use;
11770 double *x;
11771 double *y;
11772 double *a_vec;
11773 double *x_vec;
11774
11775
11776 double *ratios;
11777
11778 /* true result calculated by testgen, in double-double */
11779 double *head_r_true, *tail_r_true;
11780
11781
11782 FPU_FIX_DECL;
11783
11784 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
11785
11786 if (n < 0 || ntests < 0)
11787 BLAS_error(fname, -3, n, NULL);
11788
11789 /* initialization */
11790 saved_seed = *seed;
11791 ratio = 0.0;
11792 ratio_min = 1e308;
11793 ratio_max = 0.0;
11794
11795 *num_tests = 0;
11796 *num_bad_ratio = 0;
11797 *min_ratio = 0.0;
11798 *max_ratio = 0.0;
11799
11800 if (n == 0)
11801 return;
11802
11803 FPU_FIX_START;
11804
11805 n_i = n;
11806 m_i = m;
11807
11808 inca = incx = incy = 1;
11809
11810 incx *= 2;
11811 incy *= 2;
11812
11813 /* allocate memory for arrays */
11814 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
11815 if (4 * m_i > 0 && y == NULL) {
11816 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11817 }
11818 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double));
11819 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
11820 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11821 }
11822 a_use = (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double));
11823 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
11824 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11825 }
11826 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
11827 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
11828 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11829 }
11830 B_use = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
11831 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
11832 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11833 }
11834 x = (double *) blas_malloc(4 * n_i * sizeof(double) * 2);
11835 if (4 * n_i > 0 && x == NULL) {
11836 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11837 }
11838
11839 inca_veci = 1;
11840
11841 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
11842 if (2 * n_i > 0 && a_vec == NULL) {
11843 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11844 }
11845 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double) * 2);
11846 if (2 * n_i > 0 && x_vec == NULL) {
11847 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11848 }
11849 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
11850 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
11851 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
11852 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11853 }
11854 ratios = (double *) blas_malloc(m_i * sizeof(double));
11855 if (m_i > 0 && ratios == NULL) {
11856 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11857 }
11858
11859 test_count = 0;
11860 bad_ratio_count = 0;
11861
11862 /* vary alpha */
11863 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
11864
11865 alpha_flag = 0;
11866 switch (alpha_val) {
11867 case 0:
11868 alpha[0] = alpha[1] = 0.0;
11869 alpha_flag = 1;
11870 break;
11871 case 1:
11872 alpha[0] = 1.0;
11873 alpha[1] = 0.0;
11874 alpha_flag = 1;
11875 break;
11876 }
11877
11878 /* vary beta */
11879 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
11880 beta_flag = 0;
11881 switch (beta_val) {
11882 case 0:
11883 beta[0] = beta[1] = 0.0;
11884 beta_flag = 1;
11885 break;
11886 case 1:
11887 beta[0] = 1.0;
11888 beta[1] = 0.0;
11889 beta_flag = 1;
11890 break;
11891 }
11892
11893
11894 /* varying extra precs */
11895 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
11896 switch (prec_val) {
11897 case 0:
11898 eps_int = power(2, -BITS_D);
11899 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11900 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11901 prec = blas_prec_double;
11902 break;
11903 case 1:
11904 eps_int = power(2, -BITS_D);
11905 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11906 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11907 prec = blas_prec_double;
11908 break;
11909 case 2:
11910 default:
11911 eps_int = power(2, -BITS_E);
11912 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
11913 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
11914 prec = blas_prec_extra;
11915 break;
11916 }
11917
11918 /* vary norm -- underflow, approx 1, overflow */
11919 for (norm = NORM_START; norm <= NORM_END; norm++) {
11920
11921 /* number of tests */
11922 for (test_no = 0; test_no < ntests; test_no++) {
11923
11924
11925 /* vary storage format */
11926 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
11927
11928 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
11929
11930 /* vary lda = n_i, n_i+1, 2*n_i */
11931 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
11932
11933 if (order_type == blas_rowmajor) {
11934 lda = (lda_val == 0) ? n_i :
11935 (lda_val == 1) ? n_i + 1 : n_i * n_i;
11936 } else {
11937 lda = (lda_val == 0) ? m_i :
11938 (lda_val == 1) ? m_i + 1 : m_i * m_i;
11939 }
11940
11941 /* vary ldb = n_i, n_i+1, 2*n_i */
11942 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
11943
11944 if (order_type == blas_rowmajor) {
11945 ldb = (ldb_val == 0) ? n_i :
11946 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
11947 } else {
11948 ldb = (ldb_val == 0) ? m_i :
11949 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
11950 }
11951
11952 for (randomize_val = RANDOMIZE_START;
11953 randomize_val <= RANDOMIZE_END; randomize_val++) {
11954
11955 /* For the sake of speed, we throw out this case at random */
11956 if (xrand(seed) >= test_prob)
11957 continue;
11958
11959 /* finally we are here to generate the test case */
11960 /* alpha_use, a_use, B_use are the generated alpha, a, B
11961 * before any scaling.
11962 * That is, in the generator, alpha == beta == alpha_use
11963 * before scaling. */
11964
11965 saved_seed = *seed;
11966 BLAS_zge_sum_mv_d_z_testgen(norm, order_type,
11967 m, n, randomize_val, &alpha,
11968 alpha_flag, &beta, beta_flag,
11969 a, lda, B, ldb, x_vec, 1,
11970 &alpha_use, a_use, B_use,
11971 seed, head_r_true,
11972 tail_r_true);
11973
11974 /* vary incx = 1, 2 */
11975 for (incx_val = INCX_START; incx_val <= INCX_END;
11976 incx_val++) {
11977
11978 incx = incx_val;
11979 if (0 == incx)
11980 continue;
11981
11982 zcopy_vector(x_vec, n_i, 1, x, incx);
11983
11984 /* vary incy = 1, 2 */
11985 for (incy_val = INCY_START; incy_val <= INCY_END;
11986 incy_val++) {
11987
11988 incy = incy_val;
11989 if (0 == incy)
11990 continue;
11991
11992 test_count++;
11993
11994 /* call ge_sum_mv routines to be tested */
11995 FPU_FIX_STOP;
11996 BLAS_zge_sum_mv_d_z_x(order_type,
11997 m, n, alpha, a, lda, x, incx,
11998 beta, B, ldb, y, incy, prec);
11999 FPU_FIX_START;
12000
12001 /* now compute the ratio using test_BLAS_xdot */
12002 /* copy a row from A, use x, run
12003 dot test */
12004
12005 incyi = incy;
12006
12007 incri = 1;
12008 incx_veci = 1;
12009 incx_veci *= 2;
12010 incyi *= 2;
12011 incri *= 2;
12012 if (incy < 0) {
12013 y_starti = (-m_i + 1) * incyi;
12014 } else {
12015 y_starti = 0;
12016 }
12017 /* make two copies of x into x_vec. redundant */
12018 zcopy_vector(x, n_i, incx, x_vec, 1);
12019 zcopy_vector(x, n_i, incx,
12020 (x_vec + (n_i * incx_veci)), 1);
12021 for (i = 0, yi = y_starti, ri = 0; i < m_i;
12022 i++, yi += incyi, ri += incri) {
12023 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
12024 a_use, lda, a_vec, i);
12025 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
12026 B_use, ldb, (a_vec + inca_veci * n_i),
12027 i);
12028
12029 rin[0] = rin[1] = 0.0;
12030 rout[0] = y[yi];
12031 rout[1] = y[yi + 1];
12032 head_r_true_elem[0] = head_r_true[ri];
12033 head_r_true_elem[1] = head_r_true[ri + 1];
12034 tail_r_true_elem[0] = tail_r_true[ri];
12035 tail_r_true_elem[1] = tail_r_true[ri + 1];
12036
12037 test_BLAS_zdot_d_z(2 * n_i,
12038 blas_no_conj,
12039 alpha_use, beta_zero_fake, rin,
12040 rout, head_r_true_elem,
12041 tail_r_true_elem, a_vec, 1,
12042 x_vec, 1, eps_int, un_int,
12043 &ratios[i]);
12044
12045 /* take the max ratio */
12046 if (i == 0) {
12047 ratio = ratios[0];
12048 /* The !<= below causes NaN errors
12049 * to be included.
12050 * Note that (NaN > 0) is false */
12051 } else if (!(ratios[i] <= ratio)) {
12052 ratio = ratios[i];
12053 }
12054 } /* end of dot-test loop */
12055
12056 /* The !<= below causes NaN errors
12057 * to be included.
12058 * Note that (NaN > 0) is false */
12059 if (!(ratio <= thresh)) {
12060
12061 if (debug == 3) {
12062 printf("\n\t\tTest # %d\n", test_count);
12063 printf("y type : z, a type : d, x type : z\n");
12064 printf("Seed = %d\t", saved_seed);
12065 printf("n %d, m %d\n", n, m);
12066 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
12067 ldb, incx, incx);
12068
12069 if (order_type == blas_rowmajor)
12070 printf("row ");
12071 else
12072 printf("col ");
12073
12074 printf("NORM %d, ALPHA %d, BETA %d\n",
12075 norm, alpha_val, beta_val);
12076 printf("randomize %d\n", randomize_val);
12077
12078 /* print out info */
12079 printf("alpha = ");
12080 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
12081 printf(" ");
12082 printf("beta = ");
12083 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
12084 printf("\n");
12085 printf("alpha_use = ");
12086 printf("(%24.16e, %24.16e)", alpha_use[0],
12087 alpha_use[1]);;
12088 printf("\n");
12089
12090 dge_print_matrix(a, m_i, n_i, lda, order_type,
12091 "A");
12092 dge_print_matrix(B, m_i, n_i, ldb, order_type,
12093 "B");
12094 zprint_vector(x, n_i, incx, "x");
12095
12096 zprint_vector(y, m_i, incy, "y");
12097
12098 zprint_vector(head_r_true, m_i, 1, "head_r_true");
12099
12100 dge_print_matrix(a_use, m_i, n_i, lda, order_type,
12101 "A_use");
12102 dge_print_matrix(B_use, m_i, n_i, ldb, order_type,
12103 "B_use");
12104
12105 dprint_vector(ratios, m_i, 1, "ratios");
12106 printf("ratio = %g\n", ratio);
12107 fflush(stdout);
12108 }
12109 bad_ratio_count++;
12110 if (bad_ratio_count >= MAX_BAD_TESTS) {
12111 printf("\ntoo many failures, exiting....");
12112 printf("\nTesting and compilation");
12113 printf(" are incomplete\n\n");
12114 goto end;
12115 }
12116 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
12117 printf("\nFlagrant ratio error, exiting...");
12118 printf("\nTesting and compilation");
12119 printf(" are incomplete\n\n");
12120 goto end;
12121 }
12122 }
12123
12124 if (!(ratio <= ratio_max))
12125 ratio_max = ratio;
12126
12127 if (ratio != 0.0 && !(ratio >= ratio_min))
12128 ratio_min = ratio;
12129
12130 } /* end of incy loop */
12131
12132 } /* end of incx loop */
12133
12134 } /* end of randmize loop */
12135
12136 } /* end of ldb loop */
12137
12138 } /* end of lda loop */
12139
12140 } /* end of order loop */
12141
12142 } /* end of nr test loop */
12143
12144 } /* end of norm loop */
12145
12146
12147 } /* end of prec loop */
12148
12149 } /* end of beta loop */
12150
12151 } /* end of alpha loop */
12152
12153 FPU_FIX_STOP;
12154
12155 end:
12156 blas_free(y);
12157 blas_free(a);
12158 blas_free(a_use);
12159 blas_free(B);
12160 blas_free(B_use);
12161 blas_free(x);
12162 blas_free(head_r_true);
12163 blas_free(tail_r_true);
12164 blas_free(ratios);
12165 blas_free(a_vec);
12166 blas_free(x_vec);
12167
12168 *max_ratio = ratio_max;
12169 *min_ratio = ratio_min;
12170 *num_tests = test_count;
12171 *num_bad_ratio = bad_ratio_count;
12172
12173 }
do_test_zge_sum_mv_d_d_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,double * max_ratio,int * num_bad_ratio,int * num_tests)12174 void do_test_zge_sum_mv_d_d_x
12175 (int m, int n,
12176 int ntests, int *seed, double thresh, int debug, float test_prob,
12177 double *min_ratio, double *max_ratio, int *num_bad_ratio, int *num_tests) {
12178
12179 /* Function name */
12180 const char fname[] = "BLAS_zge_sum_mv_d_d_x";
12181
12182 int i;
12183 int yi;
12184 int incyi, y_starti, incx_veci;
12185 int test_count;
12186 int bad_ratio_count;
12187
12188 int ri;
12189 int incri;
12190 int inca, incx, incy;
12191
12192 double ratio;
12193
12194 double ratio_min, ratio_max;
12195
12196 double eps_int; /* internal machine epsilon */
12197 double un_int; /* internal underflow threshold */
12198
12199 double rin[2];
12200 double rout[2];
12201 double head_r_true_elem[2], tail_r_true_elem[2];
12202
12203 enum blas_order_type order_type;
12204 enum blas_prec_type prec;
12205
12206 int order_val;
12207 int lda_val, incx_val, incy_val;
12208 int ldb_val;
12209 int alpha_val, beta_val;
12210 int randomize_val;
12211
12212 int prec_val;
12213
12214 int lda, ldb;
12215 int alpha_flag, beta_flag;
12216 int saved_seed;
12217 int norm;
12218 int test_no;
12219
12220 int n_i, m_i;
12221 int inca_veci;
12222
12223 double alpha[2];
12224 double beta[2];
12225 double beta_zero_fake[2];
12226 double alpha_use[2];
12227 double *a;
12228 double *a_use;
12229 double *B;
12230 double *B_use;
12231 double *x;
12232 double *y;
12233 double *a_vec;
12234 double *x_vec;
12235
12236
12237 double *ratios;
12238
12239 /* true result calculated by testgen, in double-double */
12240 double *head_r_true, *tail_r_true;
12241
12242
12243 FPU_FIX_DECL;
12244
12245 beta_zero_fake[0] = beta_zero_fake[1] = 0.0;
12246
12247 if (n < 0 || ntests < 0)
12248 BLAS_error(fname, -3, n, NULL);
12249
12250 /* initialization */
12251 saved_seed = *seed;
12252 ratio = 0.0;
12253 ratio_min = 1e308;
12254 ratio_max = 0.0;
12255
12256 *num_tests = 0;
12257 *num_bad_ratio = 0;
12258 *min_ratio = 0.0;
12259 *max_ratio = 0.0;
12260
12261 if (n == 0)
12262 return;
12263
12264 FPU_FIX_START;
12265
12266 n_i = n;
12267 m_i = m;
12268
12269 inca = incx = incy = 1;
12270
12271
12272 incy *= 2;
12273
12274 /* allocate memory for arrays */
12275 y = (double *) blas_malloc(4 * m_i * sizeof(double) * 2);
12276 if (4 * m_i > 0 && y == NULL) {
12277 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12278 }
12279 a = (double *) blas_malloc(2 * n_i * m_i * m_i * n_i * sizeof(double));
12280 if (2 * n_i * m_i * m_i * n_i > 0 && a == NULL) {
12281 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12282 }
12283 a_use = (double *) blas_malloc(2 * m_i * n_i * m_i * n_i * sizeof(double));
12284 if (2 * m_i * n_i * m_i * n_i > 0 && a_use == NULL) {
12285 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12286 }
12287 B = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
12288 if (2 * n_i * n_i * m_i * m_i > 0 && B == NULL) {
12289 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12290 }
12291 B_use = (double *) blas_malloc(2 * n_i * n_i * m_i * m_i * sizeof(double));
12292 if (2 * n_i * n_i * m_i * m_i > 0 && B_use == NULL) {
12293 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12294 }
12295 x = (double *) blas_malloc(4 * n_i * sizeof(double));
12296 if (4 * n_i > 0 && x == NULL) {
12297 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12298 }
12299
12300 inca_veci = 1;
12301
12302 a_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
12303 if (2 * n_i > 0 && a_vec == NULL) {
12304 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12305 }
12306 x_vec = (double *) blas_malloc(2 * n_i * sizeof(double));
12307 if (2 * n_i > 0 && x_vec == NULL) {
12308 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12309 }
12310 head_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
12311 tail_r_true = (double *) blas_malloc(m_i * sizeof(double) * 2);
12312 if (m_i > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
12313 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12314 }
12315 ratios = (double *) blas_malloc(m_i * sizeof(double));
12316 if (m_i > 0 && ratios == NULL) {
12317 BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12318 }
12319
12320 test_count = 0;
12321 bad_ratio_count = 0;
12322
12323 /* vary alpha */
12324 for (alpha_val = ALPHA_START; alpha_val <= ALPHA_END; alpha_val++) {
12325
12326 alpha_flag = 0;
12327 switch (alpha_val) {
12328 case 0:
12329 alpha[0] = alpha[1] = 0.0;
12330 alpha_flag = 1;
12331 break;
12332 case 1:
12333 alpha[0] = 1.0;
12334 alpha[1] = 0.0;
12335 alpha_flag = 1;
12336 break;
12337 }
12338
12339 /* vary beta */
12340 for (beta_val = BETA_START; beta_val <= BETA_END; beta_val++) {
12341 beta_flag = 0;
12342 switch (beta_val) {
12343 case 0:
12344 beta[0] = beta[1] = 0.0;
12345 beta_flag = 1;
12346 break;
12347 case 1:
12348 beta[0] = 1.0;
12349 beta[1] = 0.0;
12350 beta_flag = 1;
12351 break;
12352 }
12353
12354
12355 /* varying extra precs */
12356 for (prec_val = PREC_START; prec_val <= PREC_END; prec_val++) {
12357 switch (prec_val) {
12358 case 0:
12359 eps_int = power(2, -BITS_D);
12360 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
12361 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
12362 prec = blas_prec_double;
12363 break;
12364 case 1:
12365 eps_int = power(2, -BITS_D);
12366 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
12367 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
12368 prec = blas_prec_double;
12369 break;
12370 case 2:
12371 default:
12372 eps_int = power(2, -BITS_E);
12373 un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
12374 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
12375 prec = blas_prec_extra;
12376 break;
12377 }
12378
12379 /* vary norm -- underflow, approx 1, overflow */
12380 for (norm = NORM_START; norm <= NORM_END; norm++) {
12381
12382 /* number of tests */
12383 for (test_no = 0; test_no < ntests; test_no++) {
12384
12385
12386 /* vary storage format */
12387 for (order_val = ORDER_START; order_val <= ORDER_END; order_val++) {
12388
12389 order_type = (order_val == 0) ? blas_colmajor : blas_rowmajor;
12390
12391 /* vary lda = n_i, n_i+1, 2*n_i */
12392 for (lda_val = LDA_START; lda_val <= LDA_END; lda_val++) {
12393
12394 if (order_type == blas_rowmajor) {
12395 lda = (lda_val == 0) ? n_i :
12396 (lda_val == 1) ? n_i + 1 : n_i * n_i;
12397 } else {
12398 lda = (lda_val == 0) ? m_i :
12399 (lda_val == 1) ? m_i + 1 : m_i * m_i;
12400 }
12401
12402 /* vary ldb = n_i, n_i+1, 2*n_i */
12403 for (ldb_val = LDA_START; ldb_val <= LDA_END; ldb_val++) {
12404
12405 if (order_type == blas_rowmajor) {
12406 ldb = (ldb_val == 0) ? n_i :
12407 (ldb_val == 1) ? n_i + 1 : n_i * n_i;
12408 } else {
12409 ldb = (ldb_val == 0) ? m_i :
12410 (ldb_val == 1) ? m_i + 1 : m_i * m_i;
12411 }
12412
12413 for (randomize_val = RANDOMIZE_START;
12414 randomize_val <= RANDOMIZE_END; randomize_val++) {
12415
12416 /* For the sake of speed, we throw out this case at random */
12417 if (xrand(seed) >= test_prob)
12418 continue;
12419
12420 /* finally we are here to generate the test case */
12421 /* alpha_use, a_use, B_use are the generated alpha, a, B
12422 * before any scaling.
12423 * That is, in the generator, alpha == beta == alpha_use
12424 * before scaling. */
12425
12426 saved_seed = *seed;
12427 BLAS_zge_sum_mv_d_d_testgen(norm, order_type,
12428 m, n, randomize_val, &alpha,
12429 alpha_flag, &beta, beta_flag,
12430 a, lda, B, ldb, x_vec, 1,
12431 &alpha_use, a_use, B_use,
12432 seed, head_r_true,
12433 tail_r_true);
12434
12435 /* vary incx = 1, 2 */
12436 for (incx_val = INCX_START; incx_val <= INCX_END;
12437 incx_val++) {
12438
12439 incx = incx_val;
12440 if (0 == incx)
12441 continue;
12442
12443 dcopy_vector(x_vec, n_i, 1, x, incx);
12444
12445 /* vary incy = 1, 2 */
12446 for (incy_val = INCY_START; incy_val <= INCY_END;
12447 incy_val++) {
12448
12449 incy = incy_val;
12450 if (0 == incy)
12451 continue;
12452
12453 test_count++;
12454
12455 /* call ge_sum_mv routines to be tested */
12456 FPU_FIX_STOP;
12457 BLAS_zge_sum_mv_d_d_x(order_type,
12458 m, n, alpha, a, lda, x, incx,
12459 beta, B, ldb, y, incy, prec);
12460 FPU_FIX_START;
12461
12462 /* now compute the ratio using test_BLAS_xdot */
12463 /* copy a row from A, use x, run
12464 dot test */
12465
12466 incyi = incy;
12467
12468 incri = 1;
12469 incx_veci = 1;
12470
12471 incyi *= 2;
12472 incri *= 2;
12473 if (incy < 0) {
12474 y_starti = (-m_i + 1) * incyi;
12475 } else {
12476 y_starti = 0;
12477 }
12478 /* make two copies of x into x_vec. redundant */
12479 dcopy_vector(x, n_i, incx, x_vec, 1);
12480 dcopy_vector(x, n_i, incx,
12481 (x_vec + (n_i * incx_veci)), 1);
12482 for (i = 0, yi = y_starti, ri = 0; i < m_i;
12483 i++, yi += incyi, ri += incri) {
12484 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
12485 a_use, lda, a_vec, i);
12486 dge_copy_row(order_type, blas_no_trans, m_i, n_i,
12487 B_use, ldb, (a_vec + inca_veci * n_i),
12488 i);
12489
12490 rin[0] = rin[1] = 0.0;
12491 rout[0] = y[yi];
12492 rout[1] = y[yi + 1];
12493 head_r_true_elem[0] = head_r_true[ri];
12494 head_r_true_elem[1] = head_r_true[ri + 1];
12495 tail_r_true_elem[0] = tail_r_true[ri];
12496 tail_r_true_elem[1] = tail_r_true[ri + 1];
12497
12498 test_BLAS_zdot_d_d(2 * n_i,
12499 blas_no_conj,
12500 alpha_use, beta_zero_fake, rin,
12501 rout, head_r_true_elem,
12502 tail_r_true_elem, a_vec, 1,
12503 x_vec, 1, eps_int, un_int,
12504 &ratios[i]);
12505
12506 /* take the max ratio */
12507 if (i == 0) {
12508 ratio = ratios[0];
12509 /* The !<= below causes NaN errors
12510 * to be included.
12511 * Note that (NaN > 0) is false */
12512 } else if (!(ratios[i] <= ratio)) {
12513 ratio = ratios[i];
12514 }
12515 } /* end of dot-test loop */
12516
12517 /* The !<= below causes NaN errors
12518 * to be included.
12519 * Note that (NaN > 0) is false */
12520 if (!(ratio <= thresh)) {
12521
12522 if (debug == 3) {
12523 printf("\n\t\tTest # %d\n", test_count);
12524 printf("y type : z, a type : d, x type : d\n");
12525 printf("Seed = %d\t", saved_seed);
12526 printf("n %d, m %d\n", n, m);
12527 printf("LDA %d LDB %d, INCX %d INCY %d\n", lda,
12528 ldb, incx, incx);
12529
12530 if (order_type == blas_rowmajor)
12531 printf("row ");
12532 else
12533 printf("col ");
12534
12535 printf("NORM %d, ALPHA %d, BETA %d\n",
12536 norm, alpha_val, beta_val);
12537 printf("randomize %d\n", randomize_val);
12538
12539 /* print out info */
12540 printf("alpha = ");
12541 printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);;
12542 printf(" ");
12543 printf("beta = ");
12544 printf("(%24.16e, %24.16e)", beta[0], beta[1]);;
12545 printf("\n");
12546 printf("alpha_use = ");
12547 printf("(%24.16e, %24.16e)", alpha_use[0],
12548 alpha_use[1]);;
12549 printf("\n");
12550
12551 dge_print_matrix(a, m_i, n_i, lda, order_type,
12552 "A");
12553 dge_print_matrix(B, m_i, n_i, ldb, order_type,
12554 "B");
12555 dprint_vector(x, n_i, incx, "x");
12556
12557 zprint_vector(y, m_i, incy, "y");
12558
12559 zprint_vector(head_r_true, m_i, 1, "head_r_true");
12560
12561 dge_print_matrix(a_use, m_i, n_i, lda, order_type,
12562 "A_use");
12563 dge_print_matrix(B_use, m_i, n_i, ldb, order_type,
12564 "B_use");
12565
12566 dprint_vector(ratios, m_i, 1, "ratios");
12567 printf("ratio = %g\n", ratio);
12568 fflush(stdout);
12569 }
12570 bad_ratio_count++;
12571 if (bad_ratio_count >= MAX_BAD_TESTS) {
12572 printf("\ntoo many failures, exiting....");
12573 printf("\nTesting and compilation");
12574 printf(" are incomplete\n\n");
12575 goto end;
12576 }
12577 if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
12578 printf("\nFlagrant ratio error, exiting...");
12579 printf("\nTesting and compilation");
12580 printf(" are incomplete\n\n");
12581 goto end;
12582 }
12583 }
12584
12585 if (!(ratio <= ratio_max))
12586 ratio_max = ratio;
12587
12588 if (ratio != 0.0 && !(ratio >= ratio_min))
12589 ratio_min = ratio;
12590
12591 } /* end of incy loop */
12592
12593 } /* end of incx loop */
12594
12595 } /* end of randmize loop */
12596
12597 } /* end of ldb loop */
12598
12599 } /* end of lda loop */
12600
12601 } /* end of order loop */
12602
12603 } /* end of nr test loop */
12604
12605 } /* end of norm loop */
12606
12607
12608 } /* end of prec loop */
12609
12610 } /* end of beta loop */
12611
12612 } /* end of alpha loop */
12613
12614 FPU_FIX_STOP;
12615
12616 end:
12617 blas_free(y);
12618 blas_free(a);
12619 blas_free(a_use);
12620 blas_free(B);
12621 blas_free(B_use);
12622 blas_free(x);
12623 blas_free(head_r_true);
12624 blas_free(tail_r_true);
12625 blas_free(ratios);
12626 blas_free(a_vec);
12627 blas_free(x_vec);
12628
12629 *max_ratio = ratio_max;
12630 *min_ratio = ratio_min;
12631 *num_tests = test_count;
12632 *num_bad_ratio = bad_ratio_count;
12633
12634 }
12635
main(int argc,char ** argv)12636 int main(int argc, char **argv)
12637 {
12638 int nsizes, ntests, debug;
12639 double thresh, test_prob;
12640 double total_min_ratio, total_max_ratio;
12641 int total_bad_ratios;
12642 int seed, num_bad_ratio, num_tests;
12643 int total_tests, nr_failed_routines = 0, nr_routines = 0;
12644 double min_ratio, max_ratio;
12645 const char *base_routine = "ge_sum_mv";
12646 char *fname;
12647 int n;
12648
12649 int m, i;
12650 int n_data[NUM_DATA][2] =
12651 { {1, 1}, {1, 2}, {3, 2}, {8, 6}, {9, 10}, {4, 4}, {7, 7} };
12652
12653 if (argc != 6) {
12654 printf("Usage:\n");
12655 printf
12656 ("do_test_ge_sum_mv <nsizes> <ntests> <thresh> <debug> <test_prob>\n");
12657 printf(" <nsizes>: number of sizes to be run.\n");
12658 printf
12659 (" <ntests>: the number of tests performed for each set of attributes\n");
12660 printf
12661 (" <thresh>: to catch bad ratios if it is greater than <thresh>\n");
12662 printf(" <debug>: 0, 1, 2, or 3; \n");
12663 printf(" if 0, no printing \n");
12664 printf(" if 1, print error summary only if tests fail\n");
12665 printf(" if 2, print error summary for each n\n");
12666 printf(" if 3, print complete info each test fails \n");
12667 printf("<test_prob>: probability of preforming a given \n");
12668 printf(" test case: 0.0 does no tests, 1.0 does all tests\n");
12669 return -1;
12670 } else {
12671 nsizes = atoi(argv[1]);
12672 ntests = atoi(argv[2]);
12673 thresh = atof(argv[3]);
12674 debug = atoi(argv[4]);
12675 test_prob = atof(argv[5]);
12676 }
12677
12678 seed = 1999;
12679
12680 if (nsizes < 0 || ntests < 0 || debug < 0 || debug > 3)
12681 BLAS_error("Testing ge_sum_mv", 0, 0, NULL);
12682
12683 printf("Testing %s...\n", base_routine);
12684 printf("INPUT: nsizes = %d, ntests = %d, thresh = %4.2f, debug = %d\n\n",
12685 nsizes, ntests, thresh, debug);
12686
12687
12688
12689
12690
12691 fname = "BLAS_dge_sum_mv_d_s";
12692 printf("Testing %s...\n", fname);
12693 total_tests = 0;
12694 total_bad_ratios = 0;
12695 total_min_ratio = 1e308;
12696 total_max_ratio = 0.0;
12697 for (i = 0; i < nsizes; i++) {
12698 m = n_data[i][0];
12699 n = n_data[i][1];
12700
12701 do_test_dge_sum_mv_d_s(m, n,
12702 ntests, &seed, thresh, debug,
12703 test_prob,
12704 &min_ratio, &max_ratio, &num_bad_ratio,
12705 &num_tests);
12706
12707 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
12708 printf(" [%d %d]: ", n, n);
12709 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
12710 num_bad_ratio, num_tests, min_ratio, max_ratio);
12711 }
12712
12713 total_tests += num_tests;
12714 total_bad_ratios += num_bad_ratio;
12715 if (total_min_ratio > min_ratio)
12716 total_min_ratio = min_ratio;
12717 if (total_max_ratio < max_ratio)
12718 total_max_ratio = max_ratio;
12719 }
12720
12721 nr_routines++;
12722 if (total_bad_ratios == 0)
12723 printf("PASS> ");
12724 else {
12725 printf("FAIL> ");
12726 nr_failed_routines++;
12727 }
12728 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
12729 fname, total_bad_ratios, total_tests, max_ratio);
12730
12731 fname = "BLAS_dge_sum_mv_s_d";
12732 printf("Testing %s...\n", fname);
12733 total_tests = 0;
12734 total_bad_ratios = 0;
12735 total_min_ratio = 1e308;
12736 total_max_ratio = 0.0;
12737 for (i = 0; i < nsizes; i++) {
12738 m = n_data[i][0];
12739 n = n_data[i][1];
12740
12741 do_test_dge_sum_mv_s_d(m, n,
12742 ntests, &seed, thresh, debug,
12743 test_prob,
12744 &min_ratio, &max_ratio, &num_bad_ratio,
12745 &num_tests);
12746
12747 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
12748 printf(" [%d %d]: ", n, n);
12749 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
12750 num_bad_ratio, num_tests, min_ratio, max_ratio);
12751 }
12752
12753 total_tests += num_tests;
12754 total_bad_ratios += num_bad_ratio;
12755 if (total_min_ratio > min_ratio)
12756 total_min_ratio = min_ratio;
12757 if (total_max_ratio < max_ratio)
12758 total_max_ratio = max_ratio;
12759 }
12760
12761 nr_routines++;
12762 if (total_bad_ratios == 0)
12763 printf("PASS> ");
12764 else {
12765 printf("FAIL> ");
12766 nr_failed_routines++;
12767 }
12768 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
12769 fname, total_bad_ratios, total_tests, max_ratio);
12770
12771 fname = "BLAS_dge_sum_mv_s_s";
12772 printf("Testing %s...\n", fname);
12773 total_tests = 0;
12774 total_bad_ratios = 0;
12775 total_min_ratio = 1e308;
12776 total_max_ratio = 0.0;
12777 for (i = 0; i < nsizes; i++) {
12778 m = n_data[i][0];
12779 n = n_data[i][1];
12780
12781 do_test_dge_sum_mv_s_s(m, n,
12782 ntests, &seed, thresh, debug,
12783 test_prob,
12784 &min_ratio, &max_ratio, &num_bad_ratio,
12785 &num_tests);
12786
12787 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
12788 printf(" [%d %d]: ", n, n);
12789 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
12790 num_bad_ratio, num_tests, min_ratio, max_ratio);
12791 }
12792
12793 total_tests += num_tests;
12794 total_bad_ratios += num_bad_ratio;
12795 if (total_min_ratio > min_ratio)
12796 total_min_ratio = min_ratio;
12797 if (total_max_ratio < max_ratio)
12798 total_max_ratio = max_ratio;
12799 }
12800
12801 nr_routines++;
12802 if (total_bad_ratios == 0)
12803 printf("PASS> ");
12804 else {
12805 printf("FAIL> ");
12806 nr_failed_routines++;
12807 }
12808 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
12809 fname, total_bad_ratios, total_tests, max_ratio);
12810
12811 fname = "BLAS_zge_sum_mv_z_c";
12812 printf("Testing %s...\n", fname);
12813 total_tests = 0;
12814 total_bad_ratios = 0;
12815 total_min_ratio = 1e308;
12816 total_max_ratio = 0.0;
12817 for (i = 0; i < nsizes; i++) {
12818 m = n_data[i][0];
12819 n = n_data[i][1];
12820
12821 do_test_zge_sum_mv_z_c(m, n,
12822 ntests, &seed, thresh, debug,
12823 test_prob,
12824 &min_ratio, &max_ratio, &num_bad_ratio,
12825 &num_tests);
12826
12827 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
12828 printf(" [%d %d]: ", n, n);
12829 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
12830 num_bad_ratio, num_tests, min_ratio, max_ratio);
12831 }
12832
12833 total_tests += num_tests;
12834 total_bad_ratios += num_bad_ratio;
12835 if (total_min_ratio > min_ratio)
12836 total_min_ratio = min_ratio;
12837 if (total_max_ratio < max_ratio)
12838 total_max_ratio = max_ratio;
12839 }
12840
12841 nr_routines++;
12842 if (total_bad_ratios == 0)
12843 printf("PASS> ");
12844 else {
12845 printf("FAIL> ");
12846 nr_failed_routines++;
12847 }
12848 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
12849 fname, total_bad_ratios, total_tests, max_ratio);
12850
12851 fname = "BLAS_zge_sum_mv_c_z";
12852 printf("Testing %s...\n", fname);
12853 total_tests = 0;
12854 total_bad_ratios = 0;
12855 total_min_ratio = 1e308;
12856 total_max_ratio = 0.0;
12857 for (i = 0; i < nsizes; i++) {
12858 m = n_data[i][0];
12859 n = n_data[i][1];
12860
12861 do_test_zge_sum_mv_c_z(m, n,
12862 ntests, &seed, thresh, debug,
12863 test_prob,
12864 &min_ratio, &max_ratio, &num_bad_ratio,
12865 &num_tests);
12866
12867 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
12868 printf(" [%d %d]: ", n, n);
12869 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
12870 num_bad_ratio, num_tests, min_ratio, max_ratio);
12871 }
12872
12873 total_tests += num_tests;
12874 total_bad_ratios += num_bad_ratio;
12875 if (total_min_ratio > min_ratio)
12876 total_min_ratio = min_ratio;
12877 if (total_max_ratio < max_ratio)
12878 total_max_ratio = max_ratio;
12879 }
12880
12881 nr_routines++;
12882 if (total_bad_ratios == 0)
12883 printf("PASS> ");
12884 else {
12885 printf("FAIL> ");
12886 nr_failed_routines++;
12887 }
12888 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
12889 fname, total_bad_ratios, total_tests, max_ratio);
12890
12891 fname = "BLAS_zge_sum_mv_c_c";
12892 printf("Testing %s...\n", fname);
12893 total_tests = 0;
12894 total_bad_ratios = 0;
12895 total_min_ratio = 1e308;
12896 total_max_ratio = 0.0;
12897 for (i = 0; i < nsizes; i++) {
12898 m = n_data[i][0];
12899 n = n_data[i][1];
12900
12901 do_test_zge_sum_mv_c_c(m, n,
12902 ntests, &seed, thresh, debug,
12903 test_prob,
12904 &min_ratio, &max_ratio, &num_bad_ratio,
12905 &num_tests);
12906
12907 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
12908 printf(" [%d %d]: ", n, n);
12909 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
12910 num_bad_ratio, num_tests, min_ratio, max_ratio);
12911 }
12912
12913 total_tests += num_tests;
12914 total_bad_ratios += num_bad_ratio;
12915 if (total_min_ratio > min_ratio)
12916 total_min_ratio = min_ratio;
12917 if (total_max_ratio < max_ratio)
12918 total_max_ratio = max_ratio;
12919 }
12920
12921 nr_routines++;
12922 if (total_bad_ratios == 0)
12923 printf("PASS> ");
12924 else {
12925 printf("FAIL> ");
12926 nr_failed_routines++;
12927 }
12928 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
12929 fname, total_bad_ratios, total_tests, max_ratio);
12930
12931 fname = "BLAS_cge_sum_mv_c_s";
12932 printf("Testing %s...\n", fname);
12933 total_tests = 0;
12934 total_bad_ratios = 0;
12935 total_min_ratio = 1e308;
12936 total_max_ratio = 0.0;
12937 for (i = 0; i < nsizes; i++) {
12938 m = n_data[i][0];
12939 n = n_data[i][1];
12940
12941 do_test_cge_sum_mv_c_s(m, n,
12942 ntests, &seed, thresh, debug,
12943 test_prob,
12944 &min_ratio, &max_ratio, &num_bad_ratio,
12945 &num_tests);
12946
12947 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
12948 printf(" [%d %d]: ", n, n);
12949 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
12950 num_bad_ratio, num_tests, min_ratio, max_ratio);
12951 }
12952
12953 total_tests += num_tests;
12954 total_bad_ratios += num_bad_ratio;
12955 if (total_min_ratio > min_ratio)
12956 total_min_ratio = min_ratio;
12957 if (total_max_ratio < max_ratio)
12958 total_max_ratio = max_ratio;
12959 }
12960
12961 nr_routines++;
12962 if (total_bad_ratios == 0)
12963 printf("PASS> ");
12964 else {
12965 printf("FAIL> ");
12966 nr_failed_routines++;
12967 }
12968 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
12969 fname, total_bad_ratios, total_tests, max_ratio);
12970
12971 fname = "BLAS_cge_sum_mv_s_c";
12972 printf("Testing %s...\n", fname);
12973 total_tests = 0;
12974 total_bad_ratios = 0;
12975 total_min_ratio = 1e308;
12976 total_max_ratio = 0.0;
12977 for (i = 0; i < nsizes; i++) {
12978 m = n_data[i][0];
12979 n = n_data[i][1];
12980
12981 do_test_cge_sum_mv_s_c(m, n,
12982 ntests, &seed, thresh, debug,
12983 test_prob,
12984 &min_ratio, &max_ratio, &num_bad_ratio,
12985 &num_tests);
12986
12987 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
12988 printf(" [%d %d]: ", n, n);
12989 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
12990 num_bad_ratio, num_tests, min_ratio, max_ratio);
12991 }
12992
12993 total_tests += num_tests;
12994 total_bad_ratios += num_bad_ratio;
12995 if (total_min_ratio > min_ratio)
12996 total_min_ratio = min_ratio;
12997 if (total_max_ratio < max_ratio)
12998 total_max_ratio = max_ratio;
12999 }
13000
13001 nr_routines++;
13002 if (total_bad_ratios == 0)
13003 printf("PASS> ");
13004 else {
13005 printf("FAIL> ");
13006 nr_failed_routines++;
13007 }
13008 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13009 fname, total_bad_ratios, total_tests, max_ratio);
13010
13011 fname = "BLAS_cge_sum_mv_s_s";
13012 printf("Testing %s...\n", fname);
13013 total_tests = 0;
13014 total_bad_ratios = 0;
13015 total_min_ratio = 1e308;
13016 total_max_ratio = 0.0;
13017 for (i = 0; i < nsizes; i++) {
13018 m = n_data[i][0];
13019 n = n_data[i][1];
13020
13021 do_test_cge_sum_mv_s_s(m, n,
13022 ntests, &seed, thresh, debug,
13023 test_prob,
13024 &min_ratio, &max_ratio, &num_bad_ratio,
13025 &num_tests);
13026
13027 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13028 printf(" [%d %d]: ", n, n);
13029 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13030 num_bad_ratio, num_tests, min_ratio, max_ratio);
13031 }
13032
13033 total_tests += num_tests;
13034 total_bad_ratios += num_bad_ratio;
13035 if (total_min_ratio > min_ratio)
13036 total_min_ratio = min_ratio;
13037 if (total_max_ratio < max_ratio)
13038 total_max_ratio = max_ratio;
13039 }
13040
13041 nr_routines++;
13042 if (total_bad_ratios == 0)
13043 printf("PASS> ");
13044 else {
13045 printf("FAIL> ");
13046 nr_failed_routines++;
13047 }
13048 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13049 fname, total_bad_ratios, total_tests, max_ratio);
13050
13051 fname = "BLAS_zge_sum_mv_z_d";
13052 printf("Testing %s...\n", fname);
13053 total_tests = 0;
13054 total_bad_ratios = 0;
13055 total_min_ratio = 1e308;
13056 total_max_ratio = 0.0;
13057 for (i = 0; i < nsizes; i++) {
13058 m = n_data[i][0];
13059 n = n_data[i][1];
13060
13061 do_test_zge_sum_mv_z_d(m, n,
13062 ntests, &seed, thresh, debug,
13063 test_prob,
13064 &min_ratio, &max_ratio, &num_bad_ratio,
13065 &num_tests);
13066
13067 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13068 printf(" [%d %d]: ", n, n);
13069 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13070 num_bad_ratio, num_tests, min_ratio, max_ratio);
13071 }
13072
13073 total_tests += num_tests;
13074 total_bad_ratios += num_bad_ratio;
13075 if (total_min_ratio > min_ratio)
13076 total_min_ratio = min_ratio;
13077 if (total_max_ratio < max_ratio)
13078 total_max_ratio = max_ratio;
13079 }
13080
13081 nr_routines++;
13082 if (total_bad_ratios == 0)
13083 printf("PASS> ");
13084 else {
13085 printf("FAIL> ");
13086 nr_failed_routines++;
13087 }
13088 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13089 fname, total_bad_ratios, total_tests, max_ratio);
13090
13091 fname = "BLAS_zge_sum_mv_d_z";
13092 printf("Testing %s...\n", fname);
13093 total_tests = 0;
13094 total_bad_ratios = 0;
13095 total_min_ratio = 1e308;
13096 total_max_ratio = 0.0;
13097 for (i = 0; i < nsizes; i++) {
13098 m = n_data[i][0];
13099 n = n_data[i][1];
13100
13101 do_test_zge_sum_mv_d_z(m, n,
13102 ntests, &seed, thresh, debug,
13103 test_prob,
13104 &min_ratio, &max_ratio, &num_bad_ratio,
13105 &num_tests);
13106
13107 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13108 printf(" [%d %d]: ", n, n);
13109 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13110 num_bad_ratio, num_tests, min_ratio, max_ratio);
13111 }
13112
13113 total_tests += num_tests;
13114 total_bad_ratios += num_bad_ratio;
13115 if (total_min_ratio > min_ratio)
13116 total_min_ratio = min_ratio;
13117 if (total_max_ratio < max_ratio)
13118 total_max_ratio = max_ratio;
13119 }
13120
13121 nr_routines++;
13122 if (total_bad_ratios == 0)
13123 printf("PASS> ");
13124 else {
13125 printf("FAIL> ");
13126 nr_failed_routines++;
13127 }
13128 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13129 fname, total_bad_ratios, total_tests, max_ratio);
13130
13131 fname = "BLAS_zge_sum_mv_d_d";
13132 printf("Testing %s...\n", fname);
13133 total_tests = 0;
13134 total_bad_ratios = 0;
13135 total_min_ratio = 1e308;
13136 total_max_ratio = 0.0;
13137 for (i = 0; i < nsizes; i++) {
13138 m = n_data[i][0];
13139 n = n_data[i][1];
13140
13141 do_test_zge_sum_mv_d_d(m, n,
13142 ntests, &seed, thresh, debug,
13143 test_prob,
13144 &min_ratio, &max_ratio, &num_bad_ratio,
13145 &num_tests);
13146
13147 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13148 printf(" [%d %d]: ", n, n);
13149 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13150 num_bad_ratio, num_tests, min_ratio, max_ratio);
13151 }
13152
13153 total_tests += num_tests;
13154 total_bad_ratios += num_bad_ratio;
13155 if (total_min_ratio > min_ratio)
13156 total_min_ratio = min_ratio;
13157 if (total_max_ratio < max_ratio)
13158 total_max_ratio = max_ratio;
13159 }
13160
13161 nr_routines++;
13162 if (total_bad_ratios == 0)
13163 printf("PASS> ");
13164 else {
13165 printf("FAIL> ");
13166 nr_failed_routines++;
13167 }
13168 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13169 fname, total_bad_ratios, total_tests, max_ratio);
13170
13171 fname = "BLAS_sge_sum_mv_x";
13172 printf("Testing %s...\n", fname);
13173 total_tests = 0;
13174 total_bad_ratios = 0;
13175 total_min_ratio = 1e308;
13176 total_max_ratio = 0.0;
13177 for (i = 0; i < nsizes; i++) {
13178 m = n_data[i][0];
13179 n = n_data[i][1];
13180
13181 do_test_sge_sum_mv_x(m, n,
13182 ntests, &seed, thresh, debug,
13183 test_prob,
13184 &min_ratio, &max_ratio, &num_bad_ratio, &num_tests);
13185
13186 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13187 printf(" [%d %d]: ", n, n);
13188 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13189 num_bad_ratio, num_tests, min_ratio, max_ratio);
13190 }
13191
13192 total_tests += num_tests;
13193 total_bad_ratios += num_bad_ratio;
13194 if (total_min_ratio > min_ratio)
13195 total_min_ratio = min_ratio;
13196 if (total_max_ratio < max_ratio)
13197 total_max_ratio = max_ratio;
13198 }
13199
13200 nr_routines++;
13201 if (total_bad_ratios == 0)
13202 printf("PASS> ");
13203 else {
13204 printf("FAIL> ");
13205 nr_failed_routines++;
13206 }
13207 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13208 fname, total_bad_ratios, total_tests, max_ratio);
13209
13210 fname = "BLAS_dge_sum_mv_x";
13211 printf("Testing %s...\n", fname);
13212 total_tests = 0;
13213 total_bad_ratios = 0;
13214 total_min_ratio = 1e308;
13215 total_max_ratio = 0.0;
13216 for (i = 0; i < nsizes; i++) {
13217 m = n_data[i][0];
13218 n = n_data[i][1];
13219
13220 do_test_dge_sum_mv_x(m, n,
13221 ntests, &seed, thresh, debug,
13222 test_prob,
13223 &min_ratio, &max_ratio, &num_bad_ratio, &num_tests);
13224
13225 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13226 printf(" [%d %d]: ", n, n);
13227 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13228 num_bad_ratio, num_tests, min_ratio, max_ratio);
13229 }
13230
13231 total_tests += num_tests;
13232 total_bad_ratios += num_bad_ratio;
13233 if (total_min_ratio > min_ratio)
13234 total_min_ratio = min_ratio;
13235 if (total_max_ratio < max_ratio)
13236 total_max_ratio = max_ratio;
13237 }
13238
13239 nr_routines++;
13240 if (total_bad_ratios == 0)
13241 printf("PASS> ");
13242 else {
13243 printf("FAIL> ");
13244 nr_failed_routines++;
13245 }
13246 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13247 fname, total_bad_ratios, total_tests, max_ratio);
13248
13249 fname = "BLAS_cge_sum_mv_x";
13250 printf("Testing %s...\n", fname);
13251 total_tests = 0;
13252 total_bad_ratios = 0;
13253 total_min_ratio = 1e308;
13254 total_max_ratio = 0.0;
13255 for (i = 0; i < nsizes; i++) {
13256 m = n_data[i][0];
13257 n = n_data[i][1];
13258
13259 do_test_cge_sum_mv_x(m, n,
13260 ntests, &seed, thresh, debug,
13261 test_prob,
13262 &min_ratio, &max_ratio, &num_bad_ratio, &num_tests);
13263
13264 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13265 printf(" [%d %d]: ", n, n);
13266 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13267 num_bad_ratio, num_tests, min_ratio, max_ratio);
13268 }
13269
13270 total_tests += num_tests;
13271 total_bad_ratios += num_bad_ratio;
13272 if (total_min_ratio > min_ratio)
13273 total_min_ratio = min_ratio;
13274 if (total_max_ratio < max_ratio)
13275 total_max_ratio = max_ratio;
13276 }
13277
13278 nr_routines++;
13279 if (total_bad_ratios == 0)
13280 printf("PASS> ");
13281 else {
13282 printf("FAIL> ");
13283 nr_failed_routines++;
13284 }
13285 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13286 fname, total_bad_ratios, total_tests, max_ratio);
13287
13288 fname = "BLAS_zge_sum_mv_x";
13289 printf("Testing %s...\n", fname);
13290 total_tests = 0;
13291 total_bad_ratios = 0;
13292 total_min_ratio = 1e308;
13293 total_max_ratio = 0.0;
13294 for (i = 0; i < nsizes; i++) {
13295 m = n_data[i][0];
13296 n = n_data[i][1];
13297
13298 do_test_zge_sum_mv_x(m, n,
13299 ntests, &seed, thresh, debug,
13300 test_prob,
13301 &min_ratio, &max_ratio, &num_bad_ratio, &num_tests);
13302
13303 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13304 printf(" [%d %d]: ", n, n);
13305 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13306 num_bad_ratio, num_tests, min_ratio, max_ratio);
13307 }
13308
13309 total_tests += num_tests;
13310 total_bad_ratios += num_bad_ratio;
13311 if (total_min_ratio > min_ratio)
13312 total_min_ratio = min_ratio;
13313 if (total_max_ratio < max_ratio)
13314 total_max_ratio = max_ratio;
13315 }
13316
13317 nr_routines++;
13318 if (total_bad_ratios == 0)
13319 printf("PASS> ");
13320 else {
13321 printf("FAIL> ");
13322 nr_failed_routines++;
13323 }
13324 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13325 fname, total_bad_ratios, total_tests, max_ratio);
13326
13327 fname = "BLAS_dge_sum_mv_d_s_x";
13328 printf("Testing %s...\n", fname);
13329 total_tests = 0;
13330 total_bad_ratios = 0;
13331 total_min_ratio = 1e308;
13332 total_max_ratio = 0.0;
13333 for (i = 0; i < nsizes; i++) {
13334 m = n_data[i][0];
13335 n = n_data[i][1];
13336
13337 do_test_dge_sum_mv_d_s_x(m, n,
13338 ntests, &seed, thresh, debug,
13339 test_prob,
13340 &min_ratio, &max_ratio, &num_bad_ratio,
13341 &num_tests);
13342
13343 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13344 printf(" [%d %d]: ", n, n);
13345 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13346 num_bad_ratio, num_tests, min_ratio, max_ratio);
13347 }
13348
13349 total_tests += num_tests;
13350 total_bad_ratios += num_bad_ratio;
13351 if (total_min_ratio > min_ratio)
13352 total_min_ratio = min_ratio;
13353 if (total_max_ratio < max_ratio)
13354 total_max_ratio = max_ratio;
13355 }
13356
13357 nr_routines++;
13358 if (total_bad_ratios == 0)
13359 printf("PASS> ");
13360 else {
13361 printf("FAIL> ");
13362 nr_failed_routines++;
13363 }
13364 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13365 fname, total_bad_ratios, total_tests, max_ratio);
13366
13367 fname = "BLAS_dge_sum_mv_s_d_x";
13368 printf("Testing %s...\n", fname);
13369 total_tests = 0;
13370 total_bad_ratios = 0;
13371 total_min_ratio = 1e308;
13372 total_max_ratio = 0.0;
13373 for (i = 0; i < nsizes; i++) {
13374 m = n_data[i][0];
13375 n = n_data[i][1];
13376
13377 do_test_dge_sum_mv_s_d_x(m, n,
13378 ntests, &seed, thresh, debug,
13379 test_prob,
13380 &min_ratio, &max_ratio, &num_bad_ratio,
13381 &num_tests);
13382
13383 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13384 printf(" [%d %d]: ", n, n);
13385 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13386 num_bad_ratio, num_tests, min_ratio, max_ratio);
13387 }
13388
13389 total_tests += num_tests;
13390 total_bad_ratios += num_bad_ratio;
13391 if (total_min_ratio > min_ratio)
13392 total_min_ratio = min_ratio;
13393 if (total_max_ratio < max_ratio)
13394 total_max_ratio = max_ratio;
13395 }
13396
13397 nr_routines++;
13398 if (total_bad_ratios == 0)
13399 printf("PASS> ");
13400 else {
13401 printf("FAIL> ");
13402 nr_failed_routines++;
13403 }
13404 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13405 fname, total_bad_ratios, total_tests, max_ratio);
13406
13407 fname = "BLAS_dge_sum_mv_s_s_x";
13408 printf("Testing %s...\n", fname);
13409 total_tests = 0;
13410 total_bad_ratios = 0;
13411 total_min_ratio = 1e308;
13412 total_max_ratio = 0.0;
13413 for (i = 0; i < nsizes; i++) {
13414 m = n_data[i][0];
13415 n = n_data[i][1];
13416
13417 do_test_dge_sum_mv_s_s_x(m, n,
13418 ntests, &seed, thresh, debug,
13419 test_prob,
13420 &min_ratio, &max_ratio, &num_bad_ratio,
13421 &num_tests);
13422
13423 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13424 printf(" [%d %d]: ", n, n);
13425 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13426 num_bad_ratio, num_tests, min_ratio, max_ratio);
13427 }
13428
13429 total_tests += num_tests;
13430 total_bad_ratios += num_bad_ratio;
13431 if (total_min_ratio > min_ratio)
13432 total_min_ratio = min_ratio;
13433 if (total_max_ratio < max_ratio)
13434 total_max_ratio = max_ratio;
13435 }
13436
13437 nr_routines++;
13438 if (total_bad_ratios == 0)
13439 printf("PASS> ");
13440 else {
13441 printf("FAIL> ");
13442 nr_failed_routines++;
13443 }
13444 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13445 fname, total_bad_ratios, total_tests, max_ratio);
13446
13447 fname = "BLAS_zge_sum_mv_z_c_x";
13448 printf("Testing %s...\n", fname);
13449 total_tests = 0;
13450 total_bad_ratios = 0;
13451 total_min_ratio = 1e308;
13452 total_max_ratio = 0.0;
13453 for (i = 0; i < nsizes; i++) {
13454 m = n_data[i][0];
13455 n = n_data[i][1];
13456
13457 do_test_zge_sum_mv_z_c_x(m, n,
13458 ntests, &seed, thresh, debug,
13459 test_prob,
13460 &min_ratio, &max_ratio, &num_bad_ratio,
13461 &num_tests);
13462
13463 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13464 printf(" [%d %d]: ", n, n);
13465 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13466 num_bad_ratio, num_tests, min_ratio, max_ratio);
13467 }
13468
13469 total_tests += num_tests;
13470 total_bad_ratios += num_bad_ratio;
13471 if (total_min_ratio > min_ratio)
13472 total_min_ratio = min_ratio;
13473 if (total_max_ratio < max_ratio)
13474 total_max_ratio = max_ratio;
13475 }
13476
13477 nr_routines++;
13478 if (total_bad_ratios == 0)
13479 printf("PASS> ");
13480 else {
13481 printf("FAIL> ");
13482 nr_failed_routines++;
13483 }
13484 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13485 fname, total_bad_ratios, total_tests, max_ratio);
13486
13487 fname = "BLAS_zge_sum_mv_c_z_x";
13488 printf("Testing %s...\n", fname);
13489 total_tests = 0;
13490 total_bad_ratios = 0;
13491 total_min_ratio = 1e308;
13492 total_max_ratio = 0.0;
13493 for (i = 0; i < nsizes; i++) {
13494 m = n_data[i][0];
13495 n = n_data[i][1];
13496
13497 do_test_zge_sum_mv_c_z_x(m, n,
13498 ntests, &seed, thresh, debug,
13499 test_prob,
13500 &min_ratio, &max_ratio, &num_bad_ratio,
13501 &num_tests);
13502
13503 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13504 printf(" [%d %d]: ", n, n);
13505 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13506 num_bad_ratio, num_tests, min_ratio, max_ratio);
13507 }
13508
13509 total_tests += num_tests;
13510 total_bad_ratios += num_bad_ratio;
13511 if (total_min_ratio > min_ratio)
13512 total_min_ratio = min_ratio;
13513 if (total_max_ratio < max_ratio)
13514 total_max_ratio = max_ratio;
13515 }
13516
13517 nr_routines++;
13518 if (total_bad_ratios == 0)
13519 printf("PASS> ");
13520 else {
13521 printf("FAIL> ");
13522 nr_failed_routines++;
13523 }
13524 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13525 fname, total_bad_ratios, total_tests, max_ratio);
13526
13527 fname = "BLAS_zge_sum_mv_c_c_x";
13528 printf("Testing %s...\n", fname);
13529 total_tests = 0;
13530 total_bad_ratios = 0;
13531 total_min_ratio = 1e308;
13532 total_max_ratio = 0.0;
13533 for (i = 0; i < nsizes; i++) {
13534 m = n_data[i][0];
13535 n = n_data[i][1];
13536
13537 do_test_zge_sum_mv_c_c_x(m, n,
13538 ntests, &seed, thresh, debug,
13539 test_prob,
13540 &min_ratio, &max_ratio, &num_bad_ratio,
13541 &num_tests);
13542
13543 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13544 printf(" [%d %d]: ", n, n);
13545 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13546 num_bad_ratio, num_tests, min_ratio, max_ratio);
13547 }
13548
13549 total_tests += num_tests;
13550 total_bad_ratios += num_bad_ratio;
13551 if (total_min_ratio > min_ratio)
13552 total_min_ratio = min_ratio;
13553 if (total_max_ratio < max_ratio)
13554 total_max_ratio = max_ratio;
13555 }
13556
13557 nr_routines++;
13558 if (total_bad_ratios == 0)
13559 printf("PASS> ");
13560 else {
13561 printf("FAIL> ");
13562 nr_failed_routines++;
13563 }
13564 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13565 fname, total_bad_ratios, total_tests, max_ratio);
13566
13567 fname = "BLAS_cge_sum_mv_c_s_x";
13568 printf("Testing %s...\n", fname);
13569 total_tests = 0;
13570 total_bad_ratios = 0;
13571 total_min_ratio = 1e308;
13572 total_max_ratio = 0.0;
13573 for (i = 0; i < nsizes; i++) {
13574 m = n_data[i][0];
13575 n = n_data[i][1];
13576
13577 do_test_cge_sum_mv_c_s_x(m, n,
13578 ntests, &seed, thresh, debug,
13579 test_prob,
13580 &min_ratio, &max_ratio, &num_bad_ratio,
13581 &num_tests);
13582
13583 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13584 printf(" [%d %d]: ", n, n);
13585 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13586 num_bad_ratio, num_tests, min_ratio, max_ratio);
13587 }
13588
13589 total_tests += num_tests;
13590 total_bad_ratios += num_bad_ratio;
13591 if (total_min_ratio > min_ratio)
13592 total_min_ratio = min_ratio;
13593 if (total_max_ratio < max_ratio)
13594 total_max_ratio = max_ratio;
13595 }
13596
13597 nr_routines++;
13598 if (total_bad_ratios == 0)
13599 printf("PASS> ");
13600 else {
13601 printf("FAIL> ");
13602 nr_failed_routines++;
13603 }
13604 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13605 fname, total_bad_ratios, total_tests, max_ratio);
13606
13607 fname = "BLAS_cge_sum_mv_s_c_x";
13608 printf("Testing %s...\n", fname);
13609 total_tests = 0;
13610 total_bad_ratios = 0;
13611 total_min_ratio = 1e308;
13612 total_max_ratio = 0.0;
13613 for (i = 0; i < nsizes; i++) {
13614 m = n_data[i][0];
13615 n = n_data[i][1];
13616
13617 do_test_cge_sum_mv_s_c_x(m, n,
13618 ntests, &seed, thresh, debug,
13619 test_prob,
13620 &min_ratio, &max_ratio, &num_bad_ratio,
13621 &num_tests);
13622
13623 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13624 printf(" [%d %d]: ", n, n);
13625 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13626 num_bad_ratio, num_tests, min_ratio, max_ratio);
13627 }
13628
13629 total_tests += num_tests;
13630 total_bad_ratios += num_bad_ratio;
13631 if (total_min_ratio > min_ratio)
13632 total_min_ratio = min_ratio;
13633 if (total_max_ratio < max_ratio)
13634 total_max_ratio = max_ratio;
13635 }
13636
13637 nr_routines++;
13638 if (total_bad_ratios == 0)
13639 printf("PASS> ");
13640 else {
13641 printf("FAIL> ");
13642 nr_failed_routines++;
13643 }
13644 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13645 fname, total_bad_ratios, total_tests, max_ratio);
13646
13647 fname = "BLAS_cge_sum_mv_s_s_x";
13648 printf("Testing %s...\n", fname);
13649 total_tests = 0;
13650 total_bad_ratios = 0;
13651 total_min_ratio = 1e308;
13652 total_max_ratio = 0.0;
13653 for (i = 0; i < nsizes; i++) {
13654 m = n_data[i][0];
13655 n = n_data[i][1];
13656
13657 do_test_cge_sum_mv_s_s_x(m, n,
13658 ntests, &seed, thresh, debug,
13659 test_prob,
13660 &min_ratio, &max_ratio, &num_bad_ratio,
13661 &num_tests);
13662
13663 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13664 printf(" [%d %d]: ", n, n);
13665 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13666 num_bad_ratio, num_tests, min_ratio, max_ratio);
13667 }
13668
13669 total_tests += num_tests;
13670 total_bad_ratios += num_bad_ratio;
13671 if (total_min_ratio > min_ratio)
13672 total_min_ratio = min_ratio;
13673 if (total_max_ratio < max_ratio)
13674 total_max_ratio = max_ratio;
13675 }
13676
13677 nr_routines++;
13678 if (total_bad_ratios == 0)
13679 printf("PASS> ");
13680 else {
13681 printf("FAIL> ");
13682 nr_failed_routines++;
13683 }
13684 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13685 fname, total_bad_ratios, total_tests, max_ratio);
13686
13687 fname = "BLAS_zge_sum_mv_z_d_x";
13688 printf("Testing %s...\n", fname);
13689 total_tests = 0;
13690 total_bad_ratios = 0;
13691 total_min_ratio = 1e308;
13692 total_max_ratio = 0.0;
13693 for (i = 0; i < nsizes; i++) {
13694 m = n_data[i][0];
13695 n = n_data[i][1];
13696
13697 do_test_zge_sum_mv_z_d_x(m, n,
13698 ntests, &seed, thresh, debug,
13699 test_prob,
13700 &min_ratio, &max_ratio, &num_bad_ratio,
13701 &num_tests);
13702
13703 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13704 printf(" [%d %d]: ", n, n);
13705 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13706 num_bad_ratio, num_tests, min_ratio, max_ratio);
13707 }
13708
13709 total_tests += num_tests;
13710 total_bad_ratios += num_bad_ratio;
13711 if (total_min_ratio > min_ratio)
13712 total_min_ratio = min_ratio;
13713 if (total_max_ratio < max_ratio)
13714 total_max_ratio = max_ratio;
13715 }
13716
13717 nr_routines++;
13718 if (total_bad_ratios == 0)
13719 printf("PASS> ");
13720 else {
13721 printf("FAIL> ");
13722 nr_failed_routines++;
13723 }
13724 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13725 fname, total_bad_ratios, total_tests, max_ratio);
13726
13727 fname = "BLAS_zge_sum_mv_d_z_x";
13728 printf("Testing %s...\n", fname);
13729 total_tests = 0;
13730 total_bad_ratios = 0;
13731 total_min_ratio = 1e308;
13732 total_max_ratio = 0.0;
13733 for (i = 0; i < nsizes; i++) {
13734 m = n_data[i][0];
13735 n = n_data[i][1];
13736
13737 do_test_zge_sum_mv_d_z_x(m, n,
13738 ntests, &seed, thresh, debug,
13739 test_prob,
13740 &min_ratio, &max_ratio, &num_bad_ratio,
13741 &num_tests);
13742
13743 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13744 printf(" [%d %d]: ", n, n);
13745 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13746 num_bad_ratio, num_tests, min_ratio, max_ratio);
13747 }
13748
13749 total_tests += num_tests;
13750 total_bad_ratios += num_bad_ratio;
13751 if (total_min_ratio > min_ratio)
13752 total_min_ratio = min_ratio;
13753 if (total_max_ratio < max_ratio)
13754 total_max_ratio = max_ratio;
13755 }
13756
13757 nr_routines++;
13758 if (total_bad_ratios == 0)
13759 printf("PASS> ");
13760 else {
13761 printf("FAIL> ");
13762 nr_failed_routines++;
13763 }
13764 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13765 fname, total_bad_ratios, total_tests, max_ratio);
13766
13767 fname = "BLAS_zge_sum_mv_d_d_x";
13768 printf("Testing %s...\n", fname);
13769 total_tests = 0;
13770 total_bad_ratios = 0;
13771 total_min_ratio = 1e308;
13772 total_max_ratio = 0.0;
13773 for (i = 0; i < nsizes; i++) {
13774 m = n_data[i][0];
13775 n = n_data[i][1];
13776
13777 do_test_zge_sum_mv_d_d_x(m, n,
13778 ntests, &seed, thresh, debug,
13779 test_prob,
13780 &min_ratio, &max_ratio, &num_bad_ratio,
13781 &num_tests);
13782
13783 if (debug == 2 || (debug == 1 && num_bad_ratio > 0)) {
13784 printf(" [%d %d]: ", n, n);
13785 printf("bad/total = %d/%d, min_ratio = %g, max_ratio = %g\n",
13786 num_bad_ratio, num_tests, min_ratio, max_ratio);
13787 }
13788
13789 total_tests += num_tests;
13790 total_bad_ratios += num_bad_ratio;
13791 if (total_min_ratio > min_ratio)
13792 total_min_ratio = min_ratio;
13793 if (total_max_ratio < max_ratio)
13794 total_max_ratio = max_ratio;
13795 }
13796
13797 nr_routines++;
13798 if (total_bad_ratios == 0)
13799 printf("PASS> ");
13800 else {
13801 printf("FAIL> ");
13802 nr_failed_routines++;
13803 }
13804 printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
13805 fname, total_bad_ratios, total_tests, max_ratio);
13806
13807
13808
13809 printf("\n");
13810 if (nr_failed_routines)
13811 printf("FAILED ");
13812 else
13813 printf("PASSED ");
13814 printf("%-10s: FAIL/TOTAL = %d/%d\n",
13815 base_routine, nr_failed_routines, nr_routines);
13816
13817 return 0;
13818 }
13819