1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <math.h>
4 #include "blas_extended.h"
5 #include "blas_extended_private.h"
6 #include "blas_extended_test.h"
7 
do_test_dgemv2_d_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)8 double do_test_dgemv2_d_s(int m, int n, int ntests, int *seed, double thresh,
9 			  int debug, float test_prob, double *min_ratio,
10 			  int *num_bad_ratio, int *num_tests)
11 
12 /*
13  * Purpose
14  * =======
15  *
16  * Runs a series of tests on GEMV2.
17  *
18  * Arguments
19  * =========
20  *
21  * m         (input) int
22  *           The number of rows
23  *
24  * n         (input) int
25  *           The number of columns
26  *
27  * ntests    (input) int
28  *           The number of tests to run for each set of attributes.
29  *
30  * seed      (input/output) int
31  *           The seed for the random number generator used in testgen().
32  *
33  * thresh    (input) double
34  *           When the ratio returned from test() exceeds the specified
35  *           threshold, the current size, r_true, r_comp, and ratio will be
36  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
37  *           to ~10.)
38  *
39  * debug     (input) int
40  *           If debug=3, print summary
41  *           If debug=2, print summary only if the number of bad ratios > 0
42  *           If debug=1, print complete info if tests fail
43  *           If debug=0, return max ratio
44  *
45  * test_prob (input) float
46  *           The specified test will be performed only if the generated
47  *           random exceeds this threshold.
48  *
49  * min_ratio (output) double
50  *           The minimum ratio
51  *
52  * num_bad_ratio (output) int
53  *               The number of tests fail; they are above the threshold.
54  *
55  * num_tests (output) int
56  *           The number of tests is being performed.
57  *
58  * Return value
59  * ============
60  *
61  * The maximum ratio if run successfully, otherwise return -1
62  *
63  * Code structure
64  * ==============
65  *
66  *  debug loop  -- if debug is one, the first loop computes the max ratio
67  *              -- and the last(second) loop outputs debugging information,
68  *              -- if the test fail and its ratio > 0.5 * max ratio.
69  *              -- if debug is zero, the loop is executed once
70  *    alpha loop  -- varying alpha: 0, 1, or random
71  *      beta loop   -- varying beta: 0, 1, or random
72 
73  *          norm loop   -- varying norm: near undeflow, near one, or
74  *                        -- near overflow
75  *            numtest loop  -- how many times the test is perform with
76  *                            -- above set of attributes
77  *              order loop   -- varying order type: rowmajor or colmajor
78  *                trans loop    -- varying uplo type: upper or lower
79  *                  lda loop      -- varying lda: m, m+1, 2m
80  *                    incx loop     -- varying incx: -2, -1, 1, 2
81  *                      incy loop     -- varying incy: -2, -1, 1, 2
82  */
83 {
84   /* function name */
85   const char fname[] = "BLAS_dgemv2_d_s";
86 
87   /* max number of debug lines to print */
88   const int max_print = 8;
89 
90   /* Variables in the "x_val" form are loop vars for corresponding
91      variables */
92   int i;			/* iterate through the repeating tests */
93   int j, k;			/* multipurpose counters or variables */
94   int iy;			/* use to index y */
95   int incx_val, incy_val,	/* for testing different inc values */
96     incx, incy;
97   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
98   int d_count;			/* counter for debug */
99   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
100   int p_count;			/* counter for the number of debug lines printed */
101   int tot_tests;		/* total number of tests to be done */
102   int norm;			/* input values of near underflow/one/overflow */
103   double ratio_max;		/* the current maximum ratio */
104   double ratio_min;		/* the current minimum ratio */
105   double *ratios;		/* a temporary variable for calculating ratio */
106   double ratio;			/* the per-use test ratio from test() */
107   int bad_ratios;		/* the number of ratios over the threshold */
108   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
109   double un_int;		/* the internal underflow threshold */
110   double alpha;
111   double beta;
112   double *A;
113   float *head_x;
114   float *tail_x;
115   double *y;
116   double *temp;			/* use for calculating ratio */
117 
118   /* x_gen and y_gen are used to store vectors generated by testgen.
119      they eventually are copied back to x and y */
120   float *head_x_gen;
121   float *tail_x_gen;
122   double *y_gen;
123 
124   /* the true r calculated by testgen(), in double-double */
125   double *head_r_true, *tail_r_true;
126   int alpha_val;
127   int alpha_flag;		/* input flag for BLAS_dgemv2_d_s_testgen */
128   int beta_val;
129   int beta_flag;		/* input flag for BLAS_dgemv2_d_s_testgen */
130   int order_val;
131   enum blas_order_type order_type;
132 
133   enum blas_prec_type prec;
134   int trans_val;
135   enum blas_trans_type trans_type;
136   int m_i;
137   int n_i;
138   int max_mn;			/* the max of m and n */
139   int lda_val;
140   int lda;
141   int saved_seed;		/* for saving the original seed */
142   int count, old_count;		/* use for counting the number of testgen calls * 2 */
143 
144   FPU_FIX_DECL;
145 
146   /* test for bad arguments */
147   if (n < 0 || m < 0 || ntests < 0)
148     BLAS_error(fname, 0, 0, NULL);
149 
150   /* initialization */
151   *num_bad_ratio = 0;
152   *num_tests = 0;
153   *min_ratio = 0.0;
154 
155   saved_seed = *seed;
156   ratio_min = 1e308;
157   ratio_max = 0.0;
158   ratio = 0.0;
159   tot_tests = 0;
160   p_count = 0;
161   count = 0;
162   find_max_ratio = 0;
163   bad_ratios = 0;
164   old_count = 0;
165 
166   if (debug == 3)
167     find_max_ratio = 1;
168   max_mn = MAX(m, n);
169   if (m == 0 || n == 0) {
170     return 0.0;
171   }
172 
173   FPU_FIX_START;
174 
175   incy_gen = 1;
176 
177 
178   /* get space for calculation */
179   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
180   if (max_mn * 2 > 0 && head_x == NULL) {
181     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
182   }
183   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
184   if (max_mn * 2 > 0 && tail_x == NULL) {
185     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
186   }
187   y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
188   if (max_mn * 2 > 0 && y == NULL) {
189     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
190   }
191   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
192   if (max_mn > 0 && head_x_gen == NULL) {
193     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
194   }
195   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
196   if (max_mn > 0 && tail_x_gen == NULL) {
197     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
198   }
199   y_gen = (double *) blas_malloc(max_mn * sizeof(double));
200   if (max_mn > 0 && y_gen == NULL) {
201     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
202   }
203   temp = (double *) blas_malloc(max_mn * sizeof(double));
204   if (max_mn > 0 && temp == NULL) {
205     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
206   }
207   head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
208   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
209   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
210     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
211   }
212   ratios = (double *) blas_malloc(max_mn * sizeof(double));
213   if (max_mn > 0 && ratios == NULL) {
214     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
215   }
216   A =
217     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
218   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
219     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
220   }
221 
222   /* The debug iteration:
223      If debug=1, then will execute the iteration twice. First, compute the
224      max ratio. Second, print info if ratio > (50% * ratio_max). */
225   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
226     bad_ratios = 0;		/* set to zero */
227 
228     if ((debug == 3) && (d_count == find_max_ratio))
229       *seed = saved_seed;	/* restore the original seed */
230 
231     /* varying alpha */
232     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
233       alpha_flag = 0;
234       switch (alpha_val) {
235       case 0:
236 	alpha = 0.0;
237 	alpha_flag = 1;
238 	break;
239       case 1:
240 	alpha = 1.0;
241 	alpha_flag = 1;
242 	break;
243       }
244 
245       /* varying beta */
246       for (beta_val = 0; beta_val < 3; beta_val++) {
247 	beta_flag = 0;
248 	switch (beta_val) {
249 	case 0:
250 	  beta = 0.0;
251 	  beta_flag = 1;
252 	  break;
253 	case 1:
254 	  beta = 1.0;
255 	  beta_flag = 1;
256 	  break;
257 	}
258 
259 
260 	eps_int = power(2, -BITS_D);
261 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
262 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
263 	prec = blas_prec_double;
264 
265 	/* values near underflow, 1, or overflow */
266 	for (norm = -1; norm <= 1; norm++) {
267 
268 	  /* number of tests */
269 	  for (i = 0; i < ntests; i++) {
270 
271 	    /* row or col major */
272 	    for (order_val = 0; order_val < 2; order_val++) {
273 	      switch (order_val) {
274 	      case 0:
275 		order_type = blas_rowmajor;
276 		break;
277 	      case 1:
278 	      default:
279 		order_type = blas_colmajor;
280 		break;
281 	      }
282 
283 	      /* no_trans, trans, or conj_trans */
284 	      for (trans_val = 0; trans_val < 3; trans_val++) {
285 		switch (trans_val) {
286 		case 0:
287 		  trans_type = blas_no_trans;
288 		  m_i = m;
289 		  n_i = n;
290 		  break;
291 		case 1:
292 		  trans_type = blas_trans;
293 		  m_i = n;
294 		  n_i = m;
295 		  break;
296 		case 2:
297 		default:
298 		  trans_type = blas_conj_trans;
299 		  m_i = n;
300 		  n_i = m;
301 		  break;
302 		}
303 
304 		/* lda=n, n+1, or 2n */
305 		for (lda_val = 0; lda_val < 3; lda_val++) {
306 		  switch (lda_val) {
307 		  case 0:
308 		    lda = m_i;
309 		    break;
310 		  case 1:
311 		    lda = m_i + 1;
312 		    break;
313 		  case 2:
314 		  default:
315 		    lda = 2 * m_i;
316 		    break;
317 		  }
318 		  if ((order_type == blas_rowmajor && lda < n) ||
319 		      (order_type == blas_colmajor && lda < m))
320 		    continue;
321 
322 		  /* For the sake of speed, we throw out this case at random */
323 		  if (xrand(seed) >= test_prob)
324 		    continue;
325 
326 		  /* in the trivial cases, no need to run testgen */
327 		  if (m > 0 && n > 0)
328 		    BLAS_dgemv2_d_s_testgen(norm, order_type, trans_type, m,
329 					    n, &alpha, alpha_flag, A, lda,
330 					    head_x_gen, tail_x_gen, &beta,
331 					    beta_flag, y_gen, seed,
332 					    head_r_true, tail_r_true);
333 
334 		  count++;
335 
336 		  /* varying incx */
337 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
338 		    if (incx_val == 0)
339 		      continue;
340 
341 		    /* setting incx */
342 		    incx = incx_val;
343 
344 
345 		    scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
346 		    scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
347 
348 		    /* varying incy */
349 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
350 		      if (incy_val == 0)
351 			continue;
352 
353 		      /* setting incy */
354 		      incy = incy_val;
355 
356 
357 		      dcopy_vector(y_gen, m_i, 1, y, incy_val);
358 
359 		      /* call BLAS_dgemv2_d_s */
360 		      FPU_FIX_STOP;
361 		      BLAS_dgemv2_d_s(order_type, trans_type, m, n, alpha, A,
362 				      lda, head_x, tail_x, incx_val, beta, y,
363 				      incy_val);
364 		      FPU_FIX_START;
365 
366 		      /* set y starting index */
367 		      iy = 0;
368 		      if (incy < 0)
369 			iy = -(m_i - 1) * incy;
370 
371 		      /* computing the ratio */
372 		      if (m > 0 && n > 0)
373 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
374 			  /* copy row j of A to temp */
375 			  dge_copy_row(order_type, trans_type, m_i, n_i, A,
376 				       lda, temp, j);
377 
378 			  test_BLAS_ddot2_d_s(n_i, blas_no_conj, alpha, beta,
379 					      y_gen[k], y[iy], head_r_true[k],
380 					      tail_r_true[k], temp, 1, head_x,
381 					      tail_x, incx_val, eps_int,
382 					      un_int, &ratios[j]);
383 
384 			  /* take the max ratio */
385 			  if (j == 0) {
386 			    ratio = ratios[0];
387 			    /* The !<= below causes NaN error to be detected.
388 			       Note that (NaN > thresh) is always false. */
389 			  } else if (!(ratios[j] <= ratio)) {
390 			    ratio = ratios[j];
391 			  }
392 			  iy += incy;
393 			}
394 
395 		      /* Increase the number of bad ratio, if the ratio
396 		         is bigger than the threshold.
397 		         The !<= below causes NaN error to be detected.
398 		         Note that (NaN > thresh) is always false. */
399 		      if (!(ratio <= thresh)) {
400 			bad_ratios++;
401 
402 			if ((debug == 3) &&	/* print only when debug is on */
403 			    (count != old_count) &&	/* print if old vector is different
404 							   from the current one */
405 			    (d_count == find_max_ratio) &&
406 			    (p_count <= max_print) &&
407 			    (ratio > 0.5 * ratio_max)) {
408 			  old_count = count;
409 
410 			  printf
411 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
412 			     fname, m, n, ntests, thresh);
413 
414 			  /* Print test info */
415 			  switch (prec) {
416 			  case blas_prec_single:
417 			    printf("single ");
418 			    break;
419 			  case blas_prec_double:
420 			    printf("double ");
421 			    break;
422 			  case blas_prec_indigenous:
423 			    printf("indigenous ");
424 			    break;
425 			  case blas_prec_extra:
426 			    printf("extra ");
427 			    break;
428 			  }
429 			  switch (norm) {
430 			  case -1:
431 			    printf("near_underflow ");
432 			    break;
433 			  case 0:
434 			    printf("near_one ");
435 			    break;
436 			  case 1:
437 			    printf("near_overflow ");
438 			    break;
439 			  }
440 			  switch (order_type) {
441 			  case blas_rowmajor:
442 			    printf("row_major ");
443 			    break;
444 			  case blas_colmajor:
445 			    printf("col_major ");
446 			    break;
447 			  }
448 			  switch (trans_type) {
449 			  case blas_no_trans:
450 			    printf("no_trans ");
451 			    break;
452 			  case blas_trans:
453 			    printf("trans ");
454 			    break;
455 			  case blas_conj_trans:
456 			    printf("conj_trans ");
457 			    break;
458 			  }
459 
460 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
461 				 incy);
462 
463 			  dge_print_matrix(A, m_i, n_i, lda, order_type, "A");
464 
465 			  sprint_vector(head_x, n_i, incx_val, "head_x");
466 			  sprint_vector(tail_x, n_i, incx_val, "tail_x");
467 			  dprint_vector(y_gen, m_i, 1, "y_gen");
468 			  dprint_vector(y, m_i, incy_val, "y_final");
469 
470 			  printf("      ");
471 			  printf("alpha = ");
472 			  printf("%24.16e", alpha);
473 			  printf("\n      ");
474 			  printf("beta = ");
475 			  printf("%24.16e", beta);
476 			  printf("\n");
477 			  for (j = 0, k = 0; j < m_i * incy_gen;
478 			       j += incy_gen, k++) {
479 			    printf("      ");
480 			    printf("[%24.16e, %24.16e]", head_r_true[j],
481 				   tail_r_true[j]);
482 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
483 			  }
484 
485 			  printf("      ratio=%.4e\n", ratio);
486 			  p_count++;
487 			}
488 			if (bad_ratios >= MAX_BAD_TESTS) {
489 			  printf("\ntoo many failures, exiting....");
490 			  printf("\nTesting and compilation");
491 			  printf(" are incomplete\n\n");
492 			  goto end;
493 			}
494 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
495 			  printf("\nFlagrant ratio error, exiting...");
496 			  printf("\nTesting and compilation");
497 			  printf(" are incomplete\n\n");
498 			  goto end;
499 			}
500 		      }
501 		      if (d_count == 0) {
502 			if (ratio > ratio_max)
503 			  ratio_max = ratio;
504 
505 			if (ratio != 0.0 && ratio < ratio_min)
506 			  ratio_min = ratio;
507 
508 			tot_tests++;
509 		      }
510 		    }		/* incy */
511 		  }		/* incx */
512 		}		/* lda */
513 	      }			/* trans */
514 	    }			/* order */
515 	  }			/* tests */
516 	}			/* norm */
517 
518       }				/* beta */
519     }				/* alpha */
520   }				/* debug */
521 
522   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
523     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
524 	   fname, m, n, ntests, thresh);
525     printf
526       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
527        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
528        ratio_min, ratio_max);
529   }
530 
531 end:
532   FPU_FIX_STOP;
533 
534   blas_free(head_x);
535   blas_free(tail_x);
536   blas_free(y);
537   blas_free(head_x_gen);
538   blas_free(tail_x_gen);
539   blas_free(y_gen);
540   blas_free(temp);
541   blas_free(A);
542   blas_free(head_r_true);
543   blas_free(tail_r_true);
544   blas_free(ratios);
545 
546   *min_ratio = ratio_min;
547   *num_bad_ratio = bad_ratios;
548   *num_tests = tot_tests;
549   return ratio_max;
550 }
do_test_dgemv2_s_d(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)551 double do_test_dgemv2_s_d(int m, int n, int ntests, int *seed, double thresh,
552 			  int debug, float test_prob, double *min_ratio,
553 			  int *num_bad_ratio, int *num_tests)
554 
555 /*
556  * Purpose
557  * =======
558  *
559  * Runs a series of tests on GEMV2.
560  *
561  * Arguments
562  * =========
563  *
564  * m         (input) int
565  *           The number of rows
566  *
567  * n         (input) int
568  *           The number of columns
569  *
570  * ntests    (input) int
571  *           The number of tests to run for each set of attributes.
572  *
573  * seed      (input/output) int
574  *           The seed for the random number generator used in testgen().
575  *
576  * thresh    (input) double
577  *           When the ratio returned from test() exceeds the specified
578  *           threshold, the current size, r_true, r_comp, and ratio will be
579  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
580  *           to ~10.)
581  *
582  * debug     (input) int
583  *           If debug=3, print summary
584  *           If debug=2, print summary only if the number of bad ratios > 0
585  *           If debug=1, print complete info if tests fail
586  *           If debug=0, return max ratio
587  *
588  * test_prob (input) float
589  *           The specified test will be performed only if the generated
590  *           random exceeds this threshold.
591  *
592  * min_ratio (output) double
593  *           The minimum ratio
594  *
595  * num_bad_ratio (output) int
596  *               The number of tests fail; they are above the threshold.
597  *
598  * num_tests (output) int
599  *           The number of tests is being performed.
600  *
601  * Return value
602  * ============
603  *
604  * The maximum ratio if run successfully, otherwise return -1
605  *
606  * Code structure
607  * ==============
608  *
609  *  debug loop  -- if debug is one, the first loop computes the max ratio
610  *              -- and the last(second) loop outputs debugging information,
611  *              -- if the test fail and its ratio > 0.5 * max ratio.
612  *              -- if debug is zero, the loop is executed once
613  *    alpha loop  -- varying alpha: 0, 1, or random
614  *      beta loop   -- varying beta: 0, 1, or random
615 
616  *          norm loop   -- varying norm: near undeflow, near one, or
617  *                        -- near overflow
618  *            numtest loop  -- how many times the test is perform with
619  *                            -- above set of attributes
620  *              order loop   -- varying order type: rowmajor or colmajor
621  *                trans loop    -- varying uplo type: upper or lower
622  *                  lda loop      -- varying lda: m, m+1, 2m
623  *                    incx loop     -- varying incx: -2, -1, 1, 2
624  *                      incy loop     -- varying incy: -2, -1, 1, 2
625  */
626 {
627   /* function name */
628   const char fname[] = "BLAS_dgemv2_s_d";
629 
630   /* max number of debug lines to print */
631   const int max_print = 8;
632 
633   /* Variables in the "x_val" form are loop vars for corresponding
634      variables */
635   int i;			/* iterate through the repeating tests */
636   int j, k;			/* multipurpose counters or variables */
637   int iy;			/* use to index y */
638   int incx_val, incy_val,	/* for testing different inc values */
639     incx, incy;
640   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
641   int d_count;			/* counter for debug */
642   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
643   int p_count;			/* counter for the number of debug lines printed */
644   int tot_tests;		/* total number of tests to be done */
645   int norm;			/* input values of near underflow/one/overflow */
646   double ratio_max;		/* the current maximum ratio */
647   double ratio_min;		/* the current minimum ratio */
648   double *ratios;		/* a temporary variable for calculating ratio */
649   double ratio;			/* the per-use test ratio from test() */
650   int bad_ratios;		/* the number of ratios over the threshold */
651   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
652   double un_int;		/* the internal underflow threshold */
653   double alpha;
654   double beta;
655   float *A;
656   double *head_x;
657   double *tail_x;
658   double *y;
659   float *temp;			/* use for calculating ratio */
660 
661   /* x_gen and y_gen are used to store vectors generated by testgen.
662      they eventually are copied back to x and y */
663   double *head_x_gen;
664   double *tail_x_gen;
665   double *y_gen;
666 
667   /* the true r calculated by testgen(), in double-double */
668   double *head_r_true, *tail_r_true;
669   int alpha_val;
670   int alpha_flag;		/* input flag for BLAS_dgemv2_s_d_testgen */
671   int beta_val;
672   int beta_flag;		/* input flag for BLAS_dgemv2_s_d_testgen */
673   int order_val;
674   enum blas_order_type order_type;
675 
676   enum blas_prec_type prec;
677   int trans_val;
678   enum blas_trans_type trans_type;
679   int m_i;
680   int n_i;
681   int max_mn;			/* the max of m and n */
682   int lda_val;
683   int lda;
684   int saved_seed;		/* for saving the original seed */
685   int count, old_count;		/* use for counting the number of testgen calls * 2 */
686 
687   FPU_FIX_DECL;
688 
689   /* test for bad arguments */
690   if (n < 0 || m < 0 || ntests < 0)
691     BLAS_error(fname, 0, 0, NULL);
692 
693   /* initialization */
694   *num_bad_ratio = 0;
695   *num_tests = 0;
696   *min_ratio = 0.0;
697 
698   saved_seed = *seed;
699   ratio_min = 1e308;
700   ratio_max = 0.0;
701   ratio = 0.0;
702   tot_tests = 0;
703   p_count = 0;
704   count = 0;
705   find_max_ratio = 0;
706   bad_ratios = 0;
707   old_count = 0;
708 
709   if (debug == 3)
710     find_max_ratio = 1;
711   max_mn = MAX(m, n);
712   if (m == 0 || n == 0) {
713     return 0.0;
714   }
715 
716   FPU_FIX_START;
717 
718   incy_gen = 1;
719 
720 
721   /* get space for calculation */
722   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
723   if (max_mn * 2 > 0 && head_x == NULL) {
724     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
725   }
726   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
727   if (max_mn * 2 > 0 && tail_x == NULL) {
728     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
729   }
730   y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
731   if (max_mn * 2 > 0 && y == NULL) {
732     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
733   }
734   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
735   if (max_mn > 0 && head_x_gen == NULL) {
736     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
737   }
738   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
739   if (max_mn > 0 && tail_x_gen == NULL) {
740     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
741   }
742   y_gen = (double *) blas_malloc(max_mn * sizeof(double));
743   if (max_mn > 0 && y_gen == NULL) {
744     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
745   }
746   temp = (float *) blas_malloc(max_mn * sizeof(float));
747   if (max_mn > 0 && temp == NULL) {
748     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
749   }
750   head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
751   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
752   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
753     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
754   }
755   ratios = (double *) blas_malloc(max_mn * sizeof(double));
756   if (max_mn > 0 && ratios == NULL) {
757     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
758   }
759   A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
760   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
761     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
762   }
763 
764   /* The debug iteration:
765      If debug=1, then will execute the iteration twice. First, compute the
766      max ratio. Second, print info if ratio > (50% * ratio_max). */
767   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
768     bad_ratios = 0;		/* set to zero */
769 
770     if ((debug == 3) && (d_count == find_max_ratio))
771       *seed = saved_seed;	/* restore the original seed */
772 
773     /* varying alpha */
774     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
775       alpha_flag = 0;
776       switch (alpha_val) {
777       case 0:
778 	alpha = 0.0;
779 	alpha_flag = 1;
780 	break;
781       case 1:
782 	alpha = 1.0;
783 	alpha_flag = 1;
784 	break;
785       }
786 
787       /* varying beta */
788       for (beta_val = 0; beta_val < 3; beta_val++) {
789 	beta_flag = 0;
790 	switch (beta_val) {
791 	case 0:
792 	  beta = 0.0;
793 	  beta_flag = 1;
794 	  break;
795 	case 1:
796 	  beta = 1.0;
797 	  beta_flag = 1;
798 	  break;
799 	}
800 
801 
802 	eps_int = power(2, -BITS_D);
803 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
804 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
805 	prec = blas_prec_double;
806 
807 	/* values near underflow, 1, or overflow */
808 	for (norm = -1; norm <= 1; norm++) {
809 
810 	  /* number of tests */
811 	  for (i = 0; i < ntests; i++) {
812 
813 	    /* row or col major */
814 	    for (order_val = 0; order_val < 2; order_val++) {
815 	      switch (order_val) {
816 	      case 0:
817 		order_type = blas_rowmajor;
818 		break;
819 	      case 1:
820 	      default:
821 		order_type = blas_colmajor;
822 		break;
823 	      }
824 
825 	      /* no_trans, trans, or conj_trans */
826 	      for (trans_val = 0; trans_val < 3; trans_val++) {
827 		switch (trans_val) {
828 		case 0:
829 		  trans_type = blas_no_trans;
830 		  m_i = m;
831 		  n_i = n;
832 		  break;
833 		case 1:
834 		  trans_type = blas_trans;
835 		  m_i = n;
836 		  n_i = m;
837 		  break;
838 		case 2:
839 		default:
840 		  trans_type = blas_conj_trans;
841 		  m_i = n;
842 		  n_i = m;
843 		  break;
844 		}
845 
846 		/* lda=n, n+1, or 2n */
847 		for (lda_val = 0; lda_val < 3; lda_val++) {
848 		  switch (lda_val) {
849 		  case 0:
850 		    lda = m_i;
851 		    break;
852 		  case 1:
853 		    lda = m_i + 1;
854 		    break;
855 		  case 2:
856 		  default:
857 		    lda = 2 * m_i;
858 		    break;
859 		  }
860 		  if ((order_type == blas_rowmajor && lda < n) ||
861 		      (order_type == blas_colmajor && lda < m))
862 		    continue;
863 
864 		  /* For the sake of speed, we throw out this case at random */
865 		  if (xrand(seed) >= test_prob)
866 		    continue;
867 
868 		  /* in the trivial cases, no need to run testgen */
869 		  if (m > 0 && n > 0)
870 		    BLAS_dgemv2_s_d_testgen(norm, order_type, trans_type, m,
871 					    n, &alpha, alpha_flag, A, lda,
872 					    head_x_gen, tail_x_gen, &beta,
873 					    beta_flag, y_gen, seed,
874 					    head_r_true, tail_r_true);
875 
876 		  count++;
877 
878 		  /* varying incx */
879 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
880 		    if (incx_val == 0)
881 		      continue;
882 
883 		    /* setting incx */
884 		    incx = incx_val;
885 
886 
887 		    dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
888 		    dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
889 
890 		    /* varying incy */
891 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
892 		      if (incy_val == 0)
893 			continue;
894 
895 		      /* setting incy */
896 		      incy = incy_val;
897 
898 
899 		      dcopy_vector(y_gen, m_i, 1, y, incy_val);
900 
901 		      /* call BLAS_dgemv2_s_d */
902 		      FPU_FIX_STOP;
903 		      BLAS_dgemv2_s_d(order_type, trans_type, m, n, alpha, A,
904 				      lda, head_x, tail_x, incx_val, beta, y,
905 				      incy_val);
906 		      FPU_FIX_START;
907 
908 		      /* set y starting index */
909 		      iy = 0;
910 		      if (incy < 0)
911 			iy = -(m_i - 1) * incy;
912 
913 		      /* computing the ratio */
914 		      if (m > 0 && n > 0)
915 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
916 			  /* copy row j of A to temp */
917 			  sge_copy_row(order_type, trans_type, m_i, n_i, A,
918 				       lda, temp, j);
919 
920 			  test_BLAS_ddot2_s_d(n_i, blas_no_conj, alpha, beta,
921 					      y_gen[k], y[iy], head_r_true[k],
922 					      tail_r_true[k], temp, 1, head_x,
923 					      tail_x, incx_val, eps_int,
924 					      un_int, &ratios[j]);
925 
926 			  /* take the max ratio */
927 			  if (j == 0) {
928 			    ratio = ratios[0];
929 			    /* The !<= below causes NaN error to be detected.
930 			       Note that (NaN > thresh) is always false. */
931 			  } else if (!(ratios[j] <= ratio)) {
932 			    ratio = ratios[j];
933 			  }
934 			  iy += incy;
935 			}
936 
937 		      /* Increase the number of bad ratio, if the ratio
938 		         is bigger than the threshold.
939 		         The !<= below causes NaN error to be detected.
940 		         Note that (NaN > thresh) is always false. */
941 		      if (!(ratio <= thresh)) {
942 			bad_ratios++;
943 
944 			if ((debug == 3) &&	/* print only when debug is on */
945 			    (count != old_count) &&	/* print if old vector is different
946 							   from the current one */
947 			    (d_count == find_max_ratio) &&
948 			    (p_count <= max_print) &&
949 			    (ratio > 0.5 * ratio_max)) {
950 			  old_count = count;
951 
952 			  printf
953 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
954 			     fname, m, n, ntests, thresh);
955 
956 			  /* Print test info */
957 			  switch (prec) {
958 			  case blas_prec_single:
959 			    printf("single ");
960 			    break;
961 			  case blas_prec_double:
962 			    printf("double ");
963 			    break;
964 			  case blas_prec_indigenous:
965 			    printf("indigenous ");
966 			    break;
967 			  case blas_prec_extra:
968 			    printf("extra ");
969 			    break;
970 			  }
971 			  switch (norm) {
972 			  case -1:
973 			    printf("near_underflow ");
974 			    break;
975 			  case 0:
976 			    printf("near_one ");
977 			    break;
978 			  case 1:
979 			    printf("near_overflow ");
980 			    break;
981 			  }
982 			  switch (order_type) {
983 			  case blas_rowmajor:
984 			    printf("row_major ");
985 			    break;
986 			  case blas_colmajor:
987 			    printf("col_major ");
988 			    break;
989 			  }
990 			  switch (trans_type) {
991 			  case blas_no_trans:
992 			    printf("no_trans ");
993 			    break;
994 			  case blas_trans:
995 			    printf("trans ");
996 			    break;
997 			  case blas_conj_trans:
998 			    printf("conj_trans ");
999 			    break;
1000 			  }
1001 
1002 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
1003 				 incy);
1004 
1005 			  sge_print_matrix(A, m_i, n_i, lda, order_type, "A");
1006 
1007 			  dprint_vector(head_x, n_i, incx_val, "head_x");
1008 			  dprint_vector(tail_x, n_i, incx_val, "tail_x");
1009 			  dprint_vector(y_gen, m_i, 1, "y_gen");
1010 			  dprint_vector(y, m_i, incy_val, "y_final");
1011 
1012 			  printf("      ");
1013 			  printf("alpha = ");
1014 			  printf("%24.16e", alpha);
1015 			  printf("\n      ");
1016 			  printf("beta = ");
1017 			  printf("%24.16e", beta);
1018 			  printf("\n");
1019 			  for (j = 0, k = 0; j < m_i * incy_gen;
1020 			       j += incy_gen, k++) {
1021 			    printf("      ");
1022 			    printf("[%24.16e, %24.16e]", head_r_true[j],
1023 				   tail_r_true[j]);
1024 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
1025 			  }
1026 
1027 			  printf("      ratio=%.4e\n", ratio);
1028 			  p_count++;
1029 			}
1030 			if (bad_ratios >= MAX_BAD_TESTS) {
1031 			  printf("\ntoo many failures, exiting....");
1032 			  printf("\nTesting and compilation");
1033 			  printf(" are incomplete\n\n");
1034 			  goto end;
1035 			}
1036 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
1037 			  printf("\nFlagrant ratio error, exiting...");
1038 			  printf("\nTesting and compilation");
1039 			  printf(" are incomplete\n\n");
1040 			  goto end;
1041 			}
1042 		      }
1043 		      if (d_count == 0) {
1044 			if (ratio > ratio_max)
1045 			  ratio_max = ratio;
1046 
1047 			if (ratio != 0.0 && ratio < ratio_min)
1048 			  ratio_min = ratio;
1049 
1050 			tot_tests++;
1051 		      }
1052 		    }		/* incy */
1053 		  }		/* incx */
1054 		}		/* lda */
1055 	      }			/* trans */
1056 	    }			/* order */
1057 	  }			/* tests */
1058 	}			/* norm */
1059 
1060       }				/* beta */
1061     }				/* alpha */
1062   }				/* debug */
1063 
1064   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
1065     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
1066 	   fname, m, n, ntests, thresh);
1067     printf
1068       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
1069        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
1070        ratio_min, ratio_max);
1071   }
1072 
1073 end:
1074   FPU_FIX_STOP;
1075 
1076   blas_free(head_x);
1077   blas_free(tail_x);
1078   blas_free(y);
1079   blas_free(head_x_gen);
1080   blas_free(tail_x_gen);
1081   blas_free(y_gen);
1082   blas_free(temp);
1083   blas_free(A);
1084   blas_free(head_r_true);
1085   blas_free(tail_r_true);
1086   blas_free(ratios);
1087 
1088   *min_ratio = ratio_min;
1089   *num_bad_ratio = bad_ratios;
1090   *num_tests = tot_tests;
1091   return ratio_max;
1092 }
do_test_dgemv2_s_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)1093 double do_test_dgemv2_s_s(int m, int n, int ntests, int *seed, double thresh,
1094 			  int debug, float test_prob, double *min_ratio,
1095 			  int *num_bad_ratio, int *num_tests)
1096 
1097 /*
1098  * Purpose
1099  * =======
1100  *
1101  * Runs a series of tests on GEMV2.
1102  *
1103  * Arguments
1104  * =========
1105  *
1106  * m         (input) int
1107  *           The number of rows
1108  *
1109  * n         (input) int
1110  *           The number of columns
1111  *
1112  * ntests    (input) int
1113  *           The number of tests to run for each set of attributes.
1114  *
1115  * seed      (input/output) int
1116  *           The seed for the random number generator used in testgen().
1117  *
1118  * thresh    (input) double
1119  *           When the ratio returned from test() exceeds the specified
1120  *           threshold, the current size, r_true, r_comp, and ratio will be
1121  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
1122  *           to ~10.)
1123  *
1124  * debug     (input) int
1125  *           If debug=3, print summary
1126  *           If debug=2, print summary only if the number of bad ratios > 0
1127  *           If debug=1, print complete info if tests fail
1128  *           If debug=0, return max ratio
1129  *
1130  * test_prob (input) float
1131  *           The specified test will be performed only if the generated
1132  *           random exceeds this threshold.
1133  *
1134  * min_ratio (output) double
1135  *           The minimum ratio
1136  *
1137  * num_bad_ratio (output) int
1138  *               The number of tests fail; they are above the threshold.
1139  *
1140  * num_tests (output) int
1141  *           The number of tests is being performed.
1142  *
1143  * Return value
1144  * ============
1145  *
1146  * The maximum ratio if run successfully, otherwise return -1
1147  *
1148  * Code structure
1149  * ==============
1150  *
1151  *  debug loop  -- if debug is one, the first loop computes the max ratio
1152  *              -- and the last(second) loop outputs debugging information,
1153  *              -- if the test fail and its ratio > 0.5 * max ratio.
1154  *              -- if debug is zero, the loop is executed once
1155  *    alpha loop  -- varying alpha: 0, 1, or random
1156  *      beta loop   -- varying beta: 0, 1, or random
1157 
1158  *          norm loop   -- varying norm: near undeflow, near one, or
1159  *                        -- near overflow
1160  *            numtest loop  -- how many times the test is perform with
1161  *                            -- above set of attributes
1162  *              order loop   -- varying order type: rowmajor or colmajor
1163  *                trans loop    -- varying uplo type: upper or lower
1164  *                  lda loop      -- varying lda: m, m+1, 2m
1165  *                    incx loop     -- varying incx: -2, -1, 1, 2
1166  *                      incy loop     -- varying incy: -2, -1, 1, 2
1167  */
1168 {
1169   /* function name */
1170   const char fname[] = "BLAS_dgemv2_s_s";
1171 
1172   /* max number of debug lines to print */
1173   const int max_print = 8;
1174 
1175   /* Variables in the "x_val" form are loop vars for corresponding
1176      variables */
1177   int i;			/* iterate through the repeating tests */
1178   int j, k;			/* multipurpose counters or variables */
1179   int iy;			/* use to index y */
1180   int incx_val, incy_val,	/* for testing different inc values */
1181     incx, incy;
1182   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
1183   int d_count;			/* counter for debug */
1184   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
1185   int p_count;			/* counter for the number of debug lines printed */
1186   int tot_tests;		/* total number of tests to be done */
1187   int norm;			/* input values of near underflow/one/overflow */
1188   double ratio_max;		/* the current maximum ratio */
1189   double ratio_min;		/* the current minimum ratio */
1190   double *ratios;		/* a temporary variable for calculating ratio */
1191   double ratio;			/* the per-use test ratio from test() */
1192   int bad_ratios;		/* the number of ratios over the threshold */
1193   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
1194   double un_int;		/* the internal underflow threshold */
1195   double alpha;
1196   double beta;
1197   float *A;
1198   float *head_x;
1199   float *tail_x;
1200   double *y;
1201   float *temp;			/* use for calculating ratio */
1202 
1203   /* x_gen and y_gen are used to store vectors generated by testgen.
1204      they eventually are copied back to x and y */
1205   float *head_x_gen;
1206   float *tail_x_gen;
1207   double *y_gen;
1208 
1209   /* the true r calculated by testgen(), in double-double */
1210   double *head_r_true, *tail_r_true;
1211   int alpha_val;
1212   int alpha_flag;		/* input flag for BLAS_dgemv2_s_s_testgen */
1213   int beta_val;
1214   int beta_flag;		/* input flag for BLAS_dgemv2_s_s_testgen */
1215   int order_val;
1216   enum blas_order_type order_type;
1217 
1218   enum blas_prec_type prec;
1219   int trans_val;
1220   enum blas_trans_type trans_type;
1221   int m_i;
1222   int n_i;
1223   int max_mn;			/* the max of m and n */
1224   int lda_val;
1225   int lda;
1226   int saved_seed;		/* for saving the original seed */
1227   int count, old_count;		/* use for counting the number of testgen calls * 2 */
1228 
1229   FPU_FIX_DECL;
1230 
1231   /* test for bad arguments */
1232   if (n < 0 || m < 0 || ntests < 0)
1233     BLAS_error(fname, 0, 0, NULL);
1234 
1235   /* initialization */
1236   *num_bad_ratio = 0;
1237   *num_tests = 0;
1238   *min_ratio = 0.0;
1239 
1240   saved_seed = *seed;
1241   ratio_min = 1e308;
1242   ratio_max = 0.0;
1243   ratio = 0.0;
1244   tot_tests = 0;
1245   p_count = 0;
1246   count = 0;
1247   find_max_ratio = 0;
1248   bad_ratios = 0;
1249   old_count = 0;
1250 
1251   if (debug == 3)
1252     find_max_ratio = 1;
1253   max_mn = MAX(m, n);
1254   if (m == 0 || n == 0) {
1255     return 0.0;
1256   }
1257 
1258   FPU_FIX_START;
1259 
1260   incy_gen = 1;
1261 
1262 
1263   /* get space for calculation */
1264   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
1265   if (max_mn * 2 > 0 && head_x == NULL) {
1266     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1267   }
1268   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
1269   if (max_mn * 2 > 0 && tail_x == NULL) {
1270     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1271   }
1272   y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
1273   if (max_mn * 2 > 0 && y == NULL) {
1274     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1275   }
1276   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
1277   if (max_mn > 0 && head_x_gen == NULL) {
1278     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1279   }
1280   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
1281   if (max_mn > 0 && tail_x_gen == NULL) {
1282     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1283   }
1284   y_gen = (double *) blas_malloc(max_mn * sizeof(double));
1285   if (max_mn > 0 && y_gen == NULL) {
1286     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1287   }
1288   temp = (float *) blas_malloc(max_mn * sizeof(float));
1289   if (max_mn > 0 && temp == NULL) {
1290     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1291   }
1292   head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
1293   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
1294   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1295     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1296   }
1297   ratios = (double *) blas_malloc(max_mn * sizeof(double));
1298   if (max_mn > 0 && ratios == NULL) {
1299     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1300   }
1301   A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
1302   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
1303     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1304   }
1305 
1306   /* The debug iteration:
1307      If debug=1, then will execute the iteration twice. First, compute the
1308      max ratio. Second, print info if ratio > (50% * ratio_max). */
1309   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
1310     bad_ratios = 0;		/* set to zero */
1311 
1312     if ((debug == 3) && (d_count == find_max_ratio))
1313       *seed = saved_seed;	/* restore the original seed */
1314 
1315     /* varying alpha */
1316     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
1317       alpha_flag = 0;
1318       switch (alpha_val) {
1319       case 0:
1320 	alpha = 0.0;
1321 	alpha_flag = 1;
1322 	break;
1323       case 1:
1324 	alpha = 1.0;
1325 	alpha_flag = 1;
1326 	break;
1327       }
1328 
1329       /* varying beta */
1330       for (beta_val = 0; beta_val < 3; beta_val++) {
1331 	beta_flag = 0;
1332 	switch (beta_val) {
1333 	case 0:
1334 	  beta = 0.0;
1335 	  beta_flag = 1;
1336 	  break;
1337 	case 1:
1338 	  beta = 1.0;
1339 	  beta_flag = 1;
1340 	  break;
1341 	}
1342 
1343 
1344 	eps_int = power(2, -BITS_D);
1345 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1346 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1347 	prec = blas_prec_double;
1348 
1349 	/* values near underflow, 1, or overflow */
1350 	for (norm = -1; norm <= 1; norm++) {
1351 
1352 	  /* number of tests */
1353 	  for (i = 0; i < ntests; i++) {
1354 
1355 	    /* row or col major */
1356 	    for (order_val = 0; order_val < 2; order_val++) {
1357 	      switch (order_val) {
1358 	      case 0:
1359 		order_type = blas_rowmajor;
1360 		break;
1361 	      case 1:
1362 	      default:
1363 		order_type = blas_colmajor;
1364 		break;
1365 	      }
1366 
1367 	      /* no_trans, trans, or conj_trans */
1368 	      for (trans_val = 0; trans_val < 3; trans_val++) {
1369 		switch (trans_val) {
1370 		case 0:
1371 		  trans_type = blas_no_trans;
1372 		  m_i = m;
1373 		  n_i = n;
1374 		  break;
1375 		case 1:
1376 		  trans_type = blas_trans;
1377 		  m_i = n;
1378 		  n_i = m;
1379 		  break;
1380 		case 2:
1381 		default:
1382 		  trans_type = blas_conj_trans;
1383 		  m_i = n;
1384 		  n_i = m;
1385 		  break;
1386 		}
1387 
1388 		/* lda=n, n+1, or 2n */
1389 		for (lda_val = 0; lda_val < 3; lda_val++) {
1390 		  switch (lda_val) {
1391 		  case 0:
1392 		    lda = m_i;
1393 		    break;
1394 		  case 1:
1395 		    lda = m_i + 1;
1396 		    break;
1397 		  case 2:
1398 		  default:
1399 		    lda = 2 * m_i;
1400 		    break;
1401 		  }
1402 		  if ((order_type == blas_rowmajor && lda < n) ||
1403 		      (order_type == blas_colmajor && lda < m))
1404 		    continue;
1405 
1406 		  /* For the sake of speed, we throw out this case at random */
1407 		  if (xrand(seed) >= test_prob)
1408 		    continue;
1409 
1410 		  /* in the trivial cases, no need to run testgen */
1411 		  if (m > 0 && n > 0)
1412 		    BLAS_dgemv2_s_s_testgen(norm, order_type, trans_type, m,
1413 					    n, &alpha, alpha_flag, A, lda,
1414 					    head_x_gen, tail_x_gen, &beta,
1415 					    beta_flag, y_gen, seed,
1416 					    head_r_true, tail_r_true);
1417 
1418 		  count++;
1419 
1420 		  /* varying incx */
1421 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
1422 		    if (incx_val == 0)
1423 		      continue;
1424 
1425 		    /* setting incx */
1426 		    incx = incx_val;
1427 
1428 
1429 		    scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
1430 		    scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
1431 
1432 		    /* varying incy */
1433 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
1434 		      if (incy_val == 0)
1435 			continue;
1436 
1437 		      /* setting incy */
1438 		      incy = incy_val;
1439 
1440 
1441 		      dcopy_vector(y_gen, m_i, 1, y, incy_val);
1442 
1443 		      /* call BLAS_dgemv2_s_s */
1444 		      FPU_FIX_STOP;
1445 		      BLAS_dgemv2_s_s(order_type, trans_type, m, n, alpha, A,
1446 				      lda, head_x, tail_x, incx_val, beta, y,
1447 				      incy_val);
1448 		      FPU_FIX_START;
1449 
1450 		      /* set y starting index */
1451 		      iy = 0;
1452 		      if (incy < 0)
1453 			iy = -(m_i - 1) * incy;
1454 
1455 		      /* computing the ratio */
1456 		      if (m > 0 && n > 0)
1457 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
1458 			  /* copy row j of A to temp */
1459 			  sge_copy_row(order_type, trans_type, m_i, n_i, A,
1460 				       lda, temp, j);
1461 
1462 			  test_BLAS_ddot2_s_s(n_i, blas_no_conj, alpha, beta,
1463 					      y_gen[k], y[iy], head_r_true[k],
1464 					      tail_r_true[k], temp, 1, head_x,
1465 					      tail_x, incx_val, eps_int,
1466 					      un_int, &ratios[j]);
1467 
1468 			  /* take the max ratio */
1469 			  if (j == 0) {
1470 			    ratio = ratios[0];
1471 			    /* The !<= below causes NaN error to be detected.
1472 			       Note that (NaN > thresh) is always false. */
1473 			  } else if (!(ratios[j] <= ratio)) {
1474 			    ratio = ratios[j];
1475 			  }
1476 			  iy += incy;
1477 			}
1478 
1479 		      /* Increase the number of bad ratio, if the ratio
1480 		         is bigger than the threshold.
1481 		         The !<= below causes NaN error to be detected.
1482 		         Note that (NaN > thresh) is always false. */
1483 		      if (!(ratio <= thresh)) {
1484 			bad_ratios++;
1485 
1486 			if ((debug == 3) &&	/* print only when debug is on */
1487 			    (count != old_count) &&	/* print if old vector is different
1488 							   from the current one */
1489 			    (d_count == find_max_ratio) &&
1490 			    (p_count <= max_print) &&
1491 			    (ratio > 0.5 * ratio_max)) {
1492 			  old_count = count;
1493 
1494 			  printf
1495 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
1496 			     fname, m, n, ntests, thresh);
1497 
1498 			  /* Print test info */
1499 			  switch (prec) {
1500 			  case blas_prec_single:
1501 			    printf("single ");
1502 			    break;
1503 			  case blas_prec_double:
1504 			    printf("double ");
1505 			    break;
1506 			  case blas_prec_indigenous:
1507 			    printf("indigenous ");
1508 			    break;
1509 			  case blas_prec_extra:
1510 			    printf("extra ");
1511 			    break;
1512 			  }
1513 			  switch (norm) {
1514 			  case -1:
1515 			    printf("near_underflow ");
1516 			    break;
1517 			  case 0:
1518 			    printf("near_one ");
1519 			    break;
1520 			  case 1:
1521 			    printf("near_overflow ");
1522 			    break;
1523 			  }
1524 			  switch (order_type) {
1525 			  case blas_rowmajor:
1526 			    printf("row_major ");
1527 			    break;
1528 			  case blas_colmajor:
1529 			    printf("col_major ");
1530 			    break;
1531 			  }
1532 			  switch (trans_type) {
1533 			  case blas_no_trans:
1534 			    printf("no_trans ");
1535 			    break;
1536 			  case blas_trans:
1537 			    printf("trans ");
1538 			    break;
1539 			  case blas_conj_trans:
1540 			    printf("conj_trans ");
1541 			    break;
1542 			  }
1543 
1544 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
1545 				 incy);
1546 
1547 			  sge_print_matrix(A, m_i, n_i, lda, order_type, "A");
1548 
1549 			  sprint_vector(head_x, n_i, incx_val, "head_x");
1550 			  sprint_vector(tail_x, n_i, incx_val, "tail_x");
1551 			  dprint_vector(y_gen, m_i, 1, "y_gen");
1552 			  dprint_vector(y, m_i, incy_val, "y_final");
1553 
1554 			  printf("      ");
1555 			  printf("alpha = ");
1556 			  printf("%24.16e", alpha);
1557 			  printf("\n      ");
1558 			  printf("beta = ");
1559 			  printf("%24.16e", beta);
1560 			  printf("\n");
1561 			  for (j = 0, k = 0; j < m_i * incy_gen;
1562 			       j += incy_gen, k++) {
1563 			    printf("      ");
1564 			    printf("[%24.16e, %24.16e]", head_r_true[j],
1565 				   tail_r_true[j]);
1566 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
1567 			  }
1568 
1569 			  printf("      ratio=%.4e\n", ratio);
1570 			  p_count++;
1571 			}
1572 			if (bad_ratios >= MAX_BAD_TESTS) {
1573 			  printf("\ntoo many failures, exiting....");
1574 			  printf("\nTesting and compilation");
1575 			  printf(" are incomplete\n\n");
1576 			  goto end;
1577 			}
1578 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
1579 			  printf("\nFlagrant ratio error, exiting...");
1580 			  printf("\nTesting and compilation");
1581 			  printf(" are incomplete\n\n");
1582 			  goto end;
1583 			}
1584 		      }
1585 		      if (d_count == 0) {
1586 			if (ratio > ratio_max)
1587 			  ratio_max = ratio;
1588 
1589 			if (ratio != 0.0 && ratio < ratio_min)
1590 			  ratio_min = ratio;
1591 
1592 			tot_tests++;
1593 		      }
1594 		    }		/* incy */
1595 		  }		/* incx */
1596 		}		/* lda */
1597 	      }			/* trans */
1598 	    }			/* order */
1599 	  }			/* tests */
1600 	}			/* norm */
1601 
1602       }				/* beta */
1603     }				/* alpha */
1604   }				/* debug */
1605 
1606   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
1607     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
1608 	   fname, m, n, ntests, thresh);
1609     printf
1610       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
1611        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
1612        ratio_min, ratio_max);
1613   }
1614 
1615 end:
1616   FPU_FIX_STOP;
1617 
1618   blas_free(head_x);
1619   blas_free(tail_x);
1620   blas_free(y);
1621   blas_free(head_x_gen);
1622   blas_free(tail_x_gen);
1623   blas_free(y_gen);
1624   blas_free(temp);
1625   blas_free(A);
1626   blas_free(head_r_true);
1627   blas_free(tail_r_true);
1628   blas_free(ratios);
1629 
1630   *min_ratio = ratio_min;
1631   *num_bad_ratio = bad_ratios;
1632   *num_tests = tot_tests;
1633   return ratio_max;
1634 }
do_test_zgemv2_z_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)1635 double do_test_zgemv2_z_c(int m, int n, int ntests, int *seed, double thresh,
1636 			  int debug, float test_prob, double *min_ratio,
1637 			  int *num_bad_ratio, int *num_tests)
1638 
1639 /*
1640  * Purpose
1641  * =======
1642  *
1643  * Runs a series of tests on GEMV2.
1644  *
1645  * Arguments
1646  * =========
1647  *
1648  * m         (input) int
1649  *           The number of rows
1650  *
1651  * n         (input) int
1652  *           The number of columns
1653  *
1654  * ntests    (input) int
1655  *           The number of tests to run for each set of attributes.
1656  *
1657  * seed      (input/output) int
1658  *           The seed for the random number generator used in testgen().
1659  *
1660  * thresh    (input) double
1661  *           When the ratio returned from test() exceeds the specified
1662  *           threshold, the current size, r_true, r_comp, and ratio will be
1663  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
1664  *           to ~10.)
1665  *
1666  * debug     (input) int
1667  *           If debug=3, print summary
1668  *           If debug=2, print summary only if the number of bad ratios > 0
1669  *           If debug=1, print complete info if tests fail
1670  *           If debug=0, return max ratio
1671  *
1672  * test_prob (input) float
1673  *           The specified test will be performed only if the generated
1674  *           random exceeds this threshold.
1675  *
1676  * min_ratio (output) double
1677  *           The minimum ratio
1678  *
1679  * num_bad_ratio (output) int
1680  *               The number of tests fail; they are above the threshold.
1681  *
1682  * num_tests (output) int
1683  *           The number of tests is being performed.
1684  *
1685  * Return value
1686  * ============
1687  *
1688  * The maximum ratio if run successfully, otherwise return -1
1689  *
1690  * Code structure
1691  * ==============
1692  *
1693  *  debug loop  -- if debug is one, the first loop computes the max ratio
1694  *              -- and the last(second) loop outputs debugging information,
1695  *              -- if the test fail and its ratio > 0.5 * max ratio.
1696  *              -- if debug is zero, the loop is executed once
1697  *    alpha loop  -- varying alpha: 0, 1, or random
1698  *      beta loop   -- varying beta: 0, 1, or random
1699 
1700  *          norm loop   -- varying norm: near undeflow, near one, or
1701  *                        -- near overflow
1702  *            numtest loop  -- how many times the test is perform with
1703  *                            -- above set of attributes
1704  *              order loop   -- varying order type: rowmajor or colmajor
1705  *                trans loop    -- varying uplo type: upper or lower
1706  *                  lda loop      -- varying lda: m, m+1, 2m
1707  *                    incx loop     -- varying incx: -2, -1, 1, 2
1708  *                      incy loop     -- varying incy: -2, -1, 1, 2
1709  */
1710 {
1711   /* function name */
1712   const char fname[] = "BLAS_zgemv2_z_c";
1713 
1714   /* max number of debug lines to print */
1715   const int max_print = 8;
1716 
1717   /* Variables in the "x_val" form are loop vars for corresponding
1718      variables */
1719   int i;			/* iterate through the repeating tests */
1720   int j, k;			/* multipurpose counters or variables */
1721   int iy;			/* use to index y */
1722   int incx_val, incy_val,	/* for testing different inc values */
1723     incx, incy;
1724   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
1725   int d_count;			/* counter for debug */
1726   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
1727   int p_count;			/* counter for the number of debug lines printed */
1728   int tot_tests;		/* total number of tests to be done */
1729   int norm;			/* input values of near underflow/one/overflow */
1730   double ratio_max;		/* the current maximum ratio */
1731   double ratio_min;		/* the current minimum ratio */
1732   double *ratios;		/* a temporary variable for calculating ratio */
1733   double ratio;			/* the per-use test ratio from test() */
1734   int bad_ratios;		/* the number of ratios over the threshold */
1735   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
1736   double un_int;		/* the internal underflow threshold */
1737   double alpha[2];
1738   double beta[2];
1739   double *A;
1740   float *head_x;
1741   float *tail_x;
1742   double *y;
1743   double *temp;			/* use for calculating ratio */
1744 
1745   /* x_gen and y_gen are used to store vectors generated by testgen.
1746      they eventually are copied back to x and y */
1747   float *head_x_gen;
1748   float *tail_x_gen;
1749   double *y_gen;
1750 
1751   /* the true r calculated by testgen(), in double-double */
1752   double *head_r_true, *tail_r_true;
1753 
1754   int alpha_val;
1755   int alpha_flag;		/* input flag for BLAS_zgemv2_z_c_testgen */
1756   int beta_val;
1757   int beta_flag;		/* input flag for BLAS_zgemv2_z_c_testgen */
1758   int order_val;
1759   enum blas_order_type order_type;
1760 
1761   enum blas_prec_type prec;
1762   int trans_val;
1763   enum blas_trans_type trans_type;
1764   int m_i;
1765   int n_i;
1766   int max_mn;			/* the max of m and n */
1767   int lda_val;
1768   int lda;
1769   int saved_seed;		/* for saving the original seed */
1770   int count, old_count;		/* use for counting the number of testgen calls * 2 */
1771 
1772   FPU_FIX_DECL;
1773 
1774   /* test for bad arguments */
1775   if (n < 0 || m < 0 || ntests < 0)
1776     BLAS_error(fname, 0, 0, NULL);
1777 
1778   /* initialization */
1779   *num_bad_ratio = 0;
1780   *num_tests = 0;
1781   *min_ratio = 0.0;
1782 
1783   saved_seed = *seed;
1784   ratio_min = 1e308;
1785   ratio_max = 0.0;
1786   ratio = 0.0;
1787   tot_tests = 0;
1788   p_count = 0;
1789   count = 0;
1790   find_max_ratio = 0;
1791   bad_ratios = 0;
1792   old_count = 0;
1793 
1794   if (debug == 3)
1795     find_max_ratio = 1;
1796   max_mn = MAX(m, n);
1797   if (m == 0 || n == 0) {
1798     return 0.0;
1799   }
1800 
1801   FPU_FIX_START;
1802 
1803   incy_gen = 1;
1804   incy_gen *= 2;
1805 
1806   /* get space for calculation */
1807   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
1808   if (max_mn * 2 > 0 && head_x == NULL) {
1809     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1810   }
1811   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
1812   if (max_mn * 2 > 0 && tail_x == NULL) {
1813     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1814   }
1815   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
1816   if (max_mn * 2 > 0 && y == NULL) {
1817     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1818   }
1819   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
1820   if (max_mn > 0 && head_x_gen == NULL) {
1821     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1822   }
1823   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
1824   if (max_mn > 0 && tail_x_gen == NULL) {
1825     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1826   }
1827   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
1828   if (max_mn > 0 && y_gen == NULL) {
1829     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1830   }
1831   temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
1832   if (max_mn > 0 && temp == NULL) {
1833     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1834   }
1835   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
1836   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
1837   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
1838     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1839   }
1840   ratios = (double *) blas_malloc(max_mn * sizeof(double));
1841   if (max_mn > 0 && ratios == NULL) {
1842     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1843   }
1844   A =
1845     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
1846 			   2);
1847   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
1848     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
1849   }
1850 
1851   /* The debug iteration:
1852      If debug=1, then will execute the iteration twice. First, compute the
1853      max ratio. Second, print info if ratio > (50% * ratio_max). */
1854   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
1855     bad_ratios = 0;		/* set to zero */
1856 
1857     if ((debug == 3) && (d_count == find_max_ratio))
1858       *seed = saved_seed;	/* restore the original seed */
1859 
1860     /* varying alpha */
1861     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
1862       alpha_flag = 0;
1863       switch (alpha_val) {
1864       case 0:
1865 	alpha[0] = alpha[1] = 0.0;
1866 	alpha_flag = 1;
1867 	break;
1868       case 1:
1869 	alpha[0] = 1.0;
1870 	alpha[1] = 0.0;
1871 	alpha_flag = 1;
1872 	break;
1873       }
1874 
1875       /* varying beta */
1876       for (beta_val = 0; beta_val < 3; beta_val++) {
1877 	beta_flag = 0;
1878 	switch (beta_val) {
1879 	case 0:
1880 	  beta[0] = beta[1] = 0.0;
1881 	  beta_flag = 1;
1882 	  break;
1883 	case 1:
1884 	  beta[0] = 1.0;
1885 	  beta[1] = 0.0;
1886 	  beta_flag = 1;
1887 	  break;
1888 	}
1889 
1890 
1891 	eps_int = power(2, -BITS_D);
1892 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
1893 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
1894 	prec = blas_prec_double;
1895 
1896 	/* values near underflow, 1, or overflow */
1897 	for (norm = -1; norm <= 1; norm++) {
1898 
1899 	  /* number of tests */
1900 	  for (i = 0; i < ntests; i++) {
1901 
1902 	    /* row or col major */
1903 	    for (order_val = 0; order_val < 2; order_val++) {
1904 	      switch (order_val) {
1905 	      case 0:
1906 		order_type = blas_rowmajor;
1907 		break;
1908 	      case 1:
1909 	      default:
1910 		order_type = blas_colmajor;
1911 		break;
1912 	      }
1913 
1914 	      /* no_trans, trans, or conj_trans */
1915 	      for (trans_val = 0; trans_val < 3; trans_val++) {
1916 		switch (trans_val) {
1917 		case 0:
1918 		  trans_type = blas_no_trans;
1919 		  m_i = m;
1920 		  n_i = n;
1921 		  break;
1922 		case 1:
1923 		  trans_type = blas_trans;
1924 		  m_i = n;
1925 		  n_i = m;
1926 		  break;
1927 		case 2:
1928 		default:
1929 		  trans_type = blas_conj_trans;
1930 		  m_i = n;
1931 		  n_i = m;
1932 		  break;
1933 		}
1934 
1935 		/* lda=n, n+1, or 2n */
1936 		for (lda_val = 0; lda_val < 3; lda_val++) {
1937 		  switch (lda_val) {
1938 		  case 0:
1939 		    lda = m_i;
1940 		    break;
1941 		  case 1:
1942 		    lda = m_i + 1;
1943 		    break;
1944 		  case 2:
1945 		  default:
1946 		    lda = 2 * m_i;
1947 		    break;
1948 		  }
1949 		  if ((order_type == blas_rowmajor && lda < n) ||
1950 		      (order_type == blas_colmajor && lda < m))
1951 		    continue;
1952 
1953 		  /* For the sake of speed, we throw out this case at random */
1954 		  if (xrand(seed) >= test_prob)
1955 		    continue;
1956 
1957 		  /* in the trivial cases, no need to run testgen */
1958 		  if (m > 0 && n > 0)
1959 		    BLAS_zgemv2_z_c_testgen(norm, order_type, trans_type, m,
1960 					    n, &alpha, alpha_flag, A, lda,
1961 					    head_x_gen, tail_x_gen, &beta,
1962 					    beta_flag, y_gen, seed,
1963 					    head_r_true, tail_r_true);
1964 
1965 		  count++;
1966 
1967 		  /* varying incx */
1968 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
1969 		    if (incx_val == 0)
1970 		      continue;
1971 
1972 		    /* setting incx */
1973 		    incx = incx_val;
1974 		    incx *= 2;
1975 
1976 		    ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
1977 		    ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
1978 
1979 		    /* varying incy */
1980 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
1981 		      if (incy_val == 0)
1982 			continue;
1983 
1984 		      /* setting incy */
1985 		      incy = incy_val;
1986 		      incy *= 2;
1987 
1988 		      zcopy_vector(y_gen, m_i, 1, y, incy_val);
1989 
1990 		      /* call BLAS_zgemv2_z_c */
1991 		      FPU_FIX_STOP;
1992 		      BLAS_zgemv2_z_c(order_type, trans_type, m, n, alpha, A,
1993 				      lda, head_x, tail_x, incx_val, beta, y,
1994 				      incy_val);
1995 		      FPU_FIX_START;
1996 
1997 		      /* set y starting index */
1998 		      iy = 0;
1999 		      if (incy < 0)
2000 			iy = -(m_i - 1) * incy;
2001 
2002 		      /* computing the ratio */
2003 		      if (m > 0 && n > 0)
2004 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
2005 			  /* copy row j of A to temp */
2006 			  zge_copy_row(order_type, trans_type, m_i, n_i, A,
2007 				       lda, temp, j);
2008 
2009 			  test_BLAS_zdot2_z_c(n_i, blas_no_conj, alpha, beta,
2010 					      &y_gen[k], &y[iy],
2011 					      &head_r_true[k],
2012 					      &tail_r_true[k], temp, 1,
2013 					      head_x, tail_x, incx_val,
2014 					      eps_int, un_int, &ratios[j]);
2015 
2016 			  /* take the max ratio */
2017 			  if (j == 0) {
2018 			    ratio = ratios[0];
2019 			    /* The !<= below causes NaN error to be detected.
2020 			       Note that (NaN > thresh) is always false. */
2021 			  } else if (!(ratios[j] <= ratio)) {
2022 			    ratio = ratios[j];
2023 			  }
2024 			  iy += incy;
2025 			}
2026 
2027 		      /* Increase the number of bad ratio, if the ratio
2028 		         is bigger than the threshold.
2029 		         The !<= below causes NaN error to be detected.
2030 		         Note that (NaN > thresh) is always false. */
2031 		      if (!(ratio <= thresh)) {
2032 			bad_ratios++;
2033 
2034 			if ((debug == 3) &&	/* print only when debug is on */
2035 			    (count != old_count) &&	/* print if old vector is different
2036 							   from the current one */
2037 			    (d_count == find_max_ratio) &&
2038 			    (p_count <= max_print) &&
2039 			    (ratio > 0.5 * ratio_max)) {
2040 			  old_count = count;
2041 
2042 			  printf
2043 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
2044 			     fname, m, n, ntests, thresh);
2045 
2046 			  /* Print test info */
2047 			  switch (prec) {
2048 			  case blas_prec_single:
2049 			    printf("single ");
2050 			    break;
2051 			  case blas_prec_double:
2052 			    printf("double ");
2053 			    break;
2054 			  case blas_prec_indigenous:
2055 			    printf("indigenous ");
2056 			    break;
2057 			  case blas_prec_extra:
2058 			    printf("extra ");
2059 			    break;
2060 			  }
2061 			  switch (norm) {
2062 			  case -1:
2063 			    printf("near_underflow ");
2064 			    break;
2065 			  case 0:
2066 			    printf("near_one ");
2067 			    break;
2068 			  case 1:
2069 			    printf("near_overflow ");
2070 			    break;
2071 			  }
2072 			  switch (order_type) {
2073 			  case blas_rowmajor:
2074 			    printf("row_major ");
2075 			    break;
2076 			  case blas_colmajor:
2077 			    printf("col_major ");
2078 			    break;
2079 			  }
2080 			  switch (trans_type) {
2081 			  case blas_no_trans:
2082 			    printf("no_trans ");
2083 			    break;
2084 			  case blas_trans:
2085 			    printf("trans ");
2086 			    break;
2087 			  case blas_conj_trans:
2088 			    printf("conj_trans ");
2089 			    break;
2090 			  }
2091 
2092 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
2093 				 incy);
2094 
2095 			  zge_print_matrix(A, m_i, n_i, lda, order_type, "A");
2096 
2097 			  cprint_vector(head_x, n_i, incx_val, "head_x");
2098 			  cprint_vector(tail_x, n_i, incx_val, "tail_x");
2099 			  zprint_vector(y_gen, m_i, 1, "y_gen");
2100 			  zprint_vector(y, m_i, incy_val, "y_final");
2101 
2102 			  printf("      ");
2103 			  printf("alpha = ");
2104 			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
2105 			  printf("\n      ");
2106 			  printf("beta = ");
2107 			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
2108 			  printf("\n");
2109 			  for (j = 0, k = 0; j < m_i * incy_gen;
2110 			       j += incy_gen, k++) {
2111 			    printf("      ");
2112 			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
2113 				   head_r_true[j], tail_r_true[j],
2114 				   head_r_true[j + 1], tail_r_true[j + 1]);
2115 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
2116 			  }
2117 
2118 			  printf("      ratio=%.4e\n", ratio);
2119 			  p_count++;
2120 			}
2121 			if (bad_ratios >= MAX_BAD_TESTS) {
2122 			  printf("\ntoo many failures, exiting....");
2123 			  printf("\nTesting and compilation");
2124 			  printf(" are incomplete\n\n");
2125 			  goto end;
2126 			}
2127 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
2128 			  printf("\nFlagrant ratio error, exiting...");
2129 			  printf("\nTesting and compilation");
2130 			  printf(" are incomplete\n\n");
2131 			  goto end;
2132 			}
2133 		      }
2134 		      if (d_count == 0) {
2135 			if (ratio > ratio_max)
2136 			  ratio_max = ratio;
2137 
2138 			if (ratio != 0.0 && ratio < ratio_min)
2139 			  ratio_min = ratio;
2140 
2141 			tot_tests++;
2142 		      }
2143 		    }		/* incy */
2144 		  }		/* incx */
2145 		}		/* lda */
2146 	      }			/* trans */
2147 	    }			/* order */
2148 	  }			/* tests */
2149 	}			/* norm */
2150 
2151       }				/* beta */
2152     }				/* alpha */
2153   }				/* debug */
2154 
2155   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
2156     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
2157 	   fname, m, n, ntests, thresh);
2158     printf
2159       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
2160        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
2161        ratio_min, ratio_max);
2162   }
2163 
2164 end:
2165   FPU_FIX_STOP;
2166 
2167   blas_free(head_x);
2168   blas_free(tail_x);
2169   blas_free(y);
2170   blas_free(head_x_gen);
2171   blas_free(tail_x_gen);
2172   blas_free(y_gen);
2173   blas_free(temp);
2174   blas_free(A);
2175   blas_free(head_r_true);
2176   blas_free(tail_r_true);
2177   blas_free(ratios);
2178 
2179   *min_ratio = ratio_min;
2180   *num_bad_ratio = bad_ratios;
2181   *num_tests = tot_tests;
2182   return ratio_max;
2183 }
do_test_zgemv2_c_z(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)2184 double do_test_zgemv2_c_z(int m, int n, int ntests, int *seed, double thresh,
2185 			  int debug, float test_prob, double *min_ratio,
2186 			  int *num_bad_ratio, int *num_tests)
2187 
2188 /*
2189  * Purpose
2190  * =======
2191  *
2192  * Runs a series of tests on GEMV2.
2193  *
2194  * Arguments
2195  * =========
2196  *
2197  * m         (input) int
2198  *           The number of rows
2199  *
2200  * n         (input) int
2201  *           The number of columns
2202  *
2203  * ntests    (input) int
2204  *           The number of tests to run for each set of attributes.
2205  *
2206  * seed      (input/output) int
2207  *           The seed for the random number generator used in testgen().
2208  *
2209  * thresh    (input) double
2210  *           When the ratio returned from test() exceeds the specified
2211  *           threshold, the current size, r_true, r_comp, and ratio will be
2212  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
2213  *           to ~10.)
2214  *
2215  * debug     (input) int
2216  *           If debug=3, print summary
2217  *           If debug=2, print summary only if the number of bad ratios > 0
2218  *           If debug=1, print complete info if tests fail
2219  *           If debug=0, return max ratio
2220  *
2221  * test_prob (input) float
2222  *           The specified test will be performed only if the generated
2223  *           random exceeds this threshold.
2224  *
2225  * min_ratio (output) double
2226  *           The minimum ratio
2227  *
2228  * num_bad_ratio (output) int
2229  *               The number of tests fail; they are above the threshold.
2230  *
2231  * num_tests (output) int
2232  *           The number of tests is being performed.
2233  *
2234  * Return value
2235  * ============
2236  *
2237  * The maximum ratio if run successfully, otherwise return -1
2238  *
2239  * Code structure
2240  * ==============
2241  *
2242  *  debug loop  -- if debug is one, the first loop computes the max ratio
2243  *              -- and the last(second) loop outputs debugging information,
2244  *              -- if the test fail and its ratio > 0.5 * max ratio.
2245  *              -- if debug is zero, the loop is executed once
2246  *    alpha loop  -- varying alpha: 0, 1, or random
2247  *      beta loop   -- varying beta: 0, 1, or random
2248 
2249  *          norm loop   -- varying norm: near undeflow, near one, or
2250  *                        -- near overflow
2251  *            numtest loop  -- how many times the test is perform with
2252  *                            -- above set of attributes
2253  *              order loop   -- varying order type: rowmajor or colmajor
2254  *                trans loop    -- varying uplo type: upper or lower
2255  *                  lda loop      -- varying lda: m, m+1, 2m
2256  *                    incx loop     -- varying incx: -2, -1, 1, 2
2257  *                      incy loop     -- varying incy: -2, -1, 1, 2
2258  */
2259 {
2260   /* function name */
2261   const char fname[] = "BLAS_zgemv2_c_z";
2262 
2263   /* max number of debug lines to print */
2264   const int max_print = 8;
2265 
2266   /* Variables in the "x_val" form are loop vars for corresponding
2267      variables */
2268   int i;			/* iterate through the repeating tests */
2269   int j, k;			/* multipurpose counters or variables */
2270   int iy;			/* use to index y */
2271   int incx_val, incy_val,	/* for testing different inc values */
2272     incx, incy;
2273   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
2274   int d_count;			/* counter for debug */
2275   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
2276   int p_count;			/* counter for the number of debug lines printed */
2277   int tot_tests;		/* total number of tests to be done */
2278   int norm;			/* input values of near underflow/one/overflow */
2279   double ratio_max;		/* the current maximum ratio */
2280   double ratio_min;		/* the current minimum ratio */
2281   double *ratios;		/* a temporary variable for calculating ratio */
2282   double ratio;			/* the per-use test ratio from test() */
2283   int bad_ratios;		/* the number of ratios over the threshold */
2284   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
2285   double un_int;		/* the internal underflow threshold */
2286   double alpha[2];
2287   double beta[2];
2288   float *A;
2289   double *head_x;
2290   double *tail_x;
2291   double *y;
2292   float *temp;			/* use for calculating ratio */
2293 
2294   /* x_gen and y_gen are used to store vectors generated by testgen.
2295      they eventually are copied back to x and y */
2296   double *head_x_gen;
2297   double *tail_x_gen;
2298   double *y_gen;
2299 
2300   /* the true r calculated by testgen(), in double-double */
2301   double *head_r_true, *tail_r_true;
2302 
2303   int alpha_val;
2304   int alpha_flag;		/* input flag for BLAS_zgemv2_c_z_testgen */
2305   int beta_val;
2306   int beta_flag;		/* input flag for BLAS_zgemv2_c_z_testgen */
2307   int order_val;
2308   enum blas_order_type order_type;
2309 
2310   enum blas_prec_type prec;
2311   int trans_val;
2312   enum blas_trans_type trans_type;
2313   int m_i;
2314   int n_i;
2315   int max_mn;			/* the max of m and n */
2316   int lda_val;
2317   int lda;
2318   int saved_seed;		/* for saving the original seed */
2319   int count, old_count;		/* use for counting the number of testgen calls * 2 */
2320 
2321   FPU_FIX_DECL;
2322 
2323   /* test for bad arguments */
2324   if (n < 0 || m < 0 || ntests < 0)
2325     BLAS_error(fname, 0, 0, NULL);
2326 
2327   /* initialization */
2328   *num_bad_ratio = 0;
2329   *num_tests = 0;
2330   *min_ratio = 0.0;
2331 
2332   saved_seed = *seed;
2333   ratio_min = 1e308;
2334   ratio_max = 0.0;
2335   ratio = 0.0;
2336   tot_tests = 0;
2337   p_count = 0;
2338   count = 0;
2339   find_max_ratio = 0;
2340   bad_ratios = 0;
2341   old_count = 0;
2342 
2343   if (debug == 3)
2344     find_max_ratio = 1;
2345   max_mn = MAX(m, n);
2346   if (m == 0 || n == 0) {
2347     return 0.0;
2348   }
2349 
2350   FPU_FIX_START;
2351 
2352   incy_gen = 1;
2353   incy_gen *= 2;
2354 
2355   /* get space for calculation */
2356   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
2357   if (max_mn * 2 > 0 && head_x == NULL) {
2358     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2359   }
2360   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
2361   if (max_mn * 2 > 0 && tail_x == NULL) {
2362     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2363   }
2364   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
2365   if (max_mn * 2 > 0 && y == NULL) {
2366     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2367   }
2368   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2369   if (max_mn > 0 && head_x_gen == NULL) {
2370     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2371   }
2372   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2373   if (max_mn > 0 && tail_x_gen == NULL) {
2374     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2375   }
2376   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2377   if (max_mn > 0 && y_gen == NULL) {
2378     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2379   }
2380   temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2381   if (max_mn > 0 && temp == NULL) {
2382     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2383   }
2384   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2385   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2386   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2387     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2388   }
2389   ratios = (double *) blas_malloc(max_mn * sizeof(double));
2390   if (max_mn > 0 && ratios == NULL) {
2391     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2392   }
2393   A =
2394     (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
2395 			  2);
2396   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
2397     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2398   }
2399 
2400   /* The debug iteration:
2401      If debug=1, then will execute the iteration twice. First, compute the
2402      max ratio. Second, print info if ratio > (50% * ratio_max). */
2403   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
2404     bad_ratios = 0;		/* set to zero */
2405 
2406     if ((debug == 3) && (d_count == find_max_ratio))
2407       *seed = saved_seed;	/* restore the original seed */
2408 
2409     /* varying alpha */
2410     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
2411       alpha_flag = 0;
2412       switch (alpha_val) {
2413       case 0:
2414 	alpha[0] = alpha[1] = 0.0;
2415 	alpha_flag = 1;
2416 	break;
2417       case 1:
2418 	alpha[0] = 1.0;
2419 	alpha[1] = 0.0;
2420 	alpha_flag = 1;
2421 	break;
2422       }
2423 
2424       /* varying beta */
2425       for (beta_val = 0; beta_val < 3; beta_val++) {
2426 	beta_flag = 0;
2427 	switch (beta_val) {
2428 	case 0:
2429 	  beta[0] = beta[1] = 0.0;
2430 	  beta_flag = 1;
2431 	  break;
2432 	case 1:
2433 	  beta[0] = 1.0;
2434 	  beta[1] = 0.0;
2435 	  beta_flag = 1;
2436 	  break;
2437 	}
2438 
2439 
2440 	eps_int = power(2, -BITS_D);
2441 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2442 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2443 	prec = blas_prec_double;
2444 
2445 	/* values near underflow, 1, or overflow */
2446 	for (norm = -1; norm <= 1; norm++) {
2447 
2448 	  /* number of tests */
2449 	  for (i = 0; i < ntests; i++) {
2450 
2451 	    /* row or col major */
2452 	    for (order_val = 0; order_val < 2; order_val++) {
2453 	      switch (order_val) {
2454 	      case 0:
2455 		order_type = blas_rowmajor;
2456 		break;
2457 	      case 1:
2458 	      default:
2459 		order_type = blas_colmajor;
2460 		break;
2461 	      }
2462 
2463 	      /* no_trans, trans, or conj_trans */
2464 	      for (trans_val = 0; trans_val < 3; trans_val++) {
2465 		switch (trans_val) {
2466 		case 0:
2467 		  trans_type = blas_no_trans;
2468 		  m_i = m;
2469 		  n_i = n;
2470 		  break;
2471 		case 1:
2472 		  trans_type = blas_trans;
2473 		  m_i = n;
2474 		  n_i = m;
2475 		  break;
2476 		case 2:
2477 		default:
2478 		  trans_type = blas_conj_trans;
2479 		  m_i = n;
2480 		  n_i = m;
2481 		  break;
2482 		}
2483 
2484 		/* lda=n, n+1, or 2n */
2485 		for (lda_val = 0; lda_val < 3; lda_val++) {
2486 		  switch (lda_val) {
2487 		  case 0:
2488 		    lda = m_i;
2489 		    break;
2490 		  case 1:
2491 		    lda = m_i + 1;
2492 		    break;
2493 		  case 2:
2494 		  default:
2495 		    lda = 2 * m_i;
2496 		    break;
2497 		  }
2498 		  if ((order_type == blas_rowmajor && lda < n) ||
2499 		      (order_type == blas_colmajor && lda < m))
2500 		    continue;
2501 
2502 		  /* For the sake of speed, we throw out this case at random */
2503 		  if (xrand(seed) >= test_prob)
2504 		    continue;
2505 
2506 		  /* in the trivial cases, no need to run testgen */
2507 		  if (m > 0 && n > 0)
2508 		    BLAS_zgemv2_c_z_testgen(norm, order_type, trans_type, m,
2509 					    n, &alpha, alpha_flag, A, lda,
2510 					    head_x_gen, tail_x_gen, &beta,
2511 					    beta_flag, y_gen, seed,
2512 					    head_r_true, tail_r_true);
2513 
2514 		  count++;
2515 
2516 		  /* varying incx */
2517 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
2518 		    if (incx_val == 0)
2519 		      continue;
2520 
2521 		    /* setting incx */
2522 		    incx = incx_val;
2523 		    incx *= 2;
2524 
2525 		    zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
2526 		    zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
2527 
2528 		    /* varying incy */
2529 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
2530 		      if (incy_val == 0)
2531 			continue;
2532 
2533 		      /* setting incy */
2534 		      incy = incy_val;
2535 		      incy *= 2;
2536 
2537 		      zcopy_vector(y_gen, m_i, 1, y, incy_val);
2538 
2539 		      /* call BLAS_zgemv2_c_z */
2540 		      FPU_FIX_STOP;
2541 		      BLAS_zgemv2_c_z(order_type, trans_type, m, n, alpha, A,
2542 				      lda, head_x, tail_x, incx_val, beta, y,
2543 				      incy_val);
2544 		      FPU_FIX_START;
2545 
2546 		      /* set y starting index */
2547 		      iy = 0;
2548 		      if (incy < 0)
2549 			iy = -(m_i - 1) * incy;
2550 
2551 		      /* computing the ratio */
2552 		      if (m > 0 && n > 0)
2553 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
2554 			  /* copy row j of A to temp */
2555 			  cge_copy_row(order_type, trans_type, m_i, n_i, A,
2556 				       lda, temp, j);
2557 
2558 			  test_BLAS_zdot2_c_z(n_i, blas_no_conj, alpha, beta,
2559 					      &y_gen[k], &y[iy],
2560 					      &head_r_true[k],
2561 					      &tail_r_true[k], temp, 1,
2562 					      head_x, tail_x, incx_val,
2563 					      eps_int, un_int, &ratios[j]);
2564 
2565 			  /* take the max ratio */
2566 			  if (j == 0) {
2567 			    ratio = ratios[0];
2568 			    /* The !<= below causes NaN error to be detected.
2569 			       Note that (NaN > thresh) is always false. */
2570 			  } else if (!(ratios[j] <= ratio)) {
2571 			    ratio = ratios[j];
2572 			  }
2573 			  iy += incy;
2574 			}
2575 
2576 		      /* Increase the number of bad ratio, if the ratio
2577 		         is bigger than the threshold.
2578 		         The !<= below causes NaN error to be detected.
2579 		         Note that (NaN > thresh) is always false. */
2580 		      if (!(ratio <= thresh)) {
2581 			bad_ratios++;
2582 
2583 			if ((debug == 3) &&	/* print only when debug is on */
2584 			    (count != old_count) &&	/* print if old vector is different
2585 							   from the current one */
2586 			    (d_count == find_max_ratio) &&
2587 			    (p_count <= max_print) &&
2588 			    (ratio > 0.5 * ratio_max)) {
2589 			  old_count = count;
2590 
2591 			  printf
2592 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
2593 			     fname, m, n, ntests, thresh);
2594 
2595 			  /* Print test info */
2596 			  switch (prec) {
2597 			  case blas_prec_single:
2598 			    printf("single ");
2599 			    break;
2600 			  case blas_prec_double:
2601 			    printf("double ");
2602 			    break;
2603 			  case blas_prec_indigenous:
2604 			    printf("indigenous ");
2605 			    break;
2606 			  case blas_prec_extra:
2607 			    printf("extra ");
2608 			    break;
2609 			  }
2610 			  switch (norm) {
2611 			  case -1:
2612 			    printf("near_underflow ");
2613 			    break;
2614 			  case 0:
2615 			    printf("near_one ");
2616 			    break;
2617 			  case 1:
2618 			    printf("near_overflow ");
2619 			    break;
2620 			  }
2621 			  switch (order_type) {
2622 			  case blas_rowmajor:
2623 			    printf("row_major ");
2624 			    break;
2625 			  case blas_colmajor:
2626 			    printf("col_major ");
2627 			    break;
2628 			  }
2629 			  switch (trans_type) {
2630 			  case blas_no_trans:
2631 			    printf("no_trans ");
2632 			    break;
2633 			  case blas_trans:
2634 			    printf("trans ");
2635 			    break;
2636 			  case blas_conj_trans:
2637 			    printf("conj_trans ");
2638 			    break;
2639 			  }
2640 
2641 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
2642 				 incy);
2643 
2644 			  cge_print_matrix(A, m_i, n_i, lda, order_type, "A");
2645 
2646 			  zprint_vector(head_x, n_i, incx_val, "head_x");
2647 			  zprint_vector(tail_x, n_i, incx_val, "tail_x");
2648 			  zprint_vector(y_gen, m_i, 1, "y_gen");
2649 			  zprint_vector(y, m_i, incy_val, "y_final");
2650 
2651 			  printf("      ");
2652 			  printf("alpha = ");
2653 			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
2654 			  printf("\n      ");
2655 			  printf("beta = ");
2656 			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
2657 			  printf("\n");
2658 			  for (j = 0, k = 0; j < m_i * incy_gen;
2659 			       j += incy_gen, k++) {
2660 			    printf("      ");
2661 			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
2662 				   head_r_true[j], tail_r_true[j],
2663 				   head_r_true[j + 1], tail_r_true[j + 1]);
2664 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
2665 			  }
2666 
2667 			  printf("      ratio=%.4e\n", ratio);
2668 			  p_count++;
2669 			}
2670 			if (bad_ratios >= MAX_BAD_TESTS) {
2671 			  printf("\ntoo many failures, exiting....");
2672 			  printf("\nTesting and compilation");
2673 			  printf(" are incomplete\n\n");
2674 			  goto end;
2675 			}
2676 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
2677 			  printf("\nFlagrant ratio error, exiting...");
2678 			  printf("\nTesting and compilation");
2679 			  printf(" are incomplete\n\n");
2680 			  goto end;
2681 			}
2682 		      }
2683 		      if (d_count == 0) {
2684 			if (ratio > ratio_max)
2685 			  ratio_max = ratio;
2686 
2687 			if (ratio != 0.0 && ratio < ratio_min)
2688 			  ratio_min = ratio;
2689 
2690 			tot_tests++;
2691 		      }
2692 		    }		/* incy */
2693 		  }		/* incx */
2694 		}		/* lda */
2695 	      }			/* trans */
2696 	    }			/* order */
2697 	  }			/* tests */
2698 	}			/* norm */
2699 
2700       }				/* beta */
2701     }				/* alpha */
2702   }				/* debug */
2703 
2704   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
2705     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
2706 	   fname, m, n, ntests, thresh);
2707     printf
2708       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
2709        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
2710        ratio_min, ratio_max);
2711   }
2712 
2713 end:
2714   FPU_FIX_STOP;
2715 
2716   blas_free(head_x);
2717   blas_free(tail_x);
2718   blas_free(y);
2719   blas_free(head_x_gen);
2720   blas_free(tail_x_gen);
2721   blas_free(y_gen);
2722   blas_free(temp);
2723   blas_free(A);
2724   blas_free(head_r_true);
2725   blas_free(tail_r_true);
2726   blas_free(ratios);
2727 
2728   *min_ratio = ratio_min;
2729   *num_bad_ratio = bad_ratios;
2730   *num_tests = tot_tests;
2731   return ratio_max;
2732 }
do_test_zgemv2_c_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)2733 double do_test_zgemv2_c_c(int m, int n, int ntests, int *seed, double thresh,
2734 			  int debug, float test_prob, double *min_ratio,
2735 			  int *num_bad_ratio, int *num_tests)
2736 
2737 /*
2738  * Purpose
2739  * =======
2740  *
2741  * Runs a series of tests on GEMV2.
2742  *
2743  * Arguments
2744  * =========
2745  *
2746  * m         (input) int
2747  *           The number of rows
2748  *
2749  * n         (input) int
2750  *           The number of columns
2751  *
2752  * ntests    (input) int
2753  *           The number of tests to run for each set of attributes.
2754  *
2755  * seed      (input/output) int
2756  *           The seed for the random number generator used in testgen().
2757  *
2758  * thresh    (input) double
2759  *           When the ratio returned from test() exceeds the specified
2760  *           threshold, the current size, r_true, r_comp, and ratio will be
2761  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
2762  *           to ~10.)
2763  *
2764  * debug     (input) int
2765  *           If debug=3, print summary
2766  *           If debug=2, print summary only if the number of bad ratios > 0
2767  *           If debug=1, print complete info if tests fail
2768  *           If debug=0, return max ratio
2769  *
2770  * test_prob (input) float
2771  *           The specified test will be performed only if the generated
2772  *           random exceeds this threshold.
2773  *
2774  * min_ratio (output) double
2775  *           The minimum ratio
2776  *
2777  * num_bad_ratio (output) int
2778  *               The number of tests fail; they are above the threshold.
2779  *
2780  * num_tests (output) int
2781  *           The number of tests is being performed.
2782  *
2783  * Return value
2784  * ============
2785  *
2786  * The maximum ratio if run successfully, otherwise return -1
2787  *
2788  * Code structure
2789  * ==============
2790  *
2791  *  debug loop  -- if debug is one, the first loop computes the max ratio
2792  *              -- and the last(second) loop outputs debugging information,
2793  *              -- if the test fail and its ratio > 0.5 * max ratio.
2794  *              -- if debug is zero, the loop is executed once
2795  *    alpha loop  -- varying alpha: 0, 1, or random
2796  *      beta loop   -- varying beta: 0, 1, or random
2797 
2798  *          norm loop   -- varying norm: near undeflow, near one, or
2799  *                        -- near overflow
2800  *            numtest loop  -- how many times the test is perform with
2801  *                            -- above set of attributes
2802  *              order loop   -- varying order type: rowmajor or colmajor
2803  *                trans loop    -- varying uplo type: upper or lower
2804  *                  lda loop      -- varying lda: m, m+1, 2m
2805  *                    incx loop     -- varying incx: -2, -1, 1, 2
2806  *                      incy loop     -- varying incy: -2, -1, 1, 2
2807  */
2808 {
2809   /* function name */
2810   const char fname[] = "BLAS_zgemv2_c_c";
2811 
2812   /* max number of debug lines to print */
2813   const int max_print = 8;
2814 
2815   /* Variables in the "x_val" form are loop vars for corresponding
2816      variables */
2817   int i;			/* iterate through the repeating tests */
2818   int j, k;			/* multipurpose counters or variables */
2819   int iy;			/* use to index y */
2820   int incx_val, incy_val,	/* for testing different inc values */
2821     incx, incy;
2822   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
2823   int d_count;			/* counter for debug */
2824   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
2825   int p_count;			/* counter for the number of debug lines printed */
2826   int tot_tests;		/* total number of tests to be done */
2827   int norm;			/* input values of near underflow/one/overflow */
2828   double ratio_max;		/* the current maximum ratio */
2829   double ratio_min;		/* the current minimum ratio */
2830   double *ratios;		/* a temporary variable for calculating ratio */
2831   double ratio;			/* the per-use test ratio from test() */
2832   int bad_ratios;		/* the number of ratios over the threshold */
2833   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
2834   double un_int;		/* the internal underflow threshold */
2835   double alpha[2];
2836   double beta[2];
2837   float *A;
2838   float *head_x;
2839   float *tail_x;
2840   double *y;
2841   float *temp;			/* use for calculating ratio */
2842 
2843   /* x_gen and y_gen are used to store vectors generated by testgen.
2844      they eventually are copied back to x and y */
2845   float *head_x_gen;
2846   float *tail_x_gen;
2847   double *y_gen;
2848 
2849   /* the true r calculated by testgen(), in double-double */
2850   double *head_r_true, *tail_r_true;
2851 
2852   int alpha_val;
2853   int alpha_flag;		/* input flag for BLAS_zgemv2_c_c_testgen */
2854   int beta_val;
2855   int beta_flag;		/* input flag for BLAS_zgemv2_c_c_testgen */
2856   int order_val;
2857   enum blas_order_type order_type;
2858 
2859   enum blas_prec_type prec;
2860   int trans_val;
2861   enum blas_trans_type trans_type;
2862   int m_i;
2863   int n_i;
2864   int max_mn;			/* the max of m and n */
2865   int lda_val;
2866   int lda;
2867   int saved_seed;		/* for saving the original seed */
2868   int count, old_count;		/* use for counting the number of testgen calls * 2 */
2869 
2870   FPU_FIX_DECL;
2871 
2872   /* test for bad arguments */
2873   if (n < 0 || m < 0 || ntests < 0)
2874     BLAS_error(fname, 0, 0, NULL);
2875 
2876   /* initialization */
2877   *num_bad_ratio = 0;
2878   *num_tests = 0;
2879   *min_ratio = 0.0;
2880 
2881   saved_seed = *seed;
2882   ratio_min = 1e308;
2883   ratio_max = 0.0;
2884   ratio = 0.0;
2885   tot_tests = 0;
2886   p_count = 0;
2887   count = 0;
2888   find_max_ratio = 0;
2889   bad_ratios = 0;
2890   old_count = 0;
2891 
2892   if (debug == 3)
2893     find_max_ratio = 1;
2894   max_mn = MAX(m, n);
2895   if (m == 0 || n == 0) {
2896     return 0.0;
2897   }
2898 
2899   FPU_FIX_START;
2900 
2901   incy_gen = 1;
2902   incy_gen *= 2;
2903 
2904   /* get space for calculation */
2905   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
2906   if (max_mn * 2 > 0 && head_x == NULL) {
2907     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2908   }
2909   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
2910   if (max_mn * 2 > 0 && tail_x == NULL) {
2911     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2912   }
2913   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
2914   if (max_mn * 2 > 0 && y == NULL) {
2915     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2916   }
2917   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2918   if (max_mn > 0 && head_x_gen == NULL) {
2919     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2920   }
2921   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2922   if (max_mn > 0 && tail_x_gen == NULL) {
2923     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2924   }
2925   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2926   if (max_mn > 0 && y_gen == NULL) {
2927     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2928   }
2929   temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
2930   if (max_mn > 0 && temp == NULL) {
2931     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2932   }
2933   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2934   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
2935   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
2936     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2937   }
2938   ratios = (double *) blas_malloc(max_mn * sizeof(double));
2939   if (max_mn > 0 && ratios == NULL) {
2940     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2941   }
2942   A =
2943     (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
2944 			  2);
2945   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
2946     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
2947   }
2948 
2949   /* The debug iteration:
2950      If debug=1, then will execute the iteration twice. First, compute the
2951      max ratio. Second, print info if ratio > (50% * ratio_max). */
2952   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
2953     bad_ratios = 0;		/* set to zero */
2954 
2955     if ((debug == 3) && (d_count == find_max_ratio))
2956       *seed = saved_seed;	/* restore the original seed */
2957 
2958     /* varying alpha */
2959     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
2960       alpha_flag = 0;
2961       switch (alpha_val) {
2962       case 0:
2963 	alpha[0] = alpha[1] = 0.0;
2964 	alpha_flag = 1;
2965 	break;
2966       case 1:
2967 	alpha[0] = 1.0;
2968 	alpha[1] = 0.0;
2969 	alpha_flag = 1;
2970 	break;
2971       }
2972 
2973       /* varying beta */
2974       for (beta_val = 0; beta_val < 3; beta_val++) {
2975 	beta_flag = 0;
2976 	switch (beta_val) {
2977 	case 0:
2978 	  beta[0] = beta[1] = 0.0;
2979 	  beta_flag = 1;
2980 	  break;
2981 	case 1:
2982 	  beta[0] = 1.0;
2983 	  beta[1] = 0.0;
2984 	  beta_flag = 1;
2985 	  break;
2986 	}
2987 
2988 
2989 	eps_int = power(2, -BITS_D);
2990 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
2991 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
2992 	prec = blas_prec_double;
2993 
2994 	/* values near underflow, 1, or overflow */
2995 	for (norm = -1; norm <= 1; norm++) {
2996 
2997 	  /* number of tests */
2998 	  for (i = 0; i < ntests; i++) {
2999 
3000 	    /* row or col major */
3001 	    for (order_val = 0; order_val < 2; order_val++) {
3002 	      switch (order_val) {
3003 	      case 0:
3004 		order_type = blas_rowmajor;
3005 		break;
3006 	      case 1:
3007 	      default:
3008 		order_type = blas_colmajor;
3009 		break;
3010 	      }
3011 
3012 	      /* no_trans, trans, or conj_trans */
3013 	      for (trans_val = 0; trans_val < 3; trans_val++) {
3014 		switch (trans_val) {
3015 		case 0:
3016 		  trans_type = blas_no_trans;
3017 		  m_i = m;
3018 		  n_i = n;
3019 		  break;
3020 		case 1:
3021 		  trans_type = blas_trans;
3022 		  m_i = n;
3023 		  n_i = m;
3024 		  break;
3025 		case 2:
3026 		default:
3027 		  trans_type = blas_conj_trans;
3028 		  m_i = n;
3029 		  n_i = m;
3030 		  break;
3031 		}
3032 
3033 		/* lda=n, n+1, or 2n */
3034 		for (lda_val = 0; lda_val < 3; lda_val++) {
3035 		  switch (lda_val) {
3036 		  case 0:
3037 		    lda = m_i;
3038 		    break;
3039 		  case 1:
3040 		    lda = m_i + 1;
3041 		    break;
3042 		  case 2:
3043 		  default:
3044 		    lda = 2 * m_i;
3045 		    break;
3046 		  }
3047 		  if ((order_type == blas_rowmajor && lda < n) ||
3048 		      (order_type == blas_colmajor && lda < m))
3049 		    continue;
3050 
3051 		  /* For the sake of speed, we throw out this case at random */
3052 		  if (xrand(seed) >= test_prob)
3053 		    continue;
3054 
3055 		  /* in the trivial cases, no need to run testgen */
3056 		  if (m > 0 && n > 0)
3057 		    BLAS_zgemv2_c_c_testgen(norm, order_type, trans_type, m,
3058 					    n, &alpha, alpha_flag, A, lda,
3059 					    head_x_gen, tail_x_gen, &beta,
3060 					    beta_flag, y_gen, seed,
3061 					    head_r_true, tail_r_true);
3062 
3063 		  count++;
3064 
3065 		  /* varying incx */
3066 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
3067 		    if (incx_val == 0)
3068 		      continue;
3069 
3070 		    /* setting incx */
3071 		    incx = incx_val;
3072 		    incx *= 2;
3073 
3074 		    ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
3075 		    ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
3076 
3077 		    /* varying incy */
3078 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
3079 		      if (incy_val == 0)
3080 			continue;
3081 
3082 		      /* setting incy */
3083 		      incy = incy_val;
3084 		      incy *= 2;
3085 
3086 		      zcopy_vector(y_gen, m_i, 1, y, incy_val);
3087 
3088 		      /* call BLAS_zgemv2_c_c */
3089 		      FPU_FIX_STOP;
3090 		      BLAS_zgemv2_c_c(order_type, trans_type, m, n, alpha, A,
3091 				      lda, head_x, tail_x, incx_val, beta, y,
3092 				      incy_val);
3093 		      FPU_FIX_START;
3094 
3095 		      /* set y starting index */
3096 		      iy = 0;
3097 		      if (incy < 0)
3098 			iy = -(m_i - 1) * incy;
3099 
3100 		      /* computing the ratio */
3101 		      if (m > 0 && n > 0)
3102 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
3103 			  /* copy row j of A to temp */
3104 			  cge_copy_row(order_type, trans_type, m_i, n_i, A,
3105 				       lda, temp, j);
3106 
3107 			  test_BLAS_zdot2_c_c(n_i, blas_no_conj, alpha, beta,
3108 					      &y_gen[k], &y[iy],
3109 					      &head_r_true[k],
3110 					      &tail_r_true[k], temp, 1,
3111 					      head_x, tail_x, incx_val,
3112 					      eps_int, un_int, &ratios[j]);
3113 
3114 			  /* take the max ratio */
3115 			  if (j == 0) {
3116 			    ratio = ratios[0];
3117 			    /* The !<= below causes NaN error to be detected.
3118 			       Note that (NaN > thresh) is always false. */
3119 			  } else if (!(ratios[j] <= ratio)) {
3120 			    ratio = ratios[j];
3121 			  }
3122 			  iy += incy;
3123 			}
3124 
3125 		      /* Increase the number of bad ratio, if the ratio
3126 		         is bigger than the threshold.
3127 		         The !<= below causes NaN error to be detected.
3128 		         Note that (NaN > thresh) is always false. */
3129 		      if (!(ratio <= thresh)) {
3130 			bad_ratios++;
3131 
3132 			if ((debug == 3) &&	/* print only when debug is on */
3133 			    (count != old_count) &&	/* print if old vector is different
3134 							   from the current one */
3135 			    (d_count == find_max_ratio) &&
3136 			    (p_count <= max_print) &&
3137 			    (ratio > 0.5 * ratio_max)) {
3138 			  old_count = count;
3139 
3140 			  printf
3141 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
3142 			     fname, m, n, ntests, thresh);
3143 
3144 			  /* Print test info */
3145 			  switch (prec) {
3146 			  case blas_prec_single:
3147 			    printf("single ");
3148 			    break;
3149 			  case blas_prec_double:
3150 			    printf("double ");
3151 			    break;
3152 			  case blas_prec_indigenous:
3153 			    printf("indigenous ");
3154 			    break;
3155 			  case blas_prec_extra:
3156 			    printf("extra ");
3157 			    break;
3158 			  }
3159 			  switch (norm) {
3160 			  case -1:
3161 			    printf("near_underflow ");
3162 			    break;
3163 			  case 0:
3164 			    printf("near_one ");
3165 			    break;
3166 			  case 1:
3167 			    printf("near_overflow ");
3168 			    break;
3169 			  }
3170 			  switch (order_type) {
3171 			  case blas_rowmajor:
3172 			    printf("row_major ");
3173 			    break;
3174 			  case blas_colmajor:
3175 			    printf("col_major ");
3176 			    break;
3177 			  }
3178 			  switch (trans_type) {
3179 			  case blas_no_trans:
3180 			    printf("no_trans ");
3181 			    break;
3182 			  case blas_trans:
3183 			    printf("trans ");
3184 			    break;
3185 			  case blas_conj_trans:
3186 			    printf("conj_trans ");
3187 			    break;
3188 			  }
3189 
3190 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
3191 				 incy);
3192 
3193 			  cge_print_matrix(A, m_i, n_i, lda, order_type, "A");
3194 
3195 			  cprint_vector(head_x, n_i, incx_val, "head_x");
3196 			  cprint_vector(tail_x, n_i, incx_val, "tail_x");
3197 			  zprint_vector(y_gen, m_i, 1, "y_gen");
3198 			  zprint_vector(y, m_i, incy_val, "y_final");
3199 
3200 			  printf("      ");
3201 			  printf("alpha = ");
3202 			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
3203 			  printf("\n      ");
3204 			  printf("beta = ");
3205 			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
3206 			  printf("\n");
3207 			  for (j = 0, k = 0; j < m_i * incy_gen;
3208 			       j += incy_gen, k++) {
3209 			    printf("      ");
3210 			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
3211 				   head_r_true[j], tail_r_true[j],
3212 				   head_r_true[j + 1], tail_r_true[j + 1]);
3213 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
3214 			  }
3215 
3216 			  printf("      ratio=%.4e\n", ratio);
3217 			  p_count++;
3218 			}
3219 			if (bad_ratios >= MAX_BAD_TESTS) {
3220 			  printf("\ntoo many failures, exiting....");
3221 			  printf("\nTesting and compilation");
3222 			  printf(" are incomplete\n\n");
3223 			  goto end;
3224 			}
3225 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
3226 			  printf("\nFlagrant ratio error, exiting...");
3227 			  printf("\nTesting and compilation");
3228 			  printf(" are incomplete\n\n");
3229 			  goto end;
3230 			}
3231 		      }
3232 		      if (d_count == 0) {
3233 			if (ratio > ratio_max)
3234 			  ratio_max = ratio;
3235 
3236 			if (ratio != 0.0 && ratio < ratio_min)
3237 			  ratio_min = ratio;
3238 
3239 			tot_tests++;
3240 		      }
3241 		    }		/* incy */
3242 		  }		/* incx */
3243 		}		/* lda */
3244 	      }			/* trans */
3245 	    }			/* order */
3246 	  }			/* tests */
3247 	}			/* norm */
3248 
3249       }				/* beta */
3250     }				/* alpha */
3251   }				/* debug */
3252 
3253   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
3254     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
3255 	   fname, m, n, ntests, thresh);
3256     printf
3257       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
3258        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
3259        ratio_min, ratio_max);
3260   }
3261 
3262 end:
3263   FPU_FIX_STOP;
3264 
3265   blas_free(head_x);
3266   blas_free(tail_x);
3267   blas_free(y);
3268   blas_free(head_x_gen);
3269   blas_free(tail_x_gen);
3270   blas_free(y_gen);
3271   blas_free(temp);
3272   blas_free(A);
3273   blas_free(head_r_true);
3274   blas_free(tail_r_true);
3275   blas_free(ratios);
3276 
3277   *min_ratio = ratio_min;
3278   *num_bad_ratio = bad_ratios;
3279   *num_tests = tot_tests;
3280   return ratio_max;
3281 }
do_test_cgemv2_c_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)3282 double do_test_cgemv2_c_s(int m, int n, int ntests, int *seed, double thresh,
3283 			  int debug, float test_prob, double *min_ratio,
3284 			  int *num_bad_ratio, int *num_tests)
3285 
3286 /*
3287  * Purpose
3288  * =======
3289  *
3290  * Runs a series of tests on GEMV2.
3291  *
3292  * Arguments
3293  * =========
3294  *
3295  * m         (input) int
3296  *           The number of rows
3297  *
3298  * n         (input) int
3299  *           The number of columns
3300  *
3301  * ntests    (input) int
3302  *           The number of tests to run for each set of attributes.
3303  *
3304  * seed      (input/output) int
3305  *           The seed for the random number generator used in testgen().
3306  *
3307  * thresh    (input) double
3308  *           When the ratio returned from test() exceeds the specified
3309  *           threshold, the current size, r_true, r_comp, and ratio will be
3310  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
3311  *           to ~10.)
3312  *
3313  * debug     (input) int
3314  *           If debug=3, print summary
3315  *           If debug=2, print summary only if the number of bad ratios > 0
3316  *           If debug=1, print complete info if tests fail
3317  *           If debug=0, return max ratio
3318  *
3319  * test_prob (input) float
3320  *           The specified test will be performed only if the generated
3321  *           random exceeds this threshold.
3322  *
3323  * min_ratio (output) double
3324  *           The minimum ratio
3325  *
3326  * num_bad_ratio (output) int
3327  *               The number of tests fail; they are above the threshold.
3328  *
3329  * num_tests (output) int
3330  *           The number of tests is being performed.
3331  *
3332  * Return value
3333  * ============
3334  *
3335  * The maximum ratio if run successfully, otherwise return -1
3336  *
3337  * Code structure
3338  * ==============
3339  *
3340  *  debug loop  -- if debug is one, the first loop computes the max ratio
3341  *              -- and the last(second) loop outputs debugging information,
3342  *              -- if the test fail and its ratio > 0.5 * max ratio.
3343  *              -- if debug is zero, the loop is executed once
3344  *    alpha loop  -- varying alpha: 0, 1, or random
3345  *      beta loop   -- varying beta: 0, 1, or random
3346 
3347  *          norm loop   -- varying norm: near undeflow, near one, or
3348  *                        -- near overflow
3349  *            numtest loop  -- how many times the test is perform with
3350  *                            -- above set of attributes
3351  *              order loop   -- varying order type: rowmajor or colmajor
3352  *                trans loop    -- varying uplo type: upper or lower
3353  *                  lda loop      -- varying lda: m, m+1, 2m
3354  *                    incx loop     -- varying incx: -2, -1, 1, 2
3355  *                      incy loop     -- varying incy: -2, -1, 1, 2
3356  */
3357 {
3358   /* function name */
3359   const char fname[] = "BLAS_cgemv2_c_s";
3360 
3361   /* max number of debug lines to print */
3362   const int max_print = 8;
3363 
3364   /* Variables in the "x_val" form are loop vars for corresponding
3365      variables */
3366   int i;			/* iterate through the repeating tests */
3367   int j, k;			/* multipurpose counters or variables */
3368   int iy;			/* use to index y */
3369   int incx_val, incy_val,	/* for testing different inc values */
3370     incx, incy;
3371   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
3372   int d_count;			/* counter for debug */
3373   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
3374   int p_count;			/* counter for the number of debug lines printed */
3375   int tot_tests;		/* total number of tests to be done */
3376   int norm;			/* input values of near underflow/one/overflow */
3377   double ratio_max;		/* the current maximum ratio */
3378   double ratio_min;		/* the current minimum ratio */
3379   double *ratios;		/* a temporary variable for calculating ratio */
3380   double ratio;			/* the per-use test ratio from test() */
3381   int bad_ratios;		/* the number of ratios over the threshold */
3382   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
3383   double un_int;		/* the internal underflow threshold */
3384   float alpha[2];
3385   float beta[2];
3386   float *A;
3387   float *head_x;
3388   float *tail_x;
3389   float *y;
3390   float *temp;			/* use for calculating ratio */
3391 
3392   /* x_gen and y_gen are used to store vectors generated by testgen.
3393      they eventually are copied back to x and y */
3394   float *head_x_gen;
3395   float *tail_x_gen;
3396   float *y_gen;
3397 
3398   /* the true r calculated by testgen(), in double-double */
3399   double *head_r_true, *tail_r_true;
3400 
3401   int alpha_val;
3402   int alpha_flag;		/* input flag for BLAS_cgemv2_c_s_testgen */
3403   int beta_val;
3404   int beta_flag;		/* input flag for BLAS_cgemv2_c_s_testgen */
3405   int order_val;
3406   enum blas_order_type order_type;
3407 
3408   enum blas_prec_type prec;
3409   int trans_val;
3410   enum blas_trans_type trans_type;
3411   int m_i;
3412   int n_i;
3413   int max_mn;			/* the max of m and n */
3414   int lda_val;
3415   int lda;
3416   int saved_seed;		/* for saving the original seed */
3417   int count, old_count;		/* use for counting the number of testgen calls * 2 */
3418 
3419   FPU_FIX_DECL;
3420 
3421   /* test for bad arguments */
3422   if (n < 0 || m < 0 || ntests < 0)
3423     BLAS_error(fname, 0, 0, NULL);
3424 
3425   /* initialization */
3426   *num_bad_ratio = 0;
3427   *num_tests = 0;
3428   *min_ratio = 0.0;
3429 
3430   saved_seed = *seed;
3431   ratio_min = 1e308;
3432   ratio_max = 0.0;
3433   ratio = 0.0;
3434   tot_tests = 0;
3435   p_count = 0;
3436   count = 0;
3437   find_max_ratio = 0;
3438   bad_ratios = 0;
3439   old_count = 0;
3440 
3441   if (debug == 3)
3442     find_max_ratio = 1;
3443   max_mn = MAX(m, n);
3444   if (m == 0 || n == 0) {
3445     return 0.0;
3446   }
3447 
3448   FPU_FIX_START;
3449 
3450   incy_gen = 1;
3451   incy_gen *= 2;
3452 
3453   /* get space for calculation */
3454   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
3455   if (max_mn * 2 > 0 && head_x == NULL) {
3456     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3457   }
3458   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
3459   if (max_mn * 2 > 0 && tail_x == NULL) {
3460     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3461   }
3462   y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
3463   if (max_mn * 2 > 0 && y == NULL) {
3464     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3465   }
3466   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
3467   if (max_mn > 0 && head_x_gen == NULL) {
3468     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3469   }
3470   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
3471   if (max_mn > 0 && tail_x_gen == NULL) {
3472     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3473   }
3474   y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
3475   if (max_mn > 0 && y_gen == NULL) {
3476     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3477   }
3478   temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
3479   if (max_mn > 0 && temp == NULL) {
3480     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3481   }
3482   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
3483   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
3484   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
3485     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3486   }
3487   ratios = (double *) blas_malloc(max_mn * sizeof(double));
3488   if (max_mn > 0 && ratios == NULL) {
3489     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3490   }
3491   A =
3492     (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
3493 			  2);
3494   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
3495     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
3496   }
3497 
3498   /* The debug iteration:
3499      If debug=1, then will execute the iteration twice. First, compute the
3500      max ratio. Second, print info if ratio > (50% * ratio_max). */
3501   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
3502     bad_ratios = 0;		/* set to zero */
3503 
3504     if ((debug == 3) && (d_count == find_max_ratio))
3505       *seed = saved_seed;	/* restore the original seed */
3506 
3507     /* varying alpha */
3508     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
3509       alpha_flag = 0;
3510       switch (alpha_val) {
3511       case 0:
3512 	alpha[0] = alpha[1] = 0.0;
3513 	alpha_flag = 1;
3514 	break;
3515       case 1:
3516 	alpha[0] = 1.0;
3517 	alpha[1] = 0.0;
3518 	alpha_flag = 1;
3519 	break;
3520       }
3521 
3522       /* varying beta */
3523       for (beta_val = 0; beta_val < 3; beta_val++) {
3524 	beta_flag = 0;
3525 	switch (beta_val) {
3526 	case 0:
3527 	  beta[0] = beta[1] = 0.0;
3528 	  beta_flag = 1;
3529 	  break;
3530 	case 1:
3531 	  beta[0] = 1.0;
3532 	  beta[1] = 0.0;
3533 	  beta_flag = 1;
3534 	  break;
3535 	}
3536 
3537 
3538 	eps_int = power(2, -BITS_S);
3539 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
3540 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
3541 	prec = blas_prec_single;
3542 
3543 	/* values near underflow, 1, or overflow */
3544 	for (norm = -1; norm <= 1; norm++) {
3545 
3546 	  /* number of tests */
3547 	  for (i = 0; i < ntests; i++) {
3548 
3549 	    /* row or col major */
3550 	    for (order_val = 0; order_val < 2; order_val++) {
3551 	      switch (order_val) {
3552 	      case 0:
3553 		order_type = blas_rowmajor;
3554 		break;
3555 	      case 1:
3556 	      default:
3557 		order_type = blas_colmajor;
3558 		break;
3559 	      }
3560 
3561 	      /* no_trans, trans, or conj_trans */
3562 	      for (trans_val = 0; trans_val < 3; trans_val++) {
3563 		switch (trans_val) {
3564 		case 0:
3565 		  trans_type = blas_no_trans;
3566 		  m_i = m;
3567 		  n_i = n;
3568 		  break;
3569 		case 1:
3570 		  trans_type = blas_trans;
3571 		  m_i = n;
3572 		  n_i = m;
3573 		  break;
3574 		case 2:
3575 		default:
3576 		  trans_type = blas_conj_trans;
3577 		  m_i = n;
3578 		  n_i = m;
3579 		  break;
3580 		}
3581 
3582 		/* lda=n, n+1, or 2n */
3583 		for (lda_val = 0; lda_val < 3; lda_val++) {
3584 		  switch (lda_val) {
3585 		  case 0:
3586 		    lda = m_i;
3587 		    break;
3588 		  case 1:
3589 		    lda = m_i + 1;
3590 		    break;
3591 		  case 2:
3592 		  default:
3593 		    lda = 2 * m_i;
3594 		    break;
3595 		  }
3596 		  if ((order_type == blas_rowmajor && lda < n) ||
3597 		      (order_type == blas_colmajor && lda < m))
3598 		    continue;
3599 
3600 		  /* For the sake of speed, we throw out this case at random */
3601 		  if (xrand(seed) >= test_prob)
3602 		    continue;
3603 
3604 		  /* in the trivial cases, no need to run testgen */
3605 		  if (m > 0 && n > 0)
3606 		    BLAS_cgemv2_c_s_testgen(norm, order_type, trans_type, m,
3607 					    n, &alpha, alpha_flag, A, lda,
3608 					    head_x_gen, tail_x_gen, &beta,
3609 					    beta_flag, y_gen, seed,
3610 					    head_r_true, tail_r_true);
3611 
3612 		  count++;
3613 
3614 		  /* varying incx */
3615 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
3616 		    if (incx_val == 0)
3617 		      continue;
3618 
3619 		    /* setting incx */
3620 		    incx = incx_val;
3621 
3622 
3623 		    scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
3624 		    scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
3625 
3626 		    /* varying incy */
3627 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
3628 		      if (incy_val == 0)
3629 			continue;
3630 
3631 		      /* setting incy */
3632 		      incy = incy_val;
3633 		      incy *= 2;
3634 
3635 		      ccopy_vector(y_gen, m_i, 1, y, incy_val);
3636 
3637 		      /* call BLAS_cgemv2_c_s */
3638 		      FPU_FIX_STOP;
3639 		      BLAS_cgemv2_c_s(order_type, trans_type, m, n, alpha, A,
3640 				      lda, head_x, tail_x, incx_val, beta, y,
3641 				      incy_val);
3642 		      FPU_FIX_START;
3643 
3644 		      /* set y starting index */
3645 		      iy = 0;
3646 		      if (incy < 0)
3647 			iy = -(m_i - 1) * incy;
3648 
3649 		      /* computing the ratio */
3650 		      if (m > 0 && n > 0)
3651 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
3652 			  /* copy row j of A to temp */
3653 			  cge_copy_row(order_type, trans_type, m_i, n_i, A,
3654 				       lda, temp, j);
3655 
3656 			  test_BLAS_cdot2_c_s(n_i, blas_no_conj, alpha, beta,
3657 					      &y_gen[k], &y[iy],
3658 					      &head_r_true[k],
3659 					      &tail_r_true[k], temp, 1,
3660 					      head_x, tail_x, incx_val,
3661 					      eps_int, un_int, &ratios[j]);
3662 
3663 			  /* take the max ratio */
3664 			  if (j == 0) {
3665 			    ratio = ratios[0];
3666 			    /* The !<= below causes NaN error to be detected.
3667 			       Note that (NaN > thresh) is always false. */
3668 			  } else if (!(ratios[j] <= ratio)) {
3669 			    ratio = ratios[j];
3670 			  }
3671 			  iy += incy;
3672 			}
3673 
3674 		      /* Increase the number of bad ratio, if the ratio
3675 		         is bigger than the threshold.
3676 		         The !<= below causes NaN error to be detected.
3677 		         Note that (NaN > thresh) is always false. */
3678 		      if (!(ratio <= thresh)) {
3679 			bad_ratios++;
3680 
3681 			if ((debug == 3) &&	/* print only when debug is on */
3682 			    (count != old_count) &&	/* print if old vector is different
3683 							   from the current one */
3684 			    (d_count == find_max_ratio) &&
3685 			    (p_count <= max_print) &&
3686 			    (ratio > 0.5 * ratio_max)) {
3687 			  old_count = count;
3688 
3689 			  printf
3690 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
3691 			     fname, m, n, ntests, thresh);
3692 
3693 			  /* Print test info */
3694 			  switch (prec) {
3695 			  case blas_prec_single:
3696 			    printf("single ");
3697 			    break;
3698 			  case blas_prec_double:
3699 			    printf("double ");
3700 			    break;
3701 			  case blas_prec_indigenous:
3702 			    printf("indigenous ");
3703 			    break;
3704 			  case blas_prec_extra:
3705 			    printf("extra ");
3706 			    break;
3707 			  }
3708 			  switch (norm) {
3709 			  case -1:
3710 			    printf("near_underflow ");
3711 			    break;
3712 			  case 0:
3713 			    printf("near_one ");
3714 			    break;
3715 			  case 1:
3716 			    printf("near_overflow ");
3717 			    break;
3718 			  }
3719 			  switch (order_type) {
3720 			  case blas_rowmajor:
3721 			    printf("row_major ");
3722 			    break;
3723 			  case blas_colmajor:
3724 			    printf("col_major ");
3725 			    break;
3726 			  }
3727 			  switch (trans_type) {
3728 			  case blas_no_trans:
3729 			    printf("no_trans ");
3730 			    break;
3731 			  case blas_trans:
3732 			    printf("trans ");
3733 			    break;
3734 			  case blas_conj_trans:
3735 			    printf("conj_trans ");
3736 			    break;
3737 			  }
3738 
3739 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
3740 				 incy);
3741 
3742 			  cge_print_matrix(A, m_i, n_i, lda, order_type, "A");
3743 
3744 			  sprint_vector(head_x, n_i, incx_val, "head_x");
3745 			  sprint_vector(tail_x, n_i, incx_val, "tail_x");
3746 			  cprint_vector(y_gen, m_i, 1, "y_gen");
3747 			  cprint_vector(y, m_i, incy_val, "y_final");
3748 
3749 			  printf("      ");
3750 			  printf("alpha = ");
3751 			  printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
3752 			  printf("\n      ");
3753 			  printf("beta = ");
3754 			  printf("(%16.8e, %16.8e)", beta[0], beta[1]);
3755 			  printf("\n");
3756 			  for (j = 0, k = 0; j < m_i * incy_gen;
3757 			       j += incy_gen, k++) {
3758 			    printf("      ");
3759 			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
3760 				   head_r_true[j], tail_r_true[j],
3761 				   head_r_true[j + 1], tail_r_true[j + 1]);
3762 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
3763 			  }
3764 
3765 			  printf("      ratio=%.4e\n", ratio);
3766 			  p_count++;
3767 			}
3768 			if (bad_ratios >= MAX_BAD_TESTS) {
3769 			  printf("\ntoo many failures, exiting....");
3770 			  printf("\nTesting and compilation");
3771 			  printf(" are incomplete\n\n");
3772 			  goto end;
3773 			}
3774 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
3775 			  printf("\nFlagrant ratio error, exiting...");
3776 			  printf("\nTesting and compilation");
3777 			  printf(" are incomplete\n\n");
3778 			  goto end;
3779 			}
3780 		      }
3781 		      if (d_count == 0) {
3782 			if (ratio > ratio_max)
3783 			  ratio_max = ratio;
3784 
3785 			if (ratio != 0.0 && ratio < ratio_min)
3786 			  ratio_min = ratio;
3787 
3788 			tot_tests++;
3789 		      }
3790 		    }		/* incy */
3791 		  }		/* incx */
3792 		}		/* lda */
3793 	      }			/* trans */
3794 	    }			/* order */
3795 	  }			/* tests */
3796 	}			/* norm */
3797 
3798       }				/* beta */
3799     }				/* alpha */
3800   }				/* debug */
3801 
3802   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
3803     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
3804 	   fname, m, n, ntests, thresh);
3805     printf
3806       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
3807        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
3808        ratio_min, ratio_max);
3809   }
3810 
3811 end:
3812   FPU_FIX_STOP;
3813 
3814   blas_free(head_x);
3815   blas_free(tail_x);
3816   blas_free(y);
3817   blas_free(head_x_gen);
3818   blas_free(tail_x_gen);
3819   blas_free(y_gen);
3820   blas_free(temp);
3821   blas_free(A);
3822   blas_free(head_r_true);
3823   blas_free(tail_r_true);
3824   blas_free(ratios);
3825 
3826   *min_ratio = ratio_min;
3827   *num_bad_ratio = bad_ratios;
3828   *num_tests = tot_tests;
3829   return ratio_max;
3830 }
do_test_cgemv2_s_c(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)3831 double do_test_cgemv2_s_c(int m, int n, int ntests, int *seed, double thresh,
3832 			  int debug, float test_prob, double *min_ratio,
3833 			  int *num_bad_ratio, int *num_tests)
3834 
3835 /*
3836  * Purpose
3837  * =======
3838  *
3839  * Runs a series of tests on GEMV2.
3840  *
3841  * Arguments
3842  * =========
3843  *
3844  * m         (input) int
3845  *           The number of rows
3846  *
3847  * n         (input) int
3848  *           The number of columns
3849  *
3850  * ntests    (input) int
3851  *           The number of tests to run for each set of attributes.
3852  *
3853  * seed      (input/output) int
3854  *           The seed for the random number generator used in testgen().
3855  *
3856  * thresh    (input) double
3857  *           When the ratio returned from test() exceeds the specified
3858  *           threshold, the current size, r_true, r_comp, and ratio will be
3859  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
3860  *           to ~10.)
3861  *
3862  * debug     (input) int
3863  *           If debug=3, print summary
3864  *           If debug=2, print summary only if the number of bad ratios > 0
3865  *           If debug=1, print complete info if tests fail
3866  *           If debug=0, return max ratio
3867  *
3868  * test_prob (input) float
3869  *           The specified test will be performed only if the generated
3870  *           random exceeds this threshold.
3871  *
3872  * min_ratio (output) double
3873  *           The minimum ratio
3874  *
3875  * num_bad_ratio (output) int
3876  *               The number of tests fail; they are above the threshold.
3877  *
3878  * num_tests (output) int
3879  *           The number of tests is being performed.
3880  *
3881  * Return value
3882  * ============
3883  *
3884  * The maximum ratio if run successfully, otherwise return -1
3885  *
3886  * Code structure
3887  * ==============
3888  *
3889  *  debug loop  -- if debug is one, the first loop computes the max ratio
3890  *              -- and the last(second) loop outputs debugging information,
3891  *              -- if the test fail and its ratio > 0.5 * max ratio.
3892  *              -- if debug is zero, the loop is executed once
3893  *    alpha loop  -- varying alpha: 0, 1, or random
3894  *      beta loop   -- varying beta: 0, 1, or random
3895 
3896  *          norm loop   -- varying norm: near undeflow, near one, or
3897  *                        -- near overflow
3898  *            numtest loop  -- how many times the test is perform with
3899  *                            -- above set of attributes
3900  *              order loop   -- varying order type: rowmajor or colmajor
3901  *                trans loop    -- varying uplo type: upper or lower
3902  *                  lda loop      -- varying lda: m, m+1, 2m
3903  *                    incx loop     -- varying incx: -2, -1, 1, 2
3904  *                      incy loop     -- varying incy: -2, -1, 1, 2
3905  */
3906 {
3907   /* function name */
3908   const char fname[] = "BLAS_cgemv2_s_c";
3909 
3910   /* max number of debug lines to print */
3911   const int max_print = 8;
3912 
3913   /* Variables in the "x_val" form are loop vars for corresponding
3914      variables */
3915   int i;			/* iterate through the repeating tests */
3916   int j, k;			/* multipurpose counters or variables */
3917   int iy;			/* use to index y */
3918   int incx_val, incy_val,	/* for testing different inc values */
3919     incx, incy;
3920   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
3921   int d_count;			/* counter for debug */
3922   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
3923   int p_count;			/* counter for the number of debug lines printed */
3924   int tot_tests;		/* total number of tests to be done */
3925   int norm;			/* input values of near underflow/one/overflow */
3926   double ratio_max;		/* the current maximum ratio */
3927   double ratio_min;		/* the current minimum ratio */
3928   double *ratios;		/* a temporary variable for calculating ratio */
3929   double ratio;			/* the per-use test ratio from test() */
3930   int bad_ratios;		/* the number of ratios over the threshold */
3931   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
3932   double un_int;		/* the internal underflow threshold */
3933   float alpha[2];
3934   float beta[2];
3935   float *A;
3936   float *head_x;
3937   float *tail_x;
3938   float *y;
3939   float *temp;			/* use for calculating ratio */
3940 
3941   /* x_gen and y_gen are used to store vectors generated by testgen.
3942      they eventually are copied back to x and y */
3943   float *head_x_gen;
3944   float *tail_x_gen;
3945   float *y_gen;
3946 
3947   /* the true r calculated by testgen(), in double-double */
3948   double *head_r_true, *tail_r_true;
3949 
3950   int alpha_val;
3951   int alpha_flag;		/* input flag for BLAS_cgemv2_s_c_testgen */
3952   int beta_val;
3953   int beta_flag;		/* input flag for BLAS_cgemv2_s_c_testgen */
3954   int order_val;
3955   enum blas_order_type order_type;
3956 
3957   enum blas_prec_type prec;
3958   int trans_val;
3959   enum blas_trans_type trans_type;
3960   int m_i;
3961   int n_i;
3962   int max_mn;			/* the max of m and n */
3963   int lda_val;
3964   int lda;
3965   int saved_seed;		/* for saving the original seed */
3966   int count, old_count;		/* use for counting the number of testgen calls * 2 */
3967 
3968   FPU_FIX_DECL;
3969 
3970   /* test for bad arguments */
3971   if (n < 0 || m < 0 || ntests < 0)
3972     BLAS_error(fname, 0, 0, NULL);
3973 
3974   /* initialization */
3975   *num_bad_ratio = 0;
3976   *num_tests = 0;
3977   *min_ratio = 0.0;
3978 
3979   saved_seed = *seed;
3980   ratio_min = 1e308;
3981   ratio_max = 0.0;
3982   ratio = 0.0;
3983   tot_tests = 0;
3984   p_count = 0;
3985   count = 0;
3986   find_max_ratio = 0;
3987   bad_ratios = 0;
3988   old_count = 0;
3989 
3990   if (debug == 3)
3991     find_max_ratio = 1;
3992   max_mn = MAX(m, n);
3993   if (m == 0 || n == 0) {
3994     return 0.0;
3995   }
3996 
3997   FPU_FIX_START;
3998 
3999   incy_gen = 1;
4000   incy_gen *= 2;
4001 
4002   /* get space for calculation */
4003   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
4004   if (max_mn * 2 > 0 && head_x == NULL) {
4005     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4006   }
4007   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
4008   if (max_mn * 2 > 0 && tail_x == NULL) {
4009     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4010   }
4011   y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
4012   if (max_mn * 2 > 0 && y == NULL) {
4013     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4014   }
4015   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4016   if (max_mn > 0 && head_x_gen == NULL) {
4017     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4018   }
4019   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4020   if (max_mn > 0 && tail_x_gen == NULL) {
4021     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4022   }
4023   y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4024   if (max_mn > 0 && y_gen == NULL) {
4025     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4026   }
4027   temp = (float *) blas_malloc(max_mn * sizeof(float));
4028   if (max_mn > 0 && temp == NULL) {
4029     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4030   }
4031   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
4032   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
4033   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4034     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4035   }
4036   ratios = (double *) blas_malloc(max_mn * sizeof(double));
4037   if (max_mn > 0 && ratios == NULL) {
4038     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4039   }
4040   A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
4041   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
4042     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4043   }
4044 
4045   /* The debug iteration:
4046      If debug=1, then will execute the iteration twice. First, compute the
4047      max ratio. Second, print info if ratio > (50% * ratio_max). */
4048   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
4049     bad_ratios = 0;		/* set to zero */
4050 
4051     if ((debug == 3) && (d_count == find_max_ratio))
4052       *seed = saved_seed;	/* restore the original seed */
4053 
4054     /* varying alpha */
4055     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
4056       alpha_flag = 0;
4057       switch (alpha_val) {
4058       case 0:
4059 	alpha[0] = alpha[1] = 0.0;
4060 	alpha_flag = 1;
4061 	break;
4062       case 1:
4063 	alpha[0] = 1.0;
4064 	alpha[1] = 0.0;
4065 	alpha_flag = 1;
4066 	break;
4067       }
4068 
4069       /* varying beta */
4070       for (beta_val = 0; beta_val < 3; beta_val++) {
4071 	beta_flag = 0;
4072 	switch (beta_val) {
4073 	case 0:
4074 	  beta[0] = beta[1] = 0.0;
4075 	  beta_flag = 1;
4076 	  break;
4077 	case 1:
4078 	  beta[0] = 1.0;
4079 	  beta[1] = 0.0;
4080 	  beta_flag = 1;
4081 	  break;
4082 	}
4083 
4084 
4085 	eps_int = power(2, -BITS_S);
4086 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
4087 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
4088 	prec = blas_prec_single;
4089 
4090 	/* values near underflow, 1, or overflow */
4091 	for (norm = -1; norm <= 1; norm++) {
4092 
4093 	  /* number of tests */
4094 	  for (i = 0; i < ntests; i++) {
4095 
4096 	    /* row or col major */
4097 	    for (order_val = 0; order_val < 2; order_val++) {
4098 	      switch (order_val) {
4099 	      case 0:
4100 		order_type = blas_rowmajor;
4101 		break;
4102 	      case 1:
4103 	      default:
4104 		order_type = blas_colmajor;
4105 		break;
4106 	      }
4107 
4108 	      /* no_trans, trans, or conj_trans */
4109 	      for (trans_val = 0; trans_val < 3; trans_val++) {
4110 		switch (trans_val) {
4111 		case 0:
4112 		  trans_type = blas_no_trans;
4113 		  m_i = m;
4114 		  n_i = n;
4115 		  break;
4116 		case 1:
4117 		  trans_type = blas_trans;
4118 		  m_i = n;
4119 		  n_i = m;
4120 		  break;
4121 		case 2:
4122 		default:
4123 		  trans_type = blas_conj_trans;
4124 		  m_i = n;
4125 		  n_i = m;
4126 		  break;
4127 		}
4128 
4129 		/* lda=n, n+1, or 2n */
4130 		for (lda_val = 0; lda_val < 3; lda_val++) {
4131 		  switch (lda_val) {
4132 		  case 0:
4133 		    lda = m_i;
4134 		    break;
4135 		  case 1:
4136 		    lda = m_i + 1;
4137 		    break;
4138 		  case 2:
4139 		  default:
4140 		    lda = 2 * m_i;
4141 		    break;
4142 		  }
4143 		  if ((order_type == blas_rowmajor && lda < n) ||
4144 		      (order_type == blas_colmajor && lda < m))
4145 		    continue;
4146 
4147 		  /* For the sake of speed, we throw out this case at random */
4148 		  if (xrand(seed) >= test_prob)
4149 		    continue;
4150 
4151 		  /* in the trivial cases, no need to run testgen */
4152 		  if (m > 0 && n > 0)
4153 		    BLAS_cgemv2_s_c_testgen(norm, order_type, trans_type, m,
4154 					    n, &alpha, alpha_flag, A, lda,
4155 					    head_x_gen, tail_x_gen, &beta,
4156 					    beta_flag, y_gen, seed,
4157 					    head_r_true, tail_r_true);
4158 
4159 		  count++;
4160 
4161 		  /* varying incx */
4162 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
4163 		    if (incx_val == 0)
4164 		      continue;
4165 
4166 		    /* setting incx */
4167 		    incx = incx_val;
4168 		    incx *= 2;
4169 
4170 		    ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
4171 		    ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
4172 
4173 		    /* varying incy */
4174 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
4175 		      if (incy_val == 0)
4176 			continue;
4177 
4178 		      /* setting incy */
4179 		      incy = incy_val;
4180 		      incy *= 2;
4181 
4182 		      ccopy_vector(y_gen, m_i, 1, y, incy_val);
4183 
4184 		      /* call BLAS_cgemv2_s_c */
4185 		      FPU_FIX_STOP;
4186 		      BLAS_cgemv2_s_c(order_type, trans_type, m, n, alpha, A,
4187 				      lda, head_x, tail_x, incx_val, beta, y,
4188 				      incy_val);
4189 		      FPU_FIX_START;
4190 
4191 		      /* set y starting index */
4192 		      iy = 0;
4193 		      if (incy < 0)
4194 			iy = -(m_i - 1) * incy;
4195 
4196 		      /* computing the ratio */
4197 		      if (m > 0 && n > 0)
4198 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
4199 			  /* copy row j of A to temp */
4200 			  sge_copy_row(order_type, trans_type, m_i, n_i, A,
4201 				       lda, temp, j);
4202 
4203 			  test_BLAS_cdot2_s_c(n_i, blas_no_conj, alpha, beta,
4204 					      &y_gen[k], &y[iy],
4205 					      &head_r_true[k],
4206 					      &tail_r_true[k], temp, 1,
4207 					      head_x, tail_x, incx_val,
4208 					      eps_int, un_int, &ratios[j]);
4209 
4210 			  /* take the max ratio */
4211 			  if (j == 0) {
4212 			    ratio = ratios[0];
4213 			    /* The !<= below causes NaN error to be detected.
4214 			       Note that (NaN > thresh) is always false. */
4215 			  } else if (!(ratios[j] <= ratio)) {
4216 			    ratio = ratios[j];
4217 			  }
4218 			  iy += incy;
4219 			}
4220 
4221 		      /* Increase the number of bad ratio, if the ratio
4222 		         is bigger than the threshold.
4223 		         The !<= below causes NaN error to be detected.
4224 		         Note that (NaN > thresh) is always false. */
4225 		      if (!(ratio <= thresh)) {
4226 			bad_ratios++;
4227 
4228 			if ((debug == 3) &&	/* print only when debug is on */
4229 			    (count != old_count) &&	/* print if old vector is different
4230 							   from the current one */
4231 			    (d_count == find_max_ratio) &&
4232 			    (p_count <= max_print) &&
4233 			    (ratio > 0.5 * ratio_max)) {
4234 			  old_count = count;
4235 
4236 			  printf
4237 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
4238 			     fname, m, n, ntests, thresh);
4239 
4240 			  /* Print test info */
4241 			  switch (prec) {
4242 			  case blas_prec_single:
4243 			    printf("single ");
4244 			    break;
4245 			  case blas_prec_double:
4246 			    printf("double ");
4247 			    break;
4248 			  case blas_prec_indigenous:
4249 			    printf("indigenous ");
4250 			    break;
4251 			  case blas_prec_extra:
4252 			    printf("extra ");
4253 			    break;
4254 			  }
4255 			  switch (norm) {
4256 			  case -1:
4257 			    printf("near_underflow ");
4258 			    break;
4259 			  case 0:
4260 			    printf("near_one ");
4261 			    break;
4262 			  case 1:
4263 			    printf("near_overflow ");
4264 			    break;
4265 			  }
4266 			  switch (order_type) {
4267 			  case blas_rowmajor:
4268 			    printf("row_major ");
4269 			    break;
4270 			  case blas_colmajor:
4271 			    printf("col_major ");
4272 			    break;
4273 			  }
4274 			  switch (trans_type) {
4275 			  case blas_no_trans:
4276 			    printf("no_trans ");
4277 			    break;
4278 			  case blas_trans:
4279 			    printf("trans ");
4280 			    break;
4281 			  case blas_conj_trans:
4282 			    printf("conj_trans ");
4283 			    break;
4284 			  }
4285 
4286 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
4287 				 incy);
4288 
4289 			  sge_print_matrix(A, m_i, n_i, lda, order_type, "A");
4290 
4291 			  cprint_vector(head_x, n_i, incx_val, "head_x");
4292 			  cprint_vector(tail_x, n_i, incx_val, "tail_x");
4293 			  cprint_vector(y_gen, m_i, 1, "y_gen");
4294 			  cprint_vector(y, m_i, incy_val, "y_final");
4295 
4296 			  printf("      ");
4297 			  printf("alpha = ");
4298 			  printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
4299 			  printf("\n      ");
4300 			  printf("beta = ");
4301 			  printf("(%16.8e, %16.8e)", beta[0], beta[1]);
4302 			  printf("\n");
4303 			  for (j = 0, k = 0; j < m_i * incy_gen;
4304 			       j += incy_gen, k++) {
4305 			    printf("      ");
4306 			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
4307 				   head_r_true[j], tail_r_true[j],
4308 				   head_r_true[j + 1], tail_r_true[j + 1]);
4309 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
4310 			  }
4311 
4312 			  printf("      ratio=%.4e\n", ratio);
4313 			  p_count++;
4314 			}
4315 			if (bad_ratios >= MAX_BAD_TESTS) {
4316 			  printf("\ntoo many failures, exiting....");
4317 			  printf("\nTesting and compilation");
4318 			  printf(" are incomplete\n\n");
4319 			  goto end;
4320 			}
4321 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4322 			  printf("\nFlagrant ratio error, exiting...");
4323 			  printf("\nTesting and compilation");
4324 			  printf(" are incomplete\n\n");
4325 			  goto end;
4326 			}
4327 		      }
4328 		      if (d_count == 0) {
4329 			if (ratio > ratio_max)
4330 			  ratio_max = ratio;
4331 
4332 			if (ratio != 0.0 && ratio < ratio_min)
4333 			  ratio_min = ratio;
4334 
4335 			tot_tests++;
4336 		      }
4337 		    }		/* incy */
4338 		  }		/* incx */
4339 		}		/* lda */
4340 	      }			/* trans */
4341 	    }			/* order */
4342 	  }			/* tests */
4343 	}			/* norm */
4344 
4345       }				/* beta */
4346     }				/* alpha */
4347   }				/* debug */
4348 
4349   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
4350     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
4351 	   fname, m, n, ntests, thresh);
4352     printf
4353       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
4354        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
4355        ratio_min, ratio_max);
4356   }
4357 
4358 end:
4359   FPU_FIX_STOP;
4360 
4361   blas_free(head_x);
4362   blas_free(tail_x);
4363   blas_free(y);
4364   blas_free(head_x_gen);
4365   blas_free(tail_x_gen);
4366   blas_free(y_gen);
4367   blas_free(temp);
4368   blas_free(A);
4369   blas_free(head_r_true);
4370   blas_free(tail_r_true);
4371   blas_free(ratios);
4372 
4373   *min_ratio = ratio_min;
4374   *num_bad_ratio = bad_ratios;
4375   *num_tests = tot_tests;
4376   return ratio_max;
4377 }
do_test_cgemv2_s_s(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)4378 double do_test_cgemv2_s_s(int m, int n, int ntests, int *seed, double thresh,
4379 			  int debug, float test_prob, double *min_ratio,
4380 			  int *num_bad_ratio, int *num_tests)
4381 
4382 /*
4383  * Purpose
4384  * =======
4385  *
4386  * Runs a series of tests on GEMV2.
4387  *
4388  * Arguments
4389  * =========
4390  *
4391  * m         (input) int
4392  *           The number of rows
4393  *
4394  * n         (input) int
4395  *           The number of columns
4396  *
4397  * ntests    (input) int
4398  *           The number of tests to run for each set of attributes.
4399  *
4400  * seed      (input/output) int
4401  *           The seed for the random number generator used in testgen().
4402  *
4403  * thresh    (input) double
4404  *           When the ratio returned from test() exceeds the specified
4405  *           threshold, the current size, r_true, r_comp, and ratio will be
4406  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
4407  *           to ~10.)
4408  *
4409  * debug     (input) int
4410  *           If debug=3, print summary
4411  *           If debug=2, print summary only if the number of bad ratios > 0
4412  *           If debug=1, print complete info if tests fail
4413  *           If debug=0, return max ratio
4414  *
4415  * test_prob (input) float
4416  *           The specified test will be performed only if the generated
4417  *           random exceeds this threshold.
4418  *
4419  * min_ratio (output) double
4420  *           The minimum ratio
4421  *
4422  * num_bad_ratio (output) int
4423  *               The number of tests fail; they are above the threshold.
4424  *
4425  * num_tests (output) int
4426  *           The number of tests is being performed.
4427  *
4428  * Return value
4429  * ============
4430  *
4431  * The maximum ratio if run successfully, otherwise return -1
4432  *
4433  * Code structure
4434  * ==============
4435  *
4436  *  debug loop  -- if debug is one, the first loop computes the max ratio
4437  *              -- and the last(second) loop outputs debugging information,
4438  *              -- if the test fail and its ratio > 0.5 * max ratio.
4439  *              -- if debug is zero, the loop is executed once
4440  *    alpha loop  -- varying alpha: 0, 1, or random
4441  *      beta loop   -- varying beta: 0, 1, or random
4442 
4443  *          norm loop   -- varying norm: near undeflow, near one, or
4444  *                        -- near overflow
4445  *            numtest loop  -- how many times the test is perform with
4446  *                            -- above set of attributes
4447  *              order loop   -- varying order type: rowmajor or colmajor
4448  *                trans loop    -- varying uplo type: upper or lower
4449  *                  lda loop      -- varying lda: m, m+1, 2m
4450  *                    incx loop     -- varying incx: -2, -1, 1, 2
4451  *                      incy loop     -- varying incy: -2, -1, 1, 2
4452  */
4453 {
4454   /* function name */
4455   const char fname[] = "BLAS_cgemv2_s_s";
4456 
4457   /* max number of debug lines to print */
4458   const int max_print = 8;
4459 
4460   /* Variables in the "x_val" form are loop vars for corresponding
4461      variables */
4462   int i;			/* iterate through the repeating tests */
4463   int j, k;			/* multipurpose counters or variables */
4464   int iy;			/* use to index y */
4465   int incx_val, incy_val,	/* for testing different inc values */
4466     incx, incy;
4467   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
4468   int d_count;			/* counter for debug */
4469   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
4470   int p_count;			/* counter for the number of debug lines printed */
4471   int tot_tests;		/* total number of tests to be done */
4472   int norm;			/* input values of near underflow/one/overflow */
4473   double ratio_max;		/* the current maximum ratio */
4474   double ratio_min;		/* the current minimum ratio */
4475   double *ratios;		/* a temporary variable for calculating ratio */
4476   double ratio;			/* the per-use test ratio from test() */
4477   int bad_ratios;		/* the number of ratios over the threshold */
4478   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
4479   double un_int;		/* the internal underflow threshold */
4480   float alpha[2];
4481   float beta[2];
4482   float *A;
4483   float *head_x;
4484   float *tail_x;
4485   float *y;
4486   float *temp;			/* use for calculating ratio */
4487 
4488   /* x_gen and y_gen are used to store vectors generated by testgen.
4489      they eventually are copied back to x and y */
4490   float *head_x_gen;
4491   float *tail_x_gen;
4492   float *y_gen;
4493 
4494   /* the true r calculated by testgen(), in double-double */
4495   double *head_r_true, *tail_r_true;
4496 
4497   int alpha_val;
4498   int alpha_flag;		/* input flag for BLAS_cgemv2_s_s_testgen */
4499   int beta_val;
4500   int beta_flag;		/* input flag for BLAS_cgemv2_s_s_testgen */
4501   int order_val;
4502   enum blas_order_type order_type;
4503 
4504   enum blas_prec_type prec;
4505   int trans_val;
4506   enum blas_trans_type trans_type;
4507   int m_i;
4508   int n_i;
4509   int max_mn;			/* the max of m and n */
4510   int lda_val;
4511   int lda;
4512   int saved_seed;		/* for saving the original seed */
4513   int count, old_count;		/* use for counting the number of testgen calls * 2 */
4514 
4515   FPU_FIX_DECL;
4516 
4517   /* test for bad arguments */
4518   if (n < 0 || m < 0 || ntests < 0)
4519     BLAS_error(fname, 0, 0, NULL);
4520 
4521   /* initialization */
4522   *num_bad_ratio = 0;
4523   *num_tests = 0;
4524   *min_ratio = 0.0;
4525 
4526   saved_seed = *seed;
4527   ratio_min = 1e308;
4528   ratio_max = 0.0;
4529   ratio = 0.0;
4530   tot_tests = 0;
4531   p_count = 0;
4532   count = 0;
4533   find_max_ratio = 0;
4534   bad_ratios = 0;
4535   old_count = 0;
4536 
4537   if (debug == 3)
4538     find_max_ratio = 1;
4539   max_mn = MAX(m, n);
4540   if (m == 0 || n == 0) {
4541     return 0.0;
4542   }
4543 
4544   FPU_FIX_START;
4545 
4546   incy_gen = 1;
4547   incy_gen *= 2;
4548 
4549   /* get space for calculation */
4550   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
4551   if (max_mn * 2 > 0 && head_x == NULL) {
4552     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4553   }
4554   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
4555   if (max_mn * 2 > 0 && tail_x == NULL) {
4556     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4557   }
4558   y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
4559   if (max_mn * 2 > 0 && y == NULL) {
4560     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4561   }
4562   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
4563   if (max_mn > 0 && head_x_gen == NULL) {
4564     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4565   }
4566   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
4567   if (max_mn > 0 && tail_x_gen == NULL) {
4568     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4569   }
4570   y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
4571   if (max_mn > 0 && y_gen == NULL) {
4572     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4573   }
4574   temp = (float *) blas_malloc(max_mn * sizeof(float));
4575   if (max_mn > 0 && temp == NULL) {
4576     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4577   }
4578   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
4579   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
4580   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
4581     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4582   }
4583   ratios = (double *) blas_malloc(max_mn * sizeof(double));
4584   if (max_mn > 0 && ratios == NULL) {
4585     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4586   }
4587   A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
4588   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
4589     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
4590   }
4591 
4592   /* The debug iteration:
4593      If debug=1, then will execute the iteration twice. First, compute the
4594      max ratio. Second, print info if ratio > (50% * ratio_max). */
4595   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
4596     bad_ratios = 0;		/* set to zero */
4597 
4598     if ((debug == 3) && (d_count == find_max_ratio))
4599       *seed = saved_seed;	/* restore the original seed */
4600 
4601     /* varying alpha */
4602     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
4603       alpha_flag = 0;
4604       switch (alpha_val) {
4605       case 0:
4606 	alpha[0] = alpha[1] = 0.0;
4607 	alpha_flag = 1;
4608 	break;
4609       case 1:
4610 	alpha[0] = 1.0;
4611 	alpha[1] = 0.0;
4612 	alpha_flag = 1;
4613 	break;
4614       }
4615 
4616       /* varying beta */
4617       for (beta_val = 0; beta_val < 3; beta_val++) {
4618 	beta_flag = 0;
4619 	switch (beta_val) {
4620 	case 0:
4621 	  beta[0] = beta[1] = 0.0;
4622 	  beta_flag = 1;
4623 	  break;
4624 	case 1:
4625 	  beta[0] = 1.0;
4626 	  beta[1] = 0.0;
4627 	  beta_flag = 1;
4628 	  break;
4629 	}
4630 
4631 
4632 	eps_int = power(2, -BITS_S);
4633 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
4634 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
4635 	prec = blas_prec_single;
4636 
4637 	/* values near underflow, 1, or overflow */
4638 	for (norm = -1; norm <= 1; norm++) {
4639 
4640 	  /* number of tests */
4641 	  for (i = 0; i < ntests; i++) {
4642 
4643 	    /* row or col major */
4644 	    for (order_val = 0; order_val < 2; order_val++) {
4645 	      switch (order_val) {
4646 	      case 0:
4647 		order_type = blas_rowmajor;
4648 		break;
4649 	      case 1:
4650 	      default:
4651 		order_type = blas_colmajor;
4652 		break;
4653 	      }
4654 
4655 	      /* no_trans, trans, or conj_trans */
4656 	      for (trans_val = 0; trans_val < 3; trans_val++) {
4657 		switch (trans_val) {
4658 		case 0:
4659 		  trans_type = blas_no_trans;
4660 		  m_i = m;
4661 		  n_i = n;
4662 		  break;
4663 		case 1:
4664 		  trans_type = blas_trans;
4665 		  m_i = n;
4666 		  n_i = m;
4667 		  break;
4668 		case 2:
4669 		default:
4670 		  trans_type = blas_conj_trans;
4671 		  m_i = n;
4672 		  n_i = m;
4673 		  break;
4674 		}
4675 
4676 		/* lda=n, n+1, or 2n */
4677 		for (lda_val = 0; lda_val < 3; lda_val++) {
4678 		  switch (lda_val) {
4679 		  case 0:
4680 		    lda = m_i;
4681 		    break;
4682 		  case 1:
4683 		    lda = m_i + 1;
4684 		    break;
4685 		  case 2:
4686 		  default:
4687 		    lda = 2 * m_i;
4688 		    break;
4689 		  }
4690 		  if ((order_type == blas_rowmajor && lda < n) ||
4691 		      (order_type == blas_colmajor && lda < m))
4692 		    continue;
4693 
4694 		  /* For the sake of speed, we throw out this case at random */
4695 		  if (xrand(seed) >= test_prob)
4696 		    continue;
4697 
4698 		  /* in the trivial cases, no need to run testgen */
4699 		  if (m > 0 && n > 0)
4700 		    BLAS_cgemv2_s_s_testgen(norm, order_type, trans_type, m,
4701 					    n, &alpha, alpha_flag, A, lda,
4702 					    head_x_gen, tail_x_gen, &beta,
4703 					    beta_flag, y_gen, seed,
4704 					    head_r_true, tail_r_true);
4705 
4706 		  count++;
4707 
4708 		  /* varying incx */
4709 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
4710 		    if (incx_val == 0)
4711 		      continue;
4712 
4713 		    /* setting incx */
4714 		    incx = incx_val;
4715 
4716 
4717 		    scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
4718 		    scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
4719 
4720 		    /* varying incy */
4721 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
4722 		      if (incy_val == 0)
4723 			continue;
4724 
4725 		      /* setting incy */
4726 		      incy = incy_val;
4727 		      incy *= 2;
4728 
4729 		      ccopy_vector(y_gen, m_i, 1, y, incy_val);
4730 
4731 		      /* call BLAS_cgemv2_s_s */
4732 		      FPU_FIX_STOP;
4733 		      BLAS_cgemv2_s_s(order_type, trans_type, m, n, alpha, A,
4734 				      lda, head_x, tail_x, incx_val, beta, y,
4735 				      incy_val);
4736 		      FPU_FIX_START;
4737 
4738 		      /* set y starting index */
4739 		      iy = 0;
4740 		      if (incy < 0)
4741 			iy = -(m_i - 1) * incy;
4742 
4743 		      /* computing the ratio */
4744 		      if (m > 0 && n > 0)
4745 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
4746 			  /* copy row j of A to temp */
4747 			  sge_copy_row(order_type, trans_type, m_i, n_i, A,
4748 				       lda, temp, j);
4749 
4750 			  test_BLAS_cdot2_s_s(n_i, blas_no_conj, alpha, beta,
4751 					      &y_gen[k], &y[iy],
4752 					      &head_r_true[k],
4753 					      &tail_r_true[k], temp, 1,
4754 					      head_x, tail_x, incx_val,
4755 					      eps_int, un_int, &ratios[j]);
4756 
4757 			  /* take the max ratio */
4758 			  if (j == 0) {
4759 			    ratio = ratios[0];
4760 			    /* The !<= below causes NaN error to be detected.
4761 			       Note that (NaN > thresh) is always false. */
4762 			  } else if (!(ratios[j] <= ratio)) {
4763 			    ratio = ratios[j];
4764 			  }
4765 			  iy += incy;
4766 			}
4767 
4768 		      /* Increase the number of bad ratio, if the ratio
4769 		         is bigger than the threshold.
4770 		         The !<= below causes NaN error to be detected.
4771 		         Note that (NaN > thresh) is always false. */
4772 		      if (!(ratio <= thresh)) {
4773 			bad_ratios++;
4774 
4775 			if ((debug == 3) &&	/* print only when debug is on */
4776 			    (count != old_count) &&	/* print if old vector is different
4777 							   from the current one */
4778 			    (d_count == find_max_ratio) &&
4779 			    (p_count <= max_print) &&
4780 			    (ratio > 0.5 * ratio_max)) {
4781 			  old_count = count;
4782 
4783 			  printf
4784 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
4785 			     fname, m, n, ntests, thresh);
4786 
4787 			  /* Print test info */
4788 			  switch (prec) {
4789 			  case blas_prec_single:
4790 			    printf("single ");
4791 			    break;
4792 			  case blas_prec_double:
4793 			    printf("double ");
4794 			    break;
4795 			  case blas_prec_indigenous:
4796 			    printf("indigenous ");
4797 			    break;
4798 			  case blas_prec_extra:
4799 			    printf("extra ");
4800 			    break;
4801 			  }
4802 			  switch (norm) {
4803 			  case -1:
4804 			    printf("near_underflow ");
4805 			    break;
4806 			  case 0:
4807 			    printf("near_one ");
4808 			    break;
4809 			  case 1:
4810 			    printf("near_overflow ");
4811 			    break;
4812 			  }
4813 			  switch (order_type) {
4814 			  case blas_rowmajor:
4815 			    printf("row_major ");
4816 			    break;
4817 			  case blas_colmajor:
4818 			    printf("col_major ");
4819 			    break;
4820 			  }
4821 			  switch (trans_type) {
4822 			  case blas_no_trans:
4823 			    printf("no_trans ");
4824 			    break;
4825 			  case blas_trans:
4826 			    printf("trans ");
4827 			    break;
4828 			  case blas_conj_trans:
4829 			    printf("conj_trans ");
4830 			    break;
4831 			  }
4832 
4833 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
4834 				 incy);
4835 
4836 			  sge_print_matrix(A, m_i, n_i, lda, order_type, "A");
4837 
4838 			  sprint_vector(head_x, n_i, incx_val, "head_x");
4839 			  sprint_vector(tail_x, n_i, incx_val, "tail_x");
4840 			  cprint_vector(y_gen, m_i, 1, "y_gen");
4841 			  cprint_vector(y, m_i, incy_val, "y_final");
4842 
4843 			  printf("      ");
4844 			  printf("alpha = ");
4845 			  printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
4846 			  printf("\n      ");
4847 			  printf("beta = ");
4848 			  printf("(%16.8e, %16.8e)", beta[0], beta[1]);
4849 			  printf("\n");
4850 			  for (j = 0, k = 0; j < m_i * incy_gen;
4851 			       j += incy_gen, k++) {
4852 			    printf("      ");
4853 			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
4854 				   head_r_true[j], tail_r_true[j],
4855 				   head_r_true[j + 1], tail_r_true[j + 1]);
4856 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
4857 			  }
4858 
4859 			  printf("      ratio=%.4e\n", ratio);
4860 			  p_count++;
4861 			}
4862 			if (bad_ratios >= MAX_BAD_TESTS) {
4863 			  printf("\ntoo many failures, exiting....");
4864 			  printf("\nTesting and compilation");
4865 			  printf(" are incomplete\n\n");
4866 			  goto end;
4867 			}
4868 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
4869 			  printf("\nFlagrant ratio error, exiting...");
4870 			  printf("\nTesting and compilation");
4871 			  printf(" are incomplete\n\n");
4872 			  goto end;
4873 			}
4874 		      }
4875 		      if (d_count == 0) {
4876 			if (ratio > ratio_max)
4877 			  ratio_max = ratio;
4878 
4879 			if (ratio != 0.0 && ratio < ratio_min)
4880 			  ratio_min = ratio;
4881 
4882 			tot_tests++;
4883 		      }
4884 		    }		/* incy */
4885 		  }		/* incx */
4886 		}		/* lda */
4887 	      }			/* trans */
4888 	    }			/* order */
4889 	  }			/* tests */
4890 	}			/* norm */
4891 
4892       }				/* beta */
4893     }				/* alpha */
4894   }				/* debug */
4895 
4896   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
4897     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
4898 	   fname, m, n, ntests, thresh);
4899     printf
4900       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
4901        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
4902        ratio_min, ratio_max);
4903   }
4904 
4905 end:
4906   FPU_FIX_STOP;
4907 
4908   blas_free(head_x);
4909   blas_free(tail_x);
4910   blas_free(y);
4911   blas_free(head_x_gen);
4912   blas_free(tail_x_gen);
4913   blas_free(y_gen);
4914   blas_free(temp);
4915   blas_free(A);
4916   blas_free(head_r_true);
4917   blas_free(tail_r_true);
4918   blas_free(ratios);
4919 
4920   *min_ratio = ratio_min;
4921   *num_bad_ratio = bad_ratios;
4922   *num_tests = tot_tests;
4923   return ratio_max;
4924 }
do_test_zgemv2_z_d(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)4925 double do_test_zgemv2_z_d(int m, int n, int ntests, int *seed, double thresh,
4926 			  int debug, float test_prob, double *min_ratio,
4927 			  int *num_bad_ratio, int *num_tests)
4928 
4929 /*
4930  * Purpose
4931  * =======
4932  *
4933  * Runs a series of tests on GEMV2.
4934  *
4935  * Arguments
4936  * =========
4937  *
4938  * m         (input) int
4939  *           The number of rows
4940  *
4941  * n         (input) int
4942  *           The number of columns
4943  *
4944  * ntests    (input) int
4945  *           The number of tests to run for each set of attributes.
4946  *
4947  * seed      (input/output) int
4948  *           The seed for the random number generator used in testgen().
4949  *
4950  * thresh    (input) double
4951  *           When the ratio returned from test() exceeds the specified
4952  *           threshold, the current size, r_true, r_comp, and ratio will be
4953  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
4954  *           to ~10.)
4955  *
4956  * debug     (input) int
4957  *           If debug=3, print summary
4958  *           If debug=2, print summary only if the number of bad ratios > 0
4959  *           If debug=1, print complete info if tests fail
4960  *           If debug=0, return max ratio
4961  *
4962  * test_prob (input) float
4963  *           The specified test will be performed only if the generated
4964  *           random exceeds this threshold.
4965  *
4966  * min_ratio (output) double
4967  *           The minimum ratio
4968  *
4969  * num_bad_ratio (output) int
4970  *               The number of tests fail; they are above the threshold.
4971  *
4972  * num_tests (output) int
4973  *           The number of tests is being performed.
4974  *
4975  * Return value
4976  * ============
4977  *
4978  * The maximum ratio if run successfully, otherwise return -1
4979  *
4980  * Code structure
4981  * ==============
4982  *
4983  *  debug loop  -- if debug is one, the first loop computes the max ratio
4984  *              -- and the last(second) loop outputs debugging information,
4985  *              -- if the test fail and its ratio > 0.5 * max ratio.
4986  *              -- if debug is zero, the loop is executed once
4987  *    alpha loop  -- varying alpha: 0, 1, or random
4988  *      beta loop   -- varying beta: 0, 1, or random
4989 
4990  *          norm loop   -- varying norm: near undeflow, near one, or
4991  *                        -- near overflow
4992  *            numtest loop  -- how many times the test is perform with
4993  *                            -- above set of attributes
4994  *              order loop   -- varying order type: rowmajor or colmajor
4995  *                trans loop    -- varying uplo type: upper or lower
4996  *                  lda loop      -- varying lda: m, m+1, 2m
4997  *                    incx loop     -- varying incx: -2, -1, 1, 2
4998  *                      incy loop     -- varying incy: -2, -1, 1, 2
4999  */
5000 {
5001   /* function name */
5002   const char fname[] = "BLAS_zgemv2_z_d";
5003 
5004   /* max number of debug lines to print */
5005   const int max_print = 8;
5006 
5007   /* Variables in the "x_val" form are loop vars for corresponding
5008      variables */
5009   int i;			/* iterate through the repeating tests */
5010   int j, k;			/* multipurpose counters or variables */
5011   int iy;			/* use to index y */
5012   int incx_val, incy_val,	/* for testing different inc values */
5013     incx, incy;
5014   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
5015   int d_count;			/* counter for debug */
5016   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
5017   int p_count;			/* counter for the number of debug lines printed */
5018   int tot_tests;		/* total number of tests to be done */
5019   int norm;			/* input values of near underflow/one/overflow */
5020   double ratio_max;		/* the current maximum ratio */
5021   double ratio_min;		/* the current minimum ratio */
5022   double *ratios;		/* a temporary variable for calculating ratio */
5023   double ratio;			/* the per-use test ratio from test() */
5024   int bad_ratios;		/* the number of ratios over the threshold */
5025   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
5026   double un_int;		/* the internal underflow threshold */
5027   double alpha[2];
5028   double beta[2];
5029   double *A;
5030   double *head_x;
5031   double *tail_x;
5032   double *y;
5033   double *temp;			/* use for calculating ratio */
5034 
5035   /* x_gen and y_gen are used to store vectors generated by testgen.
5036      they eventually are copied back to x and y */
5037   double *head_x_gen;
5038   double *tail_x_gen;
5039   double *y_gen;
5040 
5041   /* the true r calculated by testgen(), in double-double */
5042   double *head_r_true, *tail_r_true;
5043 
5044   int alpha_val;
5045   int alpha_flag;		/* input flag for BLAS_zgemv2_z_d_testgen */
5046   int beta_val;
5047   int beta_flag;		/* input flag for BLAS_zgemv2_z_d_testgen */
5048   int order_val;
5049   enum blas_order_type order_type;
5050 
5051   enum blas_prec_type prec;
5052   int trans_val;
5053   enum blas_trans_type trans_type;
5054   int m_i;
5055   int n_i;
5056   int max_mn;			/* the max of m and n */
5057   int lda_val;
5058   int lda;
5059   int saved_seed;		/* for saving the original seed */
5060   int count, old_count;		/* use for counting the number of testgen calls * 2 */
5061 
5062   FPU_FIX_DECL;
5063 
5064   /* test for bad arguments */
5065   if (n < 0 || m < 0 || ntests < 0)
5066     BLAS_error(fname, 0, 0, NULL);
5067 
5068   /* initialization */
5069   *num_bad_ratio = 0;
5070   *num_tests = 0;
5071   *min_ratio = 0.0;
5072 
5073   saved_seed = *seed;
5074   ratio_min = 1e308;
5075   ratio_max = 0.0;
5076   ratio = 0.0;
5077   tot_tests = 0;
5078   p_count = 0;
5079   count = 0;
5080   find_max_ratio = 0;
5081   bad_ratios = 0;
5082   old_count = 0;
5083 
5084   if (debug == 3)
5085     find_max_ratio = 1;
5086   max_mn = MAX(m, n);
5087   if (m == 0 || n == 0) {
5088     return 0.0;
5089   }
5090 
5091   FPU_FIX_START;
5092 
5093   incy_gen = 1;
5094   incy_gen *= 2;
5095 
5096   /* get space for calculation */
5097   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
5098   if (max_mn * 2 > 0 && head_x == NULL) {
5099     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5100   }
5101   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
5102   if (max_mn * 2 > 0 && tail_x == NULL) {
5103     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5104   }
5105   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
5106   if (max_mn * 2 > 0 && y == NULL) {
5107     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5108   }
5109   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
5110   if (max_mn > 0 && head_x_gen == NULL) {
5111     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5112   }
5113   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
5114   if (max_mn > 0 && tail_x_gen == NULL) {
5115     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5116   }
5117   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5118   if (max_mn > 0 && y_gen == NULL) {
5119     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5120   }
5121   temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5122   if (max_mn > 0 && temp == NULL) {
5123     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5124   }
5125   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5126   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5127   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
5128     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5129   }
5130   ratios = (double *) blas_malloc(max_mn * sizeof(double));
5131   if (max_mn > 0 && ratios == NULL) {
5132     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5133   }
5134   A =
5135     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
5136 			   2);
5137   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
5138     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5139   }
5140 
5141   /* The debug iteration:
5142      If debug=1, then will execute the iteration twice. First, compute the
5143      max ratio. Second, print info if ratio > (50% * ratio_max). */
5144   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
5145     bad_ratios = 0;		/* set to zero */
5146 
5147     if ((debug == 3) && (d_count == find_max_ratio))
5148       *seed = saved_seed;	/* restore the original seed */
5149 
5150     /* varying alpha */
5151     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
5152       alpha_flag = 0;
5153       switch (alpha_val) {
5154       case 0:
5155 	alpha[0] = alpha[1] = 0.0;
5156 	alpha_flag = 1;
5157 	break;
5158       case 1:
5159 	alpha[0] = 1.0;
5160 	alpha[1] = 0.0;
5161 	alpha_flag = 1;
5162 	break;
5163       }
5164 
5165       /* varying beta */
5166       for (beta_val = 0; beta_val < 3; beta_val++) {
5167 	beta_flag = 0;
5168 	switch (beta_val) {
5169 	case 0:
5170 	  beta[0] = beta[1] = 0.0;
5171 	  beta_flag = 1;
5172 	  break;
5173 	case 1:
5174 	  beta[0] = 1.0;
5175 	  beta[1] = 0.0;
5176 	  beta_flag = 1;
5177 	  break;
5178 	}
5179 
5180 
5181 	eps_int = power(2, -BITS_D);
5182 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5183 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5184 	prec = blas_prec_double;
5185 
5186 	/* values near underflow, 1, or overflow */
5187 	for (norm = -1; norm <= 1; norm++) {
5188 
5189 	  /* number of tests */
5190 	  for (i = 0; i < ntests; i++) {
5191 
5192 	    /* row or col major */
5193 	    for (order_val = 0; order_val < 2; order_val++) {
5194 	      switch (order_val) {
5195 	      case 0:
5196 		order_type = blas_rowmajor;
5197 		break;
5198 	      case 1:
5199 	      default:
5200 		order_type = blas_colmajor;
5201 		break;
5202 	      }
5203 
5204 	      /* no_trans, trans, or conj_trans */
5205 	      for (trans_val = 0; trans_val < 3; trans_val++) {
5206 		switch (trans_val) {
5207 		case 0:
5208 		  trans_type = blas_no_trans;
5209 		  m_i = m;
5210 		  n_i = n;
5211 		  break;
5212 		case 1:
5213 		  trans_type = blas_trans;
5214 		  m_i = n;
5215 		  n_i = m;
5216 		  break;
5217 		case 2:
5218 		default:
5219 		  trans_type = blas_conj_trans;
5220 		  m_i = n;
5221 		  n_i = m;
5222 		  break;
5223 		}
5224 
5225 		/* lda=n, n+1, or 2n */
5226 		for (lda_val = 0; lda_val < 3; lda_val++) {
5227 		  switch (lda_val) {
5228 		  case 0:
5229 		    lda = m_i;
5230 		    break;
5231 		  case 1:
5232 		    lda = m_i + 1;
5233 		    break;
5234 		  case 2:
5235 		  default:
5236 		    lda = 2 * m_i;
5237 		    break;
5238 		  }
5239 		  if ((order_type == blas_rowmajor && lda < n) ||
5240 		      (order_type == blas_colmajor && lda < m))
5241 		    continue;
5242 
5243 		  /* For the sake of speed, we throw out this case at random */
5244 		  if (xrand(seed) >= test_prob)
5245 		    continue;
5246 
5247 		  /* in the trivial cases, no need to run testgen */
5248 		  if (m > 0 && n > 0)
5249 		    BLAS_zgemv2_z_d_testgen(norm, order_type, trans_type, m,
5250 					    n, &alpha, alpha_flag, A, lda,
5251 					    head_x_gen, tail_x_gen, &beta,
5252 					    beta_flag, y_gen, seed,
5253 					    head_r_true, tail_r_true);
5254 
5255 		  count++;
5256 
5257 		  /* varying incx */
5258 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
5259 		    if (incx_val == 0)
5260 		      continue;
5261 
5262 		    /* setting incx */
5263 		    incx = incx_val;
5264 
5265 
5266 		    dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
5267 		    dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
5268 
5269 		    /* varying incy */
5270 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
5271 		      if (incy_val == 0)
5272 			continue;
5273 
5274 		      /* setting incy */
5275 		      incy = incy_val;
5276 		      incy *= 2;
5277 
5278 		      zcopy_vector(y_gen, m_i, 1, y, incy_val);
5279 
5280 		      /* call BLAS_zgemv2_z_d */
5281 		      FPU_FIX_STOP;
5282 		      BLAS_zgemv2_z_d(order_type, trans_type, m, n, alpha, A,
5283 				      lda, head_x, tail_x, incx_val, beta, y,
5284 				      incy_val);
5285 		      FPU_FIX_START;
5286 
5287 		      /* set y starting index */
5288 		      iy = 0;
5289 		      if (incy < 0)
5290 			iy = -(m_i - 1) * incy;
5291 
5292 		      /* computing the ratio */
5293 		      if (m > 0 && n > 0)
5294 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
5295 			  /* copy row j of A to temp */
5296 			  zge_copy_row(order_type, trans_type, m_i, n_i, A,
5297 				       lda, temp, j);
5298 
5299 			  test_BLAS_zdot2_z_d(n_i, blas_no_conj, alpha, beta,
5300 					      &y_gen[k], &y[iy],
5301 					      &head_r_true[k],
5302 					      &tail_r_true[k], temp, 1,
5303 					      head_x, tail_x, incx_val,
5304 					      eps_int, un_int, &ratios[j]);
5305 
5306 			  /* take the max ratio */
5307 			  if (j == 0) {
5308 			    ratio = ratios[0];
5309 			    /* The !<= below causes NaN error to be detected.
5310 			       Note that (NaN > thresh) is always false. */
5311 			  } else if (!(ratios[j] <= ratio)) {
5312 			    ratio = ratios[j];
5313 			  }
5314 			  iy += incy;
5315 			}
5316 
5317 		      /* Increase the number of bad ratio, if the ratio
5318 		         is bigger than the threshold.
5319 		         The !<= below causes NaN error to be detected.
5320 		         Note that (NaN > thresh) is always false. */
5321 		      if (!(ratio <= thresh)) {
5322 			bad_ratios++;
5323 
5324 			if ((debug == 3) &&	/* print only when debug is on */
5325 			    (count != old_count) &&	/* print if old vector is different
5326 							   from the current one */
5327 			    (d_count == find_max_ratio) &&
5328 			    (p_count <= max_print) &&
5329 			    (ratio > 0.5 * ratio_max)) {
5330 			  old_count = count;
5331 
5332 			  printf
5333 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
5334 			     fname, m, n, ntests, thresh);
5335 
5336 			  /* Print test info */
5337 			  switch (prec) {
5338 			  case blas_prec_single:
5339 			    printf("single ");
5340 			    break;
5341 			  case blas_prec_double:
5342 			    printf("double ");
5343 			    break;
5344 			  case blas_prec_indigenous:
5345 			    printf("indigenous ");
5346 			    break;
5347 			  case blas_prec_extra:
5348 			    printf("extra ");
5349 			    break;
5350 			  }
5351 			  switch (norm) {
5352 			  case -1:
5353 			    printf("near_underflow ");
5354 			    break;
5355 			  case 0:
5356 			    printf("near_one ");
5357 			    break;
5358 			  case 1:
5359 			    printf("near_overflow ");
5360 			    break;
5361 			  }
5362 			  switch (order_type) {
5363 			  case blas_rowmajor:
5364 			    printf("row_major ");
5365 			    break;
5366 			  case blas_colmajor:
5367 			    printf("col_major ");
5368 			    break;
5369 			  }
5370 			  switch (trans_type) {
5371 			  case blas_no_trans:
5372 			    printf("no_trans ");
5373 			    break;
5374 			  case blas_trans:
5375 			    printf("trans ");
5376 			    break;
5377 			  case blas_conj_trans:
5378 			    printf("conj_trans ");
5379 			    break;
5380 			  }
5381 
5382 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
5383 				 incy);
5384 
5385 			  zge_print_matrix(A, m_i, n_i, lda, order_type, "A");
5386 
5387 			  dprint_vector(head_x, n_i, incx_val, "head_x");
5388 			  dprint_vector(tail_x, n_i, incx_val, "tail_x");
5389 			  zprint_vector(y_gen, m_i, 1, "y_gen");
5390 			  zprint_vector(y, m_i, incy_val, "y_final");
5391 
5392 			  printf("      ");
5393 			  printf("alpha = ");
5394 			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
5395 			  printf("\n      ");
5396 			  printf("beta = ");
5397 			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
5398 			  printf("\n");
5399 			  for (j = 0, k = 0; j < m_i * incy_gen;
5400 			       j += incy_gen, k++) {
5401 			    printf("      ");
5402 			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
5403 				   head_r_true[j], tail_r_true[j],
5404 				   head_r_true[j + 1], tail_r_true[j + 1]);
5405 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
5406 			  }
5407 
5408 			  printf("      ratio=%.4e\n", ratio);
5409 			  p_count++;
5410 			}
5411 			if (bad_ratios >= MAX_BAD_TESTS) {
5412 			  printf("\ntoo many failures, exiting....");
5413 			  printf("\nTesting and compilation");
5414 			  printf(" are incomplete\n\n");
5415 			  goto end;
5416 			}
5417 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
5418 			  printf("\nFlagrant ratio error, exiting...");
5419 			  printf("\nTesting and compilation");
5420 			  printf(" are incomplete\n\n");
5421 			  goto end;
5422 			}
5423 		      }
5424 		      if (d_count == 0) {
5425 			if (ratio > ratio_max)
5426 			  ratio_max = ratio;
5427 
5428 			if (ratio != 0.0 && ratio < ratio_min)
5429 			  ratio_min = ratio;
5430 
5431 			tot_tests++;
5432 		      }
5433 		    }		/* incy */
5434 		  }		/* incx */
5435 		}		/* lda */
5436 	      }			/* trans */
5437 	    }			/* order */
5438 	  }			/* tests */
5439 	}			/* norm */
5440 
5441       }				/* beta */
5442     }				/* alpha */
5443   }				/* debug */
5444 
5445   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
5446     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
5447 	   fname, m, n, ntests, thresh);
5448     printf
5449       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
5450        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
5451        ratio_min, ratio_max);
5452   }
5453 
5454 end:
5455   FPU_FIX_STOP;
5456 
5457   blas_free(head_x);
5458   blas_free(tail_x);
5459   blas_free(y);
5460   blas_free(head_x_gen);
5461   blas_free(tail_x_gen);
5462   blas_free(y_gen);
5463   blas_free(temp);
5464   blas_free(A);
5465   blas_free(head_r_true);
5466   blas_free(tail_r_true);
5467   blas_free(ratios);
5468 
5469   *min_ratio = ratio_min;
5470   *num_bad_ratio = bad_ratios;
5471   *num_tests = tot_tests;
5472   return ratio_max;
5473 }
do_test_zgemv2_d_z(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)5474 double do_test_zgemv2_d_z(int m, int n, int ntests, int *seed, double thresh,
5475 			  int debug, float test_prob, double *min_ratio,
5476 			  int *num_bad_ratio, int *num_tests)
5477 
5478 /*
5479  * Purpose
5480  * =======
5481  *
5482  * Runs a series of tests on GEMV2.
5483  *
5484  * Arguments
5485  * =========
5486  *
5487  * m         (input) int
5488  *           The number of rows
5489  *
5490  * n         (input) int
5491  *           The number of columns
5492  *
5493  * ntests    (input) int
5494  *           The number of tests to run for each set of attributes.
5495  *
5496  * seed      (input/output) int
5497  *           The seed for the random number generator used in testgen().
5498  *
5499  * thresh    (input) double
5500  *           When the ratio returned from test() exceeds the specified
5501  *           threshold, the current size, r_true, r_comp, and ratio will be
5502  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
5503  *           to ~10.)
5504  *
5505  * debug     (input) int
5506  *           If debug=3, print summary
5507  *           If debug=2, print summary only if the number of bad ratios > 0
5508  *           If debug=1, print complete info if tests fail
5509  *           If debug=0, return max ratio
5510  *
5511  * test_prob (input) float
5512  *           The specified test will be performed only if the generated
5513  *           random exceeds this threshold.
5514  *
5515  * min_ratio (output) double
5516  *           The minimum ratio
5517  *
5518  * num_bad_ratio (output) int
5519  *               The number of tests fail; they are above the threshold.
5520  *
5521  * num_tests (output) int
5522  *           The number of tests is being performed.
5523  *
5524  * Return value
5525  * ============
5526  *
5527  * The maximum ratio if run successfully, otherwise return -1
5528  *
5529  * Code structure
5530  * ==============
5531  *
5532  *  debug loop  -- if debug is one, the first loop computes the max ratio
5533  *              -- and the last(second) loop outputs debugging information,
5534  *              -- if the test fail and its ratio > 0.5 * max ratio.
5535  *              -- if debug is zero, the loop is executed once
5536  *    alpha loop  -- varying alpha: 0, 1, or random
5537  *      beta loop   -- varying beta: 0, 1, or random
5538 
5539  *          norm loop   -- varying norm: near undeflow, near one, or
5540  *                        -- near overflow
5541  *            numtest loop  -- how many times the test is perform with
5542  *                            -- above set of attributes
5543  *              order loop   -- varying order type: rowmajor or colmajor
5544  *                trans loop    -- varying uplo type: upper or lower
5545  *                  lda loop      -- varying lda: m, m+1, 2m
5546  *                    incx loop     -- varying incx: -2, -1, 1, 2
5547  *                      incy loop     -- varying incy: -2, -1, 1, 2
5548  */
5549 {
5550   /* function name */
5551   const char fname[] = "BLAS_zgemv2_d_z";
5552 
5553   /* max number of debug lines to print */
5554   const int max_print = 8;
5555 
5556   /* Variables in the "x_val" form are loop vars for corresponding
5557      variables */
5558   int i;			/* iterate through the repeating tests */
5559   int j, k;			/* multipurpose counters or variables */
5560   int iy;			/* use to index y */
5561   int incx_val, incy_val,	/* for testing different inc values */
5562     incx, incy;
5563   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
5564   int d_count;			/* counter for debug */
5565   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
5566   int p_count;			/* counter for the number of debug lines printed */
5567   int tot_tests;		/* total number of tests to be done */
5568   int norm;			/* input values of near underflow/one/overflow */
5569   double ratio_max;		/* the current maximum ratio */
5570   double ratio_min;		/* the current minimum ratio */
5571   double *ratios;		/* a temporary variable for calculating ratio */
5572   double ratio;			/* the per-use test ratio from test() */
5573   int bad_ratios;		/* the number of ratios over the threshold */
5574   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
5575   double un_int;		/* the internal underflow threshold */
5576   double alpha[2];
5577   double beta[2];
5578   double *A;
5579   double *head_x;
5580   double *tail_x;
5581   double *y;
5582   double *temp;			/* use for calculating ratio */
5583 
5584   /* x_gen and y_gen are used to store vectors generated by testgen.
5585      they eventually are copied back to x and y */
5586   double *head_x_gen;
5587   double *tail_x_gen;
5588   double *y_gen;
5589 
5590   /* the true r calculated by testgen(), in double-double */
5591   double *head_r_true, *tail_r_true;
5592 
5593   int alpha_val;
5594   int alpha_flag;		/* input flag for BLAS_zgemv2_d_z_testgen */
5595   int beta_val;
5596   int beta_flag;		/* input flag for BLAS_zgemv2_d_z_testgen */
5597   int order_val;
5598   enum blas_order_type order_type;
5599 
5600   enum blas_prec_type prec;
5601   int trans_val;
5602   enum blas_trans_type trans_type;
5603   int m_i;
5604   int n_i;
5605   int max_mn;			/* the max of m and n */
5606   int lda_val;
5607   int lda;
5608   int saved_seed;		/* for saving the original seed */
5609   int count, old_count;		/* use for counting the number of testgen calls * 2 */
5610 
5611   FPU_FIX_DECL;
5612 
5613   /* test for bad arguments */
5614   if (n < 0 || m < 0 || ntests < 0)
5615     BLAS_error(fname, 0, 0, NULL);
5616 
5617   /* initialization */
5618   *num_bad_ratio = 0;
5619   *num_tests = 0;
5620   *min_ratio = 0.0;
5621 
5622   saved_seed = *seed;
5623   ratio_min = 1e308;
5624   ratio_max = 0.0;
5625   ratio = 0.0;
5626   tot_tests = 0;
5627   p_count = 0;
5628   count = 0;
5629   find_max_ratio = 0;
5630   bad_ratios = 0;
5631   old_count = 0;
5632 
5633   if (debug == 3)
5634     find_max_ratio = 1;
5635   max_mn = MAX(m, n);
5636   if (m == 0 || n == 0) {
5637     return 0.0;
5638   }
5639 
5640   FPU_FIX_START;
5641 
5642   incy_gen = 1;
5643   incy_gen *= 2;
5644 
5645   /* get space for calculation */
5646   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
5647   if (max_mn * 2 > 0 && head_x == NULL) {
5648     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5649   }
5650   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
5651   if (max_mn * 2 > 0 && tail_x == NULL) {
5652     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5653   }
5654   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
5655   if (max_mn * 2 > 0 && y == NULL) {
5656     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5657   }
5658   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5659   if (max_mn > 0 && head_x_gen == NULL) {
5660     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5661   }
5662   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5663   if (max_mn > 0 && tail_x_gen == NULL) {
5664     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5665   }
5666   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5667   if (max_mn > 0 && y_gen == NULL) {
5668     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5669   }
5670   temp = (double *) blas_malloc(max_mn * sizeof(double));
5671   if (max_mn > 0 && temp == NULL) {
5672     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5673   }
5674   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5675   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
5676   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
5677     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5678   }
5679   ratios = (double *) blas_malloc(max_mn * sizeof(double));
5680   if (max_mn > 0 && ratios == NULL) {
5681     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5682   }
5683   A =
5684     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
5685   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
5686     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
5687   }
5688 
5689   /* The debug iteration:
5690      If debug=1, then will execute the iteration twice. First, compute the
5691      max ratio. Second, print info if ratio > (50% * ratio_max). */
5692   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
5693     bad_ratios = 0;		/* set to zero */
5694 
5695     if ((debug == 3) && (d_count == find_max_ratio))
5696       *seed = saved_seed;	/* restore the original seed */
5697 
5698     /* varying alpha */
5699     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
5700       alpha_flag = 0;
5701       switch (alpha_val) {
5702       case 0:
5703 	alpha[0] = alpha[1] = 0.0;
5704 	alpha_flag = 1;
5705 	break;
5706       case 1:
5707 	alpha[0] = 1.0;
5708 	alpha[1] = 0.0;
5709 	alpha_flag = 1;
5710 	break;
5711       }
5712 
5713       /* varying beta */
5714       for (beta_val = 0; beta_val < 3; beta_val++) {
5715 	beta_flag = 0;
5716 	switch (beta_val) {
5717 	case 0:
5718 	  beta[0] = beta[1] = 0.0;
5719 	  beta_flag = 1;
5720 	  break;
5721 	case 1:
5722 	  beta[0] = 1.0;
5723 	  beta[1] = 0.0;
5724 	  beta_flag = 1;
5725 	  break;
5726 	}
5727 
5728 
5729 	eps_int = power(2, -BITS_D);
5730 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
5731 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
5732 	prec = blas_prec_double;
5733 
5734 	/* values near underflow, 1, or overflow */
5735 	for (norm = -1; norm <= 1; norm++) {
5736 
5737 	  /* number of tests */
5738 	  for (i = 0; i < ntests; i++) {
5739 
5740 	    /* row or col major */
5741 	    for (order_val = 0; order_val < 2; order_val++) {
5742 	      switch (order_val) {
5743 	      case 0:
5744 		order_type = blas_rowmajor;
5745 		break;
5746 	      case 1:
5747 	      default:
5748 		order_type = blas_colmajor;
5749 		break;
5750 	      }
5751 
5752 	      /* no_trans, trans, or conj_trans */
5753 	      for (trans_val = 0; trans_val < 3; trans_val++) {
5754 		switch (trans_val) {
5755 		case 0:
5756 		  trans_type = blas_no_trans;
5757 		  m_i = m;
5758 		  n_i = n;
5759 		  break;
5760 		case 1:
5761 		  trans_type = blas_trans;
5762 		  m_i = n;
5763 		  n_i = m;
5764 		  break;
5765 		case 2:
5766 		default:
5767 		  trans_type = blas_conj_trans;
5768 		  m_i = n;
5769 		  n_i = m;
5770 		  break;
5771 		}
5772 
5773 		/* lda=n, n+1, or 2n */
5774 		for (lda_val = 0; lda_val < 3; lda_val++) {
5775 		  switch (lda_val) {
5776 		  case 0:
5777 		    lda = m_i;
5778 		    break;
5779 		  case 1:
5780 		    lda = m_i + 1;
5781 		    break;
5782 		  case 2:
5783 		  default:
5784 		    lda = 2 * m_i;
5785 		    break;
5786 		  }
5787 		  if ((order_type == blas_rowmajor && lda < n) ||
5788 		      (order_type == blas_colmajor && lda < m))
5789 		    continue;
5790 
5791 		  /* For the sake of speed, we throw out this case at random */
5792 		  if (xrand(seed) >= test_prob)
5793 		    continue;
5794 
5795 		  /* in the trivial cases, no need to run testgen */
5796 		  if (m > 0 && n > 0)
5797 		    BLAS_zgemv2_d_z_testgen(norm, order_type, trans_type, m,
5798 					    n, &alpha, alpha_flag, A, lda,
5799 					    head_x_gen, tail_x_gen, &beta,
5800 					    beta_flag, y_gen, seed,
5801 					    head_r_true, tail_r_true);
5802 
5803 		  count++;
5804 
5805 		  /* varying incx */
5806 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
5807 		    if (incx_val == 0)
5808 		      continue;
5809 
5810 		    /* setting incx */
5811 		    incx = incx_val;
5812 		    incx *= 2;
5813 
5814 		    zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
5815 		    zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
5816 
5817 		    /* varying incy */
5818 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
5819 		      if (incy_val == 0)
5820 			continue;
5821 
5822 		      /* setting incy */
5823 		      incy = incy_val;
5824 		      incy *= 2;
5825 
5826 		      zcopy_vector(y_gen, m_i, 1, y, incy_val);
5827 
5828 		      /* call BLAS_zgemv2_d_z */
5829 		      FPU_FIX_STOP;
5830 		      BLAS_zgemv2_d_z(order_type, trans_type, m, n, alpha, A,
5831 				      lda, head_x, tail_x, incx_val, beta, y,
5832 				      incy_val);
5833 		      FPU_FIX_START;
5834 
5835 		      /* set y starting index */
5836 		      iy = 0;
5837 		      if (incy < 0)
5838 			iy = -(m_i - 1) * incy;
5839 
5840 		      /* computing the ratio */
5841 		      if (m > 0 && n > 0)
5842 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
5843 			  /* copy row j of A to temp */
5844 			  dge_copy_row(order_type, trans_type, m_i, n_i, A,
5845 				       lda, temp, j);
5846 
5847 			  test_BLAS_zdot2_d_z(n_i, blas_no_conj, alpha, beta,
5848 					      &y_gen[k], &y[iy],
5849 					      &head_r_true[k],
5850 					      &tail_r_true[k], temp, 1,
5851 					      head_x, tail_x, incx_val,
5852 					      eps_int, un_int, &ratios[j]);
5853 
5854 			  /* take the max ratio */
5855 			  if (j == 0) {
5856 			    ratio = ratios[0];
5857 			    /* The !<= below causes NaN error to be detected.
5858 			       Note that (NaN > thresh) is always false. */
5859 			  } else if (!(ratios[j] <= ratio)) {
5860 			    ratio = ratios[j];
5861 			  }
5862 			  iy += incy;
5863 			}
5864 
5865 		      /* Increase the number of bad ratio, if the ratio
5866 		         is bigger than the threshold.
5867 		         The !<= below causes NaN error to be detected.
5868 		         Note that (NaN > thresh) is always false. */
5869 		      if (!(ratio <= thresh)) {
5870 			bad_ratios++;
5871 
5872 			if ((debug == 3) &&	/* print only when debug is on */
5873 			    (count != old_count) &&	/* print if old vector is different
5874 							   from the current one */
5875 			    (d_count == find_max_ratio) &&
5876 			    (p_count <= max_print) &&
5877 			    (ratio > 0.5 * ratio_max)) {
5878 			  old_count = count;
5879 
5880 			  printf
5881 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
5882 			     fname, m, n, ntests, thresh);
5883 
5884 			  /* Print test info */
5885 			  switch (prec) {
5886 			  case blas_prec_single:
5887 			    printf("single ");
5888 			    break;
5889 			  case blas_prec_double:
5890 			    printf("double ");
5891 			    break;
5892 			  case blas_prec_indigenous:
5893 			    printf("indigenous ");
5894 			    break;
5895 			  case blas_prec_extra:
5896 			    printf("extra ");
5897 			    break;
5898 			  }
5899 			  switch (norm) {
5900 			  case -1:
5901 			    printf("near_underflow ");
5902 			    break;
5903 			  case 0:
5904 			    printf("near_one ");
5905 			    break;
5906 			  case 1:
5907 			    printf("near_overflow ");
5908 			    break;
5909 			  }
5910 			  switch (order_type) {
5911 			  case blas_rowmajor:
5912 			    printf("row_major ");
5913 			    break;
5914 			  case blas_colmajor:
5915 			    printf("col_major ");
5916 			    break;
5917 			  }
5918 			  switch (trans_type) {
5919 			  case blas_no_trans:
5920 			    printf("no_trans ");
5921 			    break;
5922 			  case blas_trans:
5923 			    printf("trans ");
5924 			    break;
5925 			  case blas_conj_trans:
5926 			    printf("conj_trans ");
5927 			    break;
5928 			  }
5929 
5930 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
5931 				 incy);
5932 
5933 			  dge_print_matrix(A, m_i, n_i, lda, order_type, "A");
5934 
5935 			  zprint_vector(head_x, n_i, incx_val, "head_x");
5936 			  zprint_vector(tail_x, n_i, incx_val, "tail_x");
5937 			  zprint_vector(y_gen, m_i, 1, "y_gen");
5938 			  zprint_vector(y, m_i, incy_val, "y_final");
5939 
5940 			  printf("      ");
5941 			  printf("alpha = ");
5942 			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
5943 			  printf("\n      ");
5944 			  printf("beta = ");
5945 			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
5946 			  printf("\n");
5947 			  for (j = 0, k = 0; j < m_i * incy_gen;
5948 			       j += incy_gen, k++) {
5949 			    printf("      ");
5950 			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
5951 				   head_r_true[j], tail_r_true[j],
5952 				   head_r_true[j + 1], tail_r_true[j + 1]);
5953 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
5954 			  }
5955 
5956 			  printf("      ratio=%.4e\n", ratio);
5957 			  p_count++;
5958 			}
5959 			if (bad_ratios >= MAX_BAD_TESTS) {
5960 			  printf("\ntoo many failures, exiting....");
5961 			  printf("\nTesting and compilation");
5962 			  printf(" are incomplete\n\n");
5963 			  goto end;
5964 			}
5965 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
5966 			  printf("\nFlagrant ratio error, exiting...");
5967 			  printf("\nTesting and compilation");
5968 			  printf(" are incomplete\n\n");
5969 			  goto end;
5970 			}
5971 		      }
5972 		      if (d_count == 0) {
5973 			if (ratio > ratio_max)
5974 			  ratio_max = ratio;
5975 
5976 			if (ratio != 0.0 && ratio < ratio_min)
5977 			  ratio_min = ratio;
5978 
5979 			tot_tests++;
5980 		      }
5981 		    }		/* incy */
5982 		  }		/* incx */
5983 		}		/* lda */
5984 	      }			/* trans */
5985 	    }			/* order */
5986 	  }			/* tests */
5987 	}			/* norm */
5988 
5989       }				/* beta */
5990     }				/* alpha */
5991   }				/* debug */
5992 
5993   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
5994     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
5995 	   fname, m, n, ntests, thresh);
5996     printf
5997       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
5998        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
5999        ratio_min, ratio_max);
6000   }
6001 
6002 end:
6003   FPU_FIX_STOP;
6004 
6005   blas_free(head_x);
6006   blas_free(tail_x);
6007   blas_free(y);
6008   blas_free(head_x_gen);
6009   blas_free(tail_x_gen);
6010   blas_free(y_gen);
6011   blas_free(temp);
6012   blas_free(A);
6013   blas_free(head_r_true);
6014   blas_free(tail_r_true);
6015   blas_free(ratios);
6016 
6017   *min_ratio = ratio_min;
6018   *num_bad_ratio = bad_ratios;
6019   *num_tests = tot_tests;
6020   return ratio_max;
6021 }
do_test_zgemv2_d_d(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)6022 double do_test_zgemv2_d_d(int m, int n, int ntests, int *seed, double thresh,
6023 			  int debug, float test_prob, double *min_ratio,
6024 			  int *num_bad_ratio, int *num_tests)
6025 
6026 /*
6027  * Purpose
6028  * =======
6029  *
6030  * Runs a series of tests on GEMV2.
6031  *
6032  * Arguments
6033  * =========
6034  *
6035  * m         (input) int
6036  *           The number of rows
6037  *
6038  * n         (input) int
6039  *           The number of columns
6040  *
6041  * ntests    (input) int
6042  *           The number of tests to run for each set of attributes.
6043  *
6044  * seed      (input/output) int
6045  *           The seed for the random number generator used in testgen().
6046  *
6047  * thresh    (input) double
6048  *           When the ratio returned from test() exceeds the specified
6049  *           threshold, the current size, r_true, r_comp, and ratio will be
6050  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
6051  *           to ~10.)
6052  *
6053  * debug     (input) int
6054  *           If debug=3, print summary
6055  *           If debug=2, print summary only if the number of bad ratios > 0
6056  *           If debug=1, print complete info if tests fail
6057  *           If debug=0, return max ratio
6058  *
6059  * test_prob (input) float
6060  *           The specified test will be performed only if the generated
6061  *           random exceeds this threshold.
6062  *
6063  * min_ratio (output) double
6064  *           The minimum ratio
6065  *
6066  * num_bad_ratio (output) int
6067  *               The number of tests fail; they are above the threshold.
6068  *
6069  * num_tests (output) int
6070  *           The number of tests is being performed.
6071  *
6072  * Return value
6073  * ============
6074  *
6075  * The maximum ratio if run successfully, otherwise return -1
6076  *
6077  * Code structure
6078  * ==============
6079  *
6080  *  debug loop  -- if debug is one, the first loop computes the max ratio
6081  *              -- and the last(second) loop outputs debugging information,
6082  *              -- if the test fail and its ratio > 0.5 * max ratio.
6083  *              -- if debug is zero, the loop is executed once
6084  *    alpha loop  -- varying alpha: 0, 1, or random
6085  *      beta loop   -- varying beta: 0, 1, or random
6086 
6087  *          norm loop   -- varying norm: near undeflow, near one, or
6088  *                        -- near overflow
6089  *            numtest loop  -- how many times the test is perform with
6090  *                            -- above set of attributes
6091  *              order loop   -- varying order type: rowmajor or colmajor
6092  *                trans loop    -- varying uplo type: upper or lower
6093  *                  lda loop      -- varying lda: m, m+1, 2m
6094  *                    incx loop     -- varying incx: -2, -1, 1, 2
6095  *                      incy loop     -- varying incy: -2, -1, 1, 2
6096  */
6097 {
6098   /* function name */
6099   const char fname[] = "BLAS_zgemv2_d_d";
6100 
6101   /* max number of debug lines to print */
6102   const int max_print = 8;
6103 
6104   /* Variables in the "x_val" form are loop vars for corresponding
6105      variables */
6106   int i;			/* iterate through the repeating tests */
6107   int j, k;			/* multipurpose counters or variables */
6108   int iy;			/* use to index y */
6109   int incx_val, incy_val,	/* for testing different inc values */
6110     incx, incy;
6111   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
6112   int d_count;			/* counter for debug */
6113   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
6114   int p_count;			/* counter for the number of debug lines printed */
6115   int tot_tests;		/* total number of tests to be done */
6116   int norm;			/* input values of near underflow/one/overflow */
6117   double ratio_max;		/* the current maximum ratio */
6118   double ratio_min;		/* the current minimum ratio */
6119   double *ratios;		/* a temporary variable for calculating ratio */
6120   double ratio;			/* the per-use test ratio from test() */
6121   int bad_ratios;		/* the number of ratios over the threshold */
6122   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
6123   double un_int;		/* the internal underflow threshold */
6124   double alpha[2];
6125   double beta[2];
6126   double *A;
6127   double *head_x;
6128   double *tail_x;
6129   double *y;
6130   double *temp;			/* use for calculating ratio */
6131 
6132   /* x_gen and y_gen are used to store vectors generated by testgen.
6133      they eventually are copied back to x and y */
6134   double *head_x_gen;
6135   double *tail_x_gen;
6136   double *y_gen;
6137 
6138   /* the true r calculated by testgen(), in double-double */
6139   double *head_r_true, *tail_r_true;
6140 
6141   int alpha_val;
6142   int alpha_flag;		/* input flag for BLAS_zgemv2_d_d_testgen */
6143   int beta_val;
6144   int beta_flag;		/* input flag for BLAS_zgemv2_d_d_testgen */
6145   int order_val;
6146   enum blas_order_type order_type;
6147 
6148   enum blas_prec_type prec;
6149   int trans_val;
6150   enum blas_trans_type trans_type;
6151   int m_i;
6152   int n_i;
6153   int max_mn;			/* the max of m and n */
6154   int lda_val;
6155   int lda;
6156   int saved_seed;		/* for saving the original seed */
6157   int count, old_count;		/* use for counting the number of testgen calls * 2 */
6158 
6159   FPU_FIX_DECL;
6160 
6161   /* test for bad arguments */
6162   if (n < 0 || m < 0 || ntests < 0)
6163     BLAS_error(fname, 0, 0, NULL);
6164 
6165   /* initialization */
6166   *num_bad_ratio = 0;
6167   *num_tests = 0;
6168   *min_ratio = 0.0;
6169 
6170   saved_seed = *seed;
6171   ratio_min = 1e308;
6172   ratio_max = 0.0;
6173   ratio = 0.0;
6174   tot_tests = 0;
6175   p_count = 0;
6176   count = 0;
6177   find_max_ratio = 0;
6178   bad_ratios = 0;
6179   old_count = 0;
6180 
6181   if (debug == 3)
6182     find_max_ratio = 1;
6183   max_mn = MAX(m, n);
6184   if (m == 0 || n == 0) {
6185     return 0.0;
6186   }
6187 
6188   FPU_FIX_START;
6189 
6190   incy_gen = 1;
6191   incy_gen *= 2;
6192 
6193   /* get space for calculation */
6194   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
6195   if (max_mn * 2 > 0 && head_x == NULL) {
6196     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6197   }
6198   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
6199   if (max_mn * 2 > 0 && tail_x == NULL) {
6200     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6201   }
6202   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
6203   if (max_mn * 2 > 0 && y == NULL) {
6204     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6205   }
6206   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
6207   if (max_mn > 0 && head_x_gen == NULL) {
6208     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6209   }
6210   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
6211   if (max_mn > 0 && tail_x_gen == NULL) {
6212     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6213   }
6214   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
6215   if (max_mn > 0 && y_gen == NULL) {
6216     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6217   }
6218   temp = (double *) blas_malloc(max_mn * sizeof(double));
6219   if (max_mn > 0 && temp == NULL) {
6220     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6221   }
6222   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
6223   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
6224   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
6225     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6226   }
6227   ratios = (double *) blas_malloc(max_mn * sizeof(double));
6228   if (max_mn > 0 && ratios == NULL) {
6229     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6230   }
6231   A =
6232     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
6233   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
6234     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6235   }
6236 
6237   /* The debug iteration:
6238      If debug=1, then will execute the iteration twice. First, compute the
6239      max ratio. Second, print info if ratio > (50% * ratio_max). */
6240   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
6241     bad_ratios = 0;		/* set to zero */
6242 
6243     if ((debug == 3) && (d_count == find_max_ratio))
6244       *seed = saved_seed;	/* restore the original seed */
6245 
6246     /* varying alpha */
6247     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
6248       alpha_flag = 0;
6249       switch (alpha_val) {
6250       case 0:
6251 	alpha[0] = alpha[1] = 0.0;
6252 	alpha_flag = 1;
6253 	break;
6254       case 1:
6255 	alpha[0] = 1.0;
6256 	alpha[1] = 0.0;
6257 	alpha_flag = 1;
6258 	break;
6259       }
6260 
6261       /* varying beta */
6262       for (beta_val = 0; beta_val < 3; beta_val++) {
6263 	beta_flag = 0;
6264 	switch (beta_val) {
6265 	case 0:
6266 	  beta[0] = beta[1] = 0.0;
6267 	  beta_flag = 1;
6268 	  break;
6269 	case 1:
6270 	  beta[0] = 1.0;
6271 	  beta[1] = 0.0;
6272 	  beta_flag = 1;
6273 	  break;
6274 	}
6275 
6276 
6277 	eps_int = power(2, -BITS_D);
6278 	un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6279 		     (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6280 	prec = blas_prec_double;
6281 
6282 	/* values near underflow, 1, or overflow */
6283 	for (norm = -1; norm <= 1; norm++) {
6284 
6285 	  /* number of tests */
6286 	  for (i = 0; i < ntests; i++) {
6287 
6288 	    /* row or col major */
6289 	    for (order_val = 0; order_val < 2; order_val++) {
6290 	      switch (order_val) {
6291 	      case 0:
6292 		order_type = blas_rowmajor;
6293 		break;
6294 	      case 1:
6295 	      default:
6296 		order_type = blas_colmajor;
6297 		break;
6298 	      }
6299 
6300 	      /* no_trans, trans, or conj_trans */
6301 	      for (trans_val = 0; trans_val < 3; trans_val++) {
6302 		switch (trans_val) {
6303 		case 0:
6304 		  trans_type = blas_no_trans;
6305 		  m_i = m;
6306 		  n_i = n;
6307 		  break;
6308 		case 1:
6309 		  trans_type = blas_trans;
6310 		  m_i = n;
6311 		  n_i = m;
6312 		  break;
6313 		case 2:
6314 		default:
6315 		  trans_type = blas_conj_trans;
6316 		  m_i = n;
6317 		  n_i = m;
6318 		  break;
6319 		}
6320 
6321 		/* lda=n, n+1, or 2n */
6322 		for (lda_val = 0; lda_val < 3; lda_val++) {
6323 		  switch (lda_val) {
6324 		  case 0:
6325 		    lda = m_i;
6326 		    break;
6327 		  case 1:
6328 		    lda = m_i + 1;
6329 		    break;
6330 		  case 2:
6331 		  default:
6332 		    lda = 2 * m_i;
6333 		    break;
6334 		  }
6335 		  if ((order_type == blas_rowmajor && lda < n) ||
6336 		      (order_type == blas_colmajor && lda < m))
6337 		    continue;
6338 
6339 		  /* For the sake of speed, we throw out this case at random */
6340 		  if (xrand(seed) >= test_prob)
6341 		    continue;
6342 
6343 		  /* in the trivial cases, no need to run testgen */
6344 		  if (m > 0 && n > 0)
6345 		    BLAS_zgemv2_d_d_testgen(norm, order_type, trans_type, m,
6346 					    n, &alpha, alpha_flag, A, lda,
6347 					    head_x_gen, tail_x_gen, &beta,
6348 					    beta_flag, y_gen, seed,
6349 					    head_r_true, tail_r_true);
6350 
6351 		  count++;
6352 
6353 		  /* varying incx */
6354 		  for (incx_val = -2; incx_val <= 2; incx_val++) {
6355 		    if (incx_val == 0)
6356 		      continue;
6357 
6358 		    /* setting incx */
6359 		    incx = incx_val;
6360 
6361 
6362 		    dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
6363 		    dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
6364 
6365 		    /* varying incy */
6366 		    for (incy_val = -2; incy_val <= 2; incy_val++) {
6367 		      if (incy_val == 0)
6368 			continue;
6369 
6370 		      /* setting incy */
6371 		      incy = incy_val;
6372 		      incy *= 2;
6373 
6374 		      zcopy_vector(y_gen, m_i, 1, y, incy_val);
6375 
6376 		      /* call BLAS_zgemv2_d_d */
6377 		      FPU_FIX_STOP;
6378 		      BLAS_zgemv2_d_d(order_type, trans_type, m, n, alpha, A,
6379 				      lda, head_x, tail_x, incx_val, beta, y,
6380 				      incy_val);
6381 		      FPU_FIX_START;
6382 
6383 		      /* set y starting index */
6384 		      iy = 0;
6385 		      if (incy < 0)
6386 			iy = -(m_i - 1) * incy;
6387 
6388 		      /* computing the ratio */
6389 		      if (m > 0 && n > 0)
6390 			for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
6391 			  /* copy row j of A to temp */
6392 			  dge_copy_row(order_type, trans_type, m_i, n_i, A,
6393 				       lda, temp, j);
6394 
6395 			  test_BLAS_zdot2_d_d(n_i, blas_no_conj, alpha, beta,
6396 					      &y_gen[k], &y[iy],
6397 					      &head_r_true[k],
6398 					      &tail_r_true[k], temp, 1,
6399 					      head_x, tail_x, incx_val,
6400 					      eps_int, un_int, &ratios[j]);
6401 
6402 			  /* take the max ratio */
6403 			  if (j == 0) {
6404 			    ratio = ratios[0];
6405 			    /* The !<= below causes NaN error to be detected.
6406 			       Note that (NaN > thresh) is always false. */
6407 			  } else if (!(ratios[j] <= ratio)) {
6408 			    ratio = ratios[j];
6409 			  }
6410 			  iy += incy;
6411 			}
6412 
6413 		      /* Increase the number of bad ratio, if the ratio
6414 		         is bigger than the threshold.
6415 		         The !<= below causes NaN error to be detected.
6416 		         Note that (NaN > thresh) is always false. */
6417 		      if (!(ratio <= thresh)) {
6418 			bad_ratios++;
6419 
6420 			if ((debug == 3) &&	/* print only when debug is on */
6421 			    (count != old_count) &&	/* print if old vector is different
6422 							   from the current one */
6423 			    (d_count == find_max_ratio) &&
6424 			    (p_count <= max_print) &&
6425 			    (ratio > 0.5 * ratio_max)) {
6426 			  old_count = count;
6427 
6428 			  printf
6429 			    ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
6430 			     fname, m, n, ntests, thresh);
6431 
6432 			  /* Print test info */
6433 			  switch (prec) {
6434 			  case blas_prec_single:
6435 			    printf("single ");
6436 			    break;
6437 			  case blas_prec_double:
6438 			    printf("double ");
6439 			    break;
6440 			  case blas_prec_indigenous:
6441 			    printf("indigenous ");
6442 			    break;
6443 			  case blas_prec_extra:
6444 			    printf("extra ");
6445 			    break;
6446 			  }
6447 			  switch (norm) {
6448 			  case -1:
6449 			    printf("near_underflow ");
6450 			    break;
6451 			  case 0:
6452 			    printf("near_one ");
6453 			    break;
6454 			  case 1:
6455 			    printf("near_overflow ");
6456 			    break;
6457 			  }
6458 			  switch (order_type) {
6459 			  case blas_rowmajor:
6460 			    printf("row_major ");
6461 			    break;
6462 			  case blas_colmajor:
6463 			    printf("col_major ");
6464 			    break;
6465 			  }
6466 			  switch (trans_type) {
6467 			  case blas_no_trans:
6468 			    printf("no_trans ");
6469 			    break;
6470 			  case blas_trans:
6471 			    printf("trans ");
6472 			    break;
6473 			  case blas_conj_trans:
6474 			    printf("conj_trans ");
6475 			    break;
6476 			  }
6477 
6478 			  printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
6479 				 incy);
6480 
6481 			  dge_print_matrix(A, m_i, n_i, lda, order_type, "A");
6482 
6483 			  dprint_vector(head_x, n_i, incx_val, "head_x");
6484 			  dprint_vector(tail_x, n_i, incx_val, "tail_x");
6485 			  zprint_vector(y_gen, m_i, 1, "y_gen");
6486 			  zprint_vector(y, m_i, incy_val, "y_final");
6487 
6488 			  printf("      ");
6489 			  printf("alpha = ");
6490 			  printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
6491 			  printf("\n      ");
6492 			  printf("beta = ");
6493 			  printf("(%24.16e, %24.16e)", beta[0], beta[1]);
6494 			  printf("\n");
6495 			  for (j = 0, k = 0; j < m_i * incy_gen;
6496 			       j += incy_gen, k++) {
6497 			    printf("      ");
6498 			    printf("([%24.16e  %24.16e], [%24.16e %24.16e])",
6499 				   head_r_true[j], tail_r_true[j],
6500 				   head_r_true[j + 1], tail_r_true[j + 1]);
6501 			    printf(", ratio[%d]=%.4e\n", k, ratios[k]);
6502 			  }
6503 
6504 			  printf("      ratio=%.4e\n", ratio);
6505 			  p_count++;
6506 			}
6507 			if (bad_ratios >= MAX_BAD_TESTS) {
6508 			  printf("\ntoo many failures, exiting....");
6509 			  printf("\nTesting and compilation");
6510 			  printf(" are incomplete\n\n");
6511 			  goto end;
6512 			}
6513 			if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
6514 			  printf("\nFlagrant ratio error, exiting...");
6515 			  printf("\nTesting and compilation");
6516 			  printf(" are incomplete\n\n");
6517 			  goto end;
6518 			}
6519 		      }
6520 		      if (d_count == 0) {
6521 			if (ratio > ratio_max)
6522 			  ratio_max = ratio;
6523 
6524 			if (ratio != 0.0 && ratio < ratio_min)
6525 			  ratio_min = ratio;
6526 
6527 			tot_tests++;
6528 		      }
6529 		    }		/* incy */
6530 		  }		/* incx */
6531 		}		/* lda */
6532 	      }			/* trans */
6533 	    }			/* order */
6534 	  }			/* tests */
6535 	}			/* norm */
6536 
6537       }				/* beta */
6538     }				/* alpha */
6539   }				/* debug */
6540 
6541   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
6542     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
6543 	   fname, m, n, ntests, thresh);
6544     printf
6545       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
6546        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
6547        ratio_min, ratio_max);
6548   }
6549 
6550 end:
6551   FPU_FIX_STOP;
6552 
6553   blas_free(head_x);
6554   blas_free(tail_x);
6555   blas_free(y);
6556   blas_free(head_x_gen);
6557   blas_free(tail_x_gen);
6558   blas_free(y_gen);
6559   blas_free(temp);
6560   blas_free(A);
6561   blas_free(head_r_true);
6562   blas_free(tail_r_true);
6563   blas_free(ratios);
6564 
6565   *min_ratio = ratio_min;
6566   *num_bad_ratio = bad_ratios;
6567   *num_tests = tot_tests;
6568   return ratio_max;
6569 }
do_test_sgemv2_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)6570 double do_test_sgemv2_x(int m, int n, int ntests, int *seed, double thresh,
6571 			int debug, float test_prob, double *min_ratio,
6572 			int *num_bad_ratio, int *num_tests)
6573 
6574 /*
6575  * Purpose
6576  * =======
6577  *
6578  * Runs a series of tests on GEMV2.
6579  *
6580  * Arguments
6581  * =========
6582  *
6583  * m         (input) int
6584  *           The number of rows
6585  *
6586  * n         (input) int
6587  *           The number of columns
6588  *
6589  * ntests    (input) int
6590  *           The number of tests to run for each set of attributes.
6591  *
6592  * seed      (input/output) int
6593  *           The seed for the random number generator used in testgen().
6594  *
6595  * thresh    (input) double
6596  *           When the ratio returned from test() exceeds the specified
6597  *           threshold, the current size, r_true, r_comp, and ratio will be
6598  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
6599  *           to ~10.)
6600  *
6601  * debug     (input) int
6602  *           If debug=3, print summary
6603  *           If debug=2, print summary only if the number of bad ratios > 0
6604  *           If debug=1, print complete info if tests fail
6605  *           If debug=0, return max ratio
6606  *
6607  * test_prob (input) float
6608  *           The specified test will be performed only if the generated
6609  *           random exceeds this threshold.
6610  *
6611  * min_ratio (output) double
6612  *           The minimum ratio
6613  *
6614  * num_bad_ratio (output) int
6615  *               The number of tests fail; they are above the threshold.
6616  *
6617  * num_tests (output) int
6618  *           The number of tests is being performed.
6619  *
6620  * Return value
6621  * ============
6622  *
6623  * The maximum ratio if run successfully, otherwise return -1
6624  *
6625  * Code structure
6626  * ==============
6627  *
6628  *  debug loop  -- if debug is one, the first loop computes the max ratio
6629  *              -- and the last(second) loop outputs debugging information,
6630  *              -- if the test fail and its ratio > 0.5 * max ratio.
6631  *              -- if debug is zero, the loop is executed once
6632  *    alpha loop  -- varying alpha: 0, 1, or random
6633  *      beta loop   -- varying beta: 0, 1, or random
6634  *        prec loop   -- varying internal prec: single, double, or extra
6635  *          norm loop   -- varying norm: near undeflow, near one, or
6636  *                        -- near overflow
6637  *            numtest loop  -- how many times the test is perform with
6638  *                            -- above set of attributes
6639  *              order loop   -- varying order type: rowmajor or colmajor
6640  *                trans loop    -- varying uplo type: upper or lower
6641  *                  lda loop      -- varying lda: m, m+1, 2m
6642  *                    incx loop     -- varying incx: -2, -1, 1, 2
6643  *                      incy loop     -- varying incy: -2, -1, 1, 2
6644  */
6645 {
6646   /* function name */
6647   const char fname[] = "BLAS_sgemv2_x";
6648 
6649   /* max number of debug lines to print */
6650   const int max_print = 8;
6651 
6652   /* Variables in the "x_val" form are loop vars for corresponding
6653      variables */
6654   int i;			/* iterate through the repeating tests */
6655   int j, k;			/* multipurpose counters or variables */
6656   int iy;			/* use to index y */
6657   int incx_val, incy_val,	/* for testing different inc values */
6658     incx, incy;
6659   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
6660   int d_count;			/* counter for debug */
6661   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
6662   int p_count;			/* counter for the number of debug lines printed */
6663   int tot_tests;		/* total number of tests to be done */
6664   int norm;			/* input values of near underflow/one/overflow */
6665   double ratio_max;		/* the current maximum ratio */
6666   double ratio_min;		/* the current minimum ratio */
6667   double *ratios;		/* a temporary variable for calculating ratio */
6668   double ratio;			/* the per-use test ratio from test() */
6669   int bad_ratios;		/* the number of ratios over the threshold */
6670   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
6671   double un_int;		/* the internal underflow threshold */
6672   float alpha;
6673   float beta;
6674   float *A;
6675   float *head_x;
6676   float *tail_x;
6677   float *y;
6678   float *temp;			/* use for calculating ratio */
6679 
6680   /* x_gen and y_gen are used to store vectors generated by testgen.
6681      they eventually are copied back to x and y */
6682   float *head_x_gen;
6683   float *tail_x_gen;
6684   float *y_gen;
6685 
6686   /* the true r calculated by testgen(), in double-double */
6687   double *head_r_true, *tail_r_true;
6688   int alpha_val;
6689   int alpha_flag;		/* input flag for BLAS_sgemv2_testgen */
6690   int beta_val;
6691   int beta_flag;		/* input flag for BLAS_sgemv2_testgen */
6692   int order_val;
6693   enum blas_order_type order_type;
6694   int prec_val;
6695   enum blas_prec_type prec;
6696   int trans_val;
6697   enum blas_trans_type trans_type;
6698   int m_i;
6699   int n_i;
6700   int max_mn;			/* the max of m and n */
6701   int lda_val;
6702   int lda;
6703   int saved_seed;		/* for saving the original seed */
6704   int count, old_count;		/* use for counting the number of testgen calls * 2 */
6705 
6706   FPU_FIX_DECL;
6707 
6708   /* test for bad arguments */
6709   if (n < 0 || m < 0 || ntests < 0)
6710     BLAS_error(fname, 0, 0, NULL);
6711 
6712   /* initialization */
6713   *num_bad_ratio = 0;
6714   *num_tests = 0;
6715   *min_ratio = 0.0;
6716 
6717   saved_seed = *seed;
6718   ratio_min = 1e308;
6719   ratio_max = 0.0;
6720   ratio = 0.0;
6721   tot_tests = 0;
6722   p_count = 0;
6723   count = 0;
6724   find_max_ratio = 0;
6725   bad_ratios = 0;
6726   old_count = 0;
6727 
6728   if (debug == 3)
6729     find_max_ratio = 1;
6730   max_mn = MAX(m, n);
6731   if (m == 0 || n == 0) {
6732     return 0.0;
6733   }
6734 
6735   FPU_FIX_START;
6736 
6737   incy_gen = 1;
6738 
6739 
6740   /* get space for calculation */
6741   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
6742   if (max_mn * 2 > 0 && head_x == NULL) {
6743     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6744   }
6745   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
6746   if (max_mn * 2 > 0 && tail_x == NULL) {
6747     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6748   }
6749   y = (float *) blas_malloc(max_mn * 2 * sizeof(float));
6750   if (max_mn * 2 > 0 && y == NULL) {
6751     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6752   }
6753   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
6754   if (max_mn > 0 && head_x_gen == NULL) {
6755     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6756   }
6757   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
6758   if (max_mn > 0 && tail_x_gen == NULL) {
6759     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6760   }
6761   y_gen = (float *) blas_malloc(max_mn * sizeof(float));
6762   if (max_mn > 0 && y_gen == NULL) {
6763     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6764   }
6765   temp = (float *) blas_malloc(max_mn * sizeof(float));
6766   if (max_mn > 0 && temp == NULL) {
6767     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6768   }
6769   head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
6770   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
6771   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
6772     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6773   }
6774   ratios = (double *) blas_malloc(max_mn * sizeof(double));
6775   if (max_mn > 0 && ratios == NULL) {
6776     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6777   }
6778   A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
6779   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
6780     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
6781   }
6782 
6783   /* The debug iteration:
6784      If debug=1, then will execute the iteration twice. First, compute the
6785      max ratio. Second, print info if ratio > (50% * ratio_max). */
6786   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
6787     bad_ratios = 0;		/* set to zero */
6788 
6789     if ((debug == 3) && (d_count == find_max_ratio))
6790       *seed = saved_seed;	/* restore the original seed */
6791 
6792     /* varying alpha */
6793     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
6794       alpha_flag = 0;
6795       switch (alpha_val) {
6796       case 0:
6797 	alpha = 0.0;
6798 	alpha_flag = 1;
6799 	break;
6800       case 1:
6801 	alpha = 1.0;
6802 	alpha_flag = 1;
6803 	break;
6804       }
6805 
6806       /* varying beta */
6807       for (beta_val = 0; beta_val < 3; beta_val++) {
6808 	beta_flag = 0;
6809 	switch (beta_val) {
6810 	case 0:
6811 	  beta = 0.0;
6812 	  beta_flag = 1;
6813 	  break;
6814 	case 1:
6815 	  beta = 1.0;
6816 	  beta_flag = 1;
6817 	  break;
6818 	}
6819 
6820 
6821 	/* varying extra precs */
6822 	for (prec_val = 0; prec_val <= 2; prec_val++) {
6823 	  switch (prec_val) {
6824 	  case 0:
6825 	    eps_int = power(2, -BITS_S);
6826 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
6827 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
6828 	    prec = blas_prec_single;
6829 	    break;
6830 	  case 1:
6831 	    eps_int = power(2, -BITS_D);
6832 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
6833 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
6834 	    prec = blas_prec_double;
6835 	    break;
6836 	  case 2:
6837 	  default:
6838 	    eps_int = power(2, -BITS_E);
6839 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
6840 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
6841 	    prec = blas_prec_extra;
6842 	    break;
6843 	  }
6844 
6845 	  /* values near underflow, 1, or overflow */
6846 	  for (norm = -1; norm <= 1; norm++) {
6847 
6848 	    /* number of tests */
6849 	    for (i = 0; i < ntests; i++) {
6850 
6851 	      /* row or col major */
6852 	      for (order_val = 0; order_val < 2; order_val++) {
6853 		switch (order_val) {
6854 		case 0:
6855 		  order_type = blas_rowmajor;
6856 		  break;
6857 		case 1:
6858 		default:
6859 		  order_type = blas_colmajor;
6860 		  break;
6861 		}
6862 
6863 		/* no_trans, trans, or conj_trans */
6864 		for (trans_val = 0; trans_val < 3; trans_val++) {
6865 		  switch (trans_val) {
6866 		  case 0:
6867 		    trans_type = blas_no_trans;
6868 		    m_i = m;
6869 		    n_i = n;
6870 		    break;
6871 		  case 1:
6872 		    trans_type = blas_trans;
6873 		    m_i = n;
6874 		    n_i = m;
6875 		    break;
6876 		  case 2:
6877 		  default:
6878 		    trans_type = blas_conj_trans;
6879 		    m_i = n;
6880 		    n_i = m;
6881 		    break;
6882 		  }
6883 
6884 		  /* lda=n, n+1, or 2n */
6885 		  for (lda_val = 0; lda_val < 3; lda_val++) {
6886 		    switch (lda_val) {
6887 		    case 0:
6888 		      lda = m_i;
6889 		      break;
6890 		    case 1:
6891 		      lda = m_i + 1;
6892 		      break;
6893 		    case 2:
6894 		    default:
6895 		      lda = 2 * m_i;
6896 		      break;
6897 		    }
6898 		    if ((order_type == blas_rowmajor && lda < n) ||
6899 			(order_type == blas_colmajor && lda < m))
6900 		      continue;
6901 
6902 		    /* For the sake of speed, we throw out this case at random */
6903 		    if (xrand(seed) >= test_prob)
6904 		      continue;
6905 
6906 		    /* in the trivial cases, no need to run testgen */
6907 		    if (m > 0 && n > 0)
6908 		      BLAS_sgemv2_testgen(norm, order_type, trans_type, m, n,
6909 					  &alpha, alpha_flag, A, lda,
6910 					  head_x_gen, tail_x_gen, &beta,
6911 					  beta_flag, y_gen, seed, head_r_true,
6912 					  tail_r_true);
6913 
6914 		    count++;
6915 
6916 		    /* varying incx */
6917 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
6918 		      if (incx_val == 0)
6919 			continue;
6920 
6921 		      /* setting incx */
6922 		      incx = incx_val;
6923 
6924 
6925 		      scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
6926 		      scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
6927 
6928 		      /* varying incy */
6929 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
6930 			if (incy_val == 0)
6931 			  continue;
6932 
6933 			/* setting incy */
6934 			incy = incy_val;
6935 
6936 
6937 			scopy_vector(y_gen, m_i, 1, y, incy_val);
6938 
6939 			/* call BLAS_sgemv2_x */
6940 			FPU_FIX_STOP;
6941 			BLAS_sgemv2_x(order_type, trans_type, m, n, alpha, A,
6942 				      lda, head_x, tail_x, incx_val, beta, y,
6943 				      incy_val, prec);
6944 			FPU_FIX_START;
6945 
6946 			/* set y starting index */
6947 			iy = 0;
6948 			if (incy < 0)
6949 			  iy = -(m_i - 1) * incy;
6950 
6951 			/* computing the ratio */
6952 			if (m > 0 && n > 0)
6953 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
6954 			    /* copy row j of A to temp */
6955 			    sge_copy_row(order_type, trans_type, m_i, n_i, A,
6956 					 lda, temp, j);
6957 
6958 			    test_BLAS_sdot2(n_i, blas_no_conj, alpha, beta,
6959 					    y_gen[k], y[iy], head_r_true[k],
6960 					    tail_r_true[k], temp, 1, head_x,
6961 					    tail_x, incx_val, eps_int, un_int,
6962 					    &ratios[j]);
6963 
6964 			    /* take the max ratio */
6965 			    if (j == 0) {
6966 			      ratio = ratios[0];
6967 			      /* The !<= below causes NaN error to be detected.
6968 			         Note that (NaN > thresh) is always false. */
6969 			    } else if (!(ratios[j] <= ratio)) {
6970 			      ratio = ratios[j];
6971 			    }
6972 			    iy += incy;
6973 			  }
6974 
6975 			/* Increase the number of bad ratio, if the ratio
6976 			   is bigger than the threshold.
6977 			   The !<= below causes NaN error to be detected.
6978 			   Note that (NaN > thresh) is always false. */
6979 			if (!(ratio <= thresh)) {
6980 			  bad_ratios++;
6981 
6982 			  if ((debug == 3) &&	/* print only when debug is on */
6983 			      (count != old_count) &&	/* print if old vector is different
6984 							   from the current one */
6985 			      (d_count == find_max_ratio) &&
6986 			      (p_count <= max_print) &&
6987 			      (ratio > 0.5 * ratio_max)) {
6988 			    old_count = count;
6989 
6990 			    printf
6991 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
6992 			       fname, m, n, ntests, thresh);
6993 
6994 			    /* Print test info */
6995 			    switch (prec) {
6996 			    case blas_prec_single:
6997 			      printf("single ");
6998 			      break;
6999 			    case blas_prec_double:
7000 			      printf("double ");
7001 			      break;
7002 			    case blas_prec_indigenous:
7003 			      printf("indigenous ");
7004 			      break;
7005 			    case blas_prec_extra:
7006 			      printf("extra ");
7007 			      break;
7008 			    }
7009 			    switch (norm) {
7010 			    case -1:
7011 			      printf("near_underflow ");
7012 			      break;
7013 			    case 0:
7014 			      printf("near_one ");
7015 			      break;
7016 			    case 1:
7017 			      printf("near_overflow ");
7018 			      break;
7019 			    }
7020 			    switch (order_type) {
7021 			    case blas_rowmajor:
7022 			      printf("row_major ");
7023 			      break;
7024 			    case blas_colmajor:
7025 			      printf("col_major ");
7026 			      break;
7027 			    }
7028 			    switch (trans_type) {
7029 			    case blas_no_trans:
7030 			      printf("no_trans ");
7031 			      break;
7032 			    case blas_trans:
7033 			      printf("trans ");
7034 			      break;
7035 			    case blas_conj_trans:
7036 			      printf("conj_trans ");
7037 			      break;
7038 			    }
7039 
7040 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
7041 				   incy);
7042 
7043 			    sge_print_matrix(A, m_i, n_i, lda, order_type,
7044 					     "A");
7045 
7046 			    sprint_vector(head_x, n_i, incx_val, "head_x");
7047 			    sprint_vector(tail_x, n_i, incx_val, "tail_x");
7048 			    sprint_vector(y_gen, m_i, 1, "y_gen");
7049 			    sprint_vector(y, m_i, incy_val, "y_final");
7050 
7051 			    printf("      ");
7052 			    printf("alpha = ");
7053 			    printf("%16.8e", alpha);
7054 			    printf("\n      ");
7055 			    printf("beta = ");
7056 			    printf("%16.8e", beta);
7057 			    printf("\n");
7058 			    for (j = 0, k = 0; j < m_i * incy_gen;
7059 				 j += incy_gen, k++) {
7060 			      printf("      ");
7061 			      printf("[%24.16e, %24.16e]", head_r_true[j],
7062 				     tail_r_true[j]);
7063 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
7064 			    }
7065 
7066 			    printf("      ratio=%.4e\n", ratio);
7067 			    p_count++;
7068 			  }
7069 			  if (bad_ratios >= MAX_BAD_TESTS) {
7070 			    printf("\ntoo many failures, exiting....");
7071 			    printf("\nTesting and compilation");
7072 			    printf(" are incomplete\n\n");
7073 			    goto end;
7074 			  }
7075 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
7076 			    printf("\nFlagrant ratio error, exiting...");
7077 			    printf("\nTesting and compilation");
7078 			    printf(" are incomplete\n\n");
7079 			    goto end;
7080 			  }
7081 			}
7082 			if (d_count == 0) {
7083 			  if (ratio > ratio_max)
7084 			    ratio_max = ratio;
7085 
7086 			  if (ratio != 0.0 && ratio < ratio_min)
7087 			    ratio_min = ratio;
7088 
7089 			  tot_tests++;
7090 			}
7091 		      }		/* incy */
7092 		    }		/* incx */
7093 		  }		/* lda */
7094 		}		/* trans */
7095 	      }			/* order */
7096 	    }			/* tests */
7097 	  }			/* norm */
7098 	}			/* prec */
7099       }				/* beta */
7100     }				/* alpha */
7101   }				/* debug */
7102 
7103   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
7104     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
7105 	   fname, m, n, ntests, thresh);
7106     printf
7107       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
7108        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
7109        ratio_min, ratio_max);
7110   }
7111 
7112 end:
7113   FPU_FIX_STOP;
7114 
7115   blas_free(head_x);
7116   blas_free(tail_x);
7117   blas_free(y);
7118   blas_free(head_x_gen);
7119   blas_free(tail_x_gen);
7120   blas_free(y_gen);
7121   blas_free(temp);
7122   blas_free(A);
7123   blas_free(head_r_true);
7124   blas_free(tail_r_true);
7125   blas_free(ratios);
7126 
7127   *min_ratio = ratio_min;
7128   *num_bad_ratio = bad_ratios;
7129   *num_tests = tot_tests;
7130   return ratio_max;
7131 }
do_test_dgemv2_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)7132 double do_test_dgemv2_x(int m, int n, int ntests, int *seed, double thresh,
7133 			int debug, float test_prob, double *min_ratio,
7134 			int *num_bad_ratio, int *num_tests)
7135 
7136 /*
7137  * Purpose
7138  * =======
7139  *
7140  * Runs a series of tests on GEMV2.
7141  *
7142  * Arguments
7143  * =========
7144  *
7145  * m         (input) int
7146  *           The number of rows
7147  *
7148  * n         (input) int
7149  *           The number of columns
7150  *
7151  * ntests    (input) int
7152  *           The number of tests to run for each set of attributes.
7153  *
7154  * seed      (input/output) int
7155  *           The seed for the random number generator used in testgen().
7156  *
7157  * thresh    (input) double
7158  *           When the ratio returned from test() exceeds the specified
7159  *           threshold, the current size, r_true, r_comp, and ratio will be
7160  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
7161  *           to ~10.)
7162  *
7163  * debug     (input) int
7164  *           If debug=3, print summary
7165  *           If debug=2, print summary only if the number of bad ratios > 0
7166  *           If debug=1, print complete info if tests fail
7167  *           If debug=0, return max ratio
7168  *
7169  * test_prob (input) float
7170  *           The specified test will be performed only if the generated
7171  *           random exceeds this threshold.
7172  *
7173  * min_ratio (output) double
7174  *           The minimum ratio
7175  *
7176  * num_bad_ratio (output) int
7177  *               The number of tests fail; they are above the threshold.
7178  *
7179  * num_tests (output) int
7180  *           The number of tests is being performed.
7181  *
7182  * Return value
7183  * ============
7184  *
7185  * The maximum ratio if run successfully, otherwise return -1
7186  *
7187  * Code structure
7188  * ==============
7189  *
7190  *  debug loop  -- if debug is one, the first loop computes the max ratio
7191  *              -- and the last(second) loop outputs debugging information,
7192  *              -- if the test fail and its ratio > 0.5 * max ratio.
7193  *              -- if debug is zero, the loop is executed once
7194  *    alpha loop  -- varying alpha: 0, 1, or random
7195  *      beta loop   -- varying beta: 0, 1, or random
7196  *        prec loop   -- varying internal prec: single, double, or extra
7197  *          norm loop   -- varying norm: near undeflow, near one, or
7198  *                        -- near overflow
7199  *            numtest loop  -- how many times the test is perform with
7200  *                            -- above set of attributes
7201  *              order loop   -- varying order type: rowmajor or colmajor
7202  *                trans loop    -- varying uplo type: upper or lower
7203  *                  lda loop      -- varying lda: m, m+1, 2m
7204  *                    incx loop     -- varying incx: -2, -1, 1, 2
7205  *                      incy loop     -- varying incy: -2, -1, 1, 2
7206  */
7207 {
7208   /* function name */
7209   const char fname[] = "BLAS_dgemv2_x";
7210 
7211   /* max number of debug lines to print */
7212   const int max_print = 8;
7213 
7214   /* Variables in the "x_val" form are loop vars for corresponding
7215      variables */
7216   int i;			/* iterate through the repeating tests */
7217   int j, k;			/* multipurpose counters or variables */
7218   int iy;			/* use to index y */
7219   int incx_val, incy_val,	/* for testing different inc values */
7220     incx, incy;
7221   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
7222   int d_count;			/* counter for debug */
7223   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
7224   int p_count;			/* counter for the number of debug lines printed */
7225   int tot_tests;		/* total number of tests to be done */
7226   int norm;			/* input values of near underflow/one/overflow */
7227   double ratio_max;		/* the current maximum ratio */
7228   double ratio_min;		/* the current minimum ratio */
7229   double *ratios;		/* a temporary variable for calculating ratio */
7230   double ratio;			/* the per-use test ratio from test() */
7231   int bad_ratios;		/* the number of ratios over the threshold */
7232   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
7233   double un_int;		/* the internal underflow threshold */
7234   double alpha;
7235   double beta;
7236   double *A;
7237   double *head_x;
7238   double *tail_x;
7239   double *y;
7240   double *temp;			/* use for calculating ratio */
7241 
7242   /* x_gen and y_gen are used to store vectors generated by testgen.
7243      they eventually are copied back to x and y */
7244   double *head_x_gen;
7245   double *tail_x_gen;
7246   double *y_gen;
7247 
7248   /* the true r calculated by testgen(), in double-double */
7249   double *head_r_true, *tail_r_true;
7250   int alpha_val;
7251   int alpha_flag;		/* input flag for BLAS_dgemv2_testgen */
7252   int beta_val;
7253   int beta_flag;		/* input flag for BLAS_dgemv2_testgen */
7254   int order_val;
7255   enum blas_order_type order_type;
7256   int prec_val;
7257   enum blas_prec_type prec;
7258   int trans_val;
7259   enum blas_trans_type trans_type;
7260   int m_i;
7261   int n_i;
7262   int max_mn;			/* the max of m and n */
7263   int lda_val;
7264   int lda;
7265   int saved_seed;		/* for saving the original seed */
7266   int count, old_count;		/* use for counting the number of testgen calls * 2 */
7267 
7268   FPU_FIX_DECL;
7269 
7270   /* test for bad arguments */
7271   if (n < 0 || m < 0 || ntests < 0)
7272     BLAS_error(fname, 0, 0, NULL);
7273 
7274   /* initialization */
7275   *num_bad_ratio = 0;
7276   *num_tests = 0;
7277   *min_ratio = 0.0;
7278 
7279   saved_seed = *seed;
7280   ratio_min = 1e308;
7281   ratio_max = 0.0;
7282   ratio = 0.0;
7283   tot_tests = 0;
7284   p_count = 0;
7285   count = 0;
7286   find_max_ratio = 0;
7287   bad_ratios = 0;
7288   old_count = 0;
7289 
7290   if (debug == 3)
7291     find_max_ratio = 1;
7292   max_mn = MAX(m, n);
7293   if (m == 0 || n == 0) {
7294     return 0.0;
7295   }
7296 
7297   FPU_FIX_START;
7298 
7299   incy_gen = 1;
7300 
7301 
7302   /* get space for calculation */
7303   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
7304   if (max_mn * 2 > 0 && head_x == NULL) {
7305     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7306   }
7307   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
7308   if (max_mn * 2 > 0 && tail_x == NULL) {
7309     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7310   }
7311   y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
7312   if (max_mn * 2 > 0 && y == NULL) {
7313     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7314   }
7315   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
7316   if (max_mn > 0 && head_x_gen == NULL) {
7317     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7318   }
7319   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
7320   if (max_mn > 0 && tail_x_gen == NULL) {
7321     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7322   }
7323   y_gen = (double *) blas_malloc(max_mn * sizeof(double));
7324   if (max_mn > 0 && y_gen == NULL) {
7325     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7326   }
7327   temp = (double *) blas_malloc(max_mn * sizeof(double));
7328   if (max_mn > 0 && temp == NULL) {
7329     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7330   }
7331   head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
7332   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
7333   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
7334     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7335   }
7336   ratios = (double *) blas_malloc(max_mn * sizeof(double));
7337   if (max_mn > 0 && ratios == NULL) {
7338     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7339   }
7340   A =
7341     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
7342   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
7343     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7344   }
7345 
7346   /* The debug iteration:
7347      If debug=1, then will execute the iteration twice. First, compute the
7348      max ratio. Second, print info if ratio > (50% * ratio_max). */
7349   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
7350     bad_ratios = 0;		/* set to zero */
7351 
7352     if ((debug == 3) && (d_count == find_max_ratio))
7353       *seed = saved_seed;	/* restore the original seed */
7354 
7355     /* varying alpha */
7356     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
7357       alpha_flag = 0;
7358       switch (alpha_val) {
7359       case 0:
7360 	alpha = 0.0;
7361 	alpha_flag = 1;
7362 	break;
7363       case 1:
7364 	alpha = 1.0;
7365 	alpha_flag = 1;
7366 	break;
7367       }
7368 
7369       /* varying beta */
7370       for (beta_val = 0; beta_val < 3; beta_val++) {
7371 	beta_flag = 0;
7372 	switch (beta_val) {
7373 	case 0:
7374 	  beta = 0.0;
7375 	  beta_flag = 1;
7376 	  break;
7377 	case 1:
7378 	  beta = 1.0;
7379 	  beta_flag = 1;
7380 	  break;
7381 	}
7382 
7383 
7384 	/* varying extra precs */
7385 	for (prec_val = 0; prec_val <= 2; prec_val++) {
7386 	  switch (prec_val) {
7387 	  case 0:
7388 	    eps_int = power(2, -BITS_D);
7389 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
7390 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
7391 	    prec = blas_prec_double;
7392 	    break;
7393 	  case 1:
7394 	    eps_int = power(2, -BITS_D);
7395 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
7396 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
7397 	    prec = blas_prec_double;
7398 	    break;
7399 	  case 2:
7400 	  default:
7401 	    eps_int = power(2, -BITS_E);
7402 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
7403 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
7404 	    prec = blas_prec_extra;
7405 	    break;
7406 	  }
7407 
7408 	  /* values near underflow, 1, or overflow */
7409 	  for (norm = -1; norm <= 1; norm++) {
7410 
7411 	    /* number of tests */
7412 	    for (i = 0; i < ntests; i++) {
7413 
7414 	      /* row or col major */
7415 	      for (order_val = 0; order_val < 2; order_val++) {
7416 		switch (order_val) {
7417 		case 0:
7418 		  order_type = blas_rowmajor;
7419 		  break;
7420 		case 1:
7421 		default:
7422 		  order_type = blas_colmajor;
7423 		  break;
7424 		}
7425 
7426 		/* no_trans, trans, or conj_trans */
7427 		for (trans_val = 0; trans_val < 3; trans_val++) {
7428 		  switch (trans_val) {
7429 		  case 0:
7430 		    trans_type = blas_no_trans;
7431 		    m_i = m;
7432 		    n_i = n;
7433 		    break;
7434 		  case 1:
7435 		    trans_type = blas_trans;
7436 		    m_i = n;
7437 		    n_i = m;
7438 		    break;
7439 		  case 2:
7440 		  default:
7441 		    trans_type = blas_conj_trans;
7442 		    m_i = n;
7443 		    n_i = m;
7444 		    break;
7445 		  }
7446 
7447 		  /* lda=n, n+1, or 2n */
7448 		  for (lda_val = 0; lda_val < 3; lda_val++) {
7449 		    switch (lda_val) {
7450 		    case 0:
7451 		      lda = m_i;
7452 		      break;
7453 		    case 1:
7454 		      lda = m_i + 1;
7455 		      break;
7456 		    case 2:
7457 		    default:
7458 		      lda = 2 * m_i;
7459 		      break;
7460 		    }
7461 		    if ((order_type == blas_rowmajor && lda < n) ||
7462 			(order_type == blas_colmajor && lda < m))
7463 		      continue;
7464 
7465 		    /* For the sake of speed, we throw out this case at random */
7466 		    if (xrand(seed) >= test_prob)
7467 		      continue;
7468 
7469 		    /* in the trivial cases, no need to run testgen */
7470 		    if (m > 0 && n > 0)
7471 		      BLAS_dgemv2_testgen(norm, order_type, trans_type, m, n,
7472 					  &alpha, alpha_flag, A, lda,
7473 					  head_x_gen, tail_x_gen, &beta,
7474 					  beta_flag, y_gen, seed, head_r_true,
7475 					  tail_r_true);
7476 
7477 		    count++;
7478 
7479 		    /* varying incx */
7480 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
7481 		      if (incx_val == 0)
7482 			continue;
7483 
7484 		      /* setting incx */
7485 		      incx = incx_val;
7486 
7487 
7488 		      dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
7489 		      dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
7490 
7491 		      /* varying incy */
7492 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
7493 			if (incy_val == 0)
7494 			  continue;
7495 
7496 			/* setting incy */
7497 			incy = incy_val;
7498 
7499 
7500 			dcopy_vector(y_gen, m_i, 1, y, incy_val);
7501 
7502 			/* call BLAS_dgemv2_x */
7503 			FPU_FIX_STOP;
7504 			BLAS_dgemv2_x(order_type, trans_type, m, n, alpha, A,
7505 				      lda, head_x, tail_x, incx_val, beta, y,
7506 				      incy_val, prec);
7507 			FPU_FIX_START;
7508 
7509 			/* set y starting index */
7510 			iy = 0;
7511 			if (incy < 0)
7512 			  iy = -(m_i - 1) * incy;
7513 
7514 			/* computing the ratio */
7515 			if (m > 0 && n > 0)
7516 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
7517 			    /* copy row j of A to temp */
7518 			    dge_copy_row(order_type, trans_type, m_i, n_i, A,
7519 					 lda, temp, j);
7520 
7521 			    test_BLAS_ddot2(n_i, blas_no_conj, alpha, beta,
7522 					    y_gen[k], y[iy], head_r_true[k],
7523 					    tail_r_true[k], temp, 1, head_x,
7524 					    tail_x, incx_val, eps_int, un_int,
7525 					    &ratios[j]);
7526 
7527 			    /* take the max ratio */
7528 			    if (j == 0) {
7529 			      ratio = ratios[0];
7530 			      /* The !<= below causes NaN error to be detected.
7531 			         Note that (NaN > thresh) is always false. */
7532 			    } else if (!(ratios[j] <= ratio)) {
7533 			      ratio = ratios[j];
7534 			    }
7535 			    iy += incy;
7536 			  }
7537 
7538 			/* Increase the number of bad ratio, if the ratio
7539 			   is bigger than the threshold.
7540 			   The !<= below causes NaN error to be detected.
7541 			   Note that (NaN > thresh) is always false. */
7542 			if (!(ratio <= thresh)) {
7543 			  bad_ratios++;
7544 
7545 			  if ((debug == 3) &&	/* print only when debug is on */
7546 			      (count != old_count) &&	/* print if old vector is different
7547 							   from the current one */
7548 			      (d_count == find_max_ratio) &&
7549 			      (p_count <= max_print) &&
7550 			      (ratio > 0.5 * ratio_max)) {
7551 			    old_count = count;
7552 
7553 			    printf
7554 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
7555 			       fname, m, n, ntests, thresh);
7556 
7557 			    /* Print test info */
7558 			    switch (prec) {
7559 			    case blas_prec_single:
7560 			      printf("single ");
7561 			      break;
7562 			    case blas_prec_double:
7563 			      printf("double ");
7564 			      break;
7565 			    case blas_prec_indigenous:
7566 			      printf("indigenous ");
7567 			      break;
7568 			    case blas_prec_extra:
7569 			      printf("extra ");
7570 			      break;
7571 			    }
7572 			    switch (norm) {
7573 			    case -1:
7574 			      printf("near_underflow ");
7575 			      break;
7576 			    case 0:
7577 			      printf("near_one ");
7578 			      break;
7579 			    case 1:
7580 			      printf("near_overflow ");
7581 			      break;
7582 			    }
7583 			    switch (order_type) {
7584 			    case blas_rowmajor:
7585 			      printf("row_major ");
7586 			      break;
7587 			    case blas_colmajor:
7588 			      printf("col_major ");
7589 			      break;
7590 			    }
7591 			    switch (trans_type) {
7592 			    case blas_no_trans:
7593 			      printf("no_trans ");
7594 			      break;
7595 			    case blas_trans:
7596 			      printf("trans ");
7597 			      break;
7598 			    case blas_conj_trans:
7599 			      printf("conj_trans ");
7600 			      break;
7601 			    }
7602 
7603 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
7604 				   incy);
7605 
7606 			    dge_print_matrix(A, m_i, n_i, lda, order_type,
7607 					     "A");
7608 
7609 			    dprint_vector(head_x, n_i, incx_val, "head_x");
7610 			    dprint_vector(tail_x, n_i, incx_val, "tail_x");
7611 			    dprint_vector(y_gen, m_i, 1, "y_gen");
7612 			    dprint_vector(y, m_i, incy_val, "y_final");
7613 
7614 			    printf("      ");
7615 			    printf("alpha = ");
7616 			    printf("%24.16e", alpha);
7617 			    printf("\n      ");
7618 			    printf("beta = ");
7619 			    printf("%24.16e", beta);
7620 			    printf("\n");
7621 			    for (j = 0, k = 0; j < m_i * incy_gen;
7622 				 j += incy_gen, k++) {
7623 			      printf("      ");
7624 			      printf("[%24.16e, %24.16e]", head_r_true[j],
7625 				     tail_r_true[j]);
7626 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
7627 			    }
7628 
7629 			    printf("      ratio=%.4e\n", ratio);
7630 			    p_count++;
7631 			  }
7632 			  if (bad_ratios >= MAX_BAD_TESTS) {
7633 			    printf("\ntoo many failures, exiting....");
7634 			    printf("\nTesting and compilation");
7635 			    printf(" are incomplete\n\n");
7636 			    goto end;
7637 			  }
7638 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
7639 			    printf("\nFlagrant ratio error, exiting...");
7640 			    printf("\nTesting and compilation");
7641 			    printf(" are incomplete\n\n");
7642 			    goto end;
7643 			  }
7644 			}
7645 			if (d_count == 0) {
7646 			  if (ratio > ratio_max)
7647 			    ratio_max = ratio;
7648 
7649 			  if (ratio != 0.0 && ratio < ratio_min)
7650 			    ratio_min = ratio;
7651 
7652 			  tot_tests++;
7653 			}
7654 		      }		/* incy */
7655 		    }		/* incx */
7656 		  }		/* lda */
7657 		}		/* trans */
7658 	      }			/* order */
7659 	    }			/* tests */
7660 	  }			/* norm */
7661 	}			/* prec */
7662       }				/* beta */
7663     }				/* alpha */
7664   }				/* debug */
7665 
7666   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
7667     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
7668 	   fname, m, n, ntests, thresh);
7669     printf
7670       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
7671        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
7672        ratio_min, ratio_max);
7673   }
7674 
7675 end:
7676   FPU_FIX_STOP;
7677 
7678   blas_free(head_x);
7679   blas_free(tail_x);
7680   blas_free(y);
7681   blas_free(head_x_gen);
7682   blas_free(tail_x_gen);
7683   blas_free(y_gen);
7684   blas_free(temp);
7685   blas_free(A);
7686   blas_free(head_r_true);
7687   blas_free(tail_r_true);
7688   blas_free(ratios);
7689 
7690   *min_ratio = ratio_min;
7691   *num_bad_ratio = bad_ratios;
7692   *num_tests = tot_tests;
7693   return ratio_max;
7694 }
do_test_cgemv2_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)7695 double do_test_cgemv2_x(int m, int n, int ntests, int *seed, double thresh,
7696 			int debug, float test_prob, double *min_ratio,
7697 			int *num_bad_ratio, int *num_tests)
7698 
7699 /*
7700  * Purpose
7701  * =======
7702  *
7703  * Runs a series of tests on GEMV2.
7704  *
7705  * Arguments
7706  * =========
7707  *
7708  * m         (input) int
7709  *           The number of rows
7710  *
7711  * n         (input) int
7712  *           The number of columns
7713  *
7714  * ntests    (input) int
7715  *           The number of tests to run for each set of attributes.
7716  *
7717  * seed      (input/output) int
7718  *           The seed for the random number generator used in testgen().
7719  *
7720  * thresh    (input) double
7721  *           When the ratio returned from test() exceeds the specified
7722  *           threshold, the current size, r_true, r_comp, and ratio will be
7723  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
7724  *           to ~10.)
7725  *
7726  * debug     (input) int
7727  *           If debug=3, print summary
7728  *           If debug=2, print summary only if the number of bad ratios > 0
7729  *           If debug=1, print complete info if tests fail
7730  *           If debug=0, return max ratio
7731  *
7732  * test_prob (input) float
7733  *           The specified test will be performed only if the generated
7734  *           random exceeds this threshold.
7735  *
7736  * min_ratio (output) double
7737  *           The minimum ratio
7738  *
7739  * num_bad_ratio (output) int
7740  *               The number of tests fail; they are above the threshold.
7741  *
7742  * num_tests (output) int
7743  *           The number of tests is being performed.
7744  *
7745  * Return value
7746  * ============
7747  *
7748  * The maximum ratio if run successfully, otherwise return -1
7749  *
7750  * Code structure
7751  * ==============
7752  *
7753  *  debug loop  -- if debug is one, the first loop computes the max ratio
7754  *              -- and the last(second) loop outputs debugging information,
7755  *              -- if the test fail and its ratio > 0.5 * max ratio.
7756  *              -- if debug is zero, the loop is executed once
7757  *    alpha loop  -- varying alpha: 0, 1, or random
7758  *      beta loop   -- varying beta: 0, 1, or random
7759  *        prec loop   -- varying internal prec: single, double, or extra
7760  *          norm loop   -- varying norm: near undeflow, near one, or
7761  *                        -- near overflow
7762  *            numtest loop  -- how many times the test is perform with
7763  *                            -- above set of attributes
7764  *              order loop   -- varying order type: rowmajor or colmajor
7765  *                trans loop    -- varying uplo type: upper or lower
7766  *                  lda loop      -- varying lda: m, m+1, 2m
7767  *                    incx loop     -- varying incx: -2, -1, 1, 2
7768  *                      incy loop     -- varying incy: -2, -1, 1, 2
7769  */
7770 {
7771   /* function name */
7772   const char fname[] = "BLAS_cgemv2_x";
7773 
7774   /* max number of debug lines to print */
7775   const int max_print = 8;
7776 
7777   /* Variables in the "x_val" form are loop vars for corresponding
7778      variables */
7779   int i;			/* iterate through the repeating tests */
7780   int j, k;			/* multipurpose counters or variables */
7781   int iy;			/* use to index y */
7782   int incx_val, incy_val,	/* for testing different inc values */
7783     incx, incy;
7784   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
7785   int d_count;			/* counter for debug */
7786   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
7787   int p_count;			/* counter for the number of debug lines printed */
7788   int tot_tests;		/* total number of tests to be done */
7789   int norm;			/* input values of near underflow/one/overflow */
7790   double ratio_max;		/* the current maximum ratio */
7791   double ratio_min;		/* the current minimum ratio */
7792   double *ratios;		/* a temporary variable for calculating ratio */
7793   double ratio;			/* the per-use test ratio from test() */
7794   int bad_ratios;		/* the number of ratios over the threshold */
7795   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
7796   double un_int;		/* the internal underflow threshold */
7797   float alpha[2];
7798   float beta[2];
7799   float *A;
7800   float *head_x;
7801   float *tail_x;
7802   float *y;
7803   float *temp;			/* use for calculating ratio */
7804 
7805   /* x_gen and y_gen are used to store vectors generated by testgen.
7806      they eventually are copied back to x and y */
7807   float *head_x_gen;
7808   float *tail_x_gen;
7809   float *y_gen;
7810 
7811   /* the true r calculated by testgen(), in double-double */
7812   double *head_r_true, *tail_r_true;
7813 
7814   int alpha_val;
7815   int alpha_flag;		/* input flag for BLAS_cgemv2_testgen */
7816   int beta_val;
7817   int beta_flag;		/* input flag for BLAS_cgemv2_testgen */
7818   int order_val;
7819   enum blas_order_type order_type;
7820   int prec_val;
7821   enum blas_prec_type prec;
7822   int trans_val;
7823   enum blas_trans_type trans_type;
7824   int m_i;
7825   int n_i;
7826   int max_mn;			/* the max of m and n */
7827   int lda_val;
7828   int lda;
7829   int saved_seed;		/* for saving the original seed */
7830   int count, old_count;		/* use for counting the number of testgen calls * 2 */
7831 
7832   FPU_FIX_DECL;
7833 
7834   /* test for bad arguments */
7835   if (n < 0 || m < 0 || ntests < 0)
7836     BLAS_error(fname, 0, 0, NULL);
7837 
7838   /* initialization */
7839   *num_bad_ratio = 0;
7840   *num_tests = 0;
7841   *min_ratio = 0.0;
7842 
7843   saved_seed = *seed;
7844   ratio_min = 1e308;
7845   ratio_max = 0.0;
7846   ratio = 0.0;
7847   tot_tests = 0;
7848   p_count = 0;
7849   count = 0;
7850   find_max_ratio = 0;
7851   bad_ratios = 0;
7852   old_count = 0;
7853 
7854   if (debug == 3)
7855     find_max_ratio = 1;
7856   max_mn = MAX(m, n);
7857   if (m == 0 || n == 0) {
7858     return 0.0;
7859   }
7860 
7861   FPU_FIX_START;
7862 
7863   incy_gen = 1;
7864   incy_gen *= 2;
7865 
7866   /* get space for calculation */
7867   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
7868   if (max_mn * 2 > 0 && head_x == NULL) {
7869     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7870   }
7871   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
7872   if (max_mn * 2 > 0 && tail_x == NULL) {
7873     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7874   }
7875   y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
7876   if (max_mn * 2 > 0 && y == NULL) {
7877     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7878   }
7879   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
7880   if (max_mn > 0 && head_x_gen == NULL) {
7881     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7882   }
7883   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
7884   if (max_mn > 0 && tail_x_gen == NULL) {
7885     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7886   }
7887   y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
7888   if (max_mn > 0 && y_gen == NULL) {
7889     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7890   }
7891   temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
7892   if (max_mn > 0 && temp == NULL) {
7893     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7894   }
7895   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
7896   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
7897   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
7898     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7899   }
7900   ratios = (double *) blas_malloc(max_mn * sizeof(double));
7901   if (max_mn > 0 && ratios == NULL) {
7902     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7903   }
7904   A =
7905     (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
7906 			  2);
7907   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
7908     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
7909   }
7910 
7911   /* The debug iteration:
7912      If debug=1, then will execute the iteration twice. First, compute the
7913      max ratio. Second, print info if ratio > (50% * ratio_max). */
7914   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
7915     bad_ratios = 0;		/* set to zero */
7916 
7917     if ((debug == 3) && (d_count == find_max_ratio))
7918       *seed = saved_seed;	/* restore the original seed */
7919 
7920     /* varying alpha */
7921     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
7922       alpha_flag = 0;
7923       switch (alpha_val) {
7924       case 0:
7925 	alpha[0] = alpha[1] = 0.0;
7926 	alpha_flag = 1;
7927 	break;
7928       case 1:
7929 	alpha[0] = 1.0;
7930 	alpha[1] = 0.0;
7931 	alpha_flag = 1;
7932 	break;
7933       }
7934 
7935       /* varying beta */
7936       for (beta_val = 0; beta_val < 3; beta_val++) {
7937 	beta_flag = 0;
7938 	switch (beta_val) {
7939 	case 0:
7940 	  beta[0] = beta[1] = 0.0;
7941 	  beta_flag = 1;
7942 	  break;
7943 	case 1:
7944 	  beta[0] = 1.0;
7945 	  beta[1] = 0.0;
7946 	  beta_flag = 1;
7947 	  break;
7948 	}
7949 
7950 
7951 	/* varying extra precs */
7952 	for (prec_val = 0; prec_val <= 2; prec_val++) {
7953 	  switch (prec_val) {
7954 	  case 0:
7955 	    eps_int = power(2, -BITS_S);
7956 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
7957 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
7958 	    prec = blas_prec_single;
7959 	    break;
7960 	  case 1:
7961 	    eps_int = power(2, -BITS_D);
7962 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
7963 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
7964 	    prec = blas_prec_double;
7965 	    break;
7966 	  case 2:
7967 	  default:
7968 	    eps_int = power(2, -BITS_E);
7969 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
7970 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
7971 	    prec = blas_prec_extra;
7972 	    break;
7973 	  }
7974 
7975 	  /* values near underflow, 1, or overflow */
7976 	  for (norm = -1; norm <= 1; norm++) {
7977 
7978 	    /* number of tests */
7979 	    for (i = 0; i < ntests; i++) {
7980 
7981 	      /* row or col major */
7982 	      for (order_val = 0; order_val < 2; order_val++) {
7983 		switch (order_val) {
7984 		case 0:
7985 		  order_type = blas_rowmajor;
7986 		  break;
7987 		case 1:
7988 		default:
7989 		  order_type = blas_colmajor;
7990 		  break;
7991 		}
7992 
7993 		/* no_trans, trans, or conj_trans */
7994 		for (trans_val = 0; trans_val < 3; trans_val++) {
7995 		  switch (trans_val) {
7996 		  case 0:
7997 		    trans_type = blas_no_trans;
7998 		    m_i = m;
7999 		    n_i = n;
8000 		    break;
8001 		  case 1:
8002 		    trans_type = blas_trans;
8003 		    m_i = n;
8004 		    n_i = m;
8005 		    break;
8006 		  case 2:
8007 		  default:
8008 		    trans_type = blas_conj_trans;
8009 		    m_i = n;
8010 		    n_i = m;
8011 		    break;
8012 		  }
8013 
8014 		  /* lda=n, n+1, or 2n */
8015 		  for (lda_val = 0; lda_val < 3; lda_val++) {
8016 		    switch (lda_val) {
8017 		    case 0:
8018 		      lda = m_i;
8019 		      break;
8020 		    case 1:
8021 		      lda = m_i + 1;
8022 		      break;
8023 		    case 2:
8024 		    default:
8025 		      lda = 2 * m_i;
8026 		      break;
8027 		    }
8028 		    if ((order_type == blas_rowmajor && lda < n) ||
8029 			(order_type == blas_colmajor && lda < m))
8030 		      continue;
8031 
8032 		    /* For the sake of speed, we throw out this case at random */
8033 		    if (xrand(seed) >= test_prob)
8034 		      continue;
8035 
8036 		    /* in the trivial cases, no need to run testgen */
8037 		    if (m > 0 && n > 0)
8038 		      BLAS_cgemv2_testgen(norm, order_type, trans_type, m, n,
8039 					  &alpha, alpha_flag, A, lda,
8040 					  head_x_gen, tail_x_gen, &beta,
8041 					  beta_flag, y_gen, seed, head_r_true,
8042 					  tail_r_true);
8043 
8044 		    count++;
8045 
8046 		    /* varying incx */
8047 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
8048 		      if (incx_val == 0)
8049 			continue;
8050 
8051 		      /* setting incx */
8052 		      incx = incx_val;
8053 		      incx *= 2;
8054 
8055 		      ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
8056 		      ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
8057 
8058 		      /* varying incy */
8059 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
8060 			if (incy_val == 0)
8061 			  continue;
8062 
8063 			/* setting incy */
8064 			incy = incy_val;
8065 			incy *= 2;
8066 
8067 			ccopy_vector(y_gen, m_i, 1, y, incy_val);
8068 
8069 			/* call BLAS_cgemv2_x */
8070 			FPU_FIX_STOP;
8071 			BLAS_cgemv2_x(order_type, trans_type, m, n, alpha, A,
8072 				      lda, head_x, tail_x, incx_val, beta, y,
8073 				      incy_val, prec);
8074 			FPU_FIX_START;
8075 
8076 			/* set y starting index */
8077 			iy = 0;
8078 			if (incy < 0)
8079 			  iy = -(m_i - 1) * incy;
8080 
8081 			/* computing the ratio */
8082 			if (m > 0 && n > 0)
8083 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
8084 			    /* copy row j of A to temp */
8085 			    cge_copy_row(order_type, trans_type, m_i, n_i, A,
8086 					 lda, temp, j);
8087 
8088 			    test_BLAS_cdot2(n_i, blas_no_conj, alpha, beta,
8089 					    &y_gen[k], &y[iy],
8090 					    &head_r_true[k], &tail_r_true[k],
8091 					    temp, 1, head_x, tail_x, incx_val,
8092 					    eps_int, un_int, &ratios[j]);
8093 
8094 			    /* take the max ratio */
8095 			    if (j == 0) {
8096 			      ratio = ratios[0];
8097 			      /* The !<= below causes NaN error to be detected.
8098 			         Note that (NaN > thresh) is always false. */
8099 			    } else if (!(ratios[j] <= ratio)) {
8100 			      ratio = ratios[j];
8101 			    }
8102 			    iy += incy;
8103 			  }
8104 
8105 			/* Increase the number of bad ratio, if the ratio
8106 			   is bigger than the threshold.
8107 			   The !<= below causes NaN error to be detected.
8108 			   Note that (NaN > thresh) is always false. */
8109 			if (!(ratio <= thresh)) {
8110 			  bad_ratios++;
8111 
8112 			  if ((debug == 3) &&	/* print only when debug is on */
8113 			      (count != old_count) &&	/* print if old vector is different
8114 							   from the current one */
8115 			      (d_count == find_max_ratio) &&
8116 			      (p_count <= max_print) &&
8117 			      (ratio > 0.5 * ratio_max)) {
8118 			    old_count = count;
8119 
8120 			    printf
8121 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
8122 			       fname, m, n, ntests, thresh);
8123 
8124 			    /* Print test info */
8125 			    switch (prec) {
8126 			    case blas_prec_single:
8127 			      printf("single ");
8128 			      break;
8129 			    case blas_prec_double:
8130 			      printf("double ");
8131 			      break;
8132 			    case blas_prec_indigenous:
8133 			      printf("indigenous ");
8134 			      break;
8135 			    case blas_prec_extra:
8136 			      printf("extra ");
8137 			      break;
8138 			    }
8139 			    switch (norm) {
8140 			    case -1:
8141 			      printf("near_underflow ");
8142 			      break;
8143 			    case 0:
8144 			      printf("near_one ");
8145 			      break;
8146 			    case 1:
8147 			      printf("near_overflow ");
8148 			      break;
8149 			    }
8150 			    switch (order_type) {
8151 			    case blas_rowmajor:
8152 			      printf("row_major ");
8153 			      break;
8154 			    case blas_colmajor:
8155 			      printf("col_major ");
8156 			      break;
8157 			    }
8158 			    switch (trans_type) {
8159 			    case blas_no_trans:
8160 			      printf("no_trans ");
8161 			      break;
8162 			    case blas_trans:
8163 			      printf("trans ");
8164 			      break;
8165 			    case blas_conj_trans:
8166 			      printf("conj_trans ");
8167 			      break;
8168 			    }
8169 
8170 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
8171 				   incy);
8172 
8173 			    cge_print_matrix(A, m_i, n_i, lda, order_type,
8174 					     "A");
8175 
8176 			    cprint_vector(head_x, n_i, incx_val, "head_x");
8177 			    cprint_vector(tail_x, n_i, incx_val, "tail_x");
8178 			    cprint_vector(y_gen, m_i, 1, "y_gen");
8179 			    cprint_vector(y, m_i, incy_val, "y_final");
8180 
8181 			    printf("      ");
8182 			    printf("alpha = ");
8183 			    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
8184 			    printf("\n      ");
8185 			    printf("beta = ");
8186 			    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
8187 			    printf("\n");
8188 			    for (j = 0, k = 0; j < m_i * incy_gen;
8189 				 j += incy_gen, k++) {
8190 			      printf("      ");
8191 			      printf
8192 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
8193 				 head_r_true[j], tail_r_true[j],
8194 				 head_r_true[j + 1], tail_r_true[j + 1]);
8195 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
8196 			    }
8197 
8198 			    printf("      ratio=%.4e\n", ratio);
8199 			    p_count++;
8200 			  }
8201 			  if (bad_ratios >= MAX_BAD_TESTS) {
8202 			    printf("\ntoo many failures, exiting....");
8203 			    printf("\nTesting and compilation");
8204 			    printf(" are incomplete\n\n");
8205 			    goto end;
8206 			  }
8207 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
8208 			    printf("\nFlagrant ratio error, exiting...");
8209 			    printf("\nTesting and compilation");
8210 			    printf(" are incomplete\n\n");
8211 			    goto end;
8212 			  }
8213 			}
8214 			if (d_count == 0) {
8215 			  if (ratio > ratio_max)
8216 			    ratio_max = ratio;
8217 
8218 			  if (ratio != 0.0 && ratio < ratio_min)
8219 			    ratio_min = ratio;
8220 
8221 			  tot_tests++;
8222 			}
8223 		      }		/* incy */
8224 		    }		/* incx */
8225 		  }		/* lda */
8226 		}		/* trans */
8227 	      }			/* order */
8228 	    }			/* tests */
8229 	  }			/* norm */
8230 	}			/* prec */
8231       }				/* beta */
8232     }				/* alpha */
8233   }				/* debug */
8234 
8235   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
8236     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
8237 	   fname, m, n, ntests, thresh);
8238     printf
8239       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
8240        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
8241        ratio_min, ratio_max);
8242   }
8243 
8244 end:
8245   FPU_FIX_STOP;
8246 
8247   blas_free(head_x);
8248   blas_free(tail_x);
8249   blas_free(y);
8250   blas_free(head_x_gen);
8251   blas_free(tail_x_gen);
8252   blas_free(y_gen);
8253   blas_free(temp);
8254   blas_free(A);
8255   blas_free(head_r_true);
8256   blas_free(tail_r_true);
8257   blas_free(ratios);
8258 
8259   *min_ratio = ratio_min;
8260   *num_bad_ratio = bad_ratios;
8261   *num_tests = tot_tests;
8262   return ratio_max;
8263 }
do_test_zgemv2_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)8264 double do_test_zgemv2_x(int m, int n, int ntests, int *seed, double thresh,
8265 			int debug, float test_prob, double *min_ratio,
8266 			int *num_bad_ratio, int *num_tests)
8267 
8268 /*
8269  * Purpose
8270  * =======
8271  *
8272  * Runs a series of tests on GEMV2.
8273  *
8274  * Arguments
8275  * =========
8276  *
8277  * m         (input) int
8278  *           The number of rows
8279  *
8280  * n         (input) int
8281  *           The number of columns
8282  *
8283  * ntests    (input) int
8284  *           The number of tests to run for each set of attributes.
8285  *
8286  * seed      (input/output) int
8287  *           The seed for the random number generator used in testgen().
8288  *
8289  * thresh    (input) double
8290  *           When the ratio returned from test() exceeds the specified
8291  *           threshold, the current size, r_true, r_comp, and ratio will be
8292  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
8293  *           to ~10.)
8294  *
8295  * debug     (input) int
8296  *           If debug=3, print summary
8297  *           If debug=2, print summary only if the number of bad ratios > 0
8298  *           If debug=1, print complete info if tests fail
8299  *           If debug=0, return max ratio
8300  *
8301  * test_prob (input) float
8302  *           The specified test will be performed only if the generated
8303  *           random exceeds this threshold.
8304  *
8305  * min_ratio (output) double
8306  *           The minimum ratio
8307  *
8308  * num_bad_ratio (output) int
8309  *               The number of tests fail; they are above the threshold.
8310  *
8311  * num_tests (output) int
8312  *           The number of tests is being performed.
8313  *
8314  * Return value
8315  * ============
8316  *
8317  * The maximum ratio if run successfully, otherwise return -1
8318  *
8319  * Code structure
8320  * ==============
8321  *
8322  *  debug loop  -- if debug is one, the first loop computes the max ratio
8323  *              -- and the last(second) loop outputs debugging information,
8324  *              -- if the test fail and its ratio > 0.5 * max ratio.
8325  *              -- if debug is zero, the loop is executed once
8326  *    alpha loop  -- varying alpha: 0, 1, or random
8327  *      beta loop   -- varying beta: 0, 1, or random
8328  *        prec loop   -- varying internal prec: single, double, or extra
8329  *          norm loop   -- varying norm: near undeflow, near one, or
8330  *                        -- near overflow
8331  *            numtest loop  -- how many times the test is perform with
8332  *                            -- above set of attributes
8333  *              order loop   -- varying order type: rowmajor or colmajor
8334  *                trans loop    -- varying uplo type: upper or lower
8335  *                  lda loop      -- varying lda: m, m+1, 2m
8336  *                    incx loop     -- varying incx: -2, -1, 1, 2
8337  *                      incy loop     -- varying incy: -2, -1, 1, 2
8338  */
8339 {
8340   /* function name */
8341   const char fname[] = "BLAS_zgemv2_x";
8342 
8343   /* max number of debug lines to print */
8344   const int max_print = 8;
8345 
8346   /* Variables in the "x_val" form are loop vars for corresponding
8347      variables */
8348   int i;			/* iterate through the repeating tests */
8349   int j, k;			/* multipurpose counters or variables */
8350   int iy;			/* use to index y */
8351   int incx_val, incy_val,	/* for testing different inc values */
8352     incx, incy;
8353   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
8354   int d_count;			/* counter for debug */
8355   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
8356   int p_count;			/* counter for the number of debug lines printed */
8357   int tot_tests;		/* total number of tests to be done */
8358   int norm;			/* input values of near underflow/one/overflow */
8359   double ratio_max;		/* the current maximum ratio */
8360   double ratio_min;		/* the current minimum ratio */
8361   double *ratios;		/* a temporary variable for calculating ratio */
8362   double ratio;			/* the per-use test ratio from test() */
8363   int bad_ratios;		/* the number of ratios over the threshold */
8364   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
8365   double un_int;		/* the internal underflow threshold */
8366   double alpha[2];
8367   double beta[2];
8368   double *A;
8369   double *head_x;
8370   double *tail_x;
8371   double *y;
8372   double *temp;			/* use for calculating ratio */
8373 
8374   /* x_gen and y_gen are used to store vectors generated by testgen.
8375      they eventually are copied back to x and y */
8376   double *head_x_gen;
8377   double *tail_x_gen;
8378   double *y_gen;
8379 
8380   /* the true r calculated by testgen(), in double-double */
8381   double *head_r_true, *tail_r_true;
8382 
8383   int alpha_val;
8384   int alpha_flag;		/* input flag for BLAS_zgemv2_testgen */
8385   int beta_val;
8386   int beta_flag;		/* input flag for BLAS_zgemv2_testgen */
8387   int order_val;
8388   enum blas_order_type order_type;
8389   int prec_val;
8390   enum blas_prec_type prec;
8391   int trans_val;
8392   enum blas_trans_type trans_type;
8393   int m_i;
8394   int n_i;
8395   int max_mn;			/* the max of m and n */
8396   int lda_val;
8397   int lda;
8398   int saved_seed;		/* for saving the original seed */
8399   int count, old_count;		/* use for counting the number of testgen calls * 2 */
8400 
8401   FPU_FIX_DECL;
8402 
8403   /* test for bad arguments */
8404   if (n < 0 || m < 0 || ntests < 0)
8405     BLAS_error(fname, 0, 0, NULL);
8406 
8407   /* initialization */
8408   *num_bad_ratio = 0;
8409   *num_tests = 0;
8410   *min_ratio = 0.0;
8411 
8412   saved_seed = *seed;
8413   ratio_min = 1e308;
8414   ratio_max = 0.0;
8415   ratio = 0.0;
8416   tot_tests = 0;
8417   p_count = 0;
8418   count = 0;
8419   find_max_ratio = 0;
8420   bad_ratios = 0;
8421   old_count = 0;
8422 
8423   if (debug == 3)
8424     find_max_ratio = 1;
8425   max_mn = MAX(m, n);
8426   if (m == 0 || n == 0) {
8427     return 0.0;
8428   }
8429 
8430   FPU_FIX_START;
8431 
8432   incy_gen = 1;
8433   incy_gen *= 2;
8434 
8435   /* get space for calculation */
8436   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
8437   if (max_mn * 2 > 0 && head_x == NULL) {
8438     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8439   }
8440   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
8441   if (max_mn * 2 > 0 && tail_x == NULL) {
8442     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8443   }
8444   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
8445   if (max_mn * 2 > 0 && y == NULL) {
8446     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8447   }
8448   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8449   if (max_mn > 0 && head_x_gen == NULL) {
8450     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8451   }
8452   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8453   if (max_mn > 0 && tail_x_gen == NULL) {
8454     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8455   }
8456   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8457   if (max_mn > 0 && y_gen == NULL) {
8458     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8459   }
8460   temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8461   if (max_mn > 0 && temp == NULL) {
8462     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8463   }
8464   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8465   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
8466   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
8467     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8468   }
8469   ratios = (double *) blas_malloc(max_mn * sizeof(double));
8470   if (max_mn > 0 && ratios == NULL) {
8471     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8472   }
8473   A =
8474     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
8475 			   2);
8476   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
8477     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
8478   }
8479 
8480   /* The debug iteration:
8481      If debug=1, then will execute the iteration twice. First, compute the
8482      max ratio. Second, print info if ratio > (50% * ratio_max). */
8483   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
8484     bad_ratios = 0;		/* set to zero */
8485 
8486     if ((debug == 3) && (d_count == find_max_ratio))
8487       *seed = saved_seed;	/* restore the original seed */
8488 
8489     /* varying alpha */
8490     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
8491       alpha_flag = 0;
8492       switch (alpha_val) {
8493       case 0:
8494 	alpha[0] = alpha[1] = 0.0;
8495 	alpha_flag = 1;
8496 	break;
8497       case 1:
8498 	alpha[0] = 1.0;
8499 	alpha[1] = 0.0;
8500 	alpha_flag = 1;
8501 	break;
8502       }
8503 
8504       /* varying beta */
8505       for (beta_val = 0; beta_val < 3; beta_val++) {
8506 	beta_flag = 0;
8507 	switch (beta_val) {
8508 	case 0:
8509 	  beta[0] = beta[1] = 0.0;
8510 	  beta_flag = 1;
8511 	  break;
8512 	case 1:
8513 	  beta[0] = 1.0;
8514 	  beta[1] = 0.0;
8515 	  beta_flag = 1;
8516 	  break;
8517 	}
8518 
8519 
8520 	/* varying extra precs */
8521 	for (prec_val = 0; prec_val <= 2; prec_val++) {
8522 	  switch (prec_val) {
8523 	  case 0:
8524 	    eps_int = power(2, -BITS_D);
8525 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
8526 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
8527 	    prec = blas_prec_double;
8528 	    break;
8529 	  case 1:
8530 	    eps_int = power(2, -BITS_D);
8531 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
8532 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
8533 	    prec = blas_prec_double;
8534 	    break;
8535 	  case 2:
8536 	  default:
8537 	    eps_int = power(2, -BITS_E);
8538 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
8539 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
8540 	    prec = blas_prec_extra;
8541 	    break;
8542 	  }
8543 
8544 	  /* values near underflow, 1, or overflow */
8545 	  for (norm = -1; norm <= 1; norm++) {
8546 
8547 	    /* number of tests */
8548 	    for (i = 0; i < ntests; i++) {
8549 
8550 	      /* row or col major */
8551 	      for (order_val = 0; order_val < 2; order_val++) {
8552 		switch (order_val) {
8553 		case 0:
8554 		  order_type = blas_rowmajor;
8555 		  break;
8556 		case 1:
8557 		default:
8558 		  order_type = blas_colmajor;
8559 		  break;
8560 		}
8561 
8562 		/* no_trans, trans, or conj_trans */
8563 		for (trans_val = 0; trans_val < 3; trans_val++) {
8564 		  switch (trans_val) {
8565 		  case 0:
8566 		    trans_type = blas_no_trans;
8567 		    m_i = m;
8568 		    n_i = n;
8569 		    break;
8570 		  case 1:
8571 		    trans_type = blas_trans;
8572 		    m_i = n;
8573 		    n_i = m;
8574 		    break;
8575 		  case 2:
8576 		  default:
8577 		    trans_type = blas_conj_trans;
8578 		    m_i = n;
8579 		    n_i = m;
8580 		    break;
8581 		  }
8582 
8583 		  /* lda=n, n+1, or 2n */
8584 		  for (lda_val = 0; lda_val < 3; lda_val++) {
8585 		    switch (lda_val) {
8586 		    case 0:
8587 		      lda = m_i;
8588 		      break;
8589 		    case 1:
8590 		      lda = m_i + 1;
8591 		      break;
8592 		    case 2:
8593 		    default:
8594 		      lda = 2 * m_i;
8595 		      break;
8596 		    }
8597 		    if ((order_type == blas_rowmajor && lda < n) ||
8598 			(order_type == blas_colmajor && lda < m))
8599 		      continue;
8600 
8601 		    /* For the sake of speed, we throw out this case at random */
8602 		    if (xrand(seed) >= test_prob)
8603 		      continue;
8604 
8605 		    /* in the trivial cases, no need to run testgen */
8606 		    if (m > 0 && n > 0)
8607 		      BLAS_zgemv2_testgen(norm, order_type, trans_type, m, n,
8608 					  &alpha, alpha_flag, A, lda,
8609 					  head_x_gen, tail_x_gen, &beta,
8610 					  beta_flag, y_gen, seed, head_r_true,
8611 					  tail_r_true);
8612 
8613 		    count++;
8614 
8615 		    /* varying incx */
8616 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
8617 		      if (incx_val == 0)
8618 			continue;
8619 
8620 		      /* setting incx */
8621 		      incx = incx_val;
8622 		      incx *= 2;
8623 
8624 		      zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
8625 		      zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
8626 
8627 		      /* varying incy */
8628 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
8629 			if (incy_val == 0)
8630 			  continue;
8631 
8632 			/* setting incy */
8633 			incy = incy_val;
8634 			incy *= 2;
8635 
8636 			zcopy_vector(y_gen, m_i, 1, y, incy_val);
8637 
8638 			/* call BLAS_zgemv2_x */
8639 			FPU_FIX_STOP;
8640 			BLAS_zgemv2_x(order_type, trans_type, m, n, alpha, A,
8641 				      lda, head_x, tail_x, incx_val, beta, y,
8642 				      incy_val, prec);
8643 			FPU_FIX_START;
8644 
8645 			/* set y starting index */
8646 			iy = 0;
8647 			if (incy < 0)
8648 			  iy = -(m_i - 1) * incy;
8649 
8650 			/* computing the ratio */
8651 			if (m > 0 && n > 0)
8652 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
8653 			    /* copy row j of A to temp */
8654 			    zge_copy_row(order_type, trans_type, m_i, n_i, A,
8655 					 lda, temp, j);
8656 
8657 			    test_BLAS_zdot2(n_i, blas_no_conj, alpha, beta,
8658 					    &y_gen[k], &y[iy],
8659 					    &head_r_true[k], &tail_r_true[k],
8660 					    temp, 1, head_x, tail_x, incx_val,
8661 					    eps_int, un_int, &ratios[j]);
8662 
8663 			    /* take the max ratio */
8664 			    if (j == 0) {
8665 			      ratio = ratios[0];
8666 			      /* The !<= below causes NaN error to be detected.
8667 			         Note that (NaN > thresh) is always false. */
8668 			    } else if (!(ratios[j] <= ratio)) {
8669 			      ratio = ratios[j];
8670 			    }
8671 			    iy += incy;
8672 			  }
8673 
8674 			/* Increase the number of bad ratio, if the ratio
8675 			   is bigger than the threshold.
8676 			   The !<= below causes NaN error to be detected.
8677 			   Note that (NaN > thresh) is always false. */
8678 			if (!(ratio <= thresh)) {
8679 			  bad_ratios++;
8680 
8681 			  if ((debug == 3) &&	/* print only when debug is on */
8682 			      (count != old_count) &&	/* print if old vector is different
8683 							   from the current one */
8684 			      (d_count == find_max_ratio) &&
8685 			      (p_count <= max_print) &&
8686 			      (ratio > 0.5 * ratio_max)) {
8687 			    old_count = count;
8688 
8689 			    printf
8690 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
8691 			       fname, m, n, ntests, thresh);
8692 
8693 			    /* Print test info */
8694 			    switch (prec) {
8695 			    case blas_prec_single:
8696 			      printf("single ");
8697 			      break;
8698 			    case blas_prec_double:
8699 			      printf("double ");
8700 			      break;
8701 			    case blas_prec_indigenous:
8702 			      printf("indigenous ");
8703 			      break;
8704 			    case blas_prec_extra:
8705 			      printf("extra ");
8706 			      break;
8707 			    }
8708 			    switch (norm) {
8709 			    case -1:
8710 			      printf("near_underflow ");
8711 			      break;
8712 			    case 0:
8713 			      printf("near_one ");
8714 			      break;
8715 			    case 1:
8716 			      printf("near_overflow ");
8717 			      break;
8718 			    }
8719 			    switch (order_type) {
8720 			    case blas_rowmajor:
8721 			      printf("row_major ");
8722 			      break;
8723 			    case blas_colmajor:
8724 			      printf("col_major ");
8725 			      break;
8726 			    }
8727 			    switch (trans_type) {
8728 			    case blas_no_trans:
8729 			      printf("no_trans ");
8730 			      break;
8731 			    case blas_trans:
8732 			      printf("trans ");
8733 			      break;
8734 			    case blas_conj_trans:
8735 			      printf("conj_trans ");
8736 			      break;
8737 			    }
8738 
8739 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
8740 				   incy);
8741 
8742 			    zge_print_matrix(A, m_i, n_i, lda, order_type,
8743 					     "A");
8744 
8745 			    zprint_vector(head_x, n_i, incx_val, "head_x");
8746 			    zprint_vector(tail_x, n_i, incx_val, "tail_x");
8747 			    zprint_vector(y_gen, m_i, 1, "y_gen");
8748 			    zprint_vector(y, m_i, incy_val, "y_final");
8749 
8750 			    printf("      ");
8751 			    printf("alpha = ");
8752 			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
8753 			    printf("\n      ");
8754 			    printf("beta = ");
8755 			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
8756 			    printf("\n");
8757 			    for (j = 0, k = 0; j < m_i * incy_gen;
8758 				 j += incy_gen, k++) {
8759 			      printf("      ");
8760 			      printf
8761 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
8762 				 head_r_true[j], tail_r_true[j],
8763 				 head_r_true[j + 1], tail_r_true[j + 1]);
8764 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
8765 			    }
8766 
8767 			    printf("      ratio=%.4e\n", ratio);
8768 			    p_count++;
8769 			  }
8770 			  if (bad_ratios >= MAX_BAD_TESTS) {
8771 			    printf("\ntoo many failures, exiting....");
8772 			    printf("\nTesting and compilation");
8773 			    printf(" are incomplete\n\n");
8774 			    goto end;
8775 			  }
8776 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
8777 			    printf("\nFlagrant ratio error, exiting...");
8778 			    printf("\nTesting and compilation");
8779 			    printf(" are incomplete\n\n");
8780 			    goto end;
8781 			  }
8782 			}
8783 			if (d_count == 0) {
8784 			  if (ratio > ratio_max)
8785 			    ratio_max = ratio;
8786 
8787 			  if (ratio != 0.0 && ratio < ratio_min)
8788 			    ratio_min = ratio;
8789 
8790 			  tot_tests++;
8791 			}
8792 		      }		/* incy */
8793 		    }		/* incx */
8794 		  }		/* lda */
8795 		}		/* trans */
8796 	      }			/* order */
8797 	    }			/* tests */
8798 	  }			/* norm */
8799 	}			/* prec */
8800       }				/* beta */
8801     }				/* alpha */
8802   }				/* debug */
8803 
8804   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
8805     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
8806 	   fname, m, n, ntests, thresh);
8807     printf
8808       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
8809        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
8810        ratio_min, ratio_max);
8811   }
8812 
8813 end:
8814   FPU_FIX_STOP;
8815 
8816   blas_free(head_x);
8817   blas_free(tail_x);
8818   blas_free(y);
8819   blas_free(head_x_gen);
8820   blas_free(tail_x_gen);
8821   blas_free(y_gen);
8822   blas_free(temp);
8823   blas_free(A);
8824   blas_free(head_r_true);
8825   blas_free(tail_r_true);
8826   blas_free(ratios);
8827 
8828   *min_ratio = ratio_min;
8829   *num_bad_ratio = bad_ratios;
8830   *num_tests = tot_tests;
8831   return ratio_max;
8832 }
do_test_dgemv2_d_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)8833 double do_test_dgemv2_d_s_x(int m, int n, int ntests, int *seed,
8834 			    double thresh, int debug, float test_prob,
8835 			    double *min_ratio, int *num_bad_ratio,
8836 			    int *num_tests)
8837 
8838 /*
8839  * Purpose
8840  * =======
8841  *
8842  * Runs a series of tests on GEMV2.
8843  *
8844  * Arguments
8845  * =========
8846  *
8847  * m         (input) int
8848  *           The number of rows
8849  *
8850  * n         (input) int
8851  *           The number of columns
8852  *
8853  * ntests    (input) int
8854  *           The number of tests to run for each set of attributes.
8855  *
8856  * seed      (input/output) int
8857  *           The seed for the random number generator used in testgen().
8858  *
8859  * thresh    (input) double
8860  *           When the ratio returned from test() exceeds the specified
8861  *           threshold, the current size, r_true, r_comp, and ratio will be
8862  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
8863  *           to ~10.)
8864  *
8865  * debug     (input) int
8866  *           If debug=3, print summary
8867  *           If debug=2, print summary only if the number of bad ratios > 0
8868  *           If debug=1, print complete info if tests fail
8869  *           If debug=0, return max ratio
8870  *
8871  * test_prob (input) float
8872  *           The specified test will be performed only if the generated
8873  *           random exceeds this threshold.
8874  *
8875  * min_ratio (output) double
8876  *           The minimum ratio
8877  *
8878  * num_bad_ratio (output) int
8879  *               The number of tests fail; they are above the threshold.
8880  *
8881  * num_tests (output) int
8882  *           The number of tests is being performed.
8883  *
8884  * Return value
8885  * ============
8886  *
8887  * The maximum ratio if run successfully, otherwise return -1
8888  *
8889  * Code structure
8890  * ==============
8891  *
8892  *  debug loop  -- if debug is one, the first loop computes the max ratio
8893  *              -- and the last(second) loop outputs debugging information,
8894  *              -- if the test fail and its ratio > 0.5 * max ratio.
8895  *              -- if debug is zero, the loop is executed once
8896  *    alpha loop  -- varying alpha: 0, 1, or random
8897  *      beta loop   -- varying beta: 0, 1, or random
8898  *        prec loop   -- varying internal prec: single, double, or extra
8899  *          norm loop   -- varying norm: near undeflow, near one, or
8900  *                        -- near overflow
8901  *            numtest loop  -- how many times the test is perform with
8902  *                            -- above set of attributes
8903  *              order loop   -- varying order type: rowmajor or colmajor
8904  *                trans loop    -- varying uplo type: upper or lower
8905  *                  lda loop      -- varying lda: m, m+1, 2m
8906  *                    incx loop     -- varying incx: -2, -1, 1, 2
8907  *                      incy loop     -- varying incy: -2, -1, 1, 2
8908  */
8909 {
8910   /* function name */
8911   const char fname[] = "BLAS_dgemv2_d_s_x";
8912 
8913   /* max number of debug lines to print */
8914   const int max_print = 8;
8915 
8916   /* Variables in the "x_val" form are loop vars for corresponding
8917      variables */
8918   int i;			/* iterate through the repeating tests */
8919   int j, k;			/* multipurpose counters or variables */
8920   int iy;			/* use to index y */
8921   int incx_val, incy_val,	/* for testing different inc values */
8922     incx, incy;
8923   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
8924   int d_count;			/* counter for debug */
8925   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
8926   int p_count;			/* counter for the number of debug lines printed */
8927   int tot_tests;		/* total number of tests to be done */
8928   int norm;			/* input values of near underflow/one/overflow */
8929   double ratio_max;		/* the current maximum ratio */
8930   double ratio_min;		/* the current minimum ratio */
8931   double *ratios;		/* a temporary variable for calculating ratio */
8932   double ratio;			/* the per-use test ratio from test() */
8933   int bad_ratios;		/* the number of ratios over the threshold */
8934   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
8935   double un_int;		/* the internal underflow threshold */
8936   double alpha;
8937   double beta;
8938   double *A;
8939   float *head_x;
8940   float *tail_x;
8941   double *y;
8942   double *temp;			/* use for calculating ratio */
8943 
8944   /* x_gen and y_gen are used to store vectors generated by testgen.
8945      they eventually are copied back to x and y */
8946   float *head_x_gen;
8947   float *tail_x_gen;
8948   double *y_gen;
8949 
8950   /* the true r calculated by testgen(), in double-double */
8951   double *head_r_true, *tail_r_true;
8952   int alpha_val;
8953   int alpha_flag;		/* input flag for BLAS_dgemv2_d_s_testgen */
8954   int beta_val;
8955   int beta_flag;		/* input flag for BLAS_dgemv2_d_s_testgen */
8956   int order_val;
8957   enum blas_order_type order_type;
8958   int prec_val;
8959   enum blas_prec_type prec;
8960   int trans_val;
8961   enum blas_trans_type trans_type;
8962   int m_i;
8963   int n_i;
8964   int max_mn;			/* the max of m and n */
8965   int lda_val;
8966   int lda;
8967   int saved_seed;		/* for saving the original seed */
8968   int count, old_count;		/* use for counting the number of testgen calls * 2 */
8969 
8970   FPU_FIX_DECL;
8971 
8972   /* test for bad arguments */
8973   if (n < 0 || m < 0 || ntests < 0)
8974     BLAS_error(fname, 0, 0, NULL);
8975 
8976   /* initialization */
8977   *num_bad_ratio = 0;
8978   *num_tests = 0;
8979   *min_ratio = 0.0;
8980 
8981   saved_seed = *seed;
8982   ratio_min = 1e308;
8983   ratio_max = 0.0;
8984   ratio = 0.0;
8985   tot_tests = 0;
8986   p_count = 0;
8987   count = 0;
8988   find_max_ratio = 0;
8989   bad_ratios = 0;
8990   old_count = 0;
8991 
8992   if (debug == 3)
8993     find_max_ratio = 1;
8994   max_mn = MAX(m, n);
8995   if (m == 0 || n == 0) {
8996     return 0.0;
8997   }
8998 
8999   FPU_FIX_START;
9000 
9001   incy_gen = 1;
9002 
9003 
9004   /* get space for calculation */
9005   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
9006   if (max_mn * 2 > 0 && head_x == NULL) {
9007     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9008   }
9009   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
9010   if (max_mn * 2 > 0 && tail_x == NULL) {
9011     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9012   }
9013   y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
9014   if (max_mn * 2 > 0 && y == NULL) {
9015     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9016   }
9017   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
9018   if (max_mn > 0 && head_x_gen == NULL) {
9019     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9020   }
9021   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
9022   if (max_mn > 0 && tail_x_gen == NULL) {
9023     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9024   }
9025   y_gen = (double *) blas_malloc(max_mn * sizeof(double));
9026   if (max_mn > 0 && y_gen == NULL) {
9027     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9028   }
9029   temp = (double *) blas_malloc(max_mn * sizeof(double));
9030   if (max_mn > 0 && temp == NULL) {
9031     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9032   }
9033   head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
9034   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
9035   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
9036     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9037   }
9038   ratios = (double *) blas_malloc(max_mn * sizeof(double));
9039   if (max_mn > 0 && ratios == NULL) {
9040     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9041   }
9042   A =
9043     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
9044   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
9045     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9046   }
9047 
9048   /* The debug iteration:
9049      If debug=1, then will execute the iteration twice. First, compute the
9050      max ratio. Second, print info if ratio > (50% * ratio_max). */
9051   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
9052     bad_ratios = 0;		/* set to zero */
9053 
9054     if ((debug == 3) && (d_count == find_max_ratio))
9055       *seed = saved_seed;	/* restore the original seed */
9056 
9057     /* varying alpha */
9058     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
9059       alpha_flag = 0;
9060       switch (alpha_val) {
9061       case 0:
9062 	alpha = 0.0;
9063 	alpha_flag = 1;
9064 	break;
9065       case 1:
9066 	alpha = 1.0;
9067 	alpha_flag = 1;
9068 	break;
9069       }
9070 
9071       /* varying beta */
9072       for (beta_val = 0; beta_val < 3; beta_val++) {
9073 	beta_flag = 0;
9074 	switch (beta_val) {
9075 	case 0:
9076 	  beta = 0.0;
9077 	  beta_flag = 1;
9078 	  break;
9079 	case 1:
9080 	  beta = 1.0;
9081 	  beta_flag = 1;
9082 	  break;
9083 	}
9084 
9085 
9086 	/* varying extra precs */
9087 	for (prec_val = 0; prec_val <= 2; prec_val++) {
9088 	  switch (prec_val) {
9089 	  case 0:
9090 	    eps_int = power(2, -BITS_D);
9091 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9092 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9093 	    prec = blas_prec_double;
9094 	    break;
9095 	  case 1:
9096 	    eps_int = power(2, -BITS_D);
9097 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9098 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9099 	    prec = blas_prec_double;
9100 	    break;
9101 	  case 2:
9102 	  default:
9103 	    eps_int = power(2, -BITS_E);
9104 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
9105 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
9106 	    prec = blas_prec_extra;
9107 	    break;
9108 	  }
9109 
9110 	  /* values near underflow, 1, or overflow */
9111 	  for (norm = -1; norm <= 1; norm++) {
9112 
9113 	    /* number of tests */
9114 	    for (i = 0; i < ntests; i++) {
9115 
9116 	      /* row or col major */
9117 	      for (order_val = 0; order_val < 2; order_val++) {
9118 		switch (order_val) {
9119 		case 0:
9120 		  order_type = blas_rowmajor;
9121 		  break;
9122 		case 1:
9123 		default:
9124 		  order_type = blas_colmajor;
9125 		  break;
9126 		}
9127 
9128 		/* no_trans, trans, or conj_trans */
9129 		for (trans_val = 0; trans_val < 3; trans_val++) {
9130 		  switch (trans_val) {
9131 		  case 0:
9132 		    trans_type = blas_no_trans;
9133 		    m_i = m;
9134 		    n_i = n;
9135 		    break;
9136 		  case 1:
9137 		    trans_type = blas_trans;
9138 		    m_i = n;
9139 		    n_i = m;
9140 		    break;
9141 		  case 2:
9142 		  default:
9143 		    trans_type = blas_conj_trans;
9144 		    m_i = n;
9145 		    n_i = m;
9146 		    break;
9147 		  }
9148 
9149 		  /* lda=n, n+1, or 2n */
9150 		  for (lda_val = 0; lda_val < 3; lda_val++) {
9151 		    switch (lda_val) {
9152 		    case 0:
9153 		      lda = m_i;
9154 		      break;
9155 		    case 1:
9156 		      lda = m_i + 1;
9157 		      break;
9158 		    case 2:
9159 		    default:
9160 		      lda = 2 * m_i;
9161 		      break;
9162 		    }
9163 		    if ((order_type == blas_rowmajor && lda < n) ||
9164 			(order_type == blas_colmajor && lda < m))
9165 		      continue;
9166 
9167 		    /* For the sake of speed, we throw out this case at random */
9168 		    if (xrand(seed) >= test_prob)
9169 		      continue;
9170 
9171 		    /* in the trivial cases, no need to run testgen */
9172 		    if (m > 0 && n > 0)
9173 		      BLAS_dgemv2_d_s_testgen(norm, order_type, trans_type, m,
9174 					      n, &alpha, alpha_flag, A, lda,
9175 					      head_x_gen, tail_x_gen, &beta,
9176 					      beta_flag, y_gen, seed,
9177 					      head_r_true, tail_r_true);
9178 
9179 		    count++;
9180 
9181 		    /* varying incx */
9182 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
9183 		      if (incx_val == 0)
9184 			continue;
9185 
9186 		      /* setting incx */
9187 		      incx = incx_val;
9188 
9189 
9190 		      scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
9191 		      scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
9192 
9193 		      /* varying incy */
9194 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
9195 			if (incy_val == 0)
9196 			  continue;
9197 
9198 			/* setting incy */
9199 			incy = incy_val;
9200 
9201 
9202 			dcopy_vector(y_gen, m_i, 1, y, incy_val);
9203 
9204 			/* call BLAS_dgemv2_d_s_x */
9205 			FPU_FIX_STOP;
9206 			BLAS_dgemv2_d_s_x(order_type, trans_type, m, n, alpha,
9207 					  A, lda, head_x, tail_x, incx_val,
9208 					  beta, y, incy_val, prec);
9209 			FPU_FIX_START;
9210 
9211 			/* set y starting index */
9212 			iy = 0;
9213 			if (incy < 0)
9214 			  iy = -(m_i - 1) * incy;
9215 
9216 			/* computing the ratio */
9217 			if (m > 0 && n > 0)
9218 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
9219 			    /* copy row j of A to temp */
9220 			    dge_copy_row(order_type, trans_type, m_i, n_i, A,
9221 					 lda, temp, j);
9222 
9223 			    test_BLAS_ddot2_d_s(n_i, blas_no_conj, alpha,
9224 						beta, y_gen[k], y[iy],
9225 						head_r_true[k],
9226 						tail_r_true[k], temp, 1,
9227 						head_x, tail_x, incx_val,
9228 						eps_int, un_int, &ratios[j]);
9229 
9230 			    /* take the max ratio */
9231 			    if (j == 0) {
9232 			      ratio = ratios[0];
9233 			      /* The !<= below causes NaN error to be detected.
9234 			         Note that (NaN > thresh) is always false. */
9235 			    } else if (!(ratios[j] <= ratio)) {
9236 			      ratio = ratios[j];
9237 			    }
9238 			    iy += incy;
9239 			  }
9240 
9241 			/* Increase the number of bad ratio, if the ratio
9242 			   is bigger than the threshold.
9243 			   The !<= below causes NaN error to be detected.
9244 			   Note that (NaN > thresh) is always false. */
9245 			if (!(ratio <= thresh)) {
9246 			  bad_ratios++;
9247 
9248 			  if ((debug == 3) &&	/* print only when debug is on */
9249 			      (count != old_count) &&	/* print if old vector is different
9250 							   from the current one */
9251 			      (d_count == find_max_ratio) &&
9252 			      (p_count <= max_print) &&
9253 			      (ratio > 0.5 * ratio_max)) {
9254 			    old_count = count;
9255 
9256 			    printf
9257 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
9258 			       fname, m, n, ntests, thresh);
9259 
9260 			    /* Print test info */
9261 			    switch (prec) {
9262 			    case blas_prec_single:
9263 			      printf("single ");
9264 			      break;
9265 			    case blas_prec_double:
9266 			      printf("double ");
9267 			      break;
9268 			    case blas_prec_indigenous:
9269 			      printf("indigenous ");
9270 			      break;
9271 			    case blas_prec_extra:
9272 			      printf("extra ");
9273 			      break;
9274 			    }
9275 			    switch (norm) {
9276 			    case -1:
9277 			      printf("near_underflow ");
9278 			      break;
9279 			    case 0:
9280 			      printf("near_one ");
9281 			      break;
9282 			    case 1:
9283 			      printf("near_overflow ");
9284 			      break;
9285 			    }
9286 			    switch (order_type) {
9287 			    case blas_rowmajor:
9288 			      printf("row_major ");
9289 			      break;
9290 			    case blas_colmajor:
9291 			      printf("col_major ");
9292 			      break;
9293 			    }
9294 			    switch (trans_type) {
9295 			    case blas_no_trans:
9296 			      printf("no_trans ");
9297 			      break;
9298 			    case blas_trans:
9299 			      printf("trans ");
9300 			      break;
9301 			    case blas_conj_trans:
9302 			      printf("conj_trans ");
9303 			      break;
9304 			    }
9305 
9306 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
9307 				   incy);
9308 
9309 			    dge_print_matrix(A, m_i, n_i, lda, order_type,
9310 					     "A");
9311 
9312 			    sprint_vector(head_x, n_i, incx_val, "head_x");
9313 			    sprint_vector(tail_x, n_i, incx_val, "tail_x");
9314 			    dprint_vector(y_gen, m_i, 1, "y_gen");
9315 			    dprint_vector(y, m_i, incy_val, "y_final");
9316 
9317 			    printf("      ");
9318 			    printf("alpha = ");
9319 			    printf("%24.16e", alpha);
9320 			    printf("\n      ");
9321 			    printf("beta = ");
9322 			    printf("%24.16e", beta);
9323 			    printf("\n");
9324 			    for (j = 0, k = 0; j < m_i * incy_gen;
9325 				 j += incy_gen, k++) {
9326 			      printf("      ");
9327 			      printf("[%24.16e, %24.16e]", head_r_true[j],
9328 				     tail_r_true[j]);
9329 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
9330 			    }
9331 
9332 			    printf("      ratio=%.4e\n", ratio);
9333 			    p_count++;
9334 			  }
9335 			  if (bad_ratios >= MAX_BAD_TESTS) {
9336 			    printf("\ntoo many failures, exiting....");
9337 			    printf("\nTesting and compilation");
9338 			    printf(" are incomplete\n\n");
9339 			    goto end;
9340 			  }
9341 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
9342 			    printf("\nFlagrant ratio error, exiting...");
9343 			    printf("\nTesting and compilation");
9344 			    printf(" are incomplete\n\n");
9345 			    goto end;
9346 			  }
9347 			}
9348 			if (d_count == 0) {
9349 			  if (ratio > ratio_max)
9350 			    ratio_max = ratio;
9351 
9352 			  if (ratio != 0.0 && ratio < ratio_min)
9353 			    ratio_min = ratio;
9354 
9355 			  tot_tests++;
9356 			}
9357 		      }		/* incy */
9358 		    }		/* incx */
9359 		  }		/* lda */
9360 		}		/* trans */
9361 	      }			/* order */
9362 	    }			/* tests */
9363 	  }			/* norm */
9364 	}			/* prec */
9365       }				/* beta */
9366     }				/* alpha */
9367   }				/* debug */
9368 
9369   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
9370     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
9371 	   fname, m, n, ntests, thresh);
9372     printf
9373       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
9374        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
9375        ratio_min, ratio_max);
9376   }
9377 
9378 end:
9379   FPU_FIX_STOP;
9380 
9381   blas_free(head_x);
9382   blas_free(tail_x);
9383   blas_free(y);
9384   blas_free(head_x_gen);
9385   blas_free(tail_x_gen);
9386   blas_free(y_gen);
9387   blas_free(temp);
9388   blas_free(A);
9389   blas_free(head_r_true);
9390   blas_free(tail_r_true);
9391   blas_free(ratios);
9392 
9393   *min_ratio = ratio_min;
9394   *num_bad_ratio = bad_ratios;
9395   *num_tests = tot_tests;
9396   return ratio_max;
9397 }
do_test_dgemv2_s_d_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)9398 double do_test_dgemv2_s_d_x(int m, int n, int ntests, int *seed,
9399 			    double thresh, int debug, float test_prob,
9400 			    double *min_ratio, int *num_bad_ratio,
9401 			    int *num_tests)
9402 
9403 /*
9404  * Purpose
9405  * =======
9406  *
9407  * Runs a series of tests on GEMV2.
9408  *
9409  * Arguments
9410  * =========
9411  *
9412  * m         (input) int
9413  *           The number of rows
9414  *
9415  * n         (input) int
9416  *           The number of columns
9417  *
9418  * ntests    (input) int
9419  *           The number of tests to run for each set of attributes.
9420  *
9421  * seed      (input/output) int
9422  *           The seed for the random number generator used in testgen().
9423  *
9424  * thresh    (input) double
9425  *           When the ratio returned from test() exceeds the specified
9426  *           threshold, the current size, r_true, r_comp, and ratio will be
9427  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
9428  *           to ~10.)
9429  *
9430  * debug     (input) int
9431  *           If debug=3, print summary
9432  *           If debug=2, print summary only if the number of bad ratios > 0
9433  *           If debug=1, print complete info if tests fail
9434  *           If debug=0, return max ratio
9435  *
9436  * test_prob (input) float
9437  *           The specified test will be performed only if the generated
9438  *           random exceeds this threshold.
9439  *
9440  * min_ratio (output) double
9441  *           The minimum ratio
9442  *
9443  * num_bad_ratio (output) int
9444  *               The number of tests fail; they are above the threshold.
9445  *
9446  * num_tests (output) int
9447  *           The number of tests is being performed.
9448  *
9449  * Return value
9450  * ============
9451  *
9452  * The maximum ratio if run successfully, otherwise return -1
9453  *
9454  * Code structure
9455  * ==============
9456  *
9457  *  debug loop  -- if debug is one, the first loop computes the max ratio
9458  *              -- and the last(second) loop outputs debugging information,
9459  *              -- if the test fail and its ratio > 0.5 * max ratio.
9460  *              -- if debug is zero, the loop is executed once
9461  *    alpha loop  -- varying alpha: 0, 1, or random
9462  *      beta loop   -- varying beta: 0, 1, or random
9463  *        prec loop   -- varying internal prec: single, double, or extra
9464  *          norm loop   -- varying norm: near undeflow, near one, or
9465  *                        -- near overflow
9466  *            numtest loop  -- how many times the test is perform with
9467  *                            -- above set of attributes
9468  *              order loop   -- varying order type: rowmajor or colmajor
9469  *                trans loop    -- varying uplo type: upper or lower
9470  *                  lda loop      -- varying lda: m, m+1, 2m
9471  *                    incx loop     -- varying incx: -2, -1, 1, 2
9472  *                      incy loop     -- varying incy: -2, -1, 1, 2
9473  */
9474 {
9475   /* function name */
9476   const char fname[] = "BLAS_dgemv2_s_d_x";
9477 
9478   /* max number of debug lines to print */
9479   const int max_print = 8;
9480 
9481   /* Variables in the "x_val" form are loop vars for corresponding
9482      variables */
9483   int i;			/* iterate through the repeating tests */
9484   int j, k;			/* multipurpose counters or variables */
9485   int iy;			/* use to index y */
9486   int incx_val, incy_val,	/* for testing different inc values */
9487     incx, incy;
9488   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
9489   int d_count;			/* counter for debug */
9490   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
9491   int p_count;			/* counter for the number of debug lines printed */
9492   int tot_tests;		/* total number of tests to be done */
9493   int norm;			/* input values of near underflow/one/overflow */
9494   double ratio_max;		/* the current maximum ratio */
9495   double ratio_min;		/* the current minimum ratio */
9496   double *ratios;		/* a temporary variable for calculating ratio */
9497   double ratio;			/* the per-use test ratio from test() */
9498   int bad_ratios;		/* the number of ratios over the threshold */
9499   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
9500   double un_int;		/* the internal underflow threshold */
9501   double alpha;
9502   double beta;
9503   float *A;
9504   double *head_x;
9505   double *tail_x;
9506   double *y;
9507   float *temp;			/* use for calculating ratio */
9508 
9509   /* x_gen and y_gen are used to store vectors generated by testgen.
9510      they eventually are copied back to x and y */
9511   double *head_x_gen;
9512   double *tail_x_gen;
9513   double *y_gen;
9514 
9515   /* the true r calculated by testgen(), in double-double */
9516   double *head_r_true, *tail_r_true;
9517   int alpha_val;
9518   int alpha_flag;		/* input flag for BLAS_dgemv2_s_d_testgen */
9519   int beta_val;
9520   int beta_flag;		/* input flag for BLAS_dgemv2_s_d_testgen */
9521   int order_val;
9522   enum blas_order_type order_type;
9523   int prec_val;
9524   enum blas_prec_type prec;
9525   int trans_val;
9526   enum blas_trans_type trans_type;
9527   int m_i;
9528   int n_i;
9529   int max_mn;			/* the max of m and n */
9530   int lda_val;
9531   int lda;
9532   int saved_seed;		/* for saving the original seed */
9533   int count, old_count;		/* use for counting the number of testgen calls * 2 */
9534 
9535   FPU_FIX_DECL;
9536 
9537   /* test for bad arguments */
9538   if (n < 0 || m < 0 || ntests < 0)
9539     BLAS_error(fname, 0, 0, NULL);
9540 
9541   /* initialization */
9542   *num_bad_ratio = 0;
9543   *num_tests = 0;
9544   *min_ratio = 0.0;
9545 
9546   saved_seed = *seed;
9547   ratio_min = 1e308;
9548   ratio_max = 0.0;
9549   ratio = 0.0;
9550   tot_tests = 0;
9551   p_count = 0;
9552   count = 0;
9553   find_max_ratio = 0;
9554   bad_ratios = 0;
9555   old_count = 0;
9556 
9557   if (debug == 3)
9558     find_max_ratio = 1;
9559   max_mn = MAX(m, n);
9560   if (m == 0 || n == 0) {
9561     return 0.0;
9562   }
9563 
9564   FPU_FIX_START;
9565 
9566   incy_gen = 1;
9567 
9568 
9569   /* get space for calculation */
9570   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
9571   if (max_mn * 2 > 0 && head_x == NULL) {
9572     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9573   }
9574   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
9575   if (max_mn * 2 > 0 && tail_x == NULL) {
9576     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9577   }
9578   y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
9579   if (max_mn * 2 > 0 && y == NULL) {
9580     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9581   }
9582   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
9583   if (max_mn > 0 && head_x_gen == NULL) {
9584     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9585   }
9586   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
9587   if (max_mn > 0 && tail_x_gen == NULL) {
9588     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9589   }
9590   y_gen = (double *) blas_malloc(max_mn * sizeof(double));
9591   if (max_mn > 0 && y_gen == NULL) {
9592     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9593   }
9594   temp = (float *) blas_malloc(max_mn * sizeof(float));
9595   if (max_mn > 0 && temp == NULL) {
9596     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9597   }
9598   head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
9599   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
9600   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
9601     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9602   }
9603   ratios = (double *) blas_malloc(max_mn * sizeof(double));
9604   if (max_mn > 0 && ratios == NULL) {
9605     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9606   }
9607   A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
9608   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
9609     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
9610   }
9611 
9612   /* The debug iteration:
9613      If debug=1, then will execute the iteration twice. First, compute the
9614      max ratio. Second, print info if ratio > (50% * ratio_max). */
9615   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
9616     bad_ratios = 0;		/* set to zero */
9617 
9618     if ((debug == 3) && (d_count == find_max_ratio))
9619       *seed = saved_seed;	/* restore the original seed */
9620 
9621     /* varying alpha */
9622     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
9623       alpha_flag = 0;
9624       switch (alpha_val) {
9625       case 0:
9626 	alpha = 0.0;
9627 	alpha_flag = 1;
9628 	break;
9629       case 1:
9630 	alpha = 1.0;
9631 	alpha_flag = 1;
9632 	break;
9633       }
9634 
9635       /* varying beta */
9636       for (beta_val = 0; beta_val < 3; beta_val++) {
9637 	beta_flag = 0;
9638 	switch (beta_val) {
9639 	case 0:
9640 	  beta = 0.0;
9641 	  beta_flag = 1;
9642 	  break;
9643 	case 1:
9644 	  beta = 1.0;
9645 	  beta_flag = 1;
9646 	  break;
9647 	}
9648 
9649 
9650 	/* varying extra precs */
9651 	for (prec_val = 0; prec_val <= 2; prec_val++) {
9652 	  switch (prec_val) {
9653 	  case 0:
9654 	    eps_int = power(2, -BITS_D);
9655 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9656 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9657 	    prec = blas_prec_double;
9658 	    break;
9659 	  case 1:
9660 	    eps_int = power(2, -BITS_D);
9661 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
9662 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
9663 	    prec = blas_prec_double;
9664 	    break;
9665 	  case 2:
9666 	  default:
9667 	    eps_int = power(2, -BITS_E);
9668 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
9669 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
9670 	    prec = blas_prec_extra;
9671 	    break;
9672 	  }
9673 
9674 	  /* values near underflow, 1, or overflow */
9675 	  for (norm = -1; norm <= 1; norm++) {
9676 
9677 	    /* number of tests */
9678 	    for (i = 0; i < ntests; i++) {
9679 
9680 	      /* row or col major */
9681 	      for (order_val = 0; order_val < 2; order_val++) {
9682 		switch (order_val) {
9683 		case 0:
9684 		  order_type = blas_rowmajor;
9685 		  break;
9686 		case 1:
9687 		default:
9688 		  order_type = blas_colmajor;
9689 		  break;
9690 		}
9691 
9692 		/* no_trans, trans, or conj_trans */
9693 		for (trans_val = 0; trans_val < 3; trans_val++) {
9694 		  switch (trans_val) {
9695 		  case 0:
9696 		    trans_type = blas_no_trans;
9697 		    m_i = m;
9698 		    n_i = n;
9699 		    break;
9700 		  case 1:
9701 		    trans_type = blas_trans;
9702 		    m_i = n;
9703 		    n_i = m;
9704 		    break;
9705 		  case 2:
9706 		  default:
9707 		    trans_type = blas_conj_trans;
9708 		    m_i = n;
9709 		    n_i = m;
9710 		    break;
9711 		  }
9712 
9713 		  /* lda=n, n+1, or 2n */
9714 		  for (lda_val = 0; lda_val < 3; lda_val++) {
9715 		    switch (lda_val) {
9716 		    case 0:
9717 		      lda = m_i;
9718 		      break;
9719 		    case 1:
9720 		      lda = m_i + 1;
9721 		      break;
9722 		    case 2:
9723 		    default:
9724 		      lda = 2 * m_i;
9725 		      break;
9726 		    }
9727 		    if ((order_type == blas_rowmajor && lda < n) ||
9728 			(order_type == blas_colmajor && lda < m))
9729 		      continue;
9730 
9731 		    /* For the sake of speed, we throw out this case at random */
9732 		    if (xrand(seed) >= test_prob)
9733 		      continue;
9734 
9735 		    /* in the trivial cases, no need to run testgen */
9736 		    if (m > 0 && n > 0)
9737 		      BLAS_dgemv2_s_d_testgen(norm, order_type, trans_type, m,
9738 					      n, &alpha, alpha_flag, A, lda,
9739 					      head_x_gen, tail_x_gen, &beta,
9740 					      beta_flag, y_gen, seed,
9741 					      head_r_true, tail_r_true);
9742 
9743 		    count++;
9744 
9745 		    /* varying incx */
9746 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
9747 		      if (incx_val == 0)
9748 			continue;
9749 
9750 		      /* setting incx */
9751 		      incx = incx_val;
9752 
9753 
9754 		      dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
9755 		      dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
9756 
9757 		      /* varying incy */
9758 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
9759 			if (incy_val == 0)
9760 			  continue;
9761 
9762 			/* setting incy */
9763 			incy = incy_val;
9764 
9765 
9766 			dcopy_vector(y_gen, m_i, 1, y, incy_val);
9767 
9768 			/* call BLAS_dgemv2_s_d_x */
9769 			FPU_FIX_STOP;
9770 			BLAS_dgemv2_s_d_x(order_type, trans_type, m, n, alpha,
9771 					  A, lda, head_x, tail_x, incx_val,
9772 					  beta, y, incy_val, prec);
9773 			FPU_FIX_START;
9774 
9775 			/* set y starting index */
9776 			iy = 0;
9777 			if (incy < 0)
9778 			  iy = -(m_i - 1) * incy;
9779 
9780 			/* computing the ratio */
9781 			if (m > 0 && n > 0)
9782 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
9783 			    /* copy row j of A to temp */
9784 			    sge_copy_row(order_type, trans_type, m_i, n_i, A,
9785 					 lda, temp, j);
9786 
9787 			    test_BLAS_ddot2_s_d(n_i, blas_no_conj, alpha,
9788 						beta, y_gen[k], y[iy],
9789 						head_r_true[k],
9790 						tail_r_true[k], temp, 1,
9791 						head_x, tail_x, incx_val,
9792 						eps_int, un_int, &ratios[j]);
9793 
9794 			    /* take the max ratio */
9795 			    if (j == 0) {
9796 			      ratio = ratios[0];
9797 			      /* The !<= below causes NaN error to be detected.
9798 			         Note that (NaN > thresh) is always false. */
9799 			    } else if (!(ratios[j] <= ratio)) {
9800 			      ratio = ratios[j];
9801 			    }
9802 			    iy += incy;
9803 			  }
9804 
9805 			/* Increase the number of bad ratio, if the ratio
9806 			   is bigger than the threshold.
9807 			   The !<= below causes NaN error to be detected.
9808 			   Note that (NaN > thresh) is always false. */
9809 			if (!(ratio <= thresh)) {
9810 			  bad_ratios++;
9811 
9812 			  if ((debug == 3) &&	/* print only when debug is on */
9813 			      (count != old_count) &&	/* print if old vector is different
9814 							   from the current one */
9815 			      (d_count == find_max_ratio) &&
9816 			      (p_count <= max_print) &&
9817 			      (ratio > 0.5 * ratio_max)) {
9818 			    old_count = count;
9819 
9820 			    printf
9821 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
9822 			       fname, m, n, ntests, thresh);
9823 
9824 			    /* Print test info */
9825 			    switch (prec) {
9826 			    case blas_prec_single:
9827 			      printf("single ");
9828 			      break;
9829 			    case blas_prec_double:
9830 			      printf("double ");
9831 			      break;
9832 			    case blas_prec_indigenous:
9833 			      printf("indigenous ");
9834 			      break;
9835 			    case blas_prec_extra:
9836 			      printf("extra ");
9837 			      break;
9838 			    }
9839 			    switch (norm) {
9840 			    case -1:
9841 			      printf("near_underflow ");
9842 			      break;
9843 			    case 0:
9844 			      printf("near_one ");
9845 			      break;
9846 			    case 1:
9847 			      printf("near_overflow ");
9848 			      break;
9849 			    }
9850 			    switch (order_type) {
9851 			    case blas_rowmajor:
9852 			      printf("row_major ");
9853 			      break;
9854 			    case blas_colmajor:
9855 			      printf("col_major ");
9856 			      break;
9857 			    }
9858 			    switch (trans_type) {
9859 			    case blas_no_trans:
9860 			      printf("no_trans ");
9861 			      break;
9862 			    case blas_trans:
9863 			      printf("trans ");
9864 			      break;
9865 			    case blas_conj_trans:
9866 			      printf("conj_trans ");
9867 			      break;
9868 			    }
9869 
9870 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
9871 				   incy);
9872 
9873 			    sge_print_matrix(A, m_i, n_i, lda, order_type,
9874 					     "A");
9875 
9876 			    dprint_vector(head_x, n_i, incx_val, "head_x");
9877 			    dprint_vector(tail_x, n_i, incx_val, "tail_x");
9878 			    dprint_vector(y_gen, m_i, 1, "y_gen");
9879 			    dprint_vector(y, m_i, incy_val, "y_final");
9880 
9881 			    printf("      ");
9882 			    printf("alpha = ");
9883 			    printf("%24.16e", alpha);
9884 			    printf("\n      ");
9885 			    printf("beta = ");
9886 			    printf("%24.16e", beta);
9887 			    printf("\n");
9888 			    for (j = 0, k = 0; j < m_i * incy_gen;
9889 				 j += incy_gen, k++) {
9890 			      printf("      ");
9891 			      printf("[%24.16e, %24.16e]", head_r_true[j],
9892 				     tail_r_true[j]);
9893 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
9894 			    }
9895 
9896 			    printf("      ratio=%.4e\n", ratio);
9897 			    p_count++;
9898 			  }
9899 			  if (bad_ratios >= MAX_BAD_TESTS) {
9900 			    printf("\ntoo many failures, exiting....");
9901 			    printf("\nTesting and compilation");
9902 			    printf(" are incomplete\n\n");
9903 			    goto end;
9904 			  }
9905 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
9906 			    printf("\nFlagrant ratio error, exiting...");
9907 			    printf("\nTesting and compilation");
9908 			    printf(" are incomplete\n\n");
9909 			    goto end;
9910 			  }
9911 			}
9912 			if (d_count == 0) {
9913 			  if (ratio > ratio_max)
9914 			    ratio_max = ratio;
9915 
9916 			  if (ratio != 0.0 && ratio < ratio_min)
9917 			    ratio_min = ratio;
9918 
9919 			  tot_tests++;
9920 			}
9921 		      }		/* incy */
9922 		    }		/* incx */
9923 		  }		/* lda */
9924 		}		/* trans */
9925 	      }			/* order */
9926 	    }			/* tests */
9927 	  }			/* norm */
9928 	}			/* prec */
9929       }				/* beta */
9930     }				/* alpha */
9931   }				/* debug */
9932 
9933   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
9934     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
9935 	   fname, m, n, ntests, thresh);
9936     printf
9937       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
9938        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
9939        ratio_min, ratio_max);
9940   }
9941 
9942 end:
9943   FPU_FIX_STOP;
9944 
9945   blas_free(head_x);
9946   blas_free(tail_x);
9947   blas_free(y);
9948   blas_free(head_x_gen);
9949   blas_free(tail_x_gen);
9950   blas_free(y_gen);
9951   blas_free(temp);
9952   blas_free(A);
9953   blas_free(head_r_true);
9954   blas_free(tail_r_true);
9955   blas_free(ratios);
9956 
9957   *min_ratio = ratio_min;
9958   *num_bad_ratio = bad_ratios;
9959   *num_tests = tot_tests;
9960   return ratio_max;
9961 }
do_test_dgemv2_s_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)9962 double do_test_dgemv2_s_s_x(int m, int n, int ntests, int *seed,
9963 			    double thresh, int debug, float test_prob,
9964 			    double *min_ratio, int *num_bad_ratio,
9965 			    int *num_tests)
9966 
9967 /*
9968  * Purpose
9969  * =======
9970  *
9971  * Runs a series of tests on GEMV2.
9972  *
9973  * Arguments
9974  * =========
9975  *
9976  * m         (input) int
9977  *           The number of rows
9978  *
9979  * n         (input) int
9980  *           The number of columns
9981  *
9982  * ntests    (input) int
9983  *           The number of tests to run for each set of attributes.
9984  *
9985  * seed      (input/output) int
9986  *           The seed for the random number generator used in testgen().
9987  *
9988  * thresh    (input) double
9989  *           When the ratio returned from test() exceeds the specified
9990  *           threshold, the current size, r_true, r_comp, and ratio will be
9991  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
9992  *           to ~10.)
9993  *
9994  * debug     (input) int
9995  *           If debug=3, print summary
9996  *           If debug=2, print summary only if the number of bad ratios > 0
9997  *           If debug=1, print complete info if tests fail
9998  *           If debug=0, return max ratio
9999  *
10000  * test_prob (input) float
10001  *           The specified test will be performed only if the generated
10002  *           random exceeds this threshold.
10003  *
10004  * min_ratio (output) double
10005  *           The minimum ratio
10006  *
10007  * num_bad_ratio (output) int
10008  *               The number of tests fail; they are above the threshold.
10009  *
10010  * num_tests (output) int
10011  *           The number of tests is being performed.
10012  *
10013  * Return value
10014  * ============
10015  *
10016  * The maximum ratio if run successfully, otherwise return -1
10017  *
10018  * Code structure
10019  * ==============
10020  *
10021  *  debug loop  -- if debug is one, the first loop computes the max ratio
10022  *              -- and the last(second) loop outputs debugging information,
10023  *              -- if the test fail and its ratio > 0.5 * max ratio.
10024  *              -- if debug is zero, the loop is executed once
10025  *    alpha loop  -- varying alpha: 0, 1, or random
10026  *      beta loop   -- varying beta: 0, 1, or random
10027  *        prec loop   -- varying internal prec: single, double, or extra
10028  *          norm loop   -- varying norm: near undeflow, near one, or
10029  *                        -- near overflow
10030  *            numtest loop  -- how many times the test is perform with
10031  *                            -- above set of attributes
10032  *              order loop   -- varying order type: rowmajor or colmajor
10033  *                trans loop    -- varying uplo type: upper or lower
10034  *                  lda loop      -- varying lda: m, m+1, 2m
10035  *                    incx loop     -- varying incx: -2, -1, 1, 2
10036  *                      incy loop     -- varying incy: -2, -1, 1, 2
10037  */
10038 {
10039   /* function name */
10040   const char fname[] = "BLAS_dgemv2_s_s_x";
10041 
10042   /* max number of debug lines to print */
10043   const int max_print = 8;
10044 
10045   /* Variables in the "x_val" form are loop vars for corresponding
10046      variables */
10047   int i;			/* iterate through the repeating tests */
10048   int j, k;			/* multipurpose counters or variables */
10049   int iy;			/* use to index y */
10050   int incx_val, incy_val,	/* for testing different inc values */
10051     incx, incy;
10052   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
10053   int d_count;			/* counter for debug */
10054   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
10055   int p_count;			/* counter for the number of debug lines printed */
10056   int tot_tests;		/* total number of tests to be done */
10057   int norm;			/* input values of near underflow/one/overflow */
10058   double ratio_max;		/* the current maximum ratio */
10059   double ratio_min;		/* the current minimum ratio */
10060   double *ratios;		/* a temporary variable for calculating ratio */
10061   double ratio;			/* the per-use test ratio from test() */
10062   int bad_ratios;		/* the number of ratios over the threshold */
10063   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
10064   double un_int;		/* the internal underflow threshold */
10065   double alpha;
10066   double beta;
10067   float *A;
10068   float *head_x;
10069   float *tail_x;
10070   double *y;
10071   float *temp;			/* use for calculating ratio */
10072 
10073   /* x_gen and y_gen are used to store vectors generated by testgen.
10074      they eventually are copied back to x and y */
10075   float *head_x_gen;
10076   float *tail_x_gen;
10077   double *y_gen;
10078 
10079   /* the true r calculated by testgen(), in double-double */
10080   double *head_r_true, *tail_r_true;
10081   int alpha_val;
10082   int alpha_flag;		/* input flag for BLAS_dgemv2_s_s_testgen */
10083   int beta_val;
10084   int beta_flag;		/* input flag for BLAS_dgemv2_s_s_testgen */
10085   int order_val;
10086   enum blas_order_type order_type;
10087   int prec_val;
10088   enum blas_prec_type prec;
10089   int trans_val;
10090   enum blas_trans_type trans_type;
10091   int m_i;
10092   int n_i;
10093   int max_mn;			/* the max of m and n */
10094   int lda_val;
10095   int lda;
10096   int saved_seed;		/* for saving the original seed */
10097   int count, old_count;		/* use for counting the number of testgen calls * 2 */
10098 
10099   FPU_FIX_DECL;
10100 
10101   /* test for bad arguments */
10102   if (n < 0 || m < 0 || ntests < 0)
10103     BLAS_error(fname, 0, 0, NULL);
10104 
10105   /* initialization */
10106   *num_bad_ratio = 0;
10107   *num_tests = 0;
10108   *min_ratio = 0.0;
10109 
10110   saved_seed = *seed;
10111   ratio_min = 1e308;
10112   ratio_max = 0.0;
10113   ratio = 0.0;
10114   tot_tests = 0;
10115   p_count = 0;
10116   count = 0;
10117   find_max_ratio = 0;
10118   bad_ratios = 0;
10119   old_count = 0;
10120 
10121   if (debug == 3)
10122     find_max_ratio = 1;
10123   max_mn = MAX(m, n);
10124   if (m == 0 || n == 0) {
10125     return 0.0;
10126   }
10127 
10128   FPU_FIX_START;
10129 
10130   incy_gen = 1;
10131 
10132 
10133   /* get space for calculation */
10134   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
10135   if (max_mn * 2 > 0 && head_x == NULL) {
10136     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10137   }
10138   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
10139   if (max_mn * 2 > 0 && tail_x == NULL) {
10140     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10141   }
10142   y = (double *) blas_malloc(max_mn * 2 * sizeof(double));
10143   if (max_mn * 2 > 0 && y == NULL) {
10144     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10145   }
10146   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
10147   if (max_mn > 0 && head_x_gen == NULL) {
10148     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10149   }
10150   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
10151   if (max_mn > 0 && tail_x_gen == NULL) {
10152     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10153   }
10154   y_gen = (double *) blas_malloc(max_mn * sizeof(double));
10155   if (max_mn > 0 && y_gen == NULL) {
10156     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10157   }
10158   temp = (float *) blas_malloc(max_mn * sizeof(float));
10159   if (max_mn > 0 && temp == NULL) {
10160     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10161   }
10162   head_r_true = (double *) blas_malloc(max_mn * sizeof(double));
10163   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double));
10164   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
10165     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10166   }
10167   ratios = (double *) blas_malloc(max_mn * sizeof(double));
10168   if (max_mn > 0 && ratios == NULL) {
10169     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10170   }
10171   A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
10172   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
10173     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10174   }
10175 
10176   /* The debug iteration:
10177      If debug=1, then will execute the iteration twice. First, compute the
10178      max ratio. Second, print info if ratio > (50% * ratio_max). */
10179   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
10180     bad_ratios = 0;		/* set to zero */
10181 
10182     if ((debug == 3) && (d_count == find_max_ratio))
10183       *seed = saved_seed;	/* restore the original seed */
10184 
10185     /* varying alpha */
10186     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
10187       alpha_flag = 0;
10188       switch (alpha_val) {
10189       case 0:
10190 	alpha = 0.0;
10191 	alpha_flag = 1;
10192 	break;
10193       case 1:
10194 	alpha = 1.0;
10195 	alpha_flag = 1;
10196 	break;
10197       }
10198 
10199       /* varying beta */
10200       for (beta_val = 0; beta_val < 3; beta_val++) {
10201 	beta_flag = 0;
10202 	switch (beta_val) {
10203 	case 0:
10204 	  beta = 0.0;
10205 	  beta_flag = 1;
10206 	  break;
10207 	case 1:
10208 	  beta = 1.0;
10209 	  beta_flag = 1;
10210 	  break;
10211 	}
10212 
10213 
10214 	/* varying extra precs */
10215 	for (prec_val = 0; prec_val <= 2; prec_val++) {
10216 	  switch (prec_val) {
10217 	  case 0:
10218 	    eps_int = power(2, -BITS_D);
10219 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10220 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10221 	    prec = blas_prec_double;
10222 	    break;
10223 	  case 1:
10224 	    eps_int = power(2, -BITS_D);
10225 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10226 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10227 	    prec = blas_prec_double;
10228 	    break;
10229 	  case 2:
10230 	  default:
10231 	    eps_int = power(2, -BITS_E);
10232 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
10233 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
10234 	    prec = blas_prec_extra;
10235 	    break;
10236 	  }
10237 
10238 	  /* values near underflow, 1, or overflow */
10239 	  for (norm = -1; norm <= 1; norm++) {
10240 
10241 	    /* number of tests */
10242 	    for (i = 0; i < ntests; i++) {
10243 
10244 	      /* row or col major */
10245 	      for (order_val = 0; order_val < 2; order_val++) {
10246 		switch (order_val) {
10247 		case 0:
10248 		  order_type = blas_rowmajor;
10249 		  break;
10250 		case 1:
10251 		default:
10252 		  order_type = blas_colmajor;
10253 		  break;
10254 		}
10255 
10256 		/* no_trans, trans, or conj_trans */
10257 		for (trans_val = 0; trans_val < 3; trans_val++) {
10258 		  switch (trans_val) {
10259 		  case 0:
10260 		    trans_type = blas_no_trans;
10261 		    m_i = m;
10262 		    n_i = n;
10263 		    break;
10264 		  case 1:
10265 		    trans_type = blas_trans;
10266 		    m_i = n;
10267 		    n_i = m;
10268 		    break;
10269 		  case 2:
10270 		  default:
10271 		    trans_type = blas_conj_trans;
10272 		    m_i = n;
10273 		    n_i = m;
10274 		    break;
10275 		  }
10276 
10277 		  /* lda=n, n+1, or 2n */
10278 		  for (lda_val = 0; lda_val < 3; lda_val++) {
10279 		    switch (lda_val) {
10280 		    case 0:
10281 		      lda = m_i;
10282 		      break;
10283 		    case 1:
10284 		      lda = m_i + 1;
10285 		      break;
10286 		    case 2:
10287 		    default:
10288 		      lda = 2 * m_i;
10289 		      break;
10290 		    }
10291 		    if ((order_type == blas_rowmajor && lda < n) ||
10292 			(order_type == blas_colmajor && lda < m))
10293 		      continue;
10294 
10295 		    /* For the sake of speed, we throw out this case at random */
10296 		    if (xrand(seed) >= test_prob)
10297 		      continue;
10298 
10299 		    /* in the trivial cases, no need to run testgen */
10300 		    if (m > 0 && n > 0)
10301 		      BLAS_dgemv2_s_s_testgen(norm, order_type, trans_type, m,
10302 					      n, &alpha, alpha_flag, A, lda,
10303 					      head_x_gen, tail_x_gen, &beta,
10304 					      beta_flag, y_gen, seed,
10305 					      head_r_true, tail_r_true);
10306 
10307 		    count++;
10308 
10309 		    /* varying incx */
10310 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
10311 		      if (incx_val == 0)
10312 			continue;
10313 
10314 		      /* setting incx */
10315 		      incx = incx_val;
10316 
10317 
10318 		      scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
10319 		      scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
10320 
10321 		      /* varying incy */
10322 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
10323 			if (incy_val == 0)
10324 			  continue;
10325 
10326 			/* setting incy */
10327 			incy = incy_val;
10328 
10329 
10330 			dcopy_vector(y_gen, m_i, 1, y, incy_val);
10331 
10332 			/* call BLAS_dgemv2_s_s_x */
10333 			FPU_FIX_STOP;
10334 			BLAS_dgemv2_s_s_x(order_type, trans_type, m, n, alpha,
10335 					  A, lda, head_x, tail_x, incx_val,
10336 					  beta, y, incy_val, prec);
10337 			FPU_FIX_START;
10338 
10339 			/* set y starting index */
10340 			iy = 0;
10341 			if (incy < 0)
10342 			  iy = -(m_i - 1) * incy;
10343 
10344 			/* computing the ratio */
10345 			if (m > 0 && n > 0)
10346 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
10347 			    /* copy row j of A to temp */
10348 			    sge_copy_row(order_type, trans_type, m_i, n_i, A,
10349 					 lda, temp, j);
10350 
10351 			    test_BLAS_ddot2_s_s(n_i, blas_no_conj, alpha,
10352 						beta, y_gen[k], y[iy],
10353 						head_r_true[k],
10354 						tail_r_true[k], temp, 1,
10355 						head_x, tail_x, incx_val,
10356 						eps_int, un_int, &ratios[j]);
10357 
10358 			    /* take the max ratio */
10359 			    if (j == 0) {
10360 			      ratio = ratios[0];
10361 			      /* The !<= below causes NaN error to be detected.
10362 			         Note that (NaN > thresh) is always false. */
10363 			    } else if (!(ratios[j] <= ratio)) {
10364 			      ratio = ratios[j];
10365 			    }
10366 			    iy += incy;
10367 			  }
10368 
10369 			/* Increase the number of bad ratio, if the ratio
10370 			   is bigger than the threshold.
10371 			   The !<= below causes NaN error to be detected.
10372 			   Note that (NaN > thresh) is always false. */
10373 			if (!(ratio <= thresh)) {
10374 			  bad_ratios++;
10375 
10376 			  if ((debug == 3) &&	/* print only when debug is on */
10377 			      (count != old_count) &&	/* print if old vector is different
10378 							   from the current one */
10379 			      (d_count == find_max_ratio) &&
10380 			      (p_count <= max_print) &&
10381 			      (ratio > 0.5 * ratio_max)) {
10382 			    old_count = count;
10383 
10384 			    printf
10385 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
10386 			       fname, m, n, ntests, thresh);
10387 
10388 			    /* Print test info */
10389 			    switch (prec) {
10390 			    case blas_prec_single:
10391 			      printf("single ");
10392 			      break;
10393 			    case blas_prec_double:
10394 			      printf("double ");
10395 			      break;
10396 			    case blas_prec_indigenous:
10397 			      printf("indigenous ");
10398 			      break;
10399 			    case blas_prec_extra:
10400 			      printf("extra ");
10401 			      break;
10402 			    }
10403 			    switch (norm) {
10404 			    case -1:
10405 			      printf("near_underflow ");
10406 			      break;
10407 			    case 0:
10408 			      printf("near_one ");
10409 			      break;
10410 			    case 1:
10411 			      printf("near_overflow ");
10412 			      break;
10413 			    }
10414 			    switch (order_type) {
10415 			    case blas_rowmajor:
10416 			      printf("row_major ");
10417 			      break;
10418 			    case blas_colmajor:
10419 			      printf("col_major ");
10420 			      break;
10421 			    }
10422 			    switch (trans_type) {
10423 			    case blas_no_trans:
10424 			      printf("no_trans ");
10425 			      break;
10426 			    case blas_trans:
10427 			      printf("trans ");
10428 			      break;
10429 			    case blas_conj_trans:
10430 			      printf("conj_trans ");
10431 			      break;
10432 			    }
10433 
10434 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
10435 				   incy);
10436 
10437 			    sge_print_matrix(A, m_i, n_i, lda, order_type,
10438 					     "A");
10439 
10440 			    sprint_vector(head_x, n_i, incx_val, "head_x");
10441 			    sprint_vector(tail_x, n_i, incx_val, "tail_x");
10442 			    dprint_vector(y_gen, m_i, 1, "y_gen");
10443 			    dprint_vector(y, m_i, incy_val, "y_final");
10444 
10445 			    printf("      ");
10446 			    printf("alpha = ");
10447 			    printf("%24.16e", alpha);
10448 			    printf("\n      ");
10449 			    printf("beta = ");
10450 			    printf("%24.16e", beta);
10451 			    printf("\n");
10452 			    for (j = 0, k = 0; j < m_i * incy_gen;
10453 				 j += incy_gen, k++) {
10454 			      printf("      ");
10455 			      printf("[%24.16e, %24.16e]", head_r_true[j],
10456 				     tail_r_true[j]);
10457 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
10458 			    }
10459 
10460 			    printf("      ratio=%.4e\n", ratio);
10461 			    p_count++;
10462 			  }
10463 			  if (bad_ratios >= MAX_BAD_TESTS) {
10464 			    printf("\ntoo many failures, exiting....");
10465 			    printf("\nTesting and compilation");
10466 			    printf(" are incomplete\n\n");
10467 			    goto end;
10468 			  }
10469 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
10470 			    printf("\nFlagrant ratio error, exiting...");
10471 			    printf("\nTesting and compilation");
10472 			    printf(" are incomplete\n\n");
10473 			    goto end;
10474 			  }
10475 			}
10476 			if (d_count == 0) {
10477 			  if (ratio > ratio_max)
10478 			    ratio_max = ratio;
10479 
10480 			  if (ratio != 0.0 && ratio < ratio_min)
10481 			    ratio_min = ratio;
10482 
10483 			  tot_tests++;
10484 			}
10485 		      }		/* incy */
10486 		    }		/* incx */
10487 		  }		/* lda */
10488 		}		/* trans */
10489 	      }			/* order */
10490 	    }			/* tests */
10491 	  }			/* norm */
10492 	}			/* prec */
10493       }				/* beta */
10494     }				/* alpha */
10495   }				/* debug */
10496 
10497   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
10498     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
10499 	   fname, m, n, ntests, thresh);
10500     printf
10501       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
10502        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
10503        ratio_min, ratio_max);
10504   }
10505 
10506 end:
10507   FPU_FIX_STOP;
10508 
10509   blas_free(head_x);
10510   blas_free(tail_x);
10511   blas_free(y);
10512   blas_free(head_x_gen);
10513   blas_free(tail_x_gen);
10514   blas_free(y_gen);
10515   blas_free(temp);
10516   blas_free(A);
10517   blas_free(head_r_true);
10518   blas_free(tail_r_true);
10519   blas_free(ratios);
10520 
10521   *min_ratio = ratio_min;
10522   *num_bad_ratio = bad_ratios;
10523   *num_tests = tot_tests;
10524   return ratio_max;
10525 }
do_test_zgemv2_z_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)10526 double do_test_zgemv2_z_c_x(int m, int n, int ntests, int *seed,
10527 			    double thresh, int debug, float test_prob,
10528 			    double *min_ratio, int *num_bad_ratio,
10529 			    int *num_tests)
10530 
10531 /*
10532  * Purpose
10533  * =======
10534  *
10535  * Runs a series of tests on GEMV2.
10536  *
10537  * Arguments
10538  * =========
10539  *
10540  * m         (input) int
10541  *           The number of rows
10542  *
10543  * n         (input) int
10544  *           The number of columns
10545  *
10546  * ntests    (input) int
10547  *           The number of tests to run for each set of attributes.
10548  *
10549  * seed      (input/output) int
10550  *           The seed for the random number generator used in testgen().
10551  *
10552  * thresh    (input) double
10553  *           When the ratio returned from test() exceeds the specified
10554  *           threshold, the current size, r_true, r_comp, and ratio will be
10555  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
10556  *           to ~10.)
10557  *
10558  * debug     (input) int
10559  *           If debug=3, print summary
10560  *           If debug=2, print summary only if the number of bad ratios > 0
10561  *           If debug=1, print complete info if tests fail
10562  *           If debug=0, return max ratio
10563  *
10564  * test_prob (input) float
10565  *           The specified test will be performed only if the generated
10566  *           random exceeds this threshold.
10567  *
10568  * min_ratio (output) double
10569  *           The minimum ratio
10570  *
10571  * num_bad_ratio (output) int
10572  *               The number of tests fail; they are above the threshold.
10573  *
10574  * num_tests (output) int
10575  *           The number of tests is being performed.
10576  *
10577  * Return value
10578  * ============
10579  *
10580  * The maximum ratio if run successfully, otherwise return -1
10581  *
10582  * Code structure
10583  * ==============
10584  *
10585  *  debug loop  -- if debug is one, the first loop computes the max ratio
10586  *              -- and the last(second) loop outputs debugging information,
10587  *              -- if the test fail and its ratio > 0.5 * max ratio.
10588  *              -- if debug is zero, the loop is executed once
10589  *    alpha loop  -- varying alpha: 0, 1, or random
10590  *      beta loop   -- varying beta: 0, 1, or random
10591  *        prec loop   -- varying internal prec: single, double, or extra
10592  *          norm loop   -- varying norm: near undeflow, near one, or
10593  *                        -- near overflow
10594  *            numtest loop  -- how many times the test is perform with
10595  *                            -- above set of attributes
10596  *              order loop   -- varying order type: rowmajor or colmajor
10597  *                trans loop    -- varying uplo type: upper or lower
10598  *                  lda loop      -- varying lda: m, m+1, 2m
10599  *                    incx loop     -- varying incx: -2, -1, 1, 2
10600  *                      incy loop     -- varying incy: -2, -1, 1, 2
10601  */
10602 {
10603   /* function name */
10604   const char fname[] = "BLAS_zgemv2_z_c_x";
10605 
10606   /* max number of debug lines to print */
10607   const int max_print = 8;
10608 
10609   /* Variables in the "x_val" form are loop vars for corresponding
10610      variables */
10611   int i;			/* iterate through the repeating tests */
10612   int j, k;			/* multipurpose counters or variables */
10613   int iy;			/* use to index y */
10614   int incx_val, incy_val,	/* for testing different inc values */
10615     incx, incy;
10616   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
10617   int d_count;			/* counter for debug */
10618   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
10619   int p_count;			/* counter for the number of debug lines printed */
10620   int tot_tests;		/* total number of tests to be done */
10621   int norm;			/* input values of near underflow/one/overflow */
10622   double ratio_max;		/* the current maximum ratio */
10623   double ratio_min;		/* the current minimum ratio */
10624   double *ratios;		/* a temporary variable for calculating ratio */
10625   double ratio;			/* the per-use test ratio from test() */
10626   int bad_ratios;		/* the number of ratios over the threshold */
10627   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
10628   double un_int;		/* the internal underflow threshold */
10629   double alpha[2];
10630   double beta[2];
10631   double *A;
10632   float *head_x;
10633   float *tail_x;
10634   double *y;
10635   double *temp;			/* use for calculating ratio */
10636 
10637   /* x_gen and y_gen are used to store vectors generated by testgen.
10638      they eventually are copied back to x and y */
10639   float *head_x_gen;
10640   float *tail_x_gen;
10641   double *y_gen;
10642 
10643   /* the true r calculated by testgen(), in double-double */
10644   double *head_r_true, *tail_r_true;
10645 
10646   int alpha_val;
10647   int alpha_flag;		/* input flag for BLAS_zgemv2_z_c_testgen */
10648   int beta_val;
10649   int beta_flag;		/* input flag for BLAS_zgemv2_z_c_testgen */
10650   int order_val;
10651   enum blas_order_type order_type;
10652   int prec_val;
10653   enum blas_prec_type prec;
10654   int trans_val;
10655   enum blas_trans_type trans_type;
10656   int m_i;
10657   int n_i;
10658   int max_mn;			/* the max of m and n */
10659   int lda_val;
10660   int lda;
10661   int saved_seed;		/* for saving the original seed */
10662   int count, old_count;		/* use for counting the number of testgen calls * 2 */
10663 
10664   FPU_FIX_DECL;
10665 
10666   /* test for bad arguments */
10667   if (n < 0 || m < 0 || ntests < 0)
10668     BLAS_error(fname, 0, 0, NULL);
10669 
10670   /* initialization */
10671   *num_bad_ratio = 0;
10672   *num_tests = 0;
10673   *min_ratio = 0.0;
10674 
10675   saved_seed = *seed;
10676   ratio_min = 1e308;
10677   ratio_max = 0.0;
10678   ratio = 0.0;
10679   tot_tests = 0;
10680   p_count = 0;
10681   count = 0;
10682   find_max_ratio = 0;
10683   bad_ratios = 0;
10684   old_count = 0;
10685 
10686   if (debug == 3)
10687     find_max_ratio = 1;
10688   max_mn = MAX(m, n);
10689   if (m == 0 || n == 0) {
10690     return 0.0;
10691   }
10692 
10693   FPU_FIX_START;
10694 
10695   incy_gen = 1;
10696   incy_gen *= 2;
10697 
10698   /* get space for calculation */
10699   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
10700   if (max_mn * 2 > 0 && head_x == NULL) {
10701     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10702   }
10703   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
10704   if (max_mn * 2 > 0 && tail_x == NULL) {
10705     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10706   }
10707   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
10708   if (max_mn * 2 > 0 && y == NULL) {
10709     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10710   }
10711   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
10712   if (max_mn > 0 && head_x_gen == NULL) {
10713     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10714   }
10715   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
10716   if (max_mn > 0 && tail_x_gen == NULL) {
10717     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10718   }
10719   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
10720   if (max_mn > 0 && y_gen == NULL) {
10721     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10722   }
10723   temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
10724   if (max_mn > 0 && temp == NULL) {
10725     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10726   }
10727   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
10728   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
10729   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
10730     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10731   }
10732   ratios = (double *) blas_malloc(max_mn * sizeof(double));
10733   if (max_mn > 0 && ratios == NULL) {
10734     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10735   }
10736   A =
10737     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
10738 			   2);
10739   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
10740     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
10741   }
10742 
10743   /* The debug iteration:
10744      If debug=1, then will execute the iteration twice. First, compute the
10745      max ratio. Second, print info if ratio > (50% * ratio_max). */
10746   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
10747     bad_ratios = 0;		/* set to zero */
10748 
10749     if ((debug == 3) && (d_count == find_max_ratio))
10750       *seed = saved_seed;	/* restore the original seed */
10751 
10752     /* varying alpha */
10753     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
10754       alpha_flag = 0;
10755       switch (alpha_val) {
10756       case 0:
10757 	alpha[0] = alpha[1] = 0.0;
10758 	alpha_flag = 1;
10759 	break;
10760       case 1:
10761 	alpha[0] = 1.0;
10762 	alpha[1] = 0.0;
10763 	alpha_flag = 1;
10764 	break;
10765       }
10766 
10767       /* varying beta */
10768       for (beta_val = 0; beta_val < 3; beta_val++) {
10769 	beta_flag = 0;
10770 	switch (beta_val) {
10771 	case 0:
10772 	  beta[0] = beta[1] = 0.0;
10773 	  beta_flag = 1;
10774 	  break;
10775 	case 1:
10776 	  beta[0] = 1.0;
10777 	  beta[1] = 0.0;
10778 	  beta_flag = 1;
10779 	  break;
10780 	}
10781 
10782 
10783 	/* varying extra precs */
10784 	for (prec_val = 0; prec_val <= 2; prec_val++) {
10785 	  switch (prec_val) {
10786 	  case 0:
10787 	    eps_int = power(2, -BITS_D);
10788 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10789 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10790 	    prec = blas_prec_double;
10791 	    break;
10792 	  case 1:
10793 	    eps_int = power(2, -BITS_D);
10794 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
10795 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
10796 	    prec = blas_prec_double;
10797 	    break;
10798 	  case 2:
10799 	  default:
10800 	    eps_int = power(2, -BITS_E);
10801 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
10802 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
10803 	    prec = blas_prec_extra;
10804 	    break;
10805 	  }
10806 
10807 	  /* values near underflow, 1, or overflow */
10808 	  for (norm = -1; norm <= 1; norm++) {
10809 
10810 	    /* number of tests */
10811 	    for (i = 0; i < ntests; i++) {
10812 
10813 	      /* row or col major */
10814 	      for (order_val = 0; order_val < 2; order_val++) {
10815 		switch (order_val) {
10816 		case 0:
10817 		  order_type = blas_rowmajor;
10818 		  break;
10819 		case 1:
10820 		default:
10821 		  order_type = blas_colmajor;
10822 		  break;
10823 		}
10824 
10825 		/* no_trans, trans, or conj_trans */
10826 		for (trans_val = 0; trans_val < 3; trans_val++) {
10827 		  switch (trans_val) {
10828 		  case 0:
10829 		    trans_type = blas_no_trans;
10830 		    m_i = m;
10831 		    n_i = n;
10832 		    break;
10833 		  case 1:
10834 		    trans_type = blas_trans;
10835 		    m_i = n;
10836 		    n_i = m;
10837 		    break;
10838 		  case 2:
10839 		  default:
10840 		    trans_type = blas_conj_trans;
10841 		    m_i = n;
10842 		    n_i = m;
10843 		    break;
10844 		  }
10845 
10846 		  /* lda=n, n+1, or 2n */
10847 		  for (lda_val = 0; lda_val < 3; lda_val++) {
10848 		    switch (lda_val) {
10849 		    case 0:
10850 		      lda = m_i;
10851 		      break;
10852 		    case 1:
10853 		      lda = m_i + 1;
10854 		      break;
10855 		    case 2:
10856 		    default:
10857 		      lda = 2 * m_i;
10858 		      break;
10859 		    }
10860 		    if ((order_type == blas_rowmajor && lda < n) ||
10861 			(order_type == blas_colmajor && lda < m))
10862 		      continue;
10863 
10864 		    /* For the sake of speed, we throw out this case at random */
10865 		    if (xrand(seed) >= test_prob)
10866 		      continue;
10867 
10868 		    /* in the trivial cases, no need to run testgen */
10869 		    if (m > 0 && n > 0)
10870 		      BLAS_zgemv2_z_c_testgen(norm, order_type, trans_type, m,
10871 					      n, &alpha, alpha_flag, A, lda,
10872 					      head_x_gen, tail_x_gen, &beta,
10873 					      beta_flag, y_gen, seed,
10874 					      head_r_true, tail_r_true);
10875 
10876 		    count++;
10877 
10878 		    /* varying incx */
10879 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
10880 		      if (incx_val == 0)
10881 			continue;
10882 
10883 		      /* setting incx */
10884 		      incx = incx_val;
10885 		      incx *= 2;
10886 
10887 		      ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
10888 		      ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
10889 
10890 		      /* varying incy */
10891 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
10892 			if (incy_val == 0)
10893 			  continue;
10894 
10895 			/* setting incy */
10896 			incy = incy_val;
10897 			incy *= 2;
10898 
10899 			zcopy_vector(y_gen, m_i, 1, y, incy_val);
10900 
10901 			/* call BLAS_zgemv2_z_c_x */
10902 			FPU_FIX_STOP;
10903 			BLAS_zgemv2_z_c_x(order_type, trans_type, m, n, alpha,
10904 					  A, lda, head_x, tail_x, incx_val,
10905 					  beta, y, incy_val, prec);
10906 			FPU_FIX_START;
10907 
10908 			/* set y starting index */
10909 			iy = 0;
10910 			if (incy < 0)
10911 			  iy = -(m_i - 1) * incy;
10912 
10913 			/* computing the ratio */
10914 			if (m > 0 && n > 0)
10915 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
10916 			    /* copy row j of A to temp */
10917 			    zge_copy_row(order_type, trans_type, m_i, n_i, A,
10918 					 lda, temp, j);
10919 
10920 			    test_BLAS_zdot2_z_c(n_i, blas_no_conj, alpha,
10921 						beta, &y_gen[k], &y[iy],
10922 						&head_r_true[k],
10923 						&tail_r_true[k], temp, 1,
10924 						head_x, tail_x, incx_val,
10925 						eps_int, un_int, &ratios[j]);
10926 
10927 			    /* take the max ratio */
10928 			    if (j == 0) {
10929 			      ratio = ratios[0];
10930 			      /* The !<= below causes NaN error to be detected.
10931 			         Note that (NaN > thresh) is always false. */
10932 			    } else if (!(ratios[j] <= ratio)) {
10933 			      ratio = ratios[j];
10934 			    }
10935 			    iy += incy;
10936 			  }
10937 
10938 			/* Increase the number of bad ratio, if the ratio
10939 			   is bigger than the threshold.
10940 			   The !<= below causes NaN error to be detected.
10941 			   Note that (NaN > thresh) is always false. */
10942 			if (!(ratio <= thresh)) {
10943 			  bad_ratios++;
10944 
10945 			  if ((debug == 3) &&	/* print only when debug is on */
10946 			      (count != old_count) &&	/* print if old vector is different
10947 							   from the current one */
10948 			      (d_count == find_max_ratio) &&
10949 			      (p_count <= max_print) &&
10950 			      (ratio > 0.5 * ratio_max)) {
10951 			    old_count = count;
10952 
10953 			    printf
10954 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
10955 			       fname, m, n, ntests, thresh);
10956 
10957 			    /* Print test info */
10958 			    switch (prec) {
10959 			    case blas_prec_single:
10960 			      printf("single ");
10961 			      break;
10962 			    case blas_prec_double:
10963 			      printf("double ");
10964 			      break;
10965 			    case blas_prec_indigenous:
10966 			      printf("indigenous ");
10967 			      break;
10968 			    case blas_prec_extra:
10969 			      printf("extra ");
10970 			      break;
10971 			    }
10972 			    switch (norm) {
10973 			    case -1:
10974 			      printf("near_underflow ");
10975 			      break;
10976 			    case 0:
10977 			      printf("near_one ");
10978 			      break;
10979 			    case 1:
10980 			      printf("near_overflow ");
10981 			      break;
10982 			    }
10983 			    switch (order_type) {
10984 			    case blas_rowmajor:
10985 			      printf("row_major ");
10986 			      break;
10987 			    case blas_colmajor:
10988 			      printf("col_major ");
10989 			      break;
10990 			    }
10991 			    switch (trans_type) {
10992 			    case blas_no_trans:
10993 			      printf("no_trans ");
10994 			      break;
10995 			    case blas_trans:
10996 			      printf("trans ");
10997 			      break;
10998 			    case blas_conj_trans:
10999 			      printf("conj_trans ");
11000 			      break;
11001 			    }
11002 
11003 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
11004 				   incy);
11005 
11006 			    zge_print_matrix(A, m_i, n_i, lda, order_type,
11007 					     "A");
11008 
11009 			    cprint_vector(head_x, n_i, incx_val, "head_x");
11010 			    cprint_vector(tail_x, n_i, incx_val, "tail_x");
11011 			    zprint_vector(y_gen, m_i, 1, "y_gen");
11012 			    zprint_vector(y, m_i, incy_val, "y_final");
11013 
11014 			    printf("      ");
11015 			    printf("alpha = ");
11016 			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
11017 			    printf("\n      ");
11018 			    printf("beta = ");
11019 			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
11020 			    printf("\n");
11021 			    for (j = 0, k = 0; j < m_i * incy_gen;
11022 				 j += incy_gen, k++) {
11023 			      printf("      ");
11024 			      printf
11025 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
11026 				 head_r_true[j], tail_r_true[j],
11027 				 head_r_true[j + 1], tail_r_true[j + 1]);
11028 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
11029 			    }
11030 
11031 			    printf("      ratio=%.4e\n", ratio);
11032 			    p_count++;
11033 			  }
11034 			  if (bad_ratios >= MAX_BAD_TESTS) {
11035 			    printf("\ntoo many failures, exiting....");
11036 			    printf("\nTesting and compilation");
11037 			    printf(" are incomplete\n\n");
11038 			    goto end;
11039 			  }
11040 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
11041 			    printf("\nFlagrant ratio error, exiting...");
11042 			    printf("\nTesting and compilation");
11043 			    printf(" are incomplete\n\n");
11044 			    goto end;
11045 			  }
11046 			}
11047 			if (d_count == 0) {
11048 			  if (ratio > ratio_max)
11049 			    ratio_max = ratio;
11050 
11051 			  if (ratio != 0.0 && ratio < ratio_min)
11052 			    ratio_min = ratio;
11053 
11054 			  tot_tests++;
11055 			}
11056 		      }		/* incy */
11057 		    }		/* incx */
11058 		  }		/* lda */
11059 		}		/* trans */
11060 	      }			/* order */
11061 	    }			/* tests */
11062 	  }			/* norm */
11063 	}			/* prec */
11064       }				/* beta */
11065     }				/* alpha */
11066   }				/* debug */
11067 
11068   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
11069     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
11070 	   fname, m, n, ntests, thresh);
11071     printf
11072       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
11073        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
11074        ratio_min, ratio_max);
11075   }
11076 
11077 end:
11078   FPU_FIX_STOP;
11079 
11080   blas_free(head_x);
11081   blas_free(tail_x);
11082   blas_free(y);
11083   blas_free(head_x_gen);
11084   blas_free(tail_x_gen);
11085   blas_free(y_gen);
11086   blas_free(temp);
11087   blas_free(A);
11088   blas_free(head_r_true);
11089   blas_free(tail_r_true);
11090   blas_free(ratios);
11091 
11092   *min_ratio = ratio_min;
11093   *num_bad_ratio = bad_ratios;
11094   *num_tests = tot_tests;
11095   return ratio_max;
11096 }
do_test_zgemv2_c_z_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)11097 double do_test_zgemv2_c_z_x(int m, int n, int ntests, int *seed,
11098 			    double thresh, int debug, float test_prob,
11099 			    double *min_ratio, int *num_bad_ratio,
11100 			    int *num_tests)
11101 
11102 /*
11103  * Purpose
11104  * =======
11105  *
11106  * Runs a series of tests on GEMV2.
11107  *
11108  * Arguments
11109  * =========
11110  *
11111  * m         (input) int
11112  *           The number of rows
11113  *
11114  * n         (input) int
11115  *           The number of columns
11116  *
11117  * ntests    (input) int
11118  *           The number of tests to run for each set of attributes.
11119  *
11120  * seed      (input/output) int
11121  *           The seed for the random number generator used in testgen().
11122  *
11123  * thresh    (input) double
11124  *           When the ratio returned from test() exceeds the specified
11125  *           threshold, the current size, r_true, r_comp, and ratio will be
11126  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
11127  *           to ~10.)
11128  *
11129  * debug     (input) int
11130  *           If debug=3, print summary
11131  *           If debug=2, print summary only if the number of bad ratios > 0
11132  *           If debug=1, print complete info if tests fail
11133  *           If debug=0, return max ratio
11134  *
11135  * test_prob (input) float
11136  *           The specified test will be performed only if the generated
11137  *           random exceeds this threshold.
11138  *
11139  * min_ratio (output) double
11140  *           The minimum ratio
11141  *
11142  * num_bad_ratio (output) int
11143  *               The number of tests fail; they are above the threshold.
11144  *
11145  * num_tests (output) int
11146  *           The number of tests is being performed.
11147  *
11148  * Return value
11149  * ============
11150  *
11151  * The maximum ratio if run successfully, otherwise return -1
11152  *
11153  * Code structure
11154  * ==============
11155  *
11156  *  debug loop  -- if debug is one, the first loop computes the max ratio
11157  *              -- and the last(second) loop outputs debugging information,
11158  *              -- if the test fail and its ratio > 0.5 * max ratio.
11159  *              -- if debug is zero, the loop is executed once
11160  *    alpha loop  -- varying alpha: 0, 1, or random
11161  *      beta loop   -- varying beta: 0, 1, or random
11162  *        prec loop   -- varying internal prec: single, double, or extra
11163  *          norm loop   -- varying norm: near undeflow, near one, or
11164  *                        -- near overflow
11165  *            numtest loop  -- how many times the test is perform with
11166  *                            -- above set of attributes
11167  *              order loop   -- varying order type: rowmajor or colmajor
11168  *                trans loop    -- varying uplo type: upper or lower
11169  *                  lda loop      -- varying lda: m, m+1, 2m
11170  *                    incx loop     -- varying incx: -2, -1, 1, 2
11171  *                      incy loop     -- varying incy: -2, -1, 1, 2
11172  */
11173 {
11174   /* function name */
11175   const char fname[] = "BLAS_zgemv2_c_z_x";
11176 
11177   /* max number of debug lines to print */
11178   const int max_print = 8;
11179 
11180   /* Variables in the "x_val" form are loop vars for corresponding
11181      variables */
11182   int i;			/* iterate through the repeating tests */
11183   int j, k;			/* multipurpose counters or variables */
11184   int iy;			/* use to index y */
11185   int incx_val, incy_val,	/* for testing different inc values */
11186     incx, incy;
11187   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
11188   int d_count;			/* counter for debug */
11189   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
11190   int p_count;			/* counter for the number of debug lines printed */
11191   int tot_tests;		/* total number of tests to be done */
11192   int norm;			/* input values of near underflow/one/overflow */
11193   double ratio_max;		/* the current maximum ratio */
11194   double ratio_min;		/* the current minimum ratio */
11195   double *ratios;		/* a temporary variable for calculating ratio */
11196   double ratio;			/* the per-use test ratio from test() */
11197   int bad_ratios;		/* the number of ratios over the threshold */
11198   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
11199   double un_int;		/* the internal underflow threshold */
11200   double alpha[2];
11201   double beta[2];
11202   float *A;
11203   double *head_x;
11204   double *tail_x;
11205   double *y;
11206   float *temp;			/* use for calculating ratio */
11207 
11208   /* x_gen and y_gen are used to store vectors generated by testgen.
11209      they eventually are copied back to x and y */
11210   double *head_x_gen;
11211   double *tail_x_gen;
11212   double *y_gen;
11213 
11214   /* the true r calculated by testgen(), in double-double */
11215   double *head_r_true, *tail_r_true;
11216 
11217   int alpha_val;
11218   int alpha_flag;		/* input flag for BLAS_zgemv2_c_z_testgen */
11219   int beta_val;
11220   int beta_flag;		/* input flag for BLAS_zgemv2_c_z_testgen */
11221   int order_val;
11222   enum blas_order_type order_type;
11223   int prec_val;
11224   enum blas_prec_type prec;
11225   int trans_val;
11226   enum blas_trans_type trans_type;
11227   int m_i;
11228   int n_i;
11229   int max_mn;			/* the max of m and n */
11230   int lda_val;
11231   int lda;
11232   int saved_seed;		/* for saving the original seed */
11233   int count, old_count;		/* use for counting the number of testgen calls * 2 */
11234 
11235   FPU_FIX_DECL;
11236 
11237   /* test for bad arguments */
11238   if (n < 0 || m < 0 || ntests < 0)
11239     BLAS_error(fname, 0, 0, NULL);
11240 
11241   /* initialization */
11242   *num_bad_ratio = 0;
11243   *num_tests = 0;
11244   *min_ratio = 0.0;
11245 
11246   saved_seed = *seed;
11247   ratio_min = 1e308;
11248   ratio_max = 0.0;
11249   ratio = 0.0;
11250   tot_tests = 0;
11251   p_count = 0;
11252   count = 0;
11253   find_max_ratio = 0;
11254   bad_ratios = 0;
11255   old_count = 0;
11256 
11257   if (debug == 3)
11258     find_max_ratio = 1;
11259   max_mn = MAX(m, n);
11260   if (m == 0 || n == 0) {
11261     return 0.0;
11262   }
11263 
11264   FPU_FIX_START;
11265 
11266   incy_gen = 1;
11267   incy_gen *= 2;
11268 
11269   /* get space for calculation */
11270   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
11271   if (max_mn * 2 > 0 && head_x == NULL) {
11272     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11273   }
11274   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
11275   if (max_mn * 2 > 0 && tail_x == NULL) {
11276     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11277   }
11278   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
11279   if (max_mn * 2 > 0 && y == NULL) {
11280     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11281   }
11282   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11283   if (max_mn > 0 && head_x_gen == NULL) {
11284     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11285   }
11286   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11287   if (max_mn > 0 && tail_x_gen == NULL) {
11288     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11289   }
11290   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11291   if (max_mn > 0 && y_gen == NULL) {
11292     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11293   }
11294   temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
11295   if (max_mn > 0 && temp == NULL) {
11296     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11297   }
11298   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11299   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11300   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
11301     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11302   }
11303   ratios = (double *) blas_malloc(max_mn * sizeof(double));
11304   if (max_mn > 0 && ratios == NULL) {
11305     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11306   }
11307   A =
11308     (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
11309 			  2);
11310   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
11311     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11312   }
11313 
11314   /* The debug iteration:
11315      If debug=1, then will execute the iteration twice. First, compute the
11316      max ratio. Second, print info if ratio > (50% * ratio_max). */
11317   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
11318     bad_ratios = 0;		/* set to zero */
11319 
11320     if ((debug == 3) && (d_count == find_max_ratio))
11321       *seed = saved_seed;	/* restore the original seed */
11322 
11323     /* varying alpha */
11324     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
11325       alpha_flag = 0;
11326       switch (alpha_val) {
11327       case 0:
11328 	alpha[0] = alpha[1] = 0.0;
11329 	alpha_flag = 1;
11330 	break;
11331       case 1:
11332 	alpha[0] = 1.0;
11333 	alpha[1] = 0.0;
11334 	alpha_flag = 1;
11335 	break;
11336       }
11337 
11338       /* varying beta */
11339       for (beta_val = 0; beta_val < 3; beta_val++) {
11340 	beta_flag = 0;
11341 	switch (beta_val) {
11342 	case 0:
11343 	  beta[0] = beta[1] = 0.0;
11344 	  beta_flag = 1;
11345 	  break;
11346 	case 1:
11347 	  beta[0] = 1.0;
11348 	  beta[1] = 0.0;
11349 	  beta_flag = 1;
11350 	  break;
11351 	}
11352 
11353 
11354 	/* varying extra precs */
11355 	for (prec_val = 0; prec_val <= 2; prec_val++) {
11356 	  switch (prec_val) {
11357 	  case 0:
11358 	    eps_int = power(2, -BITS_D);
11359 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11360 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11361 	    prec = blas_prec_double;
11362 	    break;
11363 	  case 1:
11364 	    eps_int = power(2, -BITS_D);
11365 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11366 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11367 	    prec = blas_prec_double;
11368 	    break;
11369 	  case 2:
11370 	  default:
11371 	    eps_int = power(2, -BITS_E);
11372 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
11373 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
11374 	    prec = blas_prec_extra;
11375 	    break;
11376 	  }
11377 
11378 	  /* values near underflow, 1, or overflow */
11379 	  for (norm = -1; norm <= 1; norm++) {
11380 
11381 	    /* number of tests */
11382 	    for (i = 0; i < ntests; i++) {
11383 
11384 	      /* row or col major */
11385 	      for (order_val = 0; order_val < 2; order_val++) {
11386 		switch (order_val) {
11387 		case 0:
11388 		  order_type = blas_rowmajor;
11389 		  break;
11390 		case 1:
11391 		default:
11392 		  order_type = blas_colmajor;
11393 		  break;
11394 		}
11395 
11396 		/* no_trans, trans, or conj_trans */
11397 		for (trans_val = 0; trans_val < 3; trans_val++) {
11398 		  switch (trans_val) {
11399 		  case 0:
11400 		    trans_type = blas_no_trans;
11401 		    m_i = m;
11402 		    n_i = n;
11403 		    break;
11404 		  case 1:
11405 		    trans_type = blas_trans;
11406 		    m_i = n;
11407 		    n_i = m;
11408 		    break;
11409 		  case 2:
11410 		  default:
11411 		    trans_type = blas_conj_trans;
11412 		    m_i = n;
11413 		    n_i = m;
11414 		    break;
11415 		  }
11416 
11417 		  /* lda=n, n+1, or 2n */
11418 		  for (lda_val = 0; lda_val < 3; lda_val++) {
11419 		    switch (lda_val) {
11420 		    case 0:
11421 		      lda = m_i;
11422 		      break;
11423 		    case 1:
11424 		      lda = m_i + 1;
11425 		      break;
11426 		    case 2:
11427 		    default:
11428 		      lda = 2 * m_i;
11429 		      break;
11430 		    }
11431 		    if ((order_type == blas_rowmajor && lda < n) ||
11432 			(order_type == blas_colmajor && lda < m))
11433 		      continue;
11434 
11435 		    /* For the sake of speed, we throw out this case at random */
11436 		    if (xrand(seed) >= test_prob)
11437 		      continue;
11438 
11439 		    /* in the trivial cases, no need to run testgen */
11440 		    if (m > 0 && n > 0)
11441 		      BLAS_zgemv2_c_z_testgen(norm, order_type, trans_type, m,
11442 					      n, &alpha, alpha_flag, A, lda,
11443 					      head_x_gen, tail_x_gen, &beta,
11444 					      beta_flag, y_gen, seed,
11445 					      head_r_true, tail_r_true);
11446 
11447 		    count++;
11448 
11449 		    /* varying incx */
11450 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
11451 		      if (incx_val == 0)
11452 			continue;
11453 
11454 		      /* setting incx */
11455 		      incx = incx_val;
11456 		      incx *= 2;
11457 
11458 		      zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
11459 		      zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
11460 
11461 		      /* varying incy */
11462 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
11463 			if (incy_val == 0)
11464 			  continue;
11465 
11466 			/* setting incy */
11467 			incy = incy_val;
11468 			incy *= 2;
11469 
11470 			zcopy_vector(y_gen, m_i, 1, y, incy_val);
11471 
11472 			/* call BLAS_zgemv2_c_z_x */
11473 			FPU_FIX_STOP;
11474 			BLAS_zgemv2_c_z_x(order_type, trans_type, m, n, alpha,
11475 					  A, lda, head_x, tail_x, incx_val,
11476 					  beta, y, incy_val, prec);
11477 			FPU_FIX_START;
11478 
11479 			/* set y starting index */
11480 			iy = 0;
11481 			if (incy < 0)
11482 			  iy = -(m_i - 1) * incy;
11483 
11484 			/* computing the ratio */
11485 			if (m > 0 && n > 0)
11486 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
11487 			    /* copy row j of A to temp */
11488 			    cge_copy_row(order_type, trans_type, m_i, n_i, A,
11489 					 lda, temp, j);
11490 
11491 			    test_BLAS_zdot2_c_z(n_i, blas_no_conj, alpha,
11492 						beta, &y_gen[k], &y[iy],
11493 						&head_r_true[k],
11494 						&tail_r_true[k], temp, 1,
11495 						head_x, tail_x, incx_val,
11496 						eps_int, un_int, &ratios[j]);
11497 
11498 			    /* take the max ratio */
11499 			    if (j == 0) {
11500 			      ratio = ratios[0];
11501 			      /* The !<= below causes NaN error to be detected.
11502 			         Note that (NaN > thresh) is always false. */
11503 			    } else if (!(ratios[j] <= ratio)) {
11504 			      ratio = ratios[j];
11505 			    }
11506 			    iy += incy;
11507 			  }
11508 
11509 			/* Increase the number of bad ratio, if the ratio
11510 			   is bigger than the threshold.
11511 			   The !<= below causes NaN error to be detected.
11512 			   Note that (NaN > thresh) is always false. */
11513 			if (!(ratio <= thresh)) {
11514 			  bad_ratios++;
11515 
11516 			  if ((debug == 3) &&	/* print only when debug is on */
11517 			      (count != old_count) &&	/* print if old vector is different
11518 							   from the current one */
11519 			      (d_count == find_max_ratio) &&
11520 			      (p_count <= max_print) &&
11521 			      (ratio > 0.5 * ratio_max)) {
11522 			    old_count = count;
11523 
11524 			    printf
11525 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
11526 			       fname, m, n, ntests, thresh);
11527 
11528 			    /* Print test info */
11529 			    switch (prec) {
11530 			    case blas_prec_single:
11531 			      printf("single ");
11532 			      break;
11533 			    case blas_prec_double:
11534 			      printf("double ");
11535 			      break;
11536 			    case blas_prec_indigenous:
11537 			      printf("indigenous ");
11538 			      break;
11539 			    case blas_prec_extra:
11540 			      printf("extra ");
11541 			      break;
11542 			    }
11543 			    switch (norm) {
11544 			    case -1:
11545 			      printf("near_underflow ");
11546 			      break;
11547 			    case 0:
11548 			      printf("near_one ");
11549 			      break;
11550 			    case 1:
11551 			      printf("near_overflow ");
11552 			      break;
11553 			    }
11554 			    switch (order_type) {
11555 			    case blas_rowmajor:
11556 			      printf("row_major ");
11557 			      break;
11558 			    case blas_colmajor:
11559 			      printf("col_major ");
11560 			      break;
11561 			    }
11562 			    switch (trans_type) {
11563 			    case blas_no_trans:
11564 			      printf("no_trans ");
11565 			      break;
11566 			    case blas_trans:
11567 			      printf("trans ");
11568 			      break;
11569 			    case blas_conj_trans:
11570 			      printf("conj_trans ");
11571 			      break;
11572 			    }
11573 
11574 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
11575 				   incy);
11576 
11577 			    cge_print_matrix(A, m_i, n_i, lda, order_type,
11578 					     "A");
11579 
11580 			    zprint_vector(head_x, n_i, incx_val, "head_x");
11581 			    zprint_vector(tail_x, n_i, incx_val, "tail_x");
11582 			    zprint_vector(y_gen, m_i, 1, "y_gen");
11583 			    zprint_vector(y, m_i, incy_val, "y_final");
11584 
11585 			    printf("      ");
11586 			    printf("alpha = ");
11587 			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
11588 			    printf("\n      ");
11589 			    printf("beta = ");
11590 			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
11591 			    printf("\n");
11592 			    for (j = 0, k = 0; j < m_i * incy_gen;
11593 				 j += incy_gen, k++) {
11594 			      printf("      ");
11595 			      printf
11596 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
11597 				 head_r_true[j], tail_r_true[j],
11598 				 head_r_true[j + 1], tail_r_true[j + 1]);
11599 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
11600 			    }
11601 
11602 			    printf("      ratio=%.4e\n", ratio);
11603 			    p_count++;
11604 			  }
11605 			  if (bad_ratios >= MAX_BAD_TESTS) {
11606 			    printf("\ntoo many failures, exiting....");
11607 			    printf("\nTesting and compilation");
11608 			    printf(" are incomplete\n\n");
11609 			    goto end;
11610 			  }
11611 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
11612 			    printf("\nFlagrant ratio error, exiting...");
11613 			    printf("\nTesting and compilation");
11614 			    printf(" are incomplete\n\n");
11615 			    goto end;
11616 			  }
11617 			}
11618 			if (d_count == 0) {
11619 			  if (ratio > ratio_max)
11620 			    ratio_max = ratio;
11621 
11622 			  if (ratio != 0.0 && ratio < ratio_min)
11623 			    ratio_min = ratio;
11624 
11625 			  tot_tests++;
11626 			}
11627 		      }		/* incy */
11628 		    }		/* incx */
11629 		  }		/* lda */
11630 		}		/* trans */
11631 	      }			/* order */
11632 	    }			/* tests */
11633 	  }			/* norm */
11634 	}			/* prec */
11635       }				/* beta */
11636     }				/* alpha */
11637   }				/* debug */
11638 
11639   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
11640     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
11641 	   fname, m, n, ntests, thresh);
11642     printf
11643       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
11644        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
11645        ratio_min, ratio_max);
11646   }
11647 
11648 end:
11649   FPU_FIX_STOP;
11650 
11651   blas_free(head_x);
11652   blas_free(tail_x);
11653   blas_free(y);
11654   blas_free(head_x_gen);
11655   blas_free(tail_x_gen);
11656   blas_free(y_gen);
11657   blas_free(temp);
11658   blas_free(A);
11659   blas_free(head_r_true);
11660   blas_free(tail_r_true);
11661   blas_free(ratios);
11662 
11663   *min_ratio = ratio_min;
11664   *num_bad_ratio = bad_ratios;
11665   *num_tests = tot_tests;
11666   return ratio_max;
11667 }
do_test_zgemv2_c_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)11668 double do_test_zgemv2_c_c_x(int m, int n, int ntests, int *seed,
11669 			    double thresh, int debug, float test_prob,
11670 			    double *min_ratio, int *num_bad_ratio,
11671 			    int *num_tests)
11672 
11673 /*
11674  * Purpose
11675  * =======
11676  *
11677  * Runs a series of tests on GEMV2.
11678  *
11679  * Arguments
11680  * =========
11681  *
11682  * m         (input) int
11683  *           The number of rows
11684  *
11685  * n         (input) int
11686  *           The number of columns
11687  *
11688  * ntests    (input) int
11689  *           The number of tests to run for each set of attributes.
11690  *
11691  * seed      (input/output) int
11692  *           The seed for the random number generator used in testgen().
11693  *
11694  * thresh    (input) double
11695  *           When the ratio returned from test() exceeds the specified
11696  *           threshold, the current size, r_true, r_comp, and ratio will be
11697  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
11698  *           to ~10.)
11699  *
11700  * debug     (input) int
11701  *           If debug=3, print summary
11702  *           If debug=2, print summary only if the number of bad ratios > 0
11703  *           If debug=1, print complete info if tests fail
11704  *           If debug=0, return max ratio
11705  *
11706  * test_prob (input) float
11707  *           The specified test will be performed only if the generated
11708  *           random exceeds this threshold.
11709  *
11710  * min_ratio (output) double
11711  *           The minimum ratio
11712  *
11713  * num_bad_ratio (output) int
11714  *               The number of tests fail; they are above the threshold.
11715  *
11716  * num_tests (output) int
11717  *           The number of tests is being performed.
11718  *
11719  * Return value
11720  * ============
11721  *
11722  * The maximum ratio if run successfully, otherwise return -1
11723  *
11724  * Code structure
11725  * ==============
11726  *
11727  *  debug loop  -- if debug is one, the first loop computes the max ratio
11728  *              -- and the last(second) loop outputs debugging information,
11729  *              -- if the test fail and its ratio > 0.5 * max ratio.
11730  *              -- if debug is zero, the loop is executed once
11731  *    alpha loop  -- varying alpha: 0, 1, or random
11732  *      beta loop   -- varying beta: 0, 1, or random
11733  *        prec loop   -- varying internal prec: single, double, or extra
11734  *          norm loop   -- varying norm: near undeflow, near one, or
11735  *                        -- near overflow
11736  *            numtest loop  -- how many times the test is perform with
11737  *                            -- above set of attributes
11738  *              order loop   -- varying order type: rowmajor or colmajor
11739  *                trans loop    -- varying uplo type: upper or lower
11740  *                  lda loop      -- varying lda: m, m+1, 2m
11741  *                    incx loop     -- varying incx: -2, -1, 1, 2
11742  *                      incy loop     -- varying incy: -2, -1, 1, 2
11743  */
11744 {
11745   /* function name */
11746   const char fname[] = "BLAS_zgemv2_c_c_x";
11747 
11748   /* max number of debug lines to print */
11749   const int max_print = 8;
11750 
11751   /* Variables in the "x_val" form are loop vars for corresponding
11752      variables */
11753   int i;			/* iterate through the repeating tests */
11754   int j, k;			/* multipurpose counters or variables */
11755   int iy;			/* use to index y */
11756   int incx_val, incy_val,	/* for testing different inc values */
11757     incx, incy;
11758   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
11759   int d_count;			/* counter for debug */
11760   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
11761   int p_count;			/* counter for the number of debug lines printed */
11762   int tot_tests;		/* total number of tests to be done */
11763   int norm;			/* input values of near underflow/one/overflow */
11764   double ratio_max;		/* the current maximum ratio */
11765   double ratio_min;		/* the current minimum ratio */
11766   double *ratios;		/* a temporary variable for calculating ratio */
11767   double ratio;			/* the per-use test ratio from test() */
11768   int bad_ratios;		/* the number of ratios over the threshold */
11769   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
11770   double un_int;		/* the internal underflow threshold */
11771   double alpha[2];
11772   double beta[2];
11773   float *A;
11774   float *head_x;
11775   float *tail_x;
11776   double *y;
11777   float *temp;			/* use for calculating ratio */
11778 
11779   /* x_gen and y_gen are used to store vectors generated by testgen.
11780      they eventually are copied back to x and y */
11781   float *head_x_gen;
11782   float *tail_x_gen;
11783   double *y_gen;
11784 
11785   /* the true r calculated by testgen(), in double-double */
11786   double *head_r_true, *tail_r_true;
11787 
11788   int alpha_val;
11789   int alpha_flag;		/* input flag for BLAS_zgemv2_c_c_testgen */
11790   int beta_val;
11791   int beta_flag;		/* input flag for BLAS_zgemv2_c_c_testgen */
11792   int order_val;
11793   enum blas_order_type order_type;
11794   int prec_val;
11795   enum blas_prec_type prec;
11796   int trans_val;
11797   enum blas_trans_type trans_type;
11798   int m_i;
11799   int n_i;
11800   int max_mn;			/* the max of m and n */
11801   int lda_val;
11802   int lda;
11803   int saved_seed;		/* for saving the original seed */
11804   int count, old_count;		/* use for counting the number of testgen calls * 2 */
11805 
11806   FPU_FIX_DECL;
11807 
11808   /* test for bad arguments */
11809   if (n < 0 || m < 0 || ntests < 0)
11810     BLAS_error(fname, 0, 0, NULL);
11811 
11812   /* initialization */
11813   *num_bad_ratio = 0;
11814   *num_tests = 0;
11815   *min_ratio = 0.0;
11816 
11817   saved_seed = *seed;
11818   ratio_min = 1e308;
11819   ratio_max = 0.0;
11820   ratio = 0.0;
11821   tot_tests = 0;
11822   p_count = 0;
11823   count = 0;
11824   find_max_ratio = 0;
11825   bad_ratios = 0;
11826   old_count = 0;
11827 
11828   if (debug == 3)
11829     find_max_ratio = 1;
11830   max_mn = MAX(m, n);
11831   if (m == 0 || n == 0) {
11832     return 0.0;
11833   }
11834 
11835   FPU_FIX_START;
11836 
11837   incy_gen = 1;
11838   incy_gen *= 2;
11839 
11840   /* get space for calculation */
11841   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
11842   if (max_mn * 2 > 0 && head_x == NULL) {
11843     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11844   }
11845   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
11846   if (max_mn * 2 > 0 && tail_x == NULL) {
11847     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11848   }
11849   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
11850   if (max_mn * 2 > 0 && y == NULL) {
11851     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11852   }
11853   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
11854   if (max_mn > 0 && head_x_gen == NULL) {
11855     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11856   }
11857   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
11858   if (max_mn > 0 && tail_x_gen == NULL) {
11859     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11860   }
11861   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11862   if (max_mn > 0 && y_gen == NULL) {
11863     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11864   }
11865   temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
11866   if (max_mn > 0 && temp == NULL) {
11867     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11868   }
11869   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11870   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
11871   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
11872     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11873   }
11874   ratios = (double *) blas_malloc(max_mn * sizeof(double));
11875   if (max_mn > 0 && ratios == NULL) {
11876     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11877   }
11878   A =
11879     (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
11880 			  2);
11881   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
11882     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
11883   }
11884 
11885   /* The debug iteration:
11886      If debug=1, then will execute the iteration twice. First, compute the
11887      max ratio. Second, print info if ratio > (50% * ratio_max). */
11888   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
11889     bad_ratios = 0;		/* set to zero */
11890 
11891     if ((debug == 3) && (d_count == find_max_ratio))
11892       *seed = saved_seed;	/* restore the original seed */
11893 
11894     /* varying alpha */
11895     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
11896       alpha_flag = 0;
11897       switch (alpha_val) {
11898       case 0:
11899 	alpha[0] = alpha[1] = 0.0;
11900 	alpha_flag = 1;
11901 	break;
11902       case 1:
11903 	alpha[0] = 1.0;
11904 	alpha[1] = 0.0;
11905 	alpha_flag = 1;
11906 	break;
11907       }
11908 
11909       /* varying beta */
11910       for (beta_val = 0; beta_val < 3; beta_val++) {
11911 	beta_flag = 0;
11912 	switch (beta_val) {
11913 	case 0:
11914 	  beta[0] = beta[1] = 0.0;
11915 	  beta_flag = 1;
11916 	  break;
11917 	case 1:
11918 	  beta[0] = 1.0;
11919 	  beta[1] = 0.0;
11920 	  beta_flag = 1;
11921 	  break;
11922 	}
11923 
11924 
11925 	/* varying extra precs */
11926 	for (prec_val = 0; prec_val <= 2; prec_val++) {
11927 	  switch (prec_val) {
11928 	  case 0:
11929 	    eps_int = power(2, -BITS_D);
11930 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11931 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11932 	    prec = blas_prec_double;
11933 	    break;
11934 	  case 1:
11935 	    eps_int = power(2, -BITS_D);
11936 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
11937 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
11938 	    prec = blas_prec_double;
11939 	    break;
11940 	  case 2:
11941 	  default:
11942 	    eps_int = power(2, -BITS_E);
11943 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
11944 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
11945 	    prec = blas_prec_extra;
11946 	    break;
11947 	  }
11948 
11949 	  /* values near underflow, 1, or overflow */
11950 	  for (norm = -1; norm <= 1; norm++) {
11951 
11952 	    /* number of tests */
11953 	    for (i = 0; i < ntests; i++) {
11954 
11955 	      /* row or col major */
11956 	      for (order_val = 0; order_val < 2; order_val++) {
11957 		switch (order_val) {
11958 		case 0:
11959 		  order_type = blas_rowmajor;
11960 		  break;
11961 		case 1:
11962 		default:
11963 		  order_type = blas_colmajor;
11964 		  break;
11965 		}
11966 
11967 		/* no_trans, trans, or conj_trans */
11968 		for (trans_val = 0; trans_val < 3; trans_val++) {
11969 		  switch (trans_val) {
11970 		  case 0:
11971 		    trans_type = blas_no_trans;
11972 		    m_i = m;
11973 		    n_i = n;
11974 		    break;
11975 		  case 1:
11976 		    trans_type = blas_trans;
11977 		    m_i = n;
11978 		    n_i = m;
11979 		    break;
11980 		  case 2:
11981 		  default:
11982 		    trans_type = blas_conj_trans;
11983 		    m_i = n;
11984 		    n_i = m;
11985 		    break;
11986 		  }
11987 
11988 		  /* lda=n, n+1, or 2n */
11989 		  for (lda_val = 0; lda_val < 3; lda_val++) {
11990 		    switch (lda_val) {
11991 		    case 0:
11992 		      lda = m_i;
11993 		      break;
11994 		    case 1:
11995 		      lda = m_i + 1;
11996 		      break;
11997 		    case 2:
11998 		    default:
11999 		      lda = 2 * m_i;
12000 		      break;
12001 		    }
12002 		    if ((order_type == blas_rowmajor && lda < n) ||
12003 			(order_type == blas_colmajor && lda < m))
12004 		      continue;
12005 
12006 		    /* For the sake of speed, we throw out this case at random */
12007 		    if (xrand(seed) >= test_prob)
12008 		      continue;
12009 
12010 		    /* in the trivial cases, no need to run testgen */
12011 		    if (m > 0 && n > 0)
12012 		      BLAS_zgemv2_c_c_testgen(norm, order_type, trans_type, m,
12013 					      n, &alpha, alpha_flag, A, lda,
12014 					      head_x_gen, tail_x_gen, &beta,
12015 					      beta_flag, y_gen, seed,
12016 					      head_r_true, tail_r_true);
12017 
12018 		    count++;
12019 
12020 		    /* varying incx */
12021 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
12022 		      if (incx_val == 0)
12023 			continue;
12024 
12025 		      /* setting incx */
12026 		      incx = incx_val;
12027 		      incx *= 2;
12028 
12029 		      ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
12030 		      ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
12031 
12032 		      /* varying incy */
12033 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
12034 			if (incy_val == 0)
12035 			  continue;
12036 
12037 			/* setting incy */
12038 			incy = incy_val;
12039 			incy *= 2;
12040 
12041 			zcopy_vector(y_gen, m_i, 1, y, incy_val);
12042 
12043 			/* call BLAS_zgemv2_c_c_x */
12044 			FPU_FIX_STOP;
12045 			BLAS_zgemv2_c_c_x(order_type, trans_type, m, n, alpha,
12046 					  A, lda, head_x, tail_x, incx_val,
12047 					  beta, y, incy_val, prec);
12048 			FPU_FIX_START;
12049 
12050 			/* set y starting index */
12051 			iy = 0;
12052 			if (incy < 0)
12053 			  iy = -(m_i - 1) * incy;
12054 
12055 			/* computing the ratio */
12056 			if (m > 0 && n > 0)
12057 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
12058 			    /* copy row j of A to temp */
12059 			    cge_copy_row(order_type, trans_type, m_i, n_i, A,
12060 					 lda, temp, j);
12061 
12062 			    test_BLAS_zdot2_c_c(n_i, blas_no_conj, alpha,
12063 						beta, &y_gen[k], &y[iy],
12064 						&head_r_true[k],
12065 						&tail_r_true[k], temp, 1,
12066 						head_x, tail_x, incx_val,
12067 						eps_int, un_int, &ratios[j]);
12068 
12069 			    /* take the max ratio */
12070 			    if (j == 0) {
12071 			      ratio = ratios[0];
12072 			      /* The !<= below causes NaN error to be detected.
12073 			         Note that (NaN > thresh) is always false. */
12074 			    } else if (!(ratios[j] <= ratio)) {
12075 			      ratio = ratios[j];
12076 			    }
12077 			    iy += incy;
12078 			  }
12079 
12080 			/* Increase the number of bad ratio, if the ratio
12081 			   is bigger than the threshold.
12082 			   The !<= below causes NaN error to be detected.
12083 			   Note that (NaN > thresh) is always false. */
12084 			if (!(ratio <= thresh)) {
12085 			  bad_ratios++;
12086 
12087 			  if ((debug == 3) &&	/* print only when debug is on */
12088 			      (count != old_count) &&	/* print if old vector is different
12089 							   from the current one */
12090 			      (d_count == find_max_ratio) &&
12091 			      (p_count <= max_print) &&
12092 			      (ratio > 0.5 * ratio_max)) {
12093 			    old_count = count;
12094 
12095 			    printf
12096 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
12097 			       fname, m, n, ntests, thresh);
12098 
12099 			    /* Print test info */
12100 			    switch (prec) {
12101 			    case blas_prec_single:
12102 			      printf("single ");
12103 			      break;
12104 			    case blas_prec_double:
12105 			      printf("double ");
12106 			      break;
12107 			    case blas_prec_indigenous:
12108 			      printf("indigenous ");
12109 			      break;
12110 			    case blas_prec_extra:
12111 			      printf("extra ");
12112 			      break;
12113 			    }
12114 			    switch (norm) {
12115 			    case -1:
12116 			      printf("near_underflow ");
12117 			      break;
12118 			    case 0:
12119 			      printf("near_one ");
12120 			      break;
12121 			    case 1:
12122 			      printf("near_overflow ");
12123 			      break;
12124 			    }
12125 			    switch (order_type) {
12126 			    case blas_rowmajor:
12127 			      printf("row_major ");
12128 			      break;
12129 			    case blas_colmajor:
12130 			      printf("col_major ");
12131 			      break;
12132 			    }
12133 			    switch (trans_type) {
12134 			    case blas_no_trans:
12135 			      printf("no_trans ");
12136 			      break;
12137 			    case blas_trans:
12138 			      printf("trans ");
12139 			      break;
12140 			    case blas_conj_trans:
12141 			      printf("conj_trans ");
12142 			      break;
12143 			    }
12144 
12145 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
12146 				   incy);
12147 
12148 			    cge_print_matrix(A, m_i, n_i, lda, order_type,
12149 					     "A");
12150 
12151 			    cprint_vector(head_x, n_i, incx_val, "head_x");
12152 			    cprint_vector(tail_x, n_i, incx_val, "tail_x");
12153 			    zprint_vector(y_gen, m_i, 1, "y_gen");
12154 			    zprint_vector(y, m_i, incy_val, "y_final");
12155 
12156 			    printf("      ");
12157 			    printf("alpha = ");
12158 			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
12159 			    printf("\n      ");
12160 			    printf("beta = ");
12161 			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
12162 			    printf("\n");
12163 			    for (j = 0, k = 0; j < m_i * incy_gen;
12164 				 j += incy_gen, k++) {
12165 			      printf("      ");
12166 			      printf
12167 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
12168 				 head_r_true[j], tail_r_true[j],
12169 				 head_r_true[j + 1], tail_r_true[j + 1]);
12170 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
12171 			    }
12172 
12173 			    printf("      ratio=%.4e\n", ratio);
12174 			    p_count++;
12175 			  }
12176 			  if (bad_ratios >= MAX_BAD_TESTS) {
12177 			    printf("\ntoo many failures, exiting....");
12178 			    printf("\nTesting and compilation");
12179 			    printf(" are incomplete\n\n");
12180 			    goto end;
12181 			  }
12182 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
12183 			    printf("\nFlagrant ratio error, exiting...");
12184 			    printf("\nTesting and compilation");
12185 			    printf(" are incomplete\n\n");
12186 			    goto end;
12187 			  }
12188 			}
12189 			if (d_count == 0) {
12190 			  if (ratio > ratio_max)
12191 			    ratio_max = ratio;
12192 
12193 			  if (ratio != 0.0 && ratio < ratio_min)
12194 			    ratio_min = ratio;
12195 
12196 			  tot_tests++;
12197 			}
12198 		      }		/* incy */
12199 		    }		/* incx */
12200 		  }		/* lda */
12201 		}		/* trans */
12202 	      }			/* order */
12203 	    }			/* tests */
12204 	  }			/* norm */
12205 	}			/* prec */
12206       }				/* beta */
12207     }				/* alpha */
12208   }				/* debug */
12209 
12210   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
12211     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
12212 	   fname, m, n, ntests, thresh);
12213     printf
12214       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
12215        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
12216        ratio_min, ratio_max);
12217   }
12218 
12219 end:
12220   FPU_FIX_STOP;
12221 
12222   blas_free(head_x);
12223   blas_free(tail_x);
12224   blas_free(y);
12225   blas_free(head_x_gen);
12226   blas_free(tail_x_gen);
12227   blas_free(y_gen);
12228   blas_free(temp);
12229   blas_free(A);
12230   blas_free(head_r_true);
12231   blas_free(tail_r_true);
12232   blas_free(ratios);
12233 
12234   *min_ratio = ratio_min;
12235   *num_bad_ratio = bad_ratios;
12236   *num_tests = tot_tests;
12237   return ratio_max;
12238 }
do_test_cgemv2_c_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)12239 double do_test_cgemv2_c_s_x(int m, int n, int ntests, int *seed,
12240 			    double thresh, int debug, float test_prob,
12241 			    double *min_ratio, int *num_bad_ratio,
12242 			    int *num_tests)
12243 
12244 /*
12245  * Purpose
12246  * =======
12247  *
12248  * Runs a series of tests on GEMV2.
12249  *
12250  * Arguments
12251  * =========
12252  *
12253  * m         (input) int
12254  *           The number of rows
12255  *
12256  * n         (input) int
12257  *           The number of columns
12258  *
12259  * ntests    (input) int
12260  *           The number of tests to run for each set of attributes.
12261  *
12262  * seed      (input/output) int
12263  *           The seed for the random number generator used in testgen().
12264  *
12265  * thresh    (input) double
12266  *           When the ratio returned from test() exceeds the specified
12267  *           threshold, the current size, r_true, r_comp, and ratio will be
12268  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
12269  *           to ~10.)
12270  *
12271  * debug     (input) int
12272  *           If debug=3, print summary
12273  *           If debug=2, print summary only if the number of bad ratios > 0
12274  *           If debug=1, print complete info if tests fail
12275  *           If debug=0, return max ratio
12276  *
12277  * test_prob (input) float
12278  *           The specified test will be performed only if the generated
12279  *           random exceeds this threshold.
12280  *
12281  * min_ratio (output) double
12282  *           The minimum ratio
12283  *
12284  * num_bad_ratio (output) int
12285  *               The number of tests fail; they are above the threshold.
12286  *
12287  * num_tests (output) int
12288  *           The number of tests is being performed.
12289  *
12290  * Return value
12291  * ============
12292  *
12293  * The maximum ratio if run successfully, otherwise return -1
12294  *
12295  * Code structure
12296  * ==============
12297  *
12298  *  debug loop  -- if debug is one, the first loop computes the max ratio
12299  *              -- and the last(second) loop outputs debugging information,
12300  *              -- if the test fail and its ratio > 0.5 * max ratio.
12301  *              -- if debug is zero, the loop is executed once
12302  *    alpha loop  -- varying alpha: 0, 1, or random
12303  *      beta loop   -- varying beta: 0, 1, or random
12304  *        prec loop   -- varying internal prec: single, double, or extra
12305  *          norm loop   -- varying norm: near undeflow, near one, or
12306  *                        -- near overflow
12307  *            numtest loop  -- how many times the test is perform with
12308  *                            -- above set of attributes
12309  *              order loop   -- varying order type: rowmajor or colmajor
12310  *                trans loop    -- varying uplo type: upper or lower
12311  *                  lda loop      -- varying lda: m, m+1, 2m
12312  *                    incx loop     -- varying incx: -2, -1, 1, 2
12313  *                      incy loop     -- varying incy: -2, -1, 1, 2
12314  */
12315 {
12316   /* function name */
12317   const char fname[] = "BLAS_cgemv2_c_s_x";
12318 
12319   /* max number of debug lines to print */
12320   const int max_print = 8;
12321 
12322   /* Variables in the "x_val" form are loop vars for corresponding
12323      variables */
12324   int i;			/* iterate through the repeating tests */
12325   int j, k;			/* multipurpose counters or variables */
12326   int iy;			/* use to index y */
12327   int incx_val, incy_val,	/* for testing different inc values */
12328     incx, incy;
12329   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
12330   int d_count;			/* counter for debug */
12331   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
12332   int p_count;			/* counter for the number of debug lines printed */
12333   int tot_tests;		/* total number of tests to be done */
12334   int norm;			/* input values of near underflow/one/overflow */
12335   double ratio_max;		/* the current maximum ratio */
12336   double ratio_min;		/* the current minimum ratio */
12337   double *ratios;		/* a temporary variable for calculating ratio */
12338   double ratio;			/* the per-use test ratio from test() */
12339   int bad_ratios;		/* the number of ratios over the threshold */
12340   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
12341   double un_int;		/* the internal underflow threshold */
12342   float alpha[2];
12343   float beta[2];
12344   float *A;
12345   float *head_x;
12346   float *tail_x;
12347   float *y;
12348   float *temp;			/* use for calculating ratio */
12349 
12350   /* x_gen and y_gen are used to store vectors generated by testgen.
12351      they eventually are copied back to x and y */
12352   float *head_x_gen;
12353   float *tail_x_gen;
12354   float *y_gen;
12355 
12356   /* the true r calculated by testgen(), in double-double */
12357   double *head_r_true, *tail_r_true;
12358 
12359   int alpha_val;
12360   int alpha_flag;		/* input flag for BLAS_cgemv2_c_s_testgen */
12361   int beta_val;
12362   int beta_flag;		/* input flag for BLAS_cgemv2_c_s_testgen */
12363   int order_val;
12364   enum blas_order_type order_type;
12365   int prec_val;
12366   enum blas_prec_type prec;
12367   int trans_val;
12368   enum blas_trans_type trans_type;
12369   int m_i;
12370   int n_i;
12371   int max_mn;			/* the max of m and n */
12372   int lda_val;
12373   int lda;
12374   int saved_seed;		/* for saving the original seed */
12375   int count, old_count;		/* use for counting the number of testgen calls * 2 */
12376 
12377   FPU_FIX_DECL;
12378 
12379   /* test for bad arguments */
12380   if (n < 0 || m < 0 || ntests < 0)
12381     BLAS_error(fname, 0, 0, NULL);
12382 
12383   /* initialization */
12384   *num_bad_ratio = 0;
12385   *num_tests = 0;
12386   *min_ratio = 0.0;
12387 
12388   saved_seed = *seed;
12389   ratio_min = 1e308;
12390   ratio_max = 0.0;
12391   ratio = 0.0;
12392   tot_tests = 0;
12393   p_count = 0;
12394   count = 0;
12395   find_max_ratio = 0;
12396   bad_ratios = 0;
12397   old_count = 0;
12398 
12399   if (debug == 3)
12400     find_max_ratio = 1;
12401   max_mn = MAX(m, n);
12402   if (m == 0 || n == 0) {
12403     return 0.0;
12404   }
12405 
12406   FPU_FIX_START;
12407 
12408   incy_gen = 1;
12409   incy_gen *= 2;
12410 
12411   /* get space for calculation */
12412   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
12413   if (max_mn * 2 > 0 && head_x == NULL) {
12414     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12415   }
12416   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
12417   if (max_mn * 2 > 0 && tail_x == NULL) {
12418     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12419   }
12420   y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
12421   if (max_mn * 2 > 0 && y == NULL) {
12422     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12423   }
12424   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
12425   if (max_mn > 0 && head_x_gen == NULL) {
12426     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12427   }
12428   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
12429   if (max_mn > 0 && tail_x_gen == NULL) {
12430     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12431   }
12432   y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
12433   if (max_mn > 0 && y_gen == NULL) {
12434     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12435   }
12436   temp = (float *) blas_malloc(max_mn * sizeof(float) * 2);
12437   if (max_mn > 0 && temp == NULL) {
12438     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12439   }
12440   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
12441   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
12442   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
12443     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12444   }
12445   ratios = (double *) blas_malloc(max_mn * sizeof(double));
12446   if (max_mn > 0 && ratios == NULL) {
12447     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12448   }
12449   A =
12450     (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float) *
12451 			  2);
12452   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
12453     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12454   }
12455 
12456   /* The debug iteration:
12457      If debug=1, then will execute the iteration twice. First, compute the
12458      max ratio. Second, print info if ratio > (50% * ratio_max). */
12459   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
12460     bad_ratios = 0;		/* set to zero */
12461 
12462     if ((debug == 3) && (d_count == find_max_ratio))
12463       *seed = saved_seed;	/* restore the original seed */
12464 
12465     /* varying alpha */
12466     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
12467       alpha_flag = 0;
12468       switch (alpha_val) {
12469       case 0:
12470 	alpha[0] = alpha[1] = 0.0;
12471 	alpha_flag = 1;
12472 	break;
12473       case 1:
12474 	alpha[0] = 1.0;
12475 	alpha[1] = 0.0;
12476 	alpha_flag = 1;
12477 	break;
12478       }
12479 
12480       /* varying beta */
12481       for (beta_val = 0; beta_val < 3; beta_val++) {
12482 	beta_flag = 0;
12483 	switch (beta_val) {
12484 	case 0:
12485 	  beta[0] = beta[1] = 0.0;
12486 	  beta_flag = 1;
12487 	  break;
12488 	case 1:
12489 	  beta[0] = 1.0;
12490 	  beta[1] = 0.0;
12491 	  beta_flag = 1;
12492 	  break;
12493 	}
12494 
12495 
12496 	/* varying extra precs */
12497 	for (prec_val = 0; prec_val <= 2; prec_val++) {
12498 	  switch (prec_val) {
12499 	  case 0:
12500 	    eps_int = power(2, -BITS_S);
12501 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
12502 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
12503 	    prec = blas_prec_single;
12504 	    break;
12505 	  case 1:
12506 	    eps_int = power(2, -BITS_D);
12507 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
12508 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
12509 	    prec = blas_prec_double;
12510 	    break;
12511 	  case 2:
12512 	  default:
12513 	    eps_int = power(2, -BITS_E);
12514 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
12515 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
12516 	    prec = blas_prec_extra;
12517 	    break;
12518 	  }
12519 
12520 	  /* values near underflow, 1, or overflow */
12521 	  for (norm = -1; norm <= 1; norm++) {
12522 
12523 	    /* number of tests */
12524 	    for (i = 0; i < ntests; i++) {
12525 
12526 	      /* row or col major */
12527 	      for (order_val = 0; order_val < 2; order_val++) {
12528 		switch (order_val) {
12529 		case 0:
12530 		  order_type = blas_rowmajor;
12531 		  break;
12532 		case 1:
12533 		default:
12534 		  order_type = blas_colmajor;
12535 		  break;
12536 		}
12537 
12538 		/* no_trans, trans, or conj_trans */
12539 		for (trans_val = 0; trans_val < 3; trans_val++) {
12540 		  switch (trans_val) {
12541 		  case 0:
12542 		    trans_type = blas_no_trans;
12543 		    m_i = m;
12544 		    n_i = n;
12545 		    break;
12546 		  case 1:
12547 		    trans_type = blas_trans;
12548 		    m_i = n;
12549 		    n_i = m;
12550 		    break;
12551 		  case 2:
12552 		  default:
12553 		    trans_type = blas_conj_trans;
12554 		    m_i = n;
12555 		    n_i = m;
12556 		    break;
12557 		  }
12558 
12559 		  /* lda=n, n+1, or 2n */
12560 		  for (lda_val = 0; lda_val < 3; lda_val++) {
12561 		    switch (lda_val) {
12562 		    case 0:
12563 		      lda = m_i;
12564 		      break;
12565 		    case 1:
12566 		      lda = m_i + 1;
12567 		      break;
12568 		    case 2:
12569 		    default:
12570 		      lda = 2 * m_i;
12571 		      break;
12572 		    }
12573 		    if ((order_type == blas_rowmajor && lda < n) ||
12574 			(order_type == blas_colmajor && lda < m))
12575 		      continue;
12576 
12577 		    /* For the sake of speed, we throw out this case at random */
12578 		    if (xrand(seed) >= test_prob)
12579 		      continue;
12580 
12581 		    /* in the trivial cases, no need to run testgen */
12582 		    if (m > 0 && n > 0)
12583 		      BLAS_cgemv2_c_s_testgen(norm, order_type, trans_type, m,
12584 					      n, &alpha, alpha_flag, A, lda,
12585 					      head_x_gen, tail_x_gen, &beta,
12586 					      beta_flag, y_gen, seed,
12587 					      head_r_true, tail_r_true);
12588 
12589 		    count++;
12590 
12591 		    /* varying incx */
12592 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
12593 		      if (incx_val == 0)
12594 			continue;
12595 
12596 		      /* setting incx */
12597 		      incx = incx_val;
12598 
12599 
12600 		      scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
12601 		      scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
12602 
12603 		      /* varying incy */
12604 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
12605 			if (incy_val == 0)
12606 			  continue;
12607 
12608 			/* setting incy */
12609 			incy = incy_val;
12610 			incy *= 2;
12611 
12612 			ccopy_vector(y_gen, m_i, 1, y, incy_val);
12613 
12614 			/* call BLAS_cgemv2_c_s_x */
12615 			FPU_FIX_STOP;
12616 			BLAS_cgemv2_c_s_x(order_type, trans_type, m, n, alpha,
12617 					  A, lda, head_x, tail_x, incx_val,
12618 					  beta, y, incy_val, prec);
12619 			FPU_FIX_START;
12620 
12621 			/* set y starting index */
12622 			iy = 0;
12623 			if (incy < 0)
12624 			  iy = -(m_i - 1) * incy;
12625 
12626 			/* computing the ratio */
12627 			if (m > 0 && n > 0)
12628 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
12629 			    /* copy row j of A to temp */
12630 			    cge_copy_row(order_type, trans_type, m_i, n_i, A,
12631 					 lda, temp, j);
12632 
12633 			    test_BLAS_cdot2_c_s(n_i, blas_no_conj, alpha,
12634 						beta, &y_gen[k], &y[iy],
12635 						&head_r_true[k],
12636 						&tail_r_true[k], temp, 1,
12637 						head_x, tail_x, incx_val,
12638 						eps_int, un_int, &ratios[j]);
12639 
12640 			    /* take the max ratio */
12641 			    if (j == 0) {
12642 			      ratio = ratios[0];
12643 			      /* The !<= below causes NaN error to be detected.
12644 			         Note that (NaN > thresh) is always false. */
12645 			    } else if (!(ratios[j] <= ratio)) {
12646 			      ratio = ratios[j];
12647 			    }
12648 			    iy += incy;
12649 			  }
12650 
12651 			/* Increase the number of bad ratio, if the ratio
12652 			   is bigger than the threshold.
12653 			   The !<= below causes NaN error to be detected.
12654 			   Note that (NaN > thresh) is always false. */
12655 			if (!(ratio <= thresh)) {
12656 			  bad_ratios++;
12657 
12658 			  if ((debug == 3) &&	/* print only when debug is on */
12659 			      (count != old_count) &&	/* print if old vector is different
12660 							   from the current one */
12661 			      (d_count == find_max_ratio) &&
12662 			      (p_count <= max_print) &&
12663 			      (ratio > 0.5 * ratio_max)) {
12664 			    old_count = count;
12665 
12666 			    printf
12667 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
12668 			       fname, m, n, ntests, thresh);
12669 
12670 			    /* Print test info */
12671 			    switch (prec) {
12672 			    case blas_prec_single:
12673 			      printf("single ");
12674 			      break;
12675 			    case blas_prec_double:
12676 			      printf("double ");
12677 			      break;
12678 			    case blas_prec_indigenous:
12679 			      printf("indigenous ");
12680 			      break;
12681 			    case blas_prec_extra:
12682 			      printf("extra ");
12683 			      break;
12684 			    }
12685 			    switch (norm) {
12686 			    case -1:
12687 			      printf("near_underflow ");
12688 			      break;
12689 			    case 0:
12690 			      printf("near_one ");
12691 			      break;
12692 			    case 1:
12693 			      printf("near_overflow ");
12694 			      break;
12695 			    }
12696 			    switch (order_type) {
12697 			    case blas_rowmajor:
12698 			      printf("row_major ");
12699 			      break;
12700 			    case blas_colmajor:
12701 			      printf("col_major ");
12702 			      break;
12703 			    }
12704 			    switch (trans_type) {
12705 			    case blas_no_trans:
12706 			      printf("no_trans ");
12707 			      break;
12708 			    case blas_trans:
12709 			      printf("trans ");
12710 			      break;
12711 			    case blas_conj_trans:
12712 			      printf("conj_trans ");
12713 			      break;
12714 			    }
12715 
12716 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
12717 				   incy);
12718 
12719 			    cge_print_matrix(A, m_i, n_i, lda, order_type,
12720 					     "A");
12721 
12722 			    sprint_vector(head_x, n_i, incx_val, "head_x");
12723 			    sprint_vector(tail_x, n_i, incx_val, "tail_x");
12724 			    cprint_vector(y_gen, m_i, 1, "y_gen");
12725 			    cprint_vector(y, m_i, incy_val, "y_final");
12726 
12727 			    printf("      ");
12728 			    printf("alpha = ");
12729 			    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
12730 			    printf("\n      ");
12731 			    printf("beta = ");
12732 			    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
12733 			    printf("\n");
12734 			    for (j = 0, k = 0; j < m_i * incy_gen;
12735 				 j += incy_gen, k++) {
12736 			      printf("      ");
12737 			      printf
12738 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
12739 				 head_r_true[j], tail_r_true[j],
12740 				 head_r_true[j + 1], tail_r_true[j + 1]);
12741 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
12742 			    }
12743 
12744 			    printf("      ratio=%.4e\n", ratio);
12745 			    p_count++;
12746 			  }
12747 			  if (bad_ratios >= MAX_BAD_TESTS) {
12748 			    printf("\ntoo many failures, exiting....");
12749 			    printf("\nTesting and compilation");
12750 			    printf(" are incomplete\n\n");
12751 			    goto end;
12752 			  }
12753 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
12754 			    printf("\nFlagrant ratio error, exiting...");
12755 			    printf("\nTesting and compilation");
12756 			    printf(" are incomplete\n\n");
12757 			    goto end;
12758 			  }
12759 			}
12760 			if (d_count == 0) {
12761 			  if (ratio > ratio_max)
12762 			    ratio_max = ratio;
12763 
12764 			  if (ratio != 0.0 && ratio < ratio_min)
12765 			    ratio_min = ratio;
12766 
12767 			  tot_tests++;
12768 			}
12769 		      }		/* incy */
12770 		    }		/* incx */
12771 		  }		/* lda */
12772 		}		/* trans */
12773 	      }			/* order */
12774 	    }			/* tests */
12775 	  }			/* norm */
12776 	}			/* prec */
12777       }				/* beta */
12778     }				/* alpha */
12779   }				/* debug */
12780 
12781   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
12782     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
12783 	   fname, m, n, ntests, thresh);
12784     printf
12785       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
12786        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
12787        ratio_min, ratio_max);
12788   }
12789 
12790 end:
12791   FPU_FIX_STOP;
12792 
12793   blas_free(head_x);
12794   blas_free(tail_x);
12795   blas_free(y);
12796   blas_free(head_x_gen);
12797   blas_free(tail_x_gen);
12798   blas_free(y_gen);
12799   blas_free(temp);
12800   blas_free(A);
12801   blas_free(head_r_true);
12802   blas_free(tail_r_true);
12803   blas_free(ratios);
12804 
12805   *min_ratio = ratio_min;
12806   *num_bad_ratio = bad_ratios;
12807   *num_tests = tot_tests;
12808   return ratio_max;
12809 }
do_test_cgemv2_s_c_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)12810 double do_test_cgemv2_s_c_x(int m, int n, int ntests, int *seed,
12811 			    double thresh, int debug, float test_prob,
12812 			    double *min_ratio, int *num_bad_ratio,
12813 			    int *num_tests)
12814 
12815 /*
12816  * Purpose
12817  * =======
12818  *
12819  * Runs a series of tests on GEMV2.
12820  *
12821  * Arguments
12822  * =========
12823  *
12824  * m         (input) int
12825  *           The number of rows
12826  *
12827  * n         (input) int
12828  *           The number of columns
12829  *
12830  * ntests    (input) int
12831  *           The number of tests to run for each set of attributes.
12832  *
12833  * seed      (input/output) int
12834  *           The seed for the random number generator used in testgen().
12835  *
12836  * thresh    (input) double
12837  *           When the ratio returned from test() exceeds the specified
12838  *           threshold, the current size, r_true, r_comp, and ratio will be
12839  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
12840  *           to ~10.)
12841  *
12842  * debug     (input) int
12843  *           If debug=3, print summary
12844  *           If debug=2, print summary only if the number of bad ratios > 0
12845  *           If debug=1, print complete info if tests fail
12846  *           If debug=0, return max ratio
12847  *
12848  * test_prob (input) float
12849  *           The specified test will be performed only if the generated
12850  *           random exceeds this threshold.
12851  *
12852  * min_ratio (output) double
12853  *           The minimum ratio
12854  *
12855  * num_bad_ratio (output) int
12856  *               The number of tests fail; they are above the threshold.
12857  *
12858  * num_tests (output) int
12859  *           The number of tests is being performed.
12860  *
12861  * Return value
12862  * ============
12863  *
12864  * The maximum ratio if run successfully, otherwise return -1
12865  *
12866  * Code structure
12867  * ==============
12868  *
12869  *  debug loop  -- if debug is one, the first loop computes the max ratio
12870  *              -- and the last(second) loop outputs debugging information,
12871  *              -- if the test fail and its ratio > 0.5 * max ratio.
12872  *              -- if debug is zero, the loop is executed once
12873  *    alpha loop  -- varying alpha: 0, 1, or random
12874  *      beta loop   -- varying beta: 0, 1, or random
12875  *        prec loop   -- varying internal prec: single, double, or extra
12876  *          norm loop   -- varying norm: near undeflow, near one, or
12877  *                        -- near overflow
12878  *            numtest loop  -- how many times the test is perform with
12879  *                            -- above set of attributes
12880  *              order loop   -- varying order type: rowmajor or colmajor
12881  *                trans loop    -- varying uplo type: upper or lower
12882  *                  lda loop      -- varying lda: m, m+1, 2m
12883  *                    incx loop     -- varying incx: -2, -1, 1, 2
12884  *                      incy loop     -- varying incy: -2, -1, 1, 2
12885  */
12886 {
12887   /* function name */
12888   const char fname[] = "BLAS_cgemv2_s_c_x";
12889 
12890   /* max number of debug lines to print */
12891   const int max_print = 8;
12892 
12893   /* Variables in the "x_val" form are loop vars for corresponding
12894      variables */
12895   int i;			/* iterate through the repeating tests */
12896   int j, k;			/* multipurpose counters or variables */
12897   int iy;			/* use to index y */
12898   int incx_val, incy_val,	/* for testing different inc values */
12899     incx, incy;
12900   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
12901   int d_count;			/* counter for debug */
12902   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
12903   int p_count;			/* counter for the number of debug lines printed */
12904   int tot_tests;		/* total number of tests to be done */
12905   int norm;			/* input values of near underflow/one/overflow */
12906   double ratio_max;		/* the current maximum ratio */
12907   double ratio_min;		/* the current minimum ratio */
12908   double *ratios;		/* a temporary variable for calculating ratio */
12909   double ratio;			/* the per-use test ratio from test() */
12910   int bad_ratios;		/* the number of ratios over the threshold */
12911   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
12912   double un_int;		/* the internal underflow threshold */
12913   float alpha[2];
12914   float beta[2];
12915   float *A;
12916   float *head_x;
12917   float *tail_x;
12918   float *y;
12919   float *temp;			/* use for calculating ratio */
12920 
12921   /* x_gen and y_gen are used to store vectors generated by testgen.
12922      they eventually are copied back to x and y */
12923   float *head_x_gen;
12924   float *tail_x_gen;
12925   float *y_gen;
12926 
12927   /* the true r calculated by testgen(), in double-double */
12928   double *head_r_true, *tail_r_true;
12929 
12930   int alpha_val;
12931   int alpha_flag;		/* input flag for BLAS_cgemv2_s_c_testgen */
12932   int beta_val;
12933   int beta_flag;		/* input flag for BLAS_cgemv2_s_c_testgen */
12934   int order_val;
12935   enum blas_order_type order_type;
12936   int prec_val;
12937   enum blas_prec_type prec;
12938   int trans_val;
12939   enum blas_trans_type trans_type;
12940   int m_i;
12941   int n_i;
12942   int max_mn;			/* the max of m and n */
12943   int lda_val;
12944   int lda;
12945   int saved_seed;		/* for saving the original seed */
12946   int count, old_count;		/* use for counting the number of testgen calls * 2 */
12947 
12948   FPU_FIX_DECL;
12949 
12950   /* test for bad arguments */
12951   if (n < 0 || m < 0 || ntests < 0)
12952     BLAS_error(fname, 0, 0, NULL);
12953 
12954   /* initialization */
12955   *num_bad_ratio = 0;
12956   *num_tests = 0;
12957   *min_ratio = 0.0;
12958 
12959   saved_seed = *seed;
12960   ratio_min = 1e308;
12961   ratio_max = 0.0;
12962   ratio = 0.0;
12963   tot_tests = 0;
12964   p_count = 0;
12965   count = 0;
12966   find_max_ratio = 0;
12967   bad_ratios = 0;
12968   old_count = 0;
12969 
12970   if (debug == 3)
12971     find_max_ratio = 1;
12972   max_mn = MAX(m, n);
12973   if (m == 0 || n == 0) {
12974     return 0.0;
12975   }
12976 
12977   FPU_FIX_START;
12978 
12979   incy_gen = 1;
12980   incy_gen *= 2;
12981 
12982   /* get space for calculation */
12983   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
12984   if (max_mn * 2 > 0 && head_x == NULL) {
12985     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12986   }
12987   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
12988   if (max_mn * 2 > 0 && tail_x == NULL) {
12989     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12990   }
12991   y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
12992   if (max_mn * 2 > 0 && y == NULL) {
12993     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12994   }
12995   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
12996   if (max_mn > 0 && head_x_gen == NULL) {
12997     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
12998   }
12999   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
13000   if (max_mn > 0 && tail_x_gen == NULL) {
13001     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13002   }
13003   y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
13004   if (max_mn > 0 && y_gen == NULL) {
13005     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13006   }
13007   temp = (float *) blas_malloc(max_mn * sizeof(float));
13008   if (max_mn > 0 && temp == NULL) {
13009     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13010   }
13011   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
13012   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
13013   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
13014     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13015   }
13016   ratios = (double *) blas_malloc(max_mn * sizeof(double));
13017   if (max_mn > 0 && ratios == NULL) {
13018     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13019   }
13020   A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
13021   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
13022     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13023   }
13024 
13025   /* The debug iteration:
13026      If debug=1, then will execute the iteration twice. First, compute the
13027      max ratio. Second, print info if ratio > (50% * ratio_max). */
13028   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
13029     bad_ratios = 0;		/* set to zero */
13030 
13031     if ((debug == 3) && (d_count == find_max_ratio))
13032       *seed = saved_seed;	/* restore the original seed */
13033 
13034     /* varying alpha */
13035     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
13036       alpha_flag = 0;
13037       switch (alpha_val) {
13038       case 0:
13039 	alpha[0] = alpha[1] = 0.0;
13040 	alpha_flag = 1;
13041 	break;
13042       case 1:
13043 	alpha[0] = 1.0;
13044 	alpha[1] = 0.0;
13045 	alpha_flag = 1;
13046 	break;
13047       }
13048 
13049       /* varying beta */
13050       for (beta_val = 0; beta_val < 3; beta_val++) {
13051 	beta_flag = 0;
13052 	switch (beta_val) {
13053 	case 0:
13054 	  beta[0] = beta[1] = 0.0;
13055 	  beta_flag = 1;
13056 	  break;
13057 	case 1:
13058 	  beta[0] = 1.0;
13059 	  beta[1] = 0.0;
13060 	  beta_flag = 1;
13061 	  break;
13062 	}
13063 
13064 
13065 	/* varying extra precs */
13066 	for (prec_val = 0; prec_val <= 2; prec_val++) {
13067 	  switch (prec_val) {
13068 	  case 0:
13069 	    eps_int = power(2, -BITS_S);
13070 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
13071 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
13072 	    prec = blas_prec_single;
13073 	    break;
13074 	  case 1:
13075 	    eps_int = power(2, -BITS_D);
13076 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
13077 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
13078 	    prec = blas_prec_double;
13079 	    break;
13080 	  case 2:
13081 	  default:
13082 	    eps_int = power(2, -BITS_E);
13083 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
13084 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
13085 	    prec = blas_prec_extra;
13086 	    break;
13087 	  }
13088 
13089 	  /* values near underflow, 1, or overflow */
13090 	  for (norm = -1; norm <= 1; norm++) {
13091 
13092 	    /* number of tests */
13093 	    for (i = 0; i < ntests; i++) {
13094 
13095 	      /* row or col major */
13096 	      for (order_val = 0; order_val < 2; order_val++) {
13097 		switch (order_val) {
13098 		case 0:
13099 		  order_type = blas_rowmajor;
13100 		  break;
13101 		case 1:
13102 		default:
13103 		  order_type = blas_colmajor;
13104 		  break;
13105 		}
13106 
13107 		/* no_trans, trans, or conj_trans */
13108 		for (trans_val = 0; trans_val < 3; trans_val++) {
13109 		  switch (trans_val) {
13110 		  case 0:
13111 		    trans_type = blas_no_trans;
13112 		    m_i = m;
13113 		    n_i = n;
13114 		    break;
13115 		  case 1:
13116 		    trans_type = blas_trans;
13117 		    m_i = n;
13118 		    n_i = m;
13119 		    break;
13120 		  case 2:
13121 		  default:
13122 		    trans_type = blas_conj_trans;
13123 		    m_i = n;
13124 		    n_i = m;
13125 		    break;
13126 		  }
13127 
13128 		  /* lda=n, n+1, or 2n */
13129 		  for (lda_val = 0; lda_val < 3; lda_val++) {
13130 		    switch (lda_val) {
13131 		    case 0:
13132 		      lda = m_i;
13133 		      break;
13134 		    case 1:
13135 		      lda = m_i + 1;
13136 		      break;
13137 		    case 2:
13138 		    default:
13139 		      lda = 2 * m_i;
13140 		      break;
13141 		    }
13142 		    if ((order_type == blas_rowmajor && lda < n) ||
13143 			(order_type == blas_colmajor && lda < m))
13144 		      continue;
13145 
13146 		    /* For the sake of speed, we throw out this case at random */
13147 		    if (xrand(seed) >= test_prob)
13148 		      continue;
13149 
13150 		    /* in the trivial cases, no need to run testgen */
13151 		    if (m > 0 && n > 0)
13152 		      BLAS_cgemv2_s_c_testgen(norm, order_type, trans_type, m,
13153 					      n, &alpha, alpha_flag, A, lda,
13154 					      head_x_gen, tail_x_gen, &beta,
13155 					      beta_flag, y_gen, seed,
13156 					      head_r_true, tail_r_true);
13157 
13158 		    count++;
13159 
13160 		    /* varying incx */
13161 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
13162 		      if (incx_val == 0)
13163 			continue;
13164 
13165 		      /* setting incx */
13166 		      incx = incx_val;
13167 		      incx *= 2;
13168 
13169 		      ccopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
13170 		      ccopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
13171 
13172 		      /* varying incy */
13173 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
13174 			if (incy_val == 0)
13175 			  continue;
13176 
13177 			/* setting incy */
13178 			incy = incy_val;
13179 			incy *= 2;
13180 
13181 			ccopy_vector(y_gen, m_i, 1, y, incy_val);
13182 
13183 			/* call BLAS_cgemv2_s_c_x */
13184 			FPU_FIX_STOP;
13185 			BLAS_cgemv2_s_c_x(order_type, trans_type, m, n, alpha,
13186 					  A, lda, head_x, tail_x, incx_val,
13187 					  beta, y, incy_val, prec);
13188 			FPU_FIX_START;
13189 
13190 			/* set y starting index */
13191 			iy = 0;
13192 			if (incy < 0)
13193 			  iy = -(m_i - 1) * incy;
13194 
13195 			/* computing the ratio */
13196 			if (m > 0 && n > 0)
13197 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
13198 			    /* copy row j of A to temp */
13199 			    sge_copy_row(order_type, trans_type, m_i, n_i, A,
13200 					 lda, temp, j);
13201 
13202 			    test_BLAS_cdot2_s_c(n_i, blas_no_conj, alpha,
13203 						beta, &y_gen[k], &y[iy],
13204 						&head_r_true[k],
13205 						&tail_r_true[k], temp, 1,
13206 						head_x, tail_x, incx_val,
13207 						eps_int, un_int, &ratios[j]);
13208 
13209 			    /* take the max ratio */
13210 			    if (j == 0) {
13211 			      ratio = ratios[0];
13212 			      /* The !<= below causes NaN error to be detected.
13213 			         Note that (NaN > thresh) is always false. */
13214 			    } else if (!(ratios[j] <= ratio)) {
13215 			      ratio = ratios[j];
13216 			    }
13217 			    iy += incy;
13218 			  }
13219 
13220 			/* Increase the number of bad ratio, if the ratio
13221 			   is bigger than the threshold.
13222 			   The !<= below causes NaN error to be detected.
13223 			   Note that (NaN > thresh) is always false. */
13224 			if (!(ratio <= thresh)) {
13225 			  bad_ratios++;
13226 
13227 			  if ((debug == 3) &&	/* print only when debug is on */
13228 			      (count != old_count) &&	/* print if old vector is different
13229 							   from the current one */
13230 			      (d_count == find_max_ratio) &&
13231 			      (p_count <= max_print) &&
13232 			      (ratio > 0.5 * ratio_max)) {
13233 			    old_count = count;
13234 
13235 			    printf
13236 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
13237 			       fname, m, n, ntests, thresh);
13238 
13239 			    /* Print test info */
13240 			    switch (prec) {
13241 			    case blas_prec_single:
13242 			      printf("single ");
13243 			      break;
13244 			    case blas_prec_double:
13245 			      printf("double ");
13246 			      break;
13247 			    case blas_prec_indigenous:
13248 			      printf("indigenous ");
13249 			      break;
13250 			    case blas_prec_extra:
13251 			      printf("extra ");
13252 			      break;
13253 			    }
13254 			    switch (norm) {
13255 			    case -1:
13256 			      printf("near_underflow ");
13257 			      break;
13258 			    case 0:
13259 			      printf("near_one ");
13260 			      break;
13261 			    case 1:
13262 			      printf("near_overflow ");
13263 			      break;
13264 			    }
13265 			    switch (order_type) {
13266 			    case blas_rowmajor:
13267 			      printf("row_major ");
13268 			      break;
13269 			    case blas_colmajor:
13270 			      printf("col_major ");
13271 			      break;
13272 			    }
13273 			    switch (trans_type) {
13274 			    case blas_no_trans:
13275 			      printf("no_trans ");
13276 			      break;
13277 			    case blas_trans:
13278 			      printf("trans ");
13279 			      break;
13280 			    case blas_conj_trans:
13281 			      printf("conj_trans ");
13282 			      break;
13283 			    }
13284 
13285 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
13286 				   incy);
13287 
13288 			    sge_print_matrix(A, m_i, n_i, lda, order_type,
13289 					     "A");
13290 
13291 			    cprint_vector(head_x, n_i, incx_val, "head_x");
13292 			    cprint_vector(tail_x, n_i, incx_val, "tail_x");
13293 			    cprint_vector(y_gen, m_i, 1, "y_gen");
13294 			    cprint_vector(y, m_i, incy_val, "y_final");
13295 
13296 			    printf("      ");
13297 			    printf("alpha = ");
13298 			    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
13299 			    printf("\n      ");
13300 			    printf("beta = ");
13301 			    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
13302 			    printf("\n");
13303 			    for (j = 0, k = 0; j < m_i * incy_gen;
13304 				 j += incy_gen, k++) {
13305 			      printf("      ");
13306 			      printf
13307 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
13308 				 head_r_true[j], tail_r_true[j],
13309 				 head_r_true[j + 1], tail_r_true[j + 1]);
13310 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
13311 			    }
13312 
13313 			    printf("      ratio=%.4e\n", ratio);
13314 			    p_count++;
13315 			  }
13316 			  if (bad_ratios >= MAX_BAD_TESTS) {
13317 			    printf("\ntoo many failures, exiting....");
13318 			    printf("\nTesting and compilation");
13319 			    printf(" are incomplete\n\n");
13320 			    goto end;
13321 			  }
13322 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
13323 			    printf("\nFlagrant ratio error, exiting...");
13324 			    printf("\nTesting and compilation");
13325 			    printf(" are incomplete\n\n");
13326 			    goto end;
13327 			  }
13328 			}
13329 			if (d_count == 0) {
13330 			  if (ratio > ratio_max)
13331 			    ratio_max = ratio;
13332 
13333 			  if (ratio != 0.0 && ratio < ratio_min)
13334 			    ratio_min = ratio;
13335 
13336 			  tot_tests++;
13337 			}
13338 		      }		/* incy */
13339 		    }		/* incx */
13340 		  }		/* lda */
13341 		}		/* trans */
13342 	      }			/* order */
13343 	    }			/* tests */
13344 	  }			/* norm */
13345 	}			/* prec */
13346       }				/* beta */
13347     }				/* alpha */
13348   }				/* debug */
13349 
13350   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
13351     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
13352 	   fname, m, n, ntests, thresh);
13353     printf
13354       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
13355        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
13356        ratio_min, ratio_max);
13357   }
13358 
13359 end:
13360   FPU_FIX_STOP;
13361 
13362   blas_free(head_x);
13363   blas_free(tail_x);
13364   blas_free(y);
13365   blas_free(head_x_gen);
13366   blas_free(tail_x_gen);
13367   blas_free(y_gen);
13368   blas_free(temp);
13369   blas_free(A);
13370   blas_free(head_r_true);
13371   blas_free(tail_r_true);
13372   blas_free(ratios);
13373 
13374   *min_ratio = ratio_min;
13375   *num_bad_ratio = bad_ratios;
13376   *num_tests = tot_tests;
13377   return ratio_max;
13378 }
do_test_cgemv2_s_s_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)13379 double do_test_cgemv2_s_s_x(int m, int n, int ntests, int *seed,
13380 			    double thresh, int debug, float test_prob,
13381 			    double *min_ratio, int *num_bad_ratio,
13382 			    int *num_tests)
13383 
13384 /*
13385  * Purpose
13386  * =======
13387  *
13388  * Runs a series of tests on GEMV2.
13389  *
13390  * Arguments
13391  * =========
13392  *
13393  * m         (input) int
13394  *           The number of rows
13395  *
13396  * n         (input) int
13397  *           The number of columns
13398  *
13399  * ntests    (input) int
13400  *           The number of tests to run for each set of attributes.
13401  *
13402  * seed      (input/output) int
13403  *           The seed for the random number generator used in testgen().
13404  *
13405  * thresh    (input) double
13406  *           When the ratio returned from test() exceeds the specified
13407  *           threshold, the current size, r_true, r_comp, and ratio will be
13408  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
13409  *           to ~10.)
13410  *
13411  * debug     (input) int
13412  *           If debug=3, print summary
13413  *           If debug=2, print summary only if the number of bad ratios > 0
13414  *           If debug=1, print complete info if tests fail
13415  *           If debug=0, return max ratio
13416  *
13417  * test_prob (input) float
13418  *           The specified test will be performed only if the generated
13419  *           random exceeds this threshold.
13420  *
13421  * min_ratio (output) double
13422  *           The minimum ratio
13423  *
13424  * num_bad_ratio (output) int
13425  *               The number of tests fail; they are above the threshold.
13426  *
13427  * num_tests (output) int
13428  *           The number of tests is being performed.
13429  *
13430  * Return value
13431  * ============
13432  *
13433  * The maximum ratio if run successfully, otherwise return -1
13434  *
13435  * Code structure
13436  * ==============
13437  *
13438  *  debug loop  -- if debug is one, the first loop computes the max ratio
13439  *              -- and the last(second) loop outputs debugging information,
13440  *              -- if the test fail and its ratio > 0.5 * max ratio.
13441  *              -- if debug is zero, the loop is executed once
13442  *    alpha loop  -- varying alpha: 0, 1, or random
13443  *      beta loop   -- varying beta: 0, 1, or random
13444  *        prec loop   -- varying internal prec: single, double, or extra
13445  *          norm loop   -- varying norm: near undeflow, near one, or
13446  *                        -- near overflow
13447  *            numtest loop  -- how many times the test is perform with
13448  *                            -- above set of attributes
13449  *              order loop   -- varying order type: rowmajor or colmajor
13450  *                trans loop    -- varying uplo type: upper or lower
13451  *                  lda loop      -- varying lda: m, m+1, 2m
13452  *                    incx loop     -- varying incx: -2, -1, 1, 2
13453  *                      incy loop     -- varying incy: -2, -1, 1, 2
13454  */
13455 {
13456   /* function name */
13457   const char fname[] = "BLAS_cgemv2_s_s_x";
13458 
13459   /* max number of debug lines to print */
13460   const int max_print = 8;
13461 
13462   /* Variables in the "x_val" form are loop vars for corresponding
13463      variables */
13464   int i;			/* iterate through the repeating tests */
13465   int j, k;			/* multipurpose counters or variables */
13466   int iy;			/* use to index y */
13467   int incx_val, incy_val,	/* for testing different inc values */
13468     incx, incy;
13469   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
13470   int d_count;			/* counter for debug */
13471   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
13472   int p_count;			/* counter for the number of debug lines printed */
13473   int tot_tests;		/* total number of tests to be done */
13474   int norm;			/* input values of near underflow/one/overflow */
13475   double ratio_max;		/* the current maximum ratio */
13476   double ratio_min;		/* the current minimum ratio */
13477   double *ratios;		/* a temporary variable for calculating ratio */
13478   double ratio;			/* the per-use test ratio from test() */
13479   int bad_ratios;		/* the number of ratios over the threshold */
13480   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
13481   double un_int;		/* the internal underflow threshold */
13482   float alpha[2];
13483   float beta[2];
13484   float *A;
13485   float *head_x;
13486   float *tail_x;
13487   float *y;
13488   float *temp;			/* use for calculating ratio */
13489 
13490   /* x_gen and y_gen are used to store vectors generated by testgen.
13491      they eventually are copied back to x and y */
13492   float *head_x_gen;
13493   float *tail_x_gen;
13494   float *y_gen;
13495 
13496   /* the true r calculated by testgen(), in double-double */
13497   double *head_r_true, *tail_r_true;
13498 
13499   int alpha_val;
13500   int alpha_flag;		/* input flag for BLAS_cgemv2_s_s_testgen */
13501   int beta_val;
13502   int beta_flag;		/* input flag for BLAS_cgemv2_s_s_testgen */
13503   int order_val;
13504   enum blas_order_type order_type;
13505   int prec_val;
13506   enum blas_prec_type prec;
13507   int trans_val;
13508   enum blas_trans_type trans_type;
13509   int m_i;
13510   int n_i;
13511   int max_mn;			/* the max of m and n */
13512   int lda_val;
13513   int lda;
13514   int saved_seed;		/* for saving the original seed */
13515   int count, old_count;		/* use for counting the number of testgen calls * 2 */
13516 
13517   FPU_FIX_DECL;
13518 
13519   /* test for bad arguments */
13520   if (n < 0 || m < 0 || ntests < 0)
13521     BLAS_error(fname, 0, 0, NULL);
13522 
13523   /* initialization */
13524   *num_bad_ratio = 0;
13525   *num_tests = 0;
13526   *min_ratio = 0.0;
13527 
13528   saved_seed = *seed;
13529   ratio_min = 1e308;
13530   ratio_max = 0.0;
13531   ratio = 0.0;
13532   tot_tests = 0;
13533   p_count = 0;
13534   count = 0;
13535   find_max_ratio = 0;
13536   bad_ratios = 0;
13537   old_count = 0;
13538 
13539   if (debug == 3)
13540     find_max_ratio = 1;
13541   max_mn = MAX(m, n);
13542   if (m == 0 || n == 0) {
13543     return 0.0;
13544   }
13545 
13546   FPU_FIX_START;
13547 
13548   incy_gen = 1;
13549   incy_gen *= 2;
13550 
13551   /* get space for calculation */
13552   head_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
13553   if (max_mn * 2 > 0 && head_x == NULL) {
13554     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13555   }
13556   tail_x = (float *) blas_malloc(max_mn * 2 * sizeof(float));
13557   if (max_mn * 2 > 0 && tail_x == NULL) {
13558     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13559   }
13560   y = (float *) blas_malloc(max_mn * 2 * sizeof(float) * 2);
13561   if (max_mn * 2 > 0 && y == NULL) {
13562     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13563   }
13564   head_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
13565   if (max_mn > 0 && head_x_gen == NULL) {
13566     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13567   }
13568   tail_x_gen = (float *) blas_malloc(max_mn * sizeof(float));
13569   if (max_mn > 0 && tail_x_gen == NULL) {
13570     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13571   }
13572   y_gen = (float *) blas_malloc(max_mn * sizeof(float) * 2);
13573   if (max_mn > 0 && y_gen == NULL) {
13574     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13575   }
13576   temp = (float *) blas_malloc(max_mn * sizeof(float));
13577   if (max_mn > 0 && temp == NULL) {
13578     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13579   }
13580   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
13581   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
13582   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
13583     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13584   }
13585   ratios = (double *) blas_malloc(max_mn * sizeof(double));
13586   if (max_mn > 0 && ratios == NULL) {
13587     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13588   }
13589   A = (float *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(float));
13590   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
13591     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
13592   }
13593 
13594   /* The debug iteration:
13595      If debug=1, then will execute the iteration twice. First, compute the
13596      max ratio. Second, print info if ratio > (50% * ratio_max). */
13597   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
13598     bad_ratios = 0;		/* set to zero */
13599 
13600     if ((debug == 3) && (d_count == find_max_ratio))
13601       *seed = saved_seed;	/* restore the original seed */
13602 
13603     /* varying alpha */
13604     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
13605       alpha_flag = 0;
13606       switch (alpha_val) {
13607       case 0:
13608 	alpha[0] = alpha[1] = 0.0;
13609 	alpha_flag = 1;
13610 	break;
13611       case 1:
13612 	alpha[0] = 1.0;
13613 	alpha[1] = 0.0;
13614 	alpha_flag = 1;
13615 	break;
13616       }
13617 
13618       /* varying beta */
13619       for (beta_val = 0; beta_val < 3; beta_val++) {
13620 	beta_flag = 0;
13621 	switch (beta_val) {
13622 	case 0:
13623 	  beta[0] = beta[1] = 0.0;
13624 	  beta_flag = 1;
13625 	  break;
13626 	case 1:
13627 	  beta[0] = 1.0;
13628 	  beta[1] = 0.0;
13629 	  beta_flag = 1;
13630 	  break;
13631 	}
13632 
13633 
13634 	/* varying extra precs */
13635 	for (prec_val = 0; prec_val <= 2; prec_val++) {
13636 	  switch (prec_val) {
13637 	  case 0:
13638 	    eps_int = power(2, -BITS_S);
13639 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_single),
13640 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_single));
13641 	    prec = blas_prec_single;
13642 	    break;
13643 	  case 1:
13644 	    eps_int = power(2, -BITS_D);
13645 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
13646 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
13647 	    prec = blas_prec_double;
13648 	    break;
13649 	  case 2:
13650 	  default:
13651 	    eps_int = power(2, -BITS_E);
13652 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
13653 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
13654 	    prec = blas_prec_extra;
13655 	    break;
13656 	  }
13657 
13658 	  /* values near underflow, 1, or overflow */
13659 	  for (norm = -1; norm <= 1; norm++) {
13660 
13661 	    /* number of tests */
13662 	    for (i = 0; i < ntests; i++) {
13663 
13664 	      /* row or col major */
13665 	      for (order_val = 0; order_val < 2; order_val++) {
13666 		switch (order_val) {
13667 		case 0:
13668 		  order_type = blas_rowmajor;
13669 		  break;
13670 		case 1:
13671 		default:
13672 		  order_type = blas_colmajor;
13673 		  break;
13674 		}
13675 
13676 		/* no_trans, trans, or conj_trans */
13677 		for (trans_val = 0; trans_val < 3; trans_val++) {
13678 		  switch (trans_val) {
13679 		  case 0:
13680 		    trans_type = blas_no_trans;
13681 		    m_i = m;
13682 		    n_i = n;
13683 		    break;
13684 		  case 1:
13685 		    trans_type = blas_trans;
13686 		    m_i = n;
13687 		    n_i = m;
13688 		    break;
13689 		  case 2:
13690 		  default:
13691 		    trans_type = blas_conj_trans;
13692 		    m_i = n;
13693 		    n_i = m;
13694 		    break;
13695 		  }
13696 
13697 		  /* lda=n, n+1, or 2n */
13698 		  for (lda_val = 0; lda_val < 3; lda_val++) {
13699 		    switch (lda_val) {
13700 		    case 0:
13701 		      lda = m_i;
13702 		      break;
13703 		    case 1:
13704 		      lda = m_i + 1;
13705 		      break;
13706 		    case 2:
13707 		    default:
13708 		      lda = 2 * m_i;
13709 		      break;
13710 		    }
13711 		    if ((order_type == blas_rowmajor && lda < n) ||
13712 			(order_type == blas_colmajor && lda < m))
13713 		      continue;
13714 
13715 		    /* For the sake of speed, we throw out this case at random */
13716 		    if (xrand(seed) >= test_prob)
13717 		      continue;
13718 
13719 		    /* in the trivial cases, no need to run testgen */
13720 		    if (m > 0 && n > 0)
13721 		      BLAS_cgemv2_s_s_testgen(norm, order_type, trans_type, m,
13722 					      n, &alpha, alpha_flag, A, lda,
13723 					      head_x_gen, tail_x_gen, &beta,
13724 					      beta_flag, y_gen, seed,
13725 					      head_r_true, tail_r_true);
13726 
13727 		    count++;
13728 
13729 		    /* varying incx */
13730 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
13731 		      if (incx_val == 0)
13732 			continue;
13733 
13734 		      /* setting incx */
13735 		      incx = incx_val;
13736 
13737 
13738 		      scopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
13739 		      scopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
13740 
13741 		      /* varying incy */
13742 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
13743 			if (incy_val == 0)
13744 			  continue;
13745 
13746 			/* setting incy */
13747 			incy = incy_val;
13748 			incy *= 2;
13749 
13750 			ccopy_vector(y_gen, m_i, 1, y, incy_val);
13751 
13752 			/* call BLAS_cgemv2_s_s_x */
13753 			FPU_FIX_STOP;
13754 			BLAS_cgemv2_s_s_x(order_type, trans_type, m, n, alpha,
13755 					  A, lda, head_x, tail_x, incx_val,
13756 					  beta, y, incy_val, prec);
13757 			FPU_FIX_START;
13758 
13759 			/* set y starting index */
13760 			iy = 0;
13761 			if (incy < 0)
13762 			  iy = -(m_i - 1) * incy;
13763 
13764 			/* computing the ratio */
13765 			if (m > 0 && n > 0)
13766 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
13767 			    /* copy row j of A to temp */
13768 			    sge_copy_row(order_type, trans_type, m_i, n_i, A,
13769 					 lda, temp, j);
13770 
13771 			    test_BLAS_cdot2_s_s(n_i, blas_no_conj, alpha,
13772 						beta, &y_gen[k], &y[iy],
13773 						&head_r_true[k],
13774 						&tail_r_true[k], temp, 1,
13775 						head_x, tail_x, incx_val,
13776 						eps_int, un_int, &ratios[j]);
13777 
13778 			    /* take the max ratio */
13779 			    if (j == 0) {
13780 			      ratio = ratios[0];
13781 			      /* The !<= below causes NaN error to be detected.
13782 			         Note that (NaN > thresh) is always false. */
13783 			    } else if (!(ratios[j] <= ratio)) {
13784 			      ratio = ratios[j];
13785 			    }
13786 			    iy += incy;
13787 			  }
13788 
13789 			/* Increase the number of bad ratio, if the ratio
13790 			   is bigger than the threshold.
13791 			   The !<= below causes NaN error to be detected.
13792 			   Note that (NaN > thresh) is always false. */
13793 			if (!(ratio <= thresh)) {
13794 			  bad_ratios++;
13795 
13796 			  if ((debug == 3) &&	/* print only when debug is on */
13797 			      (count != old_count) &&	/* print if old vector is different
13798 							   from the current one */
13799 			      (d_count == find_max_ratio) &&
13800 			      (p_count <= max_print) &&
13801 			      (ratio > 0.5 * ratio_max)) {
13802 			    old_count = count;
13803 
13804 			    printf
13805 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
13806 			       fname, m, n, ntests, thresh);
13807 
13808 			    /* Print test info */
13809 			    switch (prec) {
13810 			    case blas_prec_single:
13811 			      printf("single ");
13812 			      break;
13813 			    case blas_prec_double:
13814 			      printf("double ");
13815 			      break;
13816 			    case blas_prec_indigenous:
13817 			      printf("indigenous ");
13818 			      break;
13819 			    case blas_prec_extra:
13820 			      printf("extra ");
13821 			      break;
13822 			    }
13823 			    switch (norm) {
13824 			    case -1:
13825 			      printf("near_underflow ");
13826 			      break;
13827 			    case 0:
13828 			      printf("near_one ");
13829 			      break;
13830 			    case 1:
13831 			      printf("near_overflow ");
13832 			      break;
13833 			    }
13834 			    switch (order_type) {
13835 			    case blas_rowmajor:
13836 			      printf("row_major ");
13837 			      break;
13838 			    case blas_colmajor:
13839 			      printf("col_major ");
13840 			      break;
13841 			    }
13842 			    switch (trans_type) {
13843 			    case blas_no_trans:
13844 			      printf("no_trans ");
13845 			      break;
13846 			    case blas_trans:
13847 			      printf("trans ");
13848 			      break;
13849 			    case blas_conj_trans:
13850 			      printf("conj_trans ");
13851 			      break;
13852 			    }
13853 
13854 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
13855 				   incy);
13856 
13857 			    sge_print_matrix(A, m_i, n_i, lda, order_type,
13858 					     "A");
13859 
13860 			    sprint_vector(head_x, n_i, incx_val, "head_x");
13861 			    sprint_vector(tail_x, n_i, incx_val, "tail_x");
13862 			    cprint_vector(y_gen, m_i, 1, "y_gen");
13863 			    cprint_vector(y, m_i, incy_val, "y_final");
13864 
13865 			    printf("      ");
13866 			    printf("alpha = ");
13867 			    printf("(%16.8e, %16.8e)", alpha[0], alpha[1]);
13868 			    printf("\n      ");
13869 			    printf("beta = ");
13870 			    printf("(%16.8e, %16.8e)", beta[0], beta[1]);
13871 			    printf("\n");
13872 			    for (j = 0, k = 0; j < m_i * incy_gen;
13873 				 j += incy_gen, k++) {
13874 			      printf("      ");
13875 			      printf
13876 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
13877 				 head_r_true[j], tail_r_true[j],
13878 				 head_r_true[j + 1], tail_r_true[j + 1]);
13879 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
13880 			    }
13881 
13882 			    printf("      ratio=%.4e\n", ratio);
13883 			    p_count++;
13884 			  }
13885 			  if (bad_ratios >= MAX_BAD_TESTS) {
13886 			    printf("\ntoo many failures, exiting....");
13887 			    printf("\nTesting and compilation");
13888 			    printf(" are incomplete\n\n");
13889 			    goto end;
13890 			  }
13891 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
13892 			    printf("\nFlagrant ratio error, exiting...");
13893 			    printf("\nTesting and compilation");
13894 			    printf(" are incomplete\n\n");
13895 			    goto end;
13896 			  }
13897 			}
13898 			if (d_count == 0) {
13899 			  if (ratio > ratio_max)
13900 			    ratio_max = ratio;
13901 
13902 			  if (ratio != 0.0 && ratio < ratio_min)
13903 			    ratio_min = ratio;
13904 
13905 			  tot_tests++;
13906 			}
13907 		      }		/* incy */
13908 		    }		/* incx */
13909 		  }		/* lda */
13910 		}		/* trans */
13911 	      }			/* order */
13912 	    }			/* tests */
13913 	  }			/* norm */
13914 	}			/* prec */
13915       }				/* beta */
13916     }				/* alpha */
13917   }				/* debug */
13918 
13919   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
13920     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
13921 	   fname, m, n, ntests, thresh);
13922     printf
13923       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
13924        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
13925        ratio_min, ratio_max);
13926   }
13927 
13928 end:
13929   FPU_FIX_STOP;
13930 
13931   blas_free(head_x);
13932   blas_free(tail_x);
13933   blas_free(y);
13934   blas_free(head_x_gen);
13935   blas_free(tail_x_gen);
13936   blas_free(y_gen);
13937   blas_free(temp);
13938   blas_free(A);
13939   blas_free(head_r_true);
13940   blas_free(tail_r_true);
13941   blas_free(ratios);
13942 
13943   *min_ratio = ratio_min;
13944   *num_bad_ratio = bad_ratios;
13945   *num_tests = tot_tests;
13946   return ratio_max;
13947 }
do_test_zgemv2_z_d_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)13948 double do_test_zgemv2_z_d_x(int m, int n, int ntests, int *seed,
13949 			    double thresh, int debug, float test_prob,
13950 			    double *min_ratio, int *num_bad_ratio,
13951 			    int *num_tests)
13952 
13953 /*
13954  * Purpose
13955  * =======
13956  *
13957  * Runs a series of tests on GEMV2.
13958  *
13959  * Arguments
13960  * =========
13961  *
13962  * m         (input) int
13963  *           The number of rows
13964  *
13965  * n         (input) int
13966  *           The number of columns
13967  *
13968  * ntests    (input) int
13969  *           The number of tests to run for each set of attributes.
13970  *
13971  * seed      (input/output) int
13972  *           The seed for the random number generator used in testgen().
13973  *
13974  * thresh    (input) double
13975  *           When the ratio returned from test() exceeds the specified
13976  *           threshold, the current size, r_true, r_comp, and ratio will be
13977  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
13978  *           to ~10.)
13979  *
13980  * debug     (input) int
13981  *           If debug=3, print summary
13982  *           If debug=2, print summary only if the number of bad ratios > 0
13983  *           If debug=1, print complete info if tests fail
13984  *           If debug=0, return max ratio
13985  *
13986  * test_prob (input) float
13987  *           The specified test will be performed only if the generated
13988  *           random exceeds this threshold.
13989  *
13990  * min_ratio (output) double
13991  *           The minimum ratio
13992  *
13993  * num_bad_ratio (output) int
13994  *               The number of tests fail; they are above the threshold.
13995  *
13996  * num_tests (output) int
13997  *           The number of tests is being performed.
13998  *
13999  * Return value
14000  * ============
14001  *
14002  * The maximum ratio if run successfully, otherwise return -1
14003  *
14004  * Code structure
14005  * ==============
14006  *
14007  *  debug loop  -- if debug is one, the first loop computes the max ratio
14008  *              -- and the last(second) loop outputs debugging information,
14009  *              -- if the test fail and its ratio > 0.5 * max ratio.
14010  *              -- if debug is zero, the loop is executed once
14011  *    alpha loop  -- varying alpha: 0, 1, or random
14012  *      beta loop   -- varying beta: 0, 1, or random
14013  *        prec loop   -- varying internal prec: single, double, or extra
14014  *          norm loop   -- varying norm: near undeflow, near one, or
14015  *                        -- near overflow
14016  *            numtest loop  -- how many times the test is perform with
14017  *                            -- above set of attributes
14018  *              order loop   -- varying order type: rowmajor or colmajor
14019  *                trans loop    -- varying uplo type: upper or lower
14020  *                  lda loop      -- varying lda: m, m+1, 2m
14021  *                    incx loop     -- varying incx: -2, -1, 1, 2
14022  *                      incy loop     -- varying incy: -2, -1, 1, 2
14023  */
14024 {
14025   /* function name */
14026   const char fname[] = "BLAS_zgemv2_z_d_x";
14027 
14028   /* max number of debug lines to print */
14029   const int max_print = 8;
14030 
14031   /* Variables in the "x_val" form are loop vars for corresponding
14032      variables */
14033   int i;			/* iterate through the repeating tests */
14034   int j, k;			/* multipurpose counters or variables */
14035   int iy;			/* use to index y */
14036   int incx_val, incy_val,	/* for testing different inc values */
14037     incx, incy;
14038   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
14039   int d_count;			/* counter for debug */
14040   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
14041   int p_count;			/* counter for the number of debug lines printed */
14042   int tot_tests;		/* total number of tests to be done */
14043   int norm;			/* input values of near underflow/one/overflow */
14044   double ratio_max;		/* the current maximum ratio */
14045   double ratio_min;		/* the current minimum ratio */
14046   double *ratios;		/* a temporary variable for calculating ratio */
14047   double ratio;			/* the per-use test ratio from test() */
14048   int bad_ratios;		/* the number of ratios over the threshold */
14049   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
14050   double un_int;		/* the internal underflow threshold */
14051   double alpha[2];
14052   double beta[2];
14053   double *A;
14054   double *head_x;
14055   double *tail_x;
14056   double *y;
14057   double *temp;			/* use for calculating ratio */
14058 
14059   /* x_gen and y_gen are used to store vectors generated by testgen.
14060      they eventually are copied back to x and y */
14061   double *head_x_gen;
14062   double *tail_x_gen;
14063   double *y_gen;
14064 
14065   /* the true r calculated by testgen(), in double-double */
14066   double *head_r_true, *tail_r_true;
14067 
14068   int alpha_val;
14069   int alpha_flag;		/* input flag for BLAS_zgemv2_z_d_testgen */
14070   int beta_val;
14071   int beta_flag;		/* input flag for BLAS_zgemv2_z_d_testgen */
14072   int order_val;
14073   enum blas_order_type order_type;
14074   int prec_val;
14075   enum blas_prec_type prec;
14076   int trans_val;
14077   enum blas_trans_type trans_type;
14078   int m_i;
14079   int n_i;
14080   int max_mn;			/* the max of m and n */
14081   int lda_val;
14082   int lda;
14083   int saved_seed;		/* for saving the original seed */
14084   int count, old_count;		/* use for counting the number of testgen calls * 2 */
14085 
14086   FPU_FIX_DECL;
14087 
14088   /* test for bad arguments */
14089   if (n < 0 || m < 0 || ntests < 0)
14090     BLAS_error(fname, 0, 0, NULL);
14091 
14092   /* initialization */
14093   *num_bad_ratio = 0;
14094   *num_tests = 0;
14095   *min_ratio = 0.0;
14096 
14097   saved_seed = *seed;
14098   ratio_min = 1e308;
14099   ratio_max = 0.0;
14100   ratio = 0.0;
14101   tot_tests = 0;
14102   p_count = 0;
14103   count = 0;
14104   find_max_ratio = 0;
14105   bad_ratios = 0;
14106   old_count = 0;
14107 
14108   if (debug == 3)
14109     find_max_ratio = 1;
14110   max_mn = MAX(m, n);
14111   if (m == 0 || n == 0) {
14112     return 0.0;
14113   }
14114 
14115   FPU_FIX_START;
14116 
14117   incy_gen = 1;
14118   incy_gen *= 2;
14119 
14120   /* get space for calculation */
14121   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
14122   if (max_mn * 2 > 0 && head_x == NULL) {
14123     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14124   }
14125   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
14126   if (max_mn * 2 > 0 && tail_x == NULL) {
14127     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14128   }
14129   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
14130   if (max_mn * 2 > 0 && y == NULL) {
14131     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14132   }
14133   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
14134   if (max_mn > 0 && head_x_gen == NULL) {
14135     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14136   }
14137   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
14138   if (max_mn > 0 && tail_x_gen == NULL) {
14139     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14140   }
14141   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14142   if (max_mn > 0 && y_gen == NULL) {
14143     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14144   }
14145   temp = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14146   if (max_mn > 0 && temp == NULL) {
14147     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14148   }
14149   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14150   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14151   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
14152     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14153   }
14154   ratios = (double *) blas_malloc(max_mn * sizeof(double));
14155   if (max_mn > 0 && ratios == NULL) {
14156     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14157   }
14158   A =
14159     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double) *
14160 			   2);
14161   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
14162     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14163   }
14164 
14165   /* The debug iteration:
14166      If debug=1, then will execute the iteration twice. First, compute the
14167      max ratio. Second, print info if ratio > (50% * ratio_max). */
14168   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
14169     bad_ratios = 0;		/* set to zero */
14170 
14171     if ((debug == 3) && (d_count == find_max_ratio))
14172       *seed = saved_seed;	/* restore the original seed */
14173 
14174     /* varying alpha */
14175     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
14176       alpha_flag = 0;
14177       switch (alpha_val) {
14178       case 0:
14179 	alpha[0] = alpha[1] = 0.0;
14180 	alpha_flag = 1;
14181 	break;
14182       case 1:
14183 	alpha[0] = 1.0;
14184 	alpha[1] = 0.0;
14185 	alpha_flag = 1;
14186 	break;
14187       }
14188 
14189       /* varying beta */
14190       for (beta_val = 0; beta_val < 3; beta_val++) {
14191 	beta_flag = 0;
14192 	switch (beta_val) {
14193 	case 0:
14194 	  beta[0] = beta[1] = 0.0;
14195 	  beta_flag = 1;
14196 	  break;
14197 	case 1:
14198 	  beta[0] = 1.0;
14199 	  beta[1] = 0.0;
14200 	  beta_flag = 1;
14201 	  break;
14202 	}
14203 
14204 
14205 	/* varying extra precs */
14206 	for (prec_val = 0; prec_val <= 2; prec_val++) {
14207 	  switch (prec_val) {
14208 	  case 0:
14209 	    eps_int = power(2, -BITS_D);
14210 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
14211 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
14212 	    prec = blas_prec_double;
14213 	    break;
14214 	  case 1:
14215 	    eps_int = power(2, -BITS_D);
14216 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
14217 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
14218 	    prec = blas_prec_double;
14219 	    break;
14220 	  case 2:
14221 	  default:
14222 	    eps_int = power(2, -BITS_E);
14223 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
14224 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
14225 	    prec = blas_prec_extra;
14226 	    break;
14227 	  }
14228 
14229 	  /* values near underflow, 1, or overflow */
14230 	  for (norm = -1; norm <= 1; norm++) {
14231 
14232 	    /* number of tests */
14233 	    for (i = 0; i < ntests; i++) {
14234 
14235 	      /* row or col major */
14236 	      for (order_val = 0; order_val < 2; order_val++) {
14237 		switch (order_val) {
14238 		case 0:
14239 		  order_type = blas_rowmajor;
14240 		  break;
14241 		case 1:
14242 		default:
14243 		  order_type = blas_colmajor;
14244 		  break;
14245 		}
14246 
14247 		/* no_trans, trans, or conj_trans */
14248 		for (trans_val = 0; trans_val < 3; trans_val++) {
14249 		  switch (trans_val) {
14250 		  case 0:
14251 		    trans_type = blas_no_trans;
14252 		    m_i = m;
14253 		    n_i = n;
14254 		    break;
14255 		  case 1:
14256 		    trans_type = blas_trans;
14257 		    m_i = n;
14258 		    n_i = m;
14259 		    break;
14260 		  case 2:
14261 		  default:
14262 		    trans_type = blas_conj_trans;
14263 		    m_i = n;
14264 		    n_i = m;
14265 		    break;
14266 		  }
14267 
14268 		  /* lda=n, n+1, or 2n */
14269 		  for (lda_val = 0; lda_val < 3; lda_val++) {
14270 		    switch (lda_val) {
14271 		    case 0:
14272 		      lda = m_i;
14273 		      break;
14274 		    case 1:
14275 		      lda = m_i + 1;
14276 		      break;
14277 		    case 2:
14278 		    default:
14279 		      lda = 2 * m_i;
14280 		      break;
14281 		    }
14282 		    if ((order_type == blas_rowmajor && lda < n) ||
14283 			(order_type == blas_colmajor && lda < m))
14284 		      continue;
14285 
14286 		    /* For the sake of speed, we throw out this case at random */
14287 		    if (xrand(seed) >= test_prob)
14288 		      continue;
14289 
14290 		    /* in the trivial cases, no need to run testgen */
14291 		    if (m > 0 && n > 0)
14292 		      BLAS_zgemv2_z_d_testgen(norm, order_type, trans_type, m,
14293 					      n, &alpha, alpha_flag, A, lda,
14294 					      head_x_gen, tail_x_gen, &beta,
14295 					      beta_flag, y_gen, seed,
14296 					      head_r_true, tail_r_true);
14297 
14298 		    count++;
14299 
14300 		    /* varying incx */
14301 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
14302 		      if (incx_val == 0)
14303 			continue;
14304 
14305 		      /* setting incx */
14306 		      incx = incx_val;
14307 
14308 
14309 		      dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
14310 		      dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
14311 
14312 		      /* varying incy */
14313 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
14314 			if (incy_val == 0)
14315 			  continue;
14316 
14317 			/* setting incy */
14318 			incy = incy_val;
14319 			incy *= 2;
14320 
14321 			zcopy_vector(y_gen, m_i, 1, y, incy_val);
14322 
14323 			/* call BLAS_zgemv2_z_d_x */
14324 			FPU_FIX_STOP;
14325 			BLAS_zgemv2_z_d_x(order_type, trans_type, m, n, alpha,
14326 					  A, lda, head_x, tail_x, incx_val,
14327 					  beta, y, incy_val, prec);
14328 			FPU_FIX_START;
14329 
14330 			/* set y starting index */
14331 			iy = 0;
14332 			if (incy < 0)
14333 			  iy = -(m_i - 1) * incy;
14334 
14335 			/* computing the ratio */
14336 			if (m > 0 && n > 0)
14337 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
14338 			    /* copy row j of A to temp */
14339 			    zge_copy_row(order_type, trans_type, m_i, n_i, A,
14340 					 lda, temp, j);
14341 
14342 			    test_BLAS_zdot2_z_d(n_i, blas_no_conj, alpha,
14343 						beta, &y_gen[k], &y[iy],
14344 						&head_r_true[k],
14345 						&tail_r_true[k], temp, 1,
14346 						head_x, tail_x, incx_val,
14347 						eps_int, un_int, &ratios[j]);
14348 
14349 			    /* take the max ratio */
14350 			    if (j == 0) {
14351 			      ratio = ratios[0];
14352 			      /* The !<= below causes NaN error to be detected.
14353 			         Note that (NaN > thresh) is always false. */
14354 			    } else if (!(ratios[j] <= ratio)) {
14355 			      ratio = ratios[j];
14356 			    }
14357 			    iy += incy;
14358 			  }
14359 
14360 			/* Increase the number of bad ratio, if the ratio
14361 			   is bigger than the threshold.
14362 			   The !<= below causes NaN error to be detected.
14363 			   Note that (NaN > thresh) is always false. */
14364 			if (!(ratio <= thresh)) {
14365 			  bad_ratios++;
14366 
14367 			  if ((debug == 3) &&	/* print only when debug is on */
14368 			      (count != old_count) &&	/* print if old vector is different
14369 							   from the current one */
14370 			      (d_count == find_max_ratio) &&
14371 			      (p_count <= max_print) &&
14372 			      (ratio > 0.5 * ratio_max)) {
14373 			    old_count = count;
14374 
14375 			    printf
14376 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
14377 			       fname, m, n, ntests, thresh);
14378 
14379 			    /* Print test info */
14380 			    switch (prec) {
14381 			    case blas_prec_single:
14382 			      printf("single ");
14383 			      break;
14384 			    case blas_prec_double:
14385 			      printf("double ");
14386 			      break;
14387 			    case blas_prec_indigenous:
14388 			      printf("indigenous ");
14389 			      break;
14390 			    case blas_prec_extra:
14391 			      printf("extra ");
14392 			      break;
14393 			    }
14394 			    switch (norm) {
14395 			    case -1:
14396 			      printf("near_underflow ");
14397 			      break;
14398 			    case 0:
14399 			      printf("near_one ");
14400 			      break;
14401 			    case 1:
14402 			      printf("near_overflow ");
14403 			      break;
14404 			    }
14405 			    switch (order_type) {
14406 			    case blas_rowmajor:
14407 			      printf("row_major ");
14408 			      break;
14409 			    case blas_colmajor:
14410 			      printf("col_major ");
14411 			      break;
14412 			    }
14413 			    switch (trans_type) {
14414 			    case blas_no_trans:
14415 			      printf("no_trans ");
14416 			      break;
14417 			    case blas_trans:
14418 			      printf("trans ");
14419 			      break;
14420 			    case blas_conj_trans:
14421 			      printf("conj_trans ");
14422 			      break;
14423 			    }
14424 
14425 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
14426 				   incy);
14427 
14428 			    zge_print_matrix(A, m_i, n_i, lda, order_type,
14429 					     "A");
14430 
14431 			    dprint_vector(head_x, n_i, incx_val, "head_x");
14432 			    dprint_vector(tail_x, n_i, incx_val, "tail_x");
14433 			    zprint_vector(y_gen, m_i, 1, "y_gen");
14434 			    zprint_vector(y, m_i, incy_val, "y_final");
14435 
14436 			    printf("      ");
14437 			    printf("alpha = ");
14438 			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
14439 			    printf("\n      ");
14440 			    printf("beta = ");
14441 			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
14442 			    printf("\n");
14443 			    for (j = 0, k = 0; j < m_i * incy_gen;
14444 				 j += incy_gen, k++) {
14445 			      printf("      ");
14446 			      printf
14447 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
14448 				 head_r_true[j], tail_r_true[j],
14449 				 head_r_true[j + 1], tail_r_true[j + 1]);
14450 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
14451 			    }
14452 
14453 			    printf("      ratio=%.4e\n", ratio);
14454 			    p_count++;
14455 			  }
14456 			  if (bad_ratios >= MAX_BAD_TESTS) {
14457 			    printf("\ntoo many failures, exiting....");
14458 			    printf("\nTesting and compilation");
14459 			    printf(" are incomplete\n\n");
14460 			    goto end;
14461 			  }
14462 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
14463 			    printf("\nFlagrant ratio error, exiting...");
14464 			    printf("\nTesting and compilation");
14465 			    printf(" are incomplete\n\n");
14466 			    goto end;
14467 			  }
14468 			}
14469 			if (d_count == 0) {
14470 			  if (ratio > ratio_max)
14471 			    ratio_max = ratio;
14472 
14473 			  if (ratio != 0.0 && ratio < ratio_min)
14474 			    ratio_min = ratio;
14475 
14476 			  tot_tests++;
14477 			}
14478 		      }		/* incy */
14479 		    }		/* incx */
14480 		  }		/* lda */
14481 		}		/* trans */
14482 	      }			/* order */
14483 	    }			/* tests */
14484 	  }			/* norm */
14485 	}			/* prec */
14486       }				/* beta */
14487     }				/* alpha */
14488   }				/* debug */
14489 
14490   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
14491     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
14492 	   fname, m, n, ntests, thresh);
14493     printf
14494       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
14495        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
14496        ratio_min, ratio_max);
14497   }
14498 
14499 end:
14500   FPU_FIX_STOP;
14501 
14502   blas_free(head_x);
14503   blas_free(tail_x);
14504   blas_free(y);
14505   blas_free(head_x_gen);
14506   blas_free(tail_x_gen);
14507   blas_free(y_gen);
14508   blas_free(temp);
14509   blas_free(A);
14510   blas_free(head_r_true);
14511   blas_free(tail_r_true);
14512   blas_free(ratios);
14513 
14514   *min_ratio = ratio_min;
14515   *num_bad_ratio = bad_ratios;
14516   *num_tests = tot_tests;
14517   return ratio_max;
14518 }
do_test_zgemv2_d_z_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)14519 double do_test_zgemv2_d_z_x(int m, int n, int ntests, int *seed,
14520 			    double thresh, int debug, float test_prob,
14521 			    double *min_ratio, int *num_bad_ratio,
14522 			    int *num_tests)
14523 
14524 /*
14525  * Purpose
14526  * =======
14527  *
14528  * Runs a series of tests on GEMV2.
14529  *
14530  * Arguments
14531  * =========
14532  *
14533  * m         (input) int
14534  *           The number of rows
14535  *
14536  * n         (input) int
14537  *           The number of columns
14538  *
14539  * ntests    (input) int
14540  *           The number of tests to run for each set of attributes.
14541  *
14542  * seed      (input/output) int
14543  *           The seed for the random number generator used in testgen().
14544  *
14545  * thresh    (input) double
14546  *           When the ratio returned from test() exceeds the specified
14547  *           threshold, the current size, r_true, r_comp, and ratio will be
14548  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
14549  *           to ~10.)
14550  *
14551  * debug     (input) int
14552  *           If debug=3, print summary
14553  *           If debug=2, print summary only if the number of bad ratios > 0
14554  *           If debug=1, print complete info if tests fail
14555  *           If debug=0, return max ratio
14556  *
14557  * test_prob (input) float
14558  *           The specified test will be performed only if the generated
14559  *           random exceeds this threshold.
14560  *
14561  * min_ratio (output) double
14562  *           The minimum ratio
14563  *
14564  * num_bad_ratio (output) int
14565  *               The number of tests fail; they are above the threshold.
14566  *
14567  * num_tests (output) int
14568  *           The number of tests is being performed.
14569  *
14570  * Return value
14571  * ============
14572  *
14573  * The maximum ratio if run successfully, otherwise return -1
14574  *
14575  * Code structure
14576  * ==============
14577  *
14578  *  debug loop  -- if debug is one, the first loop computes the max ratio
14579  *              -- and the last(second) loop outputs debugging information,
14580  *              -- if the test fail and its ratio > 0.5 * max ratio.
14581  *              -- if debug is zero, the loop is executed once
14582  *    alpha loop  -- varying alpha: 0, 1, or random
14583  *      beta loop   -- varying beta: 0, 1, or random
14584  *        prec loop   -- varying internal prec: single, double, or extra
14585  *          norm loop   -- varying norm: near undeflow, near one, or
14586  *                        -- near overflow
14587  *            numtest loop  -- how many times the test is perform with
14588  *                            -- above set of attributes
14589  *              order loop   -- varying order type: rowmajor or colmajor
14590  *                trans loop    -- varying uplo type: upper or lower
14591  *                  lda loop      -- varying lda: m, m+1, 2m
14592  *                    incx loop     -- varying incx: -2, -1, 1, 2
14593  *                      incy loop     -- varying incy: -2, -1, 1, 2
14594  */
14595 {
14596   /* function name */
14597   const char fname[] = "BLAS_zgemv2_d_z_x";
14598 
14599   /* max number of debug lines to print */
14600   const int max_print = 8;
14601 
14602   /* Variables in the "x_val" form are loop vars for corresponding
14603      variables */
14604   int i;			/* iterate through the repeating tests */
14605   int j, k;			/* multipurpose counters or variables */
14606   int iy;			/* use to index y */
14607   int incx_val, incy_val,	/* for testing different inc values */
14608     incx, incy;
14609   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
14610   int d_count;			/* counter for debug */
14611   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
14612   int p_count;			/* counter for the number of debug lines printed */
14613   int tot_tests;		/* total number of tests to be done */
14614   int norm;			/* input values of near underflow/one/overflow */
14615   double ratio_max;		/* the current maximum ratio */
14616   double ratio_min;		/* the current minimum ratio */
14617   double *ratios;		/* a temporary variable for calculating ratio */
14618   double ratio;			/* the per-use test ratio from test() */
14619   int bad_ratios;		/* the number of ratios over the threshold */
14620   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
14621   double un_int;		/* the internal underflow threshold */
14622   double alpha[2];
14623   double beta[2];
14624   double *A;
14625   double *head_x;
14626   double *tail_x;
14627   double *y;
14628   double *temp;			/* use for calculating ratio */
14629 
14630   /* x_gen and y_gen are used to store vectors generated by testgen.
14631      they eventually are copied back to x and y */
14632   double *head_x_gen;
14633   double *tail_x_gen;
14634   double *y_gen;
14635 
14636   /* the true r calculated by testgen(), in double-double */
14637   double *head_r_true, *tail_r_true;
14638 
14639   int alpha_val;
14640   int alpha_flag;		/* input flag for BLAS_zgemv2_d_z_testgen */
14641   int beta_val;
14642   int beta_flag;		/* input flag for BLAS_zgemv2_d_z_testgen */
14643   int order_val;
14644   enum blas_order_type order_type;
14645   int prec_val;
14646   enum blas_prec_type prec;
14647   int trans_val;
14648   enum blas_trans_type trans_type;
14649   int m_i;
14650   int n_i;
14651   int max_mn;			/* the max of m and n */
14652   int lda_val;
14653   int lda;
14654   int saved_seed;		/* for saving the original seed */
14655   int count, old_count;		/* use for counting the number of testgen calls * 2 */
14656 
14657   FPU_FIX_DECL;
14658 
14659   /* test for bad arguments */
14660   if (n < 0 || m < 0 || ntests < 0)
14661     BLAS_error(fname, 0, 0, NULL);
14662 
14663   /* initialization */
14664   *num_bad_ratio = 0;
14665   *num_tests = 0;
14666   *min_ratio = 0.0;
14667 
14668   saved_seed = *seed;
14669   ratio_min = 1e308;
14670   ratio_max = 0.0;
14671   ratio = 0.0;
14672   tot_tests = 0;
14673   p_count = 0;
14674   count = 0;
14675   find_max_ratio = 0;
14676   bad_ratios = 0;
14677   old_count = 0;
14678 
14679   if (debug == 3)
14680     find_max_ratio = 1;
14681   max_mn = MAX(m, n);
14682   if (m == 0 || n == 0) {
14683     return 0.0;
14684   }
14685 
14686   FPU_FIX_START;
14687 
14688   incy_gen = 1;
14689   incy_gen *= 2;
14690 
14691   /* get space for calculation */
14692   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
14693   if (max_mn * 2 > 0 && head_x == NULL) {
14694     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14695   }
14696   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
14697   if (max_mn * 2 > 0 && tail_x == NULL) {
14698     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14699   }
14700   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
14701   if (max_mn * 2 > 0 && y == NULL) {
14702     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14703   }
14704   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14705   if (max_mn > 0 && head_x_gen == NULL) {
14706     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14707   }
14708   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14709   if (max_mn > 0 && tail_x_gen == NULL) {
14710     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14711   }
14712   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14713   if (max_mn > 0 && y_gen == NULL) {
14714     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14715   }
14716   temp = (double *) blas_malloc(max_mn * sizeof(double));
14717   if (max_mn > 0 && temp == NULL) {
14718     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14719   }
14720   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14721   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
14722   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
14723     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14724   }
14725   ratios = (double *) blas_malloc(max_mn * sizeof(double));
14726   if (max_mn > 0 && ratios == NULL) {
14727     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14728   }
14729   A =
14730     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
14731   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
14732     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
14733   }
14734 
14735   /* The debug iteration:
14736      If debug=1, then will execute the iteration twice. First, compute the
14737      max ratio. Second, print info if ratio > (50% * ratio_max). */
14738   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
14739     bad_ratios = 0;		/* set to zero */
14740 
14741     if ((debug == 3) && (d_count == find_max_ratio))
14742       *seed = saved_seed;	/* restore the original seed */
14743 
14744     /* varying alpha */
14745     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
14746       alpha_flag = 0;
14747       switch (alpha_val) {
14748       case 0:
14749 	alpha[0] = alpha[1] = 0.0;
14750 	alpha_flag = 1;
14751 	break;
14752       case 1:
14753 	alpha[0] = 1.0;
14754 	alpha[1] = 0.0;
14755 	alpha_flag = 1;
14756 	break;
14757       }
14758 
14759       /* varying beta */
14760       for (beta_val = 0; beta_val < 3; beta_val++) {
14761 	beta_flag = 0;
14762 	switch (beta_val) {
14763 	case 0:
14764 	  beta[0] = beta[1] = 0.0;
14765 	  beta_flag = 1;
14766 	  break;
14767 	case 1:
14768 	  beta[0] = 1.0;
14769 	  beta[1] = 0.0;
14770 	  beta_flag = 1;
14771 	  break;
14772 	}
14773 
14774 
14775 	/* varying extra precs */
14776 	for (prec_val = 0; prec_val <= 2; prec_val++) {
14777 	  switch (prec_val) {
14778 	  case 0:
14779 	    eps_int = power(2, -BITS_D);
14780 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
14781 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
14782 	    prec = blas_prec_double;
14783 	    break;
14784 	  case 1:
14785 	    eps_int = power(2, -BITS_D);
14786 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
14787 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
14788 	    prec = blas_prec_double;
14789 	    break;
14790 	  case 2:
14791 	  default:
14792 	    eps_int = power(2, -BITS_E);
14793 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
14794 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
14795 	    prec = blas_prec_extra;
14796 	    break;
14797 	  }
14798 
14799 	  /* values near underflow, 1, or overflow */
14800 	  for (norm = -1; norm <= 1; norm++) {
14801 
14802 	    /* number of tests */
14803 	    for (i = 0; i < ntests; i++) {
14804 
14805 	      /* row or col major */
14806 	      for (order_val = 0; order_val < 2; order_val++) {
14807 		switch (order_val) {
14808 		case 0:
14809 		  order_type = blas_rowmajor;
14810 		  break;
14811 		case 1:
14812 		default:
14813 		  order_type = blas_colmajor;
14814 		  break;
14815 		}
14816 
14817 		/* no_trans, trans, or conj_trans */
14818 		for (trans_val = 0; trans_val < 3; trans_val++) {
14819 		  switch (trans_val) {
14820 		  case 0:
14821 		    trans_type = blas_no_trans;
14822 		    m_i = m;
14823 		    n_i = n;
14824 		    break;
14825 		  case 1:
14826 		    trans_type = blas_trans;
14827 		    m_i = n;
14828 		    n_i = m;
14829 		    break;
14830 		  case 2:
14831 		  default:
14832 		    trans_type = blas_conj_trans;
14833 		    m_i = n;
14834 		    n_i = m;
14835 		    break;
14836 		  }
14837 
14838 		  /* lda=n, n+1, or 2n */
14839 		  for (lda_val = 0; lda_val < 3; lda_val++) {
14840 		    switch (lda_val) {
14841 		    case 0:
14842 		      lda = m_i;
14843 		      break;
14844 		    case 1:
14845 		      lda = m_i + 1;
14846 		      break;
14847 		    case 2:
14848 		    default:
14849 		      lda = 2 * m_i;
14850 		      break;
14851 		    }
14852 		    if ((order_type == blas_rowmajor && lda < n) ||
14853 			(order_type == blas_colmajor && lda < m))
14854 		      continue;
14855 
14856 		    /* For the sake of speed, we throw out this case at random */
14857 		    if (xrand(seed) >= test_prob)
14858 		      continue;
14859 
14860 		    /* in the trivial cases, no need to run testgen */
14861 		    if (m > 0 && n > 0)
14862 		      BLAS_zgemv2_d_z_testgen(norm, order_type, trans_type, m,
14863 					      n, &alpha, alpha_flag, A, lda,
14864 					      head_x_gen, tail_x_gen, &beta,
14865 					      beta_flag, y_gen, seed,
14866 					      head_r_true, tail_r_true);
14867 
14868 		    count++;
14869 
14870 		    /* varying incx */
14871 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
14872 		      if (incx_val == 0)
14873 			continue;
14874 
14875 		      /* setting incx */
14876 		      incx = incx_val;
14877 		      incx *= 2;
14878 
14879 		      zcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
14880 		      zcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
14881 
14882 		      /* varying incy */
14883 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
14884 			if (incy_val == 0)
14885 			  continue;
14886 
14887 			/* setting incy */
14888 			incy = incy_val;
14889 			incy *= 2;
14890 
14891 			zcopy_vector(y_gen, m_i, 1, y, incy_val);
14892 
14893 			/* call BLAS_zgemv2_d_z_x */
14894 			FPU_FIX_STOP;
14895 			BLAS_zgemv2_d_z_x(order_type, trans_type, m, n, alpha,
14896 					  A, lda, head_x, tail_x, incx_val,
14897 					  beta, y, incy_val, prec);
14898 			FPU_FIX_START;
14899 
14900 			/* set y starting index */
14901 			iy = 0;
14902 			if (incy < 0)
14903 			  iy = -(m_i - 1) * incy;
14904 
14905 			/* computing the ratio */
14906 			if (m > 0 && n > 0)
14907 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
14908 			    /* copy row j of A to temp */
14909 			    dge_copy_row(order_type, trans_type, m_i, n_i, A,
14910 					 lda, temp, j);
14911 
14912 			    test_BLAS_zdot2_d_z(n_i, blas_no_conj, alpha,
14913 						beta, &y_gen[k], &y[iy],
14914 						&head_r_true[k],
14915 						&tail_r_true[k], temp, 1,
14916 						head_x, tail_x, incx_val,
14917 						eps_int, un_int, &ratios[j]);
14918 
14919 			    /* take the max ratio */
14920 			    if (j == 0) {
14921 			      ratio = ratios[0];
14922 			      /* The !<= below causes NaN error to be detected.
14923 			         Note that (NaN > thresh) is always false. */
14924 			    } else if (!(ratios[j] <= ratio)) {
14925 			      ratio = ratios[j];
14926 			    }
14927 			    iy += incy;
14928 			  }
14929 
14930 			/* Increase the number of bad ratio, if the ratio
14931 			   is bigger than the threshold.
14932 			   The !<= below causes NaN error to be detected.
14933 			   Note that (NaN > thresh) is always false. */
14934 			if (!(ratio <= thresh)) {
14935 			  bad_ratios++;
14936 
14937 			  if ((debug == 3) &&	/* print only when debug is on */
14938 			      (count != old_count) &&	/* print if old vector is different
14939 							   from the current one */
14940 			      (d_count == find_max_ratio) &&
14941 			      (p_count <= max_print) &&
14942 			      (ratio > 0.5 * ratio_max)) {
14943 			    old_count = count;
14944 
14945 			    printf
14946 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
14947 			       fname, m, n, ntests, thresh);
14948 
14949 			    /* Print test info */
14950 			    switch (prec) {
14951 			    case blas_prec_single:
14952 			      printf("single ");
14953 			      break;
14954 			    case blas_prec_double:
14955 			      printf("double ");
14956 			      break;
14957 			    case blas_prec_indigenous:
14958 			      printf("indigenous ");
14959 			      break;
14960 			    case blas_prec_extra:
14961 			      printf("extra ");
14962 			      break;
14963 			    }
14964 			    switch (norm) {
14965 			    case -1:
14966 			      printf("near_underflow ");
14967 			      break;
14968 			    case 0:
14969 			      printf("near_one ");
14970 			      break;
14971 			    case 1:
14972 			      printf("near_overflow ");
14973 			      break;
14974 			    }
14975 			    switch (order_type) {
14976 			    case blas_rowmajor:
14977 			      printf("row_major ");
14978 			      break;
14979 			    case blas_colmajor:
14980 			      printf("col_major ");
14981 			      break;
14982 			    }
14983 			    switch (trans_type) {
14984 			    case blas_no_trans:
14985 			      printf("no_trans ");
14986 			      break;
14987 			    case blas_trans:
14988 			      printf("trans ");
14989 			      break;
14990 			    case blas_conj_trans:
14991 			      printf("conj_trans ");
14992 			      break;
14993 			    }
14994 
14995 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
14996 				   incy);
14997 
14998 			    dge_print_matrix(A, m_i, n_i, lda, order_type,
14999 					     "A");
15000 
15001 			    zprint_vector(head_x, n_i, incx_val, "head_x");
15002 			    zprint_vector(tail_x, n_i, incx_val, "tail_x");
15003 			    zprint_vector(y_gen, m_i, 1, "y_gen");
15004 			    zprint_vector(y, m_i, incy_val, "y_final");
15005 
15006 			    printf("      ");
15007 			    printf("alpha = ");
15008 			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
15009 			    printf("\n      ");
15010 			    printf("beta = ");
15011 			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
15012 			    printf("\n");
15013 			    for (j = 0, k = 0; j < m_i * incy_gen;
15014 				 j += incy_gen, k++) {
15015 			      printf("      ");
15016 			      printf
15017 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
15018 				 head_r_true[j], tail_r_true[j],
15019 				 head_r_true[j + 1], tail_r_true[j + 1]);
15020 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
15021 			    }
15022 
15023 			    printf("      ratio=%.4e\n", ratio);
15024 			    p_count++;
15025 			  }
15026 			  if (bad_ratios >= MAX_BAD_TESTS) {
15027 			    printf("\ntoo many failures, exiting....");
15028 			    printf("\nTesting and compilation");
15029 			    printf(" are incomplete\n\n");
15030 			    goto end;
15031 			  }
15032 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
15033 			    printf("\nFlagrant ratio error, exiting...");
15034 			    printf("\nTesting and compilation");
15035 			    printf(" are incomplete\n\n");
15036 			    goto end;
15037 			  }
15038 			}
15039 			if (d_count == 0) {
15040 			  if (ratio > ratio_max)
15041 			    ratio_max = ratio;
15042 
15043 			  if (ratio != 0.0 && ratio < ratio_min)
15044 			    ratio_min = ratio;
15045 
15046 			  tot_tests++;
15047 			}
15048 		      }		/* incy */
15049 		    }		/* incx */
15050 		  }		/* lda */
15051 		}		/* trans */
15052 	      }			/* order */
15053 	    }			/* tests */
15054 	  }			/* norm */
15055 	}			/* prec */
15056       }				/* beta */
15057     }				/* alpha */
15058   }				/* debug */
15059 
15060   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
15061     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
15062 	   fname, m, n, ntests, thresh);
15063     printf
15064       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
15065        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
15066        ratio_min, ratio_max);
15067   }
15068 
15069 end:
15070   FPU_FIX_STOP;
15071 
15072   blas_free(head_x);
15073   blas_free(tail_x);
15074   blas_free(y);
15075   blas_free(head_x_gen);
15076   blas_free(tail_x_gen);
15077   blas_free(y_gen);
15078   blas_free(temp);
15079   blas_free(A);
15080   blas_free(head_r_true);
15081   blas_free(tail_r_true);
15082   blas_free(ratios);
15083 
15084   *min_ratio = ratio_min;
15085   *num_bad_ratio = bad_ratios;
15086   *num_tests = tot_tests;
15087   return ratio_max;
15088 }
do_test_zgemv2_d_d_x(int m,int n,int ntests,int * seed,double thresh,int debug,float test_prob,double * min_ratio,int * num_bad_ratio,int * num_tests)15089 double do_test_zgemv2_d_d_x(int m, int n, int ntests, int *seed,
15090 			    double thresh, int debug, float test_prob,
15091 			    double *min_ratio, int *num_bad_ratio,
15092 			    int *num_tests)
15093 
15094 /*
15095  * Purpose
15096  * =======
15097  *
15098  * Runs a series of tests on GEMV2.
15099  *
15100  * Arguments
15101  * =========
15102  *
15103  * m         (input) int
15104  *           The number of rows
15105  *
15106  * n         (input) int
15107  *           The number of columns
15108  *
15109  * ntests    (input) int
15110  *           The number of tests to run for each set of attributes.
15111  *
15112  * seed      (input/output) int
15113  *           The seed for the random number generator used in testgen().
15114  *
15115  * thresh    (input) double
15116  *           When the ratio returned from test() exceeds the specified
15117  *           threshold, the current size, r_true, r_comp, and ratio will be
15118  *           printed.  (Since ratio is supposed to be O(1), we can set thresh
15119  *           to ~10.)
15120  *
15121  * debug     (input) int
15122  *           If debug=3, print summary
15123  *           If debug=2, print summary only if the number of bad ratios > 0
15124  *           If debug=1, print complete info if tests fail
15125  *           If debug=0, return max ratio
15126  *
15127  * test_prob (input) float
15128  *           The specified test will be performed only if the generated
15129  *           random exceeds this threshold.
15130  *
15131  * min_ratio (output) double
15132  *           The minimum ratio
15133  *
15134  * num_bad_ratio (output) int
15135  *               The number of tests fail; they are above the threshold.
15136  *
15137  * num_tests (output) int
15138  *           The number of tests is being performed.
15139  *
15140  * Return value
15141  * ============
15142  *
15143  * The maximum ratio if run successfully, otherwise return -1
15144  *
15145  * Code structure
15146  * ==============
15147  *
15148  *  debug loop  -- if debug is one, the first loop computes the max ratio
15149  *              -- and the last(second) loop outputs debugging information,
15150  *              -- if the test fail and its ratio > 0.5 * max ratio.
15151  *              -- if debug is zero, the loop is executed once
15152  *    alpha loop  -- varying alpha: 0, 1, or random
15153  *      beta loop   -- varying beta: 0, 1, or random
15154  *        prec loop   -- varying internal prec: single, double, or extra
15155  *          norm loop   -- varying norm: near undeflow, near one, or
15156  *                        -- near overflow
15157  *            numtest loop  -- how many times the test is perform with
15158  *                            -- above set of attributes
15159  *              order loop   -- varying order type: rowmajor or colmajor
15160  *                trans loop    -- varying uplo type: upper or lower
15161  *                  lda loop      -- varying lda: m, m+1, 2m
15162  *                    incx loop     -- varying incx: -2, -1, 1, 2
15163  *                      incy loop     -- varying incy: -2, -1, 1, 2
15164  */
15165 {
15166   /* function name */
15167   const char fname[] = "BLAS_zgemv2_d_d_x";
15168 
15169   /* max number of debug lines to print */
15170   const int max_print = 8;
15171 
15172   /* Variables in the "x_val" form are loop vars for corresponding
15173      variables */
15174   int i;			/* iterate through the repeating tests */
15175   int j, k;			/* multipurpose counters or variables */
15176   int iy;			/* use to index y */
15177   int incx_val, incy_val,	/* for testing different inc values */
15178     incx, incy;
15179   int incy_gen;			/* for complex case inc=2, for real case inc=1 */
15180   int d_count;			/* counter for debug */
15181   int find_max_ratio;		/* find_max_ratio = 1 only if debug = 3 */
15182   int p_count;			/* counter for the number of debug lines printed */
15183   int tot_tests;		/* total number of tests to be done */
15184   int norm;			/* input values of near underflow/one/overflow */
15185   double ratio_max;		/* the current maximum ratio */
15186   double ratio_min;		/* the current minimum ratio */
15187   double *ratios;		/* a temporary variable for calculating ratio */
15188   double ratio;			/* the per-use test ratio from test() */
15189   int bad_ratios;		/* the number of ratios over the threshold */
15190   double eps_int;		/* the internal epsilon expected--2^(-24) for float */
15191   double un_int;		/* the internal underflow threshold */
15192   double alpha[2];
15193   double beta[2];
15194   double *A;
15195   double *head_x;
15196   double *tail_x;
15197   double *y;
15198   double *temp;			/* use for calculating ratio */
15199 
15200   /* x_gen and y_gen are used to store vectors generated by testgen.
15201      they eventually are copied back to x and y */
15202   double *head_x_gen;
15203   double *tail_x_gen;
15204   double *y_gen;
15205 
15206   /* the true r calculated by testgen(), in double-double */
15207   double *head_r_true, *tail_r_true;
15208 
15209   int alpha_val;
15210   int alpha_flag;		/* input flag for BLAS_zgemv2_d_d_testgen */
15211   int beta_val;
15212   int beta_flag;		/* input flag for BLAS_zgemv2_d_d_testgen */
15213   int order_val;
15214   enum blas_order_type order_type;
15215   int prec_val;
15216   enum blas_prec_type prec;
15217   int trans_val;
15218   enum blas_trans_type trans_type;
15219   int m_i;
15220   int n_i;
15221   int max_mn;			/* the max of m and n */
15222   int lda_val;
15223   int lda;
15224   int saved_seed;		/* for saving the original seed */
15225   int count, old_count;		/* use for counting the number of testgen calls * 2 */
15226 
15227   FPU_FIX_DECL;
15228 
15229   /* test for bad arguments */
15230   if (n < 0 || m < 0 || ntests < 0)
15231     BLAS_error(fname, 0, 0, NULL);
15232 
15233   /* initialization */
15234   *num_bad_ratio = 0;
15235   *num_tests = 0;
15236   *min_ratio = 0.0;
15237 
15238   saved_seed = *seed;
15239   ratio_min = 1e308;
15240   ratio_max = 0.0;
15241   ratio = 0.0;
15242   tot_tests = 0;
15243   p_count = 0;
15244   count = 0;
15245   find_max_ratio = 0;
15246   bad_ratios = 0;
15247   old_count = 0;
15248 
15249   if (debug == 3)
15250     find_max_ratio = 1;
15251   max_mn = MAX(m, n);
15252   if (m == 0 || n == 0) {
15253     return 0.0;
15254   }
15255 
15256   FPU_FIX_START;
15257 
15258   incy_gen = 1;
15259   incy_gen *= 2;
15260 
15261   /* get space for calculation */
15262   head_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
15263   if (max_mn * 2 > 0 && head_x == NULL) {
15264     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15265   }
15266   tail_x = (double *) blas_malloc(max_mn * 2 * sizeof(double));
15267   if (max_mn * 2 > 0 && tail_x == NULL) {
15268     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15269   }
15270   y = (double *) blas_malloc(max_mn * 2 * sizeof(double) * 2);
15271   if (max_mn * 2 > 0 && y == NULL) {
15272     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15273   }
15274   head_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
15275   if (max_mn > 0 && head_x_gen == NULL) {
15276     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15277   }
15278   tail_x_gen = (double *) blas_malloc(max_mn * sizeof(double));
15279   if (max_mn > 0 && tail_x_gen == NULL) {
15280     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15281   }
15282   y_gen = (double *) blas_malloc(max_mn * sizeof(double) * 2);
15283   if (max_mn > 0 && y_gen == NULL) {
15284     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15285   }
15286   temp = (double *) blas_malloc(max_mn * sizeof(double));
15287   if (max_mn > 0 && temp == NULL) {
15288     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15289   }
15290   head_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
15291   tail_r_true = (double *) blas_malloc(max_mn * sizeof(double) * 2);
15292   if (max_mn > 0 && (head_r_true == NULL || tail_r_true == NULL)) {
15293     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15294   }
15295   ratios = (double *) blas_malloc(max_mn * sizeof(double));
15296   if (max_mn > 0 && ratios == NULL) {
15297     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15298   }
15299   A =
15300     (double *) blas_malloc((m - 1 + n - 1 + 1) * max_mn * 2 * sizeof(double));
15301   if ((m - 1 + n - 1 + 1) * max_mn * 2 > 0 && A == NULL) {
15302     BLAS_error("blas_malloc", 0, 0, "malloc failed.\n");
15303   }
15304 
15305   /* The debug iteration:
15306      If debug=1, then will execute the iteration twice. First, compute the
15307      max ratio. Second, print info if ratio > (50% * ratio_max). */
15308   for (d_count = 0; d_count <= find_max_ratio; d_count++) {
15309     bad_ratios = 0;		/* set to zero */
15310 
15311     if ((debug == 3) && (d_count == find_max_ratio))
15312       *seed = saved_seed;	/* restore the original seed */
15313 
15314     /* varying alpha */
15315     for (alpha_val = 0; alpha_val < 3; alpha_val++) {
15316       alpha_flag = 0;
15317       switch (alpha_val) {
15318       case 0:
15319 	alpha[0] = alpha[1] = 0.0;
15320 	alpha_flag = 1;
15321 	break;
15322       case 1:
15323 	alpha[0] = 1.0;
15324 	alpha[1] = 0.0;
15325 	alpha_flag = 1;
15326 	break;
15327       }
15328 
15329       /* varying beta */
15330       for (beta_val = 0; beta_val < 3; beta_val++) {
15331 	beta_flag = 0;
15332 	switch (beta_val) {
15333 	case 0:
15334 	  beta[0] = beta[1] = 0.0;
15335 	  beta_flag = 1;
15336 	  break;
15337 	case 1:
15338 	  beta[0] = 1.0;
15339 	  beta[1] = 0.0;
15340 	  beta_flag = 1;
15341 	  break;
15342 	}
15343 
15344 
15345 	/* varying extra precs */
15346 	for (prec_val = 0; prec_val <= 2; prec_val++) {
15347 	  switch (prec_val) {
15348 	  case 0:
15349 	    eps_int = power(2, -BITS_D);
15350 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
15351 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
15352 	    prec = blas_prec_double;
15353 	    break;
15354 	  case 1:
15355 	    eps_int = power(2, -BITS_D);
15356 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_double),
15357 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_double));
15358 	    prec = blas_prec_double;
15359 	    break;
15360 	  case 2:
15361 	  default:
15362 	    eps_int = power(2, -BITS_E);
15363 	    un_int = pow((double) BLAS_fpinfo_x(blas_base, blas_prec_extra),
15364 			 (double) BLAS_fpinfo_x(blas_emin, blas_prec_extra));
15365 	    prec = blas_prec_extra;
15366 	    break;
15367 	  }
15368 
15369 	  /* values near underflow, 1, or overflow */
15370 	  for (norm = -1; norm <= 1; norm++) {
15371 
15372 	    /* number of tests */
15373 	    for (i = 0; i < ntests; i++) {
15374 
15375 	      /* row or col major */
15376 	      for (order_val = 0; order_val < 2; order_val++) {
15377 		switch (order_val) {
15378 		case 0:
15379 		  order_type = blas_rowmajor;
15380 		  break;
15381 		case 1:
15382 		default:
15383 		  order_type = blas_colmajor;
15384 		  break;
15385 		}
15386 
15387 		/* no_trans, trans, or conj_trans */
15388 		for (trans_val = 0; trans_val < 3; trans_val++) {
15389 		  switch (trans_val) {
15390 		  case 0:
15391 		    trans_type = blas_no_trans;
15392 		    m_i = m;
15393 		    n_i = n;
15394 		    break;
15395 		  case 1:
15396 		    trans_type = blas_trans;
15397 		    m_i = n;
15398 		    n_i = m;
15399 		    break;
15400 		  case 2:
15401 		  default:
15402 		    trans_type = blas_conj_trans;
15403 		    m_i = n;
15404 		    n_i = m;
15405 		    break;
15406 		  }
15407 
15408 		  /* lda=n, n+1, or 2n */
15409 		  for (lda_val = 0; lda_val < 3; lda_val++) {
15410 		    switch (lda_val) {
15411 		    case 0:
15412 		      lda = m_i;
15413 		      break;
15414 		    case 1:
15415 		      lda = m_i + 1;
15416 		      break;
15417 		    case 2:
15418 		    default:
15419 		      lda = 2 * m_i;
15420 		      break;
15421 		    }
15422 		    if ((order_type == blas_rowmajor && lda < n) ||
15423 			(order_type == blas_colmajor && lda < m))
15424 		      continue;
15425 
15426 		    /* For the sake of speed, we throw out this case at random */
15427 		    if (xrand(seed) >= test_prob)
15428 		      continue;
15429 
15430 		    /* in the trivial cases, no need to run testgen */
15431 		    if (m > 0 && n > 0)
15432 		      BLAS_zgemv2_d_d_testgen(norm, order_type, trans_type, m,
15433 					      n, &alpha, alpha_flag, A, lda,
15434 					      head_x_gen, tail_x_gen, &beta,
15435 					      beta_flag, y_gen, seed,
15436 					      head_r_true, tail_r_true);
15437 
15438 		    count++;
15439 
15440 		    /* varying incx */
15441 		    for (incx_val = -2; incx_val <= 2; incx_val++) {
15442 		      if (incx_val == 0)
15443 			continue;
15444 
15445 		      /* setting incx */
15446 		      incx = incx_val;
15447 
15448 
15449 		      dcopy_vector(head_x_gen, n_i, 1, head_x, incx_val);
15450 		      dcopy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
15451 
15452 		      /* varying incy */
15453 		      for (incy_val = -2; incy_val <= 2; incy_val++) {
15454 			if (incy_val == 0)
15455 			  continue;
15456 
15457 			/* setting incy */
15458 			incy = incy_val;
15459 			incy *= 2;
15460 
15461 			zcopy_vector(y_gen, m_i, 1, y, incy_val);
15462 
15463 			/* call BLAS_zgemv2_d_d_x */
15464 			FPU_FIX_STOP;
15465 			BLAS_zgemv2_d_d_x(order_type, trans_type, m, n, alpha,
15466 					  A, lda, head_x, tail_x, incx_val,
15467 					  beta, y, incy_val, prec);
15468 			FPU_FIX_START;
15469 
15470 			/* set y starting index */
15471 			iy = 0;
15472 			if (incy < 0)
15473 			  iy = -(m_i - 1) * incy;
15474 
15475 			/* computing the ratio */
15476 			if (m > 0 && n > 0)
15477 			  for (j = 0, k = 0; j < m_i; j++, k += incy_gen) {
15478 			    /* copy row j of A to temp */
15479 			    dge_copy_row(order_type, trans_type, m_i, n_i, A,
15480 					 lda, temp, j);
15481 
15482 			    test_BLAS_zdot2_d_d(n_i, blas_no_conj, alpha,
15483 						beta, &y_gen[k], &y[iy],
15484 						&head_r_true[k],
15485 						&tail_r_true[k], temp, 1,
15486 						head_x, tail_x, incx_val,
15487 						eps_int, un_int, &ratios[j]);
15488 
15489 			    /* take the max ratio */
15490 			    if (j == 0) {
15491 			      ratio = ratios[0];
15492 			      /* The !<= below causes NaN error to be detected.
15493 			         Note that (NaN > thresh) is always false. */
15494 			    } else if (!(ratios[j] <= ratio)) {
15495 			      ratio = ratios[j];
15496 			    }
15497 			    iy += incy;
15498 			  }
15499 
15500 			/* Increase the number of bad ratio, if the ratio
15501 			   is bigger than the threshold.
15502 			   The !<= below causes NaN error to be detected.
15503 			   Note that (NaN > thresh) is always false. */
15504 			if (!(ratio <= thresh)) {
15505 			  bad_ratios++;
15506 
15507 			  if ((debug == 3) &&	/* print only when debug is on */
15508 			      (count != old_count) &&	/* print if old vector is different
15509 							   from the current one */
15510 			      (d_count == find_max_ratio) &&
15511 			      (p_count <= max_print) &&
15512 			      (ratio > 0.5 * ratio_max)) {
15513 			    old_count = count;
15514 
15515 			    printf
15516 			      ("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
15517 			       fname, m, n, ntests, thresh);
15518 
15519 			    /* Print test info */
15520 			    switch (prec) {
15521 			    case blas_prec_single:
15522 			      printf("single ");
15523 			      break;
15524 			    case blas_prec_double:
15525 			      printf("double ");
15526 			      break;
15527 			    case blas_prec_indigenous:
15528 			      printf("indigenous ");
15529 			      break;
15530 			    case blas_prec_extra:
15531 			      printf("extra ");
15532 			      break;
15533 			    }
15534 			    switch (norm) {
15535 			    case -1:
15536 			      printf("near_underflow ");
15537 			      break;
15538 			    case 0:
15539 			      printf("near_one ");
15540 			      break;
15541 			    case 1:
15542 			      printf("near_overflow ");
15543 			      break;
15544 			    }
15545 			    switch (order_type) {
15546 			    case blas_rowmajor:
15547 			      printf("row_major ");
15548 			      break;
15549 			    case blas_colmajor:
15550 			      printf("col_major ");
15551 			      break;
15552 			    }
15553 			    switch (trans_type) {
15554 			    case blas_no_trans:
15555 			      printf("no_trans ");
15556 			      break;
15557 			    case blas_trans:
15558 			      printf("trans ");
15559 			      break;
15560 			    case blas_conj_trans:
15561 			      printf("conj_trans ");
15562 			      break;
15563 			    }
15564 
15565 			    printf("lda=%d, incx=%d, incy=%d:\n", lda, incx,
15566 				   incy);
15567 
15568 			    dge_print_matrix(A, m_i, n_i, lda, order_type,
15569 					     "A");
15570 
15571 			    dprint_vector(head_x, n_i, incx_val, "head_x");
15572 			    dprint_vector(tail_x, n_i, incx_val, "tail_x");
15573 			    zprint_vector(y_gen, m_i, 1, "y_gen");
15574 			    zprint_vector(y, m_i, incy_val, "y_final");
15575 
15576 			    printf("      ");
15577 			    printf("alpha = ");
15578 			    printf("(%24.16e, %24.16e)", alpha[0], alpha[1]);
15579 			    printf("\n      ");
15580 			    printf("beta = ");
15581 			    printf("(%24.16e, %24.16e)", beta[0], beta[1]);
15582 			    printf("\n");
15583 			    for (j = 0, k = 0; j < m_i * incy_gen;
15584 				 j += incy_gen, k++) {
15585 			      printf("      ");
15586 			      printf
15587 				("([%24.16e  %24.16e], [%24.16e %24.16e])",
15588 				 head_r_true[j], tail_r_true[j],
15589 				 head_r_true[j + 1], tail_r_true[j + 1]);
15590 			      printf(", ratio[%d]=%.4e\n", k, ratios[k]);
15591 			    }
15592 
15593 			    printf("      ratio=%.4e\n", ratio);
15594 			    p_count++;
15595 			  }
15596 			  if (bad_ratios >= MAX_BAD_TESTS) {
15597 			    printf("\ntoo many failures, exiting....");
15598 			    printf("\nTesting and compilation");
15599 			    printf(" are incomplete\n\n");
15600 			    goto end;
15601 			  }
15602 			  if (!(ratio <= TOTAL_FAILURE_THRESHOLD)) {
15603 			    printf("\nFlagrant ratio error, exiting...");
15604 			    printf("\nTesting and compilation");
15605 			    printf(" are incomplete\n\n");
15606 			    goto end;
15607 			  }
15608 			}
15609 			if (d_count == 0) {
15610 			  if (ratio > ratio_max)
15611 			    ratio_max = ratio;
15612 
15613 			  if (ratio != 0.0 && ratio < ratio_min)
15614 			    ratio_min = ratio;
15615 
15616 			  tot_tests++;
15617 			}
15618 		      }		/* incy */
15619 		    }		/* incx */
15620 		  }		/* lda */
15621 		}		/* trans */
15622 	      }			/* order */
15623 	    }			/* tests */
15624 	  }			/* norm */
15625 	}			/* prec */
15626       }				/* beta */
15627     }				/* alpha */
15628   }				/* debug */
15629 
15630   if ((debug == 2) || ((debug == 1) && bad_ratios > 0)) {
15631     printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
15632 	   fname, m, n, ntests, thresh);
15633     printf
15634       ("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
15635        bad_ratios, tot_tests, ((double) bad_ratios) / ((double) tot_tests),
15636        ratio_min, ratio_max);
15637   }
15638 
15639 end:
15640   FPU_FIX_STOP;
15641 
15642   blas_free(head_x);
15643   blas_free(tail_x);
15644   blas_free(y);
15645   blas_free(head_x_gen);
15646   blas_free(tail_x_gen);
15647   blas_free(y_gen);
15648   blas_free(temp);
15649   blas_free(A);
15650   blas_free(head_r_true);
15651   blas_free(tail_r_true);
15652   blas_free(ratios);
15653 
15654   *min_ratio = ratio_min;
15655   *num_bad_ratio = bad_ratios;
15656   *num_tests = tot_tests;
15657   return ratio_max;
15658 }
15659 
15660 #define NUMPAIRS 12
15661 
main(int argc,char ** argv)15662 int main(int argc, char **argv)
15663 {
15664   int nsizes, ntests, debug;
15665   double thresh, test_prob;
15666   double total_min_ratio, total_max_ratio;
15667   int total_bad_ratios;
15668   int seed, num_bad_ratio, num_tests;
15669   int total_tests, nr_failed_routines = 0, nr_routines = 0;
15670   double min_ratio, max_ratio;
15671   const char *base_routine = "gemv2";
15672   char *fname;
15673   int n;
15674 
15675   int m, i;
15676   int mn_pairs[NUMPAIRS][2] =
15677     { {0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {2, 1},
15678   {3, 1}, {2, 3}, {3, 3}, {2, 4}, {6, 6}, {10, 8}
15679   };
15680 
15681   if (argc != 6) {
15682     printf("Usage:\n");
15683     printf("do_test_gemv2 <nsizes> <ntests> <thresh> <debug> <test_prob>\n");
15684     printf("   <nsizes>: number of sizes to be run.\n");
15685     printf
15686       ("   <ntests>: the number of tests performed for each set of attributes\n");
15687     printf
15688       ("   <thresh>: to catch bad ratios if it is greater than <thresh>\n");
15689     printf("    <debug>: 0, 1, 2, or 3; \n");
15690     printf("        if 0, no printing \n");
15691     printf("        if 1, print error summary only if tests fail\n");
15692     printf("        if 2, print error summary for each n\n");
15693     printf("        if 3, print complete info each test fails \n");
15694     printf("<test_prob>: probability of preforming a given \n");
15695     printf("           test case: 0.0 does no tests, 1.0 does all tests\n");
15696     return -1;
15697   } else {
15698     nsizes = atoi(argv[1]);
15699     ntests = atoi(argv[2]);
15700     thresh = atof(argv[3]);
15701     debug = atoi(argv[4]);
15702     test_prob = atof(argv[5]);
15703   }
15704 
15705   seed = 1999;
15706 
15707   if (nsizes < 0 || ntests < 0 || debug < 0 || debug > 3)
15708     BLAS_error("Testing gemv2", 0, 0, NULL);
15709 
15710   printf("Testing %s...\n", base_routine);
15711   printf("INPUT: nsizes = %d, ntests = %d, thresh = %4.2f, debug = %d\n\n",
15712 	 nsizes, ntests, thresh, debug);
15713 
15714 
15715 
15716 
15717   min_ratio = 1e308;
15718   max_ratio = 0.0;
15719   total_bad_ratios = 0;
15720   total_tests = 0;
15721   fname = "BLAS_dgemv2_d_s";
15722   printf("Testing %s...\n", fname);
15723   for (i = 0; i < nsizes; i++) {
15724     m = mn_pairs[i][0];
15725     n = mn_pairs[i][1];
15726     total_max_ratio =
15727       do_test_dgemv2_d_s(m, n, 1, &seed, thresh, debug, test_prob,
15728 			 &total_min_ratio, &num_bad_ratio, &num_tests);
15729     if (total_max_ratio > max_ratio)
15730       max_ratio = total_max_ratio;
15731 
15732     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15733       min_ratio = total_min_ratio;
15734 
15735     total_bad_ratios += num_bad_ratio;
15736     total_tests += num_tests;
15737   }
15738 
15739   if (min_ratio == 1e308)
15740     min_ratio = 0.0;
15741 
15742   nr_routines++;
15743   if (total_bad_ratios == 0)
15744     printf("PASS> ");
15745   else {
15746     nr_failed_routines++;
15747     printf("FAIL> ");
15748   }
15749 
15750   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15751 	 fname, total_bad_ratios, total_tests, max_ratio);
15752 
15753   min_ratio = 1e308;
15754   max_ratio = 0.0;
15755   total_bad_ratios = 0;
15756   total_tests = 0;
15757   fname = "BLAS_dgemv2_s_d";
15758   printf("Testing %s...\n", fname);
15759   for (i = 0; i < nsizes; i++) {
15760     m = mn_pairs[i][0];
15761     n = mn_pairs[i][1];
15762     total_max_ratio =
15763       do_test_dgemv2_s_d(m, n, 1, &seed, thresh, debug, test_prob,
15764 			 &total_min_ratio, &num_bad_ratio, &num_tests);
15765     if (total_max_ratio > max_ratio)
15766       max_ratio = total_max_ratio;
15767 
15768     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15769       min_ratio = total_min_ratio;
15770 
15771     total_bad_ratios += num_bad_ratio;
15772     total_tests += num_tests;
15773   }
15774 
15775   if (min_ratio == 1e308)
15776     min_ratio = 0.0;
15777 
15778   nr_routines++;
15779   if (total_bad_ratios == 0)
15780     printf("PASS> ");
15781   else {
15782     nr_failed_routines++;
15783     printf("FAIL> ");
15784   }
15785 
15786   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15787 	 fname, total_bad_ratios, total_tests, max_ratio);
15788 
15789   min_ratio = 1e308;
15790   max_ratio = 0.0;
15791   total_bad_ratios = 0;
15792   total_tests = 0;
15793   fname = "BLAS_dgemv2_s_s";
15794   printf("Testing %s...\n", fname);
15795   for (i = 0; i < nsizes; i++) {
15796     m = mn_pairs[i][0];
15797     n = mn_pairs[i][1];
15798     total_max_ratio =
15799       do_test_dgemv2_s_s(m, n, 1, &seed, thresh, debug, test_prob,
15800 			 &total_min_ratio, &num_bad_ratio, &num_tests);
15801     if (total_max_ratio > max_ratio)
15802       max_ratio = total_max_ratio;
15803 
15804     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15805       min_ratio = total_min_ratio;
15806 
15807     total_bad_ratios += num_bad_ratio;
15808     total_tests += num_tests;
15809   }
15810 
15811   if (min_ratio == 1e308)
15812     min_ratio = 0.0;
15813 
15814   nr_routines++;
15815   if (total_bad_ratios == 0)
15816     printf("PASS> ");
15817   else {
15818     nr_failed_routines++;
15819     printf("FAIL> ");
15820   }
15821 
15822   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15823 	 fname, total_bad_ratios, total_tests, max_ratio);
15824 
15825   min_ratio = 1e308;
15826   max_ratio = 0.0;
15827   total_bad_ratios = 0;
15828   total_tests = 0;
15829   fname = "BLAS_zgemv2_z_c";
15830   printf("Testing %s...\n", fname);
15831   for (i = 0; i < nsizes; i++) {
15832     m = mn_pairs[i][0];
15833     n = mn_pairs[i][1];
15834     total_max_ratio =
15835       do_test_zgemv2_z_c(m, n, 1, &seed, thresh, debug, test_prob,
15836 			 &total_min_ratio, &num_bad_ratio, &num_tests);
15837     if (total_max_ratio > max_ratio)
15838       max_ratio = total_max_ratio;
15839 
15840     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15841       min_ratio = total_min_ratio;
15842 
15843     total_bad_ratios += num_bad_ratio;
15844     total_tests += num_tests;
15845   }
15846 
15847   if (min_ratio == 1e308)
15848     min_ratio = 0.0;
15849 
15850   nr_routines++;
15851   if (total_bad_ratios == 0)
15852     printf("PASS> ");
15853   else {
15854     nr_failed_routines++;
15855     printf("FAIL> ");
15856   }
15857 
15858   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15859 	 fname, total_bad_ratios, total_tests, max_ratio);
15860 
15861   min_ratio = 1e308;
15862   max_ratio = 0.0;
15863   total_bad_ratios = 0;
15864   total_tests = 0;
15865   fname = "BLAS_zgemv2_c_z";
15866   printf("Testing %s...\n", fname);
15867   for (i = 0; i < nsizes; i++) {
15868     m = mn_pairs[i][0];
15869     n = mn_pairs[i][1];
15870     total_max_ratio =
15871       do_test_zgemv2_c_z(m, n, 1, &seed, thresh, debug, test_prob,
15872 			 &total_min_ratio, &num_bad_ratio, &num_tests);
15873     if (total_max_ratio > max_ratio)
15874       max_ratio = total_max_ratio;
15875 
15876     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15877       min_ratio = total_min_ratio;
15878 
15879     total_bad_ratios += num_bad_ratio;
15880     total_tests += num_tests;
15881   }
15882 
15883   if (min_ratio == 1e308)
15884     min_ratio = 0.0;
15885 
15886   nr_routines++;
15887   if (total_bad_ratios == 0)
15888     printf("PASS> ");
15889   else {
15890     nr_failed_routines++;
15891     printf("FAIL> ");
15892   }
15893 
15894   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15895 	 fname, total_bad_ratios, total_tests, max_ratio);
15896 
15897   min_ratio = 1e308;
15898   max_ratio = 0.0;
15899   total_bad_ratios = 0;
15900   total_tests = 0;
15901   fname = "BLAS_zgemv2_c_c";
15902   printf("Testing %s...\n", fname);
15903   for (i = 0; i < nsizes; i++) {
15904     m = mn_pairs[i][0];
15905     n = mn_pairs[i][1];
15906     total_max_ratio =
15907       do_test_zgemv2_c_c(m, n, 1, &seed, thresh, debug, test_prob,
15908 			 &total_min_ratio, &num_bad_ratio, &num_tests);
15909     if (total_max_ratio > max_ratio)
15910       max_ratio = total_max_ratio;
15911 
15912     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15913       min_ratio = total_min_ratio;
15914 
15915     total_bad_ratios += num_bad_ratio;
15916     total_tests += num_tests;
15917   }
15918 
15919   if (min_ratio == 1e308)
15920     min_ratio = 0.0;
15921 
15922   nr_routines++;
15923   if (total_bad_ratios == 0)
15924     printf("PASS> ");
15925   else {
15926     nr_failed_routines++;
15927     printf("FAIL> ");
15928   }
15929 
15930   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15931 	 fname, total_bad_ratios, total_tests, max_ratio);
15932 
15933   min_ratio = 1e308;
15934   max_ratio = 0.0;
15935   total_bad_ratios = 0;
15936   total_tests = 0;
15937   fname = "BLAS_cgemv2_c_s";
15938   printf("Testing %s...\n", fname);
15939   for (i = 0; i < nsizes; i++) {
15940     m = mn_pairs[i][0];
15941     n = mn_pairs[i][1];
15942     total_max_ratio =
15943       do_test_cgemv2_c_s(m, n, 1, &seed, thresh, debug, test_prob,
15944 			 &total_min_ratio, &num_bad_ratio, &num_tests);
15945     if (total_max_ratio > max_ratio)
15946       max_ratio = total_max_ratio;
15947 
15948     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15949       min_ratio = total_min_ratio;
15950 
15951     total_bad_ratios += num_bad_ratio;
15952     total_tests += num_tests;
15953   }
15954 
15955   if (min_ratio == 1e308)
15956     min_ratio = 0.0;
15957 
15958   nr_routines++;
15959   if (total_bad_ratios == 0)
15960     printf("PASS> ");
15961   else {
15962     nr_failed_routines++;
15963     printf("FAIL> ");
15964   }
15965 
15966   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
15967 	 fname, total_bad_ratios, total_tests, max_ratio);
15968 
15969   min_ratio = 1e308;
15970   max_ratio = 0.0;
15971   total_bad_ratios = 0;
15972   total_tests = 0;
15973   fname = "BLAS_cgemv2_s_c";
15974   printf("Testing %s...\n", fname);
15975   for (i = 0; i < nsizes; i++) {
15976     m = mn_pairs[i][0];
15977     n = mn_pairs[i][1];
15978     total_max_ratio =
15979       do_test_cgemv2_s_c(m, n, 1, &seed, thresh, debug, test_prob,
15980 			 &total_min_ratio, &num_bad_ratio, &num_tests);
15981     if (total_max_ratio > max_ratio)
15982       max_ratio = total_max_ratio;
15983 
15984     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
15985       min_ratio = total_min_ratio;
15986 
15987     total_bad_ratios += num_bad_ratio;
15988     total_tests += num_tests;
15989   }
15990 
15991   if (min_ratio == 1e308)
15992     min_ratio = 0.0;
15993 
15994   nr_routines++;
15995   if (total_bad_ratios == 0)
15996     printf("PASS> ");
15997   else {
15998     nr_failed_routines++;
15999     printf("FAIL> ");
16000   }
16001 
16002   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16003 	 fname, total_bad_ratios, total_tests, max_ratio);
16004 
16005   min_ratio = 1e308;
16006   max_ratio = 0.0;
16007   total_bad_ratios = 0;
16008   total_tests = 0;
16009   fname = "BLAS_cgemv2_s_s";
16010   printf("Testing %s...\n", fname);
16011   for (i = 0; i < nsizes; i++) {
16012     m = mn_pairs[i][0];
16013     n = mn_pairs[i][1];
16014     total_max_ratio =
16015       do_test_cgemv2_s_s(m, n, 1, &seed, thresh, debug, test_prob,
16016 			 &total_min_ratio, &num_bad_ratio, &num_tests);
16017     if (total_max_ratio > max_ratio)
16018       max_ratio = total_max_ratio;
16019 
16020     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16021       min_ratio = total_min_ratio;
16022 
16023     total_bad_ratios += num_bad_ratio;
16024     total_tests += num_tests;
16025   }
16026 
16027   if (min_ratio == 1e308)
16028     min_ratio = 0.0;
16029 
16030   nr_routines++;
16031   if (total_bad_ratios == 0)
16032     printf("PASS> ");
16033   else {
16034     nr_failed_routines++;
16035     printf("FAIL> ");
16036   }
16037 
16038   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16039 	 fname, total_bad_ratios, total_tests, max_ratio);
16040 
16041   min_ratio = 1e308;
16042   max_ratio = 0.0;
16043   total_bad_ratios = 0;
16044   total_tests = 0;
16045   fname = "BLAS_zgemv2_z_d";
16046   printf("Testing %s...\n", fname);
16047   for (i = 0; i < nsizes; i++) {
16048     m = mn_pairs[i][0];
16049     n = mn_pairs[i][1];
16050     total_max_ratio =
16051       do_test_zgemv2_z_d(m, n, 1, &seed, thresh, debug, test_prob,
16052 			 &total_min_ratio, &num_bad_ratio, &num_tests);
16053     if (total_max_ratio > max_ratio)
16054       max_ratio = total_max_ratio;
16055 
16056     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16057       min_ratio = total_min_ratio;
16058 
16059     total_bad_ratios += num_bad_ratio;
16060     total_tests += num_tests;
16061   }
16062 
16063   if (min_ratio == 1e308)
16064     min_ratio = 0.0;
16065 
16066   nr_routines++;
16067   if (total_bad_ratios == 0)
16068     printf("PASS> ");
16069   else {
16070     nr_failed_routines++;
16071     printf("FAIL> ");
16072   }
16073 
16074   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16075 	 fname, total_bad_ratios, total_tests, max_ratio);
16076 
16077   min_ratio = 1e308;
16078   max_ratio = 0.0;
16079   total_bad_ratios = 0;
16080   total_tests = 0;
16081   fname = "BLAS_zgemv2_d_z";
16082   printf("Testing %s...\n", fname);
16083   for (i = 0; i < nsizes; i++) {
16084     m = mn_pairs[i][0];
16085     n = mn_pairs[i][1];
16086     total_max_ratio =
16087       do_test_zgemv2_d_z(m, n, 1, &seed, thresh, debug, test_prob,
16088 			 &total_min_ratio, &num_bad_ratio, &num_tests);
16089     if (total_max_ratio > max_ratio)
16090       max_ratio = total_max_ratio;
16091 
16092     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16093       min_ratio = total_min_ratio;
16094 
16095     total_bad_ratios += num_bad_ratio;
16096     total_tests += num_tests;
16097   }
16098 
16099   if (min_ratio == 1e308)
16100     min_ratio = 0.0;
16101 
16102   nr_routines++;
16103   if (total_bad_ratios == 0)
16104     printf("PASS> ");
16105   else {
16106     nr_failed_routines++;
16107     printf("FAIL> ");
16108   }
16109 
16110   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16111 	 fname, total_bad_ratios, total_tests, max_ratio);
16112 
16113   min_ratio = 1e308;
16114   max_ratio = 0.0;
16115   total_bad_ratios = 0;
16116   total_tests = 0;
16117   fname = "BLAS_zgemv2_d_d";
16118   printf("Testing %s...\n", fname);
16119   for (i = 0; i < nsizes; i++) {
16120     m = mn_pairs[i][0];
16121     n = mn_pairs[i][1];
16122     total_max_ratio =
16123       do_test_zgemv2_d_d(m, n, 1, &seed, thresh, debug, test_prob,
16124 			 &total_min_ratio, &num_bad_ratio, &num_tests);
16125     if (total_max_ratio > max_ratio)
16126       max_ratio = total_max_ratio;
16127 
16128     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16129       min_ratio = total_min_ratio;
16130 
16131     total_bad_ratios += num_bad_ratio;
16132     total_tests += num_tests;
16133   }
16134 
16135   if (min_ratio == 1e308)
16136     min_ratio = 0.0;
16137 
16138   nr_routines++;
16139   if (total_bad_ratios == 0)
16140     printf("PASS> ");
16141   else {
16142     nr_failed_routines++;
16143     printf("FAIL> ");
16144   }
16145 
16146   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16147 	 fname, total_bad_ratios, total_tests, max_ratio);
16148 
16149   min_ratio = 1e308;
16150   max_ratio = 0.0;
16151   total_bad_ratios = 0;
16152   total_tests = 0;
16153   fname = "BLAS_sgemv2_x";
16154   printf("Testing %s...\n", fname);
16155   for (i = 0; i < nsizes; i++) {
16156     m = mn_pairs[i][0];
16157     n = mn_pairs[i][1];
16158     total_max_ratio =
16159       do_test_sgemv2_x(m, n, 1, &seed, thresh, debug, test_prob,
16160 		       &total_min_ratio, &num_bad_ratio, &num_tests);
16161     if (total_max_ratio > max_ratio)
16162       max_ratio = total_max_ratio;
16163 
16164     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16165       min_ratio = total_min_ratio;
16166 
16167     total_bad_ratios += num_bad_ratio;
16168     total_tests += num_tests;
16169   }
16170 
16171   if (min_ratio == 1e308)
16172     min_ratio = 0.0;
16173 
16174   nr_routines++;
16175   if (total_bad_ratios == 0)
16176     printf("PASS> ");
16177   else {
16178     nr_failed_routines++;
16179     printf("FAIL> ");
16180   }
16181 
16182   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16183 	 fname, total_bad_ratios, total_tests, max_ratio);
16184 
16185   min_ratio = 1e308;
16186   max_ratio = 0.0;
16187   total_bad_ratios = 0;
16188   total_tests = 0;
16189   fname = "BLAS_dgemv2_x";
16190   printf("Testing %s...\n", fname);
16191   for (i = 0; i < nsizes; i++) {
16192     m = mn_pairs[i][0];
16193     n = mn_pairs[i][1];
16194     total_max_ratio =
16195       do_test_dgemv2_x(m, n, 1, &seed, thresh, debug, test_prob,
16196 		       &total_min_ratio, &num_bad_ratio, &num_tests);
16197     if (total_max_ratio > max_ratio)
16198       max_ratio = total_max_ratio;
16199 
16200     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16201       min_ratio = total_min_ratio;
16202 
16203     total_bad_ratios += num_bad_ratio;
16204     total_tests += num_tests;
16205   }
16206 
16207   if (min_ratio == 1e308)
16208     min_ratio = 0.0;
16209 
16210   nr_routines++;
16211   if (total_bad_ratios == 0)
16212     printf("PASS> ");
16213   else {
16214     nr_failed_routines++;
16215     printf("FAIL> ");
16216   }
16217 
16218   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16219 	 fname, total_bad_ratios, total_tests, max_ratio);
16220 
16221   min_ratio = 1e308;
16222   max_ratio = 0.0;
16223   total_bad_ratios = 0;
16224   total_tests = 0;
16225   fname = "BLAS_cgemv2_x";
16226   printf("Testing %s...\n", fname);
16227   for (i = 0; i < nsizes; i++) {
16228     m = mn_pairs[i][0];
16229     n = mn_pairs[i][1];
16230     total_max_ratio =
16231       do_test_cgemv2_x(m, n, 1, &seed, thresh, debug, test_prob,
16232 		       &total_min_ratio, &num_bad_ratio, &num_tests);
16233     if (total_max_ratio > max_ratio)
16234       max_ratio = total_max_ratio;
16235 
16236     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16237       min_ratio = total_min_ratio;
16238 
16239     total_bad_ratios += num_bad_ratio;
16240     total_tests += num_tests;
16241   }
16242 
16243   if (min_ratio == 1e308)
16244     min_ratio = 0.0;
16245 
16246   nr_routines++;
16247   if (total_bad_ratios == 0)
16248     printf("PASS> ");
16249   else {
16250     nr_failed_routines++;
16251     printf("FAIL> ");
16252   }
16253 
16254   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16255 	 fname, total_bad_ratios, total_tests, max_ratio);
16256 
16257   min_ratio = 1e308;
16258   max_ratio = 0.0;
16259   total_bad_ratios = 0;
16260   total_tests = 0;
16261   fname = "BLAS_zgemv2_x";
16262   printf("Testing %s...\n", fname);
16263   for (i = 0; i < nsizes; i++) {
16264     m = mn_pairs[i][0];
16265     n = mn_pairs[i][1];
16266     total_max_ratio =
16267       do_test_zgemv2_x(m, n, 1, &seed, thresh, debug, test_prob,
16268 		       &total_min_ratio, &num_bad_ratio, &num_tests);
16269     if (total_max_ratio > max_ratio)
16270       max_ratio = total_max_ratio;
16271 
16272     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16273       min_ratio = total_min_ratio;
16274 
16275     total_bad_ratios += num_bad_ratio;
16276     total_tests += num_tests;
16277   }
16278 
16279   if (min_ratio == 1e308)
16280     min_ratio = 0.0;
16281 
16282   nr_routines++;
16283   if (total_bad_ratios == 0)
16284     printf("PASS> ");
16285   else {
16286     nr_failed_routines++;
16287     printf("FAIL> ");
16288   }
16289 
16290   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16291 	 fname, total_bad_ratios, total_tests, max_ratio);
16292 
16293   min_ratio = 1e308;
16294   max_ratio = 0.0;
16295   total_bad_ratios = 0;
16296   total_tests = 0;
16297   fname = "BLAS_dgemv2_d_s_x";
16298   printf("Testing %s...\n", fname);
16299   for (i = 0; i < nsizes; i++) {
16300     m = mn_pairs[i][0];
16301     n = mn_pairs[i][1];
16302     total_max_ratio =
16303       do_test_dgemv2_d_s_x(m, n, 1, &seed, thresh, debug, test_prob,
16304 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16305     if (total_max_ratio > max_ratio)
16306       max_ratio = total_max_ratio;
16307 
16308     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16309       min_ratio = total_min_ratio;
16310 
16311     total_bad_ratios += num_bad_ratio;
16312     total_tests += num_tests;
16313   }
16314 
16315   if (min_ratio == 1e308)
16316     min_ratio = 0.0;
16317 
16318   nr_routines++;
16319   if (total_bad_ratios == 0)
16320     printf("PASS> ");
16321   else {
16322     nr_failed_routines++;
16323     printf("FAIL> ");
16324   }
16325 
16326   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16327 	 fname, total_bad_ratios, total_tests, max_ratio);
16328 
16329   min_ratio = 1e308;
16330   max_ratio = 0.0;
16331   total_bad_ratios = 0;
16332   total_tests = 0;
16333   fname = "BLAS_dgemv2_s_d_x";
16334   printf("Testing %s...\n", fname);
16335   for (i = 0; i < nsizes; i++) {
16336     m = mn_pairs[i][0];
16337     n = mn_pairs[i][1];
16338     total_max_ratio =
16339       do_test_dgemv2_s_d_x(m, n, 1, &seed, thresh, debug, test_prob,
16340 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16341     if (total_max_ratio > max_ratio)
16342       max_ratio = total_max_ratio;
16343 
16344     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16345       min_ratio = total_min_ratio;
16346 
16347     total_bad_ratios += num_bad_ratio;
16348     total_tests += num_tests;
16349   }
16350 
16351   if (min_ratio == 1e308)
16352     min_ratio = 0.0;
16353 
16354   nr_routines++;
16355   if (total_bad_ratios == 0)
16356     printf("PASS> ");
16357   else {
16358     nr_failed_routines++;
16359     printf("FAIL> ");
16360   }
16361 
16362   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16363 	 fname, total_bad_ratios, total_tests, max_ratio);
16364 
16365   min_ratio = 1e308;
16366   max_ratio = 0.0;
16367   total_bad_ratios = 0;
16368   total_tests = 0;
16369   fname = "BLAS_dgemv2_s_s_x";
16370   printf("Testing %s...\n", fname);
16371   for (i = 0; i < nsizes; i++) {
16372     m = mn_pairs[i][0];
16373     n = mn_pairs[i][1];
16374     total_max_ratio =
16375       do_test_dgemv2_s_s_x(m, n, 1, &seed, thresh, debug, test_prob,
16376 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16377     if (total_max_ratio > max_ratio)
16378       max_ratio = total_max_ratio;
16379 
16380     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16381       min_ratio = total_min_ratio;
16382 
16383     total_bad_ratios += num_bad_ratio;
16384     total_tests += num_tests;
16385   }
16386 
16387   if (min_ratio == 1e308)
16388     min_ratio = 0.0;
16389 
16390   nr_routines++;
16391   if (total_bad_ratios == 0)
16392     printf("PASS> ");
16393   else {
16394     nr_failed_routines++;
16395     printf("FAIL> ");
16396   }
16397 
16398   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16399 	 fname, total_bad_ratios, total_tests, max_ratio);
16400 
16401   min_ratio = 1e308;
16402   max_ratio = 0.0;
16403   total_bad_ratios = 0;
16404   total_tests = 0;
16405   fname = "BLAS_zgemv2_z_c_x";
16406   printf("Testing %s...\n", fname);
16407   for (i = 0; i < nsizes; i++) {
16408     m = mn_pairs[i][0];
16409     n = mn_pairs[i][1];
16410     total_max_ratio =
16411       do_test_zgemv2_z_c_x(m, n, 1, &seed, thresh, debug, test_prob,
16412 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16413     if (total_max_ratio > max_ratio)
16414       max_ratio = total_max_ratio;
16415 
16416     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16417       min_ratio = total_min_ratio;
16418 
16419     total_bad_ratios += num_bad_ratio;
16420     total_tests += num_tests;
16421   }
16422 
16423   if (min_ratio == 1e308)
16424     min_ratio = 0.0;
16425 
16426   nr_routines++;
16427   if (total_bad_ratios == 0)
16428     printf("PASS> ");
16429   else {
16430     nr_failed_routines++;
16431     printf("FAIL> ");
16432   }
16433 
16434   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16435 	 fname, total_bad_ratios, total_tests, max_ratio);
16436 
16437   min_ratio = 1e308;
16438   max_ratio = 0.0;
16439   total_bad_ratios = 0;
16440   total_tests = 0;
16441   fname = "BLAS_zgemv2_c_z_x";
16442   printf("Testing %s...\n", fname);
16443   for (i = 0; i < nsizes; i++) {
16444     m = mn_pairs[i][0];
16445     n = mn_pairs[i][1];
16446     total_max_ratio =
16447       do_test_zgemv2_c_z_x(m, n, 1, &seed, thresh, debug, test_prob,
16448 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16449     if (total_max_ratio > max_ratio)
16450       max_ratio = total_max_ratio;
16451 
16452     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16453       min_ratio = total_min_ratio;
16454 
16455     total_bad_ratios += num_bad_ratio;
16456     total_tests += num_tests;
16457   }
16458 
16459   if (min_ratio == 1e308)
16460     min_ratio = 0.0;
16461 
16462   nr_routines++;
16463   if (total_bad_ratios == 0)
16464     printf("PASS> ");
16465   else {
16466     nr_failed_routines++;
16467     printf("FAIL> ");
16468   }
16469 
16470   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16471 	 fname, total_bad_ratios, total_tests, max_ratio);
16472 
16473   min_ratio = 1e308;
16474   max_ratio = 0.0;
16475   total_bad_ratios = 0;
16476   total_tests = 0;
16477   fname = "BLAS_zgemv2_c_c_x";
16478   printf("Testing %s...\n", fname);
16479   for (i = 0; i < nsizes; i++) {
16480     m = mn_pairs[i][0];
16481     n = mn_pairs[i][1];
16482     total_max_ratio =
16483       do_test_zgemv2_c_c_x(m, n, 1, &seed, thresh, debug, test_prob,
16484 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16485     if (total_max_ratio > max_ratio)
16486       max_ratio = total_max_ratio;
16487 
16488     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16489       min_ratio = total_min_ratio;
16490 
16491     total_bad_ratios += num_bad_ratio;
16492     total_tests += num_tests;
16493   }
16494 
16495   if (min_ratio == 1e308)
16496     min_ratio = 0.0;
16497 
16498   nr_routines++;
16499   if (total_bad_ratios == 0)
16500     printf("PASS> ");
16501   else {
16502     nr_failed_routines++;
16503     printf("FAIL> ");
16504   }
16505 
16506   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16507 	 fname, total_bad_ratios, total_tests, max_ratio);
16508 
16509   min_ratio = 1e308;
16510   max_ratio = 0.0;
16511   total_bad_ratios = 0;
16512   total_tests = 0;
16513   fname = "BLAS_cgemv2_c_s_x";
16514   printf("Testing %s...\n", fname);
16515   for (i = 0; i < nsizes; i++) {
16516     m = mn_pairs[i][0];
16517     n = mn_pairs[i][1];
16518     total_max_ratio =
16519       do_test_cgemv2_c_s_x(m, n, 1, &seed, thresh, debug, test_prob,
16520 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16521     if (total_max_ratio > max_ratio)
16522       max_ratio = total_max_ratio;
16523 
16524     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16525       min_ratio = total_min_ratio;
16526 
16527     total_bad_ratios += num_bad_ratio;
16528     total_tests += num_tests;
16529   }
16530 
16531   if (min_ratio == 1e308)
16532     min_ratio = 0.0;
16533 
16534   nr_routines++;
16535   if (total_bad_ratios == 0)
16536     printf("PASS> ");
16537   else {
16538     nr_failed_routines++;
16539     printf("FAIL> ");
16540   }
16541 
16542   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16543 	 fname, total_bad_ratios, total_tests, max_ratio);
16544 
16545   min_ratio = 1e308;
16546   max_ratio = 0.0;
16547   total_bad_ratios = 0;
16548   total_tests = 0;
16549   fname = "BLAS_cgemv2_s_c_x";
16550   printf("Testing %s...\n", fname);
16551   for (i = 0; i < nsizes; i++) {
16552     m = mn_pairs[i][0];
16553     n = mn_pairs[i][1];
16554     total_max_ratio =
16555       do_test_cgemv2_s_c_x(m, n, 1, &seed, thresh, debug, test_prob,
16556 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16557     if (total_max_ratio > max_ratio)
16558       max_ratio = total_max_ratio;
16559 
16560     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16561       min_ratio = total_min_ratio;
16562 
16563     total_bad_ratios += num_bad_ratio;
16564     total_tests += num_tests;
16565   }
16566 
16567   if (min_ratio == 1e308)
16568     min_ratio = 0.0;
16569 
16570   nr_routines++;
16571   if (total_bad_ratios == 0)
16572     printf("PASS> ");
16573   else {
16574     nr_failed_routines++;
16575     printf("FAIL> ");
16576   }
16577 
16578   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16579 	 fname, total_bad_ratios, total_tests, max_ratio);
16580 
16581   min_ratio = 1e308;
16582   max_ratio = 0.0;
16583   total_bad_ratios = 0;
16584   total_tests = 0;
16585   fname = "BLAS_cgemv2_s_s_x";
16586   printf("Testing %s...\n", fname);
16587   for (i = 0; i < nsizes; i++) {
16588     m = mn_pairs[i][0];
16589     n = mn_pairs[i][1];
16590     total_max_ratio =
16591       do_test_cgemv2_s_s_x(m, n, 1, &seed, thresh, debug, test_prob,
16592 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16593     if (total_max_ratio > max_ratio)
16594       max_ratio = total_max_ratio;
16595 
16596     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16597       min_ratio = total_min_ratio;
16598 
16599     total_bad_ratios += num_bad_ratio;
16600     total_tests += num_tests;
16601   }
16602 
16603   if (min_ratio == 1e308)
16604     min_ratio = 0.0;
16605 
16606   nr_routines++;
16607   if (total_bad_ratios == 0)
16608     printf("PASS> ");
16609   else {
16610     nr_failed_routines++;
16611     printf("FAIL> ");
16612   }
16613 
16614   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16615 	 fname, total_bad_ratios, total_tests, max_ratio);
16616 
16617   min_ratio = 1e308;
16618   max_ratio = 0.0;
16619   total_bad_ratios = 0;
16620   total_tests = 0;
16621   fname = "BLAS_zgemv2_z_d_x";
16622   printf("Testing %s...\n", fname);
16623   for (i = 0; i < nsizes; i++) {
16624     m = mn_pairs[i][0];
16625     n = mn_pairs[i][1];
16626     total_max_ratio =
16627       do_test_zgemv2_z_d_x(m, n, 1, &seed, thresh, debug, test_prob,
16628 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16629     if (total_max_ratio > max_ratio)
16630       max_ratio = total_max_ratio;
16631 
16632     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16633       min_ratio = total_min_ratio;
16634 
16635     total_bad_ratios += num_bad_ratio;
16636     total_tests += num_tests;
16637   }
16638 
16639   if (min_ratio == 1e308)
16640     min_ratio = 0.0;
16641 
16642   nr_routines++;
16643   if (total_bad_ratios == 0)
16644     printf("PASS> ");
16645   else {
16646     nr_failed_routines++;
16647     printf("FAIL> ");
16648   }
16649 
16650   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16651 	 fname, total_bad_ratios, total_tests, max_ratio);
16652 
16653   min_ratio = 1e308;
16654   max_ratio = 0.0;
16655   total_bad_ratios = 0;
16656   total_tests = 0;
16657   fname = "BLAS_zgemv2_d_z_x";
16658   printf("Testing %s...\n", fname);
16659   for (i = 0; i < nsizes; i++) {
16660     m = mn_pairs[i][0];
16661     n = mn_pairs[i][1];
16662     total_max_ratio =
16663       do_test_zgemv2_d_z_x(m, n, 1, &seed, thresh, debug, test_prob,
16664 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16665     if (total_max_ratio > max_ratio)
16666       max_ratio = total_max_ratio;
16667 
16668     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16669       min_ratio = total_min_ratio;
16670 
16671     total_bad_ratios += num_bad_ratio;
16672     total_tests += num_tests;
16673   }
16674 
16675   if (min_ratio == 1e308)
16676     min_ratio = 0.0;
16677 
16678   nr_routines++;
16679   if (total_bad_ratios == 0)
16680     printf("PASS> ");
16681   else {
16682     nr_failed_routines++;
16683     printf("FAIL> ");
16684   }
16685 
16686   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16687 	 fname, total_bad_ratios, total_tests, max_ratio);
16688 
16689   min_ratio = 1e308;
16690   max_ratio = 0.0;
16691   total_bad_ratios = 0;
16692   total_tests = 0;
16693   fname = "BLAS_zgemv2_d_d_x";
16694   printf("Testing %s...\n", fname);
16695   for (i = 0; i < nsizes; i++) {
16696     m = mn_pairs[i][0];
16697     n = mn_pairs[i][1];
16698     total_max_ratio =
16699       do_test_zgemv2_d_d_x(m, n, 1, &seed, thresh, debug, test_prob,
16700 			   &total_min_ratio, &num_bad_ratio, &num_tests);
16701     if (total_max_ratio > max_ratio)
16702       max_ratio = total_max_ratio;
16703 
16704     if (total_min_ratio != 0 && total_min_ratio < min_ratio)
16705       min_ratio = total_min_ratio;
16706 
16707     total_bad_ratios += num_bad_ratio;
16708     total_tests += num_tests;
16709   }
16710 
16711   if (min_ratio == 1e308)
16712     min_ratio = 0.0;
16713 
16714   nr_routines++;
16715   if (total_bad_ratios == 0)
16716     printf("PASS> ");
16717   else {
16718     nr_failed_routines++;
16719     printf("FAIL> ");
16720   }
16721 
16722   printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
16723 	 fname, total_bad_ratios, total_tests, max_ratio);
16724 
16725 
16726 
16727   printf("\n");
16728   if (nr_failed_routines)
16729     printf("FAILED ");
16730   else
16731     printf("PASSED ");
16732   printf("%-10s: FAIL/TOTAL = %d/%d\n",
16733 	 base_routine, nr_failed_routines, nr_routines);
16734 
16735   return 0;
16736 }
16737