1dnl Generates test code for gemv2
2#include <stdlib.h>
3#include <stdio.h>
4#include <math.h>
5#include "blas_extended.h"
6#include "blas_extended_private.h"
7#include "blas_extended_test.h"
8include(cblas.m4)dnl
9include(test-common.m4)dnl
10include(gemv2/gemv2-common.m4)dnl
11dnl
12dnl
13dnl -----------------------------------------------------------------------
14dnl Usage: DO_TEST_GEMV2_COMMENT(extended)
15dnl        if extended, then print info about prec loop in the code
16dnl        structure
17dnl -----------------------------------------------------------------------
18define(`DO_TEST_GEMV2_COMMENT',`
19/*
20 * Purpose
21 * =======
22 *
23 * Runs a series of tests on GEMV2.
24 *
25 * Arguments
26 * =========
27 *
28 * m         (input) int
29 *           The number of rows
30 *
31 * n         (input) int
32 *           The number of columns
33 *
34 * ntests    (input) int
35 *           The number of tests to run for each set of attributes.
36 *
37 * seed      (input/output) int
38 *           The seed for the random number generator used in testgen().
39 *
40 * thresh    (input) double
41 *           When the ratio returned from test() exceeds the specified
42 *           threshold, the current size, r_true, r_comp, and ratio will be
43 *           printed.  (Since ratio is supposed to be O(1), we can set thresh
44 *           to ~10.)
45 *
46 * debug     (input) int
47 *           If debug=3, print summary
48 *           If debug=2, print summary only if the number of bad ratios > 0
49 *           If debug=1, print complete info if tests fail
50 *           If debug=0, return max ratio
51 *
52 * test_prob (input) float
53 *           The specified test will be performed only if the generated
54 *           random exceeds this threshold.
55 *
56 * min_ratio (output) double
57 *           The minimum ratio
58 *
59 * num_bad_ratio (output) int
60 *               The number of tests fail; they are above the threshold.
61 *
62 * num_tests (output) int
63 *           The number of tests is being performed.
64 *
65 * Return value
66 * ============
67 *
68 * The maximum ratio if run successfully, otherwise return -1
69 *
70 * Code structure
71 * ==============
72 *
73 *  debug loop  -- if debug is one, the first loop computes the max ratio
74 *              -- and the last(second) loop outputs debugging information,
75 *              -- if the test fail and its ratio > 0.5 * max ratio.
76 *              -- if debug is zero, the loop is executed once
77 *    alpha loop  -- varying alpha: 0, 1, or random
78 *      beta loop   -- varying beta: 0, 1, or random
79ifelse(`$1', `_x',` *        prec loop   -- varying internal prec: single, double, or extra', `')
80 *          norm loop   -- varying norm: near undeflow, near one, or
81 *                        -- near overflow
82 *            numtest loop  -- how many times the test is perform with
83 *                            -- above set of attributes
84 *              order loop   -- varying order type: rowmajor or colmajor
85 *                trans loop    -- varying uplo type: upper or lower
86 *                  lda loop      -- varying lda: m, m+1, 2m
87 *                    incx loop     -- varying incx: -2, -1, 1, 2
88 *                      incy loop     -- varying incy: -2, -1, 1, 2
89 */')dnl
90dnl
91dnl
92dnl
93dnl -----------------------------------------------------------------------
94dnl Usage: DO_TEST_GEMV2(abr_typeltr, x_typeltr, y_typeltr, extended)
95dnl
96dnl        aby_typeltr : the type and precision of alpha, beta and y
97dnl        A_typeltr   : the type and precision of A
98dnl        x_typeltr   : the type and precision of x
99dnl        extended    : `' if no extended, or `_x' if extended
100dnl Each type and precision specifier can be one of
101dnl        s    ... real and single
102dnl        d    ... real and double
103dnl        c    ... complex and single
104dnl        z    ... complex and double
105dnl ----------------------------------------------------------------------
106define(`DO_TEST_GEMV2',
107  `double DO_TEST_GEMV2_NAME($1, $2, $3, $4)(int m, int n, int ntests, dnl
108       int *seed, double thresh, int debug, float test_prob, dnl
109       double *min_ratio, int *num_bad_ratio, int *num_tests)
110DO_TEST_GEMV2_COMMENT($4)
111DO_TEST_GEMV2_BODY($1, $2, $3, $4)')dnl
112dnl
113dnl
114dnl --------------------------------------------------------------------
115dnl Usage: DO_TEST_GEMV2_BODY(aby_typeltr, A_typeltr, x_typeltr, extended)
116dnl
117dnl        aby_typeltr : the type and precision of alpha, beta, y
118dnl        A_typeltr   : the type and precision of A
119dnl        x_typeltr   : the type and precision of x
120dnl        extended    : `' if no extended, or `_x' if extended
121dnl Each type and precision specifier can be one of
122dnl        s    ... real and single
123dnl        d    ... real and double
124dnl        c    ... complex and single
125dnl        z    ... complex and double
126dnl ---------------------------------------------------------------------
127define(`DO_TEST_GEMV2_BODY',
128`{
129  /* function name */
130  const char fname[] = "GEMV2_NAME($1, $2, $3, $4)";
131
132  /* max number of debug lines to print */
133  const int max_print = 8;
134
135  /* Variables in the "x_val" form are loop vars for corresponding
136     variables */
137  int i;            /* iterate through the repeating tests */
138  int j, k;         /* multipurpose counters or variables */
139  int iy;           /* use to index y */
140  int incx_val, incy_val, /* for testing different inc values */
141      incx, incy;
142  int incy_gen;     /* for complex case inc=2, for real case inc=1 */
143  int d_count;      /* counter for debug */
144  int find_max_ratio; /* find_max_ratio = 1 only if debug = 3 */
145  int p_count;      /* counter for the number of debug lines printed*/
146  int tot_tests;    /* total number of tests to be done */
147  int norm;         /* input values of near underflow/one/overflow */
148  double ratio_max; /* the current maximum ratio */
149  double ratio_min; /* the current minimum ratio */
150  double *ratios;   /* a temporary variable for calculating ratio */
151  double ratio;     /* the per-use test ratio from test() */
152  int bad_ratios;   /* the number of ratios over the threshold */
153  double eps_int;   /* the internal epsilon expected--2^(-24) for float */
154  double un_int;    /* the internal underflow threshold */
155  DECLARE(alpha, $1_type)
156  DECLARE(beta, $1_type)
157  DECLARE_VECTOR(A, $2_type)
158  DECLARE_VECTOR(head_x, $3_type)
159  DECLARE_VECTOR(tail_x, $3_type)
160  DECLARE_VECTOR(y, $1_type)
161  DECLARE_VECTOR(temp, $2_type) /* use for calculating ratio */
162
163  /* x_gen and y_gen are used to store vectors generated by testgen.
164     they eventually are copied back to x and y */
165  DECLARE_VECTOR(head_x_gen, $3_type)
166  DECLARE_VECTOR(tail_x_gen, $3_type)
167  DECLARE_VECTOR(y_gen, $1_type)
168
169  /* the true r calculated by testgen(), in double-double */
170  DECLARE_VECTOR(r_true, EXTRA_TYPE($1_type))
171  int alpha_val;
172  int alpha_flag;   /* input flag for TESTGEN_GEMV2_NAME($1, $2, $3, $4) */
173  int beta_val;
174  int beta_flag;    /* input flag for TESTGEN_GEMV2_NAME($1, $2, $3, $4) */
175  int order_val;
176  enum blas_order_type order_type;
177  ifelse(`$4', `_x', `int prec_val;')
178  enum blas_prec_type prec;
179  int trans_val;
180  enum blas_trans_type trans_type;
181  int m_i;
182  int n_i;
183  int max_mn; /* the max of m and n */
184  int lda_val;
185  int lda;
186  int saved_seed;   /* for saving the original seed */
187  int count, old_count;  /* use for counting the number of testgen calls * 2 */
188
189  FPU_FIX_DECL;
190
191  /* test for bad arguments */
192  if (n < 0 || m < 0 || ntests < 0) BLAS_error(fname, 0, 0, NULL);
193
194  /* initialization */
195  *num_bad_ratio = 0;
196  *num_tests = 0;
197  *min_ratio = 0.0;
198
199  saved_seed = *seed;
200  ratio_min = 1e308;
201  ratio_max = 0.0;
202  ratio = 0.0;
203  tot_tests = 0;
204  p_count = 0;
205  count = 0;
206  find_max_ratio = 0;
207  bad_ratios = 0;
208  old_count = 0;
209
210  if (debug == 3)
211    find_max_ratio = 1;
212  max_mn = MAX(m, n);
213  if (m == 0 || n == 0) {
214    return 0.0;
215  }
216
217  FPU_FIX_START;
218
219  incy_gen = 1;
220  INC_ADJUST(incy_gen, $1_type)
221
222  /* get space for calculation */
223  MALLOC_VECTOR(head_x, $3_type, max_mn*2)
224  MALLOC_VECTOR(tail_x, $3_type, max_mn*2)
225  MALLOC_VECTOR(y, $1_type, max_mn*2)
226  MALLOC_VECTOR(head_x_gen, $3_type, max_mn)
227  MALLOC_VECTOR(tail_x_gen, $3_type, max_mn)
228  MALLOC_VECTOR(y_gen, $1_type, max_mn)
229  MALLOC_VECTOR(temp, $2_type, max_mn)
230  MALLOC_VECTOR(r_true, EXTRA_TYPE($1_type), max_mn)
231  MALLOC_VECTOR(ratios, real_D, max_mn)
232  MALLOC_VECTOR(A, $2_type, (m-1+n-1+1)*max_mn*2)
233
234  /* The debug iteration:
235     If debug=1, then will execute the iteration twice. First, compute the
236     max ratio. Second, print info if ratio > (50% * ratio_max). */
237  for (d_count=0; d_count<= find_max_ratio; d_count++) {
238    bad_ratios = 0; /* set to zero */
239
240    if ((debug == 3) && (d_count == find_max_ratio))
241      *seed = saved_seed; /* restore the original seed */
242
243    /* varying alpha */
244    for (alpha_val=0; alpha_val<3; alpha_val++) {
245      SET_ALPHA($1_type)
246
247      /* varying beta */
248      for (beta_val=0; beta_val<3; beta_val++) {
249        SET_BETA($1_type)
250
251        ifelse($4, _x, `
252        /* varying extra precs */
253        for (prec_val = 0; prec_val <= 2; prec_val++) {')
254          SET_INTERNAL_PARAMS($1_type, $4)
255
256          /* values near underflow, 1, or overflow */
257          for (norm = -1; norm <= 1; norm++) {
258
259            /* number of tests */
260            for (i=0; i<ntests; i++){
261
262              /* row or col major */
263              for (order_val=0; order_val<2; order_val++) {
264                switch(order_val){
265                case 0: order_type = blas_rowmajor; break;
266                case 1: default: order_type = blas_colmajor; break;
267                }
268
269                /* no_trans, trans, or conj_trans */
270                for (trans_val=0; trans_val<3; trans_val++) {
271                  switch(trans_val){
272                  case 0: trans_type = blas_no_trans; m_i=m; n_i=n; break;
273                  case 1: trans_type = blas_trans; m_i=n; n_i=m; break;
274                  case 2: default: trans_type = blas_conj_trans; m_i=n; n_i=m; break;
275                  }
276
277                      /* lda=n, n+1, or 2n*/
278                      for (lda_val=0; lda_val<3; lda_val++){
279                        switch(lda_val){
280                        case 0: lda=m_i; break;
281                        case 1: lda=m_i+1; break;
282                        case 2: default: lda=2*m_i; break;
283                        }
284                        if ((order_type == blas_rowmajor && lda < n) ||
285                           (order_type == blas_colmajor && lda < m))
286                          continue;
287
288                        /* For the sake of speed, we throw out this case at random */
289                        if ( xrand(seed) >= test_prob ) continue;
290
291                        /* in the trivial cases, no need to run testgen */
292                        if (m > 0 && n > 0)
293                          TESTGEN_GEMV2_NAME($1, $2, $3)(norm, order_type, dnl
294                              trans_type, m, n, &alpha, alpha_flag, A, dnl
295                              lda, head_x_gen, tail_x_gen, &beta, dnl
296                              beta_flag, y_gen, seed, HEAD(r_true), TAIL(r_true));
297
298                        count++;
299
300                        /* varying incx */
301                        for (incx_val = -2; incx_val <= 2; incx_val++){
302                          if (incx_val == 0) continue;
303
304                          /* setting incx */
305                          incx = incx_val;
306                          INC_ADJUST(incx, $3_type)
307
308                          $3copy_vector(head_x_gen, n_i, 1, head_x, incx_val);
309                          $3copy_vector(tail_x_gen, n_i, 1, tail_x, incx_val);
310
311                          /* varying incy */
312                          for (incy_val = -2; incy_val <= 2; incy_val++){
313                            if (incy_val == 0) continue;
314
315                            /* setting incy */
316                            incy = incy_val;
317                            INC_ADJUST(incy, $1_type)
318
319                            $1copy_vector(y_gen, m_i, 1, y, incy_val);
320
321                            /* call GEMV2_NAME($1, $2, $3, $4) */
322                            FPU_FIX_STOP;
323                            GEMV2_NAME($1, $2, $3, $4)(order_type, trans_type, dnl
324                                m, n, alpha, A, lda, head_x, tail_x, incx_val, dnl
325                                beta, y, incy_val ifelse(`$4', `_x', `, prec'));
326                            FPU_FIX_START;
327
328                            /* set y starting index */
329                            iy=0;
330                            if (incy < 0) iy = -(m_i-1)*incy;
331
332                            /* computing the ratio */
333                            if (m>0 && n>0)
334                            for(j=0, k=0; j<m_i; j++, k += incy_gen){
335                              /* copy row j of A to temp */
336                              $2ge_copy_row(order_type, trans_type, m_i, n_i, A, lda, temp, j);
337
338                              TEST_DOT2_NAME($1, $2, $3, $4)(n_i, blas_no_conj, dnl
339                                  alpha, beta, VECTOR_ELEMENT(y_gen, k, $1_type), dnl
340                                  VECTOR_ELEMENT(y, iy, $1_type), dnl
341                                  VECTOR_ELEMENT(r_true, k, EXTRA_TYPE($1_type)), dnl
342                                  temp, 1, head_x, tail_x, incx_val, eps_int, dnl
343                                  un_int, &ratios[j]);
344
345                              /* take the max ratio */
346                              if (j==0) {
347                                ratio = ratios[0];
348                              /* The !<= below causes NaN error to be detected.
349                                 Note that (NaN > thresh) is always false. */
350                              } else if ( !(ratios[j] <= ratio) ) {
351                                ratio = ratios[j];
352                              }
353                              iy += incy;
354                            }
355
356                            /* Increase the number of bad ratio, if the ratio
357                               is bigger than the threshold.
358                               The !<= below causes NaN error to be detected.
359                               Note that (NaN > thresh) is always false. */
360                            if ( !(ratio <= thresh) ) {
361                              bad_ratios++;
362
363                              if ((debug==3) &&          /* print only when debug is on */
364                                  (count != old_count) && /* print if old vector is different
365                                                             from the current one */
366                                  (d_count == find_max_ratio) &&
367                                  (p_count <= max_print) &&
368                                  (ratio > 0.5*ratio_max))
369                              {
370                                old_count = count;
371
372                                printf("FAIL> %s: m = %d, n = %d, ntests = %d, threshold = %4.2f,\n",
373                                        fname, m, n, ntests, thresh);
374
375                                /* Print test info */
376                                PRINT_PREC(prec)
377                                PRINT_NORM(norm)
378                                PRINT_ORDER(order_type)
379                                PRINT_TRANS(trans_type)
380
381                                printf("lda=%d, incx=%d, incy=%d:\n", lda, incx, incy);
382
383                                $2ge_print_matrix(A, m_i, n_i, lda, order_type, "A");
384
385                                $3print_vector(head_x, n_i, incx_val, "head_x");
386                                $3print_vector(tail_x, n_i, incx_val, "tail_x");
387                                $1print_vector(y_gen, m_i, 1, "y_gen");
388                                $1print_vector(y, m_i, incy_val, "y_final");
389
390                                printf("      "); PRINT_VAR(alpha, $1_type)
391                                printf("\n      "); PRINT_VAR(beta, $1_type)
392                                printf("\n");
393                                for (j=0, k=0; j<m_i*incy_gen; j += incy_gen, k++){
394                                  printf("      ");
395                                  PRINT_ARRAY_ELEM(r_true, j, EXTRA_TYPE($1_type))
396                                  printf(", ratio[%d]=%.4e\n", k, ratios[k]);
397                                }
398
399                                printf("      ratio=%.4e\n", ratio);
400                                p_count++;
401                              }
402                              if (bad_ratios >= MAX_BAD_TESTS) {
403                                printf("\ntoo many failures, exiting....");
404                                printf("\nTesting and compilation");
405                                printf(" are incomplete\n\n");
406                                goto end;
407                              }
408                              if ( !(ratio <= TOTAL_FAILURE_THRESHOLD) ) {
409                                printf("\nFlagrant ratio error, exiting...");
410                                printf("\nTesting and compilation");
411                                printf(" are incomplete\n\n");
412                                goto end;
413                              }
414                            }
415                            if (d_count==0) {
416                              if (ratio > ratio_max)
417                                ratio_max = ratio;
418
419                              if (ratio != 0.0 && ratio < ratio_min)
420                                ratio_min = ratio;
421
422                              tot_tests++;
423                            }
424                          }/* incy */
425                        }/* incx */
426                      }/* lda */
427                } /* trans */
428              } /* order */
429            } /* tests */
430          } /* norm */
431ifelse(`$4', `_x', `         } /* prec */')
432      } /* beta */
433    } /* alpha */
434  } /* debug */
435
436  if ((debug == 2) ||
437     ((debug == 1) && bad_ratios > 0)){
438    printf("      %s:  m = %d, n = %d, ntests = %d, thresh = %4.2f\n",
439            fname, m, n, ntests, thresh);
440    printf("      bad/total = %d/%d=%3.2f, min_ratio = %.4e, max_ratio = %.4e\n\n",
441            bad_ratios, tot_tests,
442           ((double)bad_ratios)/((double)tot_tests), ratio_min, ratio_max);
443  }
444
445end:
446  FPU_FIX_STOP;
447
448  FREE_VECTOR(head_x, $3_type)
449  FREE_VECTOR(tail_x, $3_type)
450  FREE_VECTOR(y, $1_type)
451  FREE_VECTOR(head_x_gen, $3_type)
452  FREE_VECTOR(tail_x_gen, $3_type)
453  FREE_VECTOR(y_gen, $1_type)
454  FREE_VECTOR(temp, $2_type)
455  FREE_VECTOR(A, $2_type)
456  FREE_VECTOR(r_true, EXTRA_TYPE($1_type))
457  FREE_VECTOR(ratios, real_D)
458
459  *min_ratio = ratio_min;
460  *num_bad_ratio = bad_ratios;
461  *num_tests = tot_tests;
462  return ratio_max;
463}' )dnl
464dnl
465dnl
466dnl
467dnl --------------------------------------------------------------------
468dnl Usage: TESTGEN_GEMV2_NAME(aby_typeltr, A_typeltr, x_typeltr)
469dnl        create a testgen_gemv2 name
470dnl --------------------------------------------------------------------
471define(`TESTGEN_GEMV2_NAME', `ifelse(
472        `$2&&$3', `$1&&$1', `BLAS_$1gemv2_testgen',
473                            `BLAS_$1gemv2_$2_$3_testgen')')dnl
474dnl
475dnl
476dnl -------------------------------------------------------------------
477dnl Usage: DO_TEST_GEMV2_NAME(abr_typeltr, x_typeltr, y_typeltr, extended)
478dnl        create do_test_gemv2 name
479dnl -------------------------------------------------------------------
480define(`DO_TEST_GEMV2_NAME', `ifelse(
481        `$2&&$3', `$1&&$1',
482        `do_test_$1gemv2$4',
483        `do_test_$1gemv2_$2_$3$4')') dnl
484dnl
485dnl
486dnl
487dnl -------------------------------------------------------------------
488dnl Usage: CALL_DO_TEST_GEMV2(abr_typeltr, x_typeltr, y_typeltr, extended)
489dnl
490dnl        abr_type : the type and precision of alpha, beta and r
491dnl        x_type   : the type and precision of x
492dnl        y_type   : the type and precision of y
493dnl        extended : `' if no extended, or `_x' if extended
494dnl Each type and precision specifier can be one of
495dnl        s    ... real and single
496dnl        d    ... real and double
497dnl        c    ... complex and single
498dnl        z    ... complex and double
499dnl -------------------------------------------------------------------
500define(`CALL_DO_TEST_GEMV2',
501        `  min_ratio = 1e308; max_ratio = 0.0;
502           total_bad_ratios = 0; total_tests = 0;
503           fname = "GEMV2_NAME($1, $2, $3, $4)";
504           printf("Testing %s...\n", fname);
505           for(i=0; i<nsizes; i++){
506             m = mn_pairs[i][0];
507             n = mn_pairs[i][1];
508             total_max_ratio = DO_TEST_GEMV2_NAME($1, $2, $3, $4)(m, n, 1, dnl
509                 &seed, thresh, debug, test_prob, &total_min_ratio, dnl
510                 &num_bad_ratio, &num_tests);
511             if (total_max_ratio > max_ratio)
512               max_ratio = total_max_ratio;
513
514             if (total_min_ratio != 0 && total_min_ratio < min_ratio)
515               min_ratio = total_min_ratio;
516
517             total_bad_ratios += num_bad_ratio;
518             total_tests += num_tests;
519           }
520
521           if (min_ratio == 1e308) min_ratio = 0.0;
522
523           nr_routines++;
524           if (total_bad_ratios == 0)
525             printf("PASS> ");
526           else{
527             nr_failed_routines++;
528             printf("FAIL> ");
529           }
530
531           printf("%-24s: bad/total = %d/%d, max_ratio = %.2e\n\n",
532               fname, total_bad_ratios, total_tests, max_ratio);
533')dnl
534dnl
535dnl
536dnl
537FOREACH(`GEMV2_ARGS', `
538DO_TEST_GEMV2(arg)')dnl
539
540#define NUMPAIRS 12
541MAIN(`
542  int m, i;
543  int mn_pairs[NUMPAIRS][2]={{0, 0}, {1, 0}, {0, 1}, {1, 1}, {1, 2}, {2, 1},
544                             {3, 1}, {2, 3}, {3, 3}, {2, 4}, {6, 6}, {10, 8}};', `
545FOREACH(`GEMV2_ARGS', `
546CALL_DO_TEST_GEMV2(arg)')')dnl
547
548