1 /******************************************************************************
2 * Copyright (c) Intel Corporation - All rights reserved.                      *
3 * This file is part of the LIBXSMM library.                                   *
4 *                                                                             *
5 * For information on the license, see the LICENSE file.                       *
6 * Further information: https://github.com/hfp/libxsmm/                        *
7 * SPDX-License-Identifier: BSD-3-Clause                                       *
8 ******************************************************************************/
9 /* Evangelos Georganas (Intel Corp.)
10 ******************************************************************************/
11 #include <libxsmm.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <math.h>
16 
17 
18 LIBXSMM_INLINE
sfill_matrix(float * matrix,unsigned int ld,unsigned int m,unsigned int n)19 void sfill_matrix ( float *matrix, unsigned int ld, unsigned int m, unsigned int n )
20 {
21   unsigned int i, j;
22   double dtmp;
23 
24   if ( ld < m )
25   {
26      fprintf(stderr,"Error is sfill_matrix: ld=%u m=%u mismatched!\n",ld,m);
27      exit(EXIT_FAILURE);
28   }
29   for ( j = 1; j <= n; j++ )
30   {
31      /* Fill through the leading dimension */
32      for ( i = 1; i <= ld; i++ )
33      {
34         dtmp = 1.0 - 2.0*libxsmm_rng_f64();
35         matrix [ (j-1)*ld + (i-1) ] = (float) dtmp;
36      }
37   }
38 }
39 
main(int argc,char * argv[])40 int main(int argc, char* argv[])
41 {
42   unsigned int m = 64, n = 64, perform_scale = 1, perform_shift = 1, perform_bias = 1, scale_rows = 1, vectors_size, i, j, k, iters = 10000;
43   libxsmm_blasint ld_in = 64, ld_out = 64;
44   float  *sinp, *sout, *scale_vals, *shift_vals, *bias_vals, *ref_out;
45   libxsmm_meltw_scal_flags jit_flags = 0;
46   libxsmm_meltwfunction_scale kernel;
47   libxsmm_meltw_scale_param params;
48   libxsmm_matdiff_info norms_out;
49   unsigned long long l_start, l_end;
50   double l_total = 0.0, l_total2 = 0.0;
51 
52   libxsmm_init();
53 
54   libxsmm_matdiff_clear(&norms_out);
55 
56   if ( argc > 1 ) m             = atoi(argv[1]);
57   if ( argc > 2 ) n             = atoi(argv[2]);
58   if ( argc > 3 ) ld_in         = atoi(argv[3]);
59   if ( argc > 4 ) ld_out        = atoi(argv[4]);
60   if ( argc > 5 ) perform_shift = atoi(argv[5]);
61   if ( argc > 6 ) perform_scale = atoi(argv[6]);
62   if ( argc > 7 ) perform_bias  = atoi(argv[7]);
63   if ( argc > 8 ) scale_rows    = atoi(argv[8]);
64   if ( argc > 9 ) iters         = atoi(argv[9]);
65 
66   m = LIBXSMM_MAX(m,1);
67   n = LIBXSMM_MAX(n,1);
68   ld_in = LIBXSMM_MAX(ld_in,(libxsmm_blasint)m);
69   ld_out = LIBXSMM_MAX(ld_out,(libxsmm_blasint)m);
70 
71   vectors_size = (scale_rows == 1) ? n : m;
72 
73   /* Allocate arrays  */
74   sinp      = (float*) malloc( ld_in*n*sizeof(float) );
75   sout      = (float*) malloc( ld_out*n*sizeof(float) );
76   ref_out   = (float*) malloc( ld_out*n*sizeof(float) );
77 
78   scale_vals = (float*) malloc(vectors_size*sizeof(float) );
79   shift_vals = (float*) malloc(vectors_size*sizeof(float) );
80   bias_vals  = (float*) malloc(vectors_size*sizeof(float) );
81 
82   /* Fill matrices with random data */
83   sfill_matrix ( sinp, ld_in, m, n );
84   sfill_matrix ( scale_vals, vectors_size, vectors_size, 1 );
85   sfill_matrix ( shift_vals, vectors_size, vectors_size, 1 );
86   sfill_matrix ( bias_vals, vectors_size, vectors_size, 1 );
87 
88   /* Calculate reference results...  */
89   if (scale_rows == 1) {
90     for (j = 0; j < n; j++) {
91       float scale = scale_vals[j];
92       float shift = shift_vals[j];
93       float bias  = bias_vals[j];
94       for (i = 0; i < m; i++) {
95         float out;
96         out = sinp[j*ld_in + i];
97         if (perform_shift) out += shift;
98         if (perform_scale) out *= scale;
99         if (perform_bias)  out += bias;
100         ref_out[j*ld_out + i] = out;
101       }
102     }
103   } else {
104     /* In this case we reduce columns */
105     for (i = 0; i < m; i++) {
106       float scale = scale_vals[i];
107       float shift = shift_vals[i];
108       float bias  = bias_vals[i];
109       for (j = 0; j < n; j++) {
110         float out;
111         out = sinp[j*ld_in + i];
112         if (perform_shift) out += shift;
113         if (perform_scale) out *= scale;
114         if (perform_bias)  out += bias;
115         ref_out[j*ld_out + i] = out;
116       }
117     }
118   }
119 
120   /* Generate JITED kernel */
121   if (scale_rows == 1) {
122     jit_flags = LIBXSMM_MELTW_FLAG_SCALE_ROWS;
123   } else {
124     jit_flags = LIBXSMM_MELTW_FLAG_SCALE_COLS;
125   }
126   if (perform_scale == 1) {
127     jit_flags |=  LIBXSMM_MELTW_FLAG_SCALE_MULT;
128   }
129   if (perform_shift == 1) {
130     jit_flags |=  LIBXSMM_MELTW_FLAG_SCALE_SHIFT;
131   }
132   if (perform_bias == 1) {
133     jit_flags |=  LIBXSMM_MELTW_FLAG_SCALE_ADD_BIAS;
134   }
135 
136   printf("JITing scale kernel... \n");
137   kernel = libxsmm_dispatch_meltw_scale(m, n, &ld_in, &ld_out, LIBXSMM_DATATYPE_F32, LIBXSMM_DATATYPE_F32, jit_flags);
138 
139   /* Call JITed kernel and compare result  */
140   printf("Calling JITed reduce kernel... \n");
141   params.in_ptr = sinp;
142   params.out_ptr = sout;
143   params.shift_vals_ptr = shift_vals;
144   params.scale_vals_ptr = scale_vals;
145   params.bias_vals_ptr  = bias_vals;
146   kernel( &params );
147 
148   /* compare */
149   printf("##########################################\n");
150   printf("#   Correctness - Eltwise scale out      #\n");
151   printf("##########################################\n");
152   libxsmm_matdiff(&norms_out, LIBXSMM_DATATYPE_F32, n * ld_out, 1, ref_out, sout, 0, 0);
153   printf("L1 reference  : %.25g\n", norms_out.l1_ref);
154   printf("L1 test       : %.25g\n", norms_out.l1_tst);
155   printf("L2 abs.error  : %.24f\n", norms_out.l2_abs);
156   printf("L2 rel.error  : %.24f\n", norms_out.l2_rel);
157   printf("Linf abs.error: %.24f\n", norms_out.linf_abs);
158   printf("Linf rel.error: %.24f\n", norms_out.linf_rel);
159   printf("Check-norm    : %.24f\n\n", norms_out.normf_rel);
160 
161   l_start = libxsmm_timer_tick();
162   /* Calculate reference results...  */
163   for (k = 0; k < iters; k++) {
164     /* Calculate reference results...  */
165     if (scale_rows == 1) {
166       for (j = 0; j < n; j++) {
167         float scale = scale_vals[j];
168         float shift = shift_vals[j];
169         float bias  = bias_vals[j];
170         for (i = 0; i < m; i++) {
171           float out;
172           out = sinp[j*ld_in + i];
173           if (perform_shift) out += shift;
174           if (perform_scale) out *= scale;
175           if (perform_bias)  out += bias;
176           ref_out[j*ld_out + i] = out;
177         }
178       }
179     } else {
180       /* In this case we reduce columns */
181       for (i = 0; i < m; i++) {
182         float scale = scale_vals[i];
183         float shift = shift_vals[i];
184         float bias  = bias_vals[i];
185         for (j = 0; j < n; j++) {
186           float out;
187           out = sinp[j*ld_in + i];
188           if (perform_shift) out += shift;
189           if (perform_scale) out *= scale;
190           if (perform_bias)  out += bias;
191           ref_out[j*ld_out + i] = out;
192         }
193       }
194     }
195   }
196   l_end = libxsmm_timer_tick();
197   l_total = libxsmm_timer_duration(l_start, l_end);
198   printf("Reference time = %.5g\n", ((double)(l_total)));
199 
200   l_start = libxsmm_timer_tick();
201   for (k = 0; k < iters; k++) {
202     kernel( &params );
203   }
204   l_end = libxsmm_timer_tick();
205   l_total2 = libxsmm_timer_duration(l_start, l_end);
206   printf("Optimized time = %.5g\n", ((double)(l_total2)));
207   printf("Speedup is = %.5g\n", ((double)(l_total/l_total2)));
208 
209 
210 
211   free(sinp);
212   free(sout);
213   free(ref_out);
214   free(scale_vals);
215   free(bias_vals);
216   free(shift_vals);
217 
218   return EXIT_SUCCESS;
219 }
220 
221