1 /*
2  * Microbenchmark for math functions.
3  *
4  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5  * See https://llvm.org/LICENSE.txt for license information.
6  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7  */
8 
9 #undef _GNU_SOURCE
10 #define _GNU_SOURCE 1
11 #include <stdint.h>
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <time.h>
16 #include <math.h>
17 #include "mathlib.h"
18 
#ifndef WANT_VMATH
/* Enable the build of vector math code.  */
# define WANT_VMATH 1
#endif

/* Number of measurements, best result is reported.  */
#define MEASURE 60
/* Array size.  */
#define N 8000
/* Iterations over the array.  */
#define ITER 125

/* Trace values loaded by readtrace; stays NULL when no trace is read.  */
static double *Trace;
/* Number of elements available in Trace; readtrace grows it in steps of N.  */
static size_t trace_size;
/* Input array for double precision functions, filled by the gen_* helpers.  */
static double A[N];
/* Input array for single precision functions, filled by the genf_* helpers.  */
static float Af[N];
/* Number of timed measurements per benchmark; main overrides it with -m.  */
static long measurecount = MEASURE;
/* Passes over the input array per measurement; main overrides it with -c.  */
static long itercount = ITER;
37 
#if __aarch64__ && WANT_VMATH
/* Two-lane double vector and the helpers the benchmark loops need.  */
typedef __f64x2_t v_double;

#define v_double_len() 2

/* Build a vector from the two consecutive doubles starting at P.  */
static inline v_double
v_double_load (const double *p)
{
  v_double lanes = {p[0], p[1]};
  return lanes;
}

/* Build a vector with X in every lane.  */
static inline v_double
v_double_dup (double x)
{
  v_double lanes = {x, x};
  return lanes;
}

/* Four-lane float vector and the matching helpers.  */
typedef __f32x4_t v_float;

#define v_float_len() 4

/* Build a vector from the four consecutive floats starting at P.  */
static inline v_float
v_float_load (const float *p)
{
  v_float lanes = {p[0], p[1], p[2], p[3]};
  return lanes;
}

/* Build a vector with X in every lane.  */
static inline v_float
v_float_dup (float x)
{
  v_float lanes = {x, x, x, x};
  return lanes;
}
#else
/* Scalar stand-ins so that code written against the vector helpers still
   compiles when vector math is not built: a "vector" is a single element.  */
typedef double v_double;
typedef float v_float;
#define v_double_len(x) 1
#define v_double_load(x) (x)[0]
#define v_double_dup(x) (x)
#define v_float_len(x) 1
#define v_float_load(x) (x)[0]
#define v_float_dup(x) (x)
#endif
81 
/* Identity function for double inputs.  It is listed in funtab as "dummy",
   so its timing shows the cost of the benchmark loop and call alone.  */
static double
dummy (double x)
{
  return x;
}

/* Single precision counterpart of dummy, listed in funtab as "dummyf".  */
static float
dummyf (float x)
{
  return x;
}
93 
/* Wrappers for the vector math build.  The function table only holds
   one-argument functions, so every two-argument pow variant below is wrapped
   as f(x) = pow (x, x).  */
#if WANT_VMATH
#if __aarch64__
/* Identity on a double vector: baseline for the 'v' double entries.  */
static v_double
__v_dummy (v_double x)
{
  return x;
}

/* Identity on a float vector: baseline for the 'v' float entries.  */
static v_float
__v_dummyf (v_float x)
{
  return x;
}

#ifdef __vpcs
/* Same baselines, declared with __vpcs (presumably the vector procedure call
   attribute supplied by mathlib.h -- confirm there).  */
__vpcs static v_double
__vn_dummy (v_double x)
{
  return x;
}

__vpcs static v_float
__vn_dummyf (v_float x)
{
  return x;
}

/* One-argument adapters for the __vpcs pow variants.  */
__vpcs static v_float
xy__vn_powf (v_float x)
{
  return __vn_powf (x, x);
}

__vpcs static v_float
xy_Z_powf (v_float x)
{
  return _ZGVnN4vv_powf (x, x);
}

__vpcs static v_double
xy__vn_pow (v_double x)
{
  return __vn_pow (x, x);
}

__vpcs static v_double
xy_Z_pow (v_double x)
{
  return _ZGVnN2vv_pow (x, x);
}
#endif /* __vpcs */

/* One-argument adapters for the __v_ pow variants.  */
static v_float
xy__v_powf (v_float x)
{
  return __v_powf (x, x);
}

static v_double
xy__v_pow (v_double x)
{
  return __v_pow (x, x);
}
#endif /* __aarch64__ */

/* One-argument adapters for the scalar __s_ pow variants; these are built
   whenever WANT_VMATH is set, independent of the target.  */
static float
xy__s_powf (float x)
{
  return __s_powf (x, x);
}

static double
xy__s_pow (double x)
{
  return __s_pow (x, x);
}
#endif /* WANT_VMATH */
171 
172 static double
xypow(double x)173 xypow (double x)
174 {
175   return pow (x, x);
176 }
177 
178 static float
xypowf(float x)179 xypowf (float x)
180 {
181   return powf (x, x);
182 }
183 
184 static double
xpow(double x)185 xpow (double x)
186 {
187   return pow (x, 23.4);
188 }
189 
190 static float
xpowf(float x)191 xpowf (float x)
192 {
193   return powf (x, 23.4f);
194 }
195 
196 static double
ypow(double x)197 ypow (double x)
198 {
199   return pow (2.34, x);
200 }
201 
202 static float
ypowf(float x)203 ypowf (float x)
204 {
205   return powf (2.34f, x);
206 }
207 
208 static float
sincosf_wrap(float x)209 sincosf_wrap (float x)
210 {
211   float s, c;
212   sincosf (x, &s, &c);
213   return s + c;
214 }
215 
/* Table of benchmarkable functions, terminated by an all-zero entry (the
   loops over it stop at the first NULL name).  */
static const struct fun
{
  /* Name printed in results and matched against the command line.  */
  const char *name;
  /* Precision: 'd' for double, 'f' for float.  */
  int prec;
  /* Variant: 0 for scalar, 'v' for vector, 'n' for vector with __vpcs.  */
  int vec;
  /* Default input interval, used when the user gives no -i or trace.  */
  double lo;
  double hi;
  /* The function itself; the member in use is selected by prec and vec.  */
  union
  {
    double (*d) (double);
    float (*f) (float);
    v_double (*vd) (v_double);
    v_float (*vf) (v_float);
#ifdef __vpcs
    __vpcs v_double (*vnd) (v_double);
    __vpcs v_float (*vnf) (v_float);
#endif
  } fun;
} funtab[] = {
/* Entry helpers: the function's own identifier doubles as its table name.
   Wrapped functions are written out by hand so they keep the name of the
   function they wrap.  */
#define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
#define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
#define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
#define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
#define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
#define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
/* Scalar double precision functions.  */
D (dummy, 1.0, 2.0)
D (exp, -9.9, 9.9)
D (exp, 0.5, 1.0)
D (exp2, -9.9, 9.9)
D (log, 0.01, 11.1)
D (log, 0.999, 1.001)
D (log2, 0.01, 11.1)
D (log2, 0.999, 1.001)
{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
D (xpow, 0.01, 11.1)
D (ypow, -9.9, 9.9)

/* Scalar single precision functions.  */
F (dummyf, 1.0, 2.0)
F (expf, -9.9, 9.9)
F (exp2f, -9.9, 9.9)
F (logf, 0.01, 11.1)
F (log2f, 0.01, 11.1)
{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
F (xpowf, 0.01, 11.1)
F (ypowf, -9.9, 9.9)
{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
F (sinf, 0.1, 0.7)
F (sinf, 0.8, 3.1)
F (sinf, -3.1, 3.1)
F (sinf, 3.3, 33.3)
F (sinf, 100, 1000)
F (sinf, 1e6, 1e32)
F (cosf, 0.1, 0.7)
F (cosf, 0.8, 3.1)
F (cosf, -3.1, 3.1)
F (cosf, 3.3, 33.3)
F (cosf, 100, 1000)
F (cosf, 1e6, 1e32)
#if WANT_VMATH
/* Scalar __s_ variants from the vector math build.  */
D (__s_sin, -3.1, 3.1)
D (__s_cos, -3.1, 3.1)
D (__s_exp, -9.9, 9.9)
D (__s_log, 0.01, 11.1)
{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
F (__s_expf, -9.9, 9.9)
F (__s_expf_1u, -9.9, 9.9)
F (__s_exp2f, -9.9, 9.9)
F (__s_exp2f_1u, -9.9, 9.9)
F (__s_logf, 0.01, 11.1)
{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
F (__s_sinf, -3.1, 3.1)
F (__s_cosf, -3.1, 3.1)
#if __aarch64__
/* Vector variants.  */
VD (__v_dummy, 1.0, 2.0)
VD (__v_sin, -3.1, 3.1)
VD (__v_cos, -3.1, 3.1)
VD (__v_exp, -9.9, 9.9)
VD (__v_log, 0.01, 11.1)
{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
VF (__v_dummyf, 1.0, 2.0)
VF (__v_expf, -9.9, 9.9)
VF (__v_expf_1u, -9.9, 9.9)
VF (__v_exp2f, -9.9, 9.9)
VF (__v_exp2f_1u, -9.9, 9.9)
VF (__v_logf, 0.01, 11.1)
{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
VF (__v_sinf, -3.1, 3.1)
VF (__v_cosf, -3.1, 3.1)
#ifdef __vpcs
/* Vector variants declared with __vpcs.  */
VND (__vn_dummy, 1.0, 2.0)
VND (__vn_exp, -9.9, 9.9)
VND (_ZGVnN2v_exp, -9.9, 9.9)
VND (__vn_log, 0.01, 11.1)
VND (_ZGVnN2v_log, 0.01, 11.1)
{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
VND (__vn_sin, -3.1, 3.1)
VND (_ZGVnN2v_sin, -3.1, 3.1)
VND (__vn_cos, -3.1, 3.1)
VND (_ZGVnN2v_cos, -3.1, 3.1)
VNF (__vn_dummyf, 1.0, 2.0)
VNF (__vn_expf, -9.9, 9.9)
VNF (_ZGVnN4v_expf, -9.9, 9.9)
VNF (__vn_expf_1u, -9.9, 9.9)
VNF (__vn_exp2f, -9.9, 9.9)
VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
VNF (__vn_exp2f_1u, -9.9, 9.9)
VNF (__vn_logf, 0.01, 11.1)
VNF (_ZGVnN4v_logf, 0.01, 11.1)
{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
VNF (__vn_sinf, -3.1, 3.1)
VNF (_ZGVnN4v_sinf, -3.1, 3.1)
VNF (__vn_cosf, -3.1, 3.1)
VNF (_ZGVnN4v_cosf, -3.1, 3.1)
#endif
#endif
#endif
/* Terminator: name == NULL ends every walk over the table.  */
{0},
#undef F
#undef D
#undef VF
#undef VD
#undef VNF
#undef VND
};
347 
348 static void
gen_linear(double lo,double hi)349 gen_linear (double lo, double hi)
350 {
351   for (int i = 0; i < N; i++)
352     A[i] = (lo * (N - i) + hi * i) / N;
353 }
354 
355 static void
genf_linear(double lo,double hi)356 genf_linear (double lo, double hi)
357 {
358   for (int i = 0; i < N; i++)
359     Af[i] = (float)(lo * (N - i) + hi * i) / N;
360 }
361 
/* Reinterpret the 64 bits of I as a double, without any numeric
   conversion.  */
static inline double
asdouble (uint64_t i)
{
  double value;

  memcpy (&value, &i, sizeof value);
  return value;
}
372 
/* State of the pseudo random generator used by frand.  */
static uint64_t seed = 0x0123456789abcdef;

/* Return the next pseudo random double in [LO, HI) (for LO < HI, up to
   rounding) and advance the generator state.  */
static double
frand (double lo, double hi)
{
  uint64_t bits;
  double unit;

  /* Linear congruential step on the 64-bit state.  */
  seed = seed * 6364136223846793005ULL + 1;
  /* The top 52 state bits become the mantissa of a double whose exponent
     field is 0x3ff, i.e. a value in [1, 2).  */
  bits = (seed >> 12) | (0x3ffULL << 52);
  unit = asdouble (bits) - 1.0;
  return lo + (hi - lo) * unit;
}
381 
382 static void
gen_rand(double lo,double hi)383 gen_rand (double lo, double hi)
384 {
385   for (int i = 0; i < N; i++)
386     A[i] = frand (lo, hi);
387 }
388 
389 static void
genf_rand(double lo,double hi)390 genf_rand (double lo, double hi)
391 {
392   for (int i = 0; i < N; i++)
393     Af[i] = (float)frand (lo, hi);
394 }
395 
396 static void
gen_trace(int index)397 gen_trace (int index)
398 {
399   for (int i = 0; i < N; i++)
400     A[i] = Trace[index + i];
401 }
402 
403 static void
genf_trace(int index)404 genf_trace (int index)
405 {
406   for (int i = 0; i < N; i++)
407     Af[i] = (float)Trace[index + i];
408 }
409 
/* Throughput kernel for scalar double functions: call F once on every
   element of A.  The calls do not depend on each other and the results are
   discarded.  */
static void
run_thruput (double f (double))
{
  for (int i = 0; i < N; i++)
    f (A[i]);
}

/* Throughput kernel for scalar float functions: same as run_thruput, but
   over Af.  */
static void
runf_thruput (float f (float))
{
  for (int i = 0; i < N; i++)
    f (Af[i]);
}
423 
/* Always 0, but volatile: the latency kernels read it at run time, so the
   `prev * z` term below cannot be folded away at compile time.  */
volatile double zero = 0;

/* Latency kernel for scalar double functions.  Each argument is
   A[i] + prev * z, where prev is the previous result and z is read from
   `zero`.  This makes every call depend on the result of the one before
   it.  */
static void
run_latency (double f (double))
{
  double z = zero;
  double prev = z;
  for (int i = 0; i < N; i++)
    prev = f (A[i] + prev * z);
}

/* Latency kernel for scalar float functions: same dependency chain as
   run_latency, over Af.  */
static void
runf_latency (float f (float))
{
  float z = (float)zero;
  float prev = z;
  for (int i = 0; i < N; i++)
    prev = f (Af[i] + prev * z);
}
443 
/* Throughput kernel for vector double functions: walk A in steps of
   v_double_len () elements and call F on each loaded vector.  */
static void
run_v_thruput (v_double f (v_double))
{
  for (int i = 0; i < N; i += v_double_len ())
    f (v_double_load (A+i));
}

/* Throughput kernel for vector float functions: walk Af in steps of
   v_float_len () elements.  */
static void
runf_v_thruput (v_float f (v_float))
{
  for (int i = 0; i < N; i += v_float_len ())
    f (v_float_load (Af+i));
}

/* Latency kernel for vector double functions.  As in the scalar latency
   kernel, every argument includes prev * z (z being `zero` in all lanes),
   so each call depends on the previous result.  */
static void
run_v_latency (v_double f (v_double))
{
  v_double z = v_double_dup (zero);
  v_double prev = z;
  for (int i = 0; i < N; i += v_double_len ())
    prev = f (v_double_load (A+i) + prev * z);
}

/* Latency kernel for vector float functions: same dependency chain, over
   Af.  */
static void
runf_v_latency (v_float f (v_float))
{
  v_float z = v_float_dup (zero);
  v_float prev = z;
  for (int i = 0; i < N; i += v_float_len ())
    prev = f (v_float_load (Af+i) + prev * z);
}
475 
/* Kernels for functions declared with __vpcs.  They mirror the run_v_*
   kernels above; only the type of the function parameter differs.  */
#ifdef __vpcs
/* Throughput, double vectors.  */
static void
run_vn_thruput (__vpcs v_double f (v_double))
{
  for (int i = 0; i < N; i += v_double_len ())
    f (v_double_load (A+i));
}

/* Throughput, float vectors.  */
static void
runf_vn_thruput (__vpcs v_float f (v_float))
{
  for (int i = 0; i < N; i += v_float_len ())
    f (v_float_load (Af+i));
}

/* Latency, double vectors: each argument includes prev * z so that the
   calls form a dependency chain.  */
static void
run_vn_latency (__vpcs v_double f (v_double))
{
  v_double z = v_double_dup (zero);
  v_double prev = z;
  for (int i = 0; i < N; i += v_double_len ())
    prev = f (v_double_load (A+i) + prev * z);
}

/* Latency, float vectors.  */
static void
runf_vn_latency (__vpcs v_float f (v_float))
{
  v_float z = v_float_dup (zero);
  v_float prev = z;
  for (int i = 0; i < N; i += v_float_len ())
    prev = f (v_float_load (Af+i) + prev * z);
}
#endif
509 
/* Return a timestamp in nanoseconds for measuring elapsed time.

   The monotonic clock is used because the value is only ever subtracted
   from a later one.  The real-time clock can be stepped (e.g. by NTP or an
   administrator) in the middle of a measurement, which would corrupt the
   interval.  Aborts if the clock cannot be read.  */
static uint64_t
tic (void)
{
  struct timespec ts;
  if (clock_gettime (CLOCK_MONOTONIC, &ts))
    abort ();
  return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;
}
518 
/* Time RUN (F) and leave the best (smallest) measurement in `dt`.

   `dt` must be a variable in the expanding scope.  With an unsigned dt
   (uint64_t in bench1) the initial -1 is the largest value, so the first
   measurement always replaces it.  After one untimed warm-up call, the macro
   takes `measurecount` measurements, each covering `itercount` consecutive
   calls of RUN (F) between two tic () readings.  If measurecount is not
   positive, dt is left at its initial value.  */
#define TIMEIT(run, f) do { \
  dt = -1; \
  run (f); /* Warm up.  */ \
  for (int j = 0; j < measurecount; j++) \
    { \
      uint64_t t0 = tic (); \
      for (int i = 0; i < itercount; i++) \
	run (f); \
      uint64_t t1 = tic (); \
      if (t1 - t0 < dt) \
	dt = t1 - t0; \
    } \
} while (0)
532 
533 static void
bench1(const struct fun * f,int type,double lo,double hi)534 bench1 (const struct fun *f, int type, double lo, double hi)
535 {
536   uint64_t dt = 0;
537   uint64_t ns100;
538   const char *s = type == 't' ? "rthruput" : "latency";
539   int vlen = 1;
540 
541   if (f->vec && f->prec == 'd')
542     vlen = v_double_len();
543   else if (f->vec && f->prec == 'f')
544     vlen = v_float_len();
545 
546   if (f->prec == 'd' && type == 't' && f->vec == 0)
547     TIMEIT (run_thruput, f->fun.d);
548   else if (f->prec == 'd' && type == 'l' && f->vec == 0)
549     TIMEIT (run_latency, f->fun.d);
550   else if (f->prec == 'f' && type == 't' && f->vec == 0)
551     TIMEIT (runf_thruput, f->fun.f);
552   else if (f->prec == 'f' && type == 'l' && f->vec == 0)
553     TIMEIT (runf_latency, f->fun.f);
554   else if (f->prec == 'd' && type == 't' && f->vec == 'v')
555     TIMEIT (run_v_thruput, f->fun.vd);
556   else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
557     TIMEIT (run_v_latency, f->fun.vd);
558   else if (f->prec == 'f' && type == 't' && f->vec == 'v')
559     TIMEIT (runf_v_thruput, f->fun.vf);
560   else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
561     TIMEIT (runf_v_latency, f->fun.vf);
562 #ifdef __vpcs
563   else if (f->prec == 'd' && type == 't' && f->vec == 'n')
564     TIMEIT (run_vn_thruput, f->fun.vnd);
565   else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
566     TIMEIT (run_vn_latency, f->fun.vnd);
567   else if (f->prec == 'f' && type == 't' && f->vec == 'n')
568     TIMEIT (runf_vn_thruput, f->fun.vnf);
569   else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
570     TIMEIT (runf_vn_latency, f->fun.vnf);
571 #endif
572 
573   if (type == 't')
574     {
575       ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
576       printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
577 	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
578 	      (unsigned long long) dt, lo, hi);
579     }
580   else if (type == 'l')
581     {
582       ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
583       printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
584 	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
585 	      (unsigned long long) dt, lo, hi);
586     }
587   fflush (stdout);
588 }
589 
590 static void
bench(const struct fun * f,double lo,double hi,int type,int gen)591 bench (const struct fun *f, double lo, double hi, int type, int gen)
592 {
593   if (f->prec == 'd' && gen == 'r')
594     gen_rand (lo, hi);
595   else if (f->prec == 'd' && gen == 'l')
596     gen_linear (lo, hi);
597   else if (f->prec == 'd' && gen == 't')
598     gen_trace (0);
599   else if (f->prec == 'f' && gen == 'r')
600     genf_rand (lo, hi);
601   else if (f->prec == 'f' && gen == 'l')
602     genf_linear (lo, hi);
603   else if (f->prec == 'f' && gen == 't')
604     genf_trace (0);
605 
606   if (gen == 't')
607     hi = trace_size / N;
608 
609   if (type == 'b' || type == 't')
610     bench1 (f, 't', lo, hi);
611 
612   if (type == 'b' || type == 'l')
613     bench1 (f, 'l', lo, hi);
614 
615   for (int i = N; i < trace_size; i += N)
616     {
617       if (f->prec == 'd')
618 	gen_trace (i);
619       else
620 	genf_trace (i);
621 
622       lo = i / N;
623       if (type == 'b' || type == 't')
624 	bench1 (f, 't', lo, hi);
625 
626       if (type == 'b' || type == 'l')
627 	bench1 (f, 'l', lo, hi);
628     }
629 }
630 
631 static void
readtrace(const char * name)632 readtrace (const char *name)
633 {
634 	int n = 0;
635 	FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
636 	if (!f)
637 	  {
638 	    printf ("opening \"%s\" failed: %m\n", name);
639 	    exit (1);
640 	  }
641 	for (;;)
642 	  {
643 	    if (n >= trace_size)
644 	      {
645 		trace_size += N;
646 		Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
647 		if (Trace == NULL)
648 		  {
649 		    printf ("out of memory\n");
650 		    exit (1);
651 		  }
652 	      }
653 	    if (fscanf (f, "%lf", Trace + n) != 1)
654 	      break;
655 	    n++;
656 	  }
657 	if (ferror (f) || n == 0)
658 	  {
659 	    printf ("reading \"%s\" failed: %m\n", name);
660 	    exit (1);
661 	  }
662 	fclose (f);
663 	if (n % N == 0)
664 	  trace_size = n;
665 	for (int i = 0; n < trace_size; n++, i++)
666 	  Trace[n] = Trace[i];
667 }
668 
669 static void
usage(void)670 usage (void)
671 {
672   printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
673 	  "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
674 	  "[func2 ..]\n");
675   printf ("func:\n");
676   printf ("%7s [run all benchmarks]\n", "all");
677   for (const struct fun *f = funtab; f->name; f++)
678     printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
679   exit (1);
680 }
681 
682 int
main(int argc,char * argv[])683 main (int argc, char *argv[])
684 {
685   int usergen = 0, gen = 'r', type = 'b', all = 0;
686   double lo = 0, hi = 0;
687   const char *tracefile = "-";
688 
689   argv++;
690   argc--;
691   for (;;)
692     {
693       if (argc <= 0)
694 	usage ();
695       if (argv[0][0] != '-')
696 	break;
697       else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
698 	{
699 	  usergen = 1;
700 	  lo = strtod (argv[1], 0);
701 	  hi = strtod (argv[2], 0);
702 	  argv += 3;
703 	  argc -= 3;
704 	}
705       else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
706 	{
707 	  measurecount = strtol (argv[1], 0, 0);
708 	  argv += 2;
709 	  argc -= 2;
710 	}
711       else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
712 	{
713 	  itercount = strtol (argv[1], 0, 0);
714 	  argv += 2;
715 	  argc -= 2;
716 	}
717       else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
718 	{
719 	  gen = argv[1][0];
720 	  if (strchr ("rlt", gen) == 0)
721 	    usage ();
722 	  argv += 2;
723 	  argc -= 2;
724 	}
725       else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
726 	{
727 	  gen = 't';  /* -f implies -g trace.  */
728 	  tracefile = argv[1];
729 	  argv += 2;
730 	  argc -= 2;
731 	}
732       else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
733 	{
734 	  type = argv[1][0];
735 	  if (strchr ("ltb", type) == 0)
736 	    usage ();
737 	  argv += 2;
738 	  argc -= 2;
739 	}
740       else
741 	usage ();
742     }
743   if (gen == 't')
744     {
745       readtrace (tracefile);
746       lo = hi = 0;
747       usergen = 1;
748     }
749   while (argc > 0)
750     {
751       int found = 0;
752       all = strcmp (argv[0], "all") == 0;
753       for (const struct fun *f = funtab; f->name; f++)
754 	if (all || strcmp (argv[0], f->name) == 0)
755 	  {
756 	    found = 1;
757 	    if (!usergen)
758 	      {
759 		lo = f->lo;
760 		hi = f->hi;
761 	      }
762 	    bench (f, lo, hi, type, gen);
763 	    if (usergen && !all)
764 	      break;
765 	  }
766       if (!found)
767 	printf ("unknown function: %s\n", argv[0]);
768       argv++;
769       argc--;
770     }
771   return 0;
772 }
773