1 /*
2 * Copyright (c) 2007 - 2017 Joseph Gaeddert
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 */
22
23 // bench.c
24 //
// This file is used in conjunction with benchmark_include.h (generated with
// scripts/autoscript) to produce an executable for benchmarking the various
// signal processing algorithms in liquid.
28 //
29
30
31 // default include headers
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <getopt.h>
35 #include <string.h>
36 #include <math.h>
37 #include <sys/resource.h>
38
// Signature shared by every benchmark routine.
//  _start          : resource-usage snapshot, expected to be filled by the
//                    routine before its timed section
//  _finish         : resource-usage snapshot, expected to be filled by the
//                    routine after its timed section
//  _num_iterations : in: requested number of trials; read back by the caller
//                    after the call as the number of trials performed
typedef void benchmark_function_t(struct rusage     *_start,
                                  struct rusage     *_finish,
                                  unsigned long int *_num_iterations);
44
45 // define benchmark_t
46 typedef struct {
47 unsigned int id;
48 benchmark_function_t * api;
49 const char* name;
50 unsigned int name_len;
51 unsigned int num_trials;
52 float extime;
53 float rate;
54 float cycles_per_trial;
55 } benchmark_t;
56
// package_t: a named group of consecutive entries in the scripts[] table
typedef struct {
    unsigned int    id;             // package identification
    unsigned int    index;          // position of the package's first benchmark in scripts[]
    unsigned int    num_scripts;    // how many benchmarks belong to the package
    const char*     name;           // package name
} package_t;
64
65 // include auto-generated benchmark header
66 //
67 // defines the following symbols:
68 // #define AUTOSCRIPT_VERSION
69 // #define NUM_AUTOSCRIPTS
70 // benchmark_t scripts[NUM_AUTOSCRIPTS]
71 // #define NUM_PACKAGES
72 // package_t packages[NUM_PACKAGES]
73 #include "benchmark_include.h"
74
75 // helper functions:
76 void estimate_cpu_clock(void);
77 void set_num_trials_from_cpu_speed(void);
78 void execute_benchmark(benchmark_t* _benchmark, int _verbose);
79 void execute_package(package_t* _package, int _verbose);
80
81 char convert_units(float * _s);
82 void print_benchmark_results(benchmark_t* _benchmark);
83 void print_package_results(package_t* _package);
84 double calculate_execution_time(struct rusage, struct rusage);
85
86 unsigned long int num_base_trials = 1<<12;
87 float cpu_clock = 1.0f; // cpu clock speed (Hz)
88 float runtime=0.100f; // minimum run time (s)
89
90 FILE * fid; // output file id
91 void output_benchmark_to_file(FILE * _fid, benchmark_t * _benchmark);
92
// print command-line help to standard output
void usage()
{
    // one entry per output line; -c takes an argument (see the getopt option
    // string in main), so its placeholder is shown like the other options
    static const char * const help_lines[] = {
        "Usage: benchmark [OPTION]\n",
        "Execute benchmark scripts for liquid-dsp library.\n",
        " -h : display this help and exit\n",
        " -v : verbose\n",
        " -q : quiet\n",
        " -e : estimate cpu clock frequency and exit\n",
        " -c <freq> : set cpu clock frequency (Hz)\n",
        " -n <trials> : set number of base trials\n",
        " -p <package> : run specific package\n",
        " -b <bench> : run specific benchmark\n",
        " -t <seconds> : set minimum execution time (s)\n",
        " -l : list available packages\n",
        " -L : list all available scripts\n",
        " -s <search> : run all packages/benchmarks matching search string\n",
        " -o <file> : export output\n",
    };

    unsigned int i;
    for (i = 0; i < sizeof(help_lines) / sizeof(help_lines[0]); i++)
        fputs(help_lines[i], stdout);
}
112
113 // main function
main(int argc,char * argv[])114 int main(int argc, char *argv[])
115 {
116 // initialize timing variables
117 unsigned int i, j;
118
119 // options
120 enum {RUN_ALL,
121 RUN_SINGLE_BENCH,
122 RUN_SINGLE_PACKAGE,
123 RUN_SEARCH,
124 } mode = RUN_ALL;
125 unsigned int benchmark_id = 0;
126 unsigned int package_id = 0;
127 int verbose = 1;
128 int autoscale = 1;
129 int cpu_clock_detect = 1;
130 int output_to_file = 0;
131 char filename[128];
132 char search_string[128];
133
134 // get input options
135 int d;
136 while((d = getopt(argc,argv,"hvqec:n:b:p:t:lLs:o:")) != EOF){
137 switch (d) {
138 case 'h': usage(); return 0;
139 case 'v': verbose = 1; break;
140 case 'q': verbose = 0; break;
141 case 'e':
142 estimate_cpu_clock();
143 return 0;
144 case 'c':
145 cpu_clock = atof(optarg);
146 if (cpu_clock < 0) {
147 printf("error: cpu clock speed is negative (%f)\n", cpu_clock);
148 return -1;
149 }
150 cpu_clock_detect = 0;
151 break;
152 case 'n':
153 num_base_trials = atoi(optarg);
154 autoscale = 0;
155 break;
156 case 'b':
157 benchmark_id = atoi(optarg);
158 if (benchmark_id >= NUM_AUTOSCRIPTS) {
159 printf("error, cannot run benchmark %u; index exceeded\n", benchmark_id);
160 return -1;
161 } else {
162 mode = RUN_SINGLE_BENCH;
163 }
164 break;
165 case 'p':
166 package_id = atoi(optarg);
167 if (package_id >= NUM_PACKAGES) {
168 printf("error, cannot run package %u; index exceeded\n", package_id);
169 return -1;
170 } else {
171 mode = RUN_SINGLE_PACKAGE;
172 }
173 break;
174 case 't':
175 runtime = atof(optarg);
176 if (runtime < 1e-3f) runtime = 1e-3f;
177 else if (runtime > 10.f) runtime = 10.0f;
178 printf("minimum runtime: %d ms\n", (int) roundf(runtime*1e3));
179 break;
180 case 'l':
181 // list only packages and exit
182 for (i=0; i<NUM_PACKAGES; i++)
183 printf("%u: %s\n", packages[i].id, packages[i].name);
184 return 0;
185 case 'L':
186 // list packages, scripts and exit
187 for (i=0; i<NUM_PACKAGES; i++) {
188 printf("%u: %s\n", packages[i].id, packages[i].name);
189 for (j=packages[i].index; j<packages[i].num_scripts+packages[i].index; j++)
190 printf(" %-3u: %-22s\n", scripts[j].id, scripts[j].name);
191 }
192 return 0;
193 case 's':
194 mode = RUN_SEARCH;
195 strncpy(search_string, optarg, 128);
196 search_string[127] = '\0';
197 break;
198 case 'o':
199 output_to_file = 1;
200 strcpy(filename, optarg);
201 break;
202 default:
203 usage();
204 return 0;
205 }
206 }
207
208 // run empty loop; a bug was found that sometimes the first package run
209 // resulted in a longer execution time than what the benchmark really
210 // reflected. This loop prevents that from happening.
211 for (i=0; i<1e6; i++) {
212 // do nothing
213 }
214
215 if (cpu_clock_detect)
216 estimate_cpu_clock();
217
218 if (autoscale)
219 set_num_trials_from_cpu_speed();
220
221 switch (mode) {
222 case RUN_ALL:
223 for (i=0; i<NUM_PACKAGES; i++)
224 execute_package( &packages[i], verbose );
225
226 //for (i=0; i<NUM_PACKAGES; i++)
227 // print_package_results( &packages[i] );
228 break;
229 case RUN_SINGLE_BENCH:
230 execute_benchmark( &scripts[benchmark_id], verbose );
231 //print_benchmark_results( &scripts[benchmark_id] );
232 break;
233 case RUN_SINGLE_PACKAGE:
234 execute_package( &packages[package_id], verbose );
235 //print_package_results( &packages[package_id] );
236 break;
237 case RUN_SEARCH:
238 printf("running all packages and benchmarks matching '%s'...\n", search_string);
239 for (i=0; i<NUM_PACKAGES; i++) {
240 // see if search string matches package name
241 if (strstr(packages[i].name, search_string) != NULL) {
242 // run the package
243 execute_package( &packages[i], verbose );
244 }
245 }
246 printf("running all remaining scripts matching '%s'...\n", search_string);
247 for (i=0; i<NUM_AUTOSCRIPTS; i++) {
248 // see if search string matches benchmark name
249 if (strstr(scripts[i].name, search_string) != NULL && scripts[i].num_trials == 0) {
250 // run the benchmark
251 execute_benchmark( &scripts[i], verbose );
252 }
253 }
254 break;
255 default:
256 fprintf(stderr,"invalid mode\n");
257 exit(1);
258 }
259
260 if (output_to_file) {
261 fid = fopen(filename,"w");
262 if (!fid) {
263 printf("error: could not open file %s for writing\n", filename);
264 return 1;
265 }
266
267 // print header
268 fprintf(fid,"# %s : auto-generated file (autoscript version %s)\n", filename, AUTOSCRIPT_VERSION);
269 fprintf(fid,"#\n");
270 fprintf(fid,"# invoked as:\n");
271 fprintf(fid,"# ");
272 for (i=0; i<argc; i++)
273 fprintf(fid," %s", argv[i]);
274 fprintf(fid,"\n");
275 fprintf(fid,"#\n");
276 fprintf(fid,"# properties:\n");
277 fprintf(fid,"# verbose : %s\n", verbose ? "true" : "false");
278 fprintf(fid,"# autoscale : %s\n", autoscale ? "true" : "false");
279 fprintf(fid,"# cpu_clock_detect : %s\n", cpu_clock_detect ? "true" : "false");
280 fprintf(fid,"# search string : '%s'\n", mode == RUN_SEARCH ? search_string : "");
281 fprintf(fid,"# runtime : %12.8f s\n", runtime);
282 fprintf(fid,"# cpu_clock : %e Hz\n", cpu_clock);
283 fprintf(fid,"# cpu_clock determined: %s\n", cpu_clock_detect ? "estimated" : "specified");
284 fprintf(fid,"# num_trials : %lu\n", num_base_trials);
285 fprintf(fid,"#\n");
286 fprintf(fid,"# %-5s %-30s %12s %12s %12s %12s\n",
287 "id", "name", "num trials", "ex.time [s]", "rate [t/s]", "[cycles/t]");
288
289 for (i=0; i<NUM_AUTOSCRIPTS; i++) {
290 if (scripts[i].num_trials > 0)
291 output_benchmark_to_file(fid, &scripts[i]);
292 }
293
294 fclose(fid);
295 printf("results written to %s\n", filename);
296 }
297
298 return 0;
299 }
300
301 // run basic benchmark to estimate CPU clock frequency
estimate_cpu_clock(void)302 void estimate_cpu_clock(void)
303 {
304 printf(" estimating cpu clock frequency...\n");
305 unsigned long int i, n = 1<<4;
306 struct rusage start, finish;
307 double extime;
308
309 // run trials until execution time threshold is exceeded
310 do {
311 // trials
312 n <<= 1;
313
314 // NOTE: Smart compilers will realize that this loop doesn't really do
315 // anything, so they won't actually compute anything. We need to
316 // actually do something interesting here to trick the compiler
317 // into actually crunching these numbers, and then later display
318 // the results, even if they're meaningless
319 unsigned int k = 366001; // large prime number
320 unsigned int g = 184903; // another large prime number
321 unsigned int s = 1;
322 getrusage(RUSAGE_SELF, &start);
323 for (i=0; i<n; i++) {
324 // perform mindless task
325 s = (s*k) % g;
326 }
327 getrusage(RUSAGE_SELF, &finish);
328
329 extime = calculate_execution_time(start, finish);
330
331 // print results to screen
332 // NOTE: it is necessary to do something with the variable 's' so that
333 // the compiler will actually run the above loop
334 printf("%12lu trials in %8.3f ms, s = %6u\n", n, extime*1e3, s);
335 } while (extime < 0.5 && n < (1<<28));
336
337 // estimate cpu clock frequency
338 cpu_clock = 9.5 * n / extime;
339
340 printf(" performed %ld trials in %5.1f ms\n", n, extime * 1e3);
341
342 float clock_format = cpu_clock;
343 char clock_units = convert_units(&clock_format);
344 printf(" estimated clock speed: %7.3f %cHz\n", clock_format, clock_units);
345 }
346
set_num_trials_from_cpu_speed(void)347 void set_num_trials_from_cpu_speed(void)
348 {
349 unsigned long int min_trials = 256;
350 num_base_trials = (unsigned long int) ( cpu_clock / 10e3 );
351 num_base_trials = (num_base_trials < min_trials) ? min_trials : num_base_trials;
352
353 printf(" setting number of base trials to %ld\n", num_base_trials);
354 }
355
// run one benchmark, doubling the requested number of trials until it
// executes for at least the global minimum run time, then store the results
// (trials, execution time, rate, cycles per trial) in the benchmark object
//  _benchmark  : benchmark to run; its result fields are overwritten
//  _verbose    : when non-zero, print the results to the screen
void execute_benchmark(benchmark_t* _benchmark, int _verbose)
{
    const unsigned int max_attempts = 30;
    unsigned long int n = num_base_trials;
    struct rusage start, finish;

    unsigned int num_attempts = 0;
    unsigned long int num_trials;
    do {
        // increment number of attempts
        num_attempts++;

        // set number of trials and run benchmark; the benchmark may adjust
        // the count through the pointer, so the value read back is used
        num_trials = n;
        _benchmark->api(&start, &finish, &num_trials);
        _benchmark->extime = calculate_execution_time(start, finish);

        // check exit criteria
        if (_benchmark->extime >= runtime) {
            break;
        } else if (num_attempts == max_attempts) {
            fprintf(stderr,"warning: benchmark could not execute over minimum run time\n");
            break;
        } else {
            // increase number of trials
            n *= 2;
        }
    } while (1);

    // the result field is narrower than the counter: saturate instead of
    // silently wrapping when the count does not fit
    const unsigned int count_max = (unsigned int)-1;
    _benchmark->num_trials = (num_trials > count_max) ? count_max
                                                      : (unsigned int) num_trials;

    // compute the rate from the full-width count so it stays correct even
    // when the stored count saturated
    _benchmark->rate = (float)(num_trials) / _benchmark->extime;
    _benchmark->cycles_per_trial = cpu_clock / (_benchmark->rate);

    if (_verbose)
        print_benchmark_results(_benchmark);
}
391
// run every benchmark that belongs to a package, in table order
//  _package    : package whose benchmarks are run
//  _verbose    : when non-zero, print the package header line and pass the
//                flag on to each benchmark
void execute_package(package_t* _package, int _verbose)
{
    if (_verbose)
        printf("%u: %s\n", _package->id, _package->name);

    // the package owns the scripts[] entries index .. index+num_scripts-1
    unsigned int offset = 0;
    while (offset < _package->num_scripts) {
        execute_benchmark( &scripts[ _package->index + offset ], _verbose );
        offset++;
    }
}
402
// convert raw value into metric units,
// example: "0.01397s" -> "13.97 ms"
//  _v      : in: raw value; out: value scaled to the selected prefix
//  returns : metric prefix character, one of 'p','n','u','m',' ','k','M','G','T'
//
// The prefix is chosen from the magnitude of the value, so a negative value
// gets the same prefix as its positive counterpart and keeps its sign.
char convert_units(float * _v)
{
    // exclusive upper bound, multiplier and prefix of each range, in
    // ascending order; anything at or above the last bound falls through to 'T'
    static const struct {
        double limit;
        double scale;
        char   unit;
    } ranges[] = {
        { 1e-9, 1e12, 'p' },
        { 1e-6, 1e9,  'n' },
        { 1e-3, 1e6,  'u' },
        { 1e+0, 1e3,  'm' },
        { 1e3,  1e+0, ' ' },
        { 1e6,  1e-3, 'k' },
        { 1e9,  1e-6, 'M' },
        { 1e12, 1e-9, 'G' },
    };
    const size_t num_ranges = sizeof(ranges) / sizeof(ranges[0]);

    // classify by absolute value (computed by hand to avoid needing libm)
    const float magnitude = (*_v < 0.0f) ? -(*_v) : *_v;

    size_t i;
    for (i = 0; i < num_ranges; i++) {
        if (magnitude < ranges[i].limit) {
            (*_v) *= ranges[i].scale;
            return ranges[i].unit;
        }
    }

    (*_v) *= 1e-12;
    return 'T';
}
420
// print one line of results for a benchmark, with every quantity scaled to
// a metric prefix
//  _b  : benchmark whose id, name and result fields are printed
void print_benchmark_results(benchmark_t* _b)
{
    enum { FIELD_TRIALS, FIELD_EXTIME, FIELD_RATE, FIELD_CYCLES, FIELD_COUNT };

    // raw values, in the order they appear in the output line
    float value[FIELD_COUNT];
    value[FIELD_TRIALS] = (float)(_b->num_trials);   // trials (iterations)
    value[FIELD_EXTIME] = _b->extime;                // time (seconds)
    value[FIELD_RATE]   = _b->rate;                  // rate (trials/second)
    value[FIELD_CYCLES] = _b->cycles_per_trial;      // processor efficiency (cycles/trial)

    // scale each value in place and remember its prefix
    char prefix[FIELD_COUNT];
    unsigned int k;
    for (k = 0; k < FIELD_COUNT; k++)
        prefix[k] = convert_units(&value[k]);

    printf(" %-3u: %-30s: %6.2f %c trials / %6.2f %cs (%6.2f %c t/s, %6.2f %c c/t)\n",
           _b->id, _b->name,
           value[FIELD_TRIALS], prefix[FIELD_TRIALS],
           value[FIELD_EXTIME], prefix[FIELD_EXTIME],
           value[FIELD_RATE],   prefix[FIELD_RATE],
           value[FIELD_CYCLES], prefix[FIELD_CYCLES]);
}
446
// print the package header followed by the results of each of its
// benchmarks and a trailing blank line
//  _package    : package whose benchmark results are printed
void print_package_results(package_t* _package)
{
    printf("%u: %s:\n", _package->id, _package->name);

    // walk the package's slice of the scripts[] table
    unsigned int k = _package->index;
    while (k < (_package->index + _package->num_scripts)) {
        print_benchmark_results( &scripts[k] );
        k++;
    }

    printf("\n");
}
456
// compute the time, in seconds, between two resource-usage snapshots as the
// sum of the differences of their ru_utime and ru_stime fields
//  _start  : snapshot taken before the measured section
//  _finish : snapshot taken after the measured section
double calculate_execution_time(struct rusage _start, struct rusage _finish)
{
    const double sec_per_usec = 1e-6;

    // the terms are accumulated one at a time, left to right, so the
    // floating-point result matches a single chained sum of the same terms
    double elapsed = _finish.ru_utime.tv_sec - _start.ru_utime.tv_sec;
    elapsed += sec_per_usec*(_finish.ru_utime.tv_usec - _start.ru_utime.tv_usec);
    elapsed += _finish.ru_stime.tv_sec;
    elapsed -= _start.ru_stime.tv_sec;
    elapsed += sec_per_usec*(_finish.ru_stime.tv_usec - _start.ru_stime.tv_usec);

    return elapsed;
}
464
// write one line of benchmark results to an open output file
//  _fid        : output stream, already opened for writing
//  _benchmark  : benchmark whose id, name and result fields are written
void output_benchmark_to_file(FILE * _fid, benchmark_t * _benchmark)
{
    const benchmark_t * b = _benchmark;

    // columns: id, name, num trials, ex.time, rate, cycles per trial
    fprintf(_fid," %-5u %-30s %12u %12.4e %12.4e %12.4e\n",
            b->id, b->name, b->num_trials,
            b->extime, b->rate, b->cycles_per_trial);
}
475
476