1 /*
2  * Copyright (c) 2007 - 2017 Joseph Gaeddert
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20  * THE SOFTWARE.
21  */
22 
23 // bench.c
24 //
// This file is used in conjunction with benchmark_include.h (generated with
// scripts/autoscript) to produce an executable for benchmarking the various
// signal processing algorithms in liquid.
28 //
29 
30 
31 // default include headers
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <getopt.h>
35 #include <string.h>
36 #include <math.h>
37 #include <sys/resource.h>
38 
39 // define benchmark function pointer
40 typedef void(benchmark_function_t) (
41     struct rusage *_start,
42     struct rusage *_finish,
43     unsigned long int *_num_iterations);
44 
45 // define benchmark_t
46 typedef struct {
47     unsigned int id;
48     benchmark_function_t * api;
49     const char* name;
50     unsigned int name_len;
51     unsigned int num_trials;
52     float extime;
53     float rate;
54     float cycles_per_trial;
55 } benchmark_t;
56 
57 // define package_t
58 typedef struct {
59     unsigned int id;            // package identification
60     unsigned int index;         // index of first benchmark
61     unsigned int num_scripts;   // number of tests in package
62     const char* name;           // package name
63 } package_t;
64 
65 // include auto-generated benchmark header
66 //
67 // defines the following symbols:
68 //   #define AUTOSCRIPT_VERSION
69 //   #define NUM_AUTOSCRIPTS
70 //   benchmark_t scripts[NUM_AUTOSCRIPTS]
71 //   #define NUM_PACKAGES
72 //   package_t packages[NUM_PACKAGES]
73 #include "benchmark_include.h"
74 
75 // helper functions:
76 void estimate_cpu_clock(void);
77 void set_num_trials_from_cpu_speed(void);
78 void execute_benchmark(benchmark_t* _benchmark, int _verbose);
79 void execute_package(package_t* _package, int _verbose);
80 
81 char convert_units(float * _s);
82 void print_benchmark_results(benchmark_t* _benchmark);
83 void print_package_results(package_t* _package);
84 double calculate_execution_time(struct rusage, struct rusage);
85 
86 unsigned long int num_base_trials = 1<<12;
87 float cpu_clock = 1.0f; // cpu clock speed (Hz)
88 float runtime=0.100f;   // minimum run time (s)
89 
90 FILE * fid; // output file id
91 void output_benchmark_to_file(FILE * _fid, benchmark_t * _benchmark);
92 
usage()93 void usage()
94 {
95     // help
96     printf("Usage: benchmark [OPTION]\n");
97     printf("Execute benchmark scripts for liquid-dsp library.\n");
98     printf("  -h           : display this help and exit\n");
99     printf("  -v           : verbose\n");
100     printf("  -q           : quiet\n");
101     printf("  -e           : estimate cpu clock frequency and exit\n");
102     printf("  -c           : set cpu clock frequency (Hz)\n");
103     printf("  -n <trials>  : set number of base trials\n");
104     printf("  -p <package> : run specific package\n");
105     printf("  -b <bench>   : run specific benchmark\n");
106     printf("  -t <seconds> : set minimum execution time (s)\n");
107     printf("  -l           : list available packages\n");
108     printf("  -L           : list all available scripts\n");
109     printf("  -s <search>  : run all packages/benchmarks matching search string\n");
110     printf("  -o <file>    : export output\n");
111 }
112 
113 // main function
main(int argc,char * argv[])114 int main(int argc, char *argv[])
115 {
116     // initialize timing variables
117     unsigned int i, j;
118 
119     // options
120     enum {RUN_ALL,
121           RUN_SINGLE_BENCH,
122           RUN_SINGLE_PACKAGE,
123           RUN_SEARCH,
124     } mode = RUN_ALL;
125     unsigned int benchmark_id = 0;
126     unsigned int package_id = 0;
127     int verbose = 1;
128     int autoscale = 1;
129     int cpu_clock_detect = 1;
130     int output_to_file = 0;
131     char filename[128];
132     char search_string[128];
133 
134     // get input options
135     int d;
136     while((d = getopt(argc,argv,"hvqec:n:b:p:t:lLs:o:")) != EOF){
137         switch (d) {
138         case 'h':   usage();        return 0;
139         case 'v':   verbose = 1;    break;
140         case 'q':   verbose = 0;    break;
141         case 'e':
142             estimate_cpu_clock();
143             return 0;
144         case 'c':
145             cpu_clock = atof(optarg);
146             if (cpu_clock < 0) {
147                 printf("error: cpu clock speed is negative (%f)\n", cpu_clock);
148                 return -1;
149             }
150             cpu_clock_detect = 0;
151             break;
152         case 'n':
153             num_base_trials = atoi(optarg);
154             autoscale = 0;
155             break;
156         case 'b':
157             benchmark_id = atoi(optarg);
158             if (benchmark_id >= NUM_AUTOSCRIPTS) {
159                 printf("error, cannot run benchmark %u; index exceeded\n", benchmark_id);
160                 return -1;
161             } else {
162                 mode = RUN_SINGLE_BENCH;
163             }
164             break;
165         case 'p':
166             package_id = atoi(optarg);
167             if (package_id >= NUM_PACKAGES) {
168                 printf("error, cannot run package %u; index exceeded\n", package_id);
169                 return -1;
170             } else {
171                 mode = RUN_SINGLE_PACKAGE;
172             }
173             break;
174         case 't':
175             runtime = atof(optarg);
176             if (runtime < 1e-3f)     runtime = 1e-3f;
177             else if (runtime > 10.f) runtime = 10.0f;
178             printf("minimum runtime: %d ms\n", (int) roundf(runtime*1e3));
179             break;
180         case 'l':
181             // list only packages and exit
182             for (i=0; i<NUM_PACKAGES; i++)
183                 printf("%u: %s\n", packages[i].id, packages[i].name);
184             return 0;
185         case 'L':
186             // list packages, scripts and exit
187             for (i=0; i<NUM_PACKAGES; i++) {
188                 printf("%u: %s\n", packages[i].id, packages[i].name);
189                 for (j=packages[i].index; j<packages[i].num_scripts+packages[i].index; j++)
190                     printf("    %-3u: %-22s\n", scripts[j].id, scripts[j].name);
191             }
192             return 0;
193         case 's':
194             mode = RUN_SEARCH;
195             strncpy(search_string, optarg, 128);
196             search_string[127] = '\0';
197             break;
198         case 'o':
199             output_to_file = 1;
200             strcpy(filename, optarg);
201             break;
202         default:
203             usage();
204             return 0;
205         }
206     }
207 
208     // run empty loop; a bug was found that sometimes the first package run
209     // resulted in a longer execution time than what the benchmark really
210     // reflected.  This loop prevents that from happening.
211     for (i=0; i<1e6; i++) {
212         // do nothing
213     }
214 
215     if (cpu_clock_detect)
216         estimate_cpu_clock();
217 
218     if (autoscale)
219         set_num_trials_from_cpu_speed();
220 
221     switch (mode) {
222     case RUN_ALL:
223         for (i=0; i<NUM_PACKAGES; i++)
224             execute_package( &packages[i], verbose );
225 
226         //for (i=0; i<NUM_PACKAGES; i++)
227         //    print_package_results( &packages[i] );
228         break;
229     case RUN_SINGLE_BENCH:
230         execute_benchmark( &scripts[benchmark_id], verbose );
231         //print_benchmark_results( &scripts[benchmark_id] );
232         break;
233     case RUN_SINGLE_PACKAGE:
234         execute_package( &packages[package_id], verbose );
235         //print_package_results( &packages[package_id] );
236         break;
237     case RUN_SEARCH:
238         printf("running all packages and benchmarks matching '%s'...\n", search_string);
239         for (i=0; i<NUM_PACKAGES; i++) {
240             // see if search string matches package name
241             if (strstr(packages[i].name, search_string) != NULL) {
242                 // run the package
243                 execute_package( &packages[i], verbose );
244             }
245         }
246         printf("running all remaining scripts matching '%s'...\n", search_string);
247         for (i=0; i<NUM_AUTOSCRIPTS; i++) {
248             // see if search string matches benchmark name
249             if (strstr(scripts[i].name, search_string) != NULL && scripts[i].num_trials == 0) {
250                 // run the benchmark
251                 execute_benchmark( &scripts[i], verbose );
252             }
253         }
254         break;
255     default:
256         fprintf(stderr,"invalid mode\n");
257         exit(1);
258     }
259 
260     if (output_to_file) {
261         fid = fopen(filename,"w");
262         if (!fid) {
263             printf("error: could not open file %s for writing\n", filename);
264             return 1;
265         }
266 
267         // print header
268         fprintf(fid,"# %s : auto-generated file (autoscript version %s)\n", filename, AUTOSCRIPT_VERSION);
269         fprintf(fid,"#\n");
270         fprintf(fid,"# invoked as:\n");
271         fprintf(fid,"#   ");
272         for (i=0; i<argc; i++)
273             fprintf(fid," %s", argv[i]);
274         fprintf(fid,"\n");
275         fprintf(fid,"#\n");
276         fprintf(fid,"# properties:\n");
277         fprintf(fid,"#  verbose             :   %s\n", verbose ? "true" : "false");
278         fprintf(fid,"#  autoscale           :   %s\n", autoscale ? "true" : "false");
279         fprintf(fid,"#  cpu_clock_detect    :   %s\n", cpu_clock_detect ? "true" : "false");
280         fprintf(fid,"#  search string       :   '%s'\n", mode == RUN_SEARCH ? search_string : "");
281         fprintf(fid,"#  runtime             :   %12.8f s\n", runtime);
282         fprintf(fid,"#  cpu_clock           :   %e Hz\n", cpu_clock);
283         fprintf(fid,"#  cpu_clock determined:   %s\n", cpu_clock_detect ? "estimated" : "specified");
284         fprintf(fid,"#  num_trials          :   %lu\n", num_base_trials);
285         fprintf(fid,"#\n");
286         fprintf(fid,"# %-5s %-30s %12s %12s %12s %12s\n",
287                 "id", "name", "num trials", "ex.time [s]", "rate [t/s]", "[cycles/t]");
288 
289         for (i=0; i<NUM_AUTOSCRIPTS; i++) {
290             if (scripts[i].num_trials > 0)
291                 output_benchmark_to_file(fid, &scripts[i]);
292         }
293 
294         fclose(fid);
295         printf("results written to %s\n", filename);
296     }
297 
298     return 0;
299 }
300 
301 // run basic benchmark to estimate CPU clock frequency
estimate_cpu_clock(void)302 void estimate_cpu_clock(void)
303 {
304     printf("  estimating cpu clock frequency...\n");
305     unsigned long int i, n = 1<<4;
306     struct rusage start, finish;
307     double extime;
308 
309     // run trials until execution time threshold is exceeded
310     do {
311         // trials
312         n <<= 1;
313 
314         // NOTE: Smart compilers will realize that this loop doesn't really do
315         //       anything, so they won't actually compute anything. We need to
316         //       actually do something interesting here to trick the compiler
317         //       into actually crunching these numbers, and then later display
318         //       the results, even if they're meaningless
319         unsigned int k = 366001;    // large prime number
320         unsigned int g = 184903;    // another large prime number
321         unsigned int s = 1;
322         getrusage(RUSAGE_SELF, &start);
323         for (i=0; i<n; i++) {
324             // perform mindless task
325             s = (s*k) % g;
326         }
327         getrusage(RUSAGE_SELF, &finish);
328 
329         extime = calculate_execution_time(start, finish);
330 
331         // print results to screen
332         // NOTE: it is necessary to do something with the variable 's' so that
333         //       the compiler will actually run the above loop
334         printf("%12lu trials in %8.3f ms, s = %6u\n", n, extime*1e3, s);
335     } while (extime < 0.5 && n < (1<<28));
336 
337     // estimate cpu clock frequency
338     cpu_clock = 9.5 * n / extime;
339 
340     printf("  performed %ld trials in %5.1f ms\n", n, extime * 1e3);
341 
342     float clock_format = cpu_clock;
343     char clock_units = convert_units(&clock_format);
344     printf("  estimated clock speed: %7.3f %cHz\n", clock_format, clock_units);
345 }
346 
set_num_trials_from_cpu_speed(void)347 void set_num_trials_from_cpu_speed(void)
348 {
349     unsigned long int min_trials = 256;
350     num_base_trials = (unsigned long int) ( cpu_clock / 10e3 );
351     num_base_trials = (num_base_trials < min_trials) ? min_trials : num_base_trials;
352 
353     printf("  setting number of base trials to %ld\n", num_base_trials);
354 }
355 
// Run one benchmark and store its results in *_benchmark.
//
// The benchmark is invoked with num_base_trials trials; while the measured
// execution time stays below the global minimum runtime the trial count is
// doubled and the benchmark is run again, for at most 30 attempts (a warning
// is written to stderr if the last attempt is still too short).
//
//  _benchmark  : benchmark to run; extime, num_trials, rate and
//                cycles_per_trial are overwritten
//  _verbose    : when non-zero, print the results to stdout
void execute_benchmark(benchmark_t* _benchmark, int _verbose)
{
    unsigned long int n = num_base_trials;
    struct rusage start, finish;

    unsigned int num_attempts = 0;
    unsigned long int num_trials;
    do {
        // increment number of attempts
        num_attempts++;

        // set number of trials and run benchmark; the benchmark reports the
        // number of trials back through the same variable
        num_trials = n;
        _benchmark->api(&start, &finish, &num_trials);
        _benchmark->extime = calculate_execution_time(start, finish);

        // check exit criteria
        if (_benchmark->extime >= runtime) {
            break;
        } else if (num_attempts == 30) {
            fprintf(stderr,"warning: benchmark could not execute over minimum run time\n");
            break;
        } else {
            // increase number of trials
            n *= 2;
        }
    } while (1);

    // the stored count is only an unsigned int; saturate instead of wrapping
    // so that a very large count cannot turn into a small one (or into zero,
    // which the rest of the program treats as "not run")
    const unsigned int max_stored_trials = (unsigned int)-1;
    _benchmark->num_trials = (num_trials > max_stored_trials)
                           ? max_stored_trials
                           : (unsigned int) num_trials;

    // compute the rate from the full-width count, not the stored one
    _benchmark->rate = (float)(num_trials) / _benchmark->extime;
    _benchmark->cycles_per_trial = cpu_clock / (_benchmark->rate);

    if (_verbose)
        print_benchmark_results(_benchmark);
}
391 
// Run every benchmark that belongs to a package, in table order.
//
//  _package    : package whose scripts[] entries are executed
//  _verbose    : when non-zero, print the package heading; also passed on
//                to execute_benchmark()
void execute_package(package_t* _package, int _verbose)
{
    if (_verbose)
        printf("%u: %s\n", _package->id, _package->name);

    // walk the package's slice of scripts[], starting at its first entry
    unsigned int offset = 0;
    while (offset < _package->num_scripts) {
        benchmark_t * script = &scripts[ offset + _package->index ];
        execute_benchmark( script, _verbose );
        offset++;
    }
}
402 
// convert raw value into metric units,
//   example: "0.01397s" -> "13.97 ms"
//
// Rescales *_v in place so that it can be printed next to a metric prefix
// and returns that prefix: 'p', 'n', 'u', 'm', ' ' (none), 'k', 'M', 'G' or
// 'T'. The prefix is chosen from the magnitude of the value, so a negative
// value gets the same prefix as its positive counterpart and keeps its sign.
// Zero is returned unchanged with no prefix.
char convert_units(float * _v)
{
    // prefixes in ascending order; an entry applies when |value| < limit
    static const struct {
        double limit;   // exclusive upper bound on the magnitude
        double scale;   // factor applied to the value
        char   prefix;  // character returned to the caller
    } ranges[] = {
        { 1e-9, 1e12, 'p' },
        { 1e-6, 1e9,  'n' },
        { 1e-3, 1e6,  'u' },
        { 1e+0, 1e3,  'm' },
        { 1e3,  1e+0, ' ' },
        { 1e6,  1e-3, 'k' },
        { 1e9,  1e-6, 'M' },
        { 1e12, 1e-9, 'G' },
    };

    const double value     = *_v;
    const double magnitude = (value < 0.0) ? -value : value;

    // zero has no meaningful prefix
    if (magnitude == 0.0)
        return ' ';

    unsigned int i;
    for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
        if (magnitude < ranges[i].limit) {
            *_v = (float)(value * ranges[i].scale);
            return ranges[i].prefix;
        }
    }

    // everything at or above 1e12 is expressed in tera
    *_v = (float)(value * 1e-12);
    return 'T';
}
420 
// Print one line of results for a benchmark to stdout.
//
// The trial count, execution time, rate and cycles per trial are each
// rescaled with convert_units() and printed next to the returned prefix.
void print_benchmark_results(benchmark_t* _b)
{
    // quantities in print order: trials, seconds, trials/second, cycles/trial
    float scaled[4];
    char  prefix[4];

    scaled[0] = (float)(_b->num_trials);
    scaled[1] = _b->extime;
    scaled[2] = _b->rate;
    scaled[3] = _b->cycles_per_trial;

    int k;
    for (k = 0; k < 4; k++)
        prefix[k] = convert_units(&scaled[k]);

    printf("  %-3u: %-30s: %6.2f %c trials / %6.2f %cs (%6.2f %c t/s, %6.2f %c c/t)\n",
        _b->id, _b->name,
        scaled[0], prefix[0],
        scaled[1], prefix[1],
        scaled[2], prefix[2],
        scaled[3], prefix[3]);
}
446 
// Print a package heading, the stored results of each of its benchmarks,
// and a trailing blank line to stdout.
void print_package_results(package_t* _package)
{
    printf("%u: %s:\n", _package->id, _package->name);

    // the package owns scripts[index] .. scripts[index + num_scripts - 1]
    unsigned int k = _package->index;
    while (k < (_package->index + _package->num_scripts)) {
        print_benchmark_results( &scripts[k] );
        k++;
    }

    printf("\n");
}
456 
// Compute the cpu time, in seconds, that elapsed between two resource-usage
// snapshots: the user-time difference plus the system-time difference, each
// made up of a whole-second part and a microsecond part.
double calculate_execution_time(struct rusage _start, struct rusage _finish)
{
    // terms are accumulated in the same order as a single left-to-right sum
    double elapsed = (double)(_finish.ru_utime.tv_sec - _start.ru_utime.tv_sec);

    // user time, microsecond part
    elapsed += 1e-6*(_finish.ru_utime.tv_usec - _start.ru_utime.tv_usec);

    // system time, whole seconds
    elapsed += _finish.ru_stime.tv_sec;
    elapsed -= _start.ru_stime.tv_sec;

    // system time, microsecond part
    elapsed += 1e-6*(_finish.ru_stime.tv_usec - _start.ru_stime.tv_usec);

    return elapsed;
}
464 
// Write one row of results (id, name, number of trials, execution time,
// rate, cycles per trial) for a benchmark to an open stream.
//
//  _fid        : stream to write to
//  _benchmark  : benchmark whose stored results are written
void output_benchmark_to_file(FILE * _fid, benchmark_t * _benchmark)
{
    // float fields are passed to fprintf as double
    const double seconds           = _benchmark->extime;
    const double trials_per_second = _benchmark->rate;
    const double cycles            = _benchmark->cycles_per_trial;

    fprintf(_fid,"  %-5u %-30s %12u %12.4e %12.4e %12.4e\n",
                 _benchmark->id, _benchmark->name, _benchmark->num_trials,
                 seconds, trials_per_second, cycles);
}
475 
476