1 #include "RunGen.h"
2
3 using namespace Halide::RunGen;
4 using Halide::Tools::BenchmarkConfig;
5
6 namespace {
7
// One node in the singly-linked list of filters that have registered
// themselves with this binary.
struct RegisteredFilter {
    // Next node in the list, or nullptr at the tail.
    RegisteredFilter *next = nullptr;
    // The filter's entry point; it receives its arguments as an array of
    // void pointers.
    int (*filter_argv_call)(void **) = nullptr;
    // Metadata describing the filter (its name is used for --name lookup).
    const struct halide_filter_metadata_t *filter_metadata = nullptr;
};

// Head of the registration list; new filters are pushed at the front, so the
// most recently registered filter comes first.
// NOTE(review): this is deliberately a plain pointer -- presumably so that it
// is usable from registration code that runs before main(); confirm before
// replacing it with a container.
RegisteredFilter *registered_filters{nullptr};
15
halide_register_argv_and_metadata(int (* filter_argv_call)(void **),const struct halide_filter_metadata_t * filter_metadata,const char * const * extra_key_value_pairs)16 extern "C" void halide_register_argv_and_metadata(
17 int (*filter_argv_call)(void **),
18 const struct halide_filter_metadata_t *filter_metadata,
19 const char *const *extra_key_value_pairs) {
20
21 auto *rf = new RegisteredFilter();
22 rf->next = registered_filters;
23 rf->filter_argv_call = filter_argv_call;
24 rf->filter_metadata = filter_metadata;
25 // RunGen ignores extra_key_value_pairs
26 registered_filters = rf;
27 }
28
// Returns a copy of `str` in which every non-overlapping occurrence of `find`
// has been replaced by `replace`.
//
// Matching proceeds left to right and resumes just past each inserted
// replacement, so text introduced by `replace` is never itself re-scanned
// (e.g. replacing "a" with "aa" terminates). An empty `find` matches nothing:
// `str` is returned unchanged.
std::string replace_all(const std::string &str,
                        const std::string &find,
                        const std::string &replace) {
    std::string result = str;
    if (find.empty()) {
        // std::string::find("") succeeds at every position, so without this
        // guard the loop below would never terminate.
        return result;
    }

    size_t pos = 0;
    while ((pos = result.find(find, pos)) != std::string::npos) {
        result.replace(pos, find.length(), replace);
        // Skip over what was just inserted.
        pos += replace.length();
    }
    return result;
}
40
usage(const char * argv0)41 void usage(const char *argv0) {
42 const std::string usage = R"USAGE(
43 Usage: $NAME$ argument=value [argument=value... ] [flags]
44
45 Arguments:
46
47 Specify the Generator's input and output values by name, in any order.
48
49 Scalar inputs are specified in the obvious syntax, e.g.
50
51 some_int=42 some_float=3.1415
52
53 You can also use the text `default` or `estimate` to use the default or
54 estimate value of the given input, respectively. (You can join these by
55 commas to give default-then-estimate or estimate-then-default behaviors.)
56
57 Buffer inputs and outputs are specified by pathname:
58
59 some_input_buffer=/path/to/existing/file.png
60 some_output_buffer=/path/to/create/output/file.png
61
62 We currently support JPG, PGM, PNG, PPM format. If the type or dimensions
63 of the input or output file type can't support the data (e.g., your filter
64 uses float32 input and output, and you load/save to PNG), we'll use the most
65 robust approximation within the format and issue a warning to stdout.
66
67 (We anticipate adding other image formats in the future, in particular,
68 TIFF and TMP.)
69
70 For inputs, there are also "pseudo-file" specifiers you can use; currently
71 supported are
72
73 zero:[NUM,NUM,...]
74
75 This input should be an image with the given extents, and all elements
76 set to zero of the appropriate type. (This is useful for benchmarking
77 filters that don't have performance variances with different data.)
78
79 constant:VALUE:[NUM,NUM,...]
80
81 Like zero, but allows an arbitrary value of the input's type.
82
83 identity:[NUM,NUM,...]
84
85 This input should be an image with the given extents, where diagonal
86 elements are set to one of the appropriate type, and the rest are zero.
87 Diagonal elements are those whose first two coordinates are equal.
88
89 random:SEED:[NUM,NUM,...]
90
91 This input should be an image with the given extents, and all elements
92 set to a random value of the appropriate type. The random values will
93 be constructed using the mt19937_64 engine, using the given seed;
94 all floating point values will be in a uniform distribution between
95 0.0 and 1.0, while integral values will be uniform across the entire
96 range of the type.
97
98 (We anticipate adding other pseudo-file inputs in the future, e.g.
99 various random distributions, gradients, rainbows, etc.)
100
101 In place of [NUM,NUM,...] for boundary, you may specify 'auto'; this
102 will run a bounds-query to choose a legal input size given the output
103 size constraints. (In general, this is useful only when also using
104 the --output_extents flag.)
105
106 In place of [NUM,NUM,...] for boundary, you may specify 'estimate';
107 this will use the estimated bounds specified in the code.
108
109 Flags:
110
111 --describe:
112 print names and types of all arguments to stdout and exit.
113
114 --output_extents=[NUM,NUM,...]
115 By default, we attempt to calculate a reasonable size for the output
116 buffers, based on the size of the input buffers and bounds query; if we
117 guess wrong, or you want to explicitly specify the desired output size,
118 you can specify the extent of each dimension with this flag:
119
120 --output_extents=[1000,100] # 2 dimensions: w=1000 h = 100
121 --output_extents=[100,200,3] # 3 dimensions: w=100 h=200 c=3
122
123 Note that if there are multiple outputs, all will be constrained
124 to this shape.
125
126 --verbose:
127 emit extra diagnostic output.
128
129 --quiet:
130 Don't log calls to halide_print() to stdout.
131
132 --benchmarks=all:
133 Run the filter with the given arguments many times to
134 produce an estimate of average execution time; this currently
135 runs "samples" sets of "iterations" each, and chooses the fastest
136 sample set.
137
138 --benchmark_min_time=DURATION_SECONDS [default = 0.1]:
139 Override the default minimum desired benchmarking time; ignored if
140 --benchmarks is not also specified.
141
142 --track_memory:
143 Override Halide memory allocator to track high-water mark of memory
144 allocation during run; note that this may slow down execution, so
145 benchmarks may be inaccurate if you combine --benchmark with this.
146
147 --default_input_buffers=VALUE:
148 Specify the value for all otherwise-unspecified buffer inputs, in the
149 same syntax in use above. If you omit =VALUE, "zero:auto" will be used.
150
151 --default_input_scalars=VALUE:
152 Specify the value for all otherwise-unspecified scalar inputs, in the
153 same syntax in use above. If you omit =VALUE, "estimate,default"
154 will be used.
155
156 --parsable_output:
157 Final output is emitted in an easy-to-parse output (one value per line),
158 rather than easy-for-humans.
159
160 --estimate_all:
161 Request that all inputs and outputs are based on estimate,
162 and fill buffers with random values. This is exactly equivalent to
163 specifying
164
165 --default_input_buffers=estimate_then_auto
166 --default_input_scalars=estimate
167 --output_extents=estimate
168
169 and is a convenience for automated benchmarking.
170
171 Known Issues:
172
173 * Filters running on GPU (vs CPU) have not been tested.
174 * Filters using buffer layouts other than planar (e.g. interleaved/chunky)
175 may be buggy.
176
177 )USAGE";
178
179 std::string basename = split_string(replace_all(argv0, "\\", "/"), "/").back();
180 std::cout << replace_all(usage, "$NAME$", basename);
181 }
182
183 // Utility class for installing memory-tracking machinery into the Halide runtime
184 // when --track_memory is specified.
185 class HalideMemoryTracker {
186 static HalideMemoryTracker *active;
187
188 std::mutex tracker_mutex;
189
190 // Total current CPU memory allocated via halide_malloc.
191 // Access controlled by tracker_mutex.
192 uint64_t memory_allocated{0};
193
194 // High-water mark of CPU memory allocated since program start
195 // (or last call to get_cpu_memory_highwater_reset).
196 // Access controlled by tracker_mutex.
197 uint64_t memory_highwater{0};
198
199 // Map of outstanding allocation sizes.
200 // Access controlled by tracker_mutex.
201 std::map<void *, size_t> memory_size_map;
202
tracker_malloc_impl(void * user_context,size_t x)203 void *tracker_malloc_impl(void *user_context, size_t x) {
204 std::lock_guard<std::mutex> lock(tracker_mutex);
205
206 void *ptr = halide_default_malloc(user_context, x);
207
208 memory_allocated += x;
209 if (memory_highwater < memory_allocated) {
210 memory_highwater = memory_allocated;
211 }
212 if (memory_size_map.find(ptr) != memory_size_map.end()) {
213 halide_error(user_context, "Tracking error in tracker_malloc");
214 }
215 memory_size_map[ptr] = x;
216
217 return ptr;
218 }
219
tracker_free_impl(void * user_context,void * ptr)220 void tracker_free_impl(void *user_context, void *ptr) {
221 std::lock_guard<std::mutex> lock(tracker_mutex);
222 auto it = memory_size_map.find(ptr);
223 if (it == memory_size_map.end()) {
224 halide_error(user_context, "Tracking error in tracker_free");
225 }
226 size_t x = it->second;
227 memory_allocated -= x;
228 memory_size_map.erase(it);
229 halide_default_free(user_context, ptr);
230 }
231
tracker_malloc(void * user_context,size_t x)232 static void *tracker_malloc(void *user_context, size_t x) {
233 return active->tracker_malloc_impl(user_context, x);
234 }
235
tracker_free(void * user_context,void * ptr)236 static void tracker_free(void *user_context, void *ptr) {
237 return active->tracker_free_impl(user_context, ptr);
238 }
239
240 public:
install()241 void install() {
242 assert(!active);
243 active = this;
244 halide_set_custom_malloc(tracker_malloc);
245 halide_set_custom_free(tracker_free);
246 }
247
allocated()248 uint64_t allocated() {
249 std::lock_guard<std::mutex> lock(tracker_mutex);
250 return memory_allocated;
251 }
252
highwater()253 uint64_t highwater() {
254 std::lock_guard<std::mutex> lock(tracker_mutex);
255 return memory_highwater;
256 }
257
highwater_reset()258 void highwater_reset() {
259 std::lock_guard<std::mutex> lock(tracker_mutex);
260 memory_highwater = memory_allocated;
261 }
262 };
263
264 /* static */ HalideMemoryTracker *HalideMemoryTracker::active{nullptr};
265
// Whether informational messages are emitted by do_log_info(); off unless
// --verbose turns it on.
bool log_info{false};
// Whether warnings are emitted by do_log_warn(); on by default.
bool log_warn{true};
268
// Writes `s` to stdout exactly as given (no prefix, no trailing newline).
void do_log_cout(const std::string &s) {
    std::ostream &out = std::cout;
    out << s;
}
272
// Writes `s` to stderr exactly as given (no prefix, no trailing newline).
void do_log_cerr(const std::string &s) {
    std::ostream &err = std::cerr;
    err << s;
}
276
do_log_info(const std::string & s)277 void do_log_info(const std::string &s) {
278 if (log_info) {
279 do_log_cerr(s);
280 }
281 }
282
do_log_warn(const std::string & s)283 void do_log_warn(const std::string &s) {
284 if (log_warn) {
285 do_log_cerr("Warning: " + s);
286 }
287 }
288
do_log_fail(const std::string & s)289 void do_log_fail(const std::string &s) {
290 do_log_cerr(s);
291 abort();
292 }
293
294 } // namespace
295
296 namespace Halide {
297 namespace RunGen {
298
log()299 Logger log() {
300 return {do_log_cout, do_log_info, do_log_warn, do_log_fail};
301 }
302
303 } // namespace RunGen
304 } // namespace Halide
305
main(int argc,char ** argv)306 int main(int argc, char **argv) {
307 if (argc <= 1) {
308 usage(argv[0]);
309 return 0;
310 }
311
312 if (registered_filters == nullptr) {
313 std::cerr << "No filters registered. Compile RunGenMain.cpp along with at least one 'registration' output from a generator.\n";
314 return -1;
315 }
316
317 // Look for --name
318 std::string filter_name;
319 for (int i = 1; i < argc; ++i) {
320 if (argv[i][0] == '-') {
321 const char *p = argv[i] + 1; // skip -
322 if (p[0] == '-') {
323 p++; // allow -- as well, because why not
324 }
325 std::vector<std::string> v = split_string(p, "=");
326 std::string flag_name = v[0];
327 std::string flag_value = v.size() > 1 ? v[1] : "";
328 if (v.size() > 2) {
329 fail() << "Invalid argument: " << argv[i];
330 }
331 if (flag_name != "name") {
332 continue;
333 }
334 if (!filter_name.empty()) {
335 fail() << "--name cannot be specified twice.";
336 }
337 filter_name = flag_value;
338 if (filter_name.empty()) {
339 fail() << "--name cannot be empty.";
340 }
341 }
342 }
343
344 auto *rf = registered_filters;
345 if (filter_name.empty()) {
346 // Just choose the first one.
347 if (rf->next != nullptr) {
348 std::ostringstream o;
349 o << "Must specify --name if multiple filters are registered; registered filters are:\n";
350 for (auto *rf = registered_filters; rf != nullptr; rf = rf->next) {
351 o << " " << rf->filter_metadata->name << "\n";
352 }
353 o << "\n";
354 fail() << o.str();
355 }
356 } else {
357 for (; rf != nullptr; rf = rf->next) {
358 if (filter_name == rf->filter_metadata->name) {
359 break;
360 }
361 }
362 if (rf == nullptr) {
363 std::ostringstream o;
364 o << "Filter " << filter_name << " not found; registered filters are:\n";
365 for (auto *rf = registered_filters; rf != nullptr; rf = rf->next) {
366 o << " " << rf->filter_metadata->name << "\n";
367 }
368 o << "\n";
369 fail() << o.str();
370 }
371 }
372
373 RunGen r(rf->filter_argv_call, rf->filter_metadata);
374
375 std::string user_specified_output_shape;
376 std::set<std::string> seen_args;
377 bool benchmark = false;
378 bool track_memory = false;
379 bool describe = false;
380 double benchmark_min_time = BenchmarkConfig().min_time;
381 std::string default_input_buffers;
382 std::string default_input_scalars;
383 std::string benchmarks_flag_value;
384 for (int i = 1; i < argc; ++i) {
385 if (argv[i][0] == '-') {
386 const char *p = argv[i] + 1; // skip -
387 if (p[0] == '-') {
388 p++; // allow -- as well, because why not
389 }
390 std::vector<std::string> v = split_string(p, "=");
391 std::string flag_name = v[0];
392 std::string flag_value = v.size() > 1 ? v[1] : "";
393 if (v.size() > 2) {
394 fail() << "Invalid argument: " << argv[i];
395 }
396 if (flag_name == "name") {
397 continue;
398 } else if (flag_name == "verbose") {
399 if (flag_value.empty()) {
400 flag_value = "true";
401 }
402 if (!parse_scalar(flag_value, &log_info)) {
403 fail() << "Invalid value for flag: " << flag_name;
404 }
405 } else if (flag_name == "quiet") {
406 if (flag_value.empty()) {
407 flag_value = "true";
408 }
409 bool quiet;
410 if (!parse_scalar(flag_value, &quiet)) {
411 fail() << "Invalid value for flag: " << flag_name;
412 }
413 r.set_quiet(quiet);
414 } else if (flag_name == "parsable_output") {
415 if (flag_value.empty()) {
416 flag_value = "true";
417 }
418 bool parsable_output;
419 if (!parse_scalar(flag_value, &parsable_output)) {
420 fail() << "Invalid value for flag: " << flag_name;
421 }
422 r.set_parsable_output(parsable_output);
423 } else if (flag_name == "describe") {
424 if (flag_value.empty()) {
425 flag_value = "true";
426 }
427 if (!parse_scalar(flag_value, &describe)) {
428 fail() << "Invalid value for flag: " << flag_name;
429 }
430 } else if (flag_name == "track_memory") {
431 if (flag_value.empty()) {
432 flag_value = "true";
433 }
434 if (!parse_scalar(flag_value, &track_memory)) {
435 fail() << "Invalid value for flag: " << flag_name;
436 }
437 } else if (flag_name == "benchmarks") {
438 benchmarks_flag_value = flag_value;
439 benchmark = true;
440 } else if (flag_name == "benchmark_min_time") {
441 if (!parse_scalar(flag_value, &benchmark_min_time)) {
442 fail() << "Invalid value for flag: " << flag_name;
443 }
444 } else if (flag_name == "default_input_buffers") {
445 default_input_buffers = flag_value;
446 if (default_input_buffers.empty()) {
447 default_input_buffers = "zero:auto";
448 }
449 } else if (flag_name == "default_input_scalars") {
450 default_input_scalars = flag_value;
451 if (default_input_scalars.empty()) {
452 default_input_scalars = "estimate,default";
453 }
454 } else if (flag_name == "output_extents") {
455 user_specified_output_shape = flag_value;
456 } else if (flag_name == "estimate_all") {
457 // Equivalent to:
458 // --default_input_buffers=random:0:estimate_then_auto
459 // --default_input_scalars=estimate
460 // --output_extents=estimate
461 default_input_buffers = "random:0:estimate_then_auto";
462 default_input_scalars = "estimate";
463 user_specified_output_shape = "estimate";
464 } else {
465 usage(argv[0]);
466 fail() << "Unknown flag: " << flag_name;
467 }
468 } else {
469 // Assume it's a named Input or Output for the Generator,
470 // in the form name=value.
471 std::vector<std::string> v = split_string(argv[i], "=");
472 if (v.size() != 2 || v[0].empty() || v[1].empty()) {
473 fail() << "Invalid argument: " << argv[i];
474 }
475 r.parse_one(v[0], v[1], &seen_args);
476 }
477 }
478
479 if (describe) {
480 r.describe();
481 return 0;
482 }
483
484 // It's OK to omit output arguments when we are benchmarking or tracking memory.
485 bool ok_to_omit_outputs = (benchmark || track_memory);
486
487 if (benchmark && track_memory) {
488 warn() << "Using --track_memory with --benchmarks will produce inaccurate benchmark results.";
489 }
490
491 // Check to be sure that all required arguments are specified.
492 r.validate(seen_args, default_input_buffers, default_input_scalars, ok_to_omit_outputs);
493
494 // Parse all the input arguments, loading images as necessary.
495 // (Don't handle outputs yet.)
496 r.load_inputs(user_specified_output_shape);
497
498 // Run a bounds query: we need to figure out how to allocate the output buffers,
499 // and the input buffers might need reshaping to satisfy constraints (e.g. a chunky/interleaved layout).
500 std::vector<Shape> constrained_shapes = r.run_bounds_query();
501
502 r.adapt_input_buffers(constrained_shapes);
503 r.allocate_output_buffers(constrained_shapes);
504
505 // If we're tracking memory, install the memory tracker *after* doing a bounds query.
506 HalideMemoryTracker tracker;
507 if (track_memory) {
508 tracker.install();
509 }
510
511 // This is a single-purpose binary to benchmark this filter, so we
512 // shouldn't be eagerly returning device memory.
513 halide_reuse_device_allocations(nullptr, true);
514
515 if (benchmark) {
516 if (benchmarks_flag_value.empty()) {
517 benchmarks_flag_value = "all";
518 }
519 if (benchmarks_flag_value != "all") {
520 fail() << "The only valid value for --benchmarks is 'all'";
521 }
522 r.run_for_benchmark(benchmark_min_time);
523 } else {
524 r.run_for_output();
525 }
526
527 if (track_memory) {
528 // Ensure that we copy any GPU-output buffers back to host before
529 // we report on memory usage.
530 r.copy_outputs_to_host();
531 std::cout << "Maximum Halide memory: " << tracker.highwater()
532 << " bytes for output of " << r.megapixels_out() << " mpix.\n";
533 }
534
535 // Save the output(s), if necessary.
536 r.save_outputs();
537
538 return 0;
539 }
540