1 #include "Halide.h"
2 #include "halide_benchmark.h"
3 #include "halide_test_dirs.h"
4 
5 #include <chrono>
6 #include <cstdio>
7 
8 using namespace Halide;
9 using namespace Halide::Tools;
10 
main(int argc,char ** argv)11 int main(int argc, char **argv) {
12     Target target = get_jit_target_from_environment();
13     if (target.arch == Target::WebAssembly) {
14         printf("[SKIP] Performance tests are meaningless and/or misleading under WebAssembly interpreter.\n");
15         return 0;
16     }
17 
18     ImageParam src(UInt(8), 1);
19     Func dst;
20     Var x;
21     dst(x) = src(x);
22 
23     dst.vectorize(x, 32, TailStrategy::GuardWithIf);
24 
25     dst.compile_to_assembly(Internal::get_test_tmp_dir() + "halide_memcpy.s", {src}, "halide_memcpy");
26     dst.compile_jit();
27 
28     const int32_t buffer_size = 12345678;
29 
30     Buffer<uint8_t> input(buffer_size);
31     Buffer<uint8_t> output(buffer_size);
32 
33     src.set(input);
34 
35     double t1 = benchmark([&]() {
36         dst.realize(output);
37     });
38 
39     double t2 = benchmark([&]() {
40         memcpy(output.data(), input.data(), input.width());
41     });
42 
43     printf("system memcpy: %.3e byte/s\n", buffer_size / t2);
44     printf("halide memcpy: %.3e byte/s\n", buffer_size / t1);
45 
46     // memcpy will win by a little bit for large inputs because it uses streaming stores
47     if (t1 > t2 * 3) {
48         printf("Halide memcpy is slower than it should be.\n");
49         return -1;
50     }
51 
52     printf("Success!\n");
53     return 0;
54 }
55