1 #include "Halide.h"
2 #include "halide_benchmark.h"
3 #include "halide_test_dirs.h"
4
5 #include <chrono>
6 #include <cstdio>
7
8 using namespace Halide;
9 using namespace Halide::Tools;
10
main(int argc,char ** argv)11 int main(int argc, char **argv) {
12 Target target = get_jit_target_from_environment();
13 if (target.arch == Target::WebAssembly) {
14 printf("[SKIP] Performance tests are meaningless and/or misleading under WebAssembly interpreter.\n");
15 return 0;
16 }
17
18 ImageParam src(UInt(8), 1);
19 Func dst;
20 Var x;
21 dst(x) = src(x);
22
23 dst.vectorize(x, 32, TailStrategy::GuardWithIf);
24
25 dst.compile_to_assembly(Internal::get_test_tmp_dir() + "halide_memcpy.s", {src}, "halide_memcpy");
26 dst.compile_jit();
27
28 const int32_t buffer_size = 12345678;
29
30 Buffer<uint8_t> input(buffer_size);
31 Buffer<uint8_t> output(buffer_size);
32
33 src.set(input);
34
35 double t1 = benchmark([&]() {
36 dst.realize(output);
37 });
38
39 double t2 = benchmark([&]() {
40 memcpy(output.data(), input.data(), input.width());
41 });
42
43 printf("system memcpy: %.3e byte/s\n", buffer_size / t2);
44 printf("halide memcpy: %.3e byte/s\n", buffer_size / t1);
45
46 // memcpy will win by a little bit for large inputs because it uses streaming stores
47 if (t1 > t2 * 3) {
48 printf("Halide memcpy is slower than it should be.\n");
49 return -1;
50 }
51
52 printf("Success!\n");
53 return 0;
54 }
55