#include "Halide.h"
#include "halide_benchmark.h"
#include <cstdio>
#include <cstdlib>
#include <memory>

using namespace Halide;
using namespace Halide::Tools;
8
test_deinterleave()9 void test_deinterleave() {
10 ImageParam src(UInt(8), 3);
11 Func dst;
12 Var x, y, c;
13
14 dst(x, y, c) = src(x, y, c);
15
16 src.dim(0).set_stride(3).dim(2).set_stride(1).set_bounds(0, 3);
17
18 // This is the default format for Halide, but made explicit for illustration.
19 dst.output_buffer()
20 .dim(0)
21 .set_stride(1)
22 .dim(2)
23 .set_extent(3);
24
25 dst.reorder(c, x, y).unroll(c);
26 dst.vectorize(x, 16);
27
28 // Allocate two 16 megapixel, 3 channel, 8-bit images -- input and output
29
30 // Setup src to be RGB interleaved, with no extra padding between channels or rows.
31 Buffer<uint8_t> src_image = Buffer<uint8_t>::make_interleaved(1 << 12, 1 << 12, 3);
32
33 // Setup dst to be planar, with no extra padding between channels or rows.
34 Buffer<uint8_t> dst_image(1 << 12, 1 << 12, 3);
35
36 src_image.for_each_element([&](int x, int y) {
37 src_image(x, y, 0) = 0;
38 src_image(x, y, 1) = 128;
39 src_image(x, y, 2) = 255;
40 });
41 dst_image.fill(0);
42
43 src.set(src_image);
44
45 dst.compile_jit();
46
47 // Warm up caches, etc.
48 dst.realize(dst_image);
49
50 double t1 = benchmark([&]() {
51 dst.realize(dst_image);
52 });
53
54 printf("Interleaved to planar bandwidth %.3e byte/s.\n",
55 dst_image.number_of_elements() / t1);
56
57 dst_image.for_each_element([&](int x, int y) {
58 assert(dst_image(x, y, 0) == 0);
59 assert(dst_image(x, y, 1) == 128);
60 assert(dst_image(x, y, 2) == 255);
61 });
62
63 // Setup a semi-planar output case.
64 dst_image = Buffer<uint8_t>(1 << 12, 3, 1 << 12);
65 dst_image.transpose(1, 2);
66 dst_image.fill(0);
67
68 double t2 = benchmark([&]() {
69 dst.realize(dst_image);
70 });
71
72 dst_image.for_each_element([&](int x, int y) {
73 assert(dst_image(x, y, 0) == 0);
74 assert(dst_image(x, y, 1) == 128);
75 assert(dst_image(x, y, 2) == 255);
76 });
77
78 printf("Interleaved to semi-planar bandwidth %.3e byte/s.\n",
79 dst_image.number_of_elements() / t2);
80 }
81
test_interleave(bool fast)82 void test_interleave(bool fast) {
83 ImageParam src(UInt(8), 3);
84 Func dst;
85 Var x, y, c;
86
87 dst(x, y, c) = src(x, y, c);
88
89 // This is the default format for Halide, but made explicit for illustration.
90 src.dim(0).set_stride(1).dim(2).set_extent(3);
91
92 dst.output_buffer()
93 .dim(0)
94 .set_stride(3)
95 .dim(2)
96 .set_stride(1)
97 .set_bounds(0, 3);
98
99 if (fast) {
100 dst.reorder(c, x, y).bound(c, 0, 3).unroll(c);
101 dst.vectorize(x, 16);
102 } else {
103 dst.reorder(c, x, y).vectorize(x, 16);
104 }
105
106 // Allocate two 16 megapixel, 3 channel, 8-bit images -- input and output
107
108 // Setup src to be planar
109 Buffer<uint8_t> src_image(1 << 12, 1 << 12, 3);
110
111 // Setup dst to be interleaved
112 Buffer<uint8_t> dst_image = Buffer<uint8_t>::make_interleaved(1 << 12, 1 << 12, 3);
113
114 src_image.for_each_element([&](int x, int y) {
115 src_image(x, y, 0) = 0;
116 src_image(x, y, 1) = 128;
117 src_image(x, y, 2) = 255;
118 });
119 dst_image.fill(0);
120
121 src.set(src_image);
122
123 if (fast) {
124 dst.compile_to_lowered_stmt("rgb_interleave_fast.stmt", dst.infer_arguments());
125 } else {
126 dst.compile_to_lowered_stmt("rgb_interleave_slow.stmt", dst.infer_arguments());
127 }
128
129 // Warm up caches, etc.
130 dst.realize(dst_image);
131
132 double t = benchmark([&]() {
133 dst.realize(dst_image);
134 });
135
136 printf("Planar to interleaved bandwidth %.3e byte/s.\n",
137 dst_image.number_of_elements() / t);
138
139 dst_image.for_each_element([&](int x, int y) {
140 assert(dst_image(x, y, 0) == 0);
141 assert(dst_image(x, y, 1) == 128);
142 assert(dst_image(x, y, 2) == 255);
143 });
144 }
145
main(int argc,char ** argv)146 int main(int argc, char **argv) {
147 Target target = get_jit_target_from_environment();
148 if (target.arch == Target::WebAssembly) {
149 printf("[SKIP] Performance tests are meaningless and/or misleading under WebAssembly interpreter.\n");
150 return 0;
151 }
152
153 test_deinterleave();
154 test_interleave(false);
155 test_interleave(true);
156 printf("Success!\n");
157 return 0;
158 }
159