1 #include "Halide.h"
2
3 using namespace Halide;
4
main(int argc,char ** argv)5 int main(int argc, char **argv) {
6 Target t(get_jit_target_from_environment());
7 if (!t.has_gpu_feature()) {
8 printf("[SKIP] No GPU target enabled.\n");
9 return 0;
10 }
11
12 Func f, g;
13 Var x, y;
14
15 f(x, y) = x + y;
16 g(x, y) = f(x, y);
17
18 Var xi, yi, xii, yii;
19 g.compute_root()
20 .gpu_tile(x, y, xi, yi, 64, 16, TailStrategy::GuardWithIf)
21 .tile(xi, yi, xii, yii, 2, 2)
22 .unroll(xii)
23 .unroll(yii);
24
25 f.compute_at(g, xi)
26 .store_in(MemoryType::Register)
27 .unroll(x)
28 .unroll(y);
29
30 // This tests two things
31
32 // 1) Because of the GuardWithIf on g, we need a variable amount
33 // of f. If you put it in registers it should take an upper bound
34 // on the size required. It should also be possible to unroll it
35 // entirely by injecting if statements.
36
37 // 2) No other test uses MemoryType::Register without also having
38 // a GPULanes loop. This used to break (the allocation would
39 // disappear entirely).
40
41 Buffer<int> result = g.realize(123, 245);
42
43 for (int y = 0; y < result.height(); y++) {
44 for (int x = 0; x < result.width(); x++) {
45 int correct = x + y;
46 if (result(x, y) != correct) {
47 printf("result(%d, %d) = %d instead of %d\n",
48 x, y, result(x, y), correct);
49 return -1;
50 }
51 }
52 }
53
54 printf("Success!\n");
55 return 0;
56 }
57