1 #include "Halide.h"
2 
3 using namespace Halide;
4 
main(int argc,char ** argv)5 int main(int argc, char **argv) {
6     Target t(get_jit_target_from_environment());
7     if (!t.has_gpu_feature()) {
8         printf("[SKIP] No GPU target enabled.\n");
9         return 0;
10     }
11 
12     Func f, g;
13     Var x, y;
14 
15     f(x, y) = x + y;
16     g(x, y) = f(x, y);
17 
18     Var xi, yi, xii, yii;
19     g.compute_root()
20         .gpu_tile(x, y, xi, yi, 64, 16, TailStrategy::GuardWithIf)
21         .tile(xi, yi, xii, yii, 2, 2)
22         .unroll(xii)
23         .unroll(yii);
24 
25     f.compute_at(g, xi)
26         .store_in(MemoryType::Register)
27         .unroll(x)
28         .unroll(y);
29 
30     // This tests two things
31 
32     // 1) Because of the GuardWithIf on g, we need a variable amount
33     // of f. If you put it in registers it should take an upper bound
34     // on the size required. It should also be possible to unroll it
35     // entirely by injecting if statements.
36 
37     // 2) No other test uses MemoryType::Register without also having
38     // a GPULanes loop. This used to break (the allocation would
39     // disappear entirely).
40 
41     Buffer<int> result = g.realize(123, 245);
42 
43     for (int y = 0; y < result.height(); y++) {
44         for (int x = 0; x < result.width(); x++) {
45             int correct = x + y;
46             if (result(x, y) != correct) {
47                 printf("result(%d, %d) = %d instead of %d\n",
48                        x, y, result(x, y), correct);
49                 return -1;
50             }
51         }
52     }
53 
54     printf("Success!\n");
55     return 0;
56 }
57