1 #include "Halide.h"
2 #include <stdio.h>
3 
4 using namespace Halide;
5 
// State shared between the test functions and the custom trace callbacks
// defined below.
//
// buffer_index:    numeric suffix of the Funcs built by the currently running
//                  test; the tracers only look at events for "g_<buffer_index>".
// run_tracer:      set to true by a tracer on a produce event for that Func and
//                  back to false on the matching consume event; stores are only
//                  validated and counted while it is true.
// niters_expected: number of stores to g that the current test expects.
// niters:          number of stores to g counted by the tracer so far.
int buffer_index = 0;
bool run_tracer = false;
int niters_expected = 0;
int niters = 0;
11 
intermediate_bound_depend_on_output_trace(void * user_context,const halide_trace_event_t * e)12 int intermediate_bound_depend_on_output_trace(void *user_context, const halide_trace_event_t *e) {
13     std::string buffer_name = "g_" + std::to_string(buffer_index);
14     if (std::string(e->func) == buffer_name) {
15         if (e->event == halide_trace_produce) {
16             run_tracer = true;
17         } else if (e->event == halide_trace_consume) {
18             run_tracer = false;
19         }
20 
21         if (run_tracer && (e->event == halide_trace_store)) {
22             if (!((e->coordinates[0] < e->coordinates[1]) && (e->coordinates[0] >= 0) &&
23                   (e->coordinates[0] <= 199) && (e->coordinates[1] >= 0) &&
24                   (e->coordinates[1] <= 199))) {
25                 printf("Bounds on store of g were supposed to be x < y and x=[0, 99] and y=[0, 99]\n"
26                        "Instead they are: %d %d\n",
27                        e->coordinates[0], e->coordinates[1]);
28                 exit(-1);
29             }
30             niters++;
31         }
32     }
33     return 0;
34 }
35 
func_call_bound_trace(void * user_context,const halide_trace_event_t * e)36 int func_call_bound_trace(void *user_context, const halide_trace_event_t *e) {
37     std::string buffer_name = "g_" + std::to_string(buffer_index);
38     if (std::string(e->func) == buffer_name) {
39         if (e->event == halide_trace_produce) {
40             run_tracer = true;
41         } else if (e->event == halide_trace_consume) {
42             run_tracer = false;
43         }
44 
45         if (run_tracer && (e->event == halide_trace_store)) {
46             if (!((e->coordinates[0] >= 10) && (e->coordinates[0] <= 109))) {
47                 printf("Bounds on store of g were supposed to be x=[10, 109]\n"
48                        "Instead it is: %d\n",
49                        e->coordinates[0]);
50                 exit(-1);
51             }
52             niters++;
53         }
54     }
55     return 0;
56 }
57 
box_bound_trace(void * user_context,const halide_trace_event_t * e)58 int box_bound_trace(void *user_context, const halide_trace_event_t *e) {
59     std::string buffer_name = "g_" + std::to_string(buffer_index);
60     if (std::string(e->func) == buffer_name) {
61         if (e->event == halide_trace_produce) {
62             run_tracer = true;
63         } else if (e->event == halide_trace_consume) {
64             run_tracer = false;
65         }
66 
67         if (run_tracer && (e->event == halide_trace_store)) {
68             if (!((e->coordinates[0] >= 0) && (e->coordinates[0] <= 99) &&
69                   (e->coordinates[1] >= 0) && (e->coordinates[1] <= 99))) {
70                 printf("Bounds on store of g were supposed to be x < y and x=[0, 99] and y=[0, 99]\n"
71                        "Instead they are: %d %d\n",
72                        e->coordinates[0], e->coordinates[1]);
73                 exit(-1);
74             }
75             niters++;
76         }
77     }
78     return 0;
79 }
80 
equality_inequality_bound_test(int index)81 int equality_inequality_bound_test(int index) {
82     buffer_index = index;
83 
84     Func f("f_" + std::to_string(index));
85     Var x("x"), y("y");
86     f(x, y) = x + y;
87 
88     RDom r(0, 100, 0, 100);
89     r.where(r.x < r.y);
90     r.where(!(r.x != 10));
91     f(r.x, r.y) += 1;
92 
93     Buffer<int> im = f.realize(200, 200);
94     for (int y = 0; y < im.height(); y++) {
95         for (int x = 0; x < im.width(); x++) {
96             int correct = x + y;
97             if ((x == 10) && (0 <= y && y <= 99)) {
98                 correct += (x < y) ? 1 : 0;
99             }
100             if (im(x, y) != correct) {
101                 printf("im(%d, %d) = %d instead of %d\n",
102                        x, y, im(x, y), correct);
103                 return -1;
104             }
105         }
106     }
107     return 0;
108 }
109 
split_fuse_test(int index)110 int split_fuse_test(int index) {
111     buffer_index = index;
112 
113     Func f("f_" + std::to_string(index));
114     Var x("x"), y("y");
115     f(x, y) = x + y;
116 
117     RDom r(0, 100, 0, 100);
118     r.where(r.x < r.y);
119     f(r.x, r.y) += 1;
120 
121     RVar rx_outer, rx_inner, r_fused;
122     f.update().reorder(r.y, r.x);
123     f.update().split(r.x, rx_outer, rx_inner, 4);
124     f.update().fuse(rx_inner, r.y, r_fused);
125 
126     Buffer<int> im = f.realize(200, 200);
127     for (int y = 0; y < im.height(); y++) {
128         for (int x = 0; x < im.width(); x++) {
129             int correct = x + y;
130             if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
131                 correct += (x < y) ? 1 : 0;
132             }
133             if (im(x, y) != correct) {
134                 printf("im(%d, %d) = %d instead of %d\n",
135                        x, y, im(x, y), correct);
136                 return -1;
137             }
138         }
139     }
140     return 0;
141 }
142 
free_variable_bound_test(int index)143 int free_variable_bound_test(int index) {
144     buffer_index = index;
145 
146     Func f("f_" + std::to_string(index));
147     Var x("x"), y("y"), z("z");
148     f(x, y, z) = x + y + z;
149 
150     RDom r(0, 100, 0, 100, "r");
151     r.where(r.x < r.y + z);
152     f(r.x, r.y, z) += 1;
153 
154     Buffer<int> im = f.realize(200, 200, 200);
155     for (int z = 0; z < im.channels(); z++) {
156         for (int y = 0; y < im.height(); y++) {
157             for (int x = 0; x < im.width(); x++) {
158                 int correct = x + y + z;
159                 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
160                     correct += (x < y + z) ? 1 : 0;
161                 }
162                 if (im(x, y, z) != correct) {
163                     printf("im(%d, %d, %d) = %d instead of %d\n",
164                            x, y, z, im(x, y, z), correct);
165                     return -1;
166                 }
167             }
168         }
169     }
170     return 0;
171 }
172 
func_call_inside_bound_test(int index)173 int func_call_inside_bound_test(int index) {
174     buffer_index = index;
175 
176     Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
177     Var x("x"), y("y");
178 
179     g(x) = x;
180 
181     f(x, y) = x + y;
182 
183     RDom r(0, 100, 0, 100, "r");
184     r.where(r.x < g(r.y + 10));
185     f(r.x, r.y) += 1;
186 
187     // Expect g to be computed over x=[10, 109].
188     g.compute_root();
189 
190     f.set_custom_trace(&func_call_bound_trace);
191     g.trace_stores();
192     g.trace_realizations();
193 
194     run_tracer = false;
195     niters_expected = 100;
196     niters = 0;
197     Buffer<int> im = f.realize(200, 200);
198 
199     for (int y = 0; y < im.height(); y++) {
200         for (int x = 0; x < im.width(); x++) {
201             int correct = x + y;
202             if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
203                 correct += (x < y + 10) ? 1 : 0;
204             }
205             if (im(x, y) != correct) {
206                 printf("im(%d, %d) = %d instead of %d\n",
207                        x, y, im(x, y), correct);
208                 return -1;
209             }
210         }
211     }
212     if (niters_expected != niters) {
213         printf("func_call_inside_bound_test : Expect niters on g to be %d but got %d instead\n",
214                niters_expected, niters);
215         return -1;
216     }
217     return 0;
218 }
219 
func_call_inside_bound_inline_test(int index)220 int func_call_inside_bound_inline_test(int index) {
221     buffer_index = index;
222 
223     Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
224     Func h("h_" + std::to_string(index));
225     Var x("x"), y("y");
226 
227     g(x) = x;
228     h(x) = 2 * x;
229 
230     f(x, y) = x + y;
231 
232     RDom r(0, 100, 0, 100, "r");
233     r.where(r.x < g(r.y) + h(r.x));
234     f(r.x, r.y) += 1;
235 
236     Buffer<int> im = f.realize(200, 200);
237 
238     for (int y = 0; y < im.height(); y++) {
239         for (int x = 0; x < im.width(); x++) {
240             int correct = x + y;
241             if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
242                 correct += (x < y + 2 * x) ? 1 : 0;
243             }
244             if (im(x, y) != correct) {
245                 printf("im(%d, %d) = %d instead of %d\n",
246                        x, y, im(x, y), correct);
247                 return -1;
248             }
249         }
250     }
251     return 0;
252 }
253 
two_linear_bounds_test(int index)254 int two_linear_bounds_test(int index) {
255     buffer_index = index;
256 
257     Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
258     Var x("x"), y("y");
259 
260     g(x, y) = x + y;
261 
262     f(x, y) = x + y;
263     RDom r(0, 100, 0, 100);
264     r.where(2 * r.x + 30 < r.y);
265     r.where(r.y >= 100 - r.x);
266     f(r.x, r.y) += 2 * g(r.x, r.y);
267 
268     // Expect g to be computed over x=[0,99] and y=[1,99].
269     g.compute_root();
270 
271     f.set_custom_trace(&box_bound_trace);
272     g.trace_stores();
273     g.trace_realizations();
274 
275     run_tracer = false;
276     // The first condition means r.x. can be at most 34 (2*34 + 30 =
277     // 98 < 99).  The second condition means r.x must be at least 1,
278     // so there are 34 legal values for r.x.  The second condition
279     // also means that r.y is at least 100 - 34 and at most 99, so
280     // there are also 34 legal values of it. We only actually iterate
281     // over a triangle within this box, but Halide takes bounding
282     // boxes for bounds relationships.
283     niters_expected = 34 * 34;
284     niters = 0;
285     Buffer<int> im = f.realize(200, 200);
286     for (int y = 0; y < im.height(); y++) {
287         for (int x = 0; x < im.width(); x++) {
288             int correct = x + y;
289             if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
290                 correct = ((2 * x + 30 < y) && (y >= 100 - x)) ? 3 * correct : correct;
291             }
292             if (im(x, y) != correct) {
293                 printf("im(%d, %d) = %d instead of %d\n",
294                        x, y, im(x, y), correct);
295                 return -1;
296             }
297         }
298     }
299     if (niters_expected != niters) {
300         printf("two_linear_bounds_test : Expect niters on g to be %d but got %d instead\n",
301                niters_expected, niters);
302         return -1;
303     }
304     return 0;
305 }
306 
circle_bound_test(int index)307 int circle_bound_test(int index) {
308     buffer_index = index;
309 
310     Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
311     Var x("x"), y("y");
312     g(x, y) = x;
313     f(x, y) = x + y;
314 
315     // Iterate over circle with radius of 10
316     RDom r(0, 100, 0, 100);
317     r.where(r.x * r.x + r.y * r.y <= 100);
318     f(r.x, r.y) += g(r.x, r.y);
319 
320     // Expect g to be still computed over x=[0,99] and y=[0,99]. The predicate
321     // guard for the non-linear term will be left as is in the inner loop of f,
322     // i.e. f loop will still iterate over x=[0,99] and y=[0,99].
323     g.compute_at(f, r.y);
324 
325     f.set_custom_trace(&box_bound_trace);
326     g.trace_stores();
327     g.trace_realizations();
328 
329     run_tracer = false;
330     niters_expected = 100 * 100;
331     niters = 0;
332     Buffer<int> im = f.realize(200, 200);
333     for (int y = 0; y < im.height(); y++) {
334         for (int x = 0; x < im.width(); x++) {
335             int correct = x + y;
336             if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
337                 correct += (x * x + y * y <= 100) ? x : 0;
338             }
339             if (im(x, y) != correct) {
340                 printf("im(%d, %d) = %d instead of %d\n",
341                        x, y, im(x, y), correct);
342                 return -1;
343             }
344         }
345     }
346     return 0;
347 }
348 
intermediate_computed_if_param_test(int index)349 int intermediate_computed_if_param_test(int index) {
350     buffer_index = index;
351 
352     Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
353     Var x("x"), y("y");
354     Param<int> p;
355 
356     g(x, y) = x + y;
357 
358     f(x, y) = x + y;
359     RDom r(0, 100, 0, 100);
360     r.where(p > 3);
361     f(r.x, r.y) += 2 * g(r.x, r.y);
362 
363     // Expect g to be only computed over x=[0,99] and y=[0,99] if param is bigger
364     // than 3.
365     g.compute_root();
366 
367     f.set_custom_trace(&box_bound_trace);
368     g.trace_stores();
369     g.trace_realizations();
370 
371     {
372         printf("....Set p to 5, expect g to be computed\n");
373         p.set(5);
374         run_tracer = false;
375         niters_expected = 100 * 100;
376         niters = 0;
377         Buffer<int> im = f.realize(200, 200);
378         for (int y = 0; y < im.height(); y++) {
379             for (int x = 0; x < im.width(); x++) {
380                 int correct = x + y;
381                 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
382                     correct = 3 * correct;
383                 }
384                 if (im(x, y) != correct) {
385                     printf("im(%d, %d) = %d instead of %d\n",
386                            x, y, im(x, y), correct);
387                     return -1;
388                 }
389             }
390         }
391         if (niters_expected != niters) {
392             printf("intermediate_computed_if_param_test : Expect niters on g to be %d but got %d instead\n",
393                    niters_expected, niters);
394             return -1;
395         }
396     }
397 
398     {
399         printf("....Set p to 0, expect g to be not computed\n");
400         p.set(0);
401         run_tracer = false;
402         niters_expected = 0;
403         niters = 0;
404         Buffer<int> im = f.realize(200, 200);
405         for (int y = 0; y < im.height(); y++) {
406             for (int x = 0; x < im.width(); x++) {
407                 int correct = x + y;
408                 if (im(x, y) != correct) {
409                     printf("im(%d, %d) = %d instead of %d\n",
410                            x, y, im(x, y), correct);
411                     return -1;
412                 }
413             }
414         }
415         if (niters_expected != niters) {
416             printf("intermediate_computed_if_param_test : Expect niters on g to be %d but got %d instead\n",
417                    niters_expected, niters);
418             return -1;
419         }
420     }
421     return 0;
422 }
423 
intermediate_bound_depend_on_output_test(int index)424 int intermediate_bound_depend_on_output_test(int index) {
425     buffer_index = index;
426 
427     Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
428     Var x("x"), y("y");
429 
430     g(x, y) = x;
431     f(x, y) = x + y;
432 
433     RDom r(0, 200, 0, 200);
434     r.where(r.x < r.y);
435     f(r.x, r.y) = g(r.x, r.y);
436 
437     // Expect bound of g on r.x to be directly dependent on the simplified
438     // bound of f on r.x, which should have been r.x = [0, r.y) in this case
439     g.compute_at(f, r.y);
440 
441     f.set_custom_trace(&intermediate_bound_depend_on_output_trace);
442     g.trace_stores();
443     g.trace_realizations();
444 
445     run_tracer = false;
446     niters_expected = 200 * 199 / 2;
447     niters = 0;
448     Buffer<int> im = f.realize(200, 200);
449 
450     for (int y = 0; y < im.height(); y++) {
451         for (int x = 0; x < im.width(); x++) {
452             int correct = x + y;
453             if ((0 <= x && x <= 199) && (0 <= y && y <= 199)) {
454                 if (x < y) {
455                     correct = x;
456                 }
457             }
458             if (im(x, y) != correct) {
459                 printf("im(%d, %d) = %d instead of %d\n",
460                        x, y, im(x, y), correct);
461                 return -1;
462             }
463         }
464     }
465     if (niters_expected != niters) {
466         printf("intermediate_bound_depend_on_output_test: Expect niters on g to be %d but got %d instead\n",
467                niters_expected, niters);
468         return -1;
469     }
470     return 0;
471 }
472 
tile_intermediate_bound_depend_on_output_test(int index)473 int tile_intermediate_bound_depend_on_output_test(int index) {
474     buffer_index = index;
475 
476     Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
477     Var x("x"), y("y");
478 
479     g(x, y) = x;
480 
481     f(x, y) = x + y;
482 
483     RDom r(0, 200, 0, 200, "r");
484     r.where(r.x < r.y);
485     f(r.x, r.y) += g(r.x, r.y);
486 
487     RVar rxi("rxi"), ryi("ryi");
488     f.update(0).tile(r.x, r.y, rxi, ryi, 8, 8);
489     f.update(0).reorder(rxi, ryi, r.x, r.y);
490 
491     // Expect bound of g on r.x to be directly dependent on the simplified
492     // bound of f on r.x, which should have been r.x = [0, r.y) in this case
493     g.compute_at(f, ryi);
494 
495     f.set_custom_trace(&intermediate_bound_depend_on_output_trace);
496     g.trace_stores();
497     g.trace_realizations();
498 
499     run_tracer = false;
500     niters_expected = 200 * 199 / 2;
501     niters = 0;
502     Buffer<int> im = f.realize(200, 200);
503 
504     for (int y = 0; y < im.height(); y++) {
505         for (int x = 0; x < im.width(); x++) {
506             int correct = x + y;
507             if ((0 <= x && x <= 199) && (0 <= y && y <= 199)) {
508                 correct += (x < y) ? x : 0;
509             }
510             if (im(x, y) != correct) {
511                 printf("im(%d, %d) = %d instead of %d\n",
512                        x, y, im(x, y), correct);
513                 return -1;
514             }
515         }
516     }
517 
518     if (niters_expected != niters) {
519         printf("intermediate_bound_depend_on_output_test: Expect niters on g to be %d but got %d instead\n",
520                niters_expected, niters);
521         return -1;
522     }
523     return 0;
524 }
525 
self_reference_bound_test(int index)526 int self_reference_bound_test(int index) {
527     buffer_index = index;
528 
529     Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
530     Var x("x"), y("y");
531     f(x, y) = x + y;
532     g(x, y) = 10;
533 
534     RDom r1(0, 100, 0, 100, "r1");
535     r1.where(f(r1.x, r1.y) >= 40);
536     r1.where(f(r1.x, r1.y) != 50);
537     f(r1.x, r1.y) += 1;
538     f.compute_root();
539 
540     RDom r2(0, 50, 0, 50, "r2");
541     r2.where(f(r2.x, r2.y) < 30);
542     g(r2.x, r2.y) += f(r2.x, r2.y);
543 
544     Buffer<int> im1 = f.realize(200, 200);
545     for (int y = 0; y < im1.height(); y++) {
546         for (int x = 0; x < im1.width(); x++) {
547             int correct = x + y;
548             if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
549                 correct += ((correct >= 40) && (correct != 50)) ? 1 : 0;
550             }
551             if (im1(x, y) != correct) {
552                 printf("im1(%d, %d) = %d instead of %d\n",
553                        x, y, im1(x, y), correct);
554                 return -1;
555             }
556         }
557     }
558 
559     Buffer<int> im2 = g.realize(200, 200);
560     for (int y = 0; y < im2.height(); y++) {
561         for (int x = 0; x < im2.width(); x++) {
562             int correct = 10;
563             if ((0 <= x && x <= 49) && (0 <= y && y <= 49)) {
564                 correct += (im1(x, y) < 30) ? im1(x, y) : 0;
565             }
566             if (im2(x, y) != correct) {
567                 printf("im2(%d, %d) = %d instead of %d\n",
568                        x, y, im2(x, y), correct);
569                 return -1;
570             }
571         }
572     }
573     return 0;
574 }
575 
random_float_bound_test(int index)576 int random_float_bound_test(int index) {
577     buffer_index = index;
578 
579     Func f("f_" + std::to_string(index));
580     Var x("x"), y("y");
581 
582     Expr e1 = random_float() < 0.5f;
583     f(x, y) = Tuple(e1, x + y);
584 
585     RDom r(0, 100, 0, 100);
586     r.where(f(r.x, r.y)[0]);
587     f(r.x, r.y) = Tuple(f(r.x, r.y)[0], f(r.x, r.y)[1] + 10);
588 
589     Realization res = f.realize(200, 200);
590     assert(res.size() == 2);
591     Buffer<bool> im0 = res[0];
592     Buffer<int> im1 = res[1];
593 
594     int n_true = 0;
595     for (int y = 0; y < im1.height(); y++) {
596         for (int x = 0; x < im1.width(); x++) {
597             n_true += im0(x, y);
598             int correct = x + y;
599             if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
600                 correct += im0(x, y) ? 10 : 0;
601             }
602             if (im1(x, y) != correct) {
603                 printf("im1(%d, %d) = %d instead of %d\n",
604                        x, y, im1(x, y), correct);
605                 return -1;
606             }
607         }
608     }
609     if (!(19000 <= n_true && n_true <= 21000)) {
610         printf("Expected n_true to be between 19000 and 21000; got %d instead\n", n_true);
611         return -1;
612     }
613     return 0;
614 }
615 
newton_method_test()616 int newton_method_test() {
617     Func inverse;
618     Var x;
619     // Negating the bits of a float is a piecewise linear approximation to inverting it
620     inverse(x) = {-0.25f * reinterpret<float>(~(reinterpret<uint32_t>(cast<float>(x + 1)))), 0};
621     const int max_iters = 10;
622     RDom r(0, max_iters);
623     Expr not_converged = abs(inverse(x)[0] * (x + 1) - 1) > 0.001f;
624     r.where(not_converged);
625 
626     // Compute the inverse of x using Newton's method, and count the
627     // number of iterations required to reach convergence
628     inverse(x) = {inverse(x)[0] * (2 - (x + 1) * inverse(x)[0]),
629                   r + 1};
630     {
631         Realization r = inverse.realize(128);
632         Buffer<float> r0 = r[0];
633         Buffer<int> r1 = r[1];
634         for (int i = 0; i < r0.width(); i++) {
635             float x = (i + 1);
636             float prod = x * r0(i);
637             int num_iters = r1(i);
638             if (num_iters == max_iters) {
639                 printf("Newton's method didn't converge!\n");
640                 return -1;
641             }
642             if (std::abs(prod - 1) > 0.001) {
643                 printf("Newton's method converged without producing the correct inverse:\n"
644                        "%f * %f = %f (%d iterations)\n",
645                        x, r0(i), prod, r1(i));
646                 return -1;
647             }
648         }
649     }
650     return 0;
651 }
652 
init_on_gpu_update_on_cpu_test(int index)653 int init_on_gpu_update_on_cpu_test(int index) {
654     buffer_index = index;
655 
656     Func f("f_" + std::to_string(index));
657     Var x("x"), y("y");
658     f(x, y) = x + y;
659 
660     RDom r(0, 100, 0, 100);
661     r.where(r.x < r.y);
662     r.where(!(r.x != 10));
663     f(r.x, r.y) += 3;
664 
665     Var xi("xi"), yi("yi");
666     f.gpu_tile(x, y, xi, yi, 4, 4);
667 
668     Buffer<int> im = f.realize(200, 200);
669     for (int y = 0; y < im.height(); y++) {
670         for (int x = 0; x < im.width(); x++) {
671             int correct = x + y;
672             if ((x == 10) && (0 <= y && y <= 99)) {
673                 correct += (x < y) ? 3 : 0;
674             }
675             if (im(x, y) != correct) {
676                 printf("im(%d, %d) = %d instead of %d\n",
677                        x, y, im(x, y), correct);
678                 return -1;
679             }
680         }
681     }
682     return 0;
683 }
684 
init_on_cpu_update_on_gpu_test(int index)685 int init_on_cpu_update_on_gpu_test(int index) {
686     buffer_index = index;
687 
688     Func f("f_" + std::to_string(index));
689     Var x("x"), y("y");
690     f(x, y) = x + y;
691 
692     RDom r(0, 100, 0, 100);
693     r.where(!(r.x != 10));
694     r.where(r.x < r.y);
695     f(r.x, r.y) += 3;
696 
697     RVar rxi("rxi"), ryi("ryi");
698     f.update(0).gpu_tile(r.x, r.y, r.x, r.y, rxi, ryi, 4, 4);
699 
700     Buffer<int> im = f.realize(200, 200);
701     for (int y = 0; y < im.height(); y++) {
702         for (int x = 0; x < im.width(); x++) {
703             int correct = x + y;
704             if ((x == 10) && (0 <= y && y <= 99)) {
705                 correct += (x < y) ? 3 : 0;
706             }
707             if (im(x, y) != correct) {
708                 printf("im(%d, %d) = %d instead of %d\n",
709                        x, y, im(x, y), correct);
710                 return -1;
711             }
712         }
713     }
714     return 0;
715 }
716 
gpu_intermediate_computed_if_param_test(int index)717 int gpu_intermediate_computed_if_param_test(int index) {
718     buffer_index = index;
719 
720     Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index)), h("h_" + std::to_string(index));
721     Var x("x"), y("y");
722     Param<int> p;
723 
724     g(x, y) = x + y;
725     h(x, y) = 10;
726 
727     f(x, y) = x + y;
728     RDom r1(0, 100, 0, 100);
729     r1.where(p > 3);
730     f(r1.x, r1.y) += 2 * g(r1.x, r1.y);
731 
732     RDom r2(0, 100, 0, 100);
733     r2.where(p <= 3);
734     f(r2.x, r2.y) += h(r2.x, r2.y) + g(r2.x, r2.y);
735 
736     RVar r1xi("r1xi"), r1yi("r1yi");
737     f.update(0).specialize(p >= 2).gpu_tile(r1.x, r1.y, r1xi, r1yi, 4, 4);
738     g.compute_root();
739     h.compute_root();
740     Var xi("xi"), yi("yi");
741     h.gpu_tile(x, y, xi, yi, 8, 8);
742 
743     {
744         printf("....Set p to 5, expect g to be computed\n");
745         p.set(5);
746         run_tracer = false;
747         niters_expected = 100 * 100;
748         niters = 0;
749         Buffer<int> im = f.realize(200, 200);
750         for (int y = 0; y < im.height(); y++) {
751             for (int x = 0; x < im.width(); x++) {
752                 int correct = x + y;
753                 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
754                     correct = 3 * correct;
755                 }
756                 if (im(x, y) != correct) {
757                     printf("im(%d, %d) = %d instead of %d\n",
758                            x, y, im(x, y), correct);
759                     return -1;
760                 }
761             }
762         }
763     }
764 
765     {
766         printf("....Set p to 0, expect g to be not computed\n");
767         p.set(0);
768         run_tracer = false;
769         niters_expected = 0;
770         niters = 0;
771         Buffer<int> im = f.realize(200, 200);
772         for (int y = 0; y < im.height(); y++) {
773             for (int x = 0; x < im.width(); x++) {
774                 int correct = x + y;
775                 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
776                     correct += 10 + correct;
777                 }
778                 if (im(x, y) != correct) {
779                     printf("im(%d, %d) = %d instead of %d\n",
780                            x, y, im(x, y), correct);
781                     return -1;
782                 }
783             }
784         }
785     }
786     return 0;
787 }
788 
vectorize_predicated_rvar_test()789 int vectorize_predicated_rvar_test() {
790     Func f("f");
791     Var x("x"), y("y");
792     f(x, y) = 0;
793 
794     Expr w = (f.output_buffer().width() / 2) * 2;
795     Expr h = (f.output_buffer().height() / 2) * 2;
796 
797     RDom r(1, w - 2, 1, h - 2);
798     r.where((r.x + r.y) % 2 == 0);
799 
800     f(r.x, r.y) += 10;
801 
802     f.update(0).unroll(r.x, 2).allow_race_conditions().vectorize(r.x, 8);
803 
804     Buffer<int> im = f.realize(200, 200);
805     for (int y = 0; y < im.height(); y++) {
806         for (int x = 0; x < im.width(); x++) {
807             int correct = 0;
808             if ((1 <= x && x < im.width() - 1) && (1 <= y && y < im.height() - 1) &&
809                 ((x + y) % 2 == 0)) {
810                 correct += 10;
811             }
812             if (im(x, y) != correct) {
813                 printf("im(%d, %d) = %d instead of %d\n",
814                        x, y, im(x, y), correct);
815                 return -1;
816             }
817         }
818     }
819     return 0;
820 }
821 
main(int argc,char ** argv)822 int main(int argc, char **argv) {
823     printf("Running equality inequality bound test\n");
824     if (equality_inequality_bound_test(0) != 0) {
825         return -1;
826     }
827 
828     printf("Running split fuse test\n");
829     if (split_fuse_test(1) != 0) {
830         return -1;
831     }
832 
833     printf("Running bound depend on free variable test\n");
834     if (free_variable_bound_test(2) != 0) {
835         return -1;
836     }
837 
838     printf("Running function call inside bound test\n");
839     if (func_call_inside_bound_test(3) != 0) {
840         return -1;
841     }
842 
843     printf("Running function call inside bound inline test\n");
844     if (func_call_inside_bound_inline_test(4) != 0) {
845         return -1;
846     }
847 
848     printf("Running two linear bounds test\n");
849     if (two_linear_bounds_test(5) != 0) {
850         return -1;
851     }
852 
853     printf("Running circular bound test\n");
854     if (circle_bound_test(6) != 0) {
855         return -1;
856     }
857 
858     printf("Running intermediate only computed if param is bigger than certain value test\n");
859     if (intermediate_computed_if_param_test(7) != 0) {
860         return -1;
861     }
862 
863     printf("Running tile intermediate stage depend on output bound test\n");
864     if (tile_intermediate_bound_depend_on_output_test(8) != 0) {
865         return -1;
866     }
867 
868     printf("Running intermediate stage depend on output bound\n");
869     if (intermediate_bound_depend_on_output_test(9) != 0) {
870         return -1;
871     }
872 
873     printf("Running self reference bound test\n");
874     if (self_reference_bound_test(10) != 0) {
875         return -1;
876     }
877 
878     printf("Running random float bound test\n");
879     if (random_float_bound_test(11) != 0) {
880         return -1;
881     }
882 
883     printf("Running newton's method test\n");
884     if (newton_method_test() != 0) {
885         return -1;
886     }
887 
888     printf("Running vectorize predicated rvar test\n");
889     if (vectorize_predicated_rvar_test() != 0) {
890         return -1;
891     }
892 
893     // Run GPU tests now if there is support for GPU.
894     if (!get_jit_target_from_environment().has_gpu_feature()) {
895         // TODO: split this test apart so that the relevant piece can be skipped appropriately
896         // printf("[SKIP] No GPU target enabled.\n");
897         printf("Success!\n");
898         return 0;
899     }
900 
901     printf("Running initialization on gpu and update on cpu test\n");
902     if (init_on_gpu_update_on_cpu_test(12) != 0) {
903         return -1;
904     }
905 
906     printf("Running initialization on cpu and update on gpu test\n");
907     if (init_on_cpu_update_on_gpu_test(13) != 0) {
908         return -1;
909     }
910 
911     printf("Running gpu intermediate only computed if param is bigger than certain value test\n");
912     if (gpu_intermediate_computed_if_param_test(14) != 0) {
913         return -1;
914     }
915 
916     printf("Success!\n");
917 
918     return 0;
919 }
920