1 #include "Halide.h"
2 #include <stdio.h>
3
4 using namespace Halide;
5
// Shared state for the custom trace callbacks defined below. Tests that
// install one of those callbacks reset these values before realizing.

// Index of the test currently running; the callbacks only inspect events
// emitted by the Func named "g_<buffer_index>".
int buffer_index = 0;
// Set to true on a produce event of the watched Func and back to false on
// its consume event; stores are only counted while it is true.
bool run_tracer = false;
// Number of stores to the watched Func that the current test expects.
int niters_expected = 0;
// Number of stores to the watched Func counted so far by the callback.
int niters = 0;
11
intermediate_bound_depend_on_output_trace(void * user_context,const halide_trace_event_t * e)12 int intermediate_bound_depend_on_output_trace(void *user_context, const halide_trace_event_t *e) {
13 std::string buffer_name = "g_" + std::to_string(buffer_index);
14 if (std::string(e->func) == buffer_name) {
15 if (e->event == halide_trace_produce) {
16 run_tracer = true;
17 } else if (e->event == halide_trace_consume) {
18 run_tracer = false;
19 }
20
21 if (run_tracer && (e->event == halide_trace_store)) {
22 if (!((e->coordinates[0] < e->coordinates[1]) && (e->coordinates[0] >= 0) &&
23 (e->coordinates[0] <= 199) && (e->coordinates[1] >= 0) &&
24 (e->coordinates[1] <= 199))) {
25 printf("Bounds on store of g were supposed to be x < y and x=[0, 99] and y=[0, 99]\n"
26 "Instead they are: %d %d\n",
27 e->coordinates[0], e->coordinates[1]);
28 exit(-1);
29 }
30 niters++;
31 }
32 }
33 return 0;
34 }
35
func_call_bound_trace(void * user_context,const halide_trace_event_t * e)36 int func_call_bound_trace(void *user_context, const halide_trace_event_t *e) {
37 std::string buffer_name = "g_" + std::to_string(buffer_index);
38 if (std::string(e->func) == buffer_name) {
39 if (e->event == halide_trace_produce) {
40 run_tracer = true;
41 } else if (e->event == halide_trace_consume) {
42 run_tracer = false;
43 }
44
45 if (run_tracer && (e->event == halide_trace_store)) {
46 if (!((e->coordinates[0] >= 10) && (e->coordinates[0] <= 109))) {
47 printf("Bounds on store of g were supposed to be x=[10, 109]\n"
48 "Instead it is: %d\n",
49 e->coordinates[0]);
50 exit(-1);
51 }
52 niters++;
53 }
54 }
55 return 0;
56 }
57
box_bound_trace(void * user_context,const halide_trace_event_t * e)58 int box_bound_trace(void *user_context, const halide_trace_event_t *e) {
59 std::string buffer_name = "g_" + std::to_string(buffer_index);
60 if (std::string(e->func) == buffer_name) {
61 if (e->event == halide_trace_produce) {
62 run_tracer = true;
63 } else if (e->event == halide_trace_consume) {
64 run_tracer = false;
65 }
66
67 if (run_tracer && (e->event == halide_trace_store)) {
68 if (!((e->coordinates[0] >= 0) && (e->coordinates[0] <= 99) &&
69 (e->coordinates[1] >= 0) && (e->coordinates[1] <= 99))) {
70 printf("Bounds on store of g were supposed to be x < y and x=[0, 99] and y=[0, 99]\n"
71 "Instead they are: %d %d\n",
72 e->coordinates[0], e->coordinates[1]);
73 exit(-1);
74 }
75 niters++;
76 }
77 }
78 return 0;
79 }
80
equality_inequality_bound_test(int index)81 int equality_inequality_bound_test(int index) {
82 buffer_index = index;
83
84 Func f("f_" + std::to_string(index));
85 Var x("x"), y("y");
86 f(x, y) = x + y;
87
88 RDom r(0, 100, 0, 100);
89 r.where(r.x < r.y);
90 r.where(!(r.x != 10));
91 f(r.x, r.y) += 1;
92
93 Buffer<int> im = f.realize(200, 200);
94 for (int y = 0; y < im.height(); y++) {
95 for (int x = 0; x < im.width(); x++) {
96 int correct = x + y;
97 if ((x == 10) && (0 <= y && y <= 99)) {
98 correct += (x < y) ? 1 : 0;
99 }
100 if (im(x, y) != correct) {
101 printf("im(%d, %d) = %d instead of %d\n",
102 x, y, im(x, y), correct);
103 return -1;
104 }
105 }
106 }
107 return 0;
108 }
109
split_fuse_test(int index)110 int split_fuse_test(int index) {
111 buffer_index = index;
112
113 Func f("f_" + std::to_string(index));
114 Var x("x"), y("y");
115 f(x, y) = x + y;
116
117 RDom r(0, 100, 0, 100);
118 r.where(r.x < r.y);
119 f(r.x, r.y) += 1;
120
121 RVar rx_outer, rx_inner, r_fused;
122 f.update().reorder(r.y, r.x);
123 f.update().split(r.x, rx_outer, rx_inner, 4);
124 f.update().fuse(rx_inner, r.y, r_fused);
125
126 Buffer<int> im = f.realize(200, 200);
127 for (int y = 0; y < im.height(); y++) {
128 for (int x = 0; x < im.width(); x++) {
129 int correct = x + y;
130 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
131 correct += (x < y) ? 1 : 0;
132 }
133 if (im(x, y) != correct) {
134 printf("im(%d, %d) = %d instead of %d\n",
135 x, y, im(x, y), correct);
136 return -1;
137 }
138 }
139 }
140 return 0;
141 }
142
free_variable_bound_test(int index)143 int free_variable_bound_test(int index) {
144 buffer_index = index;
145
146 Func f("f_" + std::to_string(index));
147 Var x("x"), y("y"), z("z");
148 f(x, y, z) = x + y + z;
149
150 RDom r(0, 100, 0, 100, "r");
151 r.where(r.x < r.y + z);
152 f(r.x, r.y, z) += 1;
153
154 Buffer<int> im = f.realize(200, 200, 200);
155 for (int z = 0; z < im.channels(); z++) {
156 for (int y = 0; y < im.height(); y++) {
157 for (int x = 0; x < im.width(); x++) {
158 int correct = x + y + z;
159 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
160 correct += (x < y + z) ? 1 : 0;
161 }
162 if (im(x, y, z) != correct) {
163 printf("im(%d, %d, %d) = %d instead of %d\n",
164 x, y, z, im(x, y, z), correct);
165 return -1;
166 }
167 }
168 }
169 }
170 return 0;
171 }
172
func_call_inside_bound_test(int index)173 int func_call_inside_bound_test(int index) {
174 buffer_index = index;
175
176 Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
177 Var x("x"), y("y");
178
179 g(x) = x;
180
181 f(x, y) = x + y;
182
183 RDom r(0, 100, 0, 100, "r");
184 r.where(r.x < g(r.y + 10));
185 f(r.x, r.y) += 1;
186
187 // Expect g to be computed over x=[10, 109].
188 g.compute_root();
189
190 f.set_custom_trace(&func_call_bound_trace);
191 g.trace_stores();
192 g.trace_realizations();
193
194 run_tracer = false;
195 niters_expected = 100;
196 niters = 0;
197 Buffer<int> im = f.realize(200, 200);
198
199 for (int y = 0; y < im.height(); y++) {
200 for (int x = 0; x < im.width(); x++) {
201 int correct = x + y;
202 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
203 correct += (x < y + 10) ? 1 : 0;
204 }
205 if (im(x, y) != correct) {
206 printf("im(%d, %d) = %d instead of %d\n",
207 x, y, im(x, y), correct);
208 return -1;
209 }
210 }
211 }
212 if (niters_expected != niters) {
213 printf("func_call_inside_bound_test : Expect niters on g to be %d but got %d instead\n",
214 niters_expected, niters);
215 return -1;
216 }
217 return 0;
218 }
219
func_call_inside_bound_inline_test(int index)220 int func_call_inside_bound_inline_test(int index) {
221 buffer_index = index;
222
223 Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
224 Func h("h_" + std::to_string(index));
225 Var x("x"), y("y");
226
227 g(x) = x;
228 h(x) = 2 * x;
229
230 f(x, y) = x + y;
231
232 RDom r(0, 100, 0, 100, "r");
233 r.where(r.x < g(r.y) + h(r.x));
234 f(r.x, r.y) += 1;
235
236 Buffer<int> im = f.realize(200, 200);
237
238 for (int y = 0; y < im.height(); y++) {
239 for (int x = 0; x < im.width(); x++) {
240 int correct = x + y;
241 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
242 correct += (x < y + 2 * x) ? 1 : 0;
243 }
244 if (im(x, y) != correct) {
245 printf("im(%d, %d) = %d instead of %d\n",
246 x, y, im(x, y), correct);
247 return -1;
248 }
249 }
250 }
251 return 0;
252 }
253
two_linear_bounds_test(int index)254 int two_linear_bounds_test(int index) {
255 buffer_index = index;
256
257 Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
258 Var x("x"), y("y");
259
260 g(x, y) = x + y;
261
262 f(x, y) = x + y;
263 RDom r(0, 100, 0, 100);
264 r.where(2 * r.x + 30 < r.y);
265 r.where(r.y >= 100 - r.x);
266 f(r.x, r.y) += 2 * g(r.x, r.y);
267
268 // Expect g to be computed over x=[0,99] and y=[1,99].
269 g.compute_root();
270
271 f.set_custom_trace(&box_bound_trace);
272 g.trace_stores();
273 g.trace_realizations();
274
275 run_tracer = false;
276 // The first condition means r.x. can be at most 34 (2*34 + 30 =
277 // 98 < 99). The second condition means r.x must be at least 1,
278 // so there are 34 legal values for r.x. The second condition
279 // also means that r.y is at least 100 - 34 and at most 99, so
280 // there are also 34 legal values of it. We only actually iterate
281 // over a triangle within this box, but Halide takes bounding
282 // boxes for bounds relationships.
283 niters_expected = 34 * 34;
284 niters = 0;
285 Buffer<int> im = f.realize(200, 200);
286 for (int y = 0; y < im.height(); y++) {
287 for (int x = 0; x < im.width(); x++) {
288 int correct = x + y;
289 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
290 correct = ((2 * x + 30 < y) && (y >= 100 - x)) ? 3 * correct : correct;
291 }
292 if (im(x, y) != correct) {
293 printf("im(%d, %d) = %d instead of %d\n",
294 x, y, im(x, y), correct);
295 return -1;
296 }
297 }
298 }
299 if (niters_expected != niters) {
300 printf("two_linear_bounds_test : Expect niters on g to be %d but got %d instead\n",
301 niters_expected, niters);
302 return -1;
303 }
304 return 0;
305 }
306
circle_bound_test(int index)307 int circle_bound_test(int index) {
308 buffer_index = index;
309
310 Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
311 Var x("x"), y("y");
312 g(x, y) = x;
313 f(x, y) = x + y;
314
315 // Iterate over circle with radius of 10
316 RDom r(0, 100, 0, 100);
317 r.where(r.x * r.x + r.y * r.y <= 100);
318 f(r.x, r.y) += g(r.x, r.y);
319
320 // Expect g to be still computed over x=[0,99] and y=[0,99]. The predicate
321 // guard for the non-linear term will be left as is in the inner loop of f,
322 // i.e. f loop will still iterate over x=[0,99] and y=[0,99].
323 g.compute_at(f, r.y);
324
325 f.set_custom_trace(&box_bound_trace);
326 g.trace_stores();
327 g.trace_realizations();
328
329 run_tracer = false;
330 niters_expected = 100 * 100;
331 niters = 0;
332 Buffer<int> im = f.realize(200, 200);
333 for (int y = 0; y < im.height(); y++) {
334 for (int x = 0; x < im.width(); x++) {
335 int correct = x + y;
336 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
337 correct += (x * x + y * y <= 100) ? x : 0;
338 }
339 if (im(x, y) != correct) {
340 printf("im(%d, %d) = %d instead of %d\n",
341 x, y, im(x, y), correct);
342 return -1;
343 }
344 }
345 }
346 return 0;
347 }
348
intermediate_computed_if_param_test(int index)349 int intermediate_computed_if_param_test(int index) {
350 buffer_index = index;
351
352 Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
353 Var x("x"), y("y");
354 Param<int> p;
355
356 g(x, y) = x + y;
357
358 f(x, y) = x + y;
359 RDom r(0, 100, 0, 100);
360 r.where(p > 3);
361 f(r.x, r.y) += 2 * g(r.x, r.y);
362
363 // Expect g to be only computed over x=[0,99] and y=[0,99] if param is bigger
364 // than 3.
365 g.compute_root();
366
367 f.set_custom_trace(&box_bound_trace);
368 g.trace_stores();
369 g.trace_realizations();
370
371 {
372 printf("....Set p to 5, expect g to be computed\n");
373 p.set(5);
374 run_tracer = false;
375 niters_expected = 100 * 100;
376 niters = 0;
377 Buffer<int> im = f.realize(200, 200);
378 for (int y = 0; y < im.height(); y++) {
379 for (int x = 0; x < im.width(); x++) {
380 int correct = x + y;
381 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
382 correct = 3 * correct;
383 }
384 if (im(x, y) != correct) {
385 printf("im(%d, %d) = %d instead of %d\n",
386 x, y, im(x, y), correct);
387 return -1;
388 }
389 }
390 }
391 if (niters_expected != niters) {
392 printf("intermediate_computed_if_param_test : Expect niters on g to be %d but got %d instead\n",
393 niters_expected, niters);
394 return -1;
395 }
396 }
397
398 {
399 printf("....Set p to 0, expect g to be not computed\n");
400 p.set(0);
401 run_tracer = false;
402 niters_expected = 0;
403 niters = 0;
404 Buffer<int> im = f.realize(200, 200);
405 for (int y = 0; y < im.height(); y++) {
406 for (int x = 0; x < im.width(); x++) {
407 int correct = x + y;
408 if (im(x, y) != correct) {
409 printf("im(%d, %d) = %d instead of %d\n",
410 x, y, im(x, y), correct);
411 return -1;
412 }
413 }
414 }
415 if (niters_expected != niters) {
416 printf("intermediate_computed_if_param_test : Expect niters on g to be %d but got %d instead\n",
417 niters_expected, niters);
418 return -1;
419 }
420 }
421 return 0;
422 }
423
intermediate_bound_depend_on_output_test(int index)424 int intermediate_bound_depend_on_output_test(int index) {
425 buffer_index = index;
426
427 Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
428 Var x("x"), y("y");
429
430 g(x, y) = x;
431 f(x, y) = x + y;
432
433 RDom r(0, 200, 0, 200);
434 r.where(r.x < r.y);
435 f(r.x, r.y) = g(r.x, r.y);
436
437 // Expect bound of g on r.x to be directly dependent on the simplified
438 // bound of f on r.x, which should have been r.x = [0, r.y) in this case
439 g.compute_at(f, r.y);
440
441 f.set_custom_trace(&intermediate_bound_depend_on_output_trace);
442 g.trace_stores();
443 g.trace_realizations();
444
445 run_tracer = false;
446 niters_expected = 200 * 199 / 2;
447 niters = 0;
448 Buffer<int> im = f.realize(200, 200);
449
450 for (int y = 0; y < im.height(); y++) {
451 for (int x = 0; x < im.width(); x++) {
452 int correct = x + y;
453 if ((0 <= x && x <= 199) && (0 <= y && y <= 199)) {
454 if (x < y) {
455 correct = x;
456 }
457 }
458 if (im(x, y) != correct) {
459 printf("im(%d, %d) = %d instead of %d\n",
460 x, y, im(x, y), correct);
461 return -1;
462 }
463 }
464 }
465 if (niters_expected != niters) {
466 printf("intermediate_bound_depend_on_output_test: Expect niters on g to be %d but got %d instead\n",
467 niters_expected, niters);
468 return -1;
469 }
470 return 0;
471 }
472
tile_intermediate_bound_depend_on_output_test(int index)473 int tile_intermediate_bound_depend_on_output_test(int index) {
474 buffer_index = index;
475
476 Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
477 Var x("x"), y("y");
478
479 g(x, y) = x;
480
481 f(x, y) = x + y;
482
483 RDom r(0, 200, 0, 200, "r");
484 r.where(r.x < r.y);
485 f(r.x, r.y) += g(r.x, r.y);
486
487 RVar rxi("rxi"), ryi("ryi");
488 f.update(0).tile(r.x, r.y, rxi, ryi, 8, 8);
489 f.update(0).reorder(rxi, ryi, r.x, r.y);
490
491 // Expect bound of g on r.x to be directly dependent on the simplified
492 // bound of f on r.x, which should have been r.x = [0, r.y) in this case
493 g.compute_at(f, ryi);
494
495 f.set_custom_trace(&intermediate_bound_depend_on_output_trace);
496 g.trace_stores();
497 g.trace_realizations();
498
499 run_tracer = false;
500 niters_expected = 200 * 199 / 2;
501 niters = 0;
502 Buffer<int> im = f.realize(200, 200);
503
504 for (int y = 0; y < im.height(); y++) {
505 for (int x = 0; x < im.width(); x++) {
506 int correct = x + y;
507 if ((0 <= x && x <= 199) && (0 <= y && y <= 199)) {
508 correct += (x < y) ? x : 0;
509 }
510 if (im(x, y) != correct) {
511 printf("im(%d, %d) = %d instead of %d\n",
512 x, y, im(x, y), correct);
513 return -1;
514 }
515 }
516 }
517
518 if (niters_expected != niters) {
519 printf("intermediate_bound_depend_on_output_test: Expect niters on g to be %d but got %d instead\n",
520 niters_expected, niters);
521 return -1;
522 }
523 return 0;
524 }
525
self_reference_bound_test(int index)526 int self_reference_bound_test(int index) {
527 buffer_index = index;
528
529 Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index));
530 Var x("x"), y("y");
531 f(x, y) = x + y;
532 g(x, y) = 10;
533
534 RDom r1(0, 100, 0, 100, "r1");
535 r1.where(f(r1.x, r1.y) >= 40);
536 r1.where(f(r1.x, r1.y) != 50);
537 f(r1.x, r1.y) += 1;
538 f.compute_root();
539
540 RDom r2(0, 50, 0, 50, "r2");
541 r2.where(f(r2.x, r2.y) < 30);
542 g(r2.x, r2.y) += f(r2.x, r2.y);
543
544 Buffer<int> im1 = f.realize(200, 200);
545 for (int y = 0; y < im1.height(); y++) {
546 for (int x = 0; x < im1.width(); x++) {
547 int correct = x + y;
548 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
549 correct += ((correct >= 40) && (correct != 50)) ? 1 : 0;
550 }
551 if (im1(x, y) != correct) {
552 printf("im1(%d, %d) = %d instead of %d\n",
553 x, y, im1(x, y), correct);
554 return -1;
555 }
556 }
557 }
558
559 Buffer<int> im2 = g.realize(200, 200);
560 for (int y = 0; y < im2.height(); y++) {
561 for (int x = 0; x < im2.width(); x++) {
562 int correct = 10;
563 if ((0 <= x && x <= 49) && (0 <= y && y <= 49)) {
564 correct += (im1(x, y) < 30) ? im1(x, y) : 0;
565 }
566 if (im2(x, y) != correct) {
567 printf("im2(%d, %d) = %d instead of %d\n",
568 x, y, im2(x, y), correct);
569 return -1;
570 }
571 }
572 }
573 return 0;
574 }
575
random_float_bound_test(int index)576 int random_float_bound_test(int index) {
577 buffer_index = index;
578
579 Func f("f_" + std::to_string(index));
580 Var x("x"), y("y");
581
582 Expr e1 = random_float() < 0.5f;
583 f(x, y) = Tuple(e1, x + y);
584
585 RDom r(0, 100, 0, 100);
586 r.where(f(r.x, r.y)[0]);
587 f(r.x, r.y) = Tuple(f(r.x, r.y)[0], f(r.x, r.y)[1] + 10);
588
589 Realization res = f.realize(200, 200);
590 assert(res.size() == 2);
591 Buffer<bool> im0 = res[0];
592 Buffer<int> im1 = res[1];
593
594 int n_true = 0;
595 for (int y = 0; y < im1.height(); y++) {
596 for (int x = 0; x < im1.width(); x++) {
597 n_true += im0(x, y);
598 int correct = x + y;
599 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
600 correct += im0(x, y) ? 10 : 0;
601 }
602 if (im1(x, y) != correct) {
603 printf("im1(%d, %d) = %d instead of %d\n",
604 x, y, im1(x, y), correct);
605 return -1;
606 }
607 }
608 }
609 if (!(19000 <= n_true && n_true <= 21000)) {
610 printf("Expected n_true to be between 19000 and 21000; got %d instead\n", n_true);
611 return -1;
612 }
613 return 0;
614 }
615
newton_method_test()616 int newton_method_test() {
617 Func inverse;
618 Var x;
619 // Negating the bits of a float is a piecewise linear approximation to inverting it
620 inverse(x) = {-0.25f * reinterpret<float>(~(reinterpret<uint32_t>(cast<float>(x + 1)))), 0};
621 const int max_iters = 10;
622 RDom r(0, max_iters);
623 Expr not_converged = abs(inverse(x)[0] * (x + 1) - 1) > 0.001f;
624 r.where(not_converged);
625
626 // Compute the inverse of x using Newton's method, and count the
627 // number of iterations required to reach convergence
628 inverse(x) = {inverse(x)[0] * (2 - (x + 1) * inverse(x)[0]),
629 r + 1};
630 {
631 Realization r = inverse.realize(128);
632 Buffer<float> r0 = r[0];
633 Buffer<int> r1 = r[1];
634 for (int i = 0; i < r0.width(); i++) {
635 float x = (i + 1);
636 float prod = x * r0(i);
637 int num_iters = r1(i);
638 if (num_iters == max_iters) {
639 printf("Newton's method didn't converge!\n");
640 return -1;
641 }
642 if (std::abs(prod - 1) > 0.001) {
643 printf("Newton's method converged without producing the correct inverse:\n"
644 "%f * %f = %f (%d iterations)\n",
645 x, r0(i), prod, r1(i));
646 return -1;
647 }
648 }
649 }
650 return 0;
651 }
652
init_on_gpu_update_on_cpu_test(int index)653 int init_on_gpu_update_on_cpu_test(int index) {
654 buffer_index = index;
655
656 Func f("f_" + std::to_string(index));
657 Var x("x"), y("y");
658 f(x, y) = x + y;
659
660 RDom r(0, 100, 0, 100);
661 r.where(r.x < r.y);
662 r.where(!(r.x != 10));
663 f(r.x, r.y) += 3;
664
665 Var xi("xi"), yi("yi");
666 f.gpu_tile(x, y, xi, yi, 4, 4);
667
668 Buffer<int> im = f.realize(200, 200);
669 for (int y = 0; y < im.height(); y++) {
670 for (int x = 0; x < im.width(); x++) {
671 int correct = x + y;
672 if ((x == 10) && (0 <= y && y <= 99)) {
673 correct += (x < y) ? 3 : 0;
674 }
675 if (im(x, y) != correct) {
676 printf("im(%d, %d) = %d instead of %d\n",
677 x, y, im(x, y), correct);
678 return -1;
679 }
680 }
681 }
682 return 0;
683 }
684
init_on_cpu_update_on_gpu_test(int index)685 int init_on_cpu_update_on_gpu_test(int index) {
686 buffer_index = index;
687
688 Func f("f_" + std::to_string(index));
689 Var x("x"), y("y");
690 f(x, y) = x + y;
691
692 RDom r(0, 100, 0, 100);
693 r.where(!(r.x != 10));
694 r.where(r.x < r.y);
695 f(r.x, r.y) += 3;
696
697 RVar rxi("rxi"), ryi("ryi");
698 f.update(0).gpu_tile(r.x, r.y, r.x, r.y, rxi, ryi, 4, 4);
699
700 Buffer<int> im = f.realize(200, 200);
701 for (int y = 0; y < im.height(); y++) {
702 for (int x = 0; x < im.width(); x++) {
703 int correct = x + y;
704 if ((x == 10) && (0 <= y && y <= 99)) {
705 correct += (x < y) ? 3 : 0;
706 }
707 if (im(x, y) != correct) {
708 printf("im(%d, %d) = %d instead of %d\n",
709 x, y, im(x, y), correct);
710 return -1;
711 }
712 }
713 }
714 return 0;
715 }
716
gpu_intermediate_computed_if_param_test(int index)717 int gpu_intermediate_computed_if_param_test(int index) {
718 buffer_index = index;
719
720 Func f("f_" + std::to_string(index)), g("g_" + std::to_string(index)), h("h_" + std::to_string(index));
721 Var x("x"), y("y");
722 Param<int> p;
723
724 g(x, y) = x + y;
725 h(x, y) = 10;
726
727 f(x, y) = x + y;
728 RDom r1(0, 100, 0, 100);
729 r1.where(p > 3);
730 f(r1.x, r1.y) += 2 * g(r1.x, r1.y);
731
732 RDom r2(0, 100, 0, 100);
733 r2.where(p <= 3);
734 f(r2.x, r2.y) += h(r2.x, r2.y) + g(r2.x, r2.y);
735
736 RVar r1xi("r1xi"), r1yi("r1yi");
737 f.update(0).specialize(p >= 2).gpu_tile(r1.x, r1.y, r1xi, r1yi, 4, 4);
738 g.compute_root();
739 h.compute_root();
740 Var xi("xi"), yi("yi");
741 h.gpu_tile(x, y, xi, yi, 8, 8);
742
743 {
744 printf("....Set p to 5, expect g to be computed\n");
745 p.set(5);
746 run_tracer = false;
747 niters_expected = 100 * 100;
748 niters = 0;
749 Buffer<int> im = f.realize(200, 200);
750 for (int y = 0; y < im.height(); y++) {
751 for (int x = 0; x < im.width(); x++) {
752 int correct = x + y;
753 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
754 correct = 3 * correct;
755 }
756 if (im(x, y) != correct) {
757 printf("im(%d, %d) = %d instead of %d\n",
758 x, y, im(x, y), correct);
759 return -1;
760 }
761 }
762 }
763 }
764
765 {
766 printf("....Set p to 0, expect g to be not computed\n");
767 p.set(0);
768 run_tracer = false;
769 niters_expected = 0;
770 niters = 0;
771 Buffer<int> im = f.realize(200, 200);
772 for (int y = 0; y < im.height(); y++) {
773 for (int x = 0; x < im.width(); x++) {
774 int correct = x + y;
775 if ((0 <= x && x <= 99) && (0 <= y && y <= 99)) {
776 correct += 10 + correct;
777 }
778 if (im(x, y) != correct) {
779 printf("im(%d, %d) = %d instead of %d\n",
780 x, y, im(x, y), correct);
781 return -1;
782 }
783 }
784 }
785 }
786 return 0;
787 }
788
vectorize_predicated_rvar_test()789 int vectorize_predicated_rvar_test() {
790 Func f("f");
791 Var x("x"), y("y");
792 f(x, y) = 0;
793
794 Expr w = (f.output_buffer().width() / 2) * 2;
795 Expr h = (f.output_buffer().height() / 2) * 2;
796
797 RDom r(1, w - 2, 1, h - 2);
798 r.where((r.x + r.y) % 2 == 0);
799
800 f(r.x, r.y) += 10;
801
802 f.update(0).unroll(r.x, 2).allow_race_conditions().vectorize(r.x, 8);
803
804 Buffer<int> im = f.realize(200, 200);
805 for (int y = 0; y < im.height(); y++) {
806 for (int x = 0; x < im.width(); x++) {
807 int correct = 0;
808 if ((1 <= x && x < im.width() - 1) && (1 <= y && y < im.height() - 1) &&
809 ((x + y) % 2 == 0)) {
810 correct += 10;
811 }
812 if (im(x, y) != correct) {
813 printf("im(%d, %d) = %d instead of %d\n",
814 x, y, im(x, y), correct);
815 return -1;
816 }
817 }
818 }
819 return 0;
820 }
821
main(int argc,char ** argv)822 int main(int argc, char **argv) {
823 printf("Running equality inequality bound test\n");
824 if (equality_inequality_bound_test(0) != 0) {
825 return -1;
826 }
827
828 printf("Running split fuse test\n");
829 if (split_fuse_test(1) != 0) {
830 return -1;
831 }
832
833 printf("Running bound depend on free variable test\n");
834 if (free_variable_bound_test(2) != 0) {
835 return -1;
836 }
837
838 printf("Running function call inside bound test\n");
839 if (func_call_inside_bound_test(3) != 0) {
840 return -1;
841 }
842
843 printf("Running function call inside bound inline test\n");
844 if (func_call_inside_bound_inline_test(4) != 0) {
845 return -1;
846 }
847
848 printf("Running two linear bounds test\n");
849 if (two_linear_bounds_test(5) != 0) {
850 return -1;
851 }
852
853 printf("Running circular bound test\n");
854 if (circle_bound_test(6) != 0) {
855 return -1;
856 }
857
858 printf("Running intermediate only computed if param is bigger than certain value test\n");
859 if (intermediate_computed_if_param_test(7) != 0) {
860 return -1;
861 }
862
863 printf("Running tile intermediate stage depend on output bound test\n");
864 if (tile_intermediate_bound_depend_on_output_test(8) != 0) {
865 return -1;
866 }
867
868 printf("Running intermediate stage depend on output bound\n");
869 if (intermediate_bound_depend_on_output_test(9) != 0) {
870 return -1;
871 }
872
873 printf("Running self reference bound test\n");
874 if (self_reference_bound_test(10) != 0) {
875 return -1;
876 }
877
878 printf("Running random float bound test\n");
879 if (random_float_bound_test(11) != 0) {
880 return -1;
881 }
882
883 printf("Running newton's method test\n");
884 if (newton_method_test() != 0) {
885 return -1;
886 }
887
888 printf("Running vectorize predicated rvar test\n");
889 if (vectorize_predicated_rvar_test() != 0) {
890 return -1;
891 }
892
893 // Run GPU tests now if there is support for GPU.
894 if (!get_jit_target_from_environment().has_gpu_feature()) {
895 // TODO: split this test apart so that the relevant piece can be skipped appropriately
896 // printf("[SKIP] No GPU target enabled.\n");
897 printf("Success!\n");
898 return 0;
899 }
900
901 printf("Running initialization on gpu and update on cpu test\n");
902 if (init_on_gpu_update_on_cpu_test(12) != 0) {
903 return -1;
904 }
905
906 printf("Running initialization on cpu and update on gpu test\n");
907 if (init_on_cpu_update_on_gpu_test(13) != 0) {
908 return -1;
909 }
910
911 printf("Running gpu intermediate only computed if param is bigger than certain value test\n");
912 if (gpu_intermediate_computed_if_param_test(14) != 0) {
913 return -1;
914 }
915
916 printf("Success!\n");
917
918 return 0;
919 }
920