1 /*
2  * Copyright 2013      Ecole Normale Superieure
3  * Copyright 2015      Sven Verdoolaege
4  *
5  * Use of this software is governed by the MIT license
6  *
7  * Written by Sven Verdoolaege,
8  * Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
9  */
10 
11 #include <string.h>
12 
13 #include <isl/space.h>
14 #include <isl/constraint.h>
15 #include <isl/val.h>
16 #include <isl/aff.h>
17 #include <isl/set.h>
18 #include <isl/map.h>
19 #include <isl/union_set.h>
20 #include <isl/union_map.h>
21 
22 #include "hybrid.h"
23 #include "schedule.h"
24 
25 /* The hybrid tiling implemented in this file is based on
26  * Grosser et al., "Hybrid Hexagonal/Classical Tiling for GPUs".
27  */
28 
29 /* Bounds on relative dependence distances in input to hybrid tiling.
30  * upper is an upper bound on the relative dependence distances
31  * in the first space dimension
32  * -lower is a lower bound on the relative dependence distances
33  * in all space dimensions.
34  *
35  * In particular,
36  *
37  *	d_i >= -lower_i d_0
38  * and
39  *	d_1 <= upper d_0
40  *
41  * for each dependence distance vector d, where d_1 is the component
42  * corresponding to the first space dimension.
43  *
44  * upper and lower are always non-negative.
45  * Some of the values may be NaN if no bound could be found.
46  */
47 struct ppcg_ht_bounds {
48 	isl_val *upper;
49 	isl_multi_val *lower;
50 };
51 
52 /* Free "bounds" along with all its fields.
53  */
ppcg_ht_bounds_free(__isl_take ppcg_ht_bounds * bounds)54 __isl_null ppcg_ht_bounds *ppcg_ht_bounds_free(
55 	__isl_take ppcg_ht_bounds *bounds)
56 {
57 	if (!bounds)
58 		return NULL;
59 	isl_val_free(bounds->upper);
60 	isl_multi_val_free(bounds->lower);
61 	free(bounds);
62 
63 	return NULL;
64 }
65 
66 /* Create a ppcg_ht_bounds object for a band living in "space".
67  * The bounds are initialized to NaN.
68  */
ppcg_ht_bounds_alloc(__isl_take isl_space * space)69 __isl_give ppcg_ht_bounds *ppcg_ht_bounds_alloc(__isl_take isl_space *space)
70 {
71 	int i, n;
72 	isl_ctx *ctx;
73 	ppcg_ht_bounds *bounds;
74 
75 	if (!space)
76 		return NULL;
77 
78 	ctx = isl_space_get_ctx(space);
79 	bounds = isl_alloc_type(ctx, struct ppcg_ht_bounds);
80 	if (!bounds)
81 		goto error;
82 	bounds->upper = isl_val_nan(ctx);
83 	bounds->lower = isl_multi_val_zero(space);
84 	n = isl_multi_val_dim(bounds->lower, isl_dim_set);
85 	for (i = 0; i < n; ++i) {
86 		isl_val *v = isl_val_copy(bounds->upper);
87 		bounds->lower = isl_multi_val_set_val(bounds->lower, i, v);
88 	}
89 
90 	if (!bounds->lower || !bounds->upper)
91 		return ppcg_ht_bounds_free(bounds);
92 
93 	return bounds;
94 error:
95 	isl_space_free(space);
96 	return NULL;
97 }
98 
ppcg_ht_bounds_dump(__isl_keep ppcg_ht_bounds * bounds)99 void ppcg_ht_bounds_dump(__isl_keep ppcg_ht_bounds *bounds)
100 {
101 	if (!bounds)
102 		return;
103 
104 	fprintf(stderr, "lower: ");
105 	isl_multi_val_dump(bounds->lower);
106 	fprintf(stderr, "upper: ");
107 	isl_val_dump(bounds->upper);
108 }
109 
110 /* Return the upper bound on the relative dependence distances
111  * in the first space dimension.
112  */
ppcg_ht_bounds_get_upper(__isl_keep ppcg_ht_bounds * bounds)113 __isl_give isl_val *ppcg_ht_bounds_get_upper(__isl_keep ppcg_ht_bounds *bounds)
114 {
115 	if (!bounds)
116 		return NULL;
117 	return isl_val_copy(bounds->upper);
118 }
119 
120 /* Replace the upper bound on the relative dependence distances
121  * in the first space dimension by "upper".
122  */
ppcg_ht_bounds_set_upper(__isl_take ppcg_ht_bounds * bounds,__isl_take isl_val * upper)123 __isl_give ppcg_ht_bounds *ppcg_ht_bounds_set_upper(
124 	__isl_take ppcg_ht_bounds *bounds, __isl_take isl_val *upper)
125 {
126 	if (!bounds || !upper)
127 		goto error;
128 	isl_val_free(bounds->upper);
129 	bounds->upper = upper;
130 	return bounds;
131 error:
132 	ppcg_ht_bounds_free(bounds);
133 	isl_val_free(upper);
134 	return NULL;
135 }
136 
137 /* Return the lower bound on the relative dependence distances
138  * in space dimension "pos".
139  */
ppcg_ht_bounds_get_lower(__isl_keep ppcg_ht_bounds * bounds,int pos)140 __isl_give isl_val *ppcg_ht_bounds_get_lower(__isl_keep ppcg_ht_bounds *bounds,
141 	int pos)
142 {
143 	if (!bounds)
144 		return NULL;
145 	return isl_multi_val_get_val(bounds->lower, pos);
146 }
147 
148 /* Replace the lower bound on the relative dependence distances
149  * in space dimension "pos" by "lower".
150  */
ppcg_ht_bounds_set_lower(__isl_take ppcg_ht_bounds * bounds,int pos,__isl_take isl_val * lower)151 __isl_give ppcg_ht_bounds *ppcg_ht_bounds_set_lower(
152 	__isl_take ppcg_ht_bounds *bounds, int pos, __isl_take isl_val *lower)
153 {
154 	if (!bounds || !lower)
155 		goto error;
156 	bounds->lower = isl_multi_val_set_val(bounds->lower, pos, lower);
157 	if (!bounds->lower)
158 		return ppcg_ht_bounds_free(bounds);
159 	return bounds;
160 error:
161 	ppcg_ht_bounds_free(bounds);
162 	isl_val_free(lower);
163 	return NULL;
164 }
165 
166 /* Can the bounds on relative dependence distances recorded in "bounds"
167  * be used to perform hybrid tiling?
168  * In particular, have appropriate lower and upper bounds been found?
169  * Any NaN indicates that no corresponding bound was found.
170  */
ppcg_ht_bounds_is_valid(__isl_keep ppcg_ht_bounds * bounds)171 isl_bool ppcg_ht_bounds_is_valid(__isl_keep ppcg_ht_bounds *bounds)
172 {
173 	isl_bool is_nan;
174 	int i, n;
175 
176 	if (!bounds)
177 		return isl_bool_error;
178 	is_nan = isl_val_is_nan(bounds->upper);
179 	if (is_nan < 0)
180 		return isl_bool_error;
181 	if (is_nan)
182 		return isl_bool_false;
183 
184 	n = isl_multi_val_dim(bounds->lower, isl_dim_set);
185 	for (i = 0; i < n; ++i) {
186 		isl_val *v;
187 
188 		v = isl_multi_val_get_val(bounds->lower, i);
189 		is_nan = isl_val_is_nan(v);
190 		if (is_nan < 0)
191 			return isl_bool_error;
192 		if (is_nan)
193 			return isl_bool_false;
194 		isl_val_free(v);
195 	}
196 
197 	return isl_bool_true;
198 }
199 
200 /* Structure that represents the basic hexagonal tiling,
201  * along with information that is needed to perform the hybrid tiling.
202  *
203  * "bounds" are the bounds on the dependence distances that
204  * define the hexagonal shape and the required skewing in the remaining
205  * space dimensions.
206  *
207  * "input_node" points to the input pair of band nodes.
208  * "input_schedule" is the partial schedule of this input pair of band nodes.
209  * The space of this schedule is [P -> C], where P is the space
210  * of the parent node and C is the space of the child node.
211  *
212  * "space_sizes" represent the total size of a tile for the space
213  * dimensions, i.e., those corresponding to the child node.
214  * The space of "space_sizes" is C.
215  * If S_0 is the original tile size in the first space dimension,
216  * then the first entry of "space_sizes" is equal to
217  * W = 2*S_0 + floor(d_l h) + floor(d_u h).
218  * The remaining entries are the same as in the original tile sizes.
219  *
220  * The basic hexagonal tiling "hex" is defined
221  * in a "ts" (time-space) space and corresponds to the phase-1 tiles.
222  * "time_tile" maps the "ts" space to outer time tile.
223  * Is is equal to ts[t, s] -> floor(t/(2 * S_t)), with S_t the original tile
224  * size corresponding to the parent node.
225  * "local_time" maps the "ts" space to the time dimension inside each tile.
226  * It is equal to ts[t, s] -> t mod (2 S_t), with S_t the original tile
227  * size corresponding to the parent node.
228  * "shift_space" shifts the tiles at time tile T = floor(t/(2 S_t))
229  * in the space dimension such that they align to a multiple of W.
230  * It is equal to ts[t, s] -> s + (-(2 * shift_s)*T) % W,
231  * with shift_s = S_0 + floor(d_u h).
232  * "shift_phase" is the shift taken to go from phase 0 to phase 1
233  * It is equal to ts[t, s] -> ts[t + S_t, s + shift_s],
234  * with shift_s = S_0 + floor(d_u h).
235  *
236  * "project_ts" projects the space of the input schedule to the ts-space.
237  * It is equal to [P[t] -> C[s_0, ...]] -> ts[t, s_0].
238  */
239 struct ppcg_ht_tiling {
240 	int ref;
241 
242 	ppcg_ht_bounds *bounds;
243 	isl_schedule_node *input_node;
244 	isl_multi_union_pw_aff *input_schedule;
245 
246 	isl_multi_val *space_sizes;
247 
248 	isl_aff *time_tile;
249 	isl_aff *local_time;
250 	isl_aff *shift_space;
251 	isl_multi_aff *shift_phase;
252 	isl_set *hex;
253 
254 	isl_multi_aff *project_ts;
255 };
256 typedef struct ppcg_ht_tiling ppcg_ht_tiling;
257 
258 /* Return the space of the pair of band nodes that form the input
259  * to the hybrid tiling.
260  * In particular, return the space [P -> C], where P is the space
261  * of the parent node and C is the space of the child node.
262  */
ppcg_ht_tiling_get_input_space(__isl_keep ppcg_ht_tiling * tile)263 __isl_give isl_space *ppcg_ht_tiling_get_input_space(
264 	__isl_keep ppcg_ht_tiling *tile)
265 {
266 	if (!tile)
267 		return NULL;
268 
269 	return isl_multi_union_pw_aff_get_space(tile->input_schedule);
270 }
271 
272 /* Remove a reference to "tile" and free "tile" along with all its fields
273  * as soon as the reference count drops to zero.
274  */
ppcg_ht_tiling_free(__isl_take ppcg_ht_tiling * tiling)275 static __isl_null ppcg_ht_tiling *ppcg_ht_tiling_free(
276 	__isl_take ppcg_ht_tiling *tiling)
277 {
278 	if (!tiling)
279 		return NULL;
280 	if (--tiling->ref > 0)
281 		return NULL;
282 
283 	ppcg_ht_bounds_free(tiling->bounds);
284 	isl_schedule_node_free(tiling->input_node);
285 	isl_multi_union_pw_aff_free(tiling->input_schedule);
286 	isl_multi_val_free(tiling->space_sizes);
287 	isl_aff_free(tiling->time_tile);
288 	isl_aff_free(tiling->local_time);
289 	isl_aff_free(tiling->shift_space);
290 	isl_multi_aff_free(tiling->shift_phase);
291 	isl_set_free(tiling->hex);
292 	isl_multi_aff_free(tiling->project_ts);
293 	free(tiling);
294 
295 	return NULL;
296 }
297 
298 /* Return a new reference to "tiling".
299  */
ppcg_ht_tiling_copy(__isl_keep ppcg_ht_tiling * tiling)300 __isl_give ppcg_ht_tiling *ppcg_ht_tiling_copy(
301 	__isl_keep ppcg_ht_tiling *tiling)
302 {
303 	if (!tiling)
304 		return NULL;
305 
306 	tiling->ref++;
307 	return tiling;
308 }
309 
310 /* Return the isl_ctx to which "tiling" belongs.
311  */
ppcg_ht_tiling_get_ctx(__isl_keep ppcg_ht_tiling * tiling)312 isl_ctx *ppcg_ht_tiling_get_ctx(__isl_keep ppcg_ht_tiling *tiling)
313 {
314 	if (!tiling)
315 		return NULL;
316 
317 	return isl_multi_union_pw_aff_get_ctx(tiling->input_schedule);
318 }
319 
320 /* Representation of one of the two phases of hybrid tiling.
321  *
322  * "tiling" points to the shared tiling data.
323  *
324  * "time_tile", "local_time" and "shift_space" are equal to the corresponding
325  * fields of "tiling", pulled back to the input space.
326  * In case of phase 0, these expressions have also been moved
327  * from phase 1 to phase 0.
328  *
329  * "domain" contains the hexagonal tiling of this phase.
330  *
331  * "space_shift" is the shift that should be added to the space band
332  * in order to be able to apply rectangular tiling to the space.
333  * For phase 1, it is equal to
334  *
335  *	[P[t] -> C[s_0, s_i]] -> C[(-(2 * shift_s)*T) % W, dl_i * u]
336  *
337  * with shift_s = S_0 + floor(d_u h),
338  * T equal to "time_tile" and u equal to "local_time".
339  * For phase 0, it is equal to
340  *
341  *	[P[t] -> C[s_0, s_i]] -> C[shift_s + (-(2 * shift_s)*T) % W, dl_i * u]
342  *
343  * "space_tile" is the space tiling.  It is equal to
344  *
345  *	[P[t] -> C[s]] -> C[floor((s + space_shift)/space_size]
346  */
347 struct ppcg_ht_phase {
348 	ppcg_ht_tiling *tiling;
349 
350 	isl_aff *time_tile;
351 	isl_aff *local_time;
352 	isl_aff *shift_space;
353 	isl_set *domain;
354 
355 	isl_multi_aff *space_shift;
356 	isl_multi_aff *space_tile;
357 };
358 
359 /* Free "phase" along with all its fields.
360  */
ppcg_ht_phase_free(__isl_take ppcg_ht_phase * phase)361 static __isl_null ppcg_ht_phase *ppcg_ht_phase_free(
362 	__isl_take ppcg_ht_phase *phase)
363 {
364 	if (!phase)
365 		return NULL;
366 
367 	ppcg_ht_tiling_free(phase->tiling);
368 	isl_aff_free(phase->time_tile);
369 	isl_aff_free(phase->local_time);
370 	isl_aff_free(phase->shift_space);
371 	isl_set_free(phase->domain);
372 	isl_multi_aff_free(phase->space_shift);
373 	isl_multi_aff_free(phase->space_tile);
374 	free(phase);
375 
376 	return NULL;
377 }
378 
379 /* Wrapper around ppcg_ht_phase_free for use as an argument
380  * to isl_id_set_free_user.
381  */
ppcg_ht_phase_free_wrap(void * user)382 static void ppcg_ht_phase_free_wrap(void *user)
383 {
384 	ppcg_ht_phase *phase = user;
385 
386 	ppcg_ht_phase_free(phase);
387 }
388 
389 /* Return the domain of hybrid tiling phase "phase".
390  */
ppcg_ht_phase_get_domain(ppcg_ht_phase * phase)391 static __isl_give isl_set *ppcg_ht_phase_get_domain(ppcg_ht_phase *phase)
392 {
393 	if (!phase)
394 		return NULL;
395 
396 	return isl_set_copy(phase->domain);
397 }
398 
399 /* Return the space of the pair of band nodes that form the input
400  * to the hybrid tiling of which "phase" is a phase.
401  * In particular, return the space [P -> C], where P is the space
402  * of the parent node and C is the space of the child node.
403  */
ppcg_ht_phase_get_input_space(__isl_keep ppcg_ht_phase * phase)404 static __isl_give isl_space *ppcg_ht_phase_get_input_space(
405 	__isl_keep ppcg_ht_phase *phase)
406 {
407 	if (!phase)
408 		return NULL;
409 
410 	return ppcg_ht_tiling_get_input_space(phase->tiling);
411 }
412 
413 /* Construct the lower left constraint of the hexagonal tile, i.e.,
414  *
415  *	du a - b <= (2h+1) du - duh
416  *	-du a + b + (2h+1) du - duh >= 0
417  *
418  * where duh = floor(du * h).
419  *
420  * This constraint corresponds to (6) in
421  * "Hybrid Hexagonal/Classical Tiling for GPUs".
422  */
hex_lower_left(__isl_take isl_local_space * ls,__isl_keep isl_val * h,__isl_keep isl_val * du,__isl_keep isl_val * duh)423 static __isl_give isl_constraint *hex_lower_left(__isl_take isl_local_space *ls,
424 	__isl_keep isl_val *h, __isl_keep isl_val *du, __isl_keep isl_val *duh)
425 {
426 	isl_val *v;
427 	isl_aff *aff;
428 
429 	v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1);
430 	v = isl_val_mul(v, isl_val_copy(du));
431 	v = isl_val_sub(v, isl_val_copy(duh));
432 	aff = isl_aff_val_on_domain(ls, v);
433 	v = isl_val_neg(isl_val_copy(du));
434 	aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, v);
435 	aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, 1);
436 
437 	return isl_inequality_from_aff(aff);
438 }
439 
440 /* Construct the lower constraint of the hexagonal tile, i.e.,
441  *
442  *	a <= 2h+1
443  *	-a + 2h+1 >= 0
444  *
445  * This constraint corresponds to (7) in
446  * "Hybrid Hexagonal/Classical Tiling for GPUs".
447  */
hex_lower(__isl_take isl_local_space * ls,__isl_keep isl_val * h)448 static __isl_give isl_constraint *hex_lower(__isl_take isl_local_space *ls,
449 	__isl_keep isl_val *h)
450 {
451 	isl_val *v;
452 	isl_aff *aff;
453 
454 	v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1);
455 	aff = isl_aff_val_on_domain(ls, v);
456 	aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 0, -1);
457 
458 	return isl_inequality_from_aff(aff);
459 }
460 
461 /* Construct the lower right constraint of the hexagonal tile, i.e.,
462  *
463  *	dl a + b <= (2h+1) dl + duh + (s0-1)
464  *	-dl a - b + (2h+1) dl + duh + (s0-1) >= 0
465  *
466  * where duh = floor(du * h).
467  *
468  * This constraint corresponds to (8) in
469  * "Hybrid Hexagonal/Classical Tiling for GPUs".
470  */
hex_lower_right(__isl_take isl_local_space * ls,__isl_keep isl_val * h,__isl_keep isl_val * s0,__isl_keep isl_val * dl,__isl_keep isl_val * duh)471 static __isl_give isl_constraint *hex_lower_right(
472 	__isl_take isl_local_space *ls, __isl_keep isl_val *h,
473 	__isl_keep isl_val *s0, __isl_keep isl_val *dl, __isl_keep isl_val *duh)
474 {
475 	isl_val *v;
476 	isl_aff *aff;
477 
478 	v = isl_val_add_ui(isl_val_mul_ui(isl_val_copy(h), 2), 1);
479 	v = isl_val_mul(v, isl_val_copy(dl));
480 	v = isl_val_add(v, isl_val_copy(duh));
481 	v = isl_val_add(v, isl_val_copy(s0));
482 	v = isl_val_sub_ui(v, 1);
483 	aff = isl_aff_val_on_domain(ls, v);
484 	v = isl_val_neg(isl_val_copy(dl));
485 	aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, v);
486 	aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, -1);
487 
488 	return isl_inequality_from_aff(aff);
489 }
490 
491 /* Construct the upper left constraint of the hexagonal tile, i.e.,
492  *
493  *	dl a + b >= h dl - (d - 1)/d				with d = den(dl)
494  *	dl a + b - h dl + (d - 1)/d >= 0
495  *
496  * This constraint corresponds to (10) in
497  * "Hybrid Hexagonal/Classical Tiling for GPUs".
498  */
hex_upper_left(__isl_take isl_local_space * ls,__isl_keep isl_val * h,__isl_keep isl_val * dl)499 static __isl_give isl_constraint *hex_upper_left(__isl_take isl_local_space *ls,
500 	__isl_keep isl_val *h, __isl_keep isl_val *dl)
501 {
502 	isl_val *v, *d;
503 	isl_aff *aff;
504 
505 	d = isl_val_get_den_val(dl);
506 	v = isl_val_sub_ui(isl_val_copy(d), 1);
507 	v = isl_val_div(v, d);
508 	v = isl_val_sub(v, isl_val_mul(isl_val_copy(h), isl_val_copy(dl)));
509 	aff = isl_aff_val_on_domain(ls, v);
510 	aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, isl_val_copy(dl));
511 	aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, 1);
512 
513 	return isl_inequality_from_aff(aff);
514 }
515 
516 /* Construct the upper right constraint of the hexagonal tile, i.e.,
517  *
518  *	du a - b >= du h - duh - (s0-1) - dlh - (d - 1)/d	with d = den(du)
519  *	du a - b - du h + duh + (s0-1) + dlh + (d - 1)/d >= 0
520  *
521  * where dlh = floor(dl * h) and duh = floor(du * h).
522  *
523  * This constraint corresponds to (12) in
524  * "Hybrid Hexagonal/Classical Tiling for GPUs".
525  */
hex_upper_right(__isl_take isl_local_space * ls,__isl_keep isl_val * h,__isl_keep isl_val * s0,__isl_keep isl_val * du,__isl_keep isl_val * dlh,__isl_keep isl_val * duh)526 static __isl_give isl_constraint *hex_upper_right(
527 	__isl_take isl_local_space *ls, __isl_keep isl_val *h,
528 	__isl_keep isl_val *s0, __isl_keep isl_val *du,
529 	__isl_keep isl_val *dlh, __isl_keep isl_val *duh)
530 {
531 	isl_val *v, *d;
532 	isl_aff *aff;
533 
534 	d = isl_val_get_den_val(du);
535 	v = isl_val_sub_ui(isl_val_copy(d), 1);
536 	v = isl_val_div(v, d);
537 	v = isl_val_sub(v, isl_val_mul(isl_val_copy(h), isl_val_copy(du)));
538 	v = isl_val_add(v, isl_val_copy(duh));
539 	v = isl_val_add(v, isl_val_copy(dlh));
540 	v = isl_val_add(v, isl_val_copy(s0));
541 	v = isl_val_sub_ui(v, 1);
542 	aff = isl_aff_val_on_domain(ls, v);
543 	aff = isl_aff_set_coefficient_val(aff, isl_dim_in, 0, isl_val_copy(du));
544 	aff = isl_aff_set_coefficient_si(aff, isl_dim_in, 1, -1);
545 
546 	return isl_inequality_from_aff(aff);
547 }
548 
549 /* Construct the uppper constraint of the hexagonal tile, i.e.,
550  *
551  *	a >= 0
552  *
553  * This constraint corresponds to (13) in
554  * "Hybrid Hexagonal/Classical Tiling for GPUs".
555  */
hex_upper(__isl_take isl_local_space * ls)556 static __isl_give isl_constraint *hex_upper(__isl_take isl_local_space *ls)
557 {
558 	isl_aff *aff;
559 
560 	aff = isl_aff_var_on_domain(ls, isl_dim_set, 0);
561 
562 	return isl_inequality_from_aff(aff);
563 }
564 
565 /* Construct the basic hexagonal tile shape.
566  * "space" is the 2D space in which the hexagon should be constructed.
567  * h is st-1, with st the tile size in the time dimension
568  * s0 is the tile size in the space dimension
569  * dl is a bound on the negative relative dependence distances, i.e.,
570  *
571  *	d_s >= -dl d_t
572  *
573  * du is a bound on the positive relative dependence distances, i.e.,
574  *
575  *	d_s <= du d_t
576  *
577  * with (d_t,d_s) any dependence distance vector.
578  * dlh = floor(dl * h)
579  * duh = floor(du * h)
580  *
581  * The shape of the hexagon is as follows:
582  *
583  *		0 dlh   dlh+s0-1
584  *		   ______                __
585  * 0		  /      \_             /
586  *		 /         \_          /
587  * h		/            \ ______ /
588  * h+1		\_           //      \\_
589  *		  \_        //         \\_
590  * 2h+1		    \______//            \\
591  *		0   duh   duh+s0-1
592  *		             duh+s0-1+dlh
593  *		                  duh+s0-1+dlh+1+s0+1
594  *
595  * The next hexagon is shifted by duh + dlh + 2 * s0.
596  *
597  * The slope of the "/" constraints is dl.
598  * The slope of the "\_" constraints is du.
599  */
compute_hexagon(__isl_take isl_space * space,__isl_keep isl_val * h,__isl_keep isl_val * s0,__isl_keep isl_val * dl,__isl_keep isl_val * du,__isl_keep isl_val * dlh,__isl_keep isl_val * duh)600 static __isl_give isl_set *compute_hexagon(__isl_take isl_space *space,
601 	__isl_keep isl_val *h, __isl_keep isl_val *s0,
602 	__isl_keep isl_val *dl, __isl_keep isl_val *du,
603 	__isl_keep isl_val *dlh, __isl_keep isl_val *duh)
604 {
605 	isl_local_space *ls;
606 	isl_constraint *c;
607 	isl_basic_set *bset;
608 
609 	ls = isl_local_space_from_space(space);
610 
611 	c = hex_lower_left(isl_local_space_copy(ls), h, du, duh);
612 	bset = isl_basic_set_from_constraint(c);
613 
614 	c = hex_lower(isl_local_space_copy(ls), h);
615 	bset = isl_basic_set_add_constraint(bset, c);
616 
617 	c = hex_lower_right(isl_local_space_copy(ls), h, s0, dl, duh);
618 	bset = isl_basic_set_add_constraint(bset, c);
619 
620 	c = hex_upper_left(isl_local_space_copy(ls), h, dl);
621 	bset = isl_basic_set_add_constraint(bset, c);
622 
623 	c = hex_upper_right(isl_local_space_copy(ls), h, s0, du, dlh, duh);
624 	bset = isl_basic_set_add_constraint(bset, c);
625 
626 	c = hex_upper(ls);
627 	bset = isl_basic_set_add_constraint(bset, c);
628 
629 	return isl_set_from_basic_set(bset);
630 }
631 
632 /* Name of the ts-space.
633  */
634 static const char *ts_space_name = "ts";
635 
636 /* Construct and return the space ts[t, s].
637  */
construct_ts_space(isl_ctx * ctx)638 static __isl_give isl_space *construct_ts_space(isl_ctx *ctx)
639 {
640 	isl_space *s;
641 
642 	s = isl_space_set_alloc(ctx, 0, 2);
643 	s = isl_space_set_tuple_name(s, isl_dim_set, ts_space_name);
644 
645 	return s;
646 }
647 
648 /* Name of the local ts-space.
649  */
650 static const char *local_ts_space_name = "local_ts";
651 
652 /* Construct and return the space local_ts[t, s].
653  */
construct_local_ts_space(isl_ctx * ctx)654 static __isl_give isl_space *construct_local_ts_space(isl_ctx *ctx)
655 {
656 	isl_space *s;
657 
658 	s = isl_space_set_alloc(ctx, 0, 2);
659 	s = isl_space_set_tuple_name(s, isl_dim_set, local_ts_space_name);
660 
661 	return s;
662 }
663 
664 /* Compute the total size of a tile for the space dimensions,
665  * i.e., those corresponding to the child node
666  * of the input pattern.
667  * If S_0 is the original tile size in the first space dimension,
668  * then the first entry of "space_sizes" is equal to
669  * W = 2*S_0 + floor(d_l h) + floor(d_u h).
670  * The remaining entries are the same as in the original tile sizes.
671  * "tile_sizes" contains the original tile sizes, including
672  * the tile size corresponding to the parent node.
673  * "dlh" is equal to floor(d_l h).
674  * "duh" is equal to floor(d_u h).
675  */
compute_space_sizes(__isl_keep isl_multi_val * tile_sizes,__isl_keep isl_val * dlh,__isl_keep isl_val * duh)676 static __isl_give isl_multi_val *compute_space_sizes(
677 	__isl_keep isl_multi_val *tile_sizes,
678 	__isl_keep isl_val *dlh, __isl_keep isl_val *duh)
679 {
680 	isl_val *size;
681 	isl_multi_val *space_sizes;
682 
683 	space_sizes = isl_multi_val_copy(tile_sizes);
684 	space_sizes = isl_multi_val_factor_range(space_sizes);
685 	size = isl_multi_val_get_val(space_sizes, 0);
686 	size = isl_val_mul_ui(size, 2);
687 	size = isl_val_add(size, isl_val_copy(duh));
688 	size = isl_val_add(size, isl_val_copy(dlh));
689 	space_sizes = isl_multi_val_set_val(space_sizes, 0, size);
690 
691 	return space_sizes;
692 }
693 
694 /* Compute the offset of phase 1 with respect to phase 0
695  * in the ts-space ("space").
696  * In particular, return
697  *
698  *	ts[st, s0 + duh]
699  */
compute_phase_shift(__isl_keep isl_space * space,__isl_keep isl_val * st,__isl_keep isl_val * s0,__isl_keep isl_val * duh)700 static __isl_give isl_multi_val *compute_phase_shift(
701 	__isl_keep isl_space *space, __isl_keep isl_val *st,
702 	__isl_keep isl_val *s0, __isl_keep isl_val *duh)
703 {
704 	isl_val *v;
705 	isl_multi_val *phase_shift;
706 
707 	phase_shift = isl_multi_val_zero(isl_space_copy(space));
708 	phase_shift = isl_multi_val_set_val(phase_shift, 0, isl_val_copy(st));
709 	v = isl_val_add(isl_val_copy(duh), isl_val_copy(s0));
710 	phase_shift = isl_multi_val_set_val(phase_shift, 1, v);
711 
712 	return phase_shift;
713 }
714 
715 /* Return the function
716  *
717  *	ts[t, s] -> floor(t/(2 * st))
718  *
719  * representing the time tile.
720  * "space" is the space ts[t, s].
721  */
compute_time_tile(__isl_keep isl_space * space,__isl_keep isl_val * st)722 static __isl_give isl_aff *compute_time_tile(__isl_keep isl_space *space,
723 	__isl_keep isl_val *st)
724 {
725 	isl_val *v;
726 	isl_aff *t;
727 	isl_local_space *ls;
728 
729 	ls = isl_local_space_from_space(isl_space_copy(space));
730 	t = isl_aff_var_on_domain(ls, isl_dim_set, 0);
731 	v = isl_val_mul_ui(isl_val_copy(st), 2);
732 	t = isl_aff_floor(isl_aff_scale_down_val(t, v));
733 
734 	return t;
735 }
736 
737 /* Compute a shift in the space dimension for tiles
738  * at time tile T = floor(t/(2 * S_t))
739  * such that they align to a multiple of the total space tile dimension W.
740  * In particular, compute
741  *
742  *	ts[t, s] -> s + (-(2 * shift_s)*T) % W
743  *
744  * where shift_s is the shift of phase 1 with respect to phase 0
745  * in the space dimension (the first element of "phase_shift").
746  * W is stored in the first element of "space_sizes".
747  * "time_tile" is the function
748  *
749  *	ts[t, s] -> floor(t/(2 * S_T))
750  *
751  * Since phase 1 is shifted by shift_s with respect to phase 0,
752  * the next line of phase 0 (at T+1) is shifted by 2*shift_s
753  * with respect to the previous line (at T).
754  * A shift of -(2 * shift_s)*T therefore allows the basic pattern
755  * (which starts at 0) to be applied.
756  * However, this shift will be used to obtain the tile coordinate
757  * in the first space dimension and if the original values
758  * in the space dimension are non-negative, then the shift should
759  * not make them negative.  Moreover, the shift should be as minimal
760  * as possible.
761  * Since the pattern repeats itself with a period of W in the space
762  * dimension, the shift can be replaced by (-(2 * shift_s)*T) % W.
763  */
compute_shift_space(__isl_keep isl_aff * time_tile,__isl_keep isl_multi_val * space_sizes,__isl_keep isl_multi_val * phase_shift)764 static __isl_give isl_aff *compute_shift_space(__isl_keep isl_aff *time_tile,
765 	__isl_keep isl_multi_val *space_sizes,
766 	__isl_keep isl_multi_val *phase_shift)
767 {
768 	isl_val *v;
769 	isl_aff *s, *t;
770 	isl_local_space *ls;
771 
772 	ls = isl_local_space_from_space(isl_aff_get_domain_space(time_tile));
773 	t = isl_aff_copy(time_tile);
774 	v = isl_val_mul_ui(isl_multi_val_get_val(phase_shift, 1), 2);
775 	v = isl_val_neg(v);
776 	t = isl_aff_scale_val(t, v);
777 	v = isl_multi_val_get_val(space_sizes, 0);
778 	t = isl_aff_mod_val(t, v);
779 	s = isl_aff_var_on_domain(ls, isl_dim_set, 1);
780 	s = isl_aff_add(s, t);
781 
782 	return s;
783 }
784 
785 /* Give the phase_shift ts[S_t, S_0 + floor(d_u h)],
786  * compute a function that applies the shift, i.e.,
787  *
788  *	ts[t, s] -> ts[t + S_t, s + S_0 + floor(d_u h)],
789  */
compute_shift_phase(__isl_keep isl_multi_val * phase_shift)790 static __isl_give isl_multi_aff *compute_shift_phase(
791 	__isl_keep isl_multi_val *phase_shift)
792 {
793 	isl_space *space;
794 	isl_multi_aff *shift;
795 
796 	space = isl_multi_val_get_space(phase_shift);
797 	shift = isl_multi_aff_multi_val_on_space(space,
798 					isl_multi_val_copy(phase_shift));
799 	space = isl_multi_aff_get_space(shift);
800 	shift = isl_multi_aff_add(shift, isl_multi_aff_identity(space));
801 
802 	return shift;
803 }
804 
805 /* Compute a mapping from the ts-space to the local coordinates
806  * within each tile.  In particular, compute
807  *
808  *	ts[t, s] -> local_ts[t % (2 S_t), (s + (-(2 * shift_s)*T) % W) % W]
809  *
810  * "ts" is the space ts[t, s]
811  * "local_ts" is the space local_ts[t, s]
812  * "shift_space" is equal to ts[t, s] -> s + (-(2 * shift_s)*T) % W
813  * "st" is the tile size in the time dimension S_t.
814  * The first element of "space_sizes" is equal to W.
815  */
compute_localize(__isl_keep isl_space * local_ts,__isl_keep isl_aff * shift_space,__isl_keep isl_val * st,__isl_keep isl_multi_val * space_sizes)816 static __isl_give isl_multi_aff *compute_localize(
817 	__isl_keep isl_space *local_ts, __isl_keep isl_aff *shift_space,
818 	__isl_keep isl_val *st, __isl_keep isl_multi_val *space_sizes)
819 {
820 	isl_val *v;
821 	isl_space *space;
822 	isl_aff *s, *t;
823 	isl_multi_aff *localize;
824 
825 	space = isl_aff_get_domain_space(shift_space);
826 	local_ts = isl_space_copy(local_ts);
827 	space = isl_space_map_from_domain_and_range(space, local_ts);
828 	localize = isl_multi_aff_identity(space);
829 	t = isl_multi_aff_get_aff(localize, 0);
830 	v = isl_val_mul_ui(isl_val_copy(st), 2);
831 	t = isl_aff_mod_val(t, v);
832 	localize = isl_multi_aff_set_aff(localize, 0, t);
833 	s = isl_aff_copy(shift_space);
834 	v = isl_multi_val_get_val(space_sizes, 0);
835 	s = isl_aff_mod_val(s, v);
836 	localize = isl_multi_aff_set_aff(localize, 1, s);
837 
838 	return localize;
839 }
840 
841 /* Set the project_ts field of "tiling".
842  *
843  * This field projects the space of the input schedule to the ts-space.
844  * It is equal to [P[t] -> C[s_0, ...]] -> ts[t, s_0].
845  */
ppcg_ht_tiling_set_project_ts(__isl_take ppcg_ht_tiling * tiling)846 static __isl_give ppcg_ht_tiling *ppcg_ht_tiling_set_project_ts(
847 	__isl_take ppcg_ht_tiling *tiling)
848 {
849 	int n;
850 	isl_space *space;
851 	isl_multi_aff *project;
852 
853 	if (!tiling)
854 		return NULL;
855 
856 	space = ppcg_ht_tiling_get_input_space(tiling);
857 	n = isl_space_dim(space, isl_dim_set);
858 	project = isl_multi_aff_project_out_map(space, isl_dim_set, 2, n - 2);
859 	project = isl_multi_aff_set_tuple_name(project,
860 						isl_dim_out, ts_space_name);
861 	if (!project)
862 		return ppcg_ht_tiling_free(tiling);
863 
864 	tiling->project_ts = project;
865 
866 	return tiling;
867 }
868 
869 /* Construct a hybrid tiling description from bounds on the dependence
870  * distances "bounds".
871  * "input_node" points to the original parent node.
872  * "input_schedule" is the combined schedule of the parent and child
873  * node in the input.
874  * "tile_sizes" are the original, user specified tile sizes.
875  */
ppcg_ht_bounds_construct_tiling(__isl_take ppcg_ht_bounds * bounds,__isl_keep isl_schedule_node * input_node,__isl_keep isl_multi_union_pw_aff * input_schedule,__isl_keep isl_multi_val * tile_sizes)876 static __isl_give ppcg_ht_tiling *ppcg_ht_bounds_construct_tiling(
877 	__isl_take ppcg_ht_bounds *bounds,
878 	__isl_keep isl_schedule_node *input_node,
879 	__isl_keep isl_multi_union_pw_aff *input_schedule,
880 	__isl_keep isl_multi_val *tile_sizes)
881 {
882 	isl_ctx *ctx;
883 	ppcg_ht_tiling *tiling;
884 	isl_multi_val *space_sizes, *phase_shift;
885 	isl_aff *time_tile, *shift_space;
886 	isl_multi_aff *localize;
887 	isl_val *h, *duh, *dlh;
888 	isl_val *st, *s0, *du, *dl;
889 	isl_space *ts, *local_ts;
890 
891 	if (!bounds || !input_node || !input_schedule || !tile_sizes)
892 		goto error;
893 
894 	ctx = isl_multi_union_pw_aff_get_ctx(input_schedule);
895 	tiling = isl_calloc_type(ctx, struct ppcg_ht_tiling);
896 	if (!tiling)
897 		goto error;
898 	tiling->ref = 1;
899 
900 	st = isl_multi_val_get_val(tile_sizes, 0);
901 	h = isl_val_sub_ui(isl_val_copy(st), 1);
902 	s0 = isl_multi_val_get_val(tile_sizes, 1);
903 	du = ppcg_ht_bounds_get_upper(bounds);
904 	dl = ppcg_ht_bounds_get_lower(bounds, 0);
905 
906 	duh = isl_val_floor(isl_val_mul(isl_val_copy(du), isl_val_copy(h)));
907 	dlh = isl_val_floor(isl_val_mul(isl_val_copy(dl), isl_val_copy(h)));
908 
909 	ts = construct_ts_space(ctx);
910 	local_ts = construct_local_ts_space(ctx);
911 
912 	space_sizes = compute_space_sizes(tile_sizes, dlh, duh);
913 	phase_shift = compute_phase_shift(ts, st, s0, duh);
914 	time_tile = compute_time_tile(ts, st);
915 	shift_space = compute_shift_space(time_tile, space_sizes, phase_shift);
916 	localize = compute_localize(local_ts, shift_space, st, space_sizes);
917 	isl_space_free(ts);
918 
919 	tiling->input_node = isl_schedule_node_copy(input_node);
920 	tiling->input_schedule = isl_multi_union_pw_aff_copy(input_schedule);
921 	tiling->space_sizes = space_sizes;
922 	tiling->bounds = bounds;
923 	tiling->local_time = isl_multi_aff_get_aff(localize, 0);
924 	tiling->hex = compute_hexagon(local_ts, h, s0, dl, du, dlh, duh);
925 	tiling->hex = isl_set_preimage_multi_aff(tiling->hex, localize);
926 	tiling->time_tile = time_tile;
927 	tiling->shift_space = shift_space;
928 	tiling->shift_phase = compute_shift_phase(phase_shift);
929 	isl_multi_val_free(phase_shift);
930 
931 	isl_val_free(duh);
932 	isl_val_free(dlh);
933 	isl_val_free(du);
934 	isl_val_free(dl);
935 	isl_val_free(s0);
936 	isl_val_free(st);
937 	isl_val_free(h);
938 
939 	if (!tiling->input_schedule || !tiling->local_time || !tiling->hex ||
940 	    !tiling->shift_space || !tiling->shift_phase)
941 		return ppcg_ht_tiling_free(tiling);
942 
943 	tiling = ppcg_ht_tiling_set_project_ts(tiling);
944 
945 	return tiling;
946 error:
947 	ppcg_ht_bounds_free(bounds);
948 	return NULL;
949 }
950 
951 /* Are all members of the band node "node" coincident?
952  */
all_coincident(__isl_keep isl_schedule_node * node)953 static isl_bool all_coincident(__isl_keep isl_schedule_node *node)
954 {
955 	int i, n;
956 
957 	n = isl_schedule_node_band_n_member(node);
958 	for (i = 0; i < n; ++i) {
959 		isl_bool c;
960 
961 		c = isl_schedule_node_band_member_get_coincident(node, i);
962 		if (c < 0 || !c)
963 			return c;
964 	}
965 
966 	return isl_bool_true;
967 }
968 
969 /* Does "node" satisfy the properties of the inner node in the input
970  * pattern for hybrid tiling?
971  * That is, is it a band node with only coincident members, of which
972  * there is at least one?
973  */
has_child_properties(__isl_keep isl_schedule_node * node)974 static isl_bool has_child_properties(__isl_keep isl_schedule_node *node)
975 {
976 	if (!node)
977 		return isl_bool_error;
978 	if (isl_schedule_node_get_type(node) != isl_schedule_node_band)
979 		return isl_bool_false;
980 	if (isl_schedule_node_band_n_member(node) < 1)
981 		return isl_bool_false;
982 	return all_coincident(node);
983 }
984 
985 /* Does "node" satisfy the properties of the outer node in the input
986  * pattern for hybrid tiling?
987  * That is, is it a band node with a single member?
988  */
has_parent_properties(__isl_keep isl_schedule_node * node)989 static isl_bool has_parent_properties(__isl_keep isl_schedule_node *node)
990 {
991 	if (!node)
992 		return isl_bool_error;
993 	if (isl_schedule_node_get_type(node) != isl_schedule_node_band)
994 		return isl_bool_false;
995 	if (isl_schedule_node_band_n_member(node) != 1)
996 		return isl_bool_false;
997 	return isl_bool_true;
998 }
999 
1000 /* Does the parent of "node" satisfy the input patttern for hybrid tiling?
1001  * That is, does "node" satisfy the properties of the inner node and
1002  * does the parent of "node" satisfy the properties of the outer node?
1003  */
ppcg_ht_parent_has_input_pattern(__isl_keep isl_schedule_node * node)1004 isl_bool ppcg_ht_parent_has_input_pattern(__isl_keep isl_schedule_node *node)
1005 {
1006 	isl_bool has_pattern;
1007 
1008 	has_pattern = has_child_properties(node);
1009 	if (has_pattern < 0 || !has_pattern)
1010 		return has_pattern;
1011 
1012 	node = isl_schedule_node_copy(node);
1013 	node = isl_schedule_node_parent(node);
1014 	has_pattern = has_parent_properties(node);
1015 	isl_schedule_node_free(node);
1016 
1017 	return has_pattern;
1018 }
1019 
1020 /* Does "node" satisfy the input patttern for hybrid tiling?
1021  * That is, does "node" satisfy the properties of the outer node and
1022  * does the child of "node" satisfy the properties of the inner node?
1023  */
ppcg_ht_has_input_pattern(__isl_keep isl_schedule_node * node)1024 isl_bool ppcg_ht_has_input_pattern(__isl_keep isl_schedule_node *node)
1025 {
1026 	isl_bool has_pattern;
1027 
1028 	has_pattern = has_parent_properties(node);
1029 	if (has_pattern < 0 || !has_pattern)
1030 		return has_pattern;
1031 
1032 	node = isl_schedule_node_get_child(node, 0);
1033 	has_pattern = has_child_properties(node);
1034 	isl_schedule_node_free(node);
1035 
1036 	return has_pattern;
1037 }
1038 
1039 /* Check that "node" satisfies the input pattern for hybrid tiling.
1040  * Error out if it does not.
1041  */
check_input_pattern(__isl_keep isl_schedule_node * node)1042 static isl_stat check_input_pattern(__isl_keep isl_schedule_node *node)
1043 {
1044 	isl_bool has_pattern;
1045 
1046 	has_pattern = ppcg_ht_has_input_pattern(node);
1047 	if (has_pattern < 0)
1048 		return isl_stat_error;
1049 	if (!has_pattern)
1050 		isl_die(isl_schedule_node_get_ctx(node), isl_error_invalid,
1051 			"invalid input pattern for hybrid tiling",
1052 			return isl_stat_error);
1053 
1054 	return isl_stat_ok;
1055 }
1056 
1057 /* Extract the input schedule from "node", i.e., the product
1058  * of the partial schedules of the parent and child nodes
1059  * in the input pattern.
1060  */
extract_input_schedule(__isl_keep isl_schedule_node * node)1061 static __isl_give isl_multi_union_pw_aff *extract_input_schedule(
1062 	__isl_keep isl_schedule_node *node)
1063 {
1064 	isl_multi_union_pw_aff *partial, *partial2;
1065 
1066 	partial = isl_schedule_node_band_get_partial_schedule(node);
1067 	node = isl_schedule_node_get_child(node, 0);
1068 	partial2 = isl_schedule_node_band_get_partial_schedule(node);
1069 	isl_schedule_node_free(node);
1070 
1071 	return isl_multi_union_pw_aff_range_product(partial, partial2);
1072 }
1073 
1074 /* Collect all dependences from "scop" that are relevant for performing
1075  * hybrid tiling on "node" and its child and map them to the schedule
1076  * space of this pair of nodes.
1077  *
1078  * In case live range reordering is not used,
1079  * the flow and the false dependences are collected.
1080  * In case live range reordering is used,
1081  * the flow and the forced dependences are collected, as well
1082  * as the order dependences that are adjacent to non-local
1083  * flow dependences.
1084  *
1085  * In all cases, only dependences that map to the same instance
1086  * of the outer part of the schedule are considered.
1087  */
collect_deps(struct ppcg_scop * scop,__isl_keep isl_schedule_node * node)1088 static __isl_give isl_map *collect_deps(struct ppcg_scop *scop,
1089 	__isl_keep isl_schedule_node *node)
1090 {
1091 	isl_space *space;
1092 	isl_multi_union_pw_aff *prefix, *partial;
1093 	isl_union_map *flow, *other, *dep, *umap;
1094 	isl_map *map;
1095 
1096 	prefix = isl_schedule_node_get_prefix_schedule_multi_union_pw_aff(node);
1097 	partial = extract_input_schedule(node);
1098 	space = isl_multi_union_pw_aff_get_space(partial);
1099 
1100 	flow = isl_union_map_copy(scop->dep_flow);
1101 	flow = isl_union_map_eq_at_multi_union_pw_aff(flow,
1102 					isl_multi_union_pw_aff_copy(prefix));
1103 	if (!scop->options->live_range_reordering) {
1104 		other = isl_union_map_copy(scop->dep_false);
1105 		other = isl_union_map_eq_at_multi_union_pw_aff(other, prefix);
1106 	} else {
1107 		isl_union_map *local, *non_local, *order, *adj;
1108 		isl_union_set *domain, *range;
1109 
1110 		other = isl_union_map_copy(scop->dep_forced);
1111 		other = isl_union_map_eq_at_multi_union_pw_aff(other,
1112 					isl_multi_union_pw_aff_copy(prefix));
1113 		local = isl_union_map_copy(flow);
1114 		local = isl_union_map_eq_at_multi_union_pw_aff(local,
1115 					isl_multi_union_pw_aff_copy(partial));
1116 		non_local = isl_union_map_copy(flow);
1117 		non_local = isl_union_map_subtract(non_local, local);
1118 
1119 		order = isl_union_map_copy(scop->dep_order);
1120 		order = isl_union_map_eq_at_multi_union_pw_aff(order, prefix);
1121 		adj = isl_union_map_copy(order);
1122 		domain = isl_union_map_domain(isl_union_map_copy(non_local));
1123 		domain = isl_union_set_coalesce(domain);
1124 		adj = isl_union_map_intersect_range(adj, domain);
1125 		other = isl_union_map_union(other, adj);
1126 
1127 		adj = order;
1128 		range = isl_union_map_range(non_local);
1129 		range = isl_union_set_coalesce(range);
1130 		adj = isl_union_map_intersect_domain(adj, range);
1131 		other = isl_union_map_union(other, adj);
1132 	}
1133 	dep = isl_union_map_union(flow, other);
1134 
1135 	umap = isl_union_map_from_multi_union_pw_aff(partial);
1136 	dep = isl_union_map_apply_domain(dep, isl_union_map_copy(umap));
1137 	dep = isl_union_map_apply_range(dep, umap);
1138 
1139 	space = isl_space_map_from_set(space);
1140 	map = isl_union_map_extract_map(dep, space);
1141 	isl_union_map_free(dep);
1142 
1143 	map = isl_map_coalesce(map);
1144 
1145 	return map;
1146 }
1147 
1148 /* Given a constraint of the form
1149  *
1150  *	a i_0 + b i_1 >= 0
1151  * or
1152  *	a i_0 + b i_1 = 0
1153  *
1154  * use it to update one or both of the non-negative bounds
1155  * in "list" = (min, max) such that
1156  *
1157  *	i_1 >= -min i_0
1158  * and
1159  *	i_1 <= max i_0
1160  *
1161  * If b = 0, then the constraint cannot be used.
1162  * Otherwise, the constraint is equivalent to
1163  *
1164  *	sgn(b) i_1 >= - a/abs(b) i_0
1165  * i.e.,
1166  *	i_1 >= - a/abs(b) i_0
1167  * or
1168  *	i_1 <= a/abs(b) i_0
1169  *
1170  * Set the first or second element of "list" to max(0, a/abs(b)),
1171  * according to the sign of "b".  Or set both in case the constraint
1172  * is an equality, taking into account the sign change.
1173  */
list_set_min_max(__isl_take isl_val_list * list,__isl_keep isl_constraint * c)1174 static __isl_give isl_val_list *list_set_min_max(__isl_take isl_val_list *list,
1175 	__isl_keep isl_constraint *c)
1176 {
1177 	isl_val *a, *b;
1178 	int sign;
1179 	int pos;
1180 	isl_bool eq, is_zero, is_neg;
1181 
1182 	eq = isl_constraint_is_equality(c);
1183 	if (eq < 0)
1184 		return isl_val_list_free(list);
1185 
1186 	b = isl_constraint_get_coefficient_val(c, isl_dim_set, 1);
1187 	is_zero = isl_val_is_zero(b);
1188 	if (is_zero == isl_bool_true) {
1189 		isl_val_free(b);
1190 		return list;
1191 	}
1192 	a = isl_constraint_get_coefficient_val(c, isl_dim_set, 0);
1193 	sign = isl_val_sgn(b);
1194 	b = isl_val_abs(b);
1195 	a = isl_val_div(a, b);
1196 
1197 	if (eq)
1198 		b = isl_val_copy(a);
1199 
1200 	pos = sign > 0 ? 0 : 1;
1201 	is_neg = isl_val_is_neg(a);
1202 	if (is_neg == isl_bool_true)
1203 		a = isl_val_set_si(a, 0);
1204 	list = isl_val_list_set_val(list, pos, a);
1205 
1206 	if (!eq)
1207 		return is_neg < 0 ? isl_val_list_free(list) : list;
1208 
1209 	pos = 1 - pos;
1210 	a = isl_val_neg(b);
1211 	is_neg = isl_val_is_neg(a);
1212 	if (is_neg == isl_bool_true)
1213 		a = isl_val_set_si(a, 0);
1214 	list = isl_val_list_set_val(list, pos, a);
1215 
1216 	return is_neg < 0 ? isl_val_list_free(list) : list;
1217 }
1218 
1219 /* If constraint "c" passes through the origin, then try and use it
1220  * to update the non-negative bounds in "list" = (min, max) such that
1221  *
1222  *	i_1 >= -min i_0
1223  * and
1224  *	i_1 <= max i_0
1225  */
set_min_max(__isl_take isl_constraint * c,void * user)1226 static isl_stat set_min_max(__isl_take isl_constraint *c, void *user)
1227 {
1228 	isl_val *v;
1229 	isl_val_list **list = user;
1230 	isl_bool is_zero;
1231 
1232 	v = isl_constraint_get_constant_val(c);
1233 	is_zero = isl_val_is_zero(v);
1234 	isl_val_free(v);
1235 
1236 	if (is_zero == isl_bool_true)
1237 		*list = list_set_min_max(*list, c);
1238 
1239 	isl_constraint_free(c);
1240 	return is_zero < 0 ? isl_stat_error : isl_stat_ok;
1241 }
1242 
1243 /* Given a set of dependence distance vectors "dist", compute
1244  * pair of non-negative bounds min and max such that
1245  *
1246  *	d_pos >= -min d_0
1247  * and
1248  *	d_pos <= max d_0
1249  *
1250  * and return the pair (min, max).
1251  * If no bound can be found in either direction, then the bound
1252  * is replaced by NaN.
1253  *
1254  * The dependence distances are first projected onto the (d_0, d_pos).
1255  * Then the zero dependence distance is added and the convex hull is computed.
1256  * Finally, the bounds are extracted from the constraints of the convex hull
1257  * that pass through the origin.
1258  */
min_max_dist(__isl_keep isl_set * dist,int pos)1259 static __isl_give isl_val_list *min_max_dist(__isl_keep isl_set *dist, int pos)
1260 {
1261 	isl_space *space;
1262 	isl_basic_set *hull;
1263 	int dim;
1264 	isl_ctx *ctx;
1265 	isl_val *nan;
1266 	isl_val_list *list;
1267 
1268 	ctx = isl_set_get_ctx(dist);
1269 	nan = isl_val_nan(ctx);
1270 	list = isl_val_list_alloc(ctx, 2);
1271 	list = isl_val_list_add(list, isl_val_copy(nan));
1272 	list = isl_val_list_add(list, nan);
1273 
1274 	dist = isl_set_copy(dist);
1275 	dim = isl_set_dim(dist, isl_dim_set);
1276 	if (dist && pos >= dim)
1277 		isl_die(ctx, isl_error_internal, "position out of bounds",
1278 			dist = isl_set_free(dist));
1279 	dist = isl_set_project_out(dist, isl_dim_set, pos + 1, dim - (pos + 1));
1280 	dist = isl_set_project_out(dist, isl_dim_set, 1, pos - 1);
1281 
1282 	space = isl_set_get_space(dist);
1283 	dist = isl_set_union(dist, isl_set_from_point(isl_point_zero(space)));
1284 	dist = isl_set_remove_divs(dist);
1285 	hull = isl_set_convex_hull(dist);
1286 
1287 	if (isl_basic_set_foreach_constraint(hull, &set_min_max, &list) < 0)
1288 		list = isl_val_list_free(list);
1289 	isl_basic_set_free(hull);
1290 
1291 	return list;
1292 }
1293 
1294 /* Given a schedule node "node" that, together with its child,
1295  * satisfies the input pattern for hybrid tiling, compute bounds
1296  * on the relative dependence distances of the child node with
1297  * respect to the parent node.  These bounds are needed to
1298  * construct a hybrid tiling.
1299  *
1300  * First all relevant dependences are collected and mapped
1301  * to the schedule space of the pair of nodes.  Then, the
1302  * dependence distances are computed in this space.
1303  *
1304  * These dependence distances are then projected onto a two-dimensional
1305  * space consisting of the single schedule dimension of the outer node
1306  * and one of the schedule dimensions of the inner node.
1307  * The maximal and minimal relative dependence distances are extracted
1308  * from these projections.
1309  * This process is repeated for each of the schedule dimensions
1310  * of the inner node.  For the first dimension, both minimal and
1311  * maximal relative dependence distances are stored in the result.
1312  * For the other dimensions, only the minimal relative dependence
1313  * distance is stored.
1314  */
ppcg_ht_compute_bounds(struct ppcg_scop * scop,__isl_keep isl_schedule_node * node)1315 __isl_give ppcg_ht_bounds *ppcg_ht_compute_bounds(struct ppcg_scop *scop,
1316 	__isl_keep isl_schedule_node *node)
1317 {
1318 	ppcg_ht_bounds *bnd;
1319 	isl_space *space;
1320 	isl_map *map;
1321 	isl_set *dist;
1322 	isl_val_list *pair;
1323 	isl_schedule_node *child;
1324 	int n;
1325 	int i, dim;
1326 
1327 	if (!scop || !node || check_input_pattern(node) < 0)
1328 		return NULL;
1329 
1330 	child = isl_schedule_node_get_child(node, 0);
1331 	space = isl_schedule_node_band_get_space(child);
1332 	dim = isl_schedule_node_band_n_member(child);
1333 	isl_schedule_node_free(child);
1334 	bnd = ppcg_ht_bounds_alloc(space);
1335 	if (!bnd)
1336 		return NULL;
1337 
1338 	map = collect_deps(scop, node);
1339 
1340 	dist = isl_map_deltas(map);
1341 	n = isl_set_dim(dist, isl_dim_param);
1342 	dist = isl_set_project_out(dist, isl_dim_param, 0, n);
1343 
1344 	pair = min_max_dist(dist, 1);
1345 	bnd = ppcg_ht_bounds_set_lower(bnd, 0, isl_val_list_get_val(pair, 0));
1346 	bnd = ppcg_ht_bounds_set_upper(bnd, isl_val_list_get_val(pair, 1));
1347 	isl_val_list_free(pair);
1348 
1349 	for (i = 1; i < dim; ++i) {
1350 		pair = min_max_dist(dist, 1 + i);
1351 		bnd = ppcg_ht_bounds_set_lower(bnd, i,
1352 						isl_val_list_get_val(pair, 0));
1353 		isl_val_list_free(pair);
1354 	}
1355 
1356 	isl_set_free(dist);
1357 
1358 	return bnd;
1359 }
1360 
1361 /* Check if all the fields of "phase" are valid, freeing "phase"
1362  * if they are not.
1363  */
check_phase(__isl_take ppcg_ht_phase * phase)1364 static __isl_give ppcg_ht_phase *check_phase(__isl_take ppcg_ht_phase *phase)
1365 {
1366 	if (!phase)
1367 		return NULL;
1368 
1369 	if (!phase->tiling || !phase->local_time ||
1370 	    !phase->shift_space || !phase->domain)
1371 		return ppcg_ht_phase_free(phase);
1372 
1373 	return phase;
1374 }
1375 
1376 /* Construct a ppcg_ht_phase object, that simply copies
1377  * information from "tiling".
1378  * That is, the result is defined over the "ts" space and
1379  * corresponds to phase 1.
1380  */
construct_phase(__isl_keep ppcg_ht_tiling * tiling)1381 static __isl_give ppcg_ht_phase *construct_phase(
1382 	__isl_keep ppcg_ht_tiling *tiling)
1383 {
1384 	isl_ctx *ctx;
1385 	ppcg_ht_phase *phase;
1386 
1387 	if (!tiling)
1388 		return NULL;
1389 
1390 	ctx = ppcg_ht_tiling_get_ctx(tiling);
1391 	phase = isl_calloc_type(ctx, struct ppcg_ht_phase);
1392 	if (!phase)
1393 		return NULL;
1394 	phase->tiling = ppcg_ht_tiling_copy(tiling);
1395 	phase->time_tile = isl_aff_copy(tiling->time_tile);
1396 	phase->local_time = isl_aff_copy(tiling->local_time);
1397 	phase->shift_space = isl_aff_copy(tiling->shift_space);
1398 	phase->domain = isl_set_copy(tiling->hex);
1399 
1400 	return check_phase(phase);
1401 }
1402 
1403 /* Align the parameters of the elements of "phase" to those of "space".
1404  */
phase_align_params(__isl_take ppcg_ht_phase * phase,__isl_take isl_space * space)1405 static __isl_give ppcg_ht_phase *phase_align_params(
1406 	__isl_take ppcg_ht_phase *phase, __isl_take isl_space *space)
1407 {
1408 	if (!phase)
1409 		goto error;
1410 
1411 	phase->time_tile = isl_aff_align_params(phase->time_tile,
1412 							isl_space_copy(space));
1413 	phase->local_time = isl_aff_align_params(phase->local_time,
1414 							isl_space_copy(space));
1415 	phase->shift_space = isl_aff_align_params(phase->shift_space,
1416 							isl_space_copy(space));
1417 	phase->domain = isl_set_align_params(phase->domain, space);
1418 
1419 	return check_phase(phase);
1420 error:
1421 	isl_space_free(space);
1422 	return NULL;
1423 }
1424 
1425 /* Pull back "phase" over "ma".
1426  * That is, take a phase defined over the range of "ma" and
1427  * turn it into a phase defined over the domain of "ma".
1428  */
pullback_phase(__isl_take ppcg_ht_phase * phase,__isl_take isl_multi_aff * ma)1429 static __isl_give ppcg_ht_phase *pullback_phase(__isl_take ppcg_ht_phase *phase,
1430 	__isl_take isl_multi_aff *ma)
1431 {
1432 	phase = phase_align_params(phase, isl_multi_aff_get_space(ma));
1433 	if (!phase)
1434 		goto error;
1435 
1436 	phase->time_tile = isl_aff_pullback_multi_aff(phase->time_tile,
1437 							isl_multi_aff_copy(ma));
1438 	phase->local_time = isl_aff_pullback_multi_aff(phase->local_time,
1439 							isl_multi_aff_copy(ma));
1440 	phase->shift_space = isl_aff_pullback_multi_aff(phase->shift_space,
1441 							isl_multi_aff_copy(ma));
1442 	phase->domain = isl_set_preimage_multi_aff(phase->domain, ma);
1443 
1444 	return check_phase(phase);
1445 error:
1446 	isl_multi_aff_free(ma);
1447 	return NULL;
1448 }
1449 
1450 /* Pullback "phase" over phase->tiling->shift_phase, which shifts
1451  * phase 0 to phase 1.  The pullback therefore takes a phase 1
1452  * description and turns it into a phase 0 description.
1453  */
shift_phase(__isl_take ppcg_ht_phase * phase)1454 static __isl_give ppcg_ht_phase *shift_phase(__isl_take ppcg_ht_phase *phase)
1455 {
1456 	ppcg_ht_tiling *tiling;
1457 
1458 	if (!phase)
1459 		return NULL;
1460 
1461 	tiling = phase->tiling;
1462 	return pullback_phase(phase, isl_multi_aff_copy(tiling->shift_phase));
1463 }
1464 
1465 /* Take a "phase" defined over the ts-space and plug in the projection
1466  * from the input schedule space to the ts-space.
1467  * The result is then defined over this input schedule space.
1468  */
lift_phase(__isl_take ppcg_ht_phase * phase)1469 static __isl_give ppcg_ht_phase *lift_phase(__isl_take ppcg_ht_phase *phase)
1470 {
1471 	ppcg_ht_tiling *tiling;
1472 
1473 	if (!phase)
1474 		return NULL;
1475 
1476 	tiling = phase->tiling;
1477 	return pullback_phase(phase, isl_multi_aff_copy(tiling->project_ts));
1478 }
1479 
1480 /* Compute the shift that should be added to the space band
1481  * in order to be able to apply rectangular tiling to the space.
1482  * Store the shift in phase->space_shift.
1483  *
1484  * In the first dimension, it is equal to shift_space - s.
1485  * For phase 1, this results in
1486  *
1487  *	(-(2 * shift_s)*T) % W
1488  *
1489  * In phase 0, the "s" in shift_space has been replaced by "s + shift_s",
1490  * so the result is
1491  *
1492  *	shift_s + (-(2 * shift_s)*T) % W
1493  *
1494  * In the other dimensions, the shift is equal to
1495  *
1496  *	dl_i * local_time.
1497  */
compute_space_shift(__isl_take ppcg_ht_phase * phase)1498 static __isl_give ppcg_ht_phase *compute_space_shift(
1499 	__isl_take ppcg_ht_phase *phase)
1500 {
1501 	int i, n;
1502 	isl_space *space;
1503 	isl_local_space *ls;
1504 	isl_aff *aff, *s;
1505 	isl_multi_aff *space_shift;
1506 
1507 	if (!phase)
1508 		return NULL;
1509 
1510 	space = ppcg_ht_phase_get_input_space(phase);
1511 	space = isl_space_unwrap(space);
1512 	space = isl_space_range_map(space);
1513 
1514 	space_shift = isl_multi_aff_zero(space);
1515 	aff = isl_aff_copy(phase->shift_space);
1516 	ls = isl_local_space_from_space(isl_aff_get_domain_space(aff));
1517 	s = isl_aff_var_on_domain(ls, isl_dim_set, 1);
1518 	aff = isl_aff_sub(aff, s);
1519 	space_shift = isl_multi_aff_set_aff(space_shift, 0, aff);
1520 
1521 	n = isl_multi_aff_dim(space_shift, isl_dim_out);
1522 	for (i = 1; i < n; ++i) {
1523 		isl_val *v;
1524 		isl_aff *time;
1525 
1526 		v = ppcg_ht_bounds_get_lower(phase->tiling->bounds, i);
1527 		time = isl_aff_copy(phase->local_time);
1528 		time = isl_aff_scale_val(time, v);
1529 		space_shift = isl_multi_aff_set_aff(space_shift, i, time);
1530 	}
1531 
1532 	if (!space_shift)
1533 		return ppcg_ht_phase_free(phase);
1534 	phase->space_shift = space_shift;
1535 	return phase;
1536 }
1537 
1538 /* Compute the space tiling and store the result in phase->space_tile.
1539  * The space tiling is of the form
1540  *
1541  *	[P[t] -> C[s]] -> C[floor((s + space_shift)/space_size]
1542  */
compute_space_tile(__isl_take ppcg_ht_phase * phase)1543 static __isl_give ppcg_ht_phase *compute_space_tile(
1544 	__isl_take ppcg_ht_phase *phase)
1545 {
1546 	isl_space *space;
1547 	isl_multi_val *space_sizes;
1548 	isl_multi_aff *space_shift;
1549 	isl_multi_aff *tile;
1550 
1551 	if (!phase)
1552 		return NULL;
1553 
1554 	space = ppcg_ht_phase_get_input_space(phase);
1555 	space = isl_space_unwrap(space);
1556 	tile = isl_multi_aff_range_map(space);
1557 	space_shift = isl_multi_aff_copy(phase->space_shift);
1558 	tile = isl_multi_aff_add(space_shift, tile);
1559 	space_sizes = isl_multi_val_copy(phase->tiling->space_sizes);
1560 	tile = isl_multi_aff_scale_down_multi_val(tile, space_sizes);
1561 	tile = isl_multi_aff_floor(tile);
1562 
1563 	if (!tile)
1564 		return ppcg_ht_phase_free(phase);
1565 	phase->space_tile = tile;
1566 	return phase;
1567 }
1568 
1569 /* Construct a representation for one of the two phase for hybrid tiling
1570  * "tiling".  If "shift" is not set, then the phase is constructed
1571  * directly from the hexagonal tile shape in "tiling", which represents
1572  * the phase-1 tiles.  If "shift" is set, then this tile shape is shifted
1573  * back over tiling->shift_phase to obtain the phase-0 tiles.
1574  *
1575  * First copy data from "tiling", then optionally shift the phase and
1576  * finally move the tiling from the "ts" space of "tiling" to
1577  * the space of the input pattern.
1578  *
1579  * After the basic phase has been computed, also compute
1580  * the corresponding space shift.
1581  */
ppcg_ht_tiling_compute_phase(__isl_keep ppcg_ht_tiling * tiling,int shift)1582 static __isl_give ppcg_ht_phase *ppcg_ht_tiling_compute_phase(
1583 	__isl_keep ppcg_ht_tiling *tiling, int shift)
1584 {
1585 	ppcg_ht_phase *phase;
1586 
1587 	phase = construct_phase(tiling);
1588 	if (shift)
1589 		phase = shift_phase(phase);
1590 	phase = lift_phase(phase);
1591 
1592 	phase = compute_space_shift(phase);
1593 	phase = compute_space_tile(phase);
1594 
1595 	return phase;
1596 }
1597 
1598 /* Consruct a function that is equal to the time tile of "phase0"
1599  * on the domain of "phase0" and equal to the time tile of "phase1"
1600  * on the domain of "phase1".
1601  * The two domains are assumed to form a partition of the input
1602  * schedule space.
1603  */
combine_time_tile(__isl_keep ppcg_ht_phase * phase0,__isl_keep ppcg_ht_phase * phase1)1604 static __isl_give isl_pw_multi_aff *combine_time_tile(
1605 	__isl_keep ppcg_ht_phase *phase0, __isl_keep ppcg_ht_phase *phase1)
1606 {
1607 	isl_aff *T;
1608 	isl_pw_aff *time, *time1;
1609 
1610 	if (!phase0 || !phase1)
1611 		return NULL;
1612 
1613 	T = isl_aff_copy(phase0->time_tile);
1614 	time = isl_pw_aff_alloc(ppcg_ht_phase_get_domain(phase0), T);
1615 
1616 	T = isl_aff_copy(phase1->time_tile);
1617 	time1 = isl_pw_aff_alloc(ppcg_ht_phase_get_domain(phase1), T);
1618 
1619 	time = isl_pw_aff_union_add(time, time1);
1620 
1621 	return isl_pw_multi_aff_from_pw_aff(time);
1622 }
1623 
1624 /* Name used in mark nodes that contain a pointer to a ppcg_ht_phase.
1625  */
1626 static char *ppcg_phase_name = "phase";
1627 
1628 /* Does "id" contain a pointer to a ppcg_ht_phase?
1629  * That is, is it called "phase"?
1630  */
is_phase_id(__isl_keep isl_id * id)1631 static isl_bool is_phase_id(__isl_keep isl_id *id)
1632 {
1633 	const char *name;
1634 
1635 	name = isl_id_get_name(id);
1636 	if (!name)
1637 		return isl_bool_error;
1638 
1639 	return !strcmp(name, ppcg_phase_name);
1640 }
1641 
1642 /* Given a mark node with an identifier that points to a ppcg_ht_phase,
1643  * extract this ppcg_ht_phase pointer.
1644  */
ppcg_ht_phase_extract_from_mark(__isl_keep isl_schedule_node * node)1645 __isl_keep ppcg_ht_phase *ppcg_ht_phase_extract_from_mark(
1646 	__isl_keep isl_schedule_node *node)
1647 {
1648 	isl_bool is_phase;
1649 	isl_id *id;
1650 	void *p;
1651 
1652 	if (!node)
1653 		return NULL;
1654 	if (isl_schedule_node_get_type(node) != isl_schedule_node_mark)
1655 		isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
1656 			"not a phase mark", return NULL);
1657 
1658 	id = isl_schedule_node_mark_get_id(node);
1659 	is_phase = is_phase_id(id);
1660 	p = isl_id_get_user(id);
1661 	isl_id_free(id);
1662 
1663 	if (is_phase < 0)
1664 		return NULL;
1665 	if (!is_phase)
1666 		isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
1667 			"not a phase mark", return NULL);
1668 
1669 	return p;
1670 }
1671 
1672 /* Insert a mark node at "node" holding a pointer to "phase".
1673  */
insert_phase(__isl_take isl_schedule_node * node,__isl_take ppcg_ht_phase * phase)1674 static __isl_give isl_schedule_node *insert_phase(
1675 	__isl_take isl_schedule_node *node, __isl_take ppcg_ht_phase *phase)
1676 {
1677 	isl_ctx *ctx;
1678 	isl_id *id;
1679 
1680 	if (!node)
1681 		goto error;
1682 	ctx = isl_schedule_node_get_ctx(node);
1683 	id = isl_id_alloc(ctx, ppcg_phase_name, phase);
1684 	if (!id)
1685 		goto error;
1686 	id = isl_id_set_free_user(id, &ppcg_ht_phase_free_wrap);
1687 	node = isl_schedule_node_insert_mark(node, id);
1688 
1689 	return node;
1690 error:
1691 	ppcg_ht_phase_free(phase);
1692 	isl_schedule_node_free(node);
1693 	return NULL;
1694 }
1695 
1696 /* Construct a mapping from the elements of the original pair of bands
1697  * to which tiling was applied that belong to a tile of "phase"
1698  * to that tile, preserving the values for the outer bands.
1699  *
1700  * The mapping is of the form
1701  *
1702  *	[[outer] -> [P -> C]] -> [[outer] -> [tile]]
1703  *
1704  * where tile is defined by a concatenation of the time_tile and
1705  * the space_tile.
1706  */
construct_tile_map(__isl_keep ppcg_ht_phase * phase)1707 static __isl_give isl_map *construct_tile_map(__isl_keep ppcg_ht_phase *phase)
1708 {
1709 	int depth;
1710 	isl_space *space;
1711 	isl_multi_aff *ma;
1712 	isl_multi_aff *tiling;
1713 	isl_map *el2tile;
1714 
1715 	depth = isl_schedule_node_get_schedule_depth(
1716 						phase->tiling->input_node);
1717 	space = isl_aff_get_space(phase->time_tile);
1718 	space = isl_space_params(space);
1719 	space = isl_space_set_from_params(space);
1720 	space = isl_space_add_dims(space, isl_dim_set, depth);
1721 	space = isl_space_map_from_set(space);
1722 	ma = isl_multi_aff_identity(space);
1723 
1724 	tiling = isl_multi_aff_flat_range_product(
1725 		isl_multi_aff_from_aff(isl_aff_copy(phase->time_tile)),
1726 		isl_multi_aff_copy(phase->space_tile));
1727 	el2tile = isl_map_from_multi_aff(tiling);
1728 	el2tile = isl_map_intersect_domain(el2tile,
1729 						isl_set_copy(phase->domain));
1730 	el2tile = isl_map_product(isl_map_from_multi_aff(ma), el2tile);
1731 
1732 	return el2tile;
1733 }
1734 
1735 /* Return a description of the full tiles of "phase" at the point
1736  * in the original schedule tree where the tiling was applied.
1737  *
1738  * First construct a mapping from the input schedule dimensions
1739  * up to an including the original pair of bands to which hybrid tiling
1740  * was applied to schedule dimensions in which this original pair
1741  * has been replaced by the tiles.
1742  * This mapping is of the form
1743  *
1744  *	[[outer] -> [P -> C]] -> [[outer] -> [tile]]
1745  *
1746  * Apply this mapping to the set of all values for the input
1747  * schedule dimensions and then apply its inverse.
1748  * The result is the set of values for the input schedule dimensions
1749  * that would map to any of the tiles.  Subtracting from this set
1750  * the set of values that are actually executed produces the set
1751  * of values that belong to a tile but that are not executed.
1752  * Mapping these back to the tiles produces a description of
1753  * the partial tiles.  Subtracting these from the set of all tiles
1754  * produces a description of the full tiles in the form
1755  *
1756  *	[[outer] -> [tile]]
1757  */
compute_full_tile(__isl_keep ppcg_ht_phase * phase)1758 static __isl_give isl_set *compute_full_tile(__isl_keep ppcg_ht_phase *phase)
1759 {
1760 	isl_schedule_node *node;
1761 	isl_union_set *domain;
1762 	isl_union_map *prefix, *schedule;
1763 	isl_set *all, *partial, *all_el;
1764 	isl_map *tile2el, *el2tile;
1765 	isl_multi_union_pw_aff *mupa;
1766 
1767 	el2tile = construct_tile_map(phase);
1768 	tile2el = isl_map_reverse(isl_map_copy(el2tile));
1769 
1770 	node = phase->tiling->input_node;
1771 	prefix = isl_schedule_node_get_prefix_schedule_union_map(node);
1772 	domain = isl_schedule_node_get_domain(node);
1773 	mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule);
1774 	schedule = isl_union_map_from_multi_union_pw_aff(mupa);
1775 	schedule = isl_union_map_range_product(prefix, schedule);
1776 	all_el = isl_set_from_union_set(isl_union_set_apply(domain, schedule));
1777 	all_el = isl_set_coalesce(all_el);
1778 
1779 	all = isl_set_apply(isl_set_copy(all_el), isl_map_copy(el2tile));
1780 
1781 	partial = isl_set_copy(all);
1782 	partial = isl_set_apply(partial, tile2el);
1783 	partial = isl_set_subtract(partial, all_el);
1784 	partial = isl_set_apply(partial, el2tile);
1785 
1786 	return isl_set_subtract(all, partial);
1787 }
1788 
1789 /* Copy the AST loop types of the non-isolated part to those
1790  * of the isolated part.
1791  */
set_isolate_loop_type(__isl_take isl_schedule_node * node)1792 static __isl_give isl_schedule_node *set_isolate_loop_type(
1793 	__isl_take isl_schedule_node *node)
1794 {
1795 	int i, n;
1796 
1797 	n = isl_schedule_node_band_n_member(node);
1798 	for (i = 0; i < n; ++i) {
1799 		enum isl_ast_loop_type type;
1800 
1801 		type = isl_schedule_node_band_member_get_ast_loop_type(node, i);
1802 		node = isl_schedule_node_band_member_set_isolate_ast_loop_type(
1803 								node, i, type);
1804 	}
1805 
1806 	return node;
1807 }
1808 
1809 /* If options->isolate_full_tiles is set, then mark the full tiles
1810  * in "node" for isolation.  The full tiles are derived from "phase".
1811  * "node" may point to a part of the tiling, e.g., the space tiling.
1812  *
1813  * The full tiles are originally computed in the form
1814  *
1815  *	[[outer] -> [tile]]
1816  *
1817  * However, the band that "node" points to may only contain
1818  * subset of the tile dimensions.
1819  * The description above is therefore treated as
1820  *
1821  *	[[outer] -> [before; this; after]]
1822  *
1823  * before is of size "pos"; this is of size "dim"; and
1824  * after is of size "out - pos - dim".
1825  * The after part is first project out.  Then the range is split
1826  * into a before and this part and finally the before part is moved
1827  * to the domain, resulting in
1828  *
1829  *	[[outer; before] -> [this]]
1830  *
1831  * This description is then used as the isolate option.
1832  *
1833  * The AST loop type for the isolated part is set to be the same
1834  * as that of the non-isolated part.
1835  */
ppcg_ht_phase_isolate_full_tile_node(__isl_keep ppcg_ht_phase * phase,__isl_take isl_schedule_node * node,struct ppcg_options * options)1836 static __isl_give isl_schedule_node *ppcg_ht_phase_isolate_full_tile_node(
1837 	__isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node,
1838 	struct ppcg_options *options)
1839 {
1840 	int in, out, pos, depth, dim;
1841 	isl_space *space;
1842 	isl_multi_aff *ma1, *ma2;
1843 	isl_set *tile;
1844 	isl_map *map;
1845 	isl_set *set;
1846 	isl_union_set *opt;
1847 
1848 	if (!options->isolate_full_tiles)
1849 		return node;
1850 
1851 	depth = isl_schedule_node_get_schedule_depth(node);
1852 	dim = isl_schedule_node_band_n_member(node);
1853 
1854 	tile = compute_full_tile(phase);
1855 	map = isl_set_unwrap(tile);
1856 	in = isl_map_dim(map, isl_dim_in);
1857 	out = isl_map_dim(map, isl_dim_out);
1858 	pos = depth - in;
1859 	map = isl_map_project_out(map, isl_dim_out, pos + dim,
1860 				out - (pos + dim));
1861 	space = isl_space_range(isl_map_get_space(map));
1862 	ma1 = isl_multi_aff_project_out_map(isl_space_copy(space),
1863 					   isl_dim_set, pos, dim);
1864 	ma2 = isl_multi_aff_project_out_map(space, isl_dim_set, 0, pos);
1865 	ma1 = isl_multi_aff_range_product(ma1, ma2);
1866 	map = isl_map_apply_range(map, isl_map_from_multi_aff(ma1));
1867 	map = isl_map_uncurry(map);
1868 	map = isl_map_flatten_domain(map);
1869 	set = isl_map_wrap(map);
1870 	set = isl_set_set_tuple_name(set, "isolate");
1871 
1872 	opt = isl_schedule_node_band_get_ast_build_options(node);
1873 	opt = isl_union_set_add_set(opt, set);
1874 	node = isl_schedule_node_band_set_ast_build_options(node, opt);
1875 	node = set_isolate_loop_type(node);
1876 
1877 	return node;
1878 }
1879 
1880 /* Insert a band node for performing the space tiling for "phase" at "node".
1881  * In particular, insert a band node with partial schedule
1882  *
1883  *	[P[t] -> C[s]] -> C[floor((s + space_shift)/space_size)]
1884  *
1885  * pulled back over the input schedule.
1886  * "options" determines whether full tiles should be separated
1887  * from partial tiles.
1888  *
1889  * The first tile dimension iterates over the hexagons in the same
1890  * phase, which are independent by construction.  The first dimension
1891  * is therefore marked coincident.
1892  * All dimensions are also marked for being generated as atomic loops
1893  * because separation is usually not desirable on tile loops.
1894  */
insert_space_tiling(__isl_keep ppcg_ht_phase * phase,__isl_take isl_schedule_node * node,struct ppcg_options * options)1895 static __isl_give isl_schedule_node *insert_space_tiling(
1896 	__isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node,
1897 	struct ppcg_options *options)
1898 {
1899 	isl_multi_aff *space_tile;
1900 	isl_multi_union_pw_aff *mupa;
1901 
1902 	if (!phase)
1903 		return isl_schedule_node_free(node);
1904 
1905 	space_tile = isl_multi_aff_copy(phase->space_tile);
1906 	mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule);
1907 	mupa = isl_multi_union_pw_aff_apply_multi_aff(mupa, space_tile);
1908 	node = isl_schedule_node_insert_partial_schedule(node, mupa);
1909 	node = ppcg_set_schedule_node_type(node, isl_ast_loop_atomic);
1910 	node = ppcg_ht_phase_isolate_full_tile_node(phase, node, options);
1911 	node = isl_schedule_node_band_member_set_coincident(node, 0, 1);
1912 
1913 	return node;
1914 }
1915 
1916 /* Given a pointer "node" to (a copy of) the original child node
1917  * in the input pattern, adjust its partial schedule such that
1918  * it starts at zero within each tile.
1919  *
1920  * That is, replace "s" by (s + space_shift) % space_sizes.
1921  */
ppcg_ht_phase_shift_space_point(__isl_keep ppcg_ht_phase * phase,__isl_take isl_schedule_node * node)1922 __isl_give isl_schedule_node *ppcg_ht_phase_shift_space_point(
1923 	__isl_keep ppcg_ht_phase *phase, __isl_take isl_schedule_node *node)
1924 {
1925 	isl_multi_val *space_sizes;
1926 	isl_multi_aff *space_shift;
1927 	isl_multi_union_pw_aff *mupa;
1928 
1929 	space_shift = isl_multi_aff_copy(phase->space_shift);
1930 	mupa = isl_multi_union_pw_aff_copy(phase->tiling->input_schedule);
1931 	mupa = isl_multi_union_pw_aff_apply_multi_aff(mupa, space_shift);
1932 	node = isl_schedule_node_band_shift(node, mupa);
1933 	space_sizes = isl_multi_val_copy(phase->tiling->space_sizes);
1934 	node = isl_schedule_node_band_mod(node, space_sizes);
1935 
1936 	return node;
1937 }
1938 
1939 /* Does
1940  *
1941  *	s0 > delta + 2 * {delta * h} - 1
1942  *
1943  * hold?
1944  */
wide_enough(__isl_keep isl_val * s0,__isl_keep isl_val * delta,__isl_keep isl_val * h)1945 static isl_bool wide_enough(__isl_keep isl_val *s0, __isl_keep isl_val *delta,
1946 	__isl_keep isl_val *h)
1947 {
1948 	isl_val *v, *v2;
1949 	isl_bool ok;
1950 
1951 	v = isl_val_mul(isl_val_copy(delta), isl_val_copy(h));
1952 	v2 = isl_val_floor(isl_val_copy(v));
1953 	v = isl_val_sub(v, v2);
1954 	v = isl_val_mul_ui(v, 2);
1955 	v = isl_val_add(v, isl_val_copy(delta));
1956 	v = isl_val_sub_ui(v, 1);
1957 	ok = isl_val_gt(s0, v);
1958 	isl_val_free(v);
1959 
1960 	return ok;
1961 }
1962 
1963 /* Is the tile size specified by "sizes" wide enough in the first space
1964  * dimension, i.e., the base of the hexagon?  This ensures that,
1965  * after hybrid tiling using "bounds" and these sizes,
1966  * neighboring hexagons in the same phase are far enough apart
1967  * that they do not depend on each other.
1968  * The test is only meaningful if the bounds are valid.
1969  *
1970  * Let st be (half) the size in the time dimension and s0 the base
1971  * size in the first space dimension.  Let delta be the dependence
1972  * distance in either positive or negative direction.  In principle,
1973  * it should be enough to have s0 + 1 > delta, i.e., s0 >= delta.
1974  * However, in case of fractional delta, the tile is not extended
1975  * with delta * (st - 1), but instead with floor(delta * (st - 1)).
1976  * The condition therefore needs to be adjusted to
1977  *
1978  *	s0 + 1 > delta + 2 {delta * (st - 1)}
1979  *
1980  * (with {} the fractional part) to account for the two slanted sides.
1981  * The condition in the paper "Hybrid Hexagonal/Classical Tiling for GPUs"
1982  * translates to
1983  *
1984  *	s0 >= delta + {delta * (st - 1)}
1985  *
1986  * Since 1 > frac(delta * (st - 1)), this condition implies
1987  * the condition above.
1988  *
1989  * The condition is checked for both directions.
1990  */
ppcg_ht_bounds_supports_sizes(__isl_keep ppcg_ht_bounds * bounds,__isl_keep isl_multi_val * sizes)1991 isl_bool ppcg_ht_bounds_supports_sizes(__isl_keep ppcg_ht_bounds *bounds,
1992 	__isl_keep isl_multi_val *sizes)
1993 {
1994 	isl_val *s0, *h;
1995 	isl_val *delta;
1996 	isl_bool ok;
1997 
1998 	ok = ppcg_ht_bounds_is_valid(bounds);
1999 	if (ok < 0 || !ok)
2000 		return ok;
2001 
2002 	h = isl_val_sub_ui(isl_multi_val_get_val(sizes, 0), 1);
2003 	s0 = isl_multi_val_get_val(sizes, 1);
2004 
2005 	delta = ppcg_ht_bounds_get_lower(bounds, 0);
2006 	ok = wide_enough(s0, delta, h);
2007 	isl_val_free(delta);
2008 
2009 	delta = ppcg_ht_bounds_get_upper(bounds);
2010 	if (ok == isl_bool_true)
2011 		ok = wide_enough(s0, delta, h);
2012 	isl_val_free(delta);
2013 
2014 	isl_val_free(s0);
2015 	isl_val_free(h);
2016 
2017 	return ok;
2018 }
2019 
2020 /* Check that the tile will be wide enough in the first space
2021  * dimension, i.e., the base of the hexagon.  This ensures that
2022  * neighboring hexagons in the same phase are far enough apart
2023  * that they do not depend on each other.
2024  *
2025  * Error out if the condition fails to hold.
2026  */
check_width(__isl_keep ppcg_ht_bounds * bounds,__isl_keep isl_multi_val * sizes)2027 static isl_stat check_width(__isl_keep ppcg_ht_bounds *bounds,
2028 	__isl_keep isl_multi_val *sizes)
2029 {
2030 	isl_bool ok;
2031 
2032 	ok = ppcg_ht_bounds_supports_sizes(bounds, sizes);
2033 
2034 	if (ok < 0)
2035 		return isl_stat_error;
2036 	if (!ok)
2037 		isl_die(isl_multi_val_get_ctx(sizes), isl_error_invalid,
2038 			"base of hybrid tiling hexagon not sufficiently wide",
2039 			return isl_stat_error);
2040 
2041 	return isl_stat_ok;
2042 }
2043 
2044 /* Given valid bounds on the relative dependence distances for
2045  * the pair of nested nodes that "node" point to, as well as sufficiently
2046  * wide tile sizes "sizes", insert the corresponding time and space tiling
2047  * at "node", along with a pair of phase nodes that can be used
2048  * to make further changes.
2049  * The space of "sizes" should be the product of the spaces
2050  * of the schedules of the pair of parent and child nodes.
2051  * "options" determines whether full tiles should be separated
2052  * from partial tiles.
2053  *
2054  * In particular, given an input of the form
2055  *
2056  *	P - C - ...
2057  *
2058  * the output has the form
2059  *
2060  *	        /- F0 - M0 - CT0 - P - C - ...
2061  *	PT - seq
2062  *	        \- F1 - M1 - CT1 - P - C - ...
2063  *
2064  * PT is the global time tiling.  Within each of these tiles,
2065  * two phases are executed in order.  Within each phase, the schedule
2066  * space is further subdivided into tiles through CT0 and CT1.
2067  * The first dimension of each of these iterates over the hexagons
2068  * within a phase and these are independent by construction.
2069  * The F0 and F1 filters filter the statement instances that belong
2070  * to the corresponding phase.  The M0 and M1 marks contain a pointer
2071  * to a ppcg_ht_phase object that can be used to perform further changes.
2072  *
2073  * After checking that input satisfies the requirements,
2074  * a data structure is constructed that represents the tiling and
2075  * two additional data structures are constructed for the two phases
2076  * of the tiling.  These are then used to define the filters F0 and F1 and
2077  * combined to construct the time tiling PT.
2078  * Then the time tiling node PT is inserted, followed by
2079  * the sequence with the two filters, the CT space tiling nodes and
2080  * the phase markers M.
2081  */
ppcg_ht_bounds_insert_tiling(__isl_take ppcg_ht_bounds * bounds,__isl_take isl_multi_val * sizes,__isl_take isl_schedule_node * node,struct ppcg_options * options)2082 __isl_give isl_schedule_node *ppcg_ht_bounds_insert_tiling(
2083 	__isl_take ppcg_ht_bounds *bounds, __isl_take isl_multi_val *sizes,
2084 	__isl_take isl_schedule_node *node, struct ppcg_options *options)
2085 {
2086 	isl_ctx *ctx;
2087 	isl_union_set *phase0;
2088 	isl_union_set *phase1;
2089 	isl_multi_union_pw_aff *input, *dom_time;
2090 	isl_union_pw_multi_aff *upma;
2091 	isl_pw_multi_aff *time;
2092 	isl_union_set_list *phases;
2093 	ppcg_ht_tiling *tiling;
2094 	ppcg_ht_phase *phase_0;
2095 	ppcg_ht_phase *phase_1;
2096 
2097 	if (!node || !sizes || !bounds)
2098 		goto error;
2099 	if (check_input_pattern(node) < 0 || check_width(bounds, sizes) < 0)
2100 		goto error;
2101 
2102 	ctx = isl_schedule_node_get_ctx(node);
2103 
2104 	input = extract_input_schedule(node);
2105 
2106 	tiling = ppcg_ht_bounds_construct_tiling(bounds, node, input, sizes);
2107 	phase_0 = ppcg_ht_tiling_compute_phase(tiling, 1);
2108 	phase_1 = ppcg_ht_tiling_compute_phase(tiling, 0);
2109 	time = combine_time_tile(phase_0, phase_1);
2110 	ppcg_ht_tiling_free(tiling);
2111 
2112 	upma = isl_union_pw_multi_aff_from_multi_union_pw_aff(
2113 					isl_multi_union_pw_aff_copy(input));
2114 	phase0 = isl_union_set_from_set(ppcg_ht_phase_get_domain(phase_0));
2115 	phase0 = isl_union_set_preimage_union_pw_multi_aff(phase0,
2116 					isl_union_pw_multi_aff_copy(upma));
2117 	phase1 = isl_union_set_from_set(ppcg_ht_phase_get_domain(phase_1));
2118 	phase1 = isl_union_set_preimage_union_pw_multi_aff(phase1, upma);
2119 
2120 	phases = isl_union_set_list_alloc(ctx, 2);
2121 	phases = isl_union_set_list_add(phases, phase0);
2122 	phases = isl_union_set_list_add(phases, phase1);
2123 
2124 	dom_time = isl_multi_union_pw_aff_apply_pw_multi_aff(input, time);
2125 	node = isl_schedule_node_insert_partial_schedule(node, dom_time);
2126 
2127 	node = isl_schedule_node_child(node, 0);
2128 
2129 	node = isl_schedule_node_insert_sequence(node, phases);
2130 	node = isl_schedule_node_child(node, 0);
2131 	node = isl_schedule_node_child(node, 0);
2132 	node = insert_space_tiling(phase_0, node, options);
2133 	node = insert_phase(node, phase_0);
2134 	node = isl_schedule_node_parent(node);
2135 	node = isl_schedule_node_next_sibling(node);
2136 	node = isl_schedule_node_child(node, 0);
2137 	node = insert_space_tiling(phase_1, node, options);
2138 	node = insert_phase(node, phase_1);
2139 	node = isl_schedule_node_parent(node);
2140 	node = isl_schedule_node_parent(node);
2141 
2142 	node = isl_schedule_node_parent(node);
2143 
2144 	isl_multi_val_free(sizes);
2145 	return node;
2146 error:
2147 	isl_multi_val_free(sizes);
2148 	isl_schedule_node_free(node);
2149 	ppcg_ht_bounds_free(bounds);
2150 	return NULL;
2151 }
2152 
2153 /* Given a branch "node" that contains a sequence node with two phases
2154  * of hybrid tiling as input, call "fn" on each of the two phase marker
2155  * nodes.
2156  *
2157  * That is, the input is as follows
2158  *
2159  *	         /- F0 - M0 - ...
2160  *	... - seq
2161  *	         \- F1 - M1 - ...
2162  *
2163  * and "fn" is called on M0 and on M1.
2164  */
hybrid_tile_foreach_phase(__isl_take isl_schedule_node * node,__isl_give isl_schedule_node * (* fn)(__isl_take isl_schedule_node * node,void * user),void * user)2165 __isl_give isl_schedule_node *hybrid_tile_foreach_phase(
2166 	__isl_take isl_schedule_node *node,
2167 	__isl_give isl_schedule_node *(*fn)(__isl_take isl_schedule_node *node,
2168 		void *user), void *user)
2169 {
2170 	int depth0, depth;
2171 
2172 	depth0 = isl_schedule_node_get_tree_depth(node);
2173 
2174 	while (node &&
2175 	    isl_schedule_node_get_type(node) != isl_schedule_node_sequence)
2176 		node = isl_schedule_node_child(node, 0);
2177 
2178 	node = isl_schedule_node_child(node, 0);
2179 	node = isl_schedule_node_child(node, 0);
2180 	if (!node)
2181 		return NULL;
2182 	node = fn(node, user);
2183 	node = isl_schedule_node_parent(node);
2184 	node = isl_schedule_node_next_sibling(node);
2185 	node = isl_schedule_node_child(node, 0);
2186 	if (!node)
2187 		return NULL;
2188 	node = fn(node, user);
2189 	node = isl_schedule_node_parent(node);
2190 	node = isl_schedule_node_parent(node);
2191 
2192 	depth = isl_schedule_node_get_tree_depth(node);
2193 	node = isl_schedule_node_ancestor(node, depth - depth0);
2194 
2195 	return node;
2196 }
2197 
2198 /* This function is called on each of the two phase marks
2199  * in a hybrid tiling tree.
2200  * Drop the phase mark at "node".
2201  */
drop_phase_mark(__isl_take isl_schedule_node * node,void * user)2202 static __isl_give isl_schedule_node *drop_phase_mark(
2203 	__isl_take isl_schedule_node *node, void *user)
2204 {
2205 	isl_id *id;
2206 	isl_bool is_phase;
2207 
2208 	if (isl_schedule_node_get_type(node) != isl_schedule_node_mark)
2209 		return node;
2210 
2211 	id = isl_schedule_node_mark_get_id(node);
2212 	is_phase = is_phase_id(id);
2213 	isl_id_free(id);
2214 
2215 	if (is_phase < 0)
2216 		return isl_schedule_node_free(node);
2217 	if (is_phase)
2218 		node = isl_schedule_node_delete(node);
2219 
2220 	return node;
2221 }
2222 
2223 /* Given a branch "node" that contains a sequence node with two phases
2224  * of hybrid tiling as input, remove the two phase marker nodes.
2225  *
2226  * That is, the input is as follows
2227  *
2228  *	         /- F0 - M0 - ...
2229  *	... - seq
2230  *	         \- F1 - M1 - ...
2231  *
2232  * and the output is
2233  *
2234  *	         /- F0 - ...
2235  *	... - seq
2236  *	         \- F1 - ...
2237  */
hybrid_tile_drop_phase_marks(__isl_take isl_schedule_node * node)2238 __isl_give isl_schedule_node *hybrid_tile_drop_phase_marks(
2239 	__isl_take isl_schedule_node *node)
2240 {
2241 	return hybrid_tile_foreach_phase(node, &drop_phase_mark, NULL);
2242 }
2243