1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22
23 #ifdef HAVE_cloog
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
32 #include <isl/deprecated/int.h>
33 #include <isl/deprecated/aff_int.h>
34 #endif
35 #endif
36
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tree-flow.h"
40 #include "dumpfile.h"
41 #include "cfgloop.h"
42 #include "tree-chrec.h"
43 #include "tree-data-ref.h"
44 #include "tree-scalar-evolution.h"
45 #include "sese.h"
46
47 #ifdef HAVE_cloog
48 #include "graphite-poly.h"
49
50 static isl_union_set *
scop_get_domains(scop_p scop ATTRIBUTE_UNUSED)51 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
52 {
53 int i;
54 poly_bb_p pbb;
55 isl_space *space = isl_set_get_space (scop->context);
56 isl_union_set *res = isl_union_set_empty (space);
57
58 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
59 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
60
61 return res;
62 }
63
64 static isl_union_map *
scop_get_dependences(scop_p scop)65 scop_get_dependences (scop_p scop)
66 {
67 isl_union_map *dependences;
68
69 if (!scop->must_raw)
70 compute_deps (scop, SCOP_BBS (scop),
71 &scop->must_raw, &scop->may_raw,
72 &scop->must_raw_no_source, &scop->may_raw_no_source,
73 &scop->must_war, &scop->may_war,
74 &scop->must_war_no_source, &scop->may_war_no_source,
75 &scop->must_waw, &scop->may_waw,
76 &scop->must_waw_no_source, &scop->may_waw_no_source);
77
78 dependences = isl_union_map_copy (scop->must_raw);
79 dependences = isl_union_map_union (dependences,
80 isl_union_map_copy (scop->must_war));
81 dependences = isl_union_map_union (dependences,
82 isl_union_map_copy (scop->must_waw));
83 dependences = isl_union_map_union (dependences,
84 isl_union_map_copy (scop->may_raw));
85 dependences = isl_union_map_union (dependences,
86 isl_union_map_copy (scop->may_war));
87 dependences = isl_union_map_union (dependences,
88 isl_union_map_copy (scop->may_waw));
89
90 return dependences;
91 }
92
93 /* getTileMap - Create a map that describes a n-dimensonal tiling.
94
95 getTileMap creates a map from a n-dimensional scattering space into an
96 2*n-dimensional scattering space. The map describes a rectangular tiling.
97
98 Example:
99 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
100
101 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
102 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
103 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
104
105 Before tiling:
106
107 for (i = 0; i < N; i++)
108 for (j = 0; j < M; j++)
109 S(i,j)
110
111 After tiling:
112
113 for (t_i = 0; t_i < N; i+=32)
114 for (t_j = 0; t_j < M; j+=32)
115 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
116 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
117 S(i,j)
118 */
119
120 static isl_basic_map *
getTileMap(isl_ctx * ctx,int scheduleDimensions,int tileSize)121 getTileMap(isl_ctx *ctx, int scheduleDimensions, int tileSize)
122 {
123 int x;
124 /* We construct
125
126 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
127 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
128 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
129
130 and project out the auxilary dimensions a0 and a1. */
131 isl_space *Space = isl_space_alloc(ctx, 0, scheduleDimensions,
132 scheduleDimensions * 3);
133 isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space));
134
135 isl_local_space *LocalSpace = isl_local_space_from_space(Space);
136
137 for (x = 0; x < scheduleDimensions; x++)
138 {
139 int sX = x;
140 int tX = x;
141 int pX = scheduleDimensions + x;
142 int aX = 2 * scheduleDimensions + x;
143
144 isl_constraint *c;
145
146 /* sX = aX * tileSize; */
147 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
148 isl_constraint_set_coefficient_si(c, isl_dim_out, sX, 1);
149 isl_constraint_set_coefficient_si(c, isl_dim_out, aX, -tileSize);
150 tileMap = isl_basic_map_add_constraint(tileMap, c);
151
152 /* pX = sX; */
153 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
154 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
155 isl_constraint_set_coefficient_si(c, isl_dim_in, sX, -1);
156 tileMap = isl_basic_map_add_constraint(tileMap, c);
157
158 /* tX <= pX */
159 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
160 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
161 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, -1);
162 tileMap = isl_basic_map_add_constraint(tileMap, c);
163
164 /* pX <= tX + (tileSize - 1) */
165 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
166 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, 1);
167 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, -1);
168 isl_constraint_set_constant_si(c, tileSize - 1);
169 tileMap = isl_basic_map_add_constraint(tileMap, c);
170 }
171
172 /* Project out auxilary dimensions.
173
174 The auxilary dimensions are transformed into existentially quantified ones.
175 This reduces the number of visible scattering dimensions and allows Cloog
176 to produces better code. */
177 tileMap = isl_basic_map_project_out(tileMap, isl_dim_out,
178 2 * scheduleDimensions,
179 scheduleDimensions);
180 isl_local_space_free(LocalSpace);
181 return tileMap;
182 }
183
184 /* getScheduleForBand - Get the schedule for this band.
185
186 Polly applies transformations like tiling on top of the isl calculated value.
187 This can influence the number of scheduling dimension. The number of
188 schedule dimensions is returned in the parameter 'Dimension'. */
189 static bool DisableTiling = false;
190
191 static isl_union_map *
getScheduleForBand(isl_band * Band,int * Dimensions)192 getScheduleForBand(isl_band *Band, int *Dimensions)
193 {
194 isl_union_map *PartialSchedule;
195 isl_ctx *ctx;
196 isl_space *Space;
197 isl_basic_map *TileMap;
198 isl_union_map *TileUMap;
199
200 PartialSchedule = isl_band_get_partial_schedule(Band);
201 *Dimensions = isl_band_n_member(Band);
202
203 if (DisableTiling)
204 return PartialSchedule;
205
206 /* It does not make any sense to tile a band with just one dimension. */
207 if (*Dimensions == 1)
208 return PartialSchedule;
209
210 ctx = isl_union_map_get_ctx(PartialSchedule);
211 Space = isl_union_map_get_space(PartialSchedule);
212
213 TileMap = getTileMap(ctx, *Dimensions, 32);
214 TileUMap = isl_union_map_from_map(isl_map_from_basic_map(TileMap));
215 TileUMap = isl_union_map_align_params(TileUMap, Space);
216 *Dimensions = 2 * *Dimensions;
217
218 return isl_union_map_apply_range(PartialSchedule, TileUMap);
219 }
220
221 /* Create a map that pre-vectorizes one scheduling dimension.
222
223 getPrevectorMap creates a map that maps each input dimension to the same
224 output dimension, except for the dimension DimToVectorize. DimToVectorize is
225 strip mined by 'VectorWidth' and the newly created point loop of
226 DimToVectorize is moved to the innermost level.
227
228 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
229
230 | Before transformation
231 |
232 | A[i,j] -> [i,j]
233 |
234 | for (i = 0; i < 128; i++)
235 | for (j = 0; j < 128; j++)
236 | A(i,j);
237
238 Prevector map:
239 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
240
241 | After transformation:
242 |
243 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
244 |
245 | for (it = 0; it < 128; it+=4)
246 | for (j = 0; j < 128; j++)
247 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
248 | A(ip,j);
249
250 The goal of this transformation is to create a trivially vectorizable loop.
251 This means a parallel loop at the innermost level that has a constant number
252 of iterations corresponding to the target vector width.
253
254 This transformation creates a loop at the innermost level. The loop has a
255 constant number of iterations, if the number of loop iterations at
256 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
257 currently constant and not yet target specific. This function does not reason
258 about parallelism. */
259 static isl_map *
getPrevectorMap(isl_ctx * ctx,int DimToVectorize,int ScheduleDimensions,int VectorWidth)260 getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
261 int ScheduleDimensions,
262 int VectorWidth)
263 {
264 isl_space *Space;
265 isl_local_space *LocalSpace, *LocalSpaceRange;
266 isl_set *Modulo;
267 isl_map *TilingMap;
268 isl_constraint *c;
269 isl_aff *Aff;
270 int PointDimension; /* ip */
271 int TileDimension; /* it */
272 isl_int VectorWidthMP;
273 int i;
274
275 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
276
277 Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
278 TilingMap = isl_map_universe(isl_space_copy(Space));
279 LocalSpace = isl_local_space_from_space(Space);
280 PointDimension = ScheduleDimensions;
281 TileDimension = DimToVectorize;
282
283 /* Create an identity map for everything except DimToVectorize and map
284 DimToVectorize to the point loop at the innermost dimension. */
285 for (i = 0; i < ScheduleDimensions; i++)
286 {
287 c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
288 isl_constraint_set_coefficient_si(c, isl_dim_in, i, -1);
289
290 if (i == DimToVectorize)
291 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
292 else
293 isl_constraint_set_coefficient_si(c, isl_dim_out, i, 1);
294
295 TilingMap = isl_map_add_constraint(TilingMap, c);
296 }
297
298 /* it % 'VectorWidth' = 0 */
299 LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace));
300 Aff = isl_aff_zero_on_domain(LocalSpaceRange);
301 Aff = isl_aff_set_constant_si(Aff, VectorWidth);
302 Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1);
303 isl_int_init(VectorWidthMP);
304 isl_int_set_si(VectorWidthMP, VectorWidth);
305 Aff = isl_aff_mod(Aff, VectorWidthMP);
306 isl_int_clear(VectorWidthMP);
307 Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff));
308 TilingMap = isl_map_intersect_range(TilingMap, Modulo);
309
310 /* it <= ip */
311 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
312 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, -1);
313 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
314 TilingMap = isl_map_add_constraint(TilingMap, c);
315
316 /* ip <= it + ('VectorWidth' - 1) */
317 c = isl_inequality_alloc(LocalSpace);
318 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1);
319 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1);
320 isl_constraint_set_constant_si(c, VectorWidth - 1);
321 TilingMap = isl_map_add_constraint(TilingMap, c);
322
323 isl_map_dump(TilingMap);
324
325 return TilingMap;
326 }
327
328 static bool EnablePollyVector = false;
329
330 /* getScheduleForBandList - Get the scheduling map for a list of bands.
331
332 We walk recursively the forest of bands to combine the schedules of the
333 individual bands to the overall schedule. In case tiling is requested,
334 the individual bands are tiled. */
335 static isl_union_map *
getScheduleForBandList(isl_band_list * BandList)336 getScheduleForBandList(isl_band_list *BandList)
337 {
338 int NumBands, i;
339 isl_union_map *Schedule;
340 isl_ctx *ctx;
341
342 ctx = isl_band_list_get_ctx(BandList);
343 NumBands = isl_band_list_n_band(BandList);
344 Schedule = isl_union_map_empty(isl_space_params_alloc(ctx, 0));
345
346 for (i = 0; i < NumBands; i++)
347 {
348 isl_band *Band;
349 isl_union_map *PartialSchedule;
350 int ScheduleDimensions;
351 isl_space *Space;
352
353 Band = isl_band_list_get_band(BandList, i);
354 PartialSchedule = getScheduleForBand(Band, &ScheduleDimensions);
355 Space = isl_union_map_get_space(PartialSchedule);
356
357 if (isl_band_has_children(Band))
358 {
359 isl_band_list *Children;
360 isl_union_map *SuffixSchedule;
361
362 Children = isl_band_get_children(Band);
363 SuffixSchedule = getScheduleForBandList(Children);
364 PartialSchedule = isl_union_map_flat_range_product(PartialSchedule,
365 SuffixSchedule);
366 isl_band_list_free(Children);
367 }
368 else if (EnablePollyVector)
369 {
370 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
371 {
372 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
373 if (isl_band_member_is_coincident (Band, i))
374 #else
375 if (isl_band_member_is_zero_distance(Band, i))
376 #endif
377 {
378 isl_map *TileMap;
379 isl_union_map *TileUMap;
380
381 TileMap = getPrevectorMap(ctx, i, ScheduleDimensions, 4);
382 TileUMap = isl_union_map_from_map(TileMap);
383 TileUMap = isl_union_map_align_params(TileUMap,
384 isl_space_copy(Space));
385 PartialSchedule = isl_union_map_apply_range(PartialSchedule,
386 TileUMap);
387 break;
388 }
389 }
390 }
391
392 Schedule = isl_union_map_union(Schedule, PartialSchedule);
393
394 isl_band_free(Band);
395 isl_space_free(Space);
396 }
397
398 return Schedule;
399 }
400
401 static isl_union_map *
getScheduleMap(isl_schedule * Schedule)402 getScheduleMap(isl_schedule *Schedule)
403 {
404 isl_band_list *BandList = isl_schedule_get_band_forest(Schedule);
405 isl_union_map *ScheduleMap = getScheduleForBandList(BandList);
406 isl_band_list_free(BandList);
407 return ScheduleMap;
408 }
409
410 static int
getSingleMap(__isl_take isl_map * map,void * user)411 getSingleMap(__isl_take isl_map *map, void *user)
412 {
413 isl_map **singleMap = (isl_map **) user;
414 *singleMap = map;
415
416 return 0;
417 }
418
419 static void
apply_schedule_map_to_scop(scop_p scop,isl_union_map * schedule_map)420 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
421 {
422 int i;
423 poly_bb_p pbb;
424
425 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
426 {
427 isl_set *domain = isl_set_copy (pbb->domain);
428 isl_union_map *stmtBand;
429 isl_map *stmtSchedule;
430
431 stmtBand = isl_union_map_intersect_domain(isl_union_map_copy(schedule_map),
432 isl_union_set_from_set(domain));
433 isl_union_map_foreach_map(stmtBand, getSingleMap, &stmtSchedule);
434 isl_map_free(pbb->transformed);
435 pbb->transformed = stmtSchedule;
436 isl_union_map_free(stmtBand);
437 }
438 }
439
440 static const int CONSTANT_BOUND = 20;
441
442 bool
optimize_isl(scop_p scop)443 optimize_isl (scop_p scop)
444 {
445
446 isl_schedule *schedule;
447 isl_union_set *domain;
448 isl_union_map *validity, *proximity, *dependences;
449 isl_union_map *schedule_map;
450
451 domain = scop_get_domains (scop);
452 dependences = scop_get_dependences (scop);
453 dependences = isl_union_map_gist_domain(dependences,
454 isl_union_set_copy(domain));
455 dependences = isl_union_map_gist_range(dependences,
456 isl_union_set_copy(domain));
457 validity = dependences;
458
459 proximity = isl_union_map_copy (validity);
460
461 isl_options_set_schedule_max_constant_term(scop->ctx, CONSTANT_BOUND);
462 isl_options_set_schedule_maximize_band_depth(scop->ctx, 1);
463 isl_options_set_schedule_fuse(scop->ctx, ISL_SCHEDULE_FUSE_MIN);
464 isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_CONTINUE);
465 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
466 isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_ABORT);
467
468 if (!schedule)
469 return false;
470
471 schedule_map = getScheduleMap (schedule);
472
473 apply_schedule_map_to_scop (scop, schedule_map);
474
475 isl_schedule_free (schedule);
476 isl_union_map_free (schedule_map);
477
478 return true;
479 }
480
481 #endif
482