1 /* A scheduling optimizer for Graphite
2    Copyright (C) 2012-2013 Free Software Foundation, Inc.
3    Contributed by Tobias Grosser <tobias@grosser.es>.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 
23 #ifdef HAVE_cloog
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
32 #include <isl/deprecated/int.h>
33 #include <isl/deprecated/aff_int.h>
34 #endif
35 #endif
36 
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tree-flow.h"
40 #include "dumpfile.h"
41 #include "cfgloop.h"
42 #include "tree-chrec.h"
43 #include "tree-data-ref.h"
44 #include "tree-scalar-evolution.h"
45 #include "sese.h"
46 
47 #ifdef HAVE_cloog
48 #include "graphite-poly.h"
49 
50 static isl_union_set *
scop_get_domains(scop_p scop ATTRIBUTE_UNUSED)51 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
52 {
53   int i;
54   poly_bb_p pbb;
55   isl_space *space = isl_set_get_space (scop->context);
56   isl_union_set *res = isl_union_set_empty (space);
57 
58   FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
59     res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
60 
61   return res;
62 }
63 
64 static isl_union_map *
scop_get_dependences(scop_p scop)65 scop_get_dependences (scop_p scop)
66 {
67   isl_union_map *dependences;
68 
69   if (!scop->must_raw)
70     compute_deps (scop, SCOP_BBS (scop),
71 		  &scop->must_raw, &scop->may_raw,
72 		  &scop->must_raw_no_source, &scop->may_raw_no_source,
73 		  &scop->must_war, &scop->may_war,
74 		  &scop->must_war_no_source, &scop->may_war_no_source,
75 		  &scop->must_waw, &scop->may_waw,
76 		  &scop->must_waw_no_source, &scop->may_waw_no_source);
77 
78   dependences = isl_union_map_copy (scop->must_raw);
79   dependences = isl_union_map_union (dependences,
80 				     isl_union_map_copy (scop->must_war));
81   dependences = isl_union_map_union (dependences,
82 				     isl_union_map_copy (scop->must_waw));
83   dependences = isl_union_map_union (dependences,
84 				     isl_union_map_copy (scop->may_raw));
85   dependences = isl_union_map_union (dependences,
86 				     isl_union_map_copy (scop->may_war));
87   dependences = isl_union_map_union (dependences,
88 				     isl_union_map_copy (scop->may_waw));
89 
90   return dependences;
91 }
92 
93 /* getTileMap - Create a map that describes a n-dimensonal tiling.
94 
95    getTileMap creates a map from a n-dimensional scattering space into an
96    2*n-dimensional scattering space. The map describes a rectangular tiling.
97 
98    Example:
99      scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
100 
101     tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
102                          t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
103                          t1 % 32 = 0 and t1 <= s1 < t1 + 32}
104 
105    Before tiling:
106 
107    for (i = 0; i < N; i++)
108      for (j = 0; j < M; j++)
109  	S(i,j)
110 
111    After tiling:
112 
113    for (t_i = 0; t_i < N; i+=32)
114      for (t_j = 0; t_j < M; j+=32)
115  	for (i = t_i; i < min(t_i + 32, N); i++)  | Unknown that N % 32 = 0
116  	  for (j = t_j; j < t_j + 32; j++)        |   Known that M % 32 = 0
117  	    S(i,j)
118    */
119 
120 static isl_basic_map *
getTileMap(isl_ctx * ctx,int scheduleDimensions,int tileSize)121 getTileMap(isl_ctx *ctx, int scheduleDimensions, int tileSize)
122 {
123   int x;
124   /* We construct
125 
126      tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
127     	                s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
128     	                s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
129 
130      and project out the auxilary dimensions a0 and a1.  */
131   isl_space *Space = isl_space_alloc(ctx, 0, scheduleDimensions,
132 				     scheduleDimensions * 3);
133   isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space));
134 
135   isl_local_space *LocalSpace = isl_local_space_from_space(Space);
136 
137   for (x = 0; x < scheduleDimensions; x++)
138     {
139       int sX = x;
140       int tX = x;
141       int pX = scheduleDimensions + x;
142       int aX = 2 * scheduleDimensions + x;
143 
144       isl_constraint *c;
145 
146       /* sX = aX * tileSize; */
147       c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
148       isl_constraint_set_coefficient_si(c, isl_dim_out, sX, 1);
149       isl_constraint_set_coefficient_si(c, isl_dim_out, aX, -tileSize);
150       tileMap = isl_basic_map_add_constraint(tileMap, c);
151 
152       /* pX = sX; */
153       c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
154       isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
155       isl_constraint_set_coefficient_si(c, isl_dim_in, sX, -1);
156       tileMap = isl_basic_map_add_constraint(tileMap, c);
157 
158       /* tX <= pX */
159       c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
160       isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
161       isl_constraint_set_coefficient_si(c, isl_dim_out, tX, -1);
162       tileMap = isl_basic_map_add_constraint(tileMap, c);
163 
164       /* pX <= tX + (tileSize - 1) */
165       c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
166       isl_constraint_set_coefficient_si(c, isl_dim_out, tX, 1);
167       isl_constraint_set_coefficient_si(c, isl_dim_out, pX, -1);
168       isl_constraint_set_constant_si(c, tileSize - 1);
169       tileMap = isl_basic_map_add_constraint(tileMap, c);
170     }
171 
172   /* Project out auxilary dimensions.
173 
174      The auxilary dimensions are transformed into existentially quantified ones.
175      This reduces the number of visible scattering dimensions and allows Cloog
176      to produces better code.  */
177   tileMap = isl_basic_map_project_out(tileMap, isl_dim_out,
178 				      2 * scheduleDimensions,
179 				      scheduleDimensions);
180   isl_local_space_free(LocalSpace);
181   return tileMap;
182 }
183 
184 /* getScheduleForBand - Get the schedule for this band.
185 
186    Polly applies transformations like tiling on top of the isl calculated value.
187    This can influence the number of scheduling dimension. The number of
188    schedule dimensions is returned in the parameter 'Dimension'.  */
189 static bool DisableTiling = false;
190 
191 static isl_union_map *
getScheduleForBand(isl_band * Band,int * Dimensions)192 getScheduleForBand(isl_band *Band, int *Dimensions)
193 {
194   isl_union_map *PartialSchedule;
195   isl_ctx *ctx;
196   isl_space *Space;
197   isl_basic_map *TileMap;
198   isl_union_map *TileUMap;
199 
200   PartialSchedule = isl_band_get_partial_schedule(Band);
201   *Dimensions = isl_band_n_member(Band);
202 
203   if (DisableTiling)
204     return PartialSchedule;
205 
206   /* It does not make any sense to tile a band with just one dimension.  */
207   if (*Dimensions == 1)
208     return PartialSchedule;
209 
210   ctx = isl_union_map_get_ctx(PartialSchedule);
211   Space = isl_union_map_get_space(PartialSchedule);
212 
213   TileMap = getTileMap(ctx, *Dimensions, 32);
214   TileUMap = isl_union_map_from_map(isl_map_from_basic_map(TileMap));
215   TileUMap = isl_union_map_align_params(TileUMap, Space);
216   *Dimensions = 2 * *Dimensions;
217 
218   return isl_union_map_apply_range(PartialSchedule, TileUMap);
219 }
220 
221 /* Create a map that pre-vectorizes one scheduling dimension.
222 
223    getPrevectorMap creates a map that maps each input dimension to the same
224    output dimension, except for the dimension DimToVectorize. DimToVectorize is
225    strip mined by 'VectorWidth' and the newly created point loop of
226    DimToVectorize is moved to the innermost level.
227 
228    Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
229 
230    | Before transformation
231    |
232    | A[i,j] -> [i,j]
233    |
234    | for (i = 0; i < 128; i++)
235    |    for (j = 0; j < 128; j++)
236    |      A(i,j);
237 
238      Prevector map:
239      [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
240 
241    | After transformation:
242    |
243    | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
244    |
245    | for (it = 0; it < 128; it+=4)
246    |    for (j = 0; j < 128; j++)
247    |      for (ip = max(0,it); ip < min(128, it + 3); ip++)
248    |        A(ip,j);
249 
250    The goal of this transformation is to create a trivially vectorizable loop.
251    This means a parallel loop at the innermost level that has a constant number
252    of iterations corresponding to the target vector width.
253 
254    This transformation creates a loop at the innermost level. The loop has a
255    constant number of iterations, if the number of loop iterations at
256    DimToVectorize can be devided by VectorWidth. The default VectorWidth is
257    currently constant and not yet target specific. This function does not reason
258    about parallelism.  */
259 static isl_map *
getPrevectorMap(isl_ctx * ctx,int DimToVectorize,int ScheduleDimensions,int VectorWidth)260 getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
261 		int ScheduleDimensions,
262 		int VectorWidth)
263 {
264   isl_space *Space;
265   isl_local_space *LocalSpace, *LocalSpaceRange;
266   isl_set *Modulo;
267   isl_map *TilingMap;
268   isl_constraint *c;
269   isl_aff *Aff;
270   int PointDimension; /* ip */
271   int TileDimension;  /* it */
272   isl_int VectorWidthMP;
273   int i;
274 
275   /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
276 
277   Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
278   TilingMap = isl_map_universe(isl_space_copy(Space));
279   LocalSpace = isl_local_space_from_space(Space);
280   PointDimension = ScheduleDimensions;
281   TileDimension = DimToVectorize;
282 
283   /* Create an identity map for everything except DimToVectorize and map
284      DimToVectorize to the point loop at the innermost dimension.  */
285   for (i = 0; i < ScheduleDimensions; i++)
286     {
287       c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
288       isl_constraint_set_coefficient_si(c, isl_dim_in, i, -1);
289 
290       if (i == DimToVectorize)
291 	isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
292       else
293 	isl_constraint_set_coefficient_si(c, isl_dim_out, i, 1);
294 
295       TilingMap = isl_map_add_constraint(TilingMap, c);
296     }
297 
298   /* it % 'VectorWidth' = 0  */
299   LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace));
300   Aff = isl_aff_zero_on_domain(LocalSpaceRange);
301   Aff = isl_aff_set_constant_si(Aff, VectorWidth);
302   Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1);
303   isl_int_init(VectorWidthMP);
304   isl_int_set_si(VectorWidthMP, VectorWidth);
305   Aff = isl_aff_mod(Aff, VectorWidthMP);
306   isl_int_clear(VectorWidthMP);
307   Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff));
308   TilingMap = isl_map_intersect_range(TilingMap, Modulo);
309 
310   /* it <= ip */
311   c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
312   isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, -1);
313   isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
314   TilingMap = isl_map_add_constraint(TilingMap, c);
315 
316   /* ip <= it + ('VectorWidth' - 1) */
317   c = isl_inequality_alloc(LocalSpace);
318   isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1);
319   isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1);
320   isl_constraint_set_constant_si(c, VectorWidth - 1);
321   TilingMap = isl_map_add_constraint(TilingMap, c);
322 
323   isl_map_dump(TilingMap);
324 
325   return TilingMap;
326 }
327 
328 static bool EnablePollyVector = false;
329 
330 /* getScheduleForBandList - Get the scheduling map for a list of bands.
331 
332    We walk recursively the forest of bands to combine the schedules of the
333    individual bands to the overall schedule. In case tiling is requested,
334    the individual bands are tiled.  */
335 static isl_union_map *
getScheduleForBandList(isl_band_list * BandList)336 getScheduleForBandList(isl_band_list *BandList)
337 {
338   int NumBands, i;
339   isl_union_map *Schedule;
340   isl_ctx *ctx;
341 
342   ctx = isl_band_list_get_ctx(BandList);
343   NumBands = isl_band_list_n_band(BandList);
344   Schedule = isl_union_map_empty(isl_space_params_alloc(ctx, 0));
345 
346   for (i = 0; i < NumBands; i++)
347     {
348       isl_band *Band;
349       isl_union_map *PartialSchedule;
350       int ScheduleDimensions;
351       isl_space *Space;
352 
353       Band = isl_band_list_get_band(BandList, i);
354       PartialSchedule = getScheduleForBand(Band, &ScheduleDimensions);
355       Space = isl_union_map_get_space(PartialSchedule);
356 
357       if (isl_band_has_children(Band))
358 	{
359 	  isl_band_list *Children;
360 	  isl_union_map *SuffixSchedule;
361 
362 	  Children = isl_band_get_children(Band);
363 	  SuffixSchedule = getScheduleForBandList(Children);
364 	  PartialSchedule = isl_union_map_flat_range_product(PartialSchedule,
365 							     SuffixSchedule);
366 	  isl_band_list_free(Children);
367 	}
368       else if (EnablePollyVector)
369 	{
370 	  for (i = ScheduleDimensions - 1 ;  i >= 0 ; i--)
371 	    {
372 #ifdef HAVE_ISL_SCHED_CONSTRAINTS_COMPUTE_SCHEDULE
373 	      if (isl_band_member_is_coincident (Band, i))
374 #else
375 	      if (isl_band_member_is_zero_distance(Band, i))
376 #endif
377 		{
378 		  isl_map *TileMap;
379 		  isl_union_map *TileUMap;
380 
381 		  TileMap = getPrevectorMap(ctx, i, ScheduleDimensions, 4);
382 		  TileUMap = isl_union_map_from_map(TileMap);
383 		  TileUMap = isl_union_map_align_params(TileUMap,
384 							isl_space_copy(Space));
385 		  PartialSchedule = isl_union_map_apply_range(PartialSchedule,
386 							      TileUMap);
387 		  break;
388 		}
389 	    }
390 	}
391 
392       Schedule = isl_union_map_union(Schedule, PartialSchedule);
393 
394       isl_band_free(Band);
395       isl_space_free(Space);
396     }
397 
398   return Schedule;
399 }
400 
401 static isl_union_map *
getScheduleMap(isl_schedule * Schedule)402 getScheduleMap(isl_schedule *Schedule)
403 {
404   isl_band_list *BandList = isl_schedule_get_band_forest(Schedule);
405   isl_union_map *ScheduleMap = getScheduleForBandList(BandList);
406   isl_band_list_free(BandList);
407   return ScheduleMap;
408 }
409 
410 static int
getSingleMap(__isl_take isl_map * map,void * user)411 getSingleMap(__isl_take isl_map *map, void *user)
412 {
413   isl_map **singleMap = (isl_map **) user;
414   *singleMap = map;
415 
416   return 0;
417 }
418 
419 static void
apply_schedule_map_to_scop(scop_p scop,isl_union_map * schedule_map)420 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
421 {
422   int i;
423   poly_bb_p pbb;
424 
425   FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
426     {
427       isl_set *domain = isl_set_copy (pbb->domain);
428       isl_union_map *stmtBand;
429       isl_map *stmtSchedule;
430 
431       stmtBand = isl_union_map_intersect_domain(isl_union_map_copy(schedule_map),
432 						isl_union_set_from_set(domain));
433       isl_union_map_foreach_map(stmtBand, getSingleMap, &stmtSchedule);
434       isl_map_free(pbb->transformed);
435       pbb->transformed = stmtSchedule;
436       isl_union_map_free(stmtBand);
437     }
438 }
439 
440 static const int CONSTANT_BOUND = 20;
441 
442 bool
optimize_isl(scop_p scop)443 optimize_isl (scop_p scop)
444 {
445 
446   isl_schedule *schedule;
447   isl_union_set *domain;
448   isl_union_map *validity, *proximity, *dependences;
449   isl_union_map *schedule_map;
450 
451   domain = scop_get_domains (scop);
452   dependences = scop_get_dependences (scop);
453   dependences = isl_union_map_gist_domain(dependences,
454 					  isl_union_set_copy(domain));
455   dependences = isl_union_map_gist_range(dependences,
456 					 isl_union_set_copy(domain));
457   validity = dependences;
458 
459   proximity = isl_union_map_copy (validity);
460 
461   isl_options_set_schedule_max_constant_term(scop->ctx, CONSTANT_BOUND);
462   isl_options_set_schedule_maximize_band_depth(scop->ctx, 1);
463   isl_options_set_schedule_fuse(scop->ctx, ISL_SCHEDULE_FUSE_MIN);
464   isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_CONTINUE);
465   schedule = isl_union_set_compute_schedule (domain, validity, proximity);
466   isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_ABORT);
467 
468   if (!schedule)
469     return false;
470 
471   schedule_map = getScheduleMap (schedule);
472 
473   apply_schedule_map_to_scop (scop, schedule_map);
474 
475   isl_schedule_free (schedule);
476   isl_union_map_free (schedule_map);
477 
478   return true;
479 }
480 
481 #endif
482