1 /* A scheduling optimizer for Graphite
2    Copyright (C) 2012-2013 Free Software Foundation, Inc.
3    Contributed by Tobias Grosser <tobias@grosser.es>.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 
23 #ifdef HAVE_cloog
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #endif
32 
33 #include "system.h"
34 #include "coretypes.h"
35 #include "tree-flow.h"
36 #include "dumpfile.h"
37 #include "cfgloop.h"
38 #include "tree-chrec.h"
39 #include "tree-data-ref.h"
40 #include "tree-scalar-evolution.h"
41 #include "sese.h"
42 
43 #ifdef HAVE_cloog
44 #include "graphite-poly.h"
45 
46 static isl_union_set *
scop_get_domains(scop_p scop ATTRIBUTE_UNUSED)47 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
48 {
49   int i;
50   poly_bb_p pbb;
51   isl_space *space = isl_set_get_space (scop->context);
52   isl_union_set *res = isl_union_set_empty (space);
53 
54   FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
55     res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
56 
57   return res;
58 }
59 
60 static isl_union_map *
scop_get_dependences(scop_p scop)61 scop_get_dependences (scop_p scop)
62 {
63   isl_union_map *dependences;
64 
65   if (!scop->must_raw)
66     compute_deps (scop, SCOP_BBS (scop),
67 		  &scop->must_raw, &scop->may_raw,
68 		  &scop->must_raw_no_source, &scop->may_raw_no_source,
69 		  &scop->must_war, &scop->may_war,
70 		  &scop->must_war_no_source, &scop->may_war_no_source,
71 		  &scop->must_waw, &scop->may_waw,
72 		  &scop->must_waw_no_source, &scop->may_waw_no_source);
73 
74   dependences = isl_union_map_copy (scop->must_raw);
75   dependences = isl_union_map_union (dependences,
76 				     isl_union_map_copy (scop->must_war));
77   dependences = isl_union_map_union (dependences,
78 				     isl_union_map_copy (scop->must_waw));
79   dependences = isl_union_map_union (dependences,
80 				     isl_union_map_copy (scop->may_raw));
81   dependences = isl_union_map_union (dependences,
82 				     isl_union_map_copy (scop->may_war));
83   dependences = isl_union_map_union (dependences,
84 				     isl_union_map_copy (scop->may_waw));
85 
86   return dependences;
87 }
88 
89 /* getTileMap - Create a map that describes a n-dimensonal tiling.
90 
91    getTileMap creates a map from a n-dimensional scattering space into an
92    2*n-dimensional scattering space. The map describes a rectangular tiling.
93 
94    Example:
95      scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
96 
97     tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
98                          t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
99                          t1 % 32 = 0 and t1 <= s1 < t1 + 32}
100 
101    Before tiling:
102 
103    for (i = 0; i < N; i++)
104      for (j = 0; j < M; j++)
105  	S(i,j)
106 
107    After tiling:
108 
109    for (t_i = 0; t_i < N; i+=32)
110      for (t_j = 0; t_j < M; j+=32)
111  	for (i = t_i; i < min(t_i + 32, N); i++)  | Unknown that N % 32 = 0
112  	  for (j = t_j; j < t_j + 32; j++)        |   Known that M % 32 = 0
113  	    S(i,j)
114    */
115 
116 static isl_basic_map *
getTileMap(isl_ctx * ctx,int scheduleDimensions,int tileSize)117 getTileMap(isl_ctx *ctx, int scheduleDimensions, int tileSize)
118 {
119   int x;
120   /* We construct
121 
122      tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
123     	                s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
124     	                s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
125 
126      and project out the auxilary dimensions a0 and a1.  */
127   isl_space *Space = isl_space_alloc(ctx, 0, scheduleDimensions,
128 				     scheduleDimensions * 3);
129   isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space));
130 
131   isl_local_space *LocalSpace = isl_local_space_from_space(Space);
132 
133   for (x = 0; x < scheduleDimensions; x++)
134     {
135       int sX = x;
136       int tX = x;
137       int pX = scheduleDimensions + x;
138       int aX = 2 * scheduleDimensions + x;
139 
140       isl_constraint *c;
141 
142       /* sX = aX * tileSize; */
143       c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
144       isl_constraint_set_coefficient_si(c, isl_dim_out, sX, 1);
145       isl_constraint_set_coefficient_si(c, isl_dim_out, aX, -tileSize);
146       tileMap = isl_basic_map_add_constraint(tileMap, c);
147 
148       /* pX = sX; */
149       c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
150       isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
151       isl_constraint_set_coefficient_si(c, isl_dim_in, sX, -1);
152       tileMap = isl_basic_map_add_constraint(tileMap, c);
153 
154       /* tX <= pX */
155       c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
156       isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1);
157       isl_constraint_set_coefficient_si(c, isl_dim_out, tX, -1);
158       tileMap = isl_basic_map_add_constraint(tileMap, c);
159 
160       /* pX <= tX + (tileSize - 1) */
161       c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
162       isl_constraint_set_coefficient_si(c, isl_dim_out, tX, 1);
163       isl_constraint_set_coefficient_si(c, isl_dim_out, pX, -1);
164       isl_constraint_set_constant_si(c, tileSize - 1);
165       tileMap = isl_basic_map_add_constraint(tileMap, c);
166     }
167 
168   /* Project out auxilary dimensions.
169 
170      The auxilary dimensions are transformed into existentially quantified ones.
171      This reduces the number of visible scattering dimensions and allows Cloog
172      to produces better code.  */
173   tileMap = isl_basic_map_project_out(tileMap, isl_dim_out,
174 				      2 * scheduleDimensions,
175 				      scheduleDimensions);
176   isl_local_space_free(LocalSpace);
177   return tileMap;
178 }
179 
180 /* getScheduleForBand - Get the schedule for this band.
181 
182    Polly applies transformations like tiling on top of the isl calculated value.
183    This can influence the number of scheduling dimension. The number of
184    schedule dimensions is returned in the parameter 'Dimension'.  */
185 static bool DisableTiling = false;
186 
187 static isl_union_map *
getScheduleForBand(isl_band * Band,int * Dimensions)188 getScheduleForBand(isl_band *Band, int *Dimensions)
189 {
190   isl_union_map *PartialSchedule;
191   isl_ctx *ctx;
192   isl_space *Space;
193   isl_basic_map *TileMap;
194   isl_union_map *TileUMap;
195 
196   PartialSchedule = isl_band_get_partial_schedule(Band);
197   *Dimensions = isl_band_n_member(Band);
198 
199   if (DisableTiling)
200     return PartialSchedule;
201 
202   /* It does not make any sense to tile a band with just one dimension.  */
203   if (*Dimensions == 1)
204     return PartialSchedule;
205 
206   ctx = isl_union_map_get_ctx(PartialSchedule);
207   Space = isl_union_map_get_space(PartialSchedule);
208 
209   TileMap = getTileMap(ctx, *Dimensions, 32);
210   TileUMap = isl_union_map_from_map(isl_map_from_basic_map(TileMap));
211   TileUMap = isl_union_map_align_params(TileUMap, Space);
212   *Dimensions = 2 * *Dimensions;
213 
214   return isl_union_map_apply_range(PartialSchedule, TileUMap);
215 }
216 
217 /* Create a map that pre-vectorizes one scheduling dimension.
218 
219    getPrevectorMap creates a map that maps each input dimension to the same
220    output dimension, except for the dimension DimToVectorize. DimToVectorize is
221    strip mined by 'VectorWidth' and the newly created point loop of
222    DimToVectorize is moved to the innermost level.
223 
224    Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
225 
226    | Before transformation
227    |
228    | A[i,j] -> [i,j]
229    |
230    | for (i = 0; i < 128; i++)
231    |    for (j = 0; j < 128; j++)
232    |      A(i,j);
233 
234      Prevector map:
235      [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
236 
237    | After transformation:
238    |
239    | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
240    |
241    | for (it = 0; it < 128; it+=4)
242    |    for (j = 0; j < 128; j++)
243    |      for (ip = max(0,it); ip < min(128, it + 3); ip++)
244    |        A(ip,j);
245 
246    The goal of this transformation is to create a trivially vectorizable loop.
247    This means a parallel loop at the innermost level that has a constant number
248    of iterations corresponding to the target vector width.
249 
250    This transformation creates a loop at the innermost level. The loop has a
251    constant number of iterations, if the number of loop iterations at
252    DimToVectorize can be devided by VectorWidth. The default VectorWidth is
253    currently constant and not yet target specific. This function does not reason
254    about parallelism.  */
255 static isl_map *
getPrevectorMap(isl_ctx * ctx,int DimToVectorize,int ScheduleDimensions,int VectorWidth)256 getPrevectorMap(isl_ctx *ctx, int DimToVectorize,
257 		int ScheduleDimensions,
258 		int VectorWidth)
259 {
260   isl_space *Space;
261   isl_local_space *LocalSpace, *LocalSpaceRange;
262   isl_set *Modulo;
263   isl_map *TilingMap;
264   isl_constraint *c;
265   isl_aff *Aff;
266   int PointDimension; /* ip */
267   int TileDimension;  /* it */
268   isl_int VectorWidthMP;
269   int i;
270 
271   /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
272 
273   Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
274   TilingMap = isl_map_universe(isl_space_copy(Space));
275   LocalSpace = isl_local_space_from_space(Space);
276   PointDimension = ScheduleDimensions;
277   TileDimension = DimToVectorize;
278 
279   /* Create an identity map for everything except DimToVectorize and map
280      DimToVectorize to the point loop at the innermost dimension.  */
281   for (i = 0; i < ScheduleDimensions; i++)
282     {
283       c = isl_equality_alloc(isl_local_space_copy(LocalSpace));
284       isl_constraint_set_coefficient_si(c, isl_dim_in, i, -1);
285 
286       if (i == DimToVectorize)
287 	isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
288       else
289 	isl_constraint_set_coefficient_si(c, isl_dim_out, i, 1);
290 
291       TilingMap = isl_map_add_constraint(TilingMap, c);
292     }
293 
294   /* it % 'VectorWidth' = 0  */
295   LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace));
296   Aff = isl_aff_zero_on_domain(LocalSpaceRange);
297   Aff = isl_aff_set_constant_si(Aff, VectorWidth);
298   Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1);
299   isl_int_init(VectorWidthMP);
300   isl_int_set_si(VectorWidthMP, VectorWidth);
301   Aff = isl_aff_mod(Aff, VectorWidthMP);
302   isl_int_clear(VectorWidthMP);
303   Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff));
304   TilingMap = isl_map_intersect_range(TilingMap, Modulo);
305 
306   /* it <= ip */
307   c = isl_inequality_alloc(isl_local_space_copy(LocalSpace));
308   isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, -1);
309   isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1);
310   TilingMap = isl_map_add_constraint(TilingMap, c);
311 
312   /* ip <= it + ('VectorWidth' - 1) */
313   c = isl_inequality_alloc(LocalSpace);
314   isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1);
315   isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1);
316   isl_constraint_set_constant_si(c, VectorWidth - 1);
317   TilingMap = isl_map_add_constraint(TilingMap, c);
318 
319   isl_map_dump(TilingMap);
320 
321   return TilingMap;
322 }
323 
324 static bool EnablePollyVector = false;
325 
326 /* getScheduleForBandList - Get the scheduling map for a list of bands.
327 
328    We walk recursively the forest of bands to combine the schedules of the
329    individual bands to the overall schedule. In case tiling is requested,
330    the individual bands are tiled.  */
331 static isl_union_map *
getScheduleForBandList(isl_band_list * BandList)332 getScheduleForBandList(isl_band_list *BandList)
333 {
334   int NumBands, i;
335   isl_union_map *Schedule;
336   isl_ctx *ctx;
337 
338   ctx = isl_band_list_get_ctx(BandList);
339   NumBands = isl_band_list_n_band(BandList);
340   Schedule = isl_union_map_empty(isl_space_params_alloc(ctx, 0));
341 
342   for (i = 0; i < NumBands; i++)
343     {
344       isl_band *Band;
345       isl_union_map *PartialSchedule;
346       int ScheduleDimensions;
347       isl_space *Space;
348 
349       Band = isl_band_list_get_band(BandList, i);
350       PartialSchedule = getScheduleForBand(Band, &ScheduleDimensions);
351       Space = isl_union_map_get_space(PartialSchedule);
352 
353       if (isl_band_has_children(Band))
354 	{
355 	  isl_band_list *Children;
356 	  isl_union_map *SuffixSchedule;
357 
358 	  Children = isl_band_get_children(Band);
359 	  SuffixSchedule = getScheduleForBandList(Children);
360 	  PartialSchedule = isl_union_map_flat_range_product(PartialSchedule,
361 							     SuffixSchedule);
362 	  isl_band_list_free(Children);
363 	}
364       else if (EnablePollyVector)
365 	{
366 	  for (i = ScheduleDimensions - 1 ;  i >= 0 ; i--)
367 	    {
368 	      if (isl_band_member_is_zero_distance(Band, i))
369 		{
370 		  isl_map *TileMap;
371 		  isl_union_map *TileUMap;
372 
373 		  TileMap = getPrevectorMap(ctx, i, ScheduleDimensions, 4);
374 		  TileUMap = isl_union_map_from_map(TileMap);
375 		  TileUMap = isl_union_map_align_params(TileUMap,
376 							isl_space_copy(Space));
377 		  PartialSchedule = isl_union_map_apply_range(PartialSchedule,
378 							      TileUMap);
379 		  break;
380 		}
381 	    }
382 	}
383 
384       Schedule = isl_union_map_union(Schedule, PartialSchedule);
385 
386       isl_band_free(Band);
387       isl_space_free(Space);
388     }
389 
390   return Schedule;
391 }
392 
393 static isl_union_map *
getScheduleMap(isl_schedule * Schedule)394 getScheduleMap(isl_schedule *Schedule)
395 {
396   isl_band_list *BandList = isl_schedule_get_band_forest(Schedule);
397   isl_union_map *ScheduleMap = getScheduleForBandList(BandList);
398   isl_band_list_free(BandList);
399   return ScheduleMap;
400 }
401 
402 static int
getSingleMap(__isl_take isl_map * map,void * user)403 getSingleMap(__isl_take isl_map *map, void *user)
404 {
405   isl_map **singleMap = (isl_map **) user;
406   *singleMap = map;
407 
408   return 0;
409 }
410 
411 static void
apply_schedule_map_to_scop(scop_p scop,isl_union_map * schedule_map)412 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
413 {
414   int i;
415   poly_bb_p pbb;
416 
417   FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
418     {
419       isl_set *domain = isl_set_copy (pbb->domain);
420       isl_union_map *stmtBand;
421       isl_map *stmtSchedule;
422 
423       stmtBand = isl_union_map_intersect_domain(isl_union_map_copy(schedule_map),
424 						isl_union_set_from_set(domain));
425       isl_union_map_foreach_map(stmtBand, getSingleMap, &stmtSchedule);
426       isl_map_free(pbb->transformed);
427       pbb->transformed = stmtSchedule;
428       isl_union_map_free(stmtBand);
429     }
430 }
431 
432 static const int CONSTANT_BOUND = 20;
433 
434 bool
optimize_isl(scop_p scop)435 optimize_isl (scop_p scop)
436 {
437 
438   isl_schedule *schedule;
439   isl_union_set *domain;
440   isl_union_map *validity, *proximity, *dependences;
441   isl_union_map *schedule_map;
442 
443   domain = scop_get_domains (scop);
444   dependences = scop_get_dependences (scop);
445   dependences = isl_union_map_gist_domain(dependences,
446 					  isl_union_set_copy(domain));
447   dependences = isl_union_map_gist_range(dependences,
448 					 isl_union_set_copy(domain));
449   validity = dependences;
450 
451   proximity = isl_union_map_copy (validity);
452 
453   isl_options_set_schedule_max_constant_term(scop->ctx, CONSTANT_BOUND);
454   isl_options_set_schedule_maximize_band_depth(scop->ctx, 1);
455   isl_options_set_schedule_fuse(scop->ctx, ISL_SCHEDULE_FUSE_MIN);
456   isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_CONTINUE);
457   schedule = isl_union_set_compute_schedule (domain, validity, proximity);
458   isl_options_set_on_error(scop->ctx, ISL_ON_ERROR_ABORT);
459 
460   if (!schedule)
461     return false;
462 
463   schedule_map = getScheduleMap (schedule);
464 
465   apply_schedule_map_to_scop (scop, schedule_map);
466 
467   isl_schedule_free (schedule);
468   isl_union_map_free (schedule_map);
469 
470   return true;
471 }
472 
473 #endif
474