1 /* Heuristics and transform for loop blocking and strip mining on
2    polyhedral representation.
3 
4    Copyright (C) 2009-2014 Free Software Foundation, Inc.
5    Contributed by Sebastian Pop <sebastian.pop@amd.com> and
6    Pranav Garg  <pranav.garg2107@gmail.com>.
7 
8 This file is part of GCC.
9 
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14 
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 GNU General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3.  If not see
22 <http://www.gnu.org/licenses/>.  */
23 
24 #include "config.h"
25 
26 #ifdef HAVE_cloog
27 #include <isl/set.h>
28 #include <isl/map.h>
29 #include <isl/union_map.h>
30 #include <isl/constraint.h>
31 #include <cloog/cloog.h>
32 #include <cloog/isl/domain.h>
33 #endif
34 
35 #include "system.h"
36 #include "coretypes.h"
37 #include "tree.h"
38 #include "basic-block.h"
39 #include "tree-ssa-alias.h"
40 #include "internal-fn.h"
41 #include "gimple-expr.h"
42 #include "is-a.h"
43 #include "gimple.h"
44 #include "gimple-iterator.h"
45 #include "tree-ssa-loop.h"
46 #include "dumpfile.h"
47 #include "cfgloop.h"
48 #include "tree-chrec.h"
49 #include "tree-data-ref.h"
50 #include "sese.h"
51 
52 #ifdef HAVE_cloog
53 #include "graphite-poly.h"
54 
55 
56 /* Strip mines with a factor STRIDE the scattering (time) dimension
57    around PBB at depth TIME_DEPTH.
58 
59    The following example comes from the wiki page:
60    http://gcc.gnu.org/wiki/Graphite/Strip_mine
61 
62    The strip mine of a loop with a tile of 64 can be obtained with a
63    scattering function as follows:
64 
65    $ cat ./albert_strip_mine.cloog
66    # language: C
67    c
68 
69    # parameter {n | n >= 0}
70    1 3
71    #  n  1
72    1  1  0
73    1
74    n
75 
76    1 # Number of statements:
77 
78    1
79    # {i | 0 <= i <= n}
80    2 4
81    #  i  n   1
82    1  1  0   0
83    1 -1  1   0
84 
85    0  0  0
86    1
87    i
88 
89    1 # Scattering functions
90 
91    3 6
92    #  NEW  OLD    i    n    1
93    1  -64    0    1    0    0
94    1   64    0   -1    0   63
95    0    0    1   -1    0    0
96 
97    1
98    NEW  OLD
99 
100    #the output of CLooG is like this:
101    #$ cloog ./albert_strip_mine.cloog
102    # for (NEW=0;NEW<=floord(n,64);NEW++) {
103    #   for (OLD=max(64*NEW,0);OLD<=min(64*NEW+63,n);OLD++) {
104    #     S1(i = OLD) ;
105    #   }
106    # }
107 */
108 
109 static void
pbb_strip_mine_time_depth(poly_bb_p pbb,int time_depth,int stride)110 pbb_strip_mine_time_depth (poly_bb_p pbb, int time_depth, int stride)
111 {
112   isl_space *d;
113   isl_constraint *c;
114   int iter, strip;
115   /* STRIP is the dimension that iterates with stride STRIDE.  */
116   /* ITER is the dimension that enumerates single iterations inside
117      one strip that has at most STRIDE iterations.  */
118   strip = time_depth;
119   iter = strip + 2;
120 
121   pbb->transformed = isl_map_insert_dims (pbb->transformed, isl_dim_out,
122 					  strip, 2);
123 
124   /* Lower bound of the striped loop.  */
125   d = isl_map_get_space (pbb->transformed);
126   c = isl_inequality_alloc (isl_local_space_from_space (d));
127   c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip, -stride);
128   c = isl_constraint_set_coefficient_si (c, isl_dim_out, iter, 1);
129   pbb->transformed = isl_map_add_constraint (pbb->transformed, c);
130 
131   /* Upper bound of the striped loop.  */
132   d = isl_map_get_space (pbb->transformed);
133   c = isl_inequality_alloc (isl_local_space_from_space (d));
134   c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip, stride);
135   c = isl_constraint_set_coefficient_si (c, isl_dim_out, iter, -1);
136   c = isl_constraint_set_constant_si (c, stride - 1);
137   pbb->transformed = isl_map_add_constraint (pbb->transformed, c);
138 
139   /* Static scheduling for ITER level.
140      This is mandatory to keep the 2d + 1 canonical scheduling format.  */
141   d = isl_map_get_space (pbb->transformed);
142   c = isl_equality_alloc (isl_local_space_from_space (d));
143   c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip + 1, 1);
144   pbb->transformed = isl_map_add_constraint (pbb->transformed, c);
145 }
146 
147 /* Returns true when strip mining with STRIDE of the loop LST is
148    profitable.  */
149 
150 static bool
lst_strip_mine_profitable_p(lst_p lst,int stride)151 lst_strip_mine_profitable_p (lst_p lst, int stride)
152 {
153   mpz_t niter, strip_stride;
154   bool res;
155 
156   gcc_assert (LST_LOOP_P (lst));
157   mpz_init (strip_stride);
158   mpz_init (niter);
159 
160   mpz_set_si (strip_stride, stride);
161   lst_niter_for_loop (lst, niter);
162   res = (mpz_cmp (niter, strip_stride) > 0);
163 
164   mpz_clear (strip_stride);
165   mpz_clear (niter);
166   return res;
167 }
168 
169 /* Strip-mines all the loops of LST with STRIDE.  Return the number of
170    loops strip-mined.  */
171 
172 static int
lst_do_strip_mine_loop(lst_p lst,int depth,int stride)173 lst_do_strip_mine_loop (lst_p lst, int depth, int stride)
174 {
175   int i;
176   lst_p l;
177   poly_bb_p pbb;
178 
179   if (!lst)
180     return 0;
181 
182   if (LST_LOOP_P (lst))
183     {
184       int res = 0;
185 
186       FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l)
187 	res += lst_do_strip_mine_loop (l, depth, stride);
188 
189       return res;
190     }
191 
192   pbb = LST_PBB (lst);
193   pbb_strip_mine_time_depth (pbb, psct_dynamic_dim (pbb, depth), stride);
194   return 1;
195 }
196 
197 /* Strip-mines all the loops of LST with STRIDE.  When STRIDE is zero,
198    read the stride from the PARAM_LOOP_BLOCK_TILE_SIZE.  Return the
199    number of strip-mined loops.
200 
201    Strip mining transforms a loop
202 
203    | for (i = 0; i < N; i++)
204    |   S (i);
205 
206    into the following loop nest:
207 
208    | for (k = 0; k < N; k += STRIDE)
209    |   for (j = 0; j < STRIDE; j++)
210    |     S (i = k + j);
211 */
212 
213 static int
lst_do_strip_mine(lst_p lst,int stride)214 lst_do_strip_mine (lst_p lst, int stride)
215 {
216   int i;
217   lst_p l;
218   int res = 0;
219   int depth;
220 
221   if (!stride)
222     stride = PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE);
223 
224   if (!lst
225       || !LST_LOOP_P (lst))
226     return false;
227 
228   FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l)
229     res += lst_do_strip_mine (l, stride);
230 
231   depth = lst_depth (lst);
232   if (depth >= 0
233       && lst_strip_mine_profitable_p (lst, stride))
234     {
235       res += lst_do_strip_mine_loop (lst, lst_depth (lst), stride);
236       lst_add_loop_under_loop (lst);
237     }
238 
239   return res;
240 }
241 
242 /* Strip mines all the loops in SCOP.  Returns the number of
243    strip-mined loops.  */
244 
245 int
scop_do_strip_mine(scop_p scop,int stride)246 scop_do_strip_mine (scop_p scop, int stride)
247 {
248   return lst_do_strip_mine (SCOP_TRANSFORMED_SCHEDULE (scop), stride);
249 }
250 
251 /* Loop blocks all the loops in SCOP.  Returns true when we manage to
252    block some loops.  */
253 
254 bool
scop_do_block(scop_p scop)255 scop_do_block (scop_p scop)
256 {
257   store_scattering (scop);
258 
259   /* If we don't strip mine at least two loops, or not interchange
260      loops, the strip mine alone will not be profitable, and the
261      transform is not a loop blocking: so revert the transform.  */
262   if (lst_do_strip_mine (SCOP_TRANSFORMED_SCHEDULE (scop), 0) < 2
263       || scop_do_interchange (scop) == 0)
264     {
265       restore_scattering (scop);
266       return false;
267     }
268 
269   if (dump_file && (dump_flags & TDF_DETAILS))
270     fprintf (dump_file, "SCoP will be loop blocked.\n");
271 
272   return true;
273 }
274 
275 #endif
276