1 /* Loop flattening for Graphite. 2 Copyright (C) 2010 Free Software Foundation, Inc. 3 Contributed by Sebastian Pop <sebastian.pop@amd.com>. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "tree-flow.h" 25 #include "tree-dump.h" 26 #include "cfgloop.h" 27 #include "tree-chrec.h" 28 #include "tree-data-ref.h" 29 #include "tree-scalar-evolution.h" 30 #include "sese.h" 31 32 #ifdef HAVE_cloog 33 #include "ppl_c.h" 34 #include "graphite-ppl.h" 35 #include "graphite-poly.h" 36 37 /* The loop flattening pass transforms loop nests into a single loop, 38 removing the loop nesting structure. The auto-vectorization can 39 then apply on the full loop body, without needing the outer-loop 40 vectorization. 41 42 The loop flattening pass that has been described in a very Fortran 43 specific way in the 1992 paper by Reinhard von Hanxleden and Ken 44 Kennedy: "Relaxing SIMD Control Flow Constraints using Loop 45 Transformations" available from 46 http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033 47 48 The canonical example is as follows: suppose that we have a loop 49 nest with known iteration counts 50 51 | for (i = 1; i <= 6; i++) 52 | for (j = 1; j <= 6; j++) 53 | S1(i,j); 54 55 The loop flattening is performed by linearizing the iteration space 56 using the function "f (x) = 6 * i + j". In this case, CLooG would 57 produce this code: 58 59 | for (c1=7;c1<=42;c1++) { 60 | i = floord(c1-1,6); 61 | S1(i,c1-6*i); 62 | } 63 64 There are several limitations for loop flattening that are linked 65 to the expressivity of the polyhedral model. One has to take an 66 upper bound approximation to deal with the parametric case of loop 67 flattening. For example, in the loop nest: 68 69 | for (i = 1; i <= N; i++) 70 | for (j = 1; j <= M; j++) 71 | S1(i,j); 72 73 One would like to flatten this loop using a linearization function 74 like this "f (x) = M * i + j". However CLooG's schedules are not 75 expressive enough to deal with this case, and so the parameter M 76 has to be replaced by an integer upper bound approximation. If we 77 further know in the context of the scop that "M <= 6", then it is 78 possible to linearize the loop with "f (x) = 6 * i + j". In this 79 case, CLooG would produce this code: 80 81 | for (c1=7;c1<=6*M+N;c1++) { 82 | i = ceild(c1-N,6); 83 | if (i <= floord(c1-1,6)) { 84 | S1(i,c1-6*i); 85 | } 86 | } 87 88 For an arbitrarily complex loop nest the algorithm proceeds in two 89 steps. First, the LST is flattened by removing the loops structure 90 and by inserting the statements in the order they appear in 91 depth-first order. Then, the scattering of each statement is 92 transformed accordingly. 93 94 Supposing that the original program is represented by the following 95 LST: 96 97 | (loop_1 98 | stmt_1 99 | (loop_2 stmt_3 100 | (loop_3 stmt_4) 101 | (loop_4 stmt_5 stmt_6) 102 | stmt_7 103 | ) 104 | stmt_2 105 | ) 106 107 Loop flattening traverses the LST in depth-first order, and 108 flattens pairs of loops successively by projecting the inner loops 109 in the iteration domain of the outer loops: 110 111 lst_project_loop (loop_2, loop_3, stride) 112 113 | (loop_1 114 | stmt_1 115 | (loop_2 stmt_3 stmt_4 116 | (loop_4 stmt_5 stmt_6) 117 | stmt_7 118 | ) 119 | stmt_2 120 | ) 121 122 lst_project_loop (loop_2, loop_4, stride) 123 124 | (loop_1 125 | stmt_1 126 | (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7) 127 | stmt_2 128 | ) 129 130 lst_project_loop (loop_1, loop_2, stride) 131 132 | (loop_1 133 | stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2 134 | ) 135 136 At each step, the iteration domain of the outer loop is enlarged to 137 contain enough points to iterate over the inner loop domain. */ 138 139 /* Initializes RES to the number of iterations of the linearized loop 140 LST. RES is the cardinal of the iteration domain of LST. */ 141 142 static void 143 lst_linearized_niter (lst_p lst, mpz_t res) 144 { 145 int i; 146 lst_p l; 147 mpz_t n; 148 149 mpz_init (n); 150 mpz_set_si (res, 0); 151 152 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l) 153 if (LST_LOOP_P (l)) 154 { 155 lst_linearized_niter (l, n); 156 mpz_add (res, res, n); 157 } 158 159 if (LST_LOOP_P (lst)) 160 { 161 lst_niter_for_loop (lst, n); 162 163 if (mpz_cmp_si (res, 0) != 0) 164 mpz_mul (res, res, n); 165 else 166 mpz_set (res, n); 167 } 168 169 mpz_clear (n); 170 } 171 172 /* Applies the translation "f (x) = x + OFFSET" to the loop containing 173 STMT. */ 174 175 static void 176 lst_offset (lst_p stmt, mpz_t offset) 177 { 178 lst_p inner = LST_LOOP_FATHER (stmt); 179 poly_bb_p pbb = LST_PBB (stmt); 180 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb); 181 int inner_depth = lst_depth (inner); 182 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth); 183 ppl_Linear_Expression_t expr; 184 ppl_dimension_type dim; 185 ppl_Coefficient_t one; 186 mpz_t x; 187 188 mpz_init (x); 189 mpz_set_si (x, 1); 190 ppl_new_Coefficient (&one); 191 ppl_assign_Coefficient_from_mpz_t (one, x); 192 193 ppl_Polyhedron_space_dimension (poly, &dim); 194 ppl_new_Linear_Expression_with_dimension (&expr, dim); 195 196 ppl_set_coef (expr, inner_dim, 1); 197 ppl_set_inhomogeneous_gmp (expr, offset); 198 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one); 199 ppl_delete_Linear_Expression (expr); 200 ppl_delete_Coefficient (one); 201 } 202 203 /* Scale by FACTOR the loop LST containing STMT. */ 204 205 static void 206 lst_scale (lst_p lst, lst_p stmt, mpz_t factor) 207 { 208 mpz_t x; 209 ppl_Coefficient_t one; 210 int outer_depth = lst_depth (lst); 211 poly_bb_p pbb = LST_PBB (stmt); 212 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb); 213 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth); 214 ppl_Linear_Expression_t expr; 215 ppl_dimension_type dim; 216 217 mpz_init (x); 218 mpz_set_si (x, 1); 219 ppl_new_Coefficient (&one); 220 ppl_assign_Coefficient_from_mpz_t (one, x); 221 222 ppl_Polyhedron_space_dimension (poly, &dim); 223 ppl_new_Linear_Expression_with_dimension (&expr, dim); 224 225 /* outer_dim = factor * outer_dim. */ 226 ppl_set_coef_gmp (expr, outer_dim, factor); 227 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one); 228 ppl_delete_Linear_Expression (expr); 229 230 mpz_clear (x); 231 ppl_delete_Coefficient (one); 232 } 233 234 /* Project the INNER loop into the iteration domain of the OUTER loop. 235 STRIDE is the number of iterations between two iterations of the 236 outer loop. */ 237 238 static void 239 lst_project_loop (lst_p outer, lst_p inner, mpz_t stride) 240 { 241 int i; 242 lst_p stmt; 243 mpz_t x; 244 ppl_Coefficient_t one; 245 int outer_depth = lst_depth (outer); 246 int inner_depth = lst_depth (inner); 247 248 mpz_init (x); 249 mpz_set_si (x, 1); 250 ppl_new_Coefficient (&one); 251 ppl_assign_Coefficient_from_mpz_t (one, x); 252 253 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner), i, stmt) 254 { 255 poly_bb_p pbb = LST_PBB (stmt); 256 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb); 257 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth); 258 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth); 259 ppl_Linear_Expression_t expr; 260 ppl_dimension_type dim; 261 ppl_dimension_type *ds; 262 263 /* There should be no loops under INNER. */ 264 gcc_assert (!LST_LOOP_P (stmt)); 265 ppl_Polyhedron_space_dimension (poly, &dim); 266 ppl_new_Linear_Expression_with_dimension (&expr, dim); 267 268 /* outer_dim = outer_dim * stride + inner_dim. */ 269 ppl_set_coef (expr, inner_dim, 1); 270 ppl_set_coef_gmp (expr, outer_dim, stride); 271 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one); 272 ppl_delete_Linear_Expression (expr); 273 274 /* Project on inner_dim. */ 275 ppl_new_Linear_Expression_with_dimension (&expr, dim - 1); 276 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one); 277 ppl_delete_Linear_Expression (expr); 278 279 /* Remove inner loop and the static schedule of its body. */ 280 /* FIXME: As long as we use PPL we are not able to remove the old 281 scattering dimensions. The reason is that these dimensions are not 282 entirely unused. They are not necessary as part of the scheduling 283 vector, as the earlier dimensions already unambiguously define the 284 execution time, however they may still be needed to carry modulo 285 constraints as introduced e.g. by strip mining. The correct solution 286 would be to project these dimensions out of the scattering polyhedra. 287 In case they are still required to carry modulo constraints they should be kept 288 internally as existentially quantified dimensions. PPL does only support 289 projection of rational polyhedra, however in this case we need an integer 290 projection. With isl this will be trivial to implement. For now we just 291 leave the dimensions. This is a little ugly, but should be correct. */ 292 if (0) { 293 ds = XNEWVEC (ppl_dimension_type, 2); 294 ds[0] = inner_dim; 295 ds[1] = inner_dim + 1; 296 ppl_Polyhedron_remove_space_dimensions (poly, ds, 2); 297 PBB_NB_SCATTERING_TRANSFORM (pbb) -= 2; 298 free (ds); 299 } 300 } 301 302 mpz_clear (x); 303 ppl_delete_Coefficient (one); 304 } 305 306 /* Flattens the loop nest LST. Return true when something changed. 307 OFFSET is used to compute the number of iterations of the outermost 308 loop before the current LST is executed. */ 309 310 static bool 311 lst_flatten_loop (lst_p lst, mpz_t init_offset) 312 { 313 int i; 314 lst_p l; 315 bool res = false; 316 mpz_t n, one, offset, stride; 317 318 mpz_init (n); 319 mpz_init (one); 320 mpz_init (offset); 321 mpz_init (stride); 322 mpz_set (offset, init_offset); 323 mpz_set_si (one, 1); 324 325 lst_linearized_niter (lst, stride); 326 lst_niter_for_loop (lst, n); 327 mpz_tdiv_q (stride, stride, n); 328 329 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l) 330 if (LST_LOOP_P (l)) 331 { 332 res = true; 333 334 lst_flatten_loop (l, offset); 335 lst_niter_for_loop (l, n); 336 337 lst_project_loop (lst, l, stride); 338 339 /* The offset is the number of iterations minus 1, as we want 340 to execute the next statements at the same iteration as the 341 last iteration of the loop. */ 342 mpz_sub (n, n, one); 343 mpz_add (offset, offset, n); 344 } 345 else 346 { 347 lst_scale (lst, l, stride); 348 if (mpz_cmp_si (offset, 0) != 0) 349 lst_offset (l, offset); 350 } 351 352 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l) 353 if (LST_LOOP_P (l)) 354 lst_remove_loop_and_inline_stmts_in_loop_father (l); 355 356 mpz_clear (n); 357 mpz_clear (one); 358 mpz_clear (offset); 359 mpz_clear (stride); 360 return res; 361 } 362 363 /* Remove all but the first 3 dimensions of the scattering: 364 - dim0: the static schedule for the loop 365 - dim1: the dynamic schedule of the loop 366 - dim2: the static schedule for the loop body. */ 367 368 static void 369 remove_unused_scattering_dimensions (lst_p lst) 370 { 371 int i; 372 lst_p stmt; 373 mpz_t x; 374 ppl_Coefficient_t one; 375 376 mpz_init (x); 377 mpz_set_si (x, 1); 378 ppl_new_Coefficient (&one); 379 ppl_assign_Coefficient_from_mpz_t (one, x); 380 381 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, stmt) 382 { 383 poly_bb_p pbb = LST_PBB (stmt); 384 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb); 385 int j, nb_dims_to_remove = PBB_NB_SCATTERING_TRANSFORM (pbb) - 3; 386 ppl_dimension_type *ds; 387 388 /* There should be no loops inside LST after flattening. */ 389 gcc_assert (!LST_LOOP_P (stmt)); 390 391 if (!nb_dims_to_remove) 392 continue; 393 394 ds = XNEWVEC (ppl_dimension_type, nb_dims_to_remove); 395 for (j = 0; j < nb_dims_to_remove; j++) 396 ds[j] = j + 3; 397 398 ppl_Polyhedron_remove_space_dimensions (poly, ds, nb_dims_to_remove); 399 PBB_NB_SCATTERING_TRANSFORM (pbb) -= nb_dims_to_remove; 400 free (ds); 401 } 402 403 mpz_clear (x); 404 ppl_delete_Coefficient (one); 405 } 406 407 /* Flattens all the loop nests of LST. Return true when something 408 changed. */ 409 410 static bool 411 lst_do_flatten (lst_p lst) 412 { 413 int i; 414 lst_p l; 415 bool res = false; 416 mpz_t zero; 417 418 if (!lst 419 || !LST_LOOP_P (lst)) 420 return false; 421 422 mpz_init (zero); 423 mpz_set_si (zero, 0); 424 425 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l) 426 if (LST_LOOP_P (l)) 427 { 428 res |= lst_flatten_loop (l, zero); 429 430 /* FIXME: As long as we use PPL we are not able to remove the old 431 scattering dimensions. The reason is that these dimensions are not 432 entirely unused. They are not necessary as part of the scheduling 433 vector, as the earlier dimensions already unambiguously define the 434 execution time, however they may still be needed to carry modulo 435 constraints as introduced e.g. by strip mining. The correct solution 436 would be to project these dimensions out of the scattering polyhedra. 437 In case they are still required to carry modulo constraints they should be kept 438 internally as existentially quantified dimensions. PPL does only support 439 projection of rational polyhedra, however in this case we need an integer 440 projection. With isl this will be trivial to implement. For now we just 441 leave the dimensions. This is a little ugly, but should be correct. */ 442 if (0) 443 remove_unused_scattering_dimensions (l); 444 } 445 446 lst_update_scattering (lst); 447 mpz_clear (zero); 448 return res; 449 } 450 451 /* Flatten all the loop nests in SCOP. Returns true when something 452 changed. */ 453 454 bool 455 flatten_all_loops (scop_p scop) 456 { 457 return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop)); 458 } 459 460 #endif 461