1 /* Loop interchange. 2 Copyright (C) 2017-2018 Free Software Foundation, Inc. 3 Contributed by ARM Ltd. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by the 9 Free Software Foundation; either version 3, or (at your option) any 10 later version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT 13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #include "config.h" 22 #include "system.h" 23 #include "coretypes.h" 24 #include "backend.h" 25 #include "is-a.h" 26 #include "tree.h" 27 #include "gimple.h" 28 #include "tree-pass.h" 29 #include "ssa.h" 30 #include "gimple-pretty-print.h" 31 #include "fold-const.h" 32 #include "gimplify.h" 33 #include "gimple-iterator.h" 34 #include "gimplify-me.h" 35 #include "cfgloop.h" 36 #include "params.h" 37 #include "tree-ssa.h" 38 #include "tree-scalar-evolution.h" 39 #include "tree-ssa-loop-manip.h" 40 #include "tree-ssa-loop-niter.h" 41 #include "tree-ssa-loop-ivopts.h" 42 #include "tree-ssa-dce.h" 43 #include "tree-data-ref.h" 44 #include "tree-vectorizer.h" 45 46 /* This pass performs loop interchange: for example, the loop nest 47 48 for (int j = 0; j < N; j++) 49 for (int k = 0; k < N; k++) 50 for (int i = 0; i < N; i++) 51 c[i][j] = c[i][j] + a[i][k]*b[k][j]; 52 53 is transformed to 54 55 for (int i = 0; i < N; i++) 56 for (int j = 0; j < N; j++) 57 for (int k = 0; k < N; k++) 58 c[i][j] = c[i][j] + a[i][k]*b[k][j]; 59 60 This pass implements loop interchange in the following steps: 61 62 1) Find perfect loop nest for each innermost loop and compute data 63 dependence relations for it. 
For above example, the loop nest is
      <loop_j, loop_k, loop_i>.
   2) From innermost to outermost loop, this pass tries to interchange
      each loop pair.  For above case, it firstly tries to interchange
      <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
      Then it tries to interchange <loop_j, loop_i> and loop nest becomes
      <loop_i, loop_j, loop_k>.  The overall effect is to move innermost
      loop to the outermost position.  For loop pair <loop_i, loop_j>
      to be interchanged, we:
   3) Check if data dependence relations are valid for loop interchange.
   4) Check if both loops can be interchanged in terms of transformation.
   5) Check if interchanging the two loops is profitable.
   6) Interchange the two loops by mapping induction variables.

   This pass also handles reductions in loop nest.  So far we only support
   simple reduction of inner loop and double reduction of the loop nest.  */

/* Maximum number of stmts in each loop that should be interchanged.  */
#define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
/* Maximum number of data references in loop nest.  */
#define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))

/* Comparison ratio of access stride between inner/outer loops to be
   interchanged.  This is the minimum stride ratio for loop interchange
   to be profitable.  */
#define OUTER_STRIDE_RATIO (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
/* The same as above, but we require a higher ratio for interchanging the
   innermost two loops.  */
#define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)

/* Comparison ratio of stmt cost between inner/outer loops.  Loops won't
   be interchanged if the outer loop has too many stmts.  */
#define STMT_COST_RATIO (3)

/* Vector of strides that DR accesses in each level loop of a loop nest.
   Stored in the data_reference's AUX field.  */
#define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)

/* Structure recording a loop induction variable.  */
typedef struct induction
{
  /* IV itself.  */
  tree var;
  /* IV's initializing value, which is the init arg of the IV PHI node.  */
  tree init_val;
  /* IV's initializing expr, which is (the expanded result of) init_val.  */
  tree init_expr;
  /* IV's step.  */
  tree step;
} *induction_p;

/* Enum type for loop reduction variable.  */
enum reduction_type
{
  UNKNOWN_RTYPE = 0,
  SIMPLE_RTYPE,
  DOUBLE_RTYPE
};

/* Structure recording a loop reduction variable.  */
typedef struct reduction
{
  /* Reduction itself.  */
  tree var;
  /* PHI node defining reduction variable.  */
  gphi *phi;
  /* Init and next variables of the reduction.  */
  tree init;
  tree next;
  /* Lcssa PHI node if reduction is used outside of its definition loop.  */
  gphi *lcssa_phi;
  /* Stmts defining init and next.  */
  gimple *producer;
  gimple *consumer;
  /* If init is loaded from memory, this is the loading memory reference.  */
  tree init_ref;
  /* If reduction is finally stored to memory, this is the stored memory
     reference.  */
  tree fini_ref;
  enum reduction_type type;
} *reduction_p;


/* Dump reduction RE to DUMP_FILE.  */

static void
dump_reduction (reduction_p re)
{
  if (re->type == SIMPLE_RTYPE)
    fprintf (dump_file, " Simple reduction: ");
  else if (re->type == DOUBLE_RTYPE)
    fprintf (dump_file, " Double reduction: ");
  else
    fprintf (dump_file, " Unknown reduction: ");

  print_gimple_stmt (dump_file, re->phi, 0);
}

/* Dump LOOP's induction IV to DUMP_FILE in "{init, step}_loopnum" form.  */

static void
dump_induction (struct loop *loop, induction_p iv)
{
  fprintf (dump_file, " Induction: ");
  print_generic_expr (dump_file, iv->var, TDF_SLIM);
  fprintf (dump_file, " = {");
  print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
  fprintf (dump_file, ", ");
  print_generic_expr (dump_file, iv->step, TDF_SLIM);
  fprintf (dump_file, "}_%d\n", loop->num);
}

/* Loop candidate for interchange.  */

struct loop_cand
{
  loop_cand (struct loop *, struct loop *);
  ~loop_cand ();

  reduction_p find_reduction_by_stmt (gimple *);
  void classify_simple_reduction (reduction_p);
  bool analyze_iloop_reduction_var (tree);
  bool analyze_oloop_reduction_var (loop_cand *, tree);
  bool analyze_induction_var (tree, tree);
  bool analyze_carried_vars (loop_cand *);
  bool analyze_lcssa_phis (void);
  bool can_interchange_p (loop_cand *);
  void undo_simple_reduction (reduction_p, bitmap);

  /* The loop itself.  */
  struct loop *m_loop;
  /* The outer loop for interchange.  It equals to loop if this loop cand
     itself represents the outer loop.  */
  struct loop *m_outer;
  /* Vector of induction variables in loop.  */
  vec<induction_p> m_inductions;
  /* Vector of reduction variables in loop.  */
  vec<reduction_p> m_reductions;
  /* Lcssa PHI nodes of this loop.  */
  vec<gphi *> m_lcssa_nodes;
  /* Single exit edge of this loop.  */
  edge m_exit;
  /* Basic blocks of this loop.  */
  basic_block *m_bbs;
  /* Number of stmts of this loop.  Inner loops' stmts are not included.  */
  int m_num_stmts;
  /* Number of constant initialized simple reduction.  */
  int m_const_init_reduc;
};

/* Constructor.
 */

loop_cand::loop_cand (struct loop *loop, struct loop *outer)
  : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
    m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
{
  m_inductions.create (3);
  m_reductions.create (3);
  m_lcssa_nodes.create (3);
}

/* Destructor.  */

loop_cand::~loop_cand ()
{
  induction_p iv;
  for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
    free (iv);

  reduction_p re;
  for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
    free (re);

  m_inductions.release ();
  m_reductions.release ();
  m_lcssa_nodes.release ();
  free (m_bbs);
}

/* Return the single use stmt of VAR in LOOP, otherwise return NULL.  Debug
   stmts and uses outside of LOOP don't count as uses.  */

static gimple *
single_use_in_loop (tree var, struct loop *loop)
{
  gimple *stmt, *res = NULL;
  use_operand_p use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
	continue;

      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	continue;

      /* Second use found inside LOOP - not a single use.  */
      if (res)
	return NULL;

      res = stmt;
    }
  return res;
}

/* Return true if E is unsupported in loop interchange, i.e., E is a complex
   edge or part of an irreducible loop.  */

static inline bool
unsupported_edge (edge e)
{
  return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
}

/* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
   stmt, otherwise return NULL.  */

reduction_p
loop_cand::find_reduction_by_stmt (gimple *stmt)
{
  gphi *phi = dyn_cast <gphi *> (stmt);
  reduction_p re;

  for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
    if ((phi != NULL && phi == re->lcssa_phi)
	|| (stmt == re->producer || stmt == re->consumer))
      return re;

  return NULL;
}

/* Return true if current loop_cand can be interchanged.  ILOOP is not NULL
   if current loop_cand is the outer loop in the loop nest.  As a side
   effect, this counts this loop's own stmts into m_num_stmts.  */

bool
loop_cand::can_interchange_p (loop_cand *iloop)
{
  /* For now we only support at most one reduction.  */
  unsigned allowed_reduction_num = 1;

  /* Only support reduction if the loop nest to be interchanged is the
     innermost two loops.  */
  if ((iloop == NULL && m_loop->inner != NULL)
      || (iloop != NULL && iloop->m_loop->inner != NULL))
    allowed_reduction_num = 0;

  if (m_reductions.length () > allowed_reduction_num
      || (m_reductions.length () == 1
	  && m_reductions[0]->type == UNKNOWN_RTYPE))
    return false;

  /* Only support lcssa PHI node which is for reduction.  */
  if (m_lcssa_nodes.length () > allowed_reduction_num)
    return false;

  /* Check if basic block has any unsupported operation.  Note basic blocks
     of inner loops are not checked here.  */
  for (unsigned i = 0; i < m_loop->num_nodes; i++)
    {
      basic_block bb = m_bbs[i];
      gphi_iterator psi;
      gimple_stmt_iterator gsi;

      /* Skip basic blocks of inner loops.  */
      if (bb->loop_father != m_loop)
	continue;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	    continue;

	  if (gimple_has_side_effects (stmt))
	    return false;

	  m_num_stmts++;
	  if (gcall *call = dyn_cast <gcall *> (stmt))
	    {
	      /* In basic block of outer loop, the call should be cheap since
		 it will be moved to inner loop.  */
	      if (iloop != NULL
		  && !gimple_inexpensive_call_p (call))
		return false;
	      continue;
	    }

	  if (!iloop || !gimple_vuse (stmt))
	    continue;

	  /* Support stmt accessing memory in outer loop only if it is for
	     inner loop's reduction.  */
	  if (iloop->find_reduction_by_stmt (stmt))
	    continue;

	  tree lhs;
	  /* Support loop invariant memory reference if it's only used once
	     by inner loop.  */
	  /* ??? How's this checking for invariantness?  */
	  if (gimple_assign_single_p (stmt)
	      && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
	      && TREE_CODE (lhs) == SSA_NAME
	      && single_use_in_loop (lhs, iloop->m_loop))
	    continue;

	  return false;
	}
      /* Check if loop has too many stmts.  */
      if (m_num_stmts > MAX_NUM_STMT)
	return false;

      /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
	 loop's header, or PHI nodes in dest bb of inner loop's exit edge.  */
      if (!iloop || bb == m_loop->header
	  || bb == iloop->m_exit->dest)
	continue;

      /* Don't allow any other PHI nodes.  */
      for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
	if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
	  return false;
    }

  return true;
}

/* Programmers and optimizers (like loop store motion) may optimize code:

     for (int i = 0; i < N; i++)
       for (int j = 0; j < N; j++)
	 a[i] += b[j][i] * c[j][i];

   into reduction:

     for (int i = 0; i < N; i++)
       {
	 // producer.  Note sum can be initialized to a constant.
	 int sum = a[i];
	 for (int j = 0; j < N; j++)
	   {
	     sum += b[j][i] * c[j][i];
	   }
	 // consumer.
	 a[i] = sum;
       }

   The result code can't be interchanged without undoing the optimization.
   This function classifies this kind of reduction and records information
   so that we can undo the store motion during interchange.  */

void
loop_cand::classify_simple_reduction (reduction_p re)
{
  gimple *producer, *consumer;

  /* Check init variable of reduction and how it is initialized.
 */
  if (TREE_CODE (re->init) == SSA_NAME)
    {
      /* The init must be a load from memory performed in the outer loop
	 for this to be an undoable store-motion reduction.  */
      producer = SSA_NAME_DEF_STMT (re->init);
      re->producer = producer;
      basic_block bb = gimple_bb (producer);
      if (!bb || bb->loop_father != m_outer)
	return;

      if (!gimple_assign_load_p (producer))
	return;

      re->init_ref = gimple_assign_rhs1 (producer);
    }
  else if (CONSTANT_CLASS_P (re->init))
    m_const_init_reduc++;
  else
    return;

  /* Check how reduction variable is used.  */
  consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
  if (!consumer
      || !gimple_store_p (consumer))
    return;

  re->fini_ref = gimple_get_lhs (consumer);
  re->consumer = consumer;

  /* Simple reduction with constant initializer.  */
  if (!re->init_ref)
    {
      gcc_assert (CONSTANT_CLASS_P (re->init));
      re->init_ref = unshare_expr (re->fini_ref);
    }

  /* Require memory references in producer and consumer are the same so
     that we can undo reduction during interchange.  */
  if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
    return;

  re->type = SIMPLE_RTYPE;
}

/* Analyze reduction variable VAR for the inner loop of the loop nest to
   be interchanged.  Return true if analysis succeeds.  */

bool
loop_cand::analyze_iloop_reduction_var (tree var)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  gphi *lcssa_phi = NULL, *use_phi;
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
  reduction_p re;
  gimple *stmt, *next_def, *single_use = NULL;
  use_operand_p use_p;
  imm_use_iterator iterator;

  if (TREE_CODE (next) != SSA_NAME)
    return false;

  next_def = SSA_NAME_DEF_STMT (next);
  basic_block bb = gimple_bb (next_def);
  if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    return false;

  /* In restricted reduction, the var is (and must be) used in defining
     the updated var.  The process can be depicted as below:

       var ;; = PHI<init, next>
       |
       |
       v
     +---------------------+
     | reduction operators | <-- other operands
     +---------------------+
       |
       |
       v
       next

     In terms of loop interchange, we don't change how NEXT is computed
     based on VAR and OTHER OPERANDS.  In case of double reduction in loop
     nest to be interchanged, we don't change it at all.  In the case of
     simple reduction in inner loop, we only change how VAR/NEXT is loaded
     or stored.  With these conditions, we can relax restrictions on
     reduction in a way that reduction operation is seen as black box.  In
     general, we can ignore reassociation of reduction operator; we can
     handle fake reductions in which VAR is not even used to compute NEXT.  */
  if (! single_imm_use (var, &use_p, &single_use)
      || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
    return false;

  /* Check the reduction operation.  We require a left-associative operation.
     For FP math we also need to be allowed to associate operations.  */
  if (gassign *ass = dyn_cast <gassign *> (single_use))
    {
      enum tree_code code = gimple_assign_rhs_code (ass);
      if (! (associative_tree_code (code)
	     || (code == MINUS_EXPR
		 && use_p->use == gimple_assign_rhs1_ptr (ass)))
	  || (FLOAT_TYPE_P (TREE_TYPE (var))
	      && ! flag_associative_math))
	return false;
    }
  else
    return false;

  /* Handle and verify a series of stmts feeding the reduction op.  */
  if (single_use != next_def
      && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
				gimple_assign_rhs_code (single_use)))
    return false;

  /* Only support cases in which INIT is used in inner loop.  */
  if (TREE_CODE (init) == SSA_NAME)
    FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
      {
	stmt = USE_STMT (use_p);
	if (is_gimple_debug (stmt))
	  continue;

	if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
	  return false;
      }

  /* NEXT may only be used by the PHI itself and by a single lcssa PHI in
     the exit destination block.  */
  FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
	continue;

      /* Or else it's used in PHI itself.  */
      use_phi = dyn_cast <gphi *> (stmt);
      if (use_phi == phi)
	continue;

      if (use_phi != NULL
	  && lcssa_phi == NULL
	  && gimple_bb (stmt) == m_exit->dest
	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
	lcssa_phi = use_phi;
      else
	return false;
    }
  if (!lcssa_phi)
    return false;

  re = XCNEW (struct reduction);
  re->var = var;
  re->init = init;
  re->next = next;
  re->phi = phi;
  re->lcssa_phi = lcssa_phi;

  classify_simple_reduction (re);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_reduction (re);

  m_reductions.safe_push (re);
  return true;
}

/* Analyze reduction variable VAR for the outer loop of the loop nest to be
   interchanged.  ILOOP is not NULL and points to inner loop.
For the
   moment, we only support double reduction for outer loop, like:

     for (int i = 0; i < n; i++)
       {
	 int sum = 0;

	 for (int j = 0; j < n; j++)    // outer loop
	   for (int k = 0; k < n; k++)  // inner loop
	     sum += a[i][k]*b[k][j];

	 s[i] = sum;
       }

   Note the innermost two loops are the loop nest to be interchanged.
   Return true if analysis succeeds.  */

bool
loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  gphi *lcssa_phi = NULL, *use_phi;
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
  reduction_p re;
  gimple *stmt, *next_def;
  use_operand_p use_p;
  imm_use_iterator iterator;

  if (TREE_CODE (next) != SSA_NAME)
    return false;

  next_def = SSA_NAME_DEF_STMT (next);
  basic_block bb = gimple_bb (next_def);
  if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    return false;

  /* Find inner loop's simple reduction that uses var as initializer.  */
  reduction_p inner_re = NULL;
  for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
    if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
      break;

  if (inner_re == NULL
      || inner_re->type != UNKNOWN_RTYPE
      || inner_re->producer != phi)
    return false;

  /* In case of double reduction, outer loop's reduction should be updated
     by inner loop's simple reduction.  */
  if (next_def != inner_re->lcssa_phi)
    return false;

  /* Outer loop's reduction should only be used to initialize inner loop's
     simple reduction.  */
  if (! single_imm_use (var, &use_p, &stmt)
      || stmt != inner_re->phi)
    return false;

  /* Check this reduction is correctly used outside of loop via lcssa phi.  */
  FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
	continue;

      /* Or else it's used in PHI itself.  */
      use_phi = dyn_cast <gphi *> (stmt);
      if (use_phi == phi)
	continue;

      if (lcssa_phi == NULL
	  && use_phi != NULL
	  && gimple_bb (stmt) == m_exit->dest
	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
	lcssa_phi = use_phi;
      else
	return false;
    }
  if (!lcssa_phi)
    return false;

  re = XCNEW (struct reduction);
  re->var = var;
  re->init = init;
  re->next = next;
  re->phi = phi;
  re->lcssa_phi = lcssa_phi;
  re->type = DOUBLE_RTYPE;
  inner_re->type = DOUBLE_RTYPE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_reduction (re);

  m_reductions.safe_push (re);
  return true;
}

/* Return true if VAR is an induction variable of current loop whose scev is
   specified by CHREC.  */

bool
loop_cand::analyze_induction_var (tree var, tree chrec)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));

  /* Var is loop invariant, though it's unlikely to happen.  Record it as
     an IV with zero step.  */
  if (tree_does_not_contain_chrecs (chrec))
    {
      struct induction *iv = XCNEW (struct induction);
      iv->var = var;
      iv->init_val = init;
      iv->init_expr = chrec;
      iv->step = build_int_cst (TREE_TYPE (chrec), 0);
      m_inductions.safe_push (iv);
      return true;
    }

  /* Only accept a simple polynomial chrec of this loop with invariant
     start and step.  */
  if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
      || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
      || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
      || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
    return false;

  struct induction *iv = XCNEW (struct induction);
  iv->var = var;
  iv->init_val = init;
  iv->init_expr = CHREC_LEFT (chrec);
  iv->step = CHREC_RIGHT (chrec);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_induction (m_loop, iv);

  m_inductions.safe_push (iv);
  return true;
}

/* Return true if all loop carried variables defined in loop header can
   be successfully analyzed.  */

bool
loop_cand::analyze_carried_vars (loop_cand *iloop)
{
  edge e = loop_preheader_edge (m_outer);
  gphi_iterator gsi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);

  for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();

      tree var = PHI_RESULT (phi);
      if (virtual_operand_p (var))
	continue;

      tree chrec = analyze_scalar_evolution (m_loop, var);
      chrec = instantiate_scev (e, m_loop, chrec);

      /* Analyze var as reduction variable.  */
      if (chrec_contains_undetermined (chrec)
	  || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
	{
	  if (iloop && !analyze_oloop_reduction_var (iloop, var))
	    return false;
	  if (!iloop && !analyze_iloop_reduction_var (var))
	    return false;
	}
      /* Analyze var as induction variable.
 */
      else if (!analyze_induction_var (var, chrec))
	return false;
    }

  return true;
}

/* Return TRUE if loop closed PHI nodes can be analyzed successfully.  */

bool
loop_cand::analyze_lcssa_phis (void)
{
  gphi_iterator gsi;
  for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      /* TODO: We only support lcssa phi for reduction for now.  */
      if (!find_reduction_by_stmt (phi))
	return false;
    }

  return true;
}

/* CONSUMER is a stmt in BB storing reduction result into memory object.
   When the reduction is initialized from constant value, we need to add
   a stmt loading from the memory object to target basic block in inner
   loop during undoing the reduction.  Problem is that memory reference
   may use ssa variables not dominating the target basic block.  This
   function finds all stmts on which CONSUMER depends in basic block BB,
   records and returns them via STMTS.  */

static void
find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
{
  auto_vec<gimple *, 4> worklist;
  use_operand_p use_p;
  ssa_op_iter iter;
  gimple *stmt, *def_stmt;
  gimple_stmt_iterator gsi;

  /* First clear flag for stmts in bb.  The GF_PLF_1 pass-local flag marks
     stmts CONSUMER depends on.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);

  /* DFS search all dependent stmts in bb and mark flag for these stmts.  */
  worklist.safe_push (consumer);
  while (!worklist.is_empty ())
    {
      stmt = worklist.pop ();
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
	{
	  def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));

	  if (is_a <gphi *> (def_stmt)
	      || gimple_bb (def_stmt) != bb
	      || gimple_plf (def_stmt, GF_PLF_1))
	    continue;

	  worklist.safe_push (def_stmt);
	}
      gimple_set_plf (stmt, GF_PLF_1, true);
    }
  /* Walk BB in order up to (but not including) CONSUMER, moving the marked
     dependence stmts into STMTS.  */
  for (gsi = gsi_start_nondebug_bb (bb);
       !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
    {
      /* Move dep stmts to sequence STMTS.  */
      if (gimple_plf (stmt, GF_PLF_1))
	{
	  gsi_remove (&gsi, false);
	  gimple_seq_add_stmt_without_update (stmts, stmt);
	}
      else
	gsi_next_nondebug (&gsi);
    }
}

/* User can write, optimizers can generate simple reduction RE for inner
   loop.  In order to make interchange valid, we have to undo reduction by
   moving producer and consumer stmts into the inner loop.  For example,
   below code:

     init = MEM_REF[idx];		//producer
     loop:
       var = phi<init, next>
       next = var op ...
     reduc_sum = phi<next>
     MEM_REF[idx] = reduc_sum		//consumer

   is transformed into:

     loop:
       new_var = MEM_REF[idx];		//producer after moving
       next = new_var op ...
       MEM_REF[idx] = next;		//consumer after moving

   Note if the reduction variable is initialized to constant, like:

     var = phi<0.0, next>

   we compute new_var as below:

     loop:
       tmp = MEM_REF[idx];
       new_var = !first_iteration ? tmp : 0.0;

   so that the initial const is used in the first iteration of loop.  Also
   record ssa variables for dead code elimination in DCE_SEEDS.  */

void
loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
{
  gimple *stmt;
  gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
  gimple_seq stmts = NULL;
  tree new_var;

  /* Prepare the initialization stmts and insert it to inner loop.  */
  if (re->producer != NULL)
    {
      /* Move the producer load into the inner loop header.  */
      gimple_set_vuse (re->producer, NULL_TREE);
      from = gsi_for_stmt (re->producer);
      gsi_remove (&from, false);
      gimple_seq_add_stmt_without_update (&stmts, re->producer);
      new_var = re->init;
    }
  else
    {
      /* Find all stmts on which expression "MEM_REF[idx]" depends.  */
      find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
      /* Because we generate new stmt loading from the MEM_REF to TMP.  */
      tree cond, tmp = copy_ssa_name (re->var);
      stmt = gimple_build_assign (tmp, re->init_ref);
      gimple_seq_add_stmt_without_update (&stmts, stmt);

      /* Init new_var to MEM_REF or CONST depending on if it is the first
	 iteration.  */
      induction_p iv = m_inductions[0];
      cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
      new_var = copy_ssa_name (re->var);
      stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
      gimple_seq_add_stmt_without_update (&stmts, stmt);
    }
  gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);

  /* Replace all uses of reduction var with new variable.  */
  use_operand_p use_p;
  imm_use_iterator iterator;
  FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
    {
      FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
	SET_USE (use_p, new_var);

      update_stmt (stmt);
    }

  /* Move consumer stmt into inner loop, just after reduction next's def.
 */
  unlink_stmt_vdef (re->consumer);
  release_ssa_name (gimple_vdef (re->consumer));
  gimple_set_vdef (re->consumer, NULL_TREE);
  gimple_set_vuse (re->consumer, NULL_TREE);
  gimple_assign_set_rhs1 (re->consumer, re->next);
  from = gsi_for_stmt (re->consumer);
  to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
  gsi_move_after (&from, &to);

  /* Mark the reduction variables for DCE.  */
  bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
  bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
}

/* Free DATAREFS and its auxiliary memory (the access-stride vectors stored
   in each data reference's AUX field).  */

static void
free_data_refs_with_aux (vec<data_reference_p> datarefs)
{
  data_reference_p dr;
  for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
    if (dr->aux != NULL)
      {
	DR_ACCESS_STRIDE (dr)->release ();
	delete (vec<tree> *) dr->aux;
      }

  free_data_refs (datarefs);
}

/* Class for loop interchange transformation.  */

class tree_loop_interchange
{
public:
  tree_loop_interchange (vec<struct loop *> loop_nest)
    : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
      m_dce_seeds (BITMAP_ALLOC (NULL)) { }
  ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
  bool interchange (vec<data_reference_p>, vec<ddr_p>);

private:
  void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
  bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
  void interchange_loops (loop_cand &, loop_cand &);
  void map_inductions_to_loop (loop_cand &, loop_cand &);
  void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *);

  /* The whole loop nest in which interchange is ongoing.  */
  vec<struct loop *> m_loop_nest;
  /* We create new IV which is only used in loop's exit condition check.
     In case of 3-level loop nest interchange, when we interchange the
     innermost two loops, new IV created in the middle level loop does
     not need to be preserved in interchanging the outermost two loops
     later.  We record the IV so that it can be skipped.  */
  tree m_niters_iv_var;
  /* Bitmap of seed variables for dead code elimination after interchange.  */
  bitmap m_dce_seeds;
};

/* Update data refs' access stride and dependence information after loop
   interchange.  I_IDX/O_IDX gives indices of interchanged loops in loop
   nest.  DATAREFS are data references.  DDRS are data dependences.  */

void
tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
					 vec<data_reference_p> datarefs,
					 vec<ddr_p> ddrs)
{
  struct data_reference *dr;
  struct data_dependence_relation *ddr;

  /* Update strides of data references.  */
  for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
    {
      vec<tree> *stride = DR_ACCESS_STRIDE (dr);
      gcc_assert (stride->length () > i_idx);
      std::swap ((*stride)[i_idx], (*stride)[o_idx]);
    }
  /* Update data dependences by swapping the interchanged entries of each
     distance vector.  */
  for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
    if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
      {
	for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
	  {
	    lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
	    std::swap (dist_vect[i_idx], dist_vect[o_idx]);
	  }
      }
}

/* Check data dependence relations, return TRUE if it's valid to interchange
   two loops specified by I_IDX/O_IDX.  Theoretically, interchanging the two
   loops is valid only if dist vector, after interchanging, doesn't have '>'
   as the leftmost non-'=' direction.  Practically, this function has been
   conservative here by not checking some valid cases.  */

bool
tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
					       vec<ddr_p> ddrs)
{
  struct data_dependence_relation *ddr;

  for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
    {
      /* Skip no-dependence case.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
	{
	  lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
	  unsigned level = dependence_level (dist_vect, m_loop_nest.length ());

	  /* If there is no carried dependence.  */
	  if (level == 0)
	    continue;

	  level --;

	  /* If dependence is not carried by any loop in between the two
	     loops [oloop, iloop] to interchange.  */
	  if (level < o_idx || level > i_idx)
	    continue;

	  /* Be conservative, skip case if either direction at i_idx/o_idx
	     levels is not '=' or '<'.  */
	  if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
	    return false;
	}
    }

  return true;
}

/* Interchange two loops specified by ILOOP and OLOOP.  */

void
tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
{
  reduction_p re;
  gimple_stmt_iterator gsi;
  tree i_niters, o_niters, var_after;

  /* Undo inner loop's simple reduction.  */
  for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
    if (re->type != DOUBLE_RTYPE)
      {
	if (re->producer)
	  reset_debug_uses (re->producer);

	iloop.undo_simple_reduction (re, m_dce_seeds);
      }

  /* Only need to reset debug uses for double reduction.  */
  for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
    {
      gcc_assert (re->type == DOUBLE_RTYPE);
      reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
      reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
    }

  /* Prepare niters for both loops.
*/ 1076 struct loop *loop_nest = m_loop_nest[0]; 1077 edge instantiate_below = loop_preheader_edge (loop_nest); 1078 gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src); 1079 i_niters = number_of_latch_executions (iloop.m_loop); 1080 i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters); 1081 i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop), 1082 i_niters); 1083 i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true, 1084 NULL_TREE, false, GSI_CONTINUE_LINKING); 1085 o_niters = number_of_latch_executions (oloop.m_loop); 1086 if (oloop.m_loop != loop_nest) 1087 { 1088 o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters); 1089 o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop), 1090 o_niters); 1091 } 1092 o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true, 1093 NULL_TREE, false, GSI_CONTINUE_LINKING); 1094 1095 /* Move src's code to tgt loop. This is necessary when src is the outer 1096 loop and tgt is the inner loop. */ 1097 move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs); 1098 1099 /* Map outer loop's IV to inner loop, and vice versa. */ 1100 map_inductions_to_loop (oloop, iloop); 1101 map_inductions_to_loop (iloop, oloop); 1102 1103 /* Create canonical IV for both loops. Note canonical IV for outer/inner 1104 loop is actually from inner/outer loop. Also we record the new IV 1105 created for the outer loop so that it can be skipped in later loop 1106 interchange. */ 1107 create_canonical_iv (oloop.m_loop, oloop.m_exit, 1108 i_niters, &m_niters_iv_var, &var_after); 1109 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after)); 1110 create_canonical_iv (iloop.m_loop, iloop.m_exit, 1111 o_niters, NULL, &var_after); 1112 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after)); 1113 1114 /* Scrap niters estimation of interchanged loops. 
*/ 1115 iloop.m_loop->any_upper_bound = false; 1116 iloop.m_loop->any_likely_upper_bound = false; 1117 free_numbers_of_iterations_estimates (iloop.m_loop); 1118 oloop.m_loop->any_upper_bound = false; 1119 oloop.m_loop->any_likely_upper_bound = false; 1120 free_numbers_of_iterations_estimates (oloop.m_loop); 1121 1122 /* Clear all cached scev information. This is expensive but shouldn't be 1123 a problem given we interchange in very limited times. */ 1124 scev_reset_htab (); 1125 1126 /* ??? The association between the loop data structure and the 1127 CFG changed, so what was loop N at the source level is now 1128 loop M. We should think of retaining the association or breaking 1129 it fully by creating a new loop instead of re-using the "wrong" one. */ 1130 } 1131 1132 /* Map induction variables of SRC loop to TGT loop. The function firstly 1133 creates the same IV of SRC loop in TGT loop, then deletes the original 1134 IV and re-initialize it using the newly created IV. For example, loop 1135 nest: 1136 1137 for (i = 0; i < N; i++) 1138 for (j = 0; j < M; j++) 1139 { 1140 //use of i; 1141 //use of j; 1142 } 1143 1144 will be transformed into: 1145 1146 for (jj = 0; jj < M; jj++) 1147 for (ii = 0; ii < N; ii++) 1148 { 1149 //use of ii; 1150 //use of jj; 1151 } 1152 1153 after loop interchange. */ 1154 1155 void 1156 tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt) 1157 { 1158 induction_p iv; 1159 edge e = tgt.m_exit; 1160 gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi; 1161 1162 /* Map source loop's IV to target loop. */ 1163 for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i) 1164 { 1165 gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var); 1166 gcc_assert (is_a <gphi *> (stmt)); 1167 1168 use_operand_p use_p; 1169 /* Only map original IV to target loop. */ 1170 if (m_niters_iv_var != iv->var) 1171 { 1172 /* Map the IV by creating the same one in target loop. 
*/ 1173 tree var_before, var_after; 1174 tree base = unshare_expr (iv->init_expr); 1175 tree step = unshare_expr (iv->step); 1176 create_iv (base, step, SSA_NAME_VAR (iv->var), 1177 tgt.m_loop, &incr_pos, false, &var_before, &var_after); 1178 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before)); 1179 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after)); 1180 1181 /* Replace uses of the original IV var with newly created IV var. */ 1182 imm_use_iterator imm_iter; 1183 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var) 1184 { 1185 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 1186 SET_USE (use_p, var_before); 1187 1188 update_stmt (use_stmt); 1189 } 1190 } 1191 1192 /* Mark all uses for DCE. */ 1193 ssa_op_iter op_iter; 1194 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE) 1195 { 1196 tree use = USE_FROM_PTR (use_p); 1197 if (TREE_CODE (use) == SSA_NAME 1198 && ! SSA_NAME_IS_DEFAULT_DEF (use)) 1199 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use)); 1200 } 1201 1202 /* Delete definition of the original IV in the source loop. */ 1203 gsi = gsi_for_stmt (stmt); 1204 remove_phi_node (&gsi, true); 1205 } 1206 } 1207 1208 /* Move stmts of outer loop to inner loop. */ 1209 1210 void 1211 tree_loop_interchange::move_code_to_inner_loop (struct loop *outer, 1212 struct loop *inner, 1213 basic_block *outer_bbs) 1214 { 1215 basic_block oloop_exit_bb = single_exit (outer)->src; 1216 gimple_stmt_iterator gsi, to; 1217 1218 for (unsigned i = 0; i < outer->num_nodes; i++) 1219 { 1220 basic_block bb = outer_bbs[i]; 1221 1222 /* Skip basic blocks of inner loop. */ 1223 if (flow_bb_inside_loop_p (inner, bb)) 1224 continue; 1225 1226 /* Move code from header/latch to header/latch. */ 1227 if (bb == outer->header) 1228 to = gsi_after_labels (inner->header); 1229 else if (bb == outer->latch) 1230 to = gsi_after_labels (inner->latch); 1231 else 1232 /* Otherwise, simply move to exit->src. 
*/ 1233 to = gsi_last_bb (single_exit (inner)->src); 1234 1235 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) 1236 { 1237 gimple *stmt = gsi_stmt (gsi); 1238 1239 if (oloop_exit_bb == bb 1240 && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb))) 1241 { 1242 gsi_next (&gsi); 1243 continue; 1244 } 1245 1246 if (gimple_vuse (stmt)) 1247 gimple_set_vuse (stmt, NULL_TREE); 1248 if (gimple_vdef (stmt)) 1249 { 1250 unlink_stmt_vdef (stmt); 1251 release_ssa_name (gimple_vdef (stmt)); 1252 gimple_set_vdef (stmt, NULL_TREE); 1253 } 1254 1255 reset_debug_uses (stmt); 1256 gsi_move_before (&gsi, &to); 1257 } 1258 } 1259 } 1260 1261 /* Given data reference DR in LOOP_NEST, the function computes DR's access 1262 stride at each level of loop from innermost LOOP to outer. On success, 1263 it saves access stride at each level loop in a vector which is pointed 1264 by DR->aux. For example: 1265 1266 int arr[100][100][100]; 1267 for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000 1268 for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400 1269 for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4 1270 arr[i][j - 1][k] = 0; */ 1271 1272 static void 1273 compute_access_stride (struct loop *loop_nest, struct loop *loop, 1274 data_reference_p dr) 1275 { 1276 vec<tree> *strides = new vec<tree> (); 1277 basic_block bb = gimple_bb (DR_STMT (dr)); 1278 1279 while (!flow_bb_inside_loop_p (loop, bb)) 1280 { 1281 strides->safe_push (build_int_cst (sizetype, 0)); 1282 loop = loop_outer (loop); 1283 } 1284 gcc_assert (loop == bb->loop_father); 1285 1286 tree ref = DR_REF (dr); 1287 if (TREE_CODE (ref) == COMPONENT_REF 1288 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))) 1289 { 1290 /* We can't take address of bitfields. If the bitfield is at constant 1291 offset from the start of the struct, just use address of the 1292 struct, for analysis of the strides that shouldn't matter. 
*/ 1293 if (!TREE_OPERAND (ref, 2) 1294 || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST) 1295 ref = TREE_OPERAND (ref, 0); 1296 /* Otherwise, if we have a bit field representative, use that. */ 1297 else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1)) 1298 != NULL_TREE) 1299 { 1300 tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1)); 1301 ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0), 1302 repr, TREE_OPERAND (ref, 2)); 1303 } 1304 /* Otherwise punt. */ 1305 else 1306 { 1307 dr->aux = strides; 1308 return; 1309 } 1310 } 1311 tree scev_base = build_fold_addr_expr (ref); 1312 tree scev = analyze_scalar_evolution (loop, scev_base); 1313 scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev); 1314 if (! chrec_contains_undetermined (scev)) 1315 { 1316 tree sl = scev; 1317 struct loop *expected = loop; 1318 while (TREE_CODE (sl) == POLYNOMIAL_CHREC) 1319 { 1320 struct loop *sl_loop = get_chrec_loop (sl); 1321 while (sl_loop != expected) 1322 { 1323 strides->safe_push (size_int (0)); 1324 expected = loop_outer (expected); 1325 } 1326 strides->safe_push (CHREC_RIGHT (sl)); 1327 sl = CHREC_LEFT (sl); 1328 expected = loop_outer (expected); 1329 } 1330 if (! tree_contains_chrecs (sl, NULL)) 1331 while (expected != loop_outer (loop_nest)) 1332 { 1333 strides->safe_push (size_int (0)); 1334 expected = loop_outer (expected); 1335 } 1336 } 1337 1338 dr->aux = strides; 1339 } 1340 1341 /* Given loop nest LOOP_NEST with innermost LOOP, the function computes 1342 access strides with respect to each level loop for all data refs in 1343 DATAREFS from inner loop to outer loop. On success, it returns the 1344 outermost loop that access strides can be computed successfully for 1345 all data references. If access strides cannot be computed at least 1346 for two levels of loop for any data reference, it returns NULL. 
*/ 1347 1348 static struct loop * 1349 compute_access_strides (struct loop *loop_nest, struct loop *loop, 1350 vec<data_reference_p> datarefs) 1351 { 1352 unsigned i, j, num_loops = (unsigned) -1; 1353 data_reference_p dr; 1354 vec<tree> *stride; 1355 1356 for (i = 0; datarefs.iterate (i, &dr); ++i) 1357 { 1358 compute_access_stride (loop_nest, loop, dr); 1359 stride = DR_ACCESS_STRIDE (dr); 1360 if (stride->length () < num_loops) 1361 { 1362 num_loops = stride->length (); 1363 if (num_loops < 2) 1364 return NULL; 1365 } 1366 } 1367 1368 for (i = 0; datarefs.iterate (i, &dr); ++i) 1369 { 1370 stride = DR_ACCESS_STRIDE (dr); 1371 if (stride->length () > num_loops) 1372 stride->truncate (num_loops); 1373 1374 for (j = 0; j < (num_loops >> 1); ++j) 1375 std::swap ((*stride)[j], (*stride)[num_loops - j - 1]); 1376 } 1377 1378 loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops); 1379 gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop)); 1380 return loop; 1381 } 1382 1383 /* Prune access strides for data references in DATAREFS by removing strides 1384 of loops that isn't in current LOOP_NEST. */ 1385 1386 static void 1387 prune_access_strides_not_in_loop (struct loop *loop_nest, 1388 struct loop *innermost, 1389 vec<data_reference_p> datarefs) 1390 { 1391 data_reference_p dr; 1392 unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1; 1393 gcc_assert (num_loops > 1); 1394 1395 /* Block remove strides of loops that is not in current loop nest. */ 1396 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i) 1397 { 1398 vec<tree> *stride = DR_ACCESS_STRIDE (dr); 1399 if (stride->length () > num_loops) 1400 stride->block_remove (0, stride->length () - num_loops); 1401 } 1402 } 1403 1404 /* Dump access strides for all DATAREFS. 
*/ 1405 1406 static void 1407 dump_access_strides (vec<data_reference_p> datarefs) 1408 { 1409 data_reference_p dr; 1410 fprintf (dump_file, "Access Strides for DRs:\n"); 1411 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i) 1412 { 1413 fprintf (dump_file, " "); 1414 print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM); 1415 fprintf (dump_file, ":\t\t<"); 1416 1417 vec<tree> *stride = DR_ACCESS_STRIDE (dr); 1418 unsigned num_loops = stride->length (); 1419 for (unsigned j = 0; j < num_loops; ++j) 1420 { 1421 print_generic_expr (dump_file, (*stride)[j], TDF_SLIM); 1422 fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n"); 1423 } 1424 } 1425 } 1426 1427 /* Return true if it's profitable to interchange two loops whose index 1428 in whole loop nest vector are I_IDX/O_IDX respectively. The function 1429 computes and compares three types information from all DATAREFS: 1430 1) Access stride for loop I_IDX and O_IDX. 1431 2) Number of invariant memory references with respect to I_IDX before 1432 and after loop interchange. 1433 3) Flags indicating if all memory references access sequential memory 1434 in ILOOP, before and after loop interchange. 1435 If INNMOST_LOOP_P is true, the two loops for interchanging are the two 1436 innermost loops in loop nest. This function also dumps information if 1437 DUMP_INFO_P is true. 
*/ 1438 1439 static bool 1440 should_interchange_loops (unsigned i_idx, unsigned o_idx, 1441 vec<data_reference_p> datarefs, 1442 unsigned i_stmt_cost, unsigned o_stmt_cost, 1443 bool innermost_loops_p, bool dump_info_p = true) 1444 { 1445 unsigned HOST_WIDE_INT ratio; 1446 unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0; 1447 struct data_reference *dr; 1448 bool all_seq_dr_before_p = true, all_seq_dr_after_p = true; 1449 widest_int iloop_strides = 0, oloop_strides = 0; 1450 unsigned num_unresolved_drs = 0; 1451 unsigned num_resolved_ok_drs = 0; 1452 unsigned num_resolved_not_ok_drs = 0; 1453 1454 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS)) 1455 fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n"); 1456 1457 for (i = 0; datarefs.iterate (i, &dr); ++i) 1458 { 1459 vec<tree> *stride = DR_ACCESS_STRIDE (dr); 1460 tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx]; 1461 1462 bool subloop_stride_p = false; 1463 /* Data ref can't be invariant or sequential access at current loop if 1464 its address changes with respect to any subloops. 
*/ 1465 for (j = i_idx + 1; j < stride->length (); ++j) 1466 if (!integer_zerop ((*stride)[j])) 1467 { 1468 subloop_stride_p = true; 1469 break; 1470 } 1471 1472 if (integer_zerop (iloop_stride)) 1473 { 1474 if (!subloop_stride_p) 1475 num_old_inv_drs++; 1476 } 1477 if (integer_zerop (oloop_stride)) 1478 { 1479 if (!subloop_stride_p) 1480 num_new_inv_drs++; 1481 } 1482 1483 if (TREE_CODE (iloop_stride) == INTEGER_CST 1484 && TREE_CODE (oloop_stride) == INTEGER_CST) 1485 { 1486 iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride)); 1487 oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride)); 1488 } 1489 else if (multiple_of_p (TREE_TYPE (iloop_stride), 1490 iloop_stride, oloop_stride)) 1491 num_resolved_ok_drs++; 1492 else if (multiple_of_p (TREE_TYPE (iloop_stride), 1493 oloop_stride, iloop_stride)) 1494 num_resolved_not_ok_drs++; 1495 else 1496 num_unresolved_drs++; 1497 1498 /* Data ref can't be sequential access if its address changes in sub 1499 loop. */ 1500 if (subloop_stride_p) 1501 { 1502 all_seq_dr_before_p = false; 1503 all_seq_dr_after_p = false; 1504 continue; 1505 } 1506 /* Track if all data references are sequential accesses before/after loop 1507 interchange. Note invariant is considered sequential here. */ 1508 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); 1509 if (all_seq_dr_before_p 1510 && ! (integer_zerop (iloop_stride) 1511 || operand_equal_p (access_size, iloop_stride, 0))) 1512 all_seq_dr_before_p = false; 1513 if (all_seq_dr_after_p 1514 && ! 
(integer_zerop (oloop_stride) 1515 || operand_equal_p (access_size, oloop_stride, 0))) 1516 all_seq_dr_after_p = false; 1517 } 1518 1519 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS)) 1520 { 1521 fprintf (dump_file, "\toverall:\t\t"); 1522 print_decu (iloop_strides, dump_file); 1523 fprintf (dump_file, "\t"); 1524 print_decu (oloop_strides, dump_file); 1525 fprintf (dump_file, "\n"); 1526 1527 fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n", 1528 num_old_inv_drs, num_new_inv_drs); 1529 fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n", 1530 all_seq_dr_before_p ? "true" : "false", 1531 all_seq_dr_after_p ? "true" : "false"); 1532 fprintf (dump_file, "OK to interchage with variable strides: %d\n", 1533 num_resolved_ok_drs); 1534 fprintf (dump_file, "Not OK to interchage with variable strides: %d\n", 1535 num_resolved_not_ok_drs); 1536 fprintf (dump_file, "Variable strides we cannot decide: %d\n", 1537 num_unresolved_drs); 1538 fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost); 1539 fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost); 1540 } 1541 1542 if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0) 1543 return false; 1544 1545 /* Stmts of outer loop will be moved to inner loop. If there are two many 1546 such stmts, it could make inner loop costly. Here we compare stmt cost 1547 between outer and inner loops. */ 1548 if (i_stmt_cost && o_stmt_cost 1549 && num_old_inv_drs + o_stmt_cost > num_new_inv_drs 1550 && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost) 1551 return false; 1552 1553 /* We use different stride comparison ratio for interchanging innermost 1554 two loops or not. The idea is to be conservative in interchange for 1555 the innermost loops. */ 1556 ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO; 1557 /* Do interchange if it gives better data locality behavior. 
*/ 1558 if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio))) 1559 return true; 1560 if (wi::gtu_p (iloop_strides, oloop_strides)) 1561 { 1562 /* Or it creates more invariant memory references. */ 1563 if ((!all_seq_dr_before_p || all_seq_dr_after_p) 1564 && num_new_inv_drs > num_old_inv_drs) 1565 return true; 1566 /* Or it makes all memory references sequential. */ 1567 if (num_new_inv_drs >= num_old_inv_drs 1568 && !all_seq_dr_before_p && all_seq_dr_after_p) 1569 return true; 1570 } 1571 1572 return false; 1573 } 1574 1575 /* Try to interchange inner loop of a loop nest to outer level. */ 1576 1577 bool 1578 tree_loop_interchange::interchange (vec<data_reference_p> datarefs, 1579 vec<ddr_p> ddrs) 1580 { 1581 location_t loc = find_loop_location (m_loop_nest[0]); 1582 bool changed_p = false; 1583 /* In each iteration we try to interchange I-th loop with (I+1)-th loop. 1584 The overall effect is to push inner loop to outermost level in whole 1585 loop nest. */ 1586 for (unsigned i = m_loop_nest.length (); i > 1; --i) 1587 { 1588 unsigned i_idx = i - 1, o_idx = i - 2; 1589 1590 /* Check validity for loop interchange. */ 1591 if (!valid_data_dependences (i_idx, o_idx, ddrs)) 1592 break; 1593 1594 loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]); 1595 loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]); 1596 1597 /* Check if we can do transformation for loop interchange. */ 1598 if (!iloop.analyze_carried_vars (NULL) 1599 || !iloop.analyze_lcssa_phis () 1600 || !oloop.analyze_carried_vars (&iloop) 1601 || !oloop.analyze_lcssa_phis () 1602 || !iloop.can_interchange_p (NULL) 1603 || !oloop.can_interchange_p (&iloop)) 1604 break; 1605 1606 /* Outer loop's stmts will be moved to inner loop during interchange. 1607 If there are many of them, it may make inner loop very costly. We 1608 need to check number of outer loop's stmts in profit cost model of 1609 interchange. 
*/ 1610 int stmt_cost = oloop.m_num_stmts; 1611 /* Count out the exit checking stmt of outer loop. */ 1612 stmt_cost --; 1613 /* Count out IV's increasing stmt, IVOPTs takes care if it. */ 1614 stmt_cost -= oloop.m_inductions.length (); 1615 /* Count in the additional load and cond_expr stmts caused by inner 1616 loop's constant initialized reduction. */ 1617 stmt_cost += iloop.m_const_init_reduc * 2; 1618 if (stmt_cost < 0) 1619 stmt_cost = 0; 1620 1621 /* Check profitability for loop interchange. */ 1622 if (should_interchange_loops (i_idx, o_idx, datarefs, 1623 (unsigned) iloop.m_num_stmts, 1624 (unsigned) stmt_cost, 1625 iloop.m_loop->inner == NULL)) 1626 { 1627 if (dump_file && (dump_flags & TDF_DETAILS)) 1628 fprintf (dump_file, 1629 "Loop_pair<outer:%d, inner:%d> is interchanged\n\n", 1630 oloop.m_loop->num, iloop.m_loop->num); 1631 1632 changed_p = true; 1633 interchange_loops (iloop, oloop); 1634 /* No need to update if there is no further loop interchange. */ 1635 if (o_idx > 0) 1636 update_data_info (i_idx, o_idx, datarefs, ddrs); 1637 } 1638 else 1639 { 1640 if (dump_file && (dump_flags & TDF_DETAILS)) 1641 fprintf (dump_file, 1642 "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n", 1643 oloop.m_loop->num, iloop.m_loop->num); 1644 } 1645 } 1646 simple_dce_from_worklist (m_dce_seeds); 1647 1648 if (changed_p) 1649 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, 1650 "loops interchanged in loop nest\n"); 1651 1652 return changed_p; 1653 } 1654 1655 1656 /* Loop interchange pass. 
   */

namespace {

const pass_data pass_data_linterchange =
{
  GIMPLE_PASS, /* type */
  "linterchange", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LINTERCHANGE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_linterchange : public gimple_opt_pass
{
public:
  pass_linterchange (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_linterchange, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_linterchange (m_ctxt); }
  virtual bool gate (function *) { return flag_loop_interchange; }
  virtual unsigned int execute (function *);

}; // class pass_linterchange


/* Return true if LOOP has proper form for interchange.  We check three
   conditions in the function:
     1) In general, a loop can be interchanged only if it doesn't have
	basic blocks other than header, exit and latch besides possible
	inner loop nest.  This basically restricts loop interchange to
	below form loop nests:

	  header<---+
	    |	     |
	    v	     |
	  INNER_LOOP |
	    |	     |
	    v	     |
	  exit--->latch

     2) Data reference in basic block that executes in different times
	than loop head/exit is not allowed.
     3) Record the innermost outer loop that doesn't form rectangle loop
	nest with LOOP.  */

static bool
proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer)
{
  edge e0, e1, exit;

  /* Don't interchange if loop has unsupported information for the moment.  */
  if (loop->safelen > 0
      || loop->constraints != 0
      || loop->can_be_parallel
      || loop->dont_vectorize
      || loop->force_vectorize
      || loop->in_oacc_kernels_region
      || loop->orig_loop_num != 0
      || loop->simduid != NULL_TREE)
    return false;

  /* Don't interchange if outer loop has basic block other than header, exit
     and latch.  */
  if (loop->inner != NULL
      && loop->num_nodes != loop->inner->num_nodes + 3)
    return false;

  if ((exit = single_dom_exit (loop)) == NULL)
    return false;

  /* Check control flow on loop header/exit blocks.  */
  if (loop->header == exit->src
      && (EDGE_COUNT (loop->header->preds) != 2
	  || EDGE_COUNT (loop->header->succs) != 2))
    return false;
  else if (loop->header != exit->src
	   && (EDGE_COUNT (loop->header->preds) != 2
	       || !single_succ_p (loop->header)
	       || unsupported_edge (single_succ_edge (loop->header))
	       || EDGE_COUNT (exit->src->succs) != 2
	       || !single_pred_p (exit->src)
	       || unsupported_edge (single_pred_edge (exit->src))))
    return false;

  /* Header must be reached from exactly the latch and one non-loop block.  */
  e0 = EDGE_PRED (loop->header, 0);
  e1 = EDGE_PRED (loop->header, 1);
  if (unsupported_edge (e0) || unsupported_edge (e1)
      || (e0->src != loop->latch && e1->src != loop->latch)
      || (e0->src->loop_father == loop && e1->src->loop_father == loop))
    return false;

  /* Exit block must branch to exactly the latch and one non-loop block.  */
  e0 = EDGE_SUCC (exit->src, 0);
  e1 = EDGE_SUCC (exit->src, 1);
  if (unsupported_edge (e0) || unsupported_edge (e1)
      || (e0->dest != loop->latch && e1->dest != loop->latch)
      || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
    return false;

  /* Don't interchange if any reference is in basic block that doesn't
     dominate exit block.  */
  basic_block *bbs = get_loop_body (loop);
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      if (bb->loop_father != loop
	  || bb == loop->header || bb == exit->src
	  || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
	continue;

      for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
	if (gimple_vuse (gsi_stmt (gsi)))
	  {
	    free (bbs);
	    return false;
	  }
    }
  free (bbs);

  tree niters = number_of_latch_executions (loop);
  niters = analyze_scalar_evolution (loop_outer (loop), niters);
  if (!niters || chrec_contains_undetermined (niters))
    return false;

  /* Record the innermost outer loop that doesn't form rectangle loop
     nest.  */
  for (loop_p loop2 = loop_outer (loop);
       loop2 && flow_loop_nested_p (*min_outer, loop2);
       loop2 = loop_outer (loop2))
    {
      niters = instantiate_scev (loop_preheader_edge (loop2),
				 loop_outer (loop), niters);
      if (!evolution_function_is_invariant_p (niters, loop2->num))
	{
	  *min_outer = loop2;
	  break;
	}
    }
  return true;
}

/* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
   should be interchanged by looking into all DATAREFS.  */

static bool
should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost,
			      vec<data_reference_p> datarefs)
{
  unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
  gcc_assert (idx > 0);

  /* Check if any two adjacent loops should be interchanged.  */
  for (struct loop *loop = innermost;
       loop != loop_nest; loop = loop_outer (loop), idx--)
    if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
				  loop == innermost, false))
      return true;

  return false;
}

/* Given loop nest LOOP_NEST and data references DATAREFS, compute data
   dependences for loop interchange and store it in DDRS.  Note we compute
   dependences directly rather than call generic interface so that we can
   return on unknown dependence instantly.  */

static bool
tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
				    vec<data_reference_p> datarefs,
				    vec<ddr_p> *ddrs)
{
  struct data_reference *a, *b;
  struct loop *innermost = loop_nest.last ();

  for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
    {
      bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
      for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
	if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
	  {
	    bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
	    /* Don't support multiple write references in outer loop.  */
	    if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
	      return false;

	    ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
	    ddrs->safe_push (ddr);
	    compute_affine_dependence (ddr, loop_nest[0]);

	    /* Give up if ddr is unknown dependence or classic direct vector
	       is not available.  */
	    if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
		|| (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
		    && DDR_NUM_DIR_VECTS (ddr) == 0))
	      return false;

	    /* If either data references is in outer loop of nest, we require
	       no dependence here because the data reference need to be moved
	       into inner loop during interchange.  The same ref loaded and
	       stored in the outer loop (a reduction) is the one exception.  */
	    if (a_outer_p && b_outer_p
		&& operand_equal_p (DR_REF (a), DR_REF (b), 0))
	      continue;
	    if (DDR_ARE_DEPENDENT (ddr) != chrec_known
		&& (a_outer_p || b_outer_p))
	      return false;
	  }
    }

  return true;
}

/* Prune DATAREFS by removing any data reference not inside of LOOP.  */

static inline void
prune_datarefs_not_in_loop (struct loop *loop, vec<data_reference_p> datarefs)
{
  unsigned i, j;
  struct data_reference *dr;

  /* Compact the vector in place, freeing pruned refs and their stride
     vectors (stored in dr->aux by compute_access_stride).  */
  for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
    {
      if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
	datarefs[j++] = dr;
      else
	{
	  if (dr->aux)
	    {
	      DR_ACCESS_STRIDE (dr)->release ();
	      delete (vec<tree> *) dr->aux;
	    }
	  free_data_ref (dr);
	}
    }
  datarefs.truncate (j);
}

/* Find and store data references in DATAREFS for LOOP nest.  If there's
   difficult data reference in a basic block, we shrink the loop nest to
   inner loop of that basic block's father loop.  On success, return the
   outer loop of the result loop nest.  */

static struct loop *
prepare_data_references (struct loop *loop, vec<data_reference_p> *datarefs)
{
  struct loop *loop_nest = loop;
  vec<data_reference_p> *bb_refs;
  basic_block bb, *bbs = get_loop_body_in_dom_order (loop);

  for (unsigned i = 0; i < loop->num_nodes; i++)
    bbs[i]->aux = NULL;

  /* Find data references for all basic blocks.  Shrink loop nest on
     difficult data reference.  */
  for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
    {
      bb = bbs[i];
      if (!flow_bb_inside_loop_p (loop_nest, bb))
	continue;

      bb_refs = new vec<data_reference_p> ();
      if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
	{
	  /* Shrink the nest to start below the offending block; give up
	     entirely if no multi-level nest remains.  */
	  loop_nest = bb->loop_father->inner;
	  if (loop_nest && !loop_nest->inner)
	    loop_nest = NULL;

	  free_data_refs (*bb_refs);
	  delete bb_refs;
	}
      else if (bb_refs->is_empty ())
	delete bb_refs;
      else
	bb->aux = bb_refs;
    }

  /* Collect all data references in loop nest.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      if (!bb->aux)
	continue;

      bb_refs = (vec<data_reference_p> *) bb->aux;
      if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
	datarefs->safe_splice (*bb_refs);
      else
	free_data_refs (*bb_refs);

      delete bb_refs;
      bb->aux = NULL;
    }
  free (bbs);

  return loop_nest;
}

/* Given innermost LOOP, return true if perfect loop nest can be found and
   data dependences can be computed.  If succeed, record the perfect loop
   nest in LOOP_NEST; record all data references in DATAREFS and record all
   data dependence relations in DDRS.

   We do support a restricted form of imperfect loop nest, i.e, loop nest
   with load/store in outer loop initializing/finalizing simple reduction
   of the innermost loop.  For such outer loop reference, we require that
   it has no dependence with others since it will be moved to inner loop
   in interchange.  */

static bool
prepare_perfect_loop_nest (struct loop *loop, vec<loop_p> *loop_nest,
			   vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
{
  struct loop *start_loop = NULL, *innermost = loop;
  struct loop *outermost = loops_for_fn (cfun)->tree_root;

  /* Walk outward from the innermost loop to find the largest perfect
     loop nest; OUTERMOST bounds the walk and is tightened by
     proper_loop_form_for_interchange for non-rectangular nests.  */
  while (loop->num != 0 && loop->inner == start_loop
	 && flow_loop_nested_p (outermost, loop))
    {
      if (!proper_loop_form_for_interchange (loop, &outermost))
	break;

      start_loop = loop;
      /* If this loop has sibling loop, the father loop won't be in perfect
	 loop nest.  */
      if (loop->next != NULL)
	break;

      loop = loop_outer (loop);
    }
  if (!start_loop || !start_loop->inner)
    return false;

  /* Prepare the data reference vector for the loop nest, pruning outer
     loops we cannot handle.  */
  start_loop = prepare_data_references (start_loop, datarefs);
  if (!start_loop
      /* Check if there is no data reference.  */
      || datarefs->is_empty ()
      /* Check if there are too many of data references.  */
      || (int) datarefs->length () > MAX_DATAREFS)
    return false;

  /* Compute access strides for all data references, pruning outer
     loops we cannot analyze refs in.  */
  start_loop = compute_access_strides (start_loop, innermost, *datarefs);
  if (!start_loop)
    return false;

  /* Check if any interchange is profitable in the loop nest.  */
  if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
    return false;

  /* Check if data dependences can be computed for loop nest starting from
     start_loop.  */
  loop = start_loop;
  do {
    loop_nest->truncate (0);

    if (loop != start_loop)
      prune_datarefs_not_in_loop (start_loop, *datarefs);

    if (find_loop_nest (start_loop, loop_nest)
	&& tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file,
		   "\nConsider loop interchange for loop_nest<%d - %d>\n",
		   start_loop->num, innermost->num);

	if (loop != start_loop)
	  prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);

	if (dump_file && (dump_flags & TDF_DETAILS))
	  dump_access_strides (*datarefs);

	return true;
      }

    free_dependence_relations (*ddrs);
    *ddrs = vNULL;
    /* Try to compute data dependences with the outermost loop stripped.  */
    loop = start_loop;
    start_loop = start_loop->inner;
  } while (start_loop && start_loop->inner);

  return false;
}

/* Main entry for loop interchange pass.  */

unsigned int
pass_linterchange::execute (function *fun)
{
  /* Interchange needs a nest of at least two loops (plus the function's
     root "loop", hence <= 2).  */
  if (number_of_loops (fun) <= 2)
    return 0;

  bool changed_p = false;
  struct loop *loop;
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      vec<loop_p> loop_nest = vNULL;
      vec<data_reference_p> datarefs = vNULL;
      vec<ddr_p> ddrs = vNULL;
      if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
	{
	  tree_loop_interchange loop_interchange (loop_nest);
	  changed_p |= loop_interchange.interchange (datarefs, ddrs);
	}
      free_dependence_relations (ddrs);
      free_data_refs_with_aux (datarefs);
      loop_nest.release ();
    }

  /* Virtual operands were dropped when moving stmts; rebuild them.  */
  return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_linterchange (gcc::context *ctxt)
{
  return new pass_linterchange (ctxt);
}