1 /* Matrix layout transformations. 2 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. 3 Contributed by Razya Ladelsky <razya@il.ibm.com> 4 Originally written by Revital Eres and Mustafa Hagog. 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free 10 Software Foundation; either version 3, or (at your option) any later 11 version. 12 13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14 WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 /* 23 Matrix flattening optimization tries to replace a N-dimensional 24 matrix with its equivalent M-dimensional matrix, where M < N. 25 This first implementation focuses on global matrices defined dynamically. 26 27 When N==1, we actually flatten the whole matrix. 28 For instance consider a two-dimensional array a [dim1] [dim2]. 29 The code for allocating space for it usually looks like: 30 31 a = (int **) malloc(dim1 * sizeof(int *)); 32 for (i=0; i<dim1; i++) 33 a[i] = (int *) malloc (dim2 * sizeof(int)); 34 35 If the array "a" is found suitable for this optimization, 36 its allocation is replaced by: 37 38 a = (int *) malloc (dim1 * dim2 *sizeof(int)); 39 40 and all the references to a[i][j] are replaced by a[i * dim2 + j]. 41 42 The two main phases of the optimization are the analysis 43 and transformation. 44 The driver of the optimization is matrix_reorg (). 45 46 47 48 Analysis phase: 49 =============== 50 51 We'll number the dimensions outside-in, meaning the most external 52 is 0, then 1, and so on. 53 The analysis part of the optimization determines K, the escape 54 level of a N-dimensional matrix (K <= N), that allows flattening of 55 the external dimensions 0,1,..., K-1. Escape level 0 means that the 56 whole matrix escapes and no flattening is possible. 57 58 The analysis part is implemented in analyze_matrix_allocation_site() 59 and analyze_matrix_accesses(). 60 61 Transformation phase: 62 ===================== 63 In this phase we define the new flattened matrices that replace the 64 original matrices in the code. 65 Implemented in transform_allocation_sites(), 66 transform_access_sites(). 67 68 Matrix Transposing 69 ================== 70 The idea of Matrix Transposing is organizing the matrix in a different 71 layout such that the dimensions are reordered. 72 This could produce better cache behavior in some cases. 73 74 For example, lets look at the matrix accesses in the following loop: 75 76 for (i=0; i<N; i++) 77 for (j=0; j<M; j++) 78 access to a[i][j] 79 80 This loop can produce good cache behavior because the elements of 81 the inner dimension are accessed sequentially. 82 83 However, if the accesses of the matrix were of the following form: 84 85 for (i=0; i<N; i++) 86 for (j=0; j<M; j++) 87 access to a[j][i] 88 89 In this loop we iterate the columns and not the rows. 90 Therefore, replacing the rows and columns 91 would have had an organization with better (cache) locality. 92 Replacing the dimensions of the matrix is called matrix transposing. 93 94 This example, of course, could be enhanced to multiple dimensions matrices 95 as well. 96 97 Since a program could include all kind of accesses, there is a decision 98 mechanism, implemented in analyze_transpose(), which implements a 99 heuristic that tries to determine whether to transpose the matrix or not, 100 according to the form of the more dominant accesses. 101 This decision is transferred to the flattening mechanism, and whether 102 the matrix was transposed or not, the matrix is flattened (if possible). 103 104 This decision making is based on profiling information and loop information. 105 If profiling information is available, decision making mechanism will be 106 operated, otherwise the matrix will only be flattened (if possible). 107 108 Both optimizations are described in the paper "Matrix flattening and 109 transposing in GCC" which was presented in GCC summit 2006. 110 http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf. */ 111 112 #include "config.h" 113 #include "system.h" 114 #include "coretypes.h" 115 #include "tm.h" 116 #include "tree.h" 117 #include "rtl.h" 118 #include "tree-inline.h" 119 #include "tree-flow.h" 120 #include "tree-flow-inline.h" 121 #include "langhooks.h" 122 #include "hashtab.h" 123 #include "flags.h" 124 #include "ggc.h" 125 #include "debug.h" 126 #include "target.h" 127 #include "cgraph.h" 128 #include "diagnostic-core.h" 129 #include "timevar.h" 130 #include "params.h" 131 #include "fibheap.h" 132 #include "intl.h" 133 #include "function.h" 134 #include "basic-block.h" 135 #include "cfgloop.h" 136 #include "tree-iterator.h" 137 #include "tree-pass.h" 138 #include "opts.h" 139 #include "tree-data-ref.h" 140 #include "tree-chrec.h" 141 #include "tree-scalar-evolution.h" 142 #include "tree-ssa-sccvn.h" 143 144 /* We need to collect a lot of data from the original malloc, 145 particularly as the gimplifier has converted: 146 147 orig_var = (struct_type *) malloc (x * sizeof (struct_type *)); 148 149 into 150 151 T3 = <constant> ; ** <constant> is amount to malloc; precomputed ** 152 T4 = malloc (T3); 153 T5 = (struct_type *) T4; 154 orig_var = T5; 155 156 The following struct fields allow us to collect all the necessary data from 157 the gimplified program. The comments in the struct below are all based 158 on the gimple example above. */ 159 160 struct malloc_call_data 161 { 162 gimple call_stmt; /* Tree for "T4 = malloc (T3);" */ 163 tree size_var; /* Var decl for T3. */ 164 tree malloc_size; /* Tree for "<constant>", the rhs assigned to T3. */ 165 }; 166 167 static tree can_calculate_expr_before_stmt (tree, sbitmap); 168 static tree can_calculate_stmt_before_stmt (gimple, sbitmap); 169 170 /* The front end of the compiler, when parsing statements of the form: 171 172 var = (type_cast) malloc (sizeof (type)); 173 174 always converts this single statement into the following statements 175 (GIMPLE form): 176 177 T.1 = sizeof (type); 178 T.2 = malloc (T.1); 179 T.3 = (type_cast) T.2; 180 var = T.3; 181 182 Since we need to create new malloc statements and modify the original 183 statements somewhat, we need to find all four of the above statements. 184 Currently record_call_1 (called for building cgraph edges) finds and 185 records the statements containing the actual call to malloc, but we 186 need to find the rest of the variables/statements on our own. That 187 is what the following function does. */ 188 static void 189 collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data) 190 { 191 tree size_var = NULL; 192 tree malloc_fn_decl; 193 tree arg1; 194 195 gcc_assert (is_gimple_call (stmt)); 196 197 malloc_fn_decl = gimple_call_fndecl (stmt); 198 if (malloc_fn_decl == NULL 199 || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC) 200 return; 201 202 arg1 = gimple_call_arg (stmt, 0); 203 size_var = arg1; 204 205 m_data->call_stmt = stmt; 206 m_data->size_var = size_var; 207 if (TREE_CODE (size_var) != VAR_DECL) 208 m_data->malloc_size = size_var; 209 else 210 m_data->malloc_size = NULL_TREE; 211 } 212 213 /* Information about matrix access site. 214 For example: if an access site of matrix arr is arr[i][j] 215 the ACCESS_SITE_INFO structure will have the address 216 of arr as its stmt. The INDEX_INFO will hold information about the 217 initial address and index of each dimension. */ 218 struct access_site_info 219 { 220 /* The statement (MEM_REF or POINTER_PLUS_EXPR). */ 221 gimple stmt; 222 223 /* In case of POINTER_PLUS_EXPR, what is the offset. */ 224 tree offset; 225 226 /* The index which created the offset. */ 227 tree index; 228 229 /* The indirection level of this statement. */ 230 int level; 231 232 /* TRUE for allocation site FALSE for access site. */ 233 bool is_alloc; 234 235 /* The function containing the access site. */ 236 tree function_decl; 237 238 /* This access is iterated in the inner most loop */ 239 bool iterated_by_inner_most_loop_p; 240 }; 241 242 typedef struct access_site_info *access_site_info_p; 243 DEF_VEC_P (access_site_info_p); 244 DEF_VEC_ALLOC_P (access_site_info_p, heap); 245 246 /* Calls to free when flattening a matrix. */ 247 248 struct free_info 249 { 250 gimple stmt; 251 tree func; 252 }; 253 254 /* Information about matrix to flatten. */ 255 struct matrix_info 256 { 257 /* Decl tree of this matrix. */ 258 tree decl; 259 /* Number of dimensions; number 260 of "*" in the type declaration. */ 261 int num_dims; 262 263 /* Minimum indirection level that escapes, 0 means that 264 the whole matrix escapes, k means that dimensions 265 0 to ACTUAL_DIM - k escapes. */ 266 int min_indirect_level_escape; 267 268 gimple min_indirect_level_escape_stmt; 269 270 /* Hold the allocation site for each level (dimension). 271 We can use NUM_DIMS as the upper bound and allocate the array 272 once with this number of elements and no need to use realloc and 273 MAX_MALLOCED_LEVEL. */ 274 gimple *malloc_for_level; 275 276 int max_malloced_level; 277 278 /* Is the matrix transposed. */ 279 bool is_transposed_p; 280 281 /* The location of the allocation sites (they must be in one 282 function). */ 283 tree allocation_function_decl; 284 285 /* The calls to free for each level of indirection. */ 286 struct free_info *free_stmts; 287 288 /* An array which holds for each dimension its size. where 289 dimension 0 is the outer most (one that contains all the others). 290 */ 291 tree *dimension_size; 292 293 /* An array which holds for each dimension it's original size 294 (before transposing and flattening take place). */ 295 tree *dimension_size_orig; 296 297 /* An array which holds for each dimension the size of the type of 298 of elements accessed in that level (in bytes). */ 299 HOST_WIDE_INT *dimension_type_size; 300 301 int dimension_type_size_len; 302 303 /* An array collecting the count of accesses for each dimension. */ 304 gcov_type *dim_hot_level; 305 306 /* An array of the accesses to be flattened. 307 elements are of type "struct access_site_info *". */ 308 VEC (access_site_info_p, heap) * access_l; 309 310 /* A map of how the dimensions will be organized at the end of 311 the analyses. */ 312 int *dim_map; 313 }; 314 315 /* In each phi node we want to record the indirection level we have when we 316 get to the phi node. Usually we will have phi nodes with more than two 317 arguments, then we must assure that all of them get to the phi node with 318 the same indirection level, otherwise it's not safe to do the flattening. 319 So we record the information regarding the indirection level each time we 320 get to the phi node in this hash table. */ 321 322 struct matrix_access_phi_node 323 { 324 gimple phi; 325 int indirection_level; 326 }; 327 328 /* We use this structure to find if the SSA variable is accessed inside the 329 tree and record the tree containing it. */ 330 331 struct ssa_acc_in_tree 332 { 333 /* The variable whose accesses in the tree we are looking for. */ 334 tree ssa_var; 335 /* The tree and code inside it the ssa_var is accessed, currently 336 it could be an MEM_REF or CALL_EXPR. */ 337 enum tree_code t_code; 338 tree t_tree; 339 /* The place in the containing tree. */ 340 tree *tp; 341 tree second_op; 342 bool var_found; 343 }; 344 345 static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool, 346 sbitmap, bool); 347 static int transform_allocation_sites (void **, void *); 348 static int transform_access_sites (void **, void *); 349 static int analyze_transpose (void **, void *); 350 static int dump_matrix_reorg_analysis (void **, void *); 351 352 static bool check_transpose_p; 353 354 /* Hash function used for the phi nodes. */ 355 356 static hashval_t 357 mat_acc_phi_hash (const void *p) 358 { 359 const struct matrix_access_phi_node *const ma_phi = 360 (const struct matrix_access_phi_node *) p; 361 362 return htab_hash_pointer (ma_phi->phi); 363 } 364 365 /* Equality means phi node pointers are the same. */ 366 367 static int 368 mat_acc_phi_eq (const void *p1, const void *p2) 369 { 370 const struct matrix_access_phi_node *const phi1 = 371 (const struct matrix_access_phi_node *) p1; 372 const struct matrix_access_phi_node *const phi2 = 373 (const struct matrix_access_phi_node *) p2; 374 375 if (phi1->phi == phi2->phi) 376 return 1; 377 378 return 0; 379 } 380 381 /* Hold the PHI nodes we visit during the traversal for escaping 382 analysis. */ 383 static htab_t htab_mat_acc_phi_nodes = NULL; 384 385 /* This hash-table holds the information about the matrices we are 386 going to handle. */ 387 static htab_t matrices_to_reorg = NULL; 388 389 /* Return a hash for MTT, which is really a "matrix_info *". */ 390 static hashval_t 391 mtt_info_hash (const void *mtt) 392 { 393 return htab_hash_pointer (((const struct matrix_info *) mtt)->decl); 394 } 395 396 /* Return true if MTT1 and MTT2 (which are really both of type 397 "matrix_info *") refer to the same decl. */ 398 static int 399 mtt_info_eq (const void *mtt1, const void *mtt2) 400 { 401 const struct matrix_info *const i1 = (const struct matrix_info *) mtt1; 402 const struct matrix_info *const i2 = (const struct matrix_info *) mtt2; 403 404 if (i1->decl == i2->decl) 405 return true; 406 407 return false; 408 } 409 410 /* Return false if STMT may contain a vector expression. 411 In this situation, all matrices should not be flattened. */ 412 static bool 413 may_flatten_matrices_1 (gimple stmt) 414 { 415 switch (gimple_code (stmt)) 416 { 417 case GIMPLE_ASSIGN: 418 case GIMPLE_CALL: 419 if (!gimple_has_lhs (stmt)) 420 return true; 421 if (TREE_CODE (TREE_TYPE (gimple_get_lhs (stmt))) == VECTOR_TYPE) 422 { 423 if (dump_file) 424 fprintf (dump_file, 425 "Found vector type, don't flatten matrix\n"); 426 return false; 427 } 428 break; 429 case GIMPLE_ASM: 430 /* Asm code could contain vector operations. */ 431 return false; 432 break; 433 default: 434 break; 435 } 436 return true; 437 } 438 439 /* Return false if there are hand-written vectors in the program. 440 We disable the flattening in such a case. */ 441 static bool 442 may_flatten_matrices (struct cgraph_node *node) 443 { 444 tree decl; 445 struct function *func; 446 basic_block bb; 447 gimple_stmt_iterator gsi; 448 449 decl = node->decl; 450 if (node->analyzed) 451 { 452 func = DECL_STRUCT_FUNCTION (decl); 453 FOR_EACH_BB_FN (bb, func) 454 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 455 if (!may_flatten_matrices_1 (gsi_stmt (gsi))) 456 return false; 457 } 458 return true; 459 } 460 461 /* Given a VAR_DECL, check its type to determine whether it is 462 a definition of a dynamic allocated matrix and therefore is 463 a suitable candidate for the matrix flattening optimization. 464 Return NULL if VAR_DECL is not such decl. Otherwise, allocate 465 a MATRIX_INFO structure, fill it with the relevant information 466 and return a pointer to it. 467 TODO: handle also statically defined arrays. */ 468 static struct matrix_info * 469 analyze_matrix_decl (tree var_decl) 470 { 471 struct matrix_info *m_node, tmpmi, *mi; 472 tree var_type; 473 int dim_num = 0; 474 475 gcc_assert (matrices_to_reorg); 476 477 if (TREE_CODE (var_decl) == PARM_DECL) 478 var_type = DECL_ARG_TYPE (var_decl); 479 else if (TREE_CODE (var_decl) == VAR_DECL) 480 var_type = TREE_TYPE (var_decl); 481 else 482 return NULL; 483 484 if (!POINTER_TYPE_P (var_type)) 485 return NULL; 486 487 while (POINTER_TYPE_P (var_type)) 488 { 489 var_type = TREE_TYPE (var_type); 490 dim_num++; 491 } 492 493 if (dim_num <= 1) 494 return NULL; 495 496 if (!COMPLETE_TYPE_P (var_type) 497 || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST) 498 return NULL; 499 500 /* Check to see if this pointer is already in there. */ 501 tmpmi.decl = var_decl; 502 mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi); 503 504 if (mi) 505 return NULL; 506 507 /* Record the matrix. */ 508 509 m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info)); 510 m_node->decl = var_decl; 511 m_node->num_dims = dim_num; 512 m_node->free_stmts 513 = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info)); 514 515 /* Init min_indirect_level_escape to -1 to indicate that no escape 516 analysis has been done yet. */ 517 m_node->min_indirect_level_escape = -1; 518 m_node->is_transposed_p = false; 519 520 return m_node; 521 } 522 523 /* Free matrix E. */ 524 static void 525 mat_free (void *e) 526 { 527 struct matrix_info *mat = (struct matrix_info *) e; 528 529 if (!mat) 530 return; 531 532 free (mat->free_stmts); 533 free (mat->dim_hot_level); 534 free (mat->malloc_for_level); 535 } 536 537 /* Find all potential matrices. 538 TODO: currently we handle only multidimensional 539 dynamically allocated arrays. */ 540 static void 541 find_matrices_decl (void) 542 { 543 struct matrix_info *tmp; 544 PTR *slot; 545 struct varpool_node *vnode; 546 547 gcc_assert (matrices_to_reorg); 548 549 /* For every global variable in the program: 550 Check to see if it's of a candidate type and record it. */ 551 for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed) 552 { 553 tree var_decl = vnode->decl; 554 555 if (!var_decl || TREE_CODE (var_decl) != VAR_DECL) 556 continue; 557 558 if (matrices_to_reorg) 559 if ((tmp = analyze_matrix_decl (var_decl))) 560 { 561 if (!TREE_ADDRESSABLE (var_decl)) 562 { 563 slot = htab_find_slot (matrices_to_reorg, tmp, INSERT); 564 *slot = tmp; 565 } 566 } 567 } 568 return; 569 } 570 571 /* Mark that the matrix MI escapes at level L. */ 572 static void 573 mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s) 574 { 575 if (mi->min_indirect_level_escape == -1 576 || (mi->min_indirect_level_escape > l)) 577 { 578 mi->min_indirect_level_escape = l; 579 mi->min_indirect_level_escape_stmt = s; 580 } 581 } 582 583 /* Find if the SSA variable is accessed inside the 584 tree and record the tree containing it. 585 The only relevant uses are the case of SSA_NAME, or SSA inside 586 MEM_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR. */ 587 static void 588 ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a) 589 { 590 a->t_code = TREE_CODE (t); 591 switch (a->t_code) 592 { 593 case SSA_NAME: 594 if (t == a->ssa_var) 595 a->var_found = true; 596 break; 597 case MEM_REF: 598 if (SSA_VAR_P (TREE_OPERAND (t, 0)) 599 && TREE_OPERAND (t, 0) == a->ssa_var) 600 a->var_found = true; 601 break; 602 default: 603 break; 604 } 605 } 606 607 /* Find if the SSA variable is accessed on the right hand side of 608 gimple call STMT. */ 609 610 static void 611 ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a) 612 { 613 tree decl; 614 tree arg; 615 size_t i; 616 617 a->t_code = CALL_EXPR; 618 for (i = 0; i < gimple_call_num_args (stmt); i++) 619 { 620 arg = gimple_call_arg (stmt, i); 621 if (arg == a->ssa_var) 622 { 623 a->var_found = true; 624 decl = gimple_call_fndecl (stmt); 625 a->t_tree = decl; 626 break; 627 } 628 } 629 } 630 631 /* Find if the SSA variable is accessed on the right hand side of 632 gimple assign STMT. */ 633 634 static void 635 ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a) 636 { 637 638 a->t_code = gimple_assign_rhs_code (stmt); 639 switch (a->t_code) 640 { 641 tree op1, op2; 642 643 case SSA_NAME: 644 case MEM_REF: 645 CASE_CONVERT: 646 case VIEW_CONVERT_EXPR: 647 ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a); 648 break; 649 case POINTER_PLUS_EXPR: 650 case PLUS_EXPR: 651 case MULT_EXPR: 652 op1 = gimple_assign_rhs1 (stmt); 653 op2 = gimple_assign_rhs2 (stmt); 654 655 if (op1 == a->ssa_var) 656 { 657 a->var_found = true; 658 a->second_op = op2; 659 } 660 else if (op2 == a->ssa_var) 661 { 662 a->var_found = true; 663 a->second_op = op1; 664 } 665 break; 666 default: 667 break; 668 } 669 } 670 671 /* Record the access/allocation site information for matrix MI so we can 672 handle it later in transformation. */ 673 static void 674 record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset, 675 tree index, int level, bool is_alloc) 676 { 677 struct access_site_info *acc_info; 678 679 if (!mi->access_l) 680 mi->access_l = VEC_alloc (access_site_info_p, heap, 100); 681 682 acc_info 683 = (struct access_site_info *) 684 xcalloc (1, sizeof (struct access_site_info)); 685 acc_info->stmt = stmt; 686 acc_info->offset = offset; 687 acc_info->index = index; 688 acc_info->function_decl = current_function_decl; 689 acc_info->level = level; 690 acc_info->is_alloc = is_alloc; 691 692 VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info); 693 694 } 695 696 /* Record the malloc as the allocation site of the given LEVEL. But 697 first we Make sure that all the size parameters passed to malloc in 698 all the allocation sites could be pre-calculated before the call to 699 the malloc of level 0 (the main malloc call). */ 700 static void 701 add_allocation_site (struct matrix_info *mi, gimple stmt, int level) 702 { 703 struct malloc_call_data mcd; 704 705 /* Make sure that the allocation sites are in the same function. */ 706 if (!mi->allocation_function_decl) 707 mi->allocation_function_decl = current_function_decl; 708 else if (mi->allocation_function_decl != current_function_decl) 709 { 710 int min_malloc_level; 711 712 gcc_assert (mi->malloc_for_level); 713 714 /* Find the minimum malloc level that already has been seen; 715 we known its allocation function must be 716 MI->allocation_function_decl since it's different than 717 CURRENT_FUNCTION_DECL then the escaping level should be 718 MIN (LEVEL, MIN_MALLOC_LEVEL) - 1 , and the allocation function 719 must be set accordingly. */ 720 for (min_malloc_level = 0; 721 min_malloc_level < mi->max_malloced_level 722 && mi->malloc_for_level[min_malloc_level]; min_malloc_level++) 723 ; 724 if (level < min_malloc_level) 725 { 726 mi->allocation_function_decl = current_function_decl; 727 mark_min_matrix_escape_level (mi, min_malloc_level, stmt); 728 } 729 else 730 { 731 mark_min_matrix_escape_level (mi, level, stmt); 732 /* cannot be that (level == min_malloc_level) 733 we would have returned earlier. */ 734 return; 735 } 736 } 737 738 /* Find the correct malloc information. */ 739 collect_data_for_malloc_call (stmt, &mcd); 740 741 /* We accept only calls to malloc function; we do not accept 742 calls like calloc and realloc. */ 743 if (!mi->malloc_for_level) 744 { 745 mi->malloc_for_level = XCNEWVEC (gimple, level + 1); 746 mi->max_malloced_level = level + 1; 747 } 748 else if (mi->max_malloced_level <= level) 749 { 750 mi->malloc_for_level 751 = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1); 752 753 /* Zero the newly allocated items. */ 754 memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]), 755 0, (level - mi->max_malloced_level) * sizeof (tree)); 756 757 mi->max_malloced_level = level + 1; 758 } 759 mi->malloc_for_level[level] = stmt; 760 } 761 762 /* Given an assignment statement STMT that we know that its 763 left-hand-side is the matrix MI variable, we traverse the immediate 764 uses backwards until we get to a malloc site. We make sure that 765 there is one and only one malloc site that sets this variable. When 766 we are performing the flattening we generate a new variable that 767 will hold the size for each dimension; each malloc that allocates a 768 dimension has the size parameter; we use that parameter to 769 initialize the dimension size variable so we can use it later in 770 the address calculations. LEVEL is the dimension we're inspecting. 771 Return if STMT is related to an allocation site. */ 772 773 static void 774 analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt, 775 int level, sbitmap visited) 776 { 777 if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt)) 778 { 779 tree rhs = gimple_assign_rhs1 (stmt); 780 781 if (TREE_CODE (rhs) == SSA_NAME) 782 { 783 gimple def = SSA_NAME_DEF_STMT (rhs); 784 785 analyze_matrix_allocation_site (mi, def, level, visited); 786 return; 787 } 788 /* If we are back to the original matrix variable then we 789 are sure that this is analyzed as an access site. */ 790 else if (rhs == mi->decl) 791 return; 792 } 793 /* A result of call to malloc. */ 794 else if (is_gimple_call (stmt)) 795 { 796 int call_flags = gimple_call_flags (stmt); 797 798 if (!(call_flags & ECF_MALLOC)) 799 { 800 mark_min_matrix_escape_level (mi, level, stmt); 801 return; 802 } 803 else 804 { 805 tree malloc_fn_decl; 806 807 malloc_fn_decl = gimple_call_fndecl (stmt); 808 if (malloc_fn_decl == NULL_TREE) 809 { 810 mark_min_matrix_escape_level (mi, level, stmt); 811 return; 812 } 813 if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC) 814 { 815 if (dump_file) 816 fprintf (dump_file, 817 "Matrix %s is an argument to function %s\n", 818 get_name (mi->decl), get_name (malloc_fn_decl)); 819 mark_min_matrix_escape_level (mi, level, stmt); 820 return; 821 } 822 } 823 /* This is a call to malloc of level 'level'. 824 mi->max_malloced_level-1 == level means that we've 825 seen a malloc statement of level 'level' before. 826 If the statement is not the same one that we've 827 seen before, then there's another malloc statement 828 for the same level, which means that we need to mark 829 it escaping. */ 830 if (mi->malloc_for_level 831 && mi->max_malloced_level-1 == level 832 && mi->malloc_for_level[level] != stmt) 833 { 834 mark_min_matrix_escape_level (mi, level, stmt); 835 return; 836 } 837 else 838 add_allocation_site (mi, stmt, level); 839 return; 840 } 841 /* Looks like we don't know what is happening in this 842 statement so be in the safe side and mark it as escaping. */ 843 mark_min_matrix_escape_level (mi, level, stmt); 844 } 845 846 /* The transposing decision making. 847 In order to calculate the profitability of transposing, we collect two 848 types of information regarding the accesses: 849 1. profiling information used to express the hotness of an access, that 850 is how often the matrix is accessed by this access site (count of the 851 access site). 852 2. which dimension in the access site is iterated by the inner 853 most loop containing this access. 854 855 The matrix will have a calculated value of weighted hotness for each 856 dimension. 857 Intuitively the hotness level of a dimension is a function of how 858 many times it was the most frequently accessed dimension in the 859 highly executed access sites of this matrix. 860 861 As computed by following equation: 862 m n 863 __ __ 864 \ \ dim_hot_level[i] += 865 /_ /_ 866 j i 867 acc[j]->dim[i]->iter_by_inner_loop * count(j) 868 869 Where n is the number of dims and m is the number of the matrix 870 access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j] 871 iterates over dim[i] in innermost loop, and is 0 otherwise. 872 873 The organization of the new matrix should be according to the 874 hotness of each dimension. The hotness of the dimension implies 875 the locality of the elements.*/ 876 static int 877 analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED) 878 { 879 struct matrix_info *mi = (struct matrix_info *) *slot; 880 int min_escape_l = mi->min_indirect_level_escape; 881 struct loop *loop; 882 affine_iv iv; 883 struct access_site_info *acc_info; 884 int i; 885 886 if (min_escape_l < 2 || !mi->access_l) 887 { 888 if (mi->access_l) 889 { 890 FOR_EACH_VEC_ELT (access_site_info_p, mi->access_l, i, acc_info) 891 free (acc_info); 892 VEC_free (access_site_info_p, heap, mi->access_l); 893 894 } 895 return 1; 896 } 897 if (!mi->dim_hot_level) 898 mi->dim_hot_level = 899 (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type)); 900 901 902 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info); 903 i++) 904 { 905 if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR 906 && acc_info->level < min_escape_l) 907 { 908 loop = loop_containing_stmt (acc_info->stmt); 909 if (!loop || loop->inner) 910 { 911 free (acc_info); 912 continue; 913 } 914 if (simple_iv (loop, loop, acc_info->offset, &iv, true)) 915 { 916 if (iv.step != NULL) 917 { 918 HOST_WIDE_INT istep; 919 920 istep = int_cst_value (iv.step); 921 if (istep != 0) 922 { 923 acc_info->iterated_by_inner_most_loop_p = 1; 924 mi->dim_hot_level[acc_info->level] += 925 gimple_bb (acc_info->stmt)->count; 926 } 927 928 } 929 } 930 } 931 free (acc_info); 932 } 933 VEC_free (access_site_info_p, heap, mi->access_l); 934 935 return 1; 936 } 937 938 /* Find the index which defines the OFFSET from base. 939 We walk from use to def until we find how the offset was defined. */ 940 static tree 941 get_index_from_offset (tree offset, gimple def_stmt) 942 { 943 tree op1, op2, index; 944 945 if (gimple_code (def_stmt) == GIMPLE_PHI) 946 return NULL; 947 if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt)) 948 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) 949 return get_index_from_offset (offset, 950 SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt))); 951 else if (is_gimple_assign (def_stmt) 952 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR) 953 { 954 op1 = gimple_assign_rhs1 (def_stmt); 955 op2 = gimple_assign_rhs2 (def_stmt); 956 if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST) 957 return NULL; 958 index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1; 959 return index; 960 } 961 else 962 return NULL_TREE; 963 } 964 965 /* update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size 966 of the type related to the SSA_VAR, or the type related to the 967 lhs of STMT, in the case that it is an MEM_REF. */ 968 static void 969 update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var, 970 int current_indirect_level) 971 { 972 tree lhs; 973 HOST_WIDE_INT type_size; 974 975 /* Update type according to the type of the MEM_REF expr. */ 976 if (is_gimple_assign (stmt) 977 && TREE_CODE (gimple_assign_lhs (stmt)) == MEM_REF) 978 { 979 lhs = gimple_assign_lhs (stmt); 980 gcc_assert (POINTER_TYPE_P 981 (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0))))); 982 type_size = 983 int_size_in_bytes (TREE_TYPE 984 (TREE_TYPE 985 (SSA_NAME_VAR (TREE_OPERAND (lhs, 0))))); 986 } 987 else 988 type_size = int_size_in_bytes (TREE_TYPE (ssa_var)); 989 990 /* Record the size of elements accessed (as a whole) 991 in the current indirection level (dimension). If the size of 992 elements is not known at compile time, mark it as escaping. */ 993 if (type_size <= 0) 994 mark_min_matrix_escape_level (mi, current_indirect_level, stmt); 995 else 996 { 997 int l = current_indirect_level; 998 999 if (!mi->dimension_type_size) 1000 { 1001 mi->dimension_type_size 1002 = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT)); 1003 mi->dimension_type_size_len = l + 1; 1004 } 1005 else if (mi->dimension_type_size_len < l + 1) 1006 { 1007 mi->dimension_type_size 1008 = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size, 1009 (l + 1) * sizeof (HOST_WIDE_INT)); 1010 memset (&mi->dimension_type_size[mi->dimension_type_size_len], 1011 0, (l + 1 - mi->dimension_type_size_len) 1012 * sizeof (HOST_WIDE_INT)); 1013 mi->dimension_type_size_len = l + 1; 1014 } 1015 /* Make sure all the accesses in the same level have the same size 1016 of the type. */ 1017 if (!mi->dimension_type_size[l]) 1018 mi->dimension_type_size[l] = type_size; 1019 else if (mi->dimension_type_size[l] != type_size) 1020 mark_min_matrix_escape_level (mi, l, stmt); 1021 } 1022 } 1023 1024 /* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the 1025 ssa var that we want to check because it came from some use of matrix 1026 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so 1027 far. */ 1028 1029 static int 1030 analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var, 1031 gimple use_stmt, int current_indirect_level) 1032 { 1033 tree fndecl = gimple_call_fndecl (use_stmt); 1034 1035 if (gimple_call_lhs (use_stmt)) 1036 { 1037 tree lhs = gimple_call_lhs (use_stmt); 1038 struct ssa_acc_in_tree lhs_acc, rhs_acc; 1039 1040 memset (&lhs_acc, 0, sizeof (lhs_acc)); 1041 memset (&rhs_acc, 0, sizeof (rhs_acc)); 1042 1043 lhs_acc.ssa_var = ssa_var; 1044 lhs_acc.t_code = ERROR_MARK; 1045 ssa_accessed_in_tree (lhs, &lhs_acc); 1046 rhs_acc.ssa_var = ssa_var; 1047 rhs_acc.t_code = ERROR_MARK; 1048 ssa_accessed_in_call_rhs (use_stmt, &rhs_acc); 1049 1050 /* The SSA must be either in the left side or in the right side, 1051 to understand what is happening. 1052 In case the SSA_NAME is found in both sides we should be escaping 1053 at this level because in this case we cannot calculate the 1054 address correctly. */ 1055 if ((lhs_acc.var_found && rhs_acc.var_found 1056 && lhs_acc.t_code == MEM_REF) 1057 || (!rhs_acc.var_found && !lhs_acc.var_found)) 1058 { 1059 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt); 1060 return current_indirect_level; 1061 } 1062 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found); 1063 1064 /* If we are storing to the matrix at some level, then mark it as 1065 escaping at that level. */ 1066 if (lhs_acc.var_found) 1067 { 1068 int l = current_indirect_level + 1; 1069 1070 gcc_assert (lhs_acc.t_code == MEM_REF); 1071 mark_min_matrix_escape_level (mi, l, use_stmt); 1072 return current_indirect_level; 1073 } 1074 } 1075 1076 if (fndecl) 1077 { 1078 if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE) 1079 { 1080 if (dump_file) 1081 fprintf (dump_file, 1082 "Matrix %s: Function call %s, level %d escapes.\n", 1083 get_name (mi->decl), get_name (fndecl), 1084 current_indirect_level); 1085 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt); 1086 } 1087 else if (mi->free_stmts[current_indirect_level].stmt != NULL 1088 && mi->free_stmts[current_indirect_level].stmt != use_stmt) 1089 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt); 1090 else 1091 { 1092 /*Record the free statements so we can delete them 1093 later. */ 1094 int l = current_indirect_level; 1095 1096 mi->free_stmts[l].stmt = use_stmt; 1097 mi->free_stmts[l].func = current_function_decl; 1098 } 1099 } 1100 return current_indirect_level; 1101 } 1102 1103 /* USE_STMT represents a phi node of the ssa var that we want to 1104 check because it came from some use of matrix 1105 MI. 1106 We check all the escaping levels that get to the PHI node 1107 and make sure they are all the same escaping; 1108 if not (which is rare) we let the escaping level be the 1109 minimum level that gets into that PHI because starting from 1110 that level we cannot expect the behavior of the indirections. 1111 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */ 1112 1113 static void 1114 analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt, 1115 int current_indirect_level, sbitmap visited, 1116 bool record_accesses) 1117 { 1118 1119 struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi; 1120 1121 tmp_maphi.phi = use_stmt; 1122 if ((maphi = (struct matrix_access_phi_node *) 1123 htab_find (htab_mat_acc_phi_nodes, &tmp_maphi))) 1124 { 1125 if (maphi->indirection_level == current_indirect_level) 1126 return; 1127 else 1128 { 1129 int level = MIN (maphi->indirection_level, 1130 current_indirect_level); 1131 size_t j; 1132 gimple stmt = NULL; 1133 1134 maphi->indirection_level = level; 1135 for (j = 0; j < gimple_phi_num_args (use_stmt); j++) 1136 { 1137 tree def = PHI_ARG_DEF (use_stmt, j); 1138 1139 if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI) 1140 stmt = SSA_NAME_DEF_STMT (def); 1141 } 1142 mark_min_matrix_escape_level (mi, level, stmt); 1143 } 1144 return; 1145 } 1146 maphi = (struct matrix_access_phi_node *) 1147 xcalloc (1, sizeof (struct matrix_access_phi_node)); 1148 maphi->phi = use_stmt; 1149 maphi->indirection_level = current_indirect_level; 1150 1151 /* Insert to hash table. */ 1152 pmaphi = (struct matrix_access_phi_node **) 1153 htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT); 1154 gcc_assert (pmaphi); 1155 *pmaphi = maphi; 1156 1157 if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)))) 1158 { 1159 SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))); 1160 analyze_matrix_accesses (mi, PHI_RESULT (use_stmt), 1161 current_indirect_level, false, visited, 1162 record_accesses); 1163 RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))); 1164 } 1165 } 1166 1167 /* USE_STMT represents an assign statement (the rhs or lhs include 1168 the ssa var that we want to check because it came from some use of matrix 1169 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */ 1170 1171 static int 1172 analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var, 1173 gimple use_stmt, int current_indirect_level, 1174 bool last_op, sbitmap visited, 1175 bool record_accesses) 1176 { 1177 tree lhs = gimple_get_lhs (use_stmt); 1178 struct ssa_acc_in_tree lhs_acc, rhs_acc; 1179 1180 memset (&lhs_acc, 0, sizeof (lhs_acc)); 1181 memset (&rhs_acc, 0, sizeof (rhs_acc)); 1182 1183 lhs_acc.ssa_var = ssa_var; 1184 lhs_acc.t_code = ERROR_MARK; 1185 ssa_accessed_in_tree (lhs, &lhs_acc); 1186 rhs_acc.ssa_var = ssa_var; 1187 rhs_acc.t_code = ERROR_MARK; 1188 ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc); 1189 1190 /* The SSA must be either in the left side or in the right side, 1191 to understand what is happening. 1192 In case the SSA_NAME is found in both sides we should be escaping 1193 at this level because in this case we cannot calculate the 1194 address correctly. */ 1195 if ((lhs_acc.var_found && rhs_acc.var_found 1196 && lhs_acc.t_code == MEM_REF) 1197 || (!rhs_acc.var_found && !lhs_acc.var_found)) 1198 { 1199 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt); 1200 return current_indirect_level; 1201 } 1202 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found); 1203 1204 /* If we are storing to the matrix at some level, then mark it as 1205 escaping at that level. */ 1206 if (lhs_acc.var_found) 1207 { 1208 int l = current_indirect_level + 1; 1209 1210 gcc_assert (lhs_acc.t_code == MEM_REF); 1211 1212 if (!(gimple_assign_copy_p (use_stmt) 1213 || gimple_assign_cast_p (use_stmt)) 1214 || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME)) 1215 mark_min_matrix_escape_level (mi, l, use_stmt); 1216 else 1217 { 1218 gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt)); 1219 analyze_matrix_allocation_site (mi, def_stmt, l, visited); 1220 if (record_accesses) 1221 record_access_alloc_site_info (mi, use_stmt, NULL_TREE, 1222 NULL_TREE, l, true); 1223 update_type_size (mi, use_stmt, NULL, l); 1224 } 1225 return current_indirect_level; 1226 } 1227 /* Now, check the right-hand-side, to see how the SSA variable 1228 is used. */ 1229 if (rhs_acc.var_found) 1230 { 1231 if (rhs_acc.t_code != MEM_REF 1232 && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME) 1233 { 1234 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt); 1235 return current_indirect_level; 1236 } 1237 /* If the access in the RHS has an indirection increase the 1238 indirection level. */ 1239 if (rhs_acc.t_code == MEM_REF) 1240 { 1241 if (record_accesses) 1242 record_access_alloc_site_info (mi, use_stmt, NULL_TREE, 1243 NULL_TREE, 1244 current_indirect_level, true); 1245 current_indirect_level += 1; 1246 } 1247 else if (rhs_acc.t_code == POINTER_PLUS_EXPR) 1248 { 1249 gcc_assert (rhs_acc.second_op); 1250 if (last_op) 1251 /* Currently we support only one PLUS expression on the 1252 SSA_NAME that holds the base address of the current 1253 indirection level; to support more general case there 1254 is a need to hold a stack of expressions and regenerate 1255 the calculation later. */ 1256 mark_min_matrix_escape_level (mi, current_indirect_level, 1257 use_stmt); 1258 else 1259 { 1260 tree index; 1261 tree op1, op2; 1262 1263 op1 = gimple_assign_rhs1 (use_stmt); 1264 op2 = gimple_assign_rhs2 (use_stmt); 1265 1266 op2 = (op1 == ssa_var) ? op2 : op1; 1267 if (TREE_CODE (op2) == INTEGER_CST) 1268 index = 1269 build_int_cst (TREE_TYPE (op1), 1270 TREE_INT_CST_LOW (op2) / 1271 int_size_in_bytes (TREE_TYPE (op1))); 1272 else 1273 { 1274 index = 1275 get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2)); 1276 if (index == NULL_TREE) 1277 { 1278 mark_min_matrix_escape_level (mi, 1279 current_indirect_level, 1280 use_stmt); 1281 return current_indirect_level; 1282 } 1283 } 1284 if (record_accesses) 1285 record_access_alloc_site_info (mi, use_stmt, op2, 1286 index, 1287 current_indirect_level, false); 1288 } 1289 } 1290 /* If we are storing this level of indirection mark it as 1291 escaping. */ 1292 if (lhs_acc.t_code == MEM_REF || TREE_CODE (lhs) != SSA_NAME) 1293 { 1294 int l = current_indirect_level; 1295 1296 /* One exception is when we are storing to the matrix 1297 variable itself; this is the case of malloc, we must make 1298 sure that it's the one and only one call to malloc so 1299 we call analyze_matrix_allocation_site to check 1300 this out. */ 1301 if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl) 1302 mark_min_matrix_escape_level (mi, current_indirect_level, 1303 use_stmt); 1304 else 1305 { 1306 /* Also update the escaping level. */ 1307 analyze_matrix_allocation_site (mi, use_stmt, l, visited); 1308 if (record_accesses) 1309 record_access_alloc_site_info (mi, use_stmt, NULL_TREE, 1310 NULL_TREE, l, true); 1311 } 1312 } 1313 else 1314 { 1315 /* We are placing it in an SSA, follow that SSA. */ 1316 analyze_matrix_accesses (mi, lhs, 1317 current_indirect_level, 1318 rhs_acc.t_code == POINTER_PLUS_EXPR, 1319 visited, record_accesses); 1320 } 1321 } 1322 return current_indirect_level; 1323 } 1324 1325 /* Given a SSA_VAR (coming from a use statement of the matrix MI), 1326 follow its uses and level of indirection and find out the minimum 1327 indirection level it escapes in (the highest dimension) and the maximum 1328 level it is accessed in (this will be the actual dimension of the 1329 matrix). The information is accumulated in MI. 1330 We look at the immediate uses, if one escapes we finish; if not, 1331 we make a recursive call for each one of the immediate uses of the 1332 resulting SSA name. */ 1333 static void 1334 analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var, 1335 int current_indirect_level, bool last_op, 1336 sbitmap visited, bool record_accesses) 1337 { 1338 imm_use_iterator imm_iter; 1339 use_operand_p use_p; 1340 1341 update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var, 1342 current_indirect_level); 1343 1344 /* We don't go beyond the escaping level when we are performing the 1345 flattening. NOTE: we keep the last indirection level that doesn't 1346 escape. */ 1347 if (mi->min_indirect_level_escape > -1 1348 && mi->min_indirect_level_escape <= current_indirect_level) 1349 return; 1350 1351 /* Now go over the uses of the SSA_NAME and check how it is used in 1352 each one of them. We are mainly looking for the pattern MEM_REF, 1353 then a POINTER_PLUS_EXPR, then MEM_REF etc. while in between there could 1354 be any number of copies and casts. */ 1355 gcc_assert (TREE_CODE (ssa_var) == SSA_NAME); 1356 1357 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var) 1358 { 1359 gimple use_stmt = USE_STMT (use_p); 1360 if (gimple_code (use_stmt) == GIMPLE_PHI) 1361 /* We check all the escaping levels that get to the PHI node 1362 and make sure they are all the same escaping; 1363 if not (which is rare) we let the escaping level be the 1364 minimum level that gets into that PHI because starting from 1365 that level we cannot expect the behavior of the indirections. */ 1366 1367 analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level, 1368 visited, record_accesses); 1369 1370 else if (is_gimple_call (use_stmt)) 1371 analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt, 1372 current_indirect_level); 1373 else if (is_gimple_assign (use_stmt)) 1374 current_indirect_level = 1375 analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt, 1376 current_indirect_level, last_op, 1377 visited, record_accesses); 1378 } 1379 } 1380 1381 typedef struct 1382 { 1383 tree fn; 1384 gimple stmt; 1385 } check_var_data; 1386 1387 /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of 1388 the malloc size expression and check that those aren't changed 1389 over the function. */ 1390 static tree 1391 check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data) 1392 { 1393 basic_block bb; 1394 tree t = *tp; 1395 check_var_data *callback_data = (check_var_data*) data; 1396 tree fn = callback_data->fn; 1397 gimple_stmt_iterator gsi; 1398 gimple stmt; 1399 1400 if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL) 1401 return NULL_TREE; 1402 1403 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn)) 1404 { 1405 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1406 { 1407 stmt = gsi_stmt (gsi); 1408 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) 1409 continue; 1410 if (gimple_get_lhs (stmt) == t) 1411 { 1412 callback_data->stmt = stmt; 1413 return t; 1414 } 1415 } 1416 } 1417 *walk_subtrees = 1; 1418 return NULL_TREE; 1419 } 1420 1421 /* Go backwards in the use-def chains and find out the expression 1422 represented by the possible SSA name in STMT, until it is composed 1423 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes 1424 we make sure that all the arguments represent the same subexpression, 1425 otherwise we fail. */ 1426 1427 static tree 1428 can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited) 1429 { 1430 tree op1, op2, res; 1431 enum tree_code code; 1432 1433 switch (gimple_code (stmt)) 1434 { 1435 case GIMPLE_ASSIGN: 1436 code = gimple_assign_rhs_code (stmt); 1437 op1 = gimple_assign_rhs1 (stmt); 1438 1439 switch (code) 1440 { 1441 case POINTER_PLUS_EXPR: 1442 case PLUS_EXPR: 1443 case MINUS_EXPR: 1444 case MULT_EXPR: 1445 1446 op2 = gimple_assign_rhs2 (stmt); 1447 op1 = can_calculate_expr_before_stmt (op1, visited); 1448 if (!op1) 1449 return NULL_TREE; 1450 op2 = can_calculate_expr_before_stmt (op2, visited); 1451 if (op2) 1452 return fold_build2 (code, gimple_expr_type (stmt), op1, op2); 1453 return NULL_TREE; 1454 1455 CASE_CONVERT: 1456 res = can_calculate_expr_before_stmt (op1, visited); 1457 if (res != NULL_TREE) 1458 return build1 (code, gimple_expr_type (stmt), res); 1459 else 1460 return NULL_TREE; 1461 1462 default: 1463 if (gimple_assign_single_p (stmt)) 1464 return can_calculate_expr_before_stmt (op1, visited); 1465 else 1466 return NULL_TREE; 1467 } 1468 1469 case GIMPLE_PHI: 1470 { 1471 size_t j; 1472 1473 res = NULL_TREE; 1474 /* Make sure all the arguments represent the same value. */ 1475 for (j = 0; j < gimple_phi_num_args (stmt); j++) 1476 { 1477 tree new_res; 1478 tree def = PHI_ARG_DEF (stmt, j); 1479 1480 new_res = can_calculate_expr_before_stmt (def, visited); 1481 if (res == NULL_TREE) 1482 res = new_res; 1483 else if (!new_res || !expressions_equal_p (res, new_res)) 1484 return NULL_TREE; 1485 } 1486 return res; 1487 } 1488 1489 default: 1490 return NULL_TREE; 1491 } 1492 } 1493 1494 /* Go backwards in the use-def chains and find out the expression 1495 represented by the possible SSA name in EXPR, until it is composed 1496 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes 1497 we make sure that all the arguments represent the same subexpression, 1498 otherwise we fail. */ 1499 static tree 1500 can_calculate_expr_before_stmt (tree expr, sbitmap visited) 1501 { 1502 gimple def_stmt; 1503 tree res; 1504 1505 switch (TREE_CODE (expr)) 1506 { 1507 case SSA_NAME: 1508 /* Case of loop, we don't know to represent this expression. */ 1509 if (TEST_BIT (visited, SSA_NAME_VERSION (expr))) 1510 return NULL_TREE; 1511 1512 SET_BIT (visited, SSA_NAME_VERSION (expr)); 1513 def_stmt = SSA_NAME_DEF_STMT (expr); 1514 res = can_calculate_stmt_before_stmt (def_stmt, visited); 1515 RESET_BIT (visited, SSA_NAME_VERSION (expr)); 1516 return res; 1517 case VAR_DECL: 1518 case PARM_DECL: 1519 case INTEGER_CST: 1520 return expr; 1521 1522 default: 1523 return NULL_TREE; 1524 } 1525 } 1526 1527 /* There should be only one allocation function for the dimensions 1528 that don't escape. Here we check the allocation sites in this 1529 function. We must make sure that all the dimensions are allocated 1530 using malloc and that the malloc size parameter expression could be 1531 pre-calculated before the call to the malloc of dimension 0. 1532 1533 Given a candidate matrix for flattening -- MI -- check if it's 1534 appropriate for flattening -- we analyze the allocation 1535 sites that we recorded in the previous analysis. The result of the 1536 analysis is a level of indirection (matrix dimension) in which the 1537 flattening is safe. We check the following conditions: 1538 1. There is only one allocation site for each dimension. 1539 2. The allocation sites of all the dimensions are in the same 1540 function. 1541 (The above two are being taken care of during the analysis when 1542 we check the allocation site). 1543 3. All the dimensions that we flatten are allocated at once; thus 1544 the total size must be known before the allocation of the 1545 dimension 0 (top level) -- we must make sure we represent the 1546 size of the allocation as an expression of global parameters or 1547 constants and that those doesn't change over the function. */ 1548 1549 static int 1550 check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED) 1551 { 1552 int level; 1553 struct matrix_info *mi = (struct matrix_info *) *slot; 1554 sbitmap visited; 1555 1556 if (!mi->malloc_for_level) 1557 return 1; 1558 1559 visited = sbitmap_alloc (num_ssa_names); 1560 1561 /* Do nothing if the current function is not the allocation 1562 function of MI. */ 1563 if (mi->allocation_function_decl != current_function_decl 1564 /* We aren't in the main allocation function yet. */ 1565 || !mi->malloc_for_level[0]) 1566 return 1; 1567 1568 for (level = 1; level < mi->max_malloced_level; level++) 1569 if (!mi->malloc_for_level[level]) 1570 break; 1571 1572 mark_min_matrix_escape_level (mi, level, NULL); 1573 1574 /* Check if the expression of the size passed to malloc could be 1575 pre-calculated before the malloc of level 0. */ 1576 for (level = 1; level < mi->min_indirect_level_escape; level++) 1577 { 1578 gimple call_stmt; 1579 tree size; 1580 struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE}; 1581 1582 call_stmt = mi->malloc_for_level[level]; 1583 1584 /* Find the correct malloc information. */ 1585 collect_data_for_malloc_call (call_stmt, &mcd); 1586 1587 /* No need to check anticipation for constants. */ 1588 if (TREE_CODE (mcd.size_var) == INTEGER_CST) 1589 { 1590 if (!mi->dimension_size) 1591 { 1592 mi->dimension_size = 1593 (tree *) xcalloc (mi->min_indirect_level_escape, 1594 sizeof (tree)); 1595 mi->dimension_size_orig = 1596 (tree *) xcalloc (mi->min_indirect_level_escape, 1597 sizeof (tree)); 1598 } 1599 mi->dimension_size[level] = mcd.size_var; 1600 mi->dimension_size_orig[level] = mcd.size_var; 1601 continue; 1602 } 1603 /* ??? Here we should also add the way to calculate the size 1604 expression not only know that it is anticipated. */ 1605 sbitmap_zero (visited); 1606 size = can_calculate_expr_before_stmt (mcd.size_var, visited); 1607 if (size == NULL_TREE) 1608 { 1609 mark_min_matrix_escape_level (mi, level, call_stmt); 1610 if (dump_file) 1611 fprintf (dump_file, 1612 "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n", 1613 get_name (mi->decl), level); 1614 break; 1615 } 1616 if (!mi->dimension_size) 1617 { 1618 mi->dimension_size = 1619 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree)); 1620 mi->dimension_size_orig = 1621 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree)); 1622 } 1623 mi->dimension_size[level] = size; 1624 mi->dimension_size_orig[level] = size; 1625 } 1626 1627 /* We don't need those anymore. */ 1628 for (level = mi->min_indirect_level_escape; 1629 level < mi->max_malloced_level; level++) 1630 mi->malloc_for_level[level] = NULL; 1631 return 1; 1632 } 1633 1634 /* Track all access and allocation sites. */ 1635 static void 1636 find_sites_in_func (bool record) 1637 { 1638 sbitmap visited_stmts_1; 1639 1640 gimple_stmt_iterator gsi; 1641 gimple stmt; 1642 basic_block bb; 1643 struct matrix_info tmpmi, *mi; 1644 1645 visited_stmts_1 = sbitmap_alloc (num_ssa_names); 1646 1647 FOR_EACH_BB (bb) 1648 { 1649 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1650 { 1651 tree lhs; 1652 1653 stmt = gsi_stmt (gsi); 1654 lhs = gimple_get_lhs (stmt); 1655 if (lhs != NULL_TREE 1656 && TREE_CODE (lhs) == VAR_DECL) 1657 { 1658 tmpmi.decl = lhs; 1659 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, 1660 &tmpmi))) 1661 { 1662 sbitmap_zero (visited_stmts_1); 1663 analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1); 1664 } 1665 } 1666 if (is_gimple_assign (stmt) 1667 && gimple_assign_single_p (stmt) 1668 && TREE_CODE (lhs) == SSA_NAME 1669 && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL) 1670 { 1671 tmpmi.decl = gimple_assign_rhs1 (stmt); 1672 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, 1673 &tmpmi))) 1674 { 1675 sbitmap_zero (visited_stmts_1); 1676 analyze_matrix_accesses (mi, lhs, 0, 1677 false, visited_stmts_1, record); 1678 } 1679 } 1680 } 1681 } 1682 sbitmap_free (visited_stmts_1); 1683 } 1684 1685 /* Traverse the use-def chains to see if there are matrices that 1686 are passed through pointers and we cannot know how they are accessed. 1687 For each SSA-name defined by a global variable of our interest, 1688 we traverse the use-def chains of the SSA and follow the indirections, 1689 and record in what level of indirection the use of the variable 1690 escapes. A use of a pointer escapes when it is passed to a function, 1691 stored into memory or assigned (except in malloc and free calls). */ 1692 1693 static void 1694 record_all_accesses_in_func (void) 1695 { 1696 unsigned i; 1697 sbitmap visited_stmts_1; 1698 1699 visited_stmts_1 = sbitmap_alloc (num_ssa_names); 1700 1701 for (i = 0; i < num_ssa_names; i++) 1702 { 1703 struct matrix_info tmpmi, *mi; 1704 tree ssa_var = ssa_name (i); 1705 tree rhs, lhs; 1706 1707 if (!ssa_var 1708 || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var)) 1709 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var))) 1710 continue; 1711 rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var)); 1712 lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var)); 1713 if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL) 1714 continue; 1715 1716 /* If the RHS is a matrix that we want to analyze, follow the def-use 1717 chain for this SSA_VAR and check for escapes or apply the 1718 flattening. */ 1719 tmpmi.decl = rhs; 1720 if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi))) 1721 { 1722 /* This variable will track the visited PHI nodes, so we can limit 1723 its size to the maximum number of SSA names. */ 1724 sbitmap_zero (visited_stmts_1); 1725 analyze_matrix_accesses (mi, ssa_var, 1726 0, false, visited_stmts_1, true); 1727 1728 } 1729 } 1730 sbitmap_free (visited_stmts_1); 1731 } 1732 1733 /* Used when we want to convert the expression: RESULT = something * 1734 ORIG to RESULT = something * NEW_VAL. If ORIG and NEW_VAL are power 1735 of 2, shift operations can be done, else division and 1736 multiplication. */ 1737 1738 static tree 1739 compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result) 1740 { 1741 1742 int x, y; 1743 tree result1, ratio, log, orig_tree, new_tree; 1744 1745 x = exact_log2 (orig); 1746 y = exact_log2 (new_val); 1747 1748 if (x != -1 && y != -1) 1749 { 1750 if (x == y) 1751 return result; 1752 else if (x > y) 1753 { 1754 log = build_int_cst (TREE_TYPE (result), x - y); 1755 result1 = 1756 fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log); 1757 return result1; 1758 } 1759 log = build_int_cst (TREE_TYPE (result), y - x); 1760 result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log); 1761 1762 return result1; 1763 } 1764 orig_tree = build_int_cst (TREE_TYPE (result), orig); 1765 new_tree = build_int_cst (TREE_TYPE (result), new_val); 1766 ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree); 1767 result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree); 1768 1769 return result1; 1770 } 1771 1772 1773 /* We know that we are allowed to perform matrix flattening (according to the 1774 escape analysis), so we traverse the use-def chains of the SSA vars 1775 defined by the global variables pointing to the matrices of our interest. 1776 in each use of the SSA we calculate the offset from the base address 1777 according to the following equation: 1778 1779 a[I1][I2]...[Ik] , where D1..Dk is the length of each dimension and the 1780 escaping level is m <= k, and a' is the new allocated matrix, 1781 will be translated to : 1782 1783 b[I(m+1)]...[Ik] 1784 1785 where 1786 b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im 1787 */ 1788 1789 static int 1790 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED) 1791 { 1792 gimple_stmt_iterator gsi; 1793 struct matrix_info *mi = (struct matrix_info *) *slot; 1794 int min_escape_l = mi->min_indirect_level_escape; 1795 struct access_site_info *acc_info; 1796 enum tree_code code; 1797 int i; 1798 1799 if (min_escape_l < 2 || !mi->access_l) 1800 return 1; 1801 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info); 1802 i++) 1803 { 1804 /* This is possible because we collect the access sites before 1805 we determine the final minimum indirection level. */ 1806 if (acc_info->level >= min_escape_l) 1807 { 1808 free (acc_info); 1809 continue; 1810 } 1811 if (acc_info->is_alloc) 1812 { 1813 if (acc_info->level >= 0 && gimple_bb (acc_info->stmt)) 1814 { 1815 ssa_op_iter iter; 1816 tree def; 1817 gimple stmt = acc_info->stmt; 1818 tree lhs; 1819 1820 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF) 1821 mark_sym_for_renaming (SSA_NAME_VAR (def)); 1822 gsi = gsi_for_stmt (stmt); 1823 gcc_assert (is_gimple_assign (acc_info->stmt)); 1824 lhs = gimple_assign_lhs (acc_info->stmt); 1825 if (TREE_CODE (lhs) == SSA_NAME 1826 && acc_info->level < min_escape_l - 1) 1827 { 1828 imm_use_iterator imm_iter; 1829 use_operand_p use_p; 1830 gimple use_stmt; 1831 1832 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs) 1833 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 1834 { 1835 tree rhs, tmp; 1836 gimple new_stmt; 1837 1838 gcc_assert (gimple_assign_rhs_code (acc_info->stmt) 1839 == MEM_REF); 1840 /* Emit convert statement to convert to type of use. */ 1841 tmp = create_tmp_var (TREE_TYPE (lhs), "new"); 1842 add_referenced_var (tmp); 1843 rhs = gimple_assign_rhs1 (acc_info->stmt); 1844 rhs = fold_convert (TREE_TYPE (tmp), 1845 TREE_OPERAND (rhs, 0)); 1846 new_stmt = gimple_build_assign (tmp, rhs); 1847 tmp = make_ssa_name (tmp, new_stmt); 1848 gimple_assign_set_lhs (new_stmt, tmp); 1849 gsi = gsi_for_stmt (acc_info->stmt); 1850 gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT); 1851 SET_USE (use_p, tmp); 1852 } 1853 } 1854 if (acc_info->level < min_escape_l - 1) 1855 gsi_remove (&gsi, true); 1856 } 1857 free (acc_info); 1858 continue; 1859 } 1860 code = gimple_assign_rhs_code (acc_info->stmt); 1861 if (code == MEM_REF 1862 && acc_info->level < min_escape_l - 1) 1863 { 1864 /* Replace the MEM_REF with NOP (cast) usually we are casting 1865 from "pointer to type" to "type". */ 1866 tree t = 1867 build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)), 1868 TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0)); 1869 gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR); 1870 gimple_assign_set_rhs1 (acc_info->stmt, t); 1871 } 1872 else if (code == POINTER_PLUS_EXPR 1873 && acc_info->level < (min_escape_l)) 1874 { 1875 imm_use_iterator imm_iter; 1876 use_operand_p use_p; 1877 1878 tree offset; 1879 int k = acc_info->level; 1880 tree num_elements, total_elements; 1881 tree tmp1; 1882 tree d_size = mi->dimension_size[k]; 1883 1884 /* We already make sure in the analysis that the first operand 1885 is the base and the second is the offset. */ 1886 offset = acc_info->offset; 1887 if (mi->dim_map[k] == min_escape_l - 1) 1888 { 1889 if (!check_transpose_p || mi->is_transposed_p == false) 1890 tmp1 = offset; 1891 else 1892 { 1893 tree new_offset; 1894 1895 new_offset = 1896 compute_offset (mi->dimension_type_size[min_escape_l], 1897 mi->dimension_type_size[k + 1], offset); 1898 1899 total_elements = new_offset; 1900 if (new_offset != offset) 1901 { 1902 gsi = gsi_for_stmt (acc_info->stmt); 1903 tmp1 = force_gimple_operand_gsi (&gsi, total_elements, 1904 true, NULL, 1905 true, GSI_SAME_STMT); 1906 } 1907 else 1908 tmp1 = offset; 1909 } 1910 } 1911 else 1912 { 1913 d_size = mi->dimension_size[mi->dim_map[k] + 1]; 1914 num_elements = 1915 fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index), 1916 fold_convert (sizetype, d_size)); 1917 add_referenced_var (d_size); 1918 gsi = gsi_for_stmt (acc_info->stmt); 1919 tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true, 1920 NULL, true, GSI_SAME_STMT); 1921 } 1922 /* Replace the offset if needed. */ 1923 if (tmp1 != offset) 1924 { 1925 if (TREE_CODE (offset) == SSA_NAME) 1926 { 1927 gimple use_stmt; 1928 1929 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset) 1930 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 1931 if (use_stmt == acc_info->stmt) 1932 SET_USE (use_p, tmp1); 1933 } 1934 else 1935 { 1936 gcc_assert (TREE_CODE (offset) == INTEGER_CST); 1937 gimple_assign_set_rhs2 (acc_info->stmt, tmp1); 1938 update_stmt (acc_info->stmt); 1939 } 1940 } 1941 } 1942 /* ??? meanwhile this happens because we record the same access 1943 site more than once; we should be using a hash table to 1944 avoid this and insert the STMT of the access site only 1945 once. 1946 else 1947 gcc_unreachable (); */ 1948 free (acc_info); 1949 } 1950 VEC_free (access_site_info_p, heap, mi->access_l); 1951 1952 update_ssa (TODO_update_ssa); 1953 #ifdef ENABLE_CHECKING 1954 verify_ssa (true); 1955 #endif 1956 return 1; 1957 } 1958 1959 /* Sort A array of counts. Arrange DIM_MAP to reflect the new order. */ 1960 1961 static void 1962 sort_dim_hot_level (gcov_type * a, int *dim_map, int n) 1963 { 1964 int i, j, tmp1; 1965 gcov_type tmp; 1966 1967 for (i = 0; i < n - 1; i++) 1968 { 1969 for (j = 0; j < n - 1 - i; j++) 1970 { 1971 if (a[j + 1] < a[j]) 1972 { 1973 tmp = a[j]; /* swap a[j] and a[j+1] */ 1974 a[j] = a[j + 1]; 1975 a[j + 1] = tmp; 1976 tmp1 = dim_map[j]; 1977 dim_map[j] = dim_map[j + 1]; 1978 dim_map[j + 1] = tmp1; 1979 } 1980 } 1981 } 1982 } 1983 1984 /* Replace multiple mallocs (one for each dimension) to one malloc 1985 with the size of DIM1*DIM2*...*DIMN*size_of_element 1986 Make sure that we hold the size in the malloc site inside a 1987 new global variable; this way we ensure that the size doesn't 1988 change and it is accessible from all the other functions that 1989 uses the matrix. Also, the original calls to free are deleted, 1990 and replaced by a new call to free the flattened matrix. */ 1991 1992 static int 1993 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED) 1994 { 1995 int i; 1996 struct matrix_info *mi; 1997 tree type, oldfn, prev_dim_size; 1998 gimple call_stmt_0, use_stmt; 1999 struct cgraph_node *c_node; 2000 struct cgraph_edge *e; 2001 gimple_stmt_iterator gsi; 2002 struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE}; 2003 HOST_WIDE_INT element_size; 2004 2005 imm_use_iterator imm_iter; 2006 use_operand_p use_p; 2007 tree old_size_0, tmp; 2008 int min_escape_l; 2009 int id; 2010 2011 mi = (struct matrix_info *) *slot; 2012 2013 min_escape_l = mi->min_indirect_level_escape; 2014 2015 if (!mi->malloc_for_level) 2016 mi->min_indirect_level_escape = 0; 2017 2018 if (mi->min_indirect_level_escape < 2) 2019 return 1; 2020 2021 mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int)); 2022 for (i = 0; i < mi->min_indirect_level_escape; i++) 2023 mi->dim_map[i] = i; 2024 if (check_transpose_p) 2025 { 2026 int i; 2027 2028 if (dump_file) 2029 { 2030 fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl)); 2031 for (i = 0; i < min_escape_l; i++) 2032 { 2033 fprintf (dump_file, "dim %d before sort ", i); 2034 if (mi->dim_hot_level) 2035 fprintf (dump_file, 2036 "count is " HOST_WIDEST_INT_PRINT_DEC " \n", 2037 mi->dim_hot_level[i]); 2038 } 2039 } 2040 sort_dim_hot_level (mi->dim_hot_level, mi->dim_map, 2041 mi->min_indirect_level_escape); 2042 if (dump_file) 2043 for (i = 0; i < min_escape_l; i++) 2044 { 2045 fprintf (dump_file, "dim %d after sort\n", i); 2046 if (mi->dim_hot_level) 2047 fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC 2048 " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]); 2049 } 2050 for (i = 0; i < mi->min_indirect_level_escape; i++) 2051 { 2052 if (dump_file) 2053 fprintf (dump_file, "dim_map[%d] after sort %d\n", i, 2054 mi->dim_map[i]); 2055 if (mi->dim_map[i] != i) 2056 { 2057 if (dump_file) 2058 fprintf (dump_file, 2059 "Transposed dimensions: dim %d is now dim %d\n", 2060 mi->dim_map[i], i); 2061 mi->is_transposed_p = true; 2062 } 2063 } 2064 } 2065 else 2066 { 2067 for (i = 0; i < mi->min_indirect_level_escape; i++) 2068 mi->dim_map[i] = i; 2069 } 2070 /* Call statement of allocation site of level 0. */ 2071 call_stmt_0 = mi->malloc_for_level[0]; 2072 2073 /* Finds the correct malloc information. */ 2074 collect_data_for_malloc_call (call_stmt_0, &mcd); 2075 2076 mi->dimension_size[0] = mcd.size_var; 2077 mi->dimension_size_orig[0] = mcd.size_var; 2078 /* Make sure that the variables in the size expression for 2079 all the dimensions (above level 0) aren't modified in 2080 the allocation function. */ 2081 for (i = 1; i < mi->min_indirect_level_escape; i++) 2082 { 2083 tree t; 2084 check_var_data data; 2085 2086 /* mi->dimension_size must contain the expression of the size calculated 2087 in check_allocation_function. */ 2088 gcc_assert (mi->dimension_size[i]); 2089 2090 data.fn = mi->allocation_function_decl; 2091 data.stmt = NULL; 2092 t = walk_tree_without_duplicates (&(mi->dimension_size[i]), 2093 check_var_notmodified_p, 2094 &data); 2095 if (t != NULL_TREE) 2096 { 2097 mark_min_matrix_escape_level (mi, i, data.stmt); 2098 break; 2099 } 2100 } 2101 2102 if (mi->min_indirect_level_escape < 2) 2103 return 1; 2104 2105 /* Since we should make sure that the size expression is available 2106 before the call to malloc of level 0. */ 2107 gsi = gsi_for_stmt (call_stmt_0); 2108 2109 /* Find out the size of each dimension by looking at the malloc 2110 sites and create a global variable to hold it. 2111 We add the assignment to the global before the malloc of level 0. */ 2112 2113 /* To be able to produce gimple temporaries. */ 2114 oldfn = current_function_decl; 2115 current_function_decl = mi->allocation_function_decl; 2116 push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl)); 2117 2118 /* Set the dimension sizes as follows: 2119 DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i] 2120 where n is the maximum non escaping level. */ 2121 element_size = mi->dimension_type_size[mi->min_indirect_level_escape]; 2122 prev_dim_size = NULL_TREE; 2123 2124 for (i = mi->min_indirect_level_escape - 1; i >= 0; i--) 2125 { 2126 tree dim_size, dim_var; 2127 gimple stmt; 2128 tree d_type_size; 2129 2130 /* Now put the size expression in a global variable and initialize it to 2131 the size expression before the malloc of level 0. */ 2132 dim_var = 2133 add_new_static_var (TREE_TYPE 2134 (mi->dimension_size_orig[mi->dim_map[i]])); 2135 type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]); 2136 2137 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */ 2138 /* Find which dim ID becomes dim I. */ 2139 for (id = 0; id < mi->min_indirect_level_escape; id++) 2140 if (mi->dim_map[id] == i) 2141 break; 2142 d_type_size = 2143 build_int_cst (type, mi->dimension_type_size[id + 1]); 2144 if (!prev_dim_size) 2145 prev_dim_size = build_int_cst (type, element_size); 2146 if (!check_transpose_p && i == mi->min_indirect_level_escape - 1) 2147 { 2148 dim_size = mi->dimension_size_orig[id]; 2149 } 2150 else 2151 { 2152 dim_size = 2153 fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id], 2154 d_type_size); 2155 2156 dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size); 2157 } 2158 dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL, 2159 true, GSI_SAME_STMT); 2160 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */ 2161 stmt = gimple_build_assign (dim_var, dim_size); 2162 mark_symbols_for_renaming (stmt); 2163 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 2164 2165 prev_dim_size = mi->dimension_size[i] = dim_var; 2166 } 2167 update_ssa (TODO_update_ssa); 2168 /* Replace the malloc size argument in the malloc of level 0 to be 2169 the size of all the dimensions. */ 2170 c_node = cgraph_get_node (mi->allocation_function_decl); 2171 gcc_checking_assert (c_node); 2172 old_size_0 = gimple_call_arg (call_stmt_0, 0); 2173 tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true, 2174 NULL, true, GSI_SAME_STMT); 2175 if (TREE_CODE (old_size_0) == SSA_NAME) 2176 { 2177 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0) 2178 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 2179 if (use_stmt == call_stmt_0) 2180 SET_USE (use_p, tmp); 2181 } 2182 /* When deleting the calls to malloc we need also to remove the edge from 2183 the call graph to keep it consistent. Notice that cgraph_edge may 2184 create a new node in the call graph if there is no node for the given 2185 declaration; this shouldn't be the case but currently there is no way to 2186 check this outside of "cgraph.c". */ 2187 for (i = 1; i < mi->min_indirect_level_escape; i++) 2188 { 2189 gimple_stmt_iterator gsi; 2190 2191 gimple call_stmt = mi->malloc_for_level[i]; 2192 gcc_assert (is_gimple_call (call_stmt)); 2193 e = cgraph_edge (c_node, call_stmt); 2194 gcc_assert (e); 2195 cgraph_remove_edge (e); 2196 gsi = gsi_for_stmt (call_stmt); 2197 /* Remove the call stmt. */ 2198 gsi_remove (&gsi, true); 2199 /* Remove the assignment of the allocated area. */ 2200 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, 2201 gimple_call_lhs (call_stmt)) 2202 { 2203 gsi = gsi_for_stmt (use_stmt); 2204 gsi_remove (&gsi, true); 2205 } 2206 } 2207 update_ssa (TODO_update_ssa); 2208 #ifdef ENABLE_CHECKING 2209 verify_ssa (true); 2210 #endif 2211 /* Delete the calls to free. */ 2212 for (i = 1; i < mi->min_indirect_level_escape; i++) 2213 { 2214 gimple_stmt_iterator gsi; 2215 2216 /* ??? wonder why this case is possible but we failed on it once. */ 2217 if (!mi->free_stmts[i].stmt) 2218 continue; 2219 2220 c_node = cgraph_get_node (mi->free_stmts[i].func); 2221 gcc_checking_assert (c_node); 2222 gcc_assert (is_gimple_call (mi->free_stmts[i].stmt)); 2223 e = cgraph_edge (c_node, mi->free_stmts[i].stmt); 2224 gcc_assert (e); 2225 cgraph_remove_edge (e); 2226 current_function_decl = mi->free_stmts[i].func; 2227 set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func)); 2228 gsi = gsi_for_stmt (mi->free_stmts[i].stmt); 2229 gsi_remove (&gsi, true); 2230 } 2231 /* Return to the previous situation. */ 2232 current_function_decl = oldfn; 2233 pop_cfun (); 2234 return 1; 2235 2236 } 2237 2238 2239 /* Print out the results of the escape analysis. */ 2240 static int 2241 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED) 2242 { 2243 struct matrix_info *mi = (struct matrix_info *) *slot; 2244 2245 if (!dump_file) 2246 return 1; 2247 fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,", 2248 get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims); 2249 fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level); 2250 fprintf (dump_file, "\n"); 2251 if (mi->min_indirect_level_escape >= 2) 2252 fprintf (dump_file, "Flattened %d dimensions \n", 2253 mi->min_indirect_level_escape); 2254 return 1; 2255 } 2256 2257 /* Perform matrix flattening. */ 2258 2259 static unsigned int 2260 matrix_reorg (void) 2261 { 2262 struct cgraph_node *node; 2263 2264 if (profile_info) 2265 check_transpose_p = true; 2266 else 2267 check_transpose_p = false; 2268 /* If there are hand written vectors, we skip this optimization. */ 2269 for (node = cgraph_nodes; node; node = node->next) 2270 if (!may_flatten_matrices (node)) 2271 return 0; 2272 matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free); 2273 /* Find and record all potential matrices in the program. */ 2274 find_matrices_decl (); 2275 /* Analyze the accesses of the matrices (escaping analysis). */ 2276 for (node = cgraph_nodes; node; node = node->next) 2277 if (node->analyzed) 2278 { 2279 tree temp_fn; 2280 2281 temp_fn = current_function_decl; 2282 current_function_decl = node->decl; 2283 push_cfun (DECL_STRUCT_FUNCTION (node->decl)); 2284 bitmap_obstack_initialize (NULL); 2285 gimple_register_cfg_hooks (); 2286 2287 if (!gimple_in_ssa_p (cfun)) 2288 { 2289 free_dominance_info (CDI_DOMINATORS); 2290 free_dominance_info (CDI_POST_DOMINATORS); 2291 pop_cfun (); 2292 current_function_decl = temp_fn; 2293 bitmap_obstack_release (NULL); 2294 2295 return 0; 2296 } 2297 2298 #ifdef ENABLE_CHECKING 2299 verify_flow_info (); 2300 #endif 2301 2302 if (!matrices_to_reorg) 2303 { 2304 free_dominance_info (CDI_DOMINATORS); 2305 free_dominance_info (CDI_POST_DOMINATORS); 2306 pop_cfun (); 2307 current_function_decl = temp_fn; 2308 bitmap_obstack_release (NULL); 2309 2310 return 0; 2311 } 2312 2313 /* Create htap for phi nodes. */ 2314 htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash, 2315 mat_acc_phi_eq, free); 2316 if (!check_transpose_p) 2317 find_sites_in_func (false); 2318 else 2319 { 2320 find_sites_in_func (true); 2321 loop_optimizer_init (LOOPS_NORMAL); 2322 if (current_loops) 2323 scev_initialize (); 2324 htab_traverse (matrices_to_reorg, analyze_transpose, NULL); 2325 if (current_loops) 2326 { 2327 scev_finalize (); 2328 loop_optimizer_finalize (); 2329 current_loops = NULL; 2330 } 2331 } 2332 /* If the current function is the allocation function for any of 2333 the matrices we check its allocation and the escaping level. */ 2334 htab_traverse (matrices_to_reorg, check_allocation_function, NULL); 2335 free_dominance_info (CDI_DOMINATORS); 2336 free_dominance_info (CDI_POST_DOMINATORS); 2337 pop_cfun (); 2338 current_function_decl = temp_fn; 2339 bitmap_obstack_release (NULL); 2340 } 2341 htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL); 2342 /* Now transform the accesses. */ 2343 for (node = cgraph_nodes; node; node = node->next) 2344 if (node->analyzed) 2345 { 2346 /* Remember that allocation sites have been handled. */ 2347 tree temp_fn; 2348 2349 temp_fn = current_function_decl; 2350 current_function_decl = node->decl; 2351 push_cfun (DECL_STRUCT_FUNCTION (node->decl)); 2352 bitmap_obstack_initialize (NULL); 2353 gimple_register_cfg_hooks (); 2354 record_all_accesses_in_func (); 2355 htab_traverse (matrices_to_reorg, transform_access_sites, NULL); 2356 cgraph_rebuild_references (); 2357 free_dominance_info (CDI_DOMINATORS); 2358 free_dominance_info (CDI_POST_DOMINATORS); 2359 pop_cfun (); 2360 current_function_decl = temp_fn; 2361 bitmap_obstack_release (NULL); 2362 } 2363 htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL); 2364 2365 current_function_decl = NULL; 2366 set_cfun (NULL); 2367 matrices_to_reorg = NULL; 2368 return 0; 2369 } 2370 2371 2372 /* The condition for matrix flattening to be performed. */ 2373 static bool 2374 gate_matrix_reorg (void) 2375 { 2376 return flag_ipa_matrix_reorg && flag_whole_program; 2377 } 2378 2379 struct simple_ipa_opt_pass pass_ipa_matrix_reorg = 2380 { 2381 { 2382 SIMPLE_IPA_PASS, 2383 "matrix-reorg", /* name */ 2384 gate_matrix_reorg, /* gate */ 2385 matrix_reorg, /* execute */ 2386 NULL, /* sub */ 2387 NULL, /* next */ 2388 0, /* static_pass_number */ 2389 TV_NONE, /* tv_id */ 2390 0, /* properties_required */ 2391 0, /* properties_provided */ 2392 0, /* properties_destroyed */ 2393 0, /* todo_flags_start */ 2394 TODO_dump_cgraph /* todo_flags_finish */ 2395 } 2396 }; 2397