1 /* Decompose multiword subregs. 2 Copyright (C) 2007-2018 Free Software Foundation, Inc. 3 Contributed by Richard Henderson <rth@redhat.com> 4 Ian Lance Taylor <iant@google.com> 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free 10 Software Foundation; either version 3, or (at your option) any later 11 version. 12 13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14 WARRANTY; without even the implied warranty of MERCHANTABILITY or 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "backend.h" 26 #include "rtl.h" 27 #include "tree.h" 28 #include "cfghooks.h" 29 #include "df.h" 30 #include "memmodel.h" 31 #include "tm_p.h" 32 #include "expmed.h" 33 #include "insn-config.h" 34 #include "emit-rtl.h" 35 #include "recog.h" 36 #include "cfgrtl.h" 37 #include "cfgbuild.h" 38 #include "dce.h" 39 #include "expr.h" 40 #include "tree-pass.h" 41 #include "lower-subreg.h" 42 #include "rtl-iter.h" 43 #include "target.h" 44 45 46 /* Decompose multi-word pseudo-registers into individual 47 pseudo-registers when possible and profitable. This is possible 48 when all the uses of a multi-word register are via SUBREG, or are 49 copies of the register to another location. Breaking apart the 50 register permits more CSE and permits better register allocation. 51 This is profitable if the machine does not have move instructions 52 to do this. 53 54 This pass only splits moves with modes that are wider than 55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with 56 integer modes that are twice the width of word_mode. 
The latter
   could be generalized if there was a need to do this, but the trend in
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts.  This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformation forced on.
   This can be useful for finding bugs in the transformations.  */

#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

/* Per-target cost analysis results; see lower-subreg.h for the
   layout of target_lower_subreg.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices

/* Return true if MODE is a mode we know how to lower.  When returning true,
   store its byte size in *BYTES and its word size in *WORDS.
*/

static inline bool
interesting_mode_p (machine_mode mode, unsigned int *bytes,
		    unsigned int *words)
{
  /* Variable-sized modes cannot be decomposed into words.  */
  if (!GET_MODE_SIZE (mode).is_constant (bytes))
    return false;
  *words = CEIL (*bytes, UNITS_PER_WORD);
  return true;
}

/* RTXes used while computing costs.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};

/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
   rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */

static int
shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
	    machine_mode mode, int op1)
{
  /* The shared shift rtx is mutated in place for each query, so
     callers must not cache it across calls.  */
  PUT_CODE (rtxes->shift, code);
  PUT_MODE (rtxes->shift, mode);
  PUT_MODE (rtxes->source, mode);
  XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
  return set_src_cost (rtxes->shift, mode, speed_p);
}

/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.
*/

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, upper_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of the unsplit double-word shift by I + BITS_PER_WORD.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
			      i + BITS_PER_WORD);
      /* Cost of producing the significant result word when split:
	 a shift by zero is just a register-to-register move.  */
      if (i == 0)
	narrow_cost = word_move_cost;
      else
	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      /* Cost of filling the other result word: zero for logical shifts;
	 for arithmetic right shifts it is the sign word, produced by a
	 word move (when the source word is entirely sign bits) or a
	 shift by BITS_PER_WORD - 1.  */
      if (code != ASHIFTRT)
	upper_cost = word_move_zero_cost;
      else if (i == BITS_PER_WORD - 1)
	upper_cost = word_move_cost;
      else
	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
				 BITS_PER_WORD - 1);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);

      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
	splitting[i] = true;
    }
}

/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.
*/

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of loading constant zero into a word-mode register.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      unsigned int size, factor;
      if (interesting_mode_p (mode, &size, &factor) && factor > 1)
	{
	  /* A multi-word move is worth splitting when FACTOR separate
	     word moves are no more expensive than the one wide move.  */
	  unsigned int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}

/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.
*/

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();

  /* Shared scratch rtxes, mutated in place by the cost routines.
     The register numbers just need to be distinct non-hard regs.  */
  rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
  rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
  rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}

/* Return whether X (a side of a move) is an object we can handle:
   a non-constant OBJECT_P rtx, or a SUBREG of one, excluding
   volatile memory and mode-dependent addresses.  */

static bool
simple_move_operand (rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  if (!OBJECT_P (x))
    return false;

  /* Symbolic constants may need relocation; leave them alone.  */
  if (GET_CODE (x) == LABEL_REF
      || GET_CODE (x) == SYMBOL_REF
      || GET_CODE (x) == HIGH
      || GET_CODE (x) == CONST)
    return false;

  /* A volatile access must stay a single access, and a mode-dependent
     address cannot be re-used for the word-mode pieces.  */
  if (MEM_P (x)
      && (MEM_VOLATILE_P (x)
	  || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
    return false;

  return true;
}

/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.
*/

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  /* Relies on extract_insn having filled in recog_data for INSN.  */
  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* Both sides of the set must be the recognized operands themselves,
     not buried inside some larger expression.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Only split moves the cost analysis found profitable.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}

/* If SET is a copy from one multi-word pseudo-register to another,
   record that in reg_copy_graph.  Return whether it is such a
   copy.
*/ 383 384 static bool 385 find_pseudo_copy (rtx set) 386 { 387 rtx dest = SET_DEST (set); 388 rtx src = SET_SRC (set); 389 unsigned int rd, rs; 390 bitmap b; 391 392 if (!REG_P (dest) || !REG_P (src)) 393 return false; 394 395 rd = REGNO (dest); 396 rs = REGNO (src); 397 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs)) 398 return false; 399 400 b = reg_copy_graph[rs]; 401 if (b == NULL) 402 { 403 b = BITMAP_ALLOC (NULL); 404 reg_copy_graph[rs] = b; 405 } 406 407 bitmap_set_bit (b, rd); 408 409 return true; 410 } 411 412 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case 413 where they are copied to another register, add the register to 414 which they are copied to DECOMPOSABLE_CONTEXT. Use 415 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track 416 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */ 417 418 static void 419 propagate_pseudo_copies (void) 420 { 421 auto_bitmap queue, propagate; 422 423 bitmap_copy (queue, decomposable_context); 424 do 425 { 426 bitmap_iterator iter; 427 unsigned int i; 428 429 bitmap_clear (propagate); 430 431 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter) 432 { 433 bitmap b = reg_copy_graph[i]; 434 if (b) 435 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context); 436 } 437 438 bitmap_and_compl (queue, propagate, decomposable_context); 439 bitmap_ior_into (decomposable_context, propagate); 440 } 441 while (!bitmap_empty_p (queue)); 442 } 443 444 /* A pointer to one of these values is passed to 445 find_decomposable_subregs. */ 446 447 enum classify_move_insn 448 { 449 /* Not a simple move from one location to another. */ 450 NOT_SIMPLE_MOVE, 451 /* A simple move we want to decompose. */ 452 DECOMPOSABLE_SIMPLE_MOVE, 453 /* Any other simple move. */ 454 SIMPLE_MOVE 455 }; 456 457 /* If we find a SUBREG in *LOC which we could use to decompose a 458 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. 
If we find an
   unadorned register which is not a simple pseudo-register copy,
   DATA will point at the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are never decomposed; don't look inside.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
	      || !interesting_mode_p (GET_MODE (inner), &inner_size,
				      &inner_words))
	    continue;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we return -1 to avoid iterating
	     over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno, size, words;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && interesting_mode_p (GET_MODE (x), &size, &words)
	      && words > 1)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and return -1 to block the parent's recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}

/* Decompose REGNO into word-sized components.  We smash the REG node
   in place.  This ensures that (1) something goes wrong quickly if we
   fail to make some replacement, and (2) the debug information inside
   the symbol table is automatically kept up to date.  */

static void
decompose_register (unsigned int regno)
{
  rtx reg;
  unsigned int size, words, i;
  rtvec v;

  reg = regno_reg_rtx[regno];

  regno_reg_rtx[regno] = NULL_RTX;

  if (!interesting_mode_p (GET_MODE (reg), &size, &words))
    gcc_unreachable ();

  /* Allocate one word-mode pseudo for each word of REG...  */
  v = rtvec_alloc (words);
  for (i = 0; i < words; ++i)
    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);

  /* ...and smash REG itself into a CONCATN of those pseudos.  */
  PUT_CODE (reg, CONCATN);
  XVEC (reg, 0) = v;

  if (dump_file)
    {
      fprintf (dump_file, "; Splitting reg %u ->", regno);
      for (i = 0; i < words; ++i)
	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
      fputc ('\n', dump_file);
    }
}

/* Get a SUBREG of a CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
{
  unsigned int outer_size, outer_words, inner_size, inner_words;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;
  unsigned int byte;

  innermode = GET_MODE (op);
  if (!interesting_mode_p (outermode, &outer_size, &outer_words)
      || !interesting_mode_p (innermode, &inner_size, &inner_words))
    gcc_unreachable ();

  /* Must be constant if interesting_mode_p passes.
*/
  byte = orig_byte.to_constant ();
  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % outer_size == 0);

  gcc_assert (byte < inner_size);
  if (outer_size > inner_size)
    return NULL_RTX;

  /* After this, INNER_SIZE is the size of one CONCATN element.  */
  inner_size /= XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  final_offset = byte % inner_size;
  if (final_offset + outer_size > inner_size)
    return NULL_RTX;

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    partmode = mode_for_size (inner_size * BITS_PER_UNIT,
			      GET_MODE_CLASS (innermode), 0).require ();

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}

/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* A same-size, offset-0 mode change: recurse on the CONCATN
	 itself with the requested byte unchanged.  */
      if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
		    GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && known_eq (SUBREG_BYTE (op), 0))
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.
*/
	  gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
	  gcc_assert (!paradoxical_subreg_p (op));
	  /* The extraction straddled an element boundary; retry with
	     the combined offset directly into the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && paradoxical_subreg_p (op))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}

/* Return whether we should resolve X into the registers into which it
   was decomposed.  */

static bool
resolve_reg_p (rtx x)
{
  /* decompose_register turns a decomposed REG into a CONCATN.  */
  return GET_CODE (x) == CONCATN;
}

/* Return whether X is a SUBREG of a register which we need to
   resolve.  */

static bool
resolve_subreg_p (rtx x)
{
  if (GET_CODE (x) != SUBREG)
    return false;
  return resolve_reg_p (SUBREG_REG (x));
}

/* Look for SUBREGs in *LOC which need to be decomposed.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return 1 to the caller to indicate
	     that the note must be removed.
*/
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the change; the caller applies the group.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return 1 to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}

/* Resolve any decomposed registers which appear in register notes on
   INSN.  */

static void
resolve_reg_notes (rtx_insn *insn)
{
  rtx *pnote, note;

  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  /* Liveness notes naming a decomposed register are stale.  */
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}

/* Return whether X can be decomposed into subwords.  */

static bool
can_decompose_p (rtx x)
{
  if (REG_P (x))
    {
      unsigned int regno = REGNO (x);

      if (HARD_REGISTER_NUM_P (regno))
	{
	  unsigned int byte, num_bytes, num_words;

	  if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
	    return false;
	  /* A hard register is only decomposable if every word-sized
	     piece is itself addressable as a register.  */
	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
	      return false;
	  return true;
	}
      else
	return !bitmap_bit_p (subreg_context, regno);
    }

  return true;
}

/* Decompose the registers used in a simple move SET within INSN.
If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int orig_size, words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (maybe_ne (SUBREG_BYTE (src), 0)
	  || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (maybe_ne (SUBREG_BYTE (dest), 0)
	  || maybe_ne (orig_size,
		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Copy SRC to a fresh pseudo first, recursively resolving that
	 move, and then copy the pseudo into DEST.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  rtx_insn *move = emit_move_insn (reg, src);
	  if (MEM_P (src))
	    {
	      /* Carry any auto-inc note over to the new copy insn.  */
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	dest_mode = int_mode_for_mode (dest_mode).require ();
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (orig_size % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Push the words in the order the stack expects: which word is
	 pushed first depends on word endianness versus the direction
	 of stack growth.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* The CLOBBER tells the dataflow machinery that the whole
	 register is being set by the piecewise moves below.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      if (AUTO_INC_DEC && MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}

/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int orig_size, words, i;
  int ret;

  reg = XEXP (pat, 0);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();

  /* Replace the original CLOBBER in place with one of word 0...  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* ...and emit separate CLOBBERs for the remaining words.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}

/* A USE of a decomposed register is no longer meaningful.  Return
   whether we changed something.
  */

static bool
resolve_use (rtx pat, rtx_insn *insn)
{
  /* If the USEd value was decomposed, the USE no longer refers to a
     real register; drop the insn entirely.  */
  if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
    {
      delete_insn (insn);
      return true;
    }

  /* Otherwise only the notes may still mention decomposed regs.  */
  resolve_reg_notes (insn);

  return false;
}

/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the debug insn pattern, rewriting references
     to decomposed registers into their concatn replacements.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* If the subreg could be simplified, install the result;
	     otherwise fall back to an unshared copy of the original so
	     the following REG check operates on the copy.  */
	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}

/* Check if INSN is a decomposable multiword-shift or zero-extend and
   set the decomposable_context bitmap accordingly.  SPEED_P is true
   if we are optimizing INSN for speed rather than size.  Return true
   if INSN is decomposable.
  */

static bool
find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
    return false;

  /* Only shifts and zero-extension are candidates for this lowering.  */
  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return false;

  /* Source and destination must both be pseudos, and the result must
     be exactly twice as wide as a word.  */
  op_operand = XEXP (op, 0);
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
      || GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      /* Zero-extension is only lowered from word_mode, and only when
	 the cost analysis decided it is profitable.  */
      if (GET_MODE (op_operand) != word_mode
	  || !choices[speed_p].splitting_zext)
	return false;
    }
  else /* left or right shift */
    {
      /* Pick the cost table matching the shift direction/kind.  */
      bool *splitting = (GET_CODE (op) == ASHIFT
			 ? choices[speed_p].splitting_ashift
			 : GET_CODE (op) == ASHIFTRT
			 ? choices[speed_p].splitting_ashiftrt
			 : choices[speed_p].splitting_lshiftrt);
      /* Only constant shift amounts in [BITS_PER_WORD, 2*BITS_PER_WORD-1]
	 that the table marks as profitable are handled.  */
      if (!CONST_INT_P (XEXP (op, 1))
	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
			2 * BITS_PER_WORD - 1)
	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
	return false;

      /* For a shift the input pseudo must be decomposed too.  */
      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

  return true;
}

/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.
  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
  scalar_int_mode inner_mode;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
    return NULL;

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering of a multiword value is
     reversed.  A word_mode source (the zero-extend case) has only one
     word, so no adjustment is needed then.  */
  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* offset1 addresses the word that receives the (possibly shifted)
     source word, offset2 the other word of the destination.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift (by less than 2*BITS_PER_WORD-1) the
     upper result word holds only copies of the source's sign bit.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by exactly BITS_PER_WORD is just a word move; anything
	 larger also shifts within the surviving word.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill in the other word: zero for logical shifts/zero-extend, sign
     bits for arithmetic right shifts.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}

/* Print to dump_file a description of what we're doing with shift code CODE.
1328 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */ 1329 1330 static void 1331 dump_shift_choices (enum rtx_code code, bool *splitting) 1332 { 1333 int i; 1334 const char *sep; 1335 1336 fprintf (dump_file, 1337 " Splitting mode %s for %s lowering with shift amounts = ", 1338 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code)); 1339 sep = ""; 1340 for (i = 0; i < BITS_PER_WORD; i++) 1341 if (splitting[i]) 1342 { 1343 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD); 1344 sep = ","; 1345 } 1346 fprintf (dump_file, "\n"); 1347 } 1348 1349 /* Print to dump_file a description of what we're doing when optimizing 1350 for speed or size; SPEED_P says which. DESCRIPTION is a description 1351 of the SPEED_P choice. */ 1352 1353 static void 1354 dump_choices (bool speed_p, const char *description) 1355 { 1356 unsigned int size, factor, i; 1357 1358 fprintf (dump_file, "Choices when optimizing for %s:\n", description); 1359 1360 for (i = 0; i < MAX_MACHINE_MODE; i++) 1361 if (interesting_mode_p ((machine_mode) i, &size, &factor) 1362 && factor > 1) 1363 fprintf (dump_file, " %s mode %s for copy lowering.\n", 1364 choices[speed_p].move_modes_to_split[i] 1365 ? "Splitting" 1366 : "Skipping", 1367 GET_MODE_NAME ((machine_mode) i)); 1368 1369 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n", 1370 choices[speed_p].splitting_zext ? "Splitting" : "Skipping", 1371 GET_MODE_NAME (twice_word_mode)); 1372 1373 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift); 1374 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt); 1375 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt); 1376 fprintf (dump_file, "\n"); 1377 } 1378 1379 /* Look for registers which are always accessed via word-sized SUBREGs 1380 or -if DECOMPOSE_COPIES is true- via copies. Decompose these 1381 registers into several word-sized pseudo-registers. 
  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* When dataflow is available, run a word-level DCE first; defer insn
     rescanning while this pass rewrites insns.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Phase 1: scan every insn and classify registers as decomposable or
     not, recording pseudo-to-pseudo copies in reg_copy_graph.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* Registers seen in a non-decomposable context must not be split.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      /* Phase 2: create the word-sized replacement pseudos...  */
      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* ...and rewrite every insn that mentions a decomposed register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  /* Keep matched (duplicate) operands in sync with the
		     operands they duplicate, then commit all changes.  */
		  if (num_validated_changes () > 0)
		    {
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create
		     the exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}
    }

  /* Release the per-register copy bitmaps and the global bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}

/* Implement first lower subreg pass.  */

namespace {

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* First pass: do not decompose pseudo-to-pseudo copies.  */
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}

/* Implement second lower subreg pass.
  */

namespace {

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* Second pass: unlike subreg1, also treat pseudo-to-pseudo
	 copies as decomposable.  */
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace

rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}