/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "langhooks.h"
#include "df.h"
#include "target.h"
#include "expmed.h"

/* Per-target cost tables; with SWITCHABLE_TARGET the pointer can be
   swapped when the active target changes.  */
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

/* Forward declarations for the static helpers defined below.  */
static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif


/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

/* Fill in the per-speed, per-mode cost tables (add_cost, mul_cost,
   shift_cost, ...) used by the expanders in this file.  Costs are
   obtained by building template RTXes on the stack (the ALL struct,
   whose rtunion members provide the operand storage that normally
   follows each rtx_def) and asking set_src_cost for their price,
   once with the size metric and once with the speed metric.  */
void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;		rtunion reg_fld[2];
    struct rtx_def plus;	rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult;	rtunion mult_fld1;
    struct rtx_def sdiv;	rtunion sdiv_fld1;
    struct rtx_def udiv;	rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
    struct rtx_def smod_32;	rtunion smod_32_fld1;
    struct rtx_def wide_mult;	rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;	rtunion shift_fld1;
    struct rtx_def shift_mult;	rtunion shift_mult_fld1;
    struct rtx_def shift_add;	rtunion shift_add_fld1;
    struct rtx_def shift_sub0;	rtunion shift_sub0_fld1;
    struct rtx_def shift_sub1;	rtunion shift_sub1_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;
  int speed;


  /* Cache CONST_INTs for the shift counts and multiplier powers of two
     used in the shift/shift-add loops below.  */
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }
  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  /* Division/modulus by the constant 32 is the probe used to decide
     whether dividing by a power of 2 is cheap on this target.  */
  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  /* (truncate (lshiftrt (mult (zext r) (zext r)) N)) models a
     highpart multiply, costed below as mul_highpart_cost.  */
  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;

  /* SPEED == 0 costs for size, SPEED == 1 costs for speed; the rtl
     profile is temporarily switched so set_src_cost sees the right
     optimization goal.  */
  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      zero_cost[speed] = set_src_cost (const0_rtx, speed);

      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
	   mode != VOIDmode;
	   mode = GET_MODE_WIDER_MODE (mode))
	{
	  PUT_MODE (&all.reg, mode);
	  PUT_MODE (&all.plus, mode);
	  PUT_MODE (&all.neg, mode);
	  PUT_MODE (&all.mult, mode);
	  PUT_MODE (&all.sdiv, mode);
	  PUT_MODE (&all.udiv, mode);
	  PUT_MODE (&all.sdiv_32, mode);
	  PUT_MODE (&all.smod_32, mode);
	  PUT_MODE (&all.wide_trunc, mode);
	  PUT_MODE (&all.shift, mode);
	  PUT_MODE (&all.shift_mult, mode);
	  PUT_MODE (&all.shift_add, mode);
	  PUT_MODE (&all.shift_sub0, mode);
	  PUT_MODE (&all.shift_sub1, mode);

	  add_cost[speed][mode] = set_src_cost (&all.plus, speed);
	  neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
	  mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
	  sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
	  udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);

	  sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
					  <= 2 * add_cost[speed][mode]);
	  smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
					  <= 4 * add_cost[speed][mode]);

	  wider_mode = GET_MODE_WIDER_MODE (mode);
	  if (wider_mode != VOIDmode)
	    {
	      PUT_MODE (&all.zext, wider_mode);
	      PUT_MODE (&all.wide_mult, wider_mode);
	      PUT_MODE (&all.wide_lshr, wider_mode);
	      XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

	      mul_widen_cost[speed][wider_mode]
		= set_src_cost (&all.wide_mult, speed);
	      mul_highpart_cost[speed][mode]
		= set_src_cost (&all.wide_trunc, speed);
	    }

	  shift_cost[speed][mode][0] = 0;
	  shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
	    = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];

	  n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
	  for (m = 1; m < n; m++)
	    {
	      XEXP (&all.shift, 1) = cint[m];
	      XEXP (&all.shift_mult, 1) = pow2[m];

	      shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
	      shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
							    speed);
	      shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
							     speed);
	      shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
							     speed);
	    }
	}
    }
  /* Invalidate the synth_mult hash cache, since the costs it was
     computed against may just have changed.  */
  if (alg_hash_used_p)
    memset (alg_hash, 0, sizeof (alg_hash));
  else
    alg_hash_used_p = true;
  default_rtl_profile ();
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.
*/

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  /* Fold at compile time if possible; otherwise emit a negation insn.  */
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */
enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data_d *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
	{
	  data = &insn_data[CODE_FOR_insv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
	{
	  data = &insn_data[CODE_FOR_extv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
	{
	  data = &insn_data[CODE_FOR_extzv];
	  break;
	}
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.
*/

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum,
		   unsigned HOST_WIDE_INT bitregion_start,
		   unsigned HOST_WIDE_INT bitregion_end,
		   enum machine_mode fieldmode,
		   rtx value, bool fallback_p)
{
  /* UNIT is the granularity BITNUM/BITPOS are measured in: bits within
     a byte for memory, bits within a word for registers.  */
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  /* Strip SUBREGs, folding their byte offset into BITNUM.  */
  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
	{
	  int difference = inner_mode_size - outer_mode_size;

	  if (WORDS_BIG_ENDIAN)
	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
	  if (BYTES_BIG_ENDIAN)
	    byte_offset += difference % UNITS_PER_WORD;
	}
      else
	byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
		+ (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	      || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	     && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
		 || validate_subreg (fieldmode, GET_MODE (op0), op0,
				     byte_offset)))
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
	op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
	op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
				   byte_offset);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else
	  {
	    gcc_assert (imode != BLKmode);
	    op0 = gen_lowpart (imode, op0);
	  }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  /* ?? not always for C++0x memory model ?? */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (arg0);
	}

      subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
		   + (offset * UNITS_PER_WORD);
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
	{
	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

	  create_fixed_operand (&ops[0], arg0);
	  /* Shrink the source operand to FIELDMODE.  */
	  create_convert_operand_to (&ops[1], value, fieldmode, false);
	  if (maybe_expand_insn (icode, 2, ops))
	    return true;
	}
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards
				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
				  - i - 1
				  : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
	  unsigned HOST_WIDE_INT new_bitsize =
	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

	  /* If the remaining chunk doesn't have full wordsize we have
	     to make sure that for big endian machines the higher order
	     bits are used.  */
	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
	    value_word = simplify_expand_binop (word_mode, lshr_optab,
						value_word,
						GEN_INT (BITS_PER_WORD
							 - new_bitsize),
						NULL_RTX, true,
						OPTAB_LIB_WIDEN);

	  /* Recurse word by word; undo everything on failure.  */
	  if (!store_bit_field_1 (op0, new_bitsize,
				  bitnum + bit_offset,
				  bitregion_start, bitregion_end,
				  word_mode,
				  value_word, fallback_p))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }

  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    {
	      /* Since this is a destination (lvalue), we can't copy
		 it to a pseudo.  We can remove a SUBREG that does not
		 change the size of the operand.  Such a SUBREG may
		 have been added above.  */
	      gcc_assert (GET_CODE (op0) == SUBREG
			  && (GET_MODE_SIZE (GET_MODE (op0))
			      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
	      op0 = SUBREG_REG (op0);
	    }
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      /* Do not use insv for volatile bitfields when
	 -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
	   && flag_strict_volatile_bitfields > 0)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
      /* Do not use insv if the bit region is restricted and
	 op_mode integer at offset doesn't fit into the
	 restricted region.  */
      && !(MEM_P (op0) && bitregion_end
	   && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
	      > bitregion_end + 1))
    {
      struct expand_operand ops[4];
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      bool copy_back = false;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
	xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);

      /* If the destination is a paradoxical subreg such that we need a
	 truncate to the inner mode, perform the insertion on a temporary and
	 truncate the result to the original destination.  Note that we can't
	 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
	 X) 0)) is (reg:N X).  */
      if (GET_CODE (xop0) == SUBREG
	  && REG_P (SUBREG_REG (xop0))
	  && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					      op_mode)))
	{
	  rtx tem = gen_reg_rtx (op_mode);
	  emit_move_insn (tem, xop0);
	  xop0 = tem;
	  copy_back = true;
	}

      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (op_mode) - unit;

      unit = GET_MODE_BITSIZE (op_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
	 "backwards" from the size of the unit we are inserting into.
	 Otherwise, we count bits from the most significant on a
	 BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != op_mode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization: Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
		{
		  rtx tmp;

		  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
		  if (! tmp)
		    tmp = simplify_gen_subreg (op_mode,
					       force_reg (GET_MODE (value),
							  value1),
					       GET_MODE (value), 0);
		  value1 = tmp;
		}
	      else
		value1 = gen_lowpart (op_mode, value1);
	    }
	  else if (CONST_INT_P (value))
	    value1 = gen_int_mode (INTVAL (value), op_mode);
	  else
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    gcc_assert (CONSTANT_P (value));
	}

      create_fixed_operand (&ops[0], xop0);
      create_integer_operand (&ops[1], bitsize);
      create_integer_operand (&ops[2], xbitpos);
      create_input_operand (&ops[3], value1, op_mode);
      if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
	{
	  if (copy_back)
	    convert_move (op0, xop0, true);
	  return true;
	}
      delete_insns_since (last);
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (HAVE_insv && MEM_P (op0))
    {
      enum machine_mode bestmode;
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
	maxbits = bitregion_end - bitregion_start + 1;

      /* Get the mode to use for inserting into this field.  If OP0 is
	 BLKmode, get the smallest mode consistent with the alignment. If
	 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
	 mode. Otherwise, use the smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
	  || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
	  || (op_mode != MAX_MACHINE_MODE
	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
	bestmode = get_best_mode (bitsize, bitnum,
				  bitregion_start, bitregion_end,
				  MEM_ALIGN (op0),
				  (op_mode == MAX_MACHINE_MODE
				   ? VOIDmode : op_mode),
				  MEM_VOLATILE_P (op0));
      else
	bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
	  && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	{
	  rtx last, tempreg, xop0;
	  unsigned HOST_WIDE_INT xoffset, xbitpos;

	  last = get_last_insn ();

	  /* Adjust address to point to the containing unit of
	     that mode.  Compute the offset as a multiple of this unit,
	     counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  xbitpos = bitnum % unit;
	  xop0 = adjust_address (op0, bestmode, xoffset);

	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
	  tempreg = copy_to_reg (xop0);
	  if (store_bit_field_1 (tempreg, bitsize, xbitpos,
				 bitregion_start, bitregion_end,
				 fieldmode, orig_value, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return false;

  /* Last resort: mask-and-shift sequence.  */
  store_fixed_bit_field (op0, offset, bitsize, bitpos,
			 bitregion_start, bitregion_end, value);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum,
		 unsigned HOST_WIDE_INT bitregion_start,
		 unsigned HOST_WIDE_INT bitregion_end,
		 enum machine_mode fieldmode,
		 rtx value)
{
  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      enum machine_mode bestmode;
      enum machine_mode op_mode;
      unsigned HOST_WIDE_INT offset;

      op_mode = mode_for_extraction (EP_insv, 3);
      if (op_mode == MAX_MACHINE_MODE)
	op_mode = VOIDmode;

      /* The region must begin on a byte boundary for the address
	 adjustment below to be exact.  */
      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      /* Rebase BITNUM and the region bounds so the region starts at 0.  */
      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
				bitregion_start, bitregion_end,
				MEM_ALIGN (str_rtx),
				op_mode,
				MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_address (str_rtx, bestmode, offset);
    }

  /* With FALLBACK_P true the worker must succeed.  */
  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, true))
    gcc_unreachable ();
}

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 value);
	  return;
	}
    }
  else
    {
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
	maxbits = bitregion_end - bitregion_start + 1;

      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.
	 We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;

      /* -fstrict-volatile-bitfields forces the access to use the
	 declared mode of a volatile MEM when it fits the region.  */
      if (MEM_VOLATILE_P (op0)
	  && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
	  && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
	  && flag_strict_volatile_bitfields > 0)
	mode = GET_MODE (op0);
      else
	mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			      bitregion_start, bitregion_end,
			      MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
				 bitregion_start, bitregion_end, value);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      /* Remember the degenerate cases so the AND and/or IOR below
	 can be skipped entirely.  */
      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitpos, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ?
GET_MODE (value) 1092 : word_mode, value)); 1093 } 1094 1095 while (bitsdone < bitsize) 1096 { 1097 unsigned HOST_WIDE_INT thissize; 1098 rtx part, word; 1099 unsigned HOST_WIDE_INT thispos; 1100 unsigned HOST_WIDE_INT offset; 1101 1102 offset = (bitpos + bitsdone) / unit; 1103 thispos = (bitpos + bitsdone) % unit; 1104 1105 /* When region of bytes we can touch is restricted, decrease 1106 UNIT close to the end of the region as needed. */ 1107 if (bitregion_end 1108 && unit > BITS_PER_UNIT 1109 && bitpos + bitsdone - thispos + unit > bitregion_end + 1) 1110 { 1111 unit = unit / 2; 1112 continue; 1113 } 1114 1115 /* THISSIZE must not overrun a word boundary. Otherwise, 1116 store_fixed_bit_field will call us again, and we will mutually 1117 recurse forever. */ 1118 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); 1119 thissize = MIN (thissize, unit - thispos); 1120 1121 if (BYTES_BIG_ENDIAN) 1122 { 1123 int total_bits; 1124 1125 /* We must do an endian conversion exactly the same way as it is 1126 done in extract_bit_field, so that the two calls to 1127 extract_fixed_bit_field will have comparable arguments. */ 1128 if (!MEM_P (value) || GET_MODE (value) == BLKmode) 1129 total_bits = BITS_PER_WORD; 1130 else 1131 total_bits = GET_MODE_BITSIZE (GET_MODE (value)); 1132 1133 /* Fetch successively less significant portions. */ 1134 if (CONST_INT_P (value)) 1135 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) 1136 >> (bitsize - bitsdone - thissize)) 1137 & (((HOST_WIDE_INT) 1 << thissize) - 1)); 1138 else 1139 /* The args are chosen so that the last part includes the 1140 lsb. Give extract_bit_field the value it needs (with 1141 endianness compensation) to fetch the piece we want. */ 1142 part = extract_fixed_bit_field (word_mode, value, 0, thissize, 1143 total_bits - bitsize + bitsdone, 1144 NULL_RTX, 1, false); 1145 } 1146 else 1147 { 1148 /* Fetch successively more significant portions. 
*/ 1149 if (CONST_INT_P (value)) 1150 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) 1151 >> bitsdone) 1152 & (((HOST_WIDE_INT) 1 << thissize) - 1)); 1153 else 1154 part = extract_fixed_bit_field (word_mode, value, 0, thissize, 1155 bitsdone, NULL_RTX, 1, false); 1156 } 1157 1158 /* If OP0 is a register, then handle OFFSET here. 1159 1160 When handling multiword bitfields, extract_bit_field may pass 1161 down a word_mode SUBREG of a larger REG for a bitfield that actually 1162 crosses a word boundary. Thus, for a SUBREG, we must find 1163 the current word starting from the base register. */ 1164 if (GET_CODE (op0) == SUBREG) 1165 { 1166 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; 1167 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0)); 1168 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD) 1169 word = word_offset ? const0_rtx : op0; 1170 else 1171 word = operand_subword_force (SUBREG_REG (op0), word_offset, 1172 GET_MODE (SUBREG_REG (op0))); 1173 offset = 0; 1174 } 1175 else if (REG_P (op0)) 1176 { 1177 enum machine_mode op0_mode = GET_MODE (op0); 1178 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD) 1179 word = offset ? const0_rtx : op0; 1180 else 1181 word = operand_subword_force (op0, offset, GET_MODE (op0)); 1182 offset = 0; 1183 } 1184 else 1185 word = op0; 1186 1187 /* OFFSET is in UNITs, and UNIT is in bits. 1188 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx, 1189 it is just an out-of-bounds access. Ignore it. */ 1190 if (word != const0_rtx) 1191 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize, 1192 thispos, bitregion_start, bitregion_end, part); 1193 bitsdone += thissize; 1194 } 1195 } 1196 1197 /* A subroutine of extract_bit_field_1 that converts return value X 1198 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments 1199 to extract_bit_field. 
 */

static rtx
convert_extracted_bit_field (rtx x, enum machine_mode mode,
			     enum machine_mode tmode, bool unsignedp)
{
  /* Nothing to do when X is already in one of the requested modes.  */
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      enum machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}

/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */

static rtx
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		     unsigned HOST_WIDE_INT bitnum,
		     int unsignedp, bool packedp, rtx target,
		     enum machine_mode mode, enum machine_mode tmode,
		     bool fallback_p)
{
  /* Addressing granularity: bytes for MEM, words for REG.  */
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  enum machine_mode int_mode;
  enum machine_mode ext_mode;
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  /* Strip SUBREGs, folding their byte offsets into BITNUM.  */
  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      enum machine_mode new_mode;

      /* Choose the smallest vector-mode family whose element class
	 matches TMODE's class.  */
      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
	new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
	new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
	new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
	new_mode = MIN_MODE_VECTOR_UACCUM;
      else
	new_mode = MIN_MODE_VECTOR_INT;

      /* Search for a same-sized, target-supported vector mode.  */
      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
	    && targetm.vector_mode_supported_p (new_mode))
	  break;
      if (new_mode != VOIDmode)
	op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  Only usable when the field lies entirely within one
     vector element.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_extract_optab, outermode);
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_output_operand (&ops[0], target, innermode);
      create_input_operand (&ops[1], op0, outermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	{
	  target = ops[0].value;
	  if (GET_MODE (target) != mode)
	    return gen_lowpart (tmode, target);
	  return target;
	}
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  {
	    op0 = gen_lowpart (imode, op0);

	    /* If we got a SUBREG, force it into a register since we
	       aren't going to be able to do another SUBREG on it.  */
	    if (GET_CODE (op0) == SUBREG)
	      op0 = force_reg (imode, op0);
	  }
	else if (REG_P (op0))
	  {
	    rtx reg, subreg;
	    imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
					    MODE_INT);
	    reg = gen_reg_rtx (imode);
	    subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
	    emit_move_insn (subreg, op0);
	    op0 = reg;
	    bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
	  }
	else
	  {
	    /* No integral mode covers OP0: spill it to a stack slot
	       and access the bits through memory.  */
	    rtx mem = assign_stack_temp (GET_MODE (op0),
					 GET_MODE_SIZE (GET_MODE (op0)), 0);
	    emit_move_insn (mem, op0);
	    op0 = adjust_address (mem, BLKmode, 0);
	  }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
	   ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
	   : mode);

  /* If the bitfield is volatile, we need to make sure the access
     remains on a type-aligned boundary.  */
  if (GET_CODE (op0) == MEM
      && MEM_VOLATILE_P (op0)
      && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
      && flag_strict_volatile_bitfields > 0)
    goto no_subreg_mode_swap;

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	&& bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
	   /* ??? The big endian test here is wrong.  This is correct
	      if the value is in a register, and if mode_for_size is not
	      the same mode as op0.  This causes us to get unnecessarily
	      inefficient code from the Thumb port when -mbig-endian.  */
	   && (BYTES_BIG_ENDIAN
	       ? bitpos + bitsize == BITS_PER_WORD
	       : bitpos == 0)))
      && ((!MEM_P (op0)
	   && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
	   && GET_MODE_SIZE (mode1) != 0
	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
	  || (MEM_P (op0)
	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
		  || (offset * BITS_PER_UNIT % bitsize == 0
		      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (MEM_P (op0))
	op0 = adjust_address (op0, mode1, offset);
      else if (mode1 != GET_MODE (op0))
	{
	  rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
					 byte_offset);
	  if (sub == NULL)
	    goto no_subreg_mode_swap;
	  op0 = sub;
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, false, target_part, mode,
				 word_mode);

	  gcc_assert (target_part);

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
    }

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */
  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    op0 = copy_to_reg (op0);
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */
  ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
  if (ext_mode != MAX_MACHINE_MODE
      && bitsize > 0
      && GET_MODE_BITSIZE (ext_mode) >= bitsize
      /* Do not use extv/extzv for volatile bitfields when
	 -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
	   && flag_strict_volatile_bitfields > 0)
      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
      && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
	   && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
    {
      struct expand_operand ops[4];
      unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
      rtx xop0 = op0;
      rtx xtarget = target;
      rtx xspec_target = target;
      rtx xspec_target_subreg = 0;

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
	xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
      if (MEM_P (xop0))
	/* Get ref to first byte containing part of the field.  */
	xop0 = adjust_address (xop0, byte_mode, xoffset);

      /* Now convert from counting within UNIT to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;

      unit = GET_MODE_BITSIZE (ext_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
	 "backwards" from the size of the unit we are extracting from.
	 Otherwise, we count bits from the most significant on a
	 BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      if (xtarget == 0)
	xtarget = xspec_target = gen_reg_rtx (tmode);

      if (GET_MODE (xtarget) != ext_mode)
	{
	  /* Don't use LHS paradoxical subreg if explicit truncation is needed
	     between the mode of the extraction (word_mode) and the target
	     mode.  Instead, create a temporary and use convert_move to set
	     the target.  */
	  if (REG_P (xtarget)
	      && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
	    {
	      xtarget = gen_lowpart (ext_mode, xtarget);
	      if (GET_MODE_PRECISION (ext_mode)
		  > GET_MODE_PRECISION (GET_MODE (xspec_target)))
		xspec_target_subreg = xtarget;
	    }
	  else
	    xtarget = gen_reg_rtx (ext_mode);
	}

      create_output_operand (&ops[0], xtarget, ext_mode);
      create_fixed_operand (&ops[1], xop0);
      create_integer_operand (&ops[2], bitsize);
      create_integer_operand (&ops[3], xbitpos);
      if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
			     4, ops))
	{
	  xtarget = ops[0].value;
	  if (xtarget == xspec_target)
	    return xtarget;
	  if (xtarget == xspec_target_subreg)
	    return xspec_target;
	  return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
	}
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
    {
      enum machine_mode bestmode;

      /* Get the mode to use for inserting into this field.  If
	 OP0 is BLKmode, get the smallest mode consistent with the
	 alignment.  If OP0 is a non-BLKmode object that is no
	 wider than EXT_MODE, use its mode.  Otherwise, use the
	 smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
	  || (ext_mode != MAX_MACHINE_MODE
	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
	bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
				  (ext_mode == MAX_MACHINE_MODE
				   ? VOIDmode : ext_mode),
				  MEM_VOLATILE_P (op0));
      else
	bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	{
	  unsigned HOST_WIDE_INT xoffset, xbitpos;

	  /* Compute the offset as a multiple of this unit,
	     counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  xbitpos = bitnum % unit;

	  /* Make sure the register is big enough for the whole field.  */
	  if (xoffset * BITS_PER_UNIT + unit
	      >= offset * BITS_PER_UNIT + bitsize)
	    {
	      rtx last, result, xop0;

	      last = get_last_insn ();

	      /* Fetch it to a register in that size.  */
	      xop0 = adjust_address (op0, bestmode, xoffset);
	      xop0 = force_reg (bestmode, xop0);
	      result = extract_bit_field_1 (xop0, bitsize, xbitpos,
					    unsignedp, packedp, target,
					    mode, tmode, false);
	      if (result)
		return result;

	      /* The recursive attempt failed: discard any insns it
		 emitted before falling through.  */
	      delete_insns_since (last);
	    }
	}
    }

  if (!fallback_p)
    return NULL;

  target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
				    bitpos, target, unsignedp, packedp);
  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
}

/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   PACKEDP is nonzero if the field has the packed attribute.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.
1699 1700 If a TARGET is specified and we can store in it at no extra cost, 1701 we do so, and return TARGET. 1702 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred 1703 if they are equally easy. */ 1704 1705 rtx 1706 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, 1707 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp, 1708 rtx target, enum machine_mode mode, enum machine_mode tmode) 1709 { 1710 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp, 1711 target, mode, tmode, true); 1712 } 1713 1714 /* Extract a bit field using shifts and boolean operations 1715 Returns an rtx to represent the value. 1716 OP0 addresses a register (word) or memory (byte). 1717 BITPOS says which bit within the word or byte the bit field starts in. 1718 OFFSET says how many bytes farther the bit field starts; 1719 it is 0 if OP0 is a register. 1720 BITSIZE says how many bits long the bit field is. 1721 (If OP0 is a register, it may be narrower than a full word, 1722 but BITPOS still counts within a full word, 1723 which is significant on bigendian machines.) 1724 1725 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). 1726 PACKEDP is true if the field has the packed attribute. 1727 1728 If TARGET is nonzero, attempts to store the value there 1729 and return TARGET, but this is not guaranteed. 1730 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ 1731 1732 static rtx 1733 extract_fixed_bit_field (enum machine_mode tmode, rtx op0, 1734 unsigned HOST_WIDE_INT offset, 1735 unsigned HOST_WIDE_INT bitsize, 1736 unsigned HOST_WIDE_INT bitpos, rtx target, 1737 int unsignedp, bool packedp) 1738 { 1739 unsigned int total_bits = BITS_PER_WORD; 1740 enum machine_mode mode; 1741 1742 if (GET_CODE (op0) == SUBREG || REG_P (op0)) 1743 { 1744 /* Special treatment for a bit field split across two registers. 
*/ 1745 if (bitsize + bitpos > BITS_PER_WORD) 1746 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp); 1747 } 1748 else 1749 { 1750 /* Get the proper mode to use for this field. We want a mode that 1751 includes the entire field. If such a mode would be larger than 1752 a word, we won't be doing the extraction the normal way. */ 1753 1754 if (MEM_VOLATILE_P (op0) 1755 && flag_strict_volatile_bitfields > 0) 1756 { 1757 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0) 1758 mode = GET_MODE (op0); 1759 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0) 1760 mode = GET_MODE (target); 1761 else 1762 mode = tmode; 1763 } 1764 else 1765 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0, 1766 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0)); 1767 1768 if (mode == VOIDmode) 1769 /* The only way this should occur is if the field spans word 1770 boundaries. */ 1771 return extract_split_bit_field (op0, bitsize, 1772 bitpos + offset * BITS_PER_UNIT, 1773 unsignedp); 1774 1775 total_bits = GET_MODE_BITSIZE (mode); 1776 1777 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to 1778 be in the range 0 to total_bits-1, and put any excess bytes in 1779 OFFSET. */ 1780 if (bitpos >= total_bits) 1781 { 1782 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT); 1783 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT) 1784 * BITS_PER_UNIT); 1785 } 1786 1787 /* If we're accessing a volatile MEM, we can't do the next 1788 alignment step if it results in a multi-word access where we 1789 otherwise wouldn't have one. So, check for that case 1790 here. 
*/ 1791 if (MEM_P (op0) 1792 && MEM_VOLATILE_P (op0) 1793 && flag_strict_volatile_bitfields > 0 1794 && bitpos + bitsize <= total_bits 1795 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits) 1796 { 1797 if (STRICT_ALIGNMENT) 1798 { 1799 static bool informed_about_misalignment = false; 1800 bool warned; 1801 1802 if (packedp) 1803 { 1804 if (bitsize == total_bits) 1805 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, 1806 "multiple accesses to volatile structure member" 1807 " because of packed attribute"); 1808 else 1809 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, 1810 "multiple accesses to volatile structure bitfield" 1811 " because of packed attribute"); 1812 1813 return extract_split_bit_field (op0, bitsize, 1814 bitpos + offset * BITS_PER_UNIT, 1815 unsignedp); 1816 } 1817 1818 if (bitsize == total_bits) 1819 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, 1820 "mis-aligned access used for structure member"); 1821 else 1822 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields, 1823 "mis-aligned access used for structure bitfield"); 1824 1825 if (! informed_about_misalignment && warned) 1826 { 1827 informed_about_misalignment = true; 1828 inform (input_location, 1829 "when a volatile object spans multiple type-sized locations," 1830 " the compiler must choose between using a single mis-aligned access to" 1831 " preserve the volatility, or using multiple aligned accesses to avoid" 1832 " runtime faults; this code may fail at runtime if the hardware does" 1833 " not allow this access"); 1834 } 1835 } 1836 } 1837 else 1838 { 1839 1840 /* Get ref to an aligned byte, halfword, or word containing the field. 1841 Adjust BITPOS to be position within a word, 1842 and OFFSET to be the offset of that word. 1843 Then alter OP0 to refer to that word. 
*/ 1844 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT; 1845 offset -= (offset % (total_bits / BITS_PER_UNIT)); 1846 } 1847 1848 op0 = adjust_address (op0, mode, offset); 1849 } 1850 1851 mode = GET_MODE (op0); 1852 1853 if (BYTES_BIG_ENDIAN) 1854 /* BITPOS is the distance between our msb and that of OP0. 1855 Convert it to the distance from the lsb. */ 1856 bitpos = total_bits - bitsize - bitpos; 1857 1858 /* Now BITPOS is always the distance between the field's lsb and that of OP0. 1859 We have reduced the big-endian case to the little-endian case. */ 1860 1861 if (unsignedp) 1862 { 1863 if (bitpos) 1864 { 1865 /* If the field does not already start at the lsb, 1866 shift it so it does. */ 1867 /* Maybe propagate the target for the shift. */ 1868 /* But not if we will return it--could confuse integrate.c. */ 1869 rtx subtarget = (target != 0 && REG_P (target) ? target : 0); 1870 if (tmode != mode) subtarget = 0; 1871 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1); 1872 } 1873 /* Convert the value to the desired mode. */ 1874 if (mode != tmode) 1875 op0 = convert_to_mode (tmode, op0, 1); 1876 1877 /* Unless the msb of the field used to be the msb when we shifted, 1878 mask out the upper bits. */ 1879 1880 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize) 1881 return expand_binop (GET_MODE (op0), and_optab, op0, 1882 mask_rtx (GET_MODE (op0), 0, bitsize, 0), 1883 target, 1, OPTAB_LIB_WIDEN); 1884 return op0; 1885 } 1886 1887 /* To extract a signed bit-field, first shift its msb to the msb of the word, 1888 then arithmetic-shift its lsb to the lsb of the word. */ 1889 op0 = force_reg (mode, op0); 1890 1891 /* Find the narrowest integer mode that contains the field. 
*/

  /* Widen OP0 to the narrowest integer mode wide enough to hold the
     whole field, so the sign-extending shift pair below operates on
     the complete value.  */
  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  /* TARGET has the caller's requested mode TMODE; it cannot hold an
     intermediate result in a different mode.  */
  if (mode != tmode)
    target = 0;

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  /* Arithmetic right shift brings the field's lsb back to bit 0 while
     replicating its sign bit through the upper bits.  */
  return expand_shift (RSHIFT_EXPR, mode, op0,
		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}

/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
   complement of that if COMPLEMENT.  The mask is truncated if
   necessary to the width of mode MODE.  The mask is zero-extended if
   BITSIZE+BITPOS is too small for MODE.  */

static rtx
mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
{
  double_int mask;

  /* Build BITSIZE low-order ones, then slide them up by BITPOS.  */
  mask = double_int_mask (bitsize);
  mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);

  if (complement)
    mask = double_int_not (mask);

  /* immed_double_int_const truncates the value to MODE's width.  */
  return immed_double_int_const (mask, mode);
}

/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.
 */

static rtx
lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
{
  double_int val;

  /* Zero-extend VALUE down to BITSIZE bits, then move it into field
     position.  */
  val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
  val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);

  return immed_double_int_const (val, mode);
}

/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */

static rtx
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
{
  unsigned int unit;
  unsigned int bitsdone = 0;	/* Number of field bits copied so far.  */
  rtx result = NULL_RTX;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* Pull the field out one aligned chunk at a time and OR the pieces
     together in WORD_MODE.  */
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_PER_UNIT.
	 OFFSET is in UNITs, and UNIT is in bits.
	 extract_fixed_bit_field wants offset in bytes.  */
      part = extract_fixed_bit_field (word_mode, word,
				      offset * unit / BITS_PER_UNIT,
				      thissize, thispos, 0, 1, false);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsize - bitsdone, 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsdone - thissize, 0, 1);
	}

      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}

/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
   the bit pattern.
   SRC_MODE is the mode of SRC; if this is smaller than
   MODE, fill the upper bits with zeros.  Fail if the layout of either
   mode is unknown (as for CC modes) or if the extraction would involve
   unprofitable mode punning.  Return the value on success, otherwise
   return null.

   This is different from gen_lowpart* in these respects:

     - the returned value must always be considered an rvalue

     - when MODE is wider than SRC_MODE, the extraction involves
       a zero extension

     - when MODE is smaller than SRC_MODE, the extraction involves
       a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).

   In other words, this routine performs a computation, whereas the
   gen_lowpart* routines are conceptually lvalue or rvalue subreg
   operations.  */

rtx
extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
{
  enum machine_mode int_mode, src_int_mode;

  /* Same mode: nothing to do.  */
  if (mode == src_mode)
    return src;

  if (CONSTANT_P (src))
    {
      /* simplify_gen_subreg can't be used here, as if simplify_subreg
	 fails, it will happily create (subreg (symbol_ref)) or similar
	 invalid SUBREGs.  */
      unsigned int byte = subreg_lowpart_offset (mode, src_mode);
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
      if (ret)
	return ret;

      if (GET_MODE (src) == VOIDmode
	  || !validate_subreg (mode, src_mode, src, byte))
	return NULL_RTX;

      /* Force the constant into a register so a valid SUBREG can be
	 built around it.  */
      src = force_reg (GET_MODE (src), src);
      return gen_rtx_SUBREG (mode, src, byte);
    }

  /* CC modes have no defined bit layout.  */
  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
    return NULL_RTX;

  /* Equal-width, tieable modes: a plain lowpart does the job.  */
  if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
      && MODES_TIEABLE_P (mode, src_mode))
    {
      rtx x = gen_lowpart_common (mode, src);
      if (x)
	return x;
    }

  /* Otherwise go through integer modes of the same sizes:
     SRC_MODE -> SRC_INT_MODE -> INT_MODE -> MODE.  */
  src_int_mode = int_mode_for_mode (src_mode);
  int_mode = int_mode_for_mode (mode);
  if (src_int_mode == BLKmode || int_mode == BLKmode)
    return NULL_RTX;

  if (!MODES_TIEABLE_P (src_int_mode, src_mode))
    return NULL_RTX;
  if (!MODES_TIEABLE_P (int_mode, mode))
    return NULL_RTX;

  src = gen_lowpart (src_int_mode, src);
  src = convert_modes (int_mode, src_int_mode, src, true);
  src = gen_lowpart (mode, src);
  return src;
}

/* Add INC into TARGET.  */

void
expand_inc (rtx target, rtx inc)
{
  /* Expand TARGET += INC, copying back only if expand_binop chose a
     different result register.  */
  rtx value = expand_binop (GET_MODE (target), add_optab,
			    target, inc,
			    target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Subtract DEC from TARGET.  */

void
expand_dec (rtx target, rtx dec)
{
  /* Expand TARGET -= DEC, copying back only if expand_binop chose a
     different result register.  */
  rtx value = expand_binop (GET_MODE (target), sub_optab,
			    target, dec,
			    target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

static rtx
expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
		rtx amount, rtx target, int unsignedp)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  enum machine_mode op1_mode;
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  if (SHIFT_COUNT_TRUNCATED)
    {
      /* The target ignores high bits of the count, so we may reduce a
	 constant count modulo the width, or strip a widening SUBREG
	 from a variable count.  */
      if (CONST_INT_P (op1)
	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
		       % GET_MODE_BITSIZE (mode));
      else if (GET_CODE (op1) == SUBREG
	       && subreg_lowpart_p (op1)
	       && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
	op1 = SUBREG_REG (op1);
    }

  /* Shifting by zero is a no-op.  */
  if (op1 == const0_rtx)
    return shifted;

  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
      && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
    {
      int i;
      for (i = 0; i < INTVAL (op1); i++)
	{
	  /* Each self-addition doubles the value: one shift step.  */
	  temp = force_reg (mode, shifted);
	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
				  unsignedp, OPTAB_LIB_WIDEN);
	}
      return shifted;
    }

  /* Try progressively more expensive strategies: direct insn, then
     widening, then a library call.  */
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
	methods = OPTAB_DIRECT;
      else if (attempt == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code below.  */

	      rtx subtarget = target == shifted ? 0 : target;
	      rtx new_amount, other_amount;
	      rtx temp1;

	      new_amount = op1;
	      if (CONST_INT_P (op1))
		other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
					- INTVAL (op1));
	      else
		other_amount
		  = simplify_gen_binary (MINUS, GET_MODE (op1),
					 GEN_INT (GET_MODE_PRECISION (mode)),
					 op1);

	      /* Both shifts read SHIFTED, so it must be stable in a
		 register.  */
	      shifted = force_reg (mode, shifted);

	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				     mode, shifted, new_amount, 0, 1);
	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				      mode, shifted, other_amount,
				      subtarget, 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? lrotate_optab : rrotate_optab,
			       shifted, op1, target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? lshift_optab : rshift_uns_optab,
			     shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */

	  temp = expand_binop (mode,
			       left ? lshift_optab : rshift_arith_optab,
			       shifted, op1, target, unsignedp, methods1);
	}

      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  gcc_assert (temp);
  return temp;
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
	      int amount, rtx target, int unsignedp)
{
  /* Constant-amount convenience wrapper around expand_shift_1.  */
  return expand_shift_1 (code, mode,
			 shifted, GEN_INT (amount), target, unsignedp);
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the tree for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
		       tree amount, rtx target, int unsignedp)
{
  /* Tree-amount convenience wrapper around expand_shift_1.  */
  return expand_shift_1 (code, mode,
			 shifted, expand_normal (amount), target, unsignedp);
}


/* Indicates the type of fixup needed after a constant multiplication.
   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
   the result should be negated, and ADD_VARIANT means that the
   multiplicand should be added to the result.
*/ 2358 enum mult_variant {basic_variant, negate_variant, add_variant}; 2359 2360 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, 2361 const struct mult_cost *, enum machine_mode mode); 2362 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT, 2363 struct algorithm *, enum mult_variant *, int); 2364 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, 2365 const struct algorithm *, enum mult_variant); 2366 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int, 2367 int, rtx *, int *, int *); 2368 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); 2369 static rtx extract_high_half (enum machine_mode, rtx); 2370 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); 2371 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, 2372 int, int); 2373 /* Compute and return the best algorithm for multiplying by T. 2374 The algorithm must cost less than cost_limit 2375 If retval.cost >= COST_LIMIT, no algorithm was found and all 2376 other field of the returned struct are undefined. 2377 MODE is the machine mode of the multiplication. */ 2378 2379 static void 2380 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, 2381 const struct mult_cost *cost_limit, enum machine_mode mode) 2382 { 2383 int m; 2384 struct algorithm *alg_in, *best_alg; 2385 struct mult_cost best_cost; 2386 struct mult_cost new_limit; 2387 int op_cost, op_latency; 2388 unsigned HOST_WIDE_INT orig_t = t; 2389 unsigned HOST_WIDE_INT q; 2390 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); 2391 int hash_index; 2392 bool cache_hit = false; 2393 enum alg_code cache_alg = alg_zero; 2394 bool speed = optimize_insn_for_speed_p (); 2395 2396 /* Indicate that no algorithm is yet found. If no algorithm 2397 is found, this value will be returned and indicate failure. 
*/ 2398 alg_out->cost.cost = cost_limit->cost + 1; 2399 alg_out->cost.latency = cost_limit->latency + 1; 2400 2401 if (cost_limit->cost < 0 2402 || (cost_limit->cost == 0 && cost_limit->latency <= 0)) 2403 return; 2404 2405 /* Restrict the bits of "t" to the multiplication's mode. */ 2406 t &= GET_MODE_MASK (mode); 2407 2408 /* t == 1 can be done in zero cost. */ 2409 if (t == 1) 2410 { 2411 alg_out->ops = 1; 2412 alg_out->cost.cost = 0; 2413 alg_out->cost.latency = 0; 2414 alg_out->op[0] = alg_m; 2415 return; 2416 } 2417 2418 /* t == 0 sometimes has a cost. If it does and it exceeds our limit, 2419 fail now. */ 2420 if (t == 0) 2421 { 2422 if (MULT_COST_LESS (cost_limit, zero_cost[speed])) 2423 return; 2424 else 2425 { 2426 alg_out->ops = 1; 2427 alg_out->cost.cost = zero_cost[speed]; 2428 alg_out->cost.latency = zero_cost[speed]; 2429 alg_out->op[0] = alg_zero; 2430 return; 2431 } 2432 } 2433 2434 /* We'll be needing a couple extra algorithm structures now. */ 2435 2436 alg_in = XALLOCA (struct algorithm); 2437 best_alg = XALLOCA (struct algorithm); 2438 best_cost = *cost_limit; 2439 2440 /* Compute the hash index. */ 2441 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; 2442 2443 /* See if we already know what to do for T. */ 2444 if (alg_hash[hash_index].t == t 2445 && alg_hash[hash_index].mode == mode 2446 && alg_hash[hash_index].mode == mode 2447 && alg_hash[hash_index].speed == speed 2448 && alg_hash[hash_index].alg != alg_unknown) 2449 { 2450 cache_alg = alg_hash[hash_index].alg; 2451 2452 if (cache_alg == alg_impossible) 2453 { 2454 /* The cache tells us that it's impossible to synthesize 2455 multiplication by T within alg_hash[hash_index].cost. */ 2456 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit)) 2457 /* COST_LIMIT is at least as restrictive as the one 2458 recorded in the hash table, in which case we have no 2459 hope of synthesizing a multiplication. Just 2460 return. 
*/ 2461 return; 2462 2463 /* If we get here, COST_LIMIT is less restrictive than the 2464 one recorded in the hash table, so we may be able to 2465 synthesize a multiplication. Proceed as if we didn't 2466 have the cache entry. */ 2467 } 2468 else 2469 { 2470 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost)) 2471 /* The cached algorithm shows that this multiplication 2472 requires more cost than COST_LIMIT. Just return. This 2473 way, we don't clobber this cache entry with 2474 alg_impossible but retain useful information. */ 2475 return; 2476 2477 cache_hit = true; 2478 2479 switch (cache_alg) 2480 { 2481 case alg_shift: 2482 goto do_alg_shift; 2483 2484 case alg_add_t_m2: 2485 case alg_sub_t_m2: 2486 goto do_alg_addsub_t_m2; 2487 2488 case alg_add_factor: 2489 case alg_sub_factor: 2490 goto do_alg_addsub_factor; 2491 2492 case alg_add_t2_m: 2493 goto do_alg_add_t2_m; 2494 2495 case alg_sub_t2_m: 2496 goto do_alg_sub_t2_m; 2497 2498 default: 2499 gcc_unreachable (); 2500 } 2501 } 2502 } 2503 2504 /* If we have a group of zero bits at the low-order part of T, try 2505 multiplying by the remaining bits and then doing a shift. */ 2506 2507 if ((t & 1) == 0) 2508 { 2509 do_alg_shift: 2510 m = floor_log2 (t & -t); /* m = number of low zero bits */ 2511 if (m < maxm) 2512 { 2513 q = t >> m; 2514 /* The function expand_shift will choose between a shift and 2515 a sequence of additions, so the observed cost is given as 2516 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). 
*/ 2517 op_cost = m * add_cost[speed][mode]; 2518 if (shift_cost[speed][mode][m] < op_cost) 2519 op_cost = shift_cost[speed][mode][m]; 2520 new_limit.cost = best_cost.cost - op_cost; 2521 new_limit.latency = best_cost.latency - op_cost; 2522 synth_mult (alg_in, q, &new_limit, mode); 2523 2524 alg_in->cost.cost += op_cost; 2525 alg_in->cost.latency += op_cost; 2526 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2527 { 2528 struct algorithm *x; 2529 best_cost = alg_in->cost; 2530 x = alg_in, alg_in = best_alg, best_alg = x; 2531 best_alg->log[best_alg->ops] = m; 2532 best_alg->op[best_alg->ops] = alg_shift; 2533 } 2534 2535 /* See if treating ORIG_T as a signed number yields a better 2536 sequence. Try this sequence only for a negative ORIG_T 2537 as it would be useless for a non-negative ORIG_T. */ 2538 if ((HOST_WIDE_INT) orig_t < 0) 2539 { 2540 /* Shift ORIG_T as follows because a right shift of a 2541 negative-valued signed type is implementation 2542 defined. */ 2543 q = ~(~orig_t >> m); 2544 /* The function expand_shift will choose between a shift 2545 and a sequence of additions, so the observed cost is 2546 given as MIN (m * add_cost[speed][mode], 2547 shift_cost[speed][mode][m]). */ 2548 op_cost = m * add_cost[speed][mode]; 2549 if (shift_cost[speed][mode][m] < op_cost) 2550 op_cost = shift_cost[speed][mode][m]; 2551 new_limit.cost = best_cost.cost - op_cost; 2552 new_limit.latency = best_cost.latency - op_cost; 2553 synth_mult (alg_in, q, &new_limit, mode); 2554 2555 alg_in->cost.cost += op_cost; 2556 alg_in->cost.latency += op_cost; 2557 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2558 { 2559 struct algorithm *x; 2560 best_cost = alg_in->cost; 2561 x = alg_in, alg_in = best_alg, best_alg = x; 2562 best_alg->log[best_alg->ops] = m; 2563 best_alg->op[best_alg->ops] = alg_shift; 2564 } 2565 } 2566 } 2567 if (cache_hit) 2568 goto done; 2569 } 2570 2571 /* If we have an odd number, add or subtract one. 
*/ 2572 if ((t & 1) != 0) 2573 { 2574 unsigned HOST_WIDE_INT w; 2575 2576 do_alg_addsub_t_m2: 2577 for (w = 1; (w & t) != 0; w <<= 1) 2578 ; 2579 /* If T was -1, then W will be zero after the loop. This is another 2580 case where T ends with ...111. Handling this with (T + 1) and 2581 subtract 1 produces slightly better code and results in algorithm 2582 selection much faster than treating it like the ...0111 case 2583 below. */ 2584 if (w == 0 2585 || (w > 2 2586 /* Reject the case where t is 3. 2587 Thus we prefer addition in that case. */ 2588 && t != 3)) 2589 { 2590 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ 2591 2592 op_cost = add_cost[speed][mode]; 2593 new_limit.cost = best_cost.cost - op_cost; 2594 new_limit.latency = best_cost.latency - op_cost; 2595 synth_mult (alg_in, t + 1, &new_limit, mode); 2596 2597 alg_in->cost.cost += op_cost; 2598 alg_in->cost.latency += op_cost; 2599 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2600 { 2601 struct algorithm *x; 2602 best_cost = alg_in->cost; 2603 x = alg_in, alg_in = best_alg, best_alg = x; 2604 best_alg->log[best_alg->ops] = 0; 2605 best_alg->op[best_alg->ops] = alg_sub_t_m2; 2606 } 2607 } 2608 else 2609 { 2610 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */ 2611 2612 op_cost = add_cost[speed][mode]; 2613 new_limit.cost = best_cost.cost - op_cost; 2614 new_limit.latency = best_cost.latency - op_cost; 2615 synth_mult (alg_in, t - 1, &new_limit, mode); 2616 2617 alg_in->cost.cost += op_cost; 2618 alg_in->cost.latency += op_cost; 2619 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2620 { 2621 struct algorithm *x; 2622 best_cost = alg_in->cost; 2623 x = alg_in, alg_in = best_alg, best_alg = x; 2624 best_alg->log[best_alg->ops] = 0; 2625 best_alg->op[best_alg->ops] = alg_add_t_m2; 2626 } 2627 } 2628 2629 /* We may be able to calculate a * -7, a * -15, a * -31, etc 2630 quickly with a - a * n for some appropriate constant n. 
*/ 2631 m = exact_log2 (-orig_t + 1); 2632 if (m >= 0 && m < maxm) 2633 { 2634 op_cost = shiftsub1_cost[speed][mode][m]; 2635 new_limit.cost = best_cost.cost - op_cost; 2636 new_limit.latency = best_cost.latency - op_cost; 2637 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode); 2638 2639 alg_in->cost.cost += op_cost; 2640 alg_in->cost.latency += op_cost; 2641 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2642 { 2643 struct algorithm *x; 2644 best_cost = alg_in->cost; 2645 x = alg_in, alg_in = best_alg, best_alg = x; 2646 best_alg->log[best_alg->ops] = m; 2647 best_alg->op[best_alg->ops] = alg_sub_t_m2; 2648 } 2649 } 2650 2651 if (cache_hit) 2652 goto done; 2653 } 2654 2655 /* Look for factors of t of the form 2656 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). 2657 If we find such a factor, we can multiply by t using an algorithm that 2658 multiplies by q, shift the result by m and add/subtract it to itself. 2659 2660 We search for large factors first and loop down, even if large factors 2661 are less probable than small; if we find a large factor we will find a 2662 good sequence quickly, and therefore be able to prune (by decreasing 2663 COST_LIMIT) the search. */ 2664 2665 do_alg_addsub_factor: 2666 for (m = floor_log2 (t - 1); m >= 2; m--) 2667 { 2668 unsigned HOST_WIDE_INT d; 2669 2670 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; 2671 if (t % d == 0 && t > d && m < maxm 2672 && (!cache_hit || cache_alg == alg_add_factor)) 2673 { 2674 /* If the target has a cheap shift-and-add instruction use 2675 that in preference to a shift insn followed by an add insn. 2676 Assume that the shift-and-add is "atomic" with a latency 2677 equal to its cost, otherwise assume that on superscalar 2678 hardware the shift may be executed concurrently with the 2679 earlier steps in the algorithm. 
*/ 2680 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; 2681 if (shiftadd_cost[speed][mode][m] < op_cost) 2682 { 2683 op_cost = shiftadd_cost[speed][mode][m]; 2684 op_latency = op_cost; 2685 } 2686 else 2687 op_latency = add_cost[speed][mode]; 2688 2689 new_limit.cost = best_cost.cost - op_cost; 2690 new_limit.latency = best_cost.latency - op_latency; 2691 synth_mult (alg_in, t / d, &new_limit, mode); 2692 2693 alg_in->cost.cost += op_cost; 2694 alg_in->cost.latency += op_latency; 2695 if (alg_in->cost.latency < op_cost) 2696 alg_in->cost.latency = op_cost; 2697 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2698 { 2699 struct algorithm *x; 2700 best_cost = alg_in->cost; 2701 x = alg_in, alg_in = best_alg, best_alg = x; 2702 best_alg->log[best_alg->ops] = m; 2703 best_alg->op[best_alg->ops] = alg_add_factor; 2704 } 2705 /* Other factors will have been taken care of in the recursion. */ 2706 break; 2707 } 2708 2709 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; 2710 if (t % d == 0 && t > d && m < maxm 2711 && (!cache_hit || cache_alg == alg_sub_factor)) 2712 { 2713 /* If the target has a cheap shift-and-subtract insn use 2714 that in preference to a shift insn followed by a sub insn. 2715 Assume that the shift-and-sub is "atomic" with a latency 2716 equal to it's cost, otherwise assume that on superscalar 2717 hardware the shift may be executed concurrently with the 2718 earlier steps in the algorithm. 
*/ 2719 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; 2720 if (shiftsub0_cost[speed][mode][m] < op_cost) 2721 { 2722 op_cost = shiftsub0_cost[speed][mode][m]; 2723 op_latency = op_cost; 2724 } 2725 else 2726 op_latency = add_cost[speed][mode]; 2727 2728 new_limit.cost = best_cost.cost - op_cost; 2729 new_limit.latency = best_cost.latency - op_latency; 2730 synth_mult (alg_in, t / d, &new_limit, mode); 2731 2732 alg_in->cost.cost += op_cost; 2733 alg_in->cost.latency += op_latency; 2734 if (alg_in->cost.latency < op_cost) 2735 alg_in->cost.latency = op_cost; 2736 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2737 { 2738 struct algorithm *x; 2739 best_cost = alg_in->cost; 2740 x = alg_in, alg_in = best_alg, best_alg = x; 2741 best_alg->log[best_alg->ops] = m; 2742 best_alg->op[best_alg->ops] = alg_sub_factor; 2743 } 2744 break; 2745 } 2746 } 2747 if (cache_hit) 2748 goto done; 2749 2750 /* Try shift-and-add (load effective address) instructions, 2751 i.e. do a*3, a*5, a*9. 
*/ 2752 if ((t & 1) != 0) 2753 { 2754 do_alg_add_t2_m: 2755 q = t - 1; 2756 q = q & -q; 2757 m = exact_log2 (q); 2758 if (m >= 0 && m < maxm) 2759 { 2760 op_cost = shiftadd_cost[speed][mode][m]; 2761 new_limit.cost = best_cost.cost - op_cost; 2762 new_limit.latency = best_cost.latency - op_cost; 2763 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); 2764 2765 alg_in->cost.cost += op_cost; 2766 alg_in->cost.latency += op_cost; 2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2768 { 2769 struct algorithm *x; 2770 best_cost = alg_in->cost; 2771 x = alg_in, alg_in = best_alg, best_alg = x; 2772 best_alg->log[best_alg->ops] = m; 2773 best_alg->op[best_alg->ops] = alg_add_t2_m; 2774 } 2775 } 2776 if (cache_hit) 2777 goto done; 2778 2779 do_alg_sub_t2_m: 2780 q = t + 1; 2781 q = q & -q; 2782 m = exact_log2 (q); 2783 if (m >= 0 && m < maxm) 2784 { 2785 op_cost = shiftsub0_cost[speed][mode][m]; 2786 new_limit.cost = best_cost.cost - op_cost; 2787 new_limit.latency = best_cost.latency - op_cost; 2788 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); 2789 2790 alg_in->cost.cost += op_cost; 2791 alg_in->cost.latency += op_cost; 2792 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2793 { 2794 struct algorithm *x; 2795 best_cost = alg_in->cost; 2796 x = alg_in, alg_in = best_alg, best_alg = x; 2797 best_alg->log[best_alg->ops] = m; 2798 best_alg->op[best_alg->ops] = alg_sub_t2_m; 2799 } 2800 } 2801 if (cache_hit) 2802 goto done; 2803 } 2804 2805 done: 2806 /* If best_cost has not decreased, we have not found any algorithm. */ 2807 if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) 2808 { 2809 /* We failed to find an algorithm. Record alg_impossible for 2810 this case (that is, <T, MODE, COST_LIMIT>) so that next time 2811 we are asked to find an algorithm for T within the same or 2812 lower COST_LIMIT, we can immediately return to the 2813 caller. 
*/ 2814 alg_hash[hash_index].t = t; 2815 alg_hash[hash_index].mode = mode; 2816 alg_hash[hash_index].speed = speed; 2817 alg_hash[hash_index].alg = alg_impossible; 2818 alg_hash[hash_index].cost = *cost_limit; 2819 return; 2820 } 2821 2822 /* Cache the result. */ 2823 if (!cache_hit) 2824 { 2825 alg_hash[hash_index].t = t; 2826 alg_hash[hash_index].mode = mode; 2827 alg_hash[hash_index].speed = speed; 2828 alg_hash[hash_index].alg = best_alg->op[best_alg->ops]; 2829 alg_hash[hash_index].cost.cost = best_cost.cost; 2830 alg_hash[hash_index].cost.latency = best_cost.latency; 2831 } 2832 2833 /* If we are getting a too long sequence for `struct algorithm' 2834 to record, make this search fail. */ 2835 if (best_alg->ops == MAX_BITS_PER_WORD) 2836 return; 2837 2838 /* Copy the algorithm from temporary space to the space at alg_out. 2839 We avoid using structure assignment because the majority of 2840 best_alg is normally undefined, and this is a critical function. */ 2841 alg_out->ops = best_alg->ops + 1; 2842 alg_out->cost = best_cost; 2843 memcpy (alg_out->op, best_alg->op, 2844 alg_out->ops * sizeof *alg_out->op); 2845 memcpy (alg_out->log, best_alg->log, 2846 alg_out->ops * sizeof *alg_out->log); 2847 } 2848 2849 /* Find the cheapest way of multiplying a value of mode MODE by VAL. 2850 Try three variations: 2851 2852 - a shift/add sequence based on VAL itself 2853 - a shift/add sequence based on -VAL, followed by a negation 2854 - a shift/add sequence based on VAL - 1, followed by an addition. 2855 2856 Return true if the cheapest of these cost less than MULT_COST, 2857 describing the algorithm in *ALG and final fixup in *VARIANT. 
*/

static bool
choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
                     struct algorithm *alg, enum mult_variant *variant,
                     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Fail quickly for impossible bounds.  */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with less
     than 2 * bits additions.  */
  op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  /* First try a plain shift/add sequence for VAL itself, bounded by
     the caller-supplied cost budget.  */
  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg, val, &limit, mode);

  /* Second variant: synthesize -VAL and negate the result.
     This works only if the inverted value actually fits in an
     `unsigned int' */
  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
    {
      op_cost = neg_cost[speed][mode];
      if (MULT_COST_LESS (&alg->cost, mult_cost))
        {
          limit.cost = alg->cost.cost - op_cost;
          limit.latency = alg->cost.latency - op_cost;
        }
      else
        {
          limit.cost = mult_cost - op_cost;
          limit.latency = mult_cost - op_cost;
        }

      synth_mult (&alg2, -val, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
        *alg = alg2, *variant = negate_variant;
    }

  /* Third variant: synthesize VAL - 1 and add OP0 once afterwards.
     This proves very useful for division-by-constant.  */
  op_cost = add_cost[speed][mode];
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  synth_mult (&alg2, val - 1, &limit, mode);
  alg2.cost.cost += op_cost;
  alg2.cost.latency += op_cost;
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
    *alg = alg2, *variant = add_variant;

  return MULT_COST_LESS (&alg->cost, mult_cost);
}

/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.  */

static rtx
expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
                   rtx target, const struct algorithm *alg,
                   enum mult_variant variant)
{
  /* VAL_SO_FAR mirrors the constant that ACCUM holds at each step; it is
     used for the REG_EQUAL notes below and the final consistency check.  */
  HOST_WIDE_INT val_so_far;
  rtx insn, accum, tem;
  int opno;
  enum machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, const0_rtx);
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When optimizing, let expand_* pick targets; otherwise reuse
         ACCUM (and TARGET for the final step) to limit register usage.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
           && !optimize)
          ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
        {
        case alg_shift:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          /* REG_EQUAL note will be attached to the following insn.  */
          emit_move_insn (accum, tem);
          val_so_far <<= log;
          break;

        case alg_add_t_m2:
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += (HOST_WIDE_INT) 1 << log;
          break;

        case alg_sub_t_m2:
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far -= (HOST_WIDE_INT) 1 << log;
          break;

        case alg_add_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) + 1;
          break;

        case alg_sub_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) - 1;
          break;

        case alg_add_factor:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += val_so_far << log;
          break;

        case alg_sub_factor:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
                                 (add_target
                                  ? add_target : (optimize ? 0 : tem)));
          val_so_far = (val_so_far << log) - val_so_far;
          break;

        default:
          gcc_unreachable ();
        }

      /* Write a REG_EQUAL note on the last insn so that we can cse
         multiplication sequences.  Note that if ACCUM is a SUBREG,
         we've set the inner register and must properly indicate
         that.  */

      tem = op0, nmode = mode;
      accum_inner = accum;
      if (GET_CODE (accum) == SUBREG)
        {
          accum_inner = SUBREG_REG (accum);
          nmode = GET_MODE (accum_inner);
          tem = gen_lowpart (nmode, op0);
        }

      insn = get_last_insn ();
      set_dst_reg_note (insn, REG_EQUAL,
                        gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
                        accum_inner);
    }

  /* Apply the final fixup chosen by choose_mult_variant.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  val &= GET_MODE_MASK (mode);
  val_so_far &= GET_MODE_MASK (mode);
  gcc_assert (val == val_so_far);

  return accum;
}

/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.
*/

rtx
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Handling const0_rtx here allows us to use zero as a rogue value for
     coeff below.  */
  if (op1 == const0_rtx)
    return const0_rtx;
  if (op1 == const1_rtx)
    return op0;
  if (op1 == constm1_rtx)
    return expand_unop (mode,
                        GET_MODE_CLASS (mode) == MODE_INT
                        && !unsignedp && flag_trapv
                        ? negv_optab : neg_optab,
                        op0, target, 0);

  /* These are the operations that are potentially turned into a sequence
     of shifts and additions.  */
  if (SCALAR_INT_MODE_P (mode)
      && (unsignedp || !flag_trapv))
    {
      HOST_WIDE_INT coeff = 0;
      /* A throwaway pseudo used only for the set_src_cost queries below.  */
      rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32 bit machine.  */

      if (CONST_INT_P (op1))
        {
          /* Attempt to handle multiplication of DImode values by negative
             coefficients, by performing the multiplication by a positive
             multiplier and then inverting the result.  */
          if (INTVAL (op1) < 0
              && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
            {
              /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
                 result is interpreted as an unsigned coefficient.
                 Exclude cost of op0 from max_cost to match the cost
                 calculation of the synth_mult.  */
              max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
                                        speed)
                          - neg_cost[speed][mode]);
              if (max_cost > 0
                  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
                                          &variant, max_cost))
                {
                  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
                                                NULL_RTX, &algorithm,
                                                variant);
                  return expand_unop (mode, neg_optab, temp, target, 0);
                }
            }
          else
            coeff = INTVAL (op1);
        }
      else if (GET_CODE (op1) == CONST_DOUBLE)
        {
          /* If we are multiplying in DImode, it may still be a win
             to try to work with shifts and adds.  */
          if (CONST_DOUBLE_HIGH (op1) == 0
              && CONST_DOUBLE_LOW (op1) > 0)
            coeff = CONST_DOUBLE_LOW (op1);
          else if (CONST_DOUBLE_LOW (op1) == 0
                   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
            {
              int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
                          + HOST_BITS_PER_WIDE_INT;
              return expand_shift (LSHIFT_EXPR, mode, op0,
                                   shift, target, unsignedp);
            }
        }

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */
      if (coeff != 0)
        {
          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 floor_log2 (coeff), target, unsignedp);

          /* Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  */
          max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            return expand_mult_const (mode, op0, coeff, target,
                                      &algorithm, variant);
        }
    }

  /* Put the constant operand (if any) second for the generic path.  */
  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx temp = op0;
      op0 = op1;
      op1 = temp;
    }

  /* Expand x*2.0 as x+x.  */
  if (GET_CODE (op1) == CONST_DOUBLE
      && SCALAR_FLOAT_MODE_P (mode))
    {
      REAL_VALUE_TYPE d;
      REAL_VALUE_FROM_CONST_DOUBLE (d, op1);

      if (REAL_VALUES_EQUAL (d, dconst2))
        {
          op0 = force_reg (GET_MODE (op0), op0);
          return expand_binop (mode, add_optab, op0, op0,
                               target, unsignedp, OPTAB_LIB_WIDEN);
        }
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode,
                      ! unsignedp
                      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
                      ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}

/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
   or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds.  */

rtx
expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
                      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  /* Only take the shift/add path when OP1 is a constant that survives
     conversion to MODE and whose value is representable in a
     HOST_WIDE_INT of that mode.  */
  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
                                this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
          || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
        {
          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
          return expand_shift (LSHIFT_EXPR, mode, op0,
                               floor_log2 (coeff), target, unsignedp);
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = mul_widen_cost[speed][mode];
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                               max_cost))
        {
          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
          return expand_mult_const (mode, op0, coeff, target,
                                    &algorithm, variant);
        }
    }
  return expand_binop (mode, this_optab, op0, op1, target,
                       unsignedp, OPTAB_LIB_WIDEN);
}

/* Return the smallest n such that 2**n >= X.  */

int
ceil_log2 (unsigned HOST_WIDE_INT x)
{
  return floor_log2 (x - 1) + 1;
}

/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.
*/

static
unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
                   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
{
  /* mlow/mhigh are double-word values held as a hi/lo pair each.  */
  HOST_WIDE_INT mhigh_hi, mlow_hi;
  unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
  int lgup, post_shift;
  int pow, pow2;
  unsigned HOST_WIDE_INT nl, dummy1;
  HOST_WIDE_INT nh, dummy2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* We could handle this with some effort, but this case is much
     better handled directly with a scc insn, so rely on caller using
     that.  */
  gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);

  /* mlow = 2^(N + lgup)/d */
  if (pow >= HOST_BITS_PER_WIDE_INT)
    {
      nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
      nl = 0;
    }
  else
    {
      nh = 0;
      nl = (unsigned HOST_WIDE_INT) 1 << pow;
    }
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
                        &mlow_lo, &mlow_hi, &dummy1, &dummy2);

  /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
  if (pow2 >= HOST_BITS_PER_WIDE_INT)
    nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
  else
    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
                        &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);

  gcc_assert (!mhigh_hi || nh - d < d);
  gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
  /* Assert that mlow < mhigh.  */
  gcc_assert (mlow_hi < mhigh_hi
              || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  Halve both bounds while mlow stays
     strictly below mhigh, decreasing the post shift accordingly.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
      if (ml_lo >= mh_lo)
        break;

      mlow_hi = 0;
      mlow_lo = ml_lo;
      mhigh_hi = 0;
      mhigh_lo = mh_lo;
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
      return mhigh_lo >= mask;
    }
  else
    {
      *multiplier_ptr = GEN_INT (mhigh_lo);
      return mhigh_hi;
    }
}

/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
   congruent to 1 (mod 2**N).  */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */

  /* The algorithm notes that the choice y = x satisfies
     x*y == 1 mod 2^3, since x is assumed odd.
     Each iteration doubles the number of bits of significance in y.  */

  unsigned HOST_WIDE_INT mask;
  unsigned HOST_WIDE_INT y = x;
  int nbit = 3;

  /* Guard the shift: 1 << HOST_BITS_PER_WIDE_INT would be undefined.  */
  mask = (n == HOST_BITS_PER_WIDE_INT
          ? ~(unsigned HOST_WIDE_INT) 0
          : ((unsigned HOST_WIDE_INT) 1 << n) - 1);

  while (nbit < n)
    {
      y = y * (2 - x*y) & mask;         /* Modulo 2^N */
      nbit *= 2;
    }
  return y;
}

/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
   become signed.

   The result is put in TARGET if that is convenient.

   MODE is the mode of operation.
*/

rtx
expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
                             rtx op1, rtx target, int unsignedp)
{
  rtx tem;
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;

  /* Correct for OP0's sign bit: add or subtract OP1 accordingly.  */
  tem = expand_shift (RSHIFT_EXPR, mode, op0,
                      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op1, NULL_RTX);
  adj_operand
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
                     adj_operand);

  /* Likewise for OP1's sign bit and OP0.  */
  tem = expand_shift (RSHIFT_EXPR, mode, op1,
                      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op0, NULL_RTX);
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
                          target);

  return target;
}

/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */

static rtx
extract_high_half (enum machine_mode mode, rtx op)
{
  enum machine_mode wider_mode;

  if (mode == word_mode)
    return gen_highpart (mode, op);

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
                     GET_MODE_BITSIZE (mode), 0, 1);
  return convert_modes (mode, wider_mode, op, 0);
}

/* Like expand_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.  */

static rtx
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  enum machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost[speed][mode] < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
          + 4 * add_cost[speed][mode] < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                            tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost[speed][wider_mode] < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
                          unsignedp, OPTAB_WIDEN);
      if (tem)
        return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
    {
      rtx insns, wop0, wop1;

      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean-up any instructions emitted by
         the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
                          unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
        {
          emit_insn (insns);
          return extract_high_half (mode, tem);
        }
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
          + 4 * add_cost[speed][mode] < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                              target, unsignedp);
        }
    }

  return 0;
}

/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation can not be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
                      rtx target, int unsignedp, int max_cost)
{
  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expand_mult_highpart_optab (mode, op0, op1, target,
                                       unsignedp, max_cost);

  extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost[speed][mode];
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
                           max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
         cheaper than the shift/add version.  */
      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
                                        alg.cost.cost + extra_cost);
      if (tem)
        return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
        tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expand_mult_highpart_optab (mode, op0, op1, target,
                                     unsignedp, max_cost);
}


/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */

static rtx
expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  unsigned HOST_WIDE_INT masklow, maskhigh;
  rtx result, temp, shift, label;
  int logd;

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
                                      mode, 0, -1);
      if (signmask)
        {
          signmask = force_reg (mode, signmask);
          masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
          shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

          /* Use the rtx_cost of a LSHIFTRT instruction to determine
             which instruction sequence to use.  If logical right shifts
             are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

          temp = gen_rtx_LSHIFTRT (mode, result, shift);
          if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
              || (set_src_cost (temp, optimize_insn_for_speed_p ())
                  > COSTS_N_INSNS (2)))
            {
              temp = expand_binop (mode, xor_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, xor_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          else
            {
              signmask = expand_binop (mode, lshr_optab, signmask, shift,
                                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
              signmask = force_reg (mode, signmask);

              temp = expand_binop (mode, add_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          return temp;
        }
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */

  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
      maskhigh = -1;
    }
  else
    maskhigh = (HOST_WIDE_INT) -1
                 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);

  temp = expand_binop (mode, and_optab, op0,
                       immed_double_const (masklow, maskhigh, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* Negative case: fix up the masked value so the remainder has the
     sign of the dividend.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  masklow = (HOST_WIDE_INT) -1 << logd;
  maskhigh = -1;
  temp = expand_binop (mode, ior_optab, temp,
                       immed_double_const (masklow, maskhigh, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}

/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  */

static rtx
expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp, label;
  int logd;

  logd = floor_log2 (d);

  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
                      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                           0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

#ifdef HAVE_conditional_move
  if (BRANCH_COST (optimize_insn_for_speed_p (), false)
      >= 2)
    {
      rtx temp2;

      /* ???  emit_conditional_move forces a stack adjustment via
         compare_from_rtx so, if the sequence is discarded, it will
         be lost.  Do it now instead.  */
      do_pending_stack_adjust ();

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
                           NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
                                     mode, temp, temp2, mode, 0);
      if (temp2)
        {
          rtx seq = get_insns ();
          end_sequence ();
          emit_insn (seq);
          return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
        }
      end_sequence ();
    }
#endif

  if (BRANCH_COST (optimize_insn_for_speed_p (),
                   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
        temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
                             NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
        temp = expand_shift (RSHIFT_EXPR, mode, temp,
                             ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                           0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Fallback: branch around the adjustment for negative dividends.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, GEN_INT (d - 1));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}

/* Emit the code to divide OP0 by OP1, putting the result in TARGET
   if that is convenient, and returning where the result is.
3816 You may request either the quotient or the remainder as the result; 3817 specify REM_FLAG nonzero to get the remainder. 3818 3819 CODE is the expression code for which kind of division this is; 3820 it controls how rounding is done. MODE is the machine mode to use. 3821 UNSIGNEDP nonzero means do unsigned division. */ 3822 3823 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI 3824 and then correct it by or'ing in missing high bits 3825 if result of ANDI is nonzero. 3826 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. 3827 This could optimize to a bfexts instruction. 3828 But C doesn't use these operations, so their optimizations are 3829 left for later. */ 3830 /* ??? For modulo, we don't actually need the highpart of the first product, 3831 the low part will do nicely. And for small divisors, the second multiply 3832 can also be a low-part only multiply or even be completely left out. 3833 E.g. to calculate the remainder of a division by 3 with a 32 bit 3834 multiply, multiply with 0x55555556 and extract the upper two bits; 3835 the result is exact for inputs up to 0x1fffffff. 3836 The input range can be reduced by using cross-sum rules. 3837 For odd divisors >= 3, the following table gives right shift counts 3838 so that if a number is shifted by an integer multiple of the given 3839 amount, the remainder stays the same: 3840 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, 3841 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, 3842 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, 3843 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, 3844 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 3845 3846 Cross-sum rules for even numbers can be derived by leaving as many bits 3847 to the right alone as the divisor has zeros to the right. 3848 E.g. 
if x is an unsigned 32 bit number: 3849 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 3850 */ 3851 3852 rtx 3853 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, 3854 rtx op0, rtx op1, rtx target, int unsignedp) 3855 { 3856 enum machine_mode compute_mode; 3857 rtx tquotient; 3858 rtx quotient = 0, remainder = 0; 3859 rtx last; 3860 int size; 3861 rtx insn; 3862 optab optab1, optab2; 3863 int op1_is_constant, op1_is_pow2 = 0; 3864 int max_cost, extra_cost; 3865 static HOST_WIDE_INT last_div_const = 0; 3866 static HOST_WIDE_INT ext_op1; 3867 bool speed = optimize_insn_for_speed_p (); 3868 3869 op1_is_constant = CONST_INT_P (op1); 3870 if (op1_is_constant) 3871 { 3872 ext_op1 = INTVAL (op1); 3873 if (unsignedp) 3874 ext_op1 &= GET_MODE_MASK (mode); 3875 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1) 3876 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1)))); 3877 } 3878 3879 /* 3880 This is the structure of expand_divmod: 3881 3882 First comes code to fix up the operands so we can perform the operations 3883 correctly and efficiently. 3884 3885 Second comes a switch statement with code specific for each rounding mode. 3886 For some special operands this code emits all RTL for the desired 3887 operation, for other cases, it generates only a quotient and stores it in 3888 QUOTIENT. The case for trunc division/remainder might leave quotient = 0, 3889 to indicate that it has not done anything. 3890 3891 Last comes code that finishes the operation. If QUOTIENT is set and 3892 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If 3893 QUOTIENT is not set, it is computed using trunc rounding. 3894 3895 We try to generate special code for division and remainder when OP1 is a 3896 constant. If |OP1| = 2**n we can use shifts and some other fast 3897 operations. 
For other values of OP1, we compute a carefully selected 3898 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 3899 by m. 3900 3901 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper 3902 half of the product. Different strategies for generating the product are 3903 implemented in expand_mult_highpart. 3904 3905 If what we actually want is the remainder, we generate that by another 3906 by-constant multiplication and a subtraction. */ 3907 3908 /* We shouldn't be called with OP1 == const1_rtx, but some of the 3909 code below will malfunction if we are, so check here and handle 3910 the special case if so. */ 3911 if (op1 == const1_rtx) 3912 return rem_flag ? const0_rtx : op0; 3913 3914 /* When dividing by -1, we could get an overflow. 3915 negv_optab can handle overflows. */ 3916 if (! unsignedp && op1 == constm1_rtx) 3917 { 3918 if (rem_flag) 3919 return const0_rtx; 3920 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT 3921 ? negv_optab : neg_optab, op0, target, 0); 3922 } 3923 3924 if (target 3925 /* Don't use the function value register as a target 3926 since we have to read it as well as write it, 3927 and function-inlining gets confused by this. */ 3928 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) 3929 /* Don't clobber an operand while doing a multi-step calculation. */ 3930 || ((rem_flag || op1_is_constant) 3931 && (reg_mentioned_p (target, op0) 3932 || (MEM_P (op0) && MEM_P (target)))) 3933 || reg_mentioned_p (target, op1) 3934 || (MEM_P (op1) && MEM_P (target)))) 3935 target = 0; 3936 3937 /* Get the mode in which to perform this computation. Normally it will 3938 be MODE, but sometimes we can't do the desired operation in MODE. 3939 If so, pick a wider mode in which we can do the operation. Convert 3940 to that mode at the start to avoid repeated conversions. 3941 3942 First see what operations we need. These depend on the expression 3943 we are evaluating. 
(We assume that divxx3 insns exist under the 3944 same conditions that modxx3 insns and that these insns don't normally 3945 fail. If these assumptions are not correct, we may generate less 3946 efficient code in some cases.) 3947 3948 Then see if we find a mode in which we can open-code that operation 3949 (either a division, modulus, or shift). Finally, check for the smallest 3950 mode for which we can do the operation with a library call. */ 3951 3952 /* We might want to refine this now that we have division-by-constant 3953 optimization. Since expand_mult_highpart tries so many variants, it is 3954 not straightforward to generalize this. Maybe we should make an array 3955 of possible modes in init_expmed? Save this for GCC 2.7. */ 3956 3957 optab1 = ((op1_is_pow2 && op1 != const0_rtx) 3958 ? (unsignedp ? lshr_optab : ashr_optab) 3959 : (unsignedp ? udiv_optab : sdiv_optab)); 3960 optab2 = ((op1_is_pow2 && op1 != const0_rtx) 3961 ? optab1 3962 : (unsignedp ? udivmod_optab : sdivmod_optab)); 3963 3964 for (compute_mode = mode; compute_mode != VOIDmode; 3965 compute_mode = GET_MODE_WIDER_MODE (compute_mode)) 3966 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing 3967 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) 3968 break; 3969 3970 if (compute_mode == VOIDmode) 3971 for (compute_mode = mode; compute_mode != VOIDmode; 3972 compute_mode = GET_MODE_WIDER_MODE (compute_mode)) 3973 if (optab_libfunc (optab1, compute_mode) 3974 || optab_libfunc (optab2, compute_mode)) 3975 break; 3976 3977 /* If we still couldn't find a mode, use MODE, but expand_binop will 3978 probably die. 
*/ 3979 if (compute_mode == VOIDmode) 3980 compute_mode = mode; 3981 3982 if (target && GET_MODE (target) == compute_mode) 3983 tquotient = target; 3984 else 3985 tquotient = gen_reg_rtx (compute_mode); 3986 3987 size = GET_MODE_BITSIZE (compute_mode); 3988 #if 0 3989 /* It should be possible to restrict the precision to GET_MODE_BITSIZE 3990 (mode), and thereby get better code when OP1 is a constant. Do that 3991 later. It will require going over all usages of SIZE below. */ 3992 size = GET_MODE_BITSIZE (mode); 3993 #endif 3994 3995 /* Only deduct something for a REM if the last divide done was 3996 for a different constant. Then set the constant of the last 3997 divide. */ 3998 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode]; 3999 if (rem_flag && ! (last_div_const != 0 && op1_is_constant 4000 && INTVAL (op1) == last_div_const)) 4001 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode]; 4002 4003 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; 4004 4005 /* Now convert to the best mode to use. */ 4006 if (compute_mode != mode) 4007 { 4008 op0 = convert_modes (compute_mode, mode, op0, unsignedp); 4009 op1 = convert_modes (compute_mode, mode, op1, unsignedp); 4010 4011 /* convert_modes may have placed op1 into a register, so we 4012 must recompute the following. */ 4013 op1_is_constant = CONST_INT_P (op1); 4014 op1_is_pow2 = (op1_is_constant 4015 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) 4016 || (! unsignedp 4017 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ; 4018 } 4019 4020 /* If one of the operands is a volatile MEM, copy it into a register. */ 4021 4022 if (MEM_P (op0) && MEM_VOLATILE_P (op0)) 4023 op0 = force_reg (compute_mode, op0); 4024 if (MEM_P (op1) && MEM_VOLATILE_P (op1)) 4025 op1 = force_reg (compute_mode, op1); 4026 4027 /* If we need the remainder or if OP1 is constant, we need to 4028 put OP0 in a register in case it has any queued subexpressions. 
*/ 4029 if (rem_flag || op1_is_constant) 4030 op0 = force_reg (compute_mode, op0); 4031 4032 last = get_last_insn (); 4033 4034 /* Promote floor rounding to trunc rounding for unsigned operations. */ 4035 if (unsignedp) 4036 { 4037 if (code == FLOOR_DIV_EXPR) 4038 code = TRUNC_DIV_EXPR; 4039 if (code == FLOOR_MOD_EXPR) 4040 code = TRUNC_MOD_EXPR; 4041 if (code == EXACT_DIV_EXPR && op1_is_pow2) 4042 code = TRUNC_DIV_EXPR; 4043 } 4044 4045 if (op1 != const0_rtx) 4046 switch (code) 4047 { 4048 case TRUNC_MOD_EXPR: 4049 case TRUNC_DIV_EXPR: 4050 if (op1_is_constant) 4051 { 4052 if (unsignedp) 4053 { 4054 unsigned HOST_WIDE_INT mh; 4055 int pre_shift, post_shift; 4056 int dummy; 4057 rtx ml; 4058 unsigned HOST_WIDE_INT d = (INTVAL (op1) 4059 & GET_MODE_MASK (compute_mode)); 4060 4061 if (EXACT_POWER_OF_2_OR_ZERO_P (d)) 4062 { 4063 pre_shift = floor_log2 (d); 4064 if (rem_flag) 4065 { 4066 remainder 4067 = expand_binop (compute_mode, and_optab, op0, 4068 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1), 4069 remainder, 1, 4070 OPTAB_LIB_WIDEN); 4071 if (remainder) 4072 return gen_lowpart (mode, remainder); 4073 } 4074 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4075 pre_shift, tquotient, 1); 4076 } 4077 else if (size <= HOST_BITS_PER_WIDE_INT) 4078 { 4079 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1))) 4080 { 4081 /* Most significant bit of divisor is set; emit an scc 4082 insn. */ 4083 quotient = emit_store_flag_force (tquotient, GEU, op0, op1, 4084 compute_mode, 1, 1); 4085 } 4086 else 4087 { 4088 /* Find a suitable multiplier and right shift count 4089 instead of multiplying with D. */ 4090 4091 mh = choose_multiplier (d, size, size, 4092 &ml, &post_shift, &dummy); 4093 4094 /* If the suggested multiplier is more than SIZE bits, 4095 we can do better for even divisors, using an 4096 initial right shift. 
*/ 4097 if (mh != 0 && (d & 1) == 0) 4098 { 4099 pre_shift = floor_log2 (d & -d); 4100 mh = choose_multiplier (d >> pre_shift, size, 4101 size - pre_shift, 4102 &ml, &post_shift, &dummy); 4103 gcc_assert (!mh); 4104 } 4105 else 4106 pre_shift = 0; 4107 4108 if (mh != 0) 4109 { 4110 rtx t1, t2, t3, t4; 4111 4112 if (post_shift - 1 >= BITS_PER_WORD) 4113 goto fail1; 4114 4115 extra_cost 4116 = (shift_cost[speed][compute_mode][post_shift - 1] 4117 + shift_cost[speed][compute_mode][1] 4118 + 2 * add_cost[speed][compute_mode]); 4119 t1 = expand_mult_highpart (compute_mode, op0, ml, 4120 NULL_RTX, 1, 4121 max_cost - extra_cost); 4122 if (t1 == 0) 4123 goto fail1; 4124 t2 = force_operand (gen_rtx_MINUS (compute_mode, 4125 op0, t1), 4126 NULL_RTX); 4127 t3 = expand_shift (RSHIFT_EXPR, compute_mode, 4128 t2, 1, NULL_RTX, 1); 4129 t4 = force_operand (gen_rtx_PLUS (compute_mode, 4130 t1, t3), 4131 NULL_RTX); 4132 quotient = expand_shift 4133 (RSHIFT_EXPR, compute_mode, t4, 4134 post_shift - 1, tquotient, 1); 4135 } 4136 else 4137 { 4138 rtx t1, t2; 4139 4140 if (pre_shift >= BITS_PER_WORD 4141 || post_shift >= BITS_PER_WORD) 4142 goto fail1; 4143 4144 t1 = expand_shift 4145 (RSHIFT_EXPR, compute_mode, op0, 4146 pre_shift, NULL_RTX, 1); 4147 extra_cost 4148 = (shift_cost[speed][compute_mode][pre_shift] 4149 + shift_cost[speed][compute_mode][post_shift]); 4150 t2 = expand_mult_highpart (compute_mode, t1, ml, 4151 NULL_RTX, 1, 4152 max_cost - extra_cost); 4153 if (t2 == 0) 4154 goto fail1; 4155 quotient = expand_shift 4156 (RSHIFT_EXPR, compute_mode, t2, 4157 post_shift, tquotient, 1); 4158 } 4159 } 4160 } 4161 else /* Too wide mode to use tricky code */ 4162 break; 4163 4164 insn = get_last_insn (); 4165 if (insn != last) 4166 set_dst_reg_note (insn, REG_EQUAL, 4167 gen_rtx_UDIV (compute_mode, op0, op1), 4168 quotient); 4169 } 4170 else /* TRUNC_DIV, signed */ 4171 { 4172 unsigned HOST_WIDE_INT ml; 4173 int lgup, post_shift; 4174 rtx mlr; 4175 HOST_WIDE_INT d = INTVAL (op1); 
4176 unsigned HOST_WIDE_INT abs_d; 4177 4178 /* Since d might be INT_MIN, we have to cast to 4179 unsigned HOST_WIDE_INT before negating to avoid 4180 undefined signed overflow. */ 4181 abs_d = (d >= 0 4182 ? (unsigned HOST_WIDE_INT) d 4183 : - (unsigned HOST_WIDE_INT) d); 4184 4185 /* n rem d = n rem -d */ 4186 if (rem_flag && d < 0) 4187 { 4188 d = abs_d; 4189 op1 = gen_int_mode (abs_d, compute_mode); 4190 } 4191 4192 if (d == 1) 4193 quotient = op0; 4194 else if (d == -1) 4195 quotient = expand_unop (compute_mode, neg_optab, op0, 4196 tquotient, 0); 4197 else if (HOST_BITS_PER_WIDE_INT >= size 4198 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1)) 4199 { 4200 /* This case is not handled correctly below. */ 4201 quotient = emit_store_flag (tquotient, EQ, op0, op1, 4202 compute_mode, 1, 1); 4203 if (quotient == 0) 4204 goto fail1; 4205 } 4206 else if (EXACT_POWER_OF_2_OR_ZERO_P (d) 4207 && (rem_flag ? smod_pow2_cheap[speed][compute_mode] 4208 : sdiv_pow2_cheap[speed][compute_mode]) 4209 /* We assume that cheap metric is true if the 4210 optab has an expander for this mode. */ 4211 && ((optab_handler ((rem_flag ? smod_optab 4212 : sdiv_optab), 4213 compute_mode) 4214 != CODE_FOR_nothing) 4215 || (optab_handler (sdivmod_optab, 4216 compute_mode) 4217 != CODE_FOR_nothing))) 4218 ; 4219 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) 4220 { 4221 if (rem_flag) 4222 { 4223 remainder = expand_smod_pow2 (compute_mode, op0, d); 4224 if (remainder) 4225 return gen_lowpart (mode, remainder); 4226 } 4227 4228 if (sdiv_pow2_cheap[speed][compute_mode] 4229 && ((optab_handler (sdiv_optab, compute_mode) 4230 != CODE_FOR_nothing) 4231 || (optab_handler (sdivmod_optab, compute_mode) 4232 != CODE_FOR_nothing))) 4233 quotient = expand_divmod (0, TRUNC_DIV_EXPR, 4234 compute_mode, op0, 4235 gen_int_mode (abs_d, 4236 compute_mode), 4237 NULL_RTX, 0); 4238 else 4239 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d); 4240 4241 /* We have computed OP0 / abs(OP1). 
If OP1 is negative, 4242 negate the quotient. */ 4243 if (d < 0) 4244 { 4245 insn = get_last_insn (); 4246 if (insn != last 4247 && abs_d < ((unsigned HOST_WIDE_INT) 1 4248 << (HOST_BITS_PER_WIDE_INT - 1))) 4249 set_dst_reg_note (insn, REG_EQUAL, 4250 gen_rtx_DIV (compute_mode, op0, 4251 gen_int_mode 4252 (abs_d, 4253 compute_mode)), 4254 quotient); 4255 4256 quotient = expand_unop (compute_mode, neg_optab, 4257 quotient, quotient, 0); 4258 } 4259 } 4260 else if (size <= HOST_BITS_PER_WIDE_INT) 4261 { 4262 choose_multiplier (abs_d, size, size - 1, 4263 &mlr, &post_shift, &lgup); 4264 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr); 4265 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1)) 4266 { 4267 rtx t1, t2, t3; 4268 4269 if (post_shift >= BITS_PER_WORD 4270 || size - 1 >= BITS_PER_WORD) 4271 goto fail1; 4272 4273 extra_cost = (shift_cost[speed][compute_mode][post_shift] 4274 + shift_cost[speed][compute_mode][size - 1] 4275 + add_cost[speed][compute_mode]); 4276 t1 = expand_mult_highpart (compute_mode, op0, mlr, 4277 NULL_RTX, 0, 4278 max_cost - extra_cost); 4279 if (t1 == 0) 4280 goto fail1; 4281 t2 = expand_shift 4282 (RSHIFT_EXPR, compute_mode, t1, 4283 post_shift, NULL_RTX, 0); 4284 t3 = expand_shift 4285 (RSHIFT_EXPR, compute_mode, op0, 4286 size - 1, NULL_RTX, 0); 4287 if (d < 0) 4288 quotient 4289 = force_operand (gen_rtx_MINUS (compute_mode, 4290 t3, t2), 4291 tquotient); 4292 else 4293 quotient 4294 = force_operand (gen_rtx_MINUS (compute_mode, 4295 t2, t3), 4296 tquotient); 4297 } 4298 else 4299 { 4300 rtx t1, t2, t3, t4; 4301 4302 if (post_shift >= BITS_PER_WORD 4303 || size - 1 >= BITS_PER_WORD) 4304 goto fail1; 4305 4306 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); 4307 mlr = gen_int_mode (ml, compute_mode); 4308 extra_cost = (shift_cost[speed][compute_mode][post_shift] 4309 + shift_cost[speed][compute_mode][size - 1] 4310 + 2 * add_cost[speed][compute_mode]); 4311 t1 = expand_mult_highpart (compute_mode, op0, mlr, 4312 NULL_RTX, 0, 4313 max_cost - 
extra_cost); 4314 if (t1 == 0) 4315 goto fail1; 4316 t2 = force_operand (gen_rtx_PLUS (compute_mode, 4317 t1, op0), 4318 NULL_RTX); 4319 t3 = expand_shift 4320 (RSHIFT_EXPR, compute_mode, t2, 4321 post_shift, NULL_RTX, 0); 4322 t4 = expand_shift 4323 (RSHIFT_EXPR, compute_mode, op0, 4324 size - 1, NULL_RTX, 0); 4325 if (d < 0) 4326 quotient 4327 = force_operand (gen_rtx_MINUS (compute_mode, 4328 t4, t3), 4329 tquotient); 4330 else 4331 quotient 4332 = force_operand (gen_rtx_MINUS (compute_mode, 4333 t3, t4), 4334 tquotient); 4335 } 4336 } 4337 else /* Too wide mode to use tricky code */ 4338 break; 4339 4340 insn = get_last_insn (); 4341 if (insn != last) 4342 set_dst_reg_note (insn, REG_EQUAL, 4343 gen_rtx_DIV (compute_mode, op0, op1), 4344 quotient); 4345 } 4346 break; 4347 } 4348 fail1: 4349 delete_insns_since (last); 4350 break; 4351 4352 case FLOOR_DIV_EXPR: 4353 case FLOOR_MOD_EXPR: 4354 /* We will come here only for signed operations. */ 4355 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) 4356 { 4357 unsigned HOST_WIDE_INT mh; 4358 int pre_shift, lgup, post_shift; 4359 HOST_WIDE_INT d = INTVAL (op1); 4360 rtx ml; 4361 4362 if (d > 0) 4363 { 4364 /* We could just as easily deal with negative constants here, 4365 but it does not seem worth the trouble for GCC 2.6. 
*/ 4366 if (EXACT_POWER_OF_2_OR_ZERO_P (d)) 4367 { 4368 pre_shift = floor_log2 (d); 4369 if (rem_flag) 4370 { 4371 remainder = expand_binop (compute_mode, and_optab, op0, 4372 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1), 4373 remainder, 0, OPTAB_LIB_WIDEN); 4374 if (remainder) 4375 return gen_lowpart (mode, remainder); 4376 } 4377 quotient = expand_shift 4378 (RSHIFT_EXPR, compute_mode, op0, 4379 pre_shift, tquotient, 0); 4380 } 4381 else 4382 { 4383 rtx t1, t2, t3, t4; 4384 4385 mh = choose_multiplier (d, size, size - 1, 4386 &ml, &post_shift, &lgup); 4387 gcc_assert (!mh); 4388 4389 if (post_shift < BITS_PER_WORD 4390 && size - 1 < BITS_PER_WORD) 4391 { 4392 t1 = expand_shift 4393 (RSHIFT_EXPR, compute_mode, op0, 4394 size - 1, NULL_RTX, 0); 4395 t2 = expand_binop (compute_mode, xor_optab, op0, t1, 4396 NULL_RTX, 0, OPTAB_WIDEN); 4397 extra_cost = (shift_cost[speed][compute_mode][post_shift] 4398 + shift_cost[speed][compute_mode][size - 1] 4399 + 2 * add_cost[speed][compute_mode]); 4400 t3 = expand_mult_highpart (compute_mode, t2, ml, 4401 NULL_RTX, 1, 4402 max_cost - extra_cost); 4403 if (t3 != 0) 4404 { 4405 t4 = expand_shift 4406 (RSHIFT_EXPR, compute_mode, t3, 4407 post_shift, NULL_RTX, 1); 4408 quotient = expand_binop (compute_mode, xor_optab, 4409 t4, t1, tquotient, 0, 4410 OPTAB_WIDEN); 4411 } 4412 } 4413 } 4414 } 4415 else 4416 { 4417 rtx nsign, t1, t2, t3, t4; 4418 t1 = force_operand (gen_rtx_PLUS (compute_mode, 4419 op0, constm1_rtx), NULL_RTX); 4420 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX, 4421 0, OPTAB_WIDEN); 4422 nsign = expand_shift 4423 (RSHIFT_EXPR, compute_mode, t2, 4424 size - 1, NULL_RTX, 0); 4425 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign), 4426 NULL_RTX); 4427 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1, 4428 NULL_RTX, 0); 4429 if (t4) 4430 { 4431 rtx t5; 4432 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign, 4433 NULL_RTX, 0); 4434 quotient = force_operand (gen_rtx_PLUS 
(compute_mode, 4435 t4, t5), 4436 tquotient); 4437 } 4438 } 4439 } 4440 4441 if (quotient != 0) 4442 break; 4443 delete_insns_since (last); 4444 4445 /* Try using an instruction that produces both the quotient and 4446 remainder, using truncation. We can easily compensate the quotient 4447 or remainder to get floor rounding, once we have the remainder. 4448 Notice that we compute also the final remainder value here, 4449 and return the result right away. */ 4450 if (target == 0 || GET_MODE (target) != compute_mode) 4451 target = gen_reg_rtx (compute_mode); 4452 4453 if (rem_flag) 4454 { 4455 remainder 4456 = REG_P (target) ? target : gen_reg_rtx (compute_mode); 4457 quotient = gen_reg_rtx (compute_mode); 4458 } 4459 else 4460 { 4461 quotient 4462 = REG_P (target) ? target : gen_reg_rtx (compute_mode); 4463 remainder = gen_reg_rtx (compute_mode); 4464 } 4465 4466 if (expand_twoval_binop (sdivmod_optab, op0, op1, 4467 quotient, remainder, 0)) 4468 { 4469 /* This could be computed with a branch-less sequence. 4470 Save that for later. */ 4471 rtx tem; 4472 rtx label = gen_label_rtx (); 4473 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); 4474 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4475 NULL_RTX, 0, OPTAB_WIDEN); 4476 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); 4477 expand_dec (quotient, const1_rtx); 4478 expand_inc (remainder, op1); 4479 emit_label (label); 4480 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4481 } 4482 4483 /* No luck with division elimination or divmod. Have to do it 4484 by conditionally adjusting op0 *and* the result. 
*/ 4485 { 4486 rtx label1, label2, label3, label4, label5; 4487 rtx adjusted_op0; 4488 rtx tem; 4489 4490 quotient = gen_reg_rtx (compute_mode); 4491 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4492 label1 = gen_label_rtx (); 4493 label2 = gen_label_rtx (); 4494 label3 = gen_label_rtx (); 4495 label4 = gen_label_rtx (); 4496 label5 = gen_label_rtx (); 4497 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); 4498 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); 4499 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4500 quotient, 0, OPTAB_LIB_WIDEN); 4501 if (tem != quotient) 4502 emit_move_insn (quotient, tem); 4503 emit_jump_insn (gen_jump (label5)); 4504 emit_barrier (); 4505 emit_label (label1); 4506 expand_inc (adjusted_op0, const1_rtx); 4507 emit_jump_insn (gen_jump (label4)); 4508 emit_barrier (); 4509 emit_label (label2); 4510 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); 4511 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4512 quotient, 0, OPTAB_LIB_WIDEN); 4513 if (tem != quotient) 4514 emit_move_insn (quotient, tem); 4515 emit_jump_insn (gen_jump (label5)); 4516 emit_barrier (); 4517 emit_label (label3); 4518 expand_dec (adjusted_op0, const1_rtx); 4519 emit_label (label4); 4520 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4521 quotient, 0, OPTAB_LIB_WIDEN); 4522 if (tem != quotient) 4523 emit_move_insn (quotient, tem); 4524 expand_dec (quotient, const1_rtx); 4525 emit_label (label5); 4526 } 4527 break; 4528 4529 case CEIL_DIV_EXPR: 4530 case CEIL_MOD_EXPR: 4531 if (unsignedp) 4532 { 4533 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))) 4534 { 4535 rtx t1, t2, t3; 4536 unsigned HOST_WIDE_INT d = INTVAL (op1); 4537 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4538 floor_log2 (d), tquotient, 1); 4539 t2 = expand_binop (compute_mode, and_optab, op0, 4540 GEN_INT (d - 1), 4541 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4542 t3 = 
gen_reg_rtx (compute_mode); 4543 t3 = emit_store_flag (t3, NE, t2, const0_rtx, 4544 compute_mode, 1, 1); 4545 if (t3 == 0) 4546 { 4547 rtx lab; 4548 lab = gen_label_rtx (); 4549 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); 4550 expand_inc (t1, const1_rtx); 4551 emit_label (lab); 4552 quotient = t1; 4553 } 4554 else 4555 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4556 t1, t3), 4557 tquotient); 4558 break; 4559 } 4560 4561 /* Try using an instruction that produces both the quotient and 4562 remainder, using truncation. We can easily compensate the 4563 quotient or remainder to get ceiling rounding, once we have the 4564 remainder. Notice that we compute also the final remainder 4565 value here, and return the result right away. */ 4566 if (target == 0 || GET_MODE (target) != compute_mode) 4567 target = gen_reg_rtx (compute_mode); 4568 4569 if (rem_flag) 4570 { 4571 remainder = (REG_P (target) 4572 ? target : gen_reg_rtx (compute_mode)); 4573 quotient = gen_reg_rtx (compute_mode); 4574 } 4575 else 4576 { 4577 quotient = (REG_P (target) 4578 ? target : gen_reg_rtx (compute_mode)); 4579 remainder = gen_reg_rtx (compute_mode); 4580 } 4581 4582 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, 4583 remainder, 1)) 4584 { 4585 /* This could be computed with a branch-less sequence. 4586 Save that for later. */ 4587 rtx label = gen_label_rtx (); 4588 do_cmp_and_jump (remainder, const0_rtx, EQ, 4589 compute_mode, label); 4590 expand_inc (quotient, const1_rtx); 4591 expand_dec (remainder, op1); 4592 emit_label (label); 4593 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4594 } 4595 4596 /* No luck with division elimination or divmod. Have to do it 4597 by conditionally adjusting op0 *and* the result. 
*/ 4598 { 4599 rtx label1, label2; 4600 rtx adjusted_op0, tem; 4601 4602 quotient = gen_reg_rtx (compute_mode); 4603 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4604 label1 = gen_label_rtx (); 4605 label2 = gen_label_rtx (); 4606 do_cmp_and_jump (adjusted_op0, const0_rtx, NE, 4607 compute_mode, label1); 4608 emit_move_insn (quotient, const0_rtx); 4609 emit_jump_insn (gen_jump (label2)); 4610 emit_barrier (); 4611 emit_label (label1); 4612 expand_dec (adjusted_op0, const1_rtx); 4613 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, 4614 quotient, 1, OPTAB_LIB_WIDEN); 4615 if (tem != quotient) 4616 emit_move_insn (quotient, tem); 4617 expand_inc (quotient, const1_rtx); 4618 emit_label (label2); 4619 } 4620 } 4621 else /* signed */ 4622 { 4623 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) 4624 && INTVAL (op1) >= 0) 4625 { 4626 /* This is extremely similar to the code for the unsigned case 4627 above. For 2.7 we should merge these variants, but for 4628 2.6.1 I don't want to touch the code for unsigned since that 4629 get used in C. The signed case will only be used by other 4630 languages (Ada). */ 4631 4632 rtx t1, t2, t3; 4633 unsigned HOST_WIDE_INT d = INTVAL (op1); 4634 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4635 floor_log2 (d), tquotient, 0); 4636 t2 = expand_binop (compute_mode, and_optab, op0, 4637 GEN_INT (d - 1), 4638 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4639 t3 = gen_reg_rtx (compute_mode); 4640 t3 = emit_store_flag (t3, NE, t2, const0_rtx, 4641 compute_mode, 1, 1); 4642 if (t3 == 0) 4643 { 4644 rtx lab; 4645 lab = gen_label_rtx (); 4646 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); 4647 expand_inc (t1, const1_rtx); 4648 emit_label (lab); 4649 quotient = t1; 4650 } 4651 else 4652 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4653 t1, t3), 4654 tquotient); 4655 break; 4656 } 4657 4658 /* Try using an instruction that produces both the quotient and 4659 remainder, using truncation. 
We can easily compensate the 4660 quotient or remainder to get ceiling rounding, once we have the 4661 remainder. Notice that we compute also the final remainder 4662 value here, and return the result right away. */ 4663 if (target == 0 || GET_MODE (target) != compute_mode) 4664 target = gen_reg_rtx (compute_mode); 4665 if (rem_flag) 4666 { 4667 remainder= (REG_P (target) 4668 ? target : gen_reg_rtx (compute_mode)); 4669 quotient = gen_reg_rtx (compute_mode); 4670 } 4671 else 4672 { 4673 quotient = (REG_P (target) 4674 ? target : gen_reg_rtx (compute_mode)); 4675 remainder = gen_reg_rtx (compute_mode); 4676 } 4677 4678 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, 4679 remainder, 0)) 4680 { 4681 /* This could be computed with a branch-less sequence. 4682 Save that for later. */ 4683 rtx tem; 4684 rtx label = gen_label_rtx (); 4685 do_cmp_and_jump (remainder, const0_rtx, EQ, 4686 compute_mode, label); 4687 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4688 NULL_RTX, 0, OPTAB_WIDEN); 4689 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); 4690 expand_inc (quotient, const1_rtx); 4691 expand_dec (remainder, op1); 4692 emit_label (label); 4693 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4694 } 4695 4696 /* No luck with division elimination or divmod. Have to do it 4697 by conditionally adjusting op0 *and* the result. 
*/ 4698 { 4699 rtx label1, label2, label3, label4, label5; 4700 rtx adjusted_op0; 4701 rtx tem; 4702 4703 quotient = gen_reg_rtx (compute_mode); 4704 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4705 label1 = gen_label_rtx (); 4706 label2 = gen_label_rtx (); 4707 label3 = gen_label_rtx (); 4708 label4 = gen_label_rtx (); 4709 label5 = gen_label_rtx (); 4710 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); 4711 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, 4712 compute_mode, label1); 4713 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4714 quotient, 0, OPTAB_LIB_WIDEN); 4715 if (tem != quotient) 4716 emit_move_insn (quotient, tem); 4717 emit_jump_insn (gen_jump (label5)); 4718 emit_barrier (); 4719 emit_label (label1); 4720 expand_dec (adjusted_op0, const1_rtx); 4721 emit_jump_insn (gen_jump (label4)); 4722 emit_barrier (); 4723 emit_label (label2); 4724 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, 4725 compute_mode, label3); 4726 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4727 quotient, 0, OPTAB_LIB_WIDEN); 4728 if (tem != quotient) 4729 emit_move_insn (quotient, tem); 4730 emit_jump_insn (gen_jump (label5)); 4731 emit_barrier (); 4732 emit_label (label3); 4733 expand_inc (adjusted_op0, const1_rtx); 4734 emit_label (label4); 4735 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4736 quotient, 0, OPTAB_LIB_WIDEN); 4737 if (tem != quotient) 4738 emit_move_insn (quotient, tem); 4739 expand_inc (quotient, const1_rtx); 4740 emit_label (label5); 4741 } 4742 } 4743 break; 4744 4745 case EXACT_DIV_EXPR: 4746 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) 4747 { 4748 HOST_WIDE_INT d = INTVAL (op1); 4749 unsigned HOST_WIDE_INT ml; 4750 int pre_shift; 4751 rtx t1; 4752 4753 pre_shift = floor_log2 (d & -d); 4754 ml = invert_mod2n (d >> pre_shift, size); 4755 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4756 pre_shift, NULL_RTX, unsignedp); 4757 quotient = expand_mult (compute_mode, t1, 
4758 gen_int_mode (ml, compute_mode), 4759 NULL_RTX, 1); 4760 4761 insn = get_last_insn (); 4762 set_dst_reg_note (insn, REG_EQUAL, 4763 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, 4764 compute_mode, op0, op1), 4765 quotient); 4766 } 4767 break; 4768 4769 case ROUND_DIV_EXPR: 4770 case ROUND_MOD_EXPR: 4771 if (unsignedp) 4772 { 4773 rtx tem; 4774 rtx label; 4775 label = gen_label_rtx (); 4776 quotient = gen_reg_rtx (compute_mode); 4777 remainder = gen_reg_rtx (compute_mode); 4778 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) 4779 { 4780 rtx tem; 4781 quotient = expand_binop (compute_mode, udiv_optab, op0, op1, 4782 quotient, 1, OPTAB_LIB_WIDEN); 4783 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1); 4784 remainder = expand_binop (compute_mode, sub_optab, op0, tem, 4785 remainder, 1, OPTAB_LIB_WIDEN); 4786 } 4787 tem = plus_constant (op1, -1); 4788 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1); 4789 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label); 4790 expand_inc (quotient, const1_rtx); 4791 expand_dec (remainder, op1); 4792 emit_label (label); 4793 } 4794 else 4795 { 4796 rtx abs_rem, abs_op1, tem, mask; 4797 rtx label; 4798 label = gen_label_rtx (); 4799 quotient = gen_reg_rtx (compute_mode); 4800 remainder = gen_reg_rtx (compute_mode); 4801 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) 4802 { 4803 rtx tem; 4804 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1, 4805 quotient, 0, OPTAB_LIB_WIDEN); 4806 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0); 4807 remainder = expand_binop (compute_mode, sub_optab, op0, tem, 4808 remainder, 0, OPTAB_LIB_WIDEN); 4809 } 4810 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0); 4811 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0); 4812 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem, 4813 1, NULL_RTX, 1); 4814 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label); 
4815 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4816 NULL_RTX, 0, OPTAB_WIDEN); 4817 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem, 4818 size - 1, NULL_RTX, 0); 4819 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx, 4820 NULL_RTX, 0, OPTAB_WIDEN); 4821 tem = expand_binop (compute_mode, sub_optab, tem, mask, 4822 NULL_RTX, 0, OPTAB_WIDEN); 4823 expand_inc (quotient, tem); 4824 tem = expand_binop (compute_mode, xor_optab, mask, op1, 4825 NULL_RTX, 0, OPTAB_WIDEN); 4826 tem = expand_binop (compute_mode, sub_optab, tem, mask, 4827 NULL_RTX, 0, OPTAB_WIDEN); 4828 expand_dec (remainder, tem); 4829 emit_label (label); 4830 } 4831 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4832 4833 default: 4834 gcc_unreachable (); 4835 } 4836 4837 if (quotient == 0) 4838 { 4839 if (target && GET_MODE (target) != compute_mode) 4840 target = 0; 4841 4842 if (rem_flag) 4843 { 4844 /* Try to produce the remainder without producing the quotient. 4845 If we seem to have a divmod pattern that does not require widening, 4846 don't try widening here. We should really have a WIDEN argument 4847 to expand_twoval_binop, since what we'd really like to do here is 4848 1) try a mod insn in compute_mode 4849 2) try a divmod insn in compute_mode 4850 3) try a div insn in compute_mode and multiply-subtract to get 4851 remainder 4852 4) try the same things with widening allowed. */ 4853 remainder 4854 = sign_expand_binop (compute_mode, umod_optab, smod_optab, 4855 op0, op1, target, 4856 unsignedp, 4857 ((optab_handler (optab2, compute_mode) 4858 != CODE_FOR_nothing) 4859 ? OPTAB_DIRECT : OPTAB_WIDEN)); 4860 if (remainder == 0) 4861 { 4862 /* No luck there. Can we do remainder and divide at once 4863 without a library call? */ 4864 remainder = gen_reg_rtx (compute_mode); 4865 if (! expand_twoval_binop ((unsignedp 4866 ? 
udivmod_optab 4867 : sdivmod_optab), 4868 op0, op1, 4869 NULL_RTX, remainder, unsignedp)) 4870 remainder = 0; 4871 } 4872 4873 if (remainder) 4874 return gen_lowpart (mode, remainder); 4875 } 4876 4877 /* Produce the quotient. Try a quotient insn, but not a library call. 4878 If we have a divmod in this mode, use it in preference to widening 4879 the div (for this test we assume it will not fail). Note that optab2 4880 is set to the one of the two optabs that the call below will use. */ 4881 quotient 4882 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, 4883 op0, op1, rem_flag ? NULL_RTX : target, 4884 unsignedp, 4885 ((optab_handler (optab2, compute_mode) 4886 != CODE_FOR_nothing) 4887 ? OPTAB_DIRECT : OPTAB_WIDEN)); 4888 4889 if (quotient == 0) 4890 { 4891 /* No luck there. Try a quotient-and-remainder insn, 4892 keeping the quotient alone. */ 4893 quotient = gen_reg_rtx (compute_mode); 4894 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, 4895 op0, op1, 4896 quotient, NULL_RTX, unsignedp)) 4897 { 4898 quotient = 0; 4899 if (! rem_flag) 4900 /* Still no luck. If we are not computing the remainder, 4901 use a library call for the quotient. */ 4902 quotient = sign_expand_binop (compute_mode, 4903 udiv_optab, sdiv_optab, 4904 op0, op1, target, 4905 unsignedp, OPTAB_LIB_WIDEN); 4906 } 4907 } 4908 } 4909 4910 if (rem_flag) 4911 { 4912 if (target && GET_MODE (target) != compute_mode) 4913 target = 0; 4914 4915 if (quotient == 0) 4916 { 4917 /* No divide instruction either. Use library for remainder. */ 4918 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, 4919 op0, op1, target, 4920 unsignedp, OPTAB_LIB_WIDEN); 4921 /* No remainder function. Try a quotient-and-remainder 4922 function, keeping the remainder. */ 4923 if (!remainder) 4924 { 4925 remainder = gen_reg_rtx (compute_mode); 4926 if (!expand_twoval_binop_libfunc 4927 (unsignedp ? 
		 udivmod_optab : sdivmod_optab,
		 op0, op1,
		 NULL_RTX, remainder,
		 unsignedp ? UMOD : MOD))
		remainder = NULL_RTX;
	    }
	}
      else
	{
	  /* We divided.  Now finish doing X - Y * (X / Y).  */
	  remainder = expand_mult (compute_mode, quotient, op1,
				   NULL_RTX, unsignedp);
	  remainder = expand_binop (compute_mode, sub_optab, op0,
				    remainder, target, unsignedp,
				    OPTAB_LIB_WIDEN);
	}
    }

  return gen_lowpart (mode, rem_flag ? remainder : quotient);
}

/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is an VAR_DECL, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */

tree
make_tree (tree type, rtx x)
{
  tree t;

  switch (GET_CODE (x))
    {
    case CONST_INT:
      {
	HOST_WIDE_INT hi = 0;

	/* Sign-extend the constant into the high word unless TYPE is
	   unsigned and narrower than a HOST_WIDE_INT, in which case the
	   value is zero-extended.  */
	if (INTVAL (x) < 0
	    && !(TYPE_UNSIGNED (type)
		 && (GET_MODE_BITSIZE (TYPE_MODE (type))
		     < HOST_BITS_PER_WIDE_INT)))
	  hi = -1;

	t = build_int_cst_wide (type, INTVAL (x), hi);

	return t;
      }

    case CONST_DOUBLE:
      /* A VOIDmode CONST_DOUBLE holds a double-word integer; otherwise
	 it holds a floating-point value.  */
      if (GET_MODE (x) == VOIDmode)
	t = build_int_cst_wide (type,
				CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
      else
	{
	  REAL_VALUE_TYPE d;

	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
	  t = build_real (type, d);
	}

      return t;

    case CONST_VECTOR:
      {
	int units = CONST_VECTOR_NUNITS (x);
	tree itype = TREE_TYPE (type);
	tree t = NULL_TREE;
	int i;

	/* Build a tree with vector elements.  Iterate from the last
	   element down so the cons list ends up in element order.  */
	for (i = units - 1; i >= 0; --i)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
	  }

	return build_vector (type, t);
      }

    case PLUS:
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case MINUS:
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case NEG:
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));

    case MULT:
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case ASHIFT:
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case LSHIFTRT:
      /* A logical right shift is a signed-agnostic RSHIFT_EXPR on the
	 corresponding unsigned type, converted back to TYPE.  */
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case ASHIFTRT:
      /* Likewise, an arithmetic right shift uses the signed type.  */
      t = signed_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case DIV:
      if (TREE_CODE (type) != REAL_TYPE)
	t = signed_type_for (type);
      else
	t = type;

      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));
    case UDIV:
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* Translate the extension into a conversion from the narrower
	 type matching the operand's mode.  */
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
					  GET_CODE (x) == ZERO_EXTEND);
      return fold_convert (type, make_tree (t, XEXP (x, 0)));

    case CONST:
      return make_tree (type, XEXP (x, 0));

    case SYMBOL_REF:
      t = SYMBOL_REF_DECL (x);
      if (t)
	return fold_convert (type, build_fold_addr_expr (t));
      /* else fall through.  */

    default:
      /* No direct translation: wrap X in an anonymous VAR_DECL whose
	 DECL_RTL is X itself.  */
      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);

      /* If TYPE is a POINTER_TYPE, we might need to convert X from
	 address mode to pointer mode.  */
      if (POINTER_TYPE_P (type))
	x = convert_memory_address_addr_space
	      (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));

      /* Note that we do *not* use SET_DECL_RTL here, because we do not
	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
      t->decl_with_rtl.rtl = x;

      return t;
    }
}

/* Compute the logical-and of OP0 and OP1, storing it in TARGET
   and returning TARGET.

   If TARGET is 0, a pseudo-register or constant is returned.  */

rtx
expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
{
  rtx tem = 0;

  /* If both operands are constant (VOIDmode), try constant folding
     first; otherwise emit an AND insn, widening if necessary.  */
  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
    tem = simplify_binary_operation (AND, mode, op0, op1);
  if (tem == 0)
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);

  if (target == 0)
    target = tem;
  else if (tem != target)
    emit_move_insn (target, tem);
  return target;
}

/* Helper function for emit_store_flag.
   Emit a cstore insn ICODE computing CODE on X and Y, then normalize the
   result according to NORMALIZEP (0 = raw, 1 = 0/1, -1 = 0/-1) and place
   it in TARGET (allocated in TARGET_MODE if null).  Return the result rtx,
   or NULL_RTX (after deleting any emitted insns) on failure.  */
static rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     enum machine_mode mode, enum machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     enum machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, last, comparison, subtarget;
  enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;

  /* Checkpoint the insn stream so we can back out if the operands or
     the pattern cannot be used.  */
  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    target_mode = result_mode;
  if (!target)
    target = gen_reg_rtx (target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  /* Operand 0 is the output, 1 the comparison, 2 and 3 its operands.  */
  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
		    val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      gcc_assert (STORE_FLAG_VALUE & 1);

      /* Isolate bit 0 of the raw result, then negate if -1/0 wanted.  */
      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}


/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.
 */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   enum machine_mode mode, int unsignedp, int normalizep,
		   enum machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  enum machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;
  rtx tem;

  if (unsignedp)
    code = unsigned_condition (code);
  /* SCODE is CODE with its operands swapped, used for the reversed
     floating-point attempt below.  */
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      tem = op0;
      op0 = op1;
      op1 = tem;
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  */
	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, mode,
				      subreg_highpart_offset (word_mode,
							      mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
    {
      subtarget = target;

      if (!target)
	target_mode = mode;

      /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
	{
	  op0 = convert_modes (target_mode, mode, op0, 0);
	  mode = target_mode;
	}

      if (target_mode != mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
			    GET_MODE_BITSIZE (mode) - 1,
			    subtarget, normalizep != -1);

      if (mode != target_mode)
	op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }

  /* Finally, try a cstore pattern in MODE or any wider mode.  */
  mclass = GET_MODE_CLASS (mode);
  for (compare_mode = mode; compare_mode != VOIDmode;
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
    {
      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  tem = emit_cstore (target, icode, code, mode, compare_mode,
			     unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  /* For floating point, also try with the operands swapped and
	     the condition swapped to match.  */
	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}

/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  Normalize is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
		 enum machine_mode mode, int unsignedp, int normalizep)
{
  enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, last, trueval;

  /* First try the non-recursive tricks.  */
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
			   target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
	normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
	;
      else
	return 0;
    }

  /* Checkpoint so failed attempts below can be deleted.  */
  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos */
  subtarget = (!optimize
	       && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
	  && (code == ORDERED || code == UNORDERED
	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
	{
	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));

	  /* For the reverse comparison, use either an addition or a XOR.  */
	  if (want_add
	      && rtx_cost (GEN_INT (normalizep), PLUS, 1,
			   optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	      if (tem)
		return expand_binop (target_mode, add_optab, tem,
				     GEN_INT (normalizep),
				     target, 0, OPTAB_WIDEN);
	    }
	  else if (!want_add
		   && rtx_cost (trueval, XOR, 1,
				optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	      if (tem)
		return expand_binop (target_mode, xor_optab, tem, trueval,
				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
	    }
	}

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
	return 0;

      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
	 Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
	{
	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
				    target_mode);
	}

#ifdef HAVE_conditional_move
      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
	 conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
			       normalizep, target_mode);
      if (tem == 0)
	return 0;

      if (and_them)
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     tem, const0_rtx, GET_MODE (tem), 0);
      else
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
	delete_insns_since (last);
      return tem;
#else
      return 0;
#endif
    }

  /* The remaining tricks only apply to integer comparisons.  */

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return 0;

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
			  OPTAB_WIDEN);

      if (tem == 0)
	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
			    OPTAB_WIDEN);
      if (tem != 0)
	tem = emit_store_flag (target, code, tem, const0_rtx,
			       mode, unsignedp, normalizep);
      if (tem != 0)
	return tem;

      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
	    && code == NE
	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	    && op1 == const0_rtx))
    {
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
	  && rtx_cost (GEN_INT (normalizep), PLUS, 1,
		       optimize_insn_for_speed_p ()) == 0)
	{
	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				   STORE_FLAG_VALUE, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, add_optab, tem,
				GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
	}
      else if (!want_add
	       && rtx_cost (trueval, XOR, 1,
			    optimize_insn_for_speed_p ()) == 0)
	{
	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				   normalizep, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
				INTVAL (trueval) >= 0, OPTAB_WIDEN);
	}

      if (tem != 0)
	return tem;
      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
	  && (BRANCH_COST (optimize_insn_for_speed_p (),
			   false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      if (tem)
	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			    OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
			  GET_MODE_BITSIZE (mode) - 1,
			  subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			  OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
	 that converts the operand into a positive number if it is nonzero
	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
	 for NE we negate.  This puts the result in the sign bit.  Then we
	 normalize with a shift, if needed.

	 Two operations that can do the above actions are ABS and FFS, so try
	 them.  If that doesn't work, and MODE is smaller than a full word,
	 we can use zero-extension to the wider mode (an unsigned conversion)
	 as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	{
	  tem = convert_modes (word_mode, mode, op0, 1);
	  mode = word_mode;
	}

      if (tem != 0)
	{
	  if (code == EQ)
	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
				0, OPTAB_WIDEN);
	  else
	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
	}

      /* If we couldn't do it that way, for NE we can "or" the two's complement
	 of the value with itself.  For EQ, we take the one's complement of
	 that "or", which is an extra insn, so we only handle EQ if branches
	 are expensive.  */

      if (tem == 0
	  && (code == NE
	      || BRANCH_COST (optimize_insn_for_speed_p (),
			      false) > 1))
	{
	  if (rtx_equal_p (subtarget, op0))
	    subtarget = 0;

	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
			      OPTAB_WIDEN);

	  if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }

  /* The result is now in the sign bit; shift it down to bit 0,
     logically for a 0/1 result, arithmetically for 0/-1.  */
  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
			GET_MODE_BITSIZE (mode) - 1,
			subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
	;
      else if (GET_MODE (tem) != target_mode)
	{
	  convert_move (target, tem, 0);
	  tem = target;
	}
      else if (!subtarget)
	{
	  emit_move_insn (target, tem);
	  tem = target;
	}
    }
  else
    delete_insns_since (last);

  return tem;
}

/* Like emit_store_flag, but always succeeds.
 */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
		       enum machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem, label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
			       mode, NULL_RTX, NULL_RTX, label, -1);
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  /* TARGET must not be referenced by OP0/OP1: we store into it before
     the comparison is emitted.  */
  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
	  || code == ORDERED || code == UNORDERED
	  || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	  || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
	rcode = reverse_condition_maybe_unordered (code);
      else
	rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
	  || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
	{
	  falseval = trueval;
	  trueval = const0_rtx;
	  code = rcode;
	}
    }

  /* Emit: TARGET = trueval; if (!(OP0 CODE OP1)) TARGET = falseval;  */
  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
			   NULL_RTX, label, -1);

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}

/* Perform possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
   now a thin wrapper around do_compare_rtx_and_jump.  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
		 rtx label)
{
  /* The unsigned rtx comparison codes determine the signedness used
     for the underlying compare.  */
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
			   NULL_RTX, NULL_RTX, label, -1);
}